diff options
Diffstat (limited to 'tools')
42 files changed, 1883 insertions, 236 deletions
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 067e9ea59e3b..8727765add88 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -722,6 +722,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) { + char prog_name[MAX_PROG_FULL_NAME]; unsigned int id = 0; int fd, nb_fds = 0; void *tmp; @@ -754,12 +755,20 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) goto err_close_fd; } - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) { close(fd); continue; } + if (!tag) { + get_prog_full_name(&info, fd, prog_name, + sizeof(prog_name)); + if (strncmp(nametag, prog_name, sizeof(prog_name))) { + close(fd); + continue; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -820,7 +829,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) NEXT_ARGP(); name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + if (strlen(name) > MAX_PROG_FULL_NAME - 1) { p_err("can't parse name"); return -1; } diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 7ecabf7947fb..36cf0f1517c9 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -1147,7 +1147,7 @@ exit_free: return res; #else /* Detection assumes user has specific privileges. - * We do not use libpcap so let's approximate, and restrict usage to + * We do not use libcap so let's approximate, and restrict usage to * root user only. */ if (geteuid()) { diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 451cefc2d0da..ccd7457f92bf 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -435,6 +435,16 @@ int main(int argc, char **argv) setlinebuf(stdout); +#ifdef USE_LIBCAP + /* Libcap < 2.63 hooks before main() to compute the number of + * capabilities of the running kernel, and doing so it calls prctl() + * which may fail and set errno to non-zero. + * Let's reset errno to make sure this does not interfere with the + * batch mode. + */ + errno = 0; +#endif + last_do_help = do_help; pretty_output = false; json_output = false; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 59a217ca2dfd..1d6085e15fc8 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2573,10 +2573,12 @@ union bpf_attr { * There are two supported modes at this time: * * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer - * (room space is added or removed below the layer 2 header). + * (room space is added or removed between the layer 2 and + * layer 3 headers). * * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer - * (room space is added or removed below the layer 3 header). + * (room space is added or removed between the layer 3 and + * layer 4 headers). * * The following flags are supported at this time: * @@ -3008,8 +3010,18 @@ union bpf_attr { * **BPF_F_USER_STACK** * Collect a user space stack instead of a kernel stack. * **BPF_F_USER_BUILD_ID** - * Collect buildid+offset instead of ips for user stack, - * only valid if **BPF_F_USER_STACK** is also specified. + * Collect (build_id, file_offset) instead of ips for user + * stack, only valid if **BPF_F_USER_STACK** is also + * specified. + * + * *file_offset* is an offset relative to the beginning + * of the executable or shared object file backing the vma + * which the *ip* falls in. It is *not* an offset relative + * to that object's base address. Accordingly, it must be + * adjusted by adding (sh_addr - sh_offset), where + * sh_{addr,offset} correspond to the executable section + * containing *file_offset* in the object, for comparisons + * to symbols' st_value to be valid. * * **bpf_get_stack**\ () can collect up to * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject @@ -5331,6 +5343,18 @@ union bpf_attr { * **-EACCES** if the SYN cookie is not valid. * * **-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin. + * + * u64 bpf_ktime_get_tai_ns(void) + * Description + * A nonsettable system-wide clock derived from wall-clock time but + * ignoring leap seconds. This clock does not experience + * discontinuities and backwards jumps caused by NTP inserting leap + * seconds as CLOCK_REALTIME does. + * + * See: **clock_gettime**\ (**CLOCK_TAI**) + * Return + * Current *ktime*. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -5541,6 +5565,7 @@ union bpf_attr { FN(tcp_raw_gen_syncookie_ipv6), \ FN(tcp_raw_check_syncookie_ipv4), \ FN(tcp_raw_check_syncookie_ipv6), \ + FN(ktime_get_tai_ns), \ /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index efcc06dafbd9..1d49a0352836 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -84,9 +84,7 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr, return ensure_good_fd(fd); } -#define PROG_LOAD_ATTEMPTS 5 - -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) { int fd; @@ -107,7 +105,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int */ int probe_memcg_account(void) { - const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); + const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), BPF_EXIT_INSN(), @@ -117,13 +115,13 @@ int probe_memcg_account(void) int prog_fd; /* attempt loading freplace trying to use custom BTF */ - memset(&attr, 0, prog_load_attr_sz); + memset(&attr, 0, attr_sz); attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); - prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); + prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { close(prog_fd); return 1; @@ -183,7 +181,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name) + if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -234,6 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, const struct bpf_prog_load_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, fd_array); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -253,7 +252,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, if (attempts == 0) attempts = PROG_LOAD_ATTEMPTS; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_type = prog_type; attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); @@ -263,7 +262,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); - if (prog_name) + if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -316,7 +315,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.log_level = log_level; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); if (fd >= 0) return fd; @@ -356,7 +355,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, break; } - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); if (fd >= 0) goto done; } @@ -370,7 +369,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.log_size = log_size; attr.log_level = 1; - fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); + fd = sys_bpf_prog_load(&attr, attr_sz, attempts); } done: /* free() doesn't affect errno, so we don't need to restore it */ @@ -382,127 +381,136 @@ done: int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); attr.flags = flags; - ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_delete_elem(int fd, const void *key) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); - ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags) { + const size_t attr_sz = offsetofend(union bpf_attr, flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.flags = flags; - ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_get_next_key(int fd, const void *key, void *next_key) { + const size_t attr_sz = offsetofend(union bpf_attr, next_key); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); - ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_map_freeze(int fd) { + const size_t attr_sz = offsetofend(union bpf_attr, map_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_fd = fd; - ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -511,13 +519,14 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, __u32 *count, const struct bpf_map_batch_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, batch); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_map_batch_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.batch.map_fd = fd; attr.batch.in_batch = ptr_to_u64(in_batch); attr.batch.out_batch = ptr_to_u64(out_batch); @@ -527,7 +536,7 @@ static int bpf_map_batch_common(int cmd, int fd, void *in_batch, attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0); attr.batch.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(cmd, &attr, sizeof(attr)); + ret = sys_bpf(cmd, &attr, attr_sz); *count = attr.batch.count; return libbpf_err_errno(ret); @@ -566,14 +575,15 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co int bpf_obj_pin(int fd, const char *pathname) { + const size_t attr_sz = offsetofend(union bpf_attr, file_flags); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; - ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -584,17 +594,18 @@ int bpf_obj_get(const char *pathname) int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, file_flags); union bpf_attr attr; int fd; if (!OPTS_VALID(opts, bpf_obj_get_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.pathname = ptr_to_u64((void *)pathname); attr.file_flags = OPTS_GET(opts, file_flags, 0); - fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -612,52 +623,50 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd, enum bpf_attach_type type, const struct bpf_prog_attach_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_attach_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; attr.attach_flags = OPTS_GET(opts, flags, 0); attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); - ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz); return libbpf_err_errno(ret); } -__attribute__((alias("bpf_prog_attach_opts"))) -int bpf_prog_attach_xattr(int prog_fd, int target_fd, - enum bpf_attach_type type, - const struct bpf_prog_attach_opts *opts); - int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_type = type; - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } @@ -665,6 +674,7 @@ int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, const struct bpf_link_create_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, link_create); __u32 target_btf_id, iter_info_len; union bpf_attr attr; int fd, err; @@ -683,7 +693,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; @@ -727,7 +737,7 @@ int bpf_link_create(int prog_fd, int target_fd, break; } proceed: - fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz); if (fd >= 0) return fd; /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry @@ -763,44 +773,47 @@ proceed: int bpf_link_detach(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, link_detach); union bpf_attr attr; int ret; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_detach.link_fd = link_fd; - ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_link_update(int link_fd, int new_prog_fd, const struct bpf_link_update_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, link_update); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_link_update_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_update.link_fd = link_fd; attr.link_update.new_prog_fd = new_prog_fd; attr.link_update.flags = OPTS_GET(opts, flags, 0); attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0); - ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr)); + ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz); return libbpf_err_errno(ret); } int bpf_iter_create(int link_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, iter_create); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.iter_create.link_fd = link_fd; - fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -808,13 +821,14 @@ int bpf_prog_query_opts(int target_fd, enum bpf_attach_type type, struct bpf_prog_query_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, query); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_query_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.query.target_fd = target_fd; attr.query.attach_type = type; @@ -823,7 +837,7 @@ int bpf_prog_query_opts(int target_fd, attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL)); attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL)); - ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz); OPTS_SET(opts, attach_flags, attr.query.attach_flags); OPTS_SET(opts, prog_cnt, attr.query.prog_cnt); @@ -852,13 +866,14 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, test); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_test_run_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.test.prog_fd = prog_fd; attr.test.batch_size = OPTS_GET(opts, batch_size, 0); attr.test.cpu = OPTS_GET(opts, cpu, 0); @@ -874,7 +889,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL)); attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL)); - ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz); OPTS_SET(opts, data_size_out, attr.test.data_size_out); OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out); @@ -886,13 +901,14 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts) static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.start_id = start_id; - err = sys_bpf(cmd, &attr, sizeof(attr)); + err = sys_bpf(cmd, &attr, attr_sz); if (!err) *next_id = attr.next_id; @@ -921,80 +937,84 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id) int bpf_prog_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_id = id; - fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_map_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.map_id = id; - fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_btf_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.btf_id = id; - fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_link_get_fd_by_id(__u32 id) { + const size_t attr_sz = offsetofend(union bpf_attr, open_flags); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.link_id = id; - fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) { + const size_t attr_sz = offsetofend(union bpf_attr, info); union bpf_attr attr; int err; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.info.bpf_fd = bpf_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); - err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); - + err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz); if (!err) *info_len = attr.info.info_len; - return libbpf_err_errno(err); } int bpf_raw_tracepoint_open(const char *name, int prog_fd) { + const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; - fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -1050,16 +1070,18 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset, __u64 *probe_addr) { - union bpf_attr attr = {}; + const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query); + union bpf_attr attr; int err; + memset(&attr, 0, attr_sz); attr.task_fd_query.pid = pid; attr.task_fd_query.fd = fd; attr.task_fd_query.flags = flags; attr.task_fd_query.buf = ptr_to_u64(buf); attr.task_fd_query.buf_len = *buf_len; - err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr)); + err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz); *buf_len = attr.task_fd_query.buf_len; *prog_id = attr.task_fd_query.prog_id; @@ -1072,30 +1094,32 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len, int bpf_enable_stats(enum bpf_stats_type type) { + const size_t attr_sz = offsetofend(union bpf_attr, enable_stats); union bpf_attr attr; int fd; - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.enable_stats.type = type; - fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr)); + fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz); return libbpf_err_errno(fd); } int bpf_prog_bind_map(int prog_fd, int map_fd, const struct bpf_prog_bind_opts *opts) { + const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map); union bpf_attr attr; int ret; if (!OPTS_VALID(opts, bpf_prog_bind_opts)) return libbpf_err(-EINVAL); - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, attr_sz); attr.prog_bind_map.prog_fd = prog_fd; attr.prog_bind_map.map_fd = map_fd; attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0); - ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr)); + ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 43ca3aff2292..5fdb93da423b 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -426,7 +426,7 @@ struct pt_regs; */ #define BPF_PROG(name, args...) \ name(unsigned long long *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args); \ typeof(name(0)) name(unsigned long long *ctx) \ { \ @@ -435,7 +435,7 @@ typeof(name(0)) name(unsigned long long *ctx) \ return ____##name(___bpf_ctx_cast(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(unsigned long long *ctx, ##args) struct pt_regs; @@ -460,7 +460,7 @@ struct pt_regs; */ #define BPF_KPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -469,7 +469,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_kprobe_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define ___bpf_kretprobe_args0() ctx @@ -484,7 +484,7 @@ ____##name(struct pt_regs *ctx, ##args) */ #define BPF_KRETPROBE(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -540,7 +540,7 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) #define BPF_KSYSCALL(name, args...) \ name(struct pt_regs *ctx); \ extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig; \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -555,7 +555,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_syscall_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #define BPF_KPROBE_SYSCALL BPF_KSYSCALL diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 2d14f1a52d7a..361131518d63 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1225,8 +1225,6 @@ int btf__load_into_kernel(struct btf *btf) return btf_load_into_kernel(btf, NULL, 0, 0); } -int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); - int btf__fd(const struct btf *btf) { return btf->fd; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 583760df83b4..ae543144ee30 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -116,7 +116,6 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__load_vmlinux_btf(void); LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 50d41815f431..3ad139285fad 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -223,13 +223,18 @@ __printf(2, 3) void libbpf_print(enum libbpf_print_level level, const char *format, ...) { va_list args; + int old_errno; if (!__libbpf_pr) return; + old_errno = errno; + va_start(args, format); __libbpf_pr(level, format, args); va_end(args); + + errno = old_errno; } static void pr_perm_msg(int err) @@ -412,6 +417,7 @@ struct bpf_program { int fd; bool autoload; + bool autoattach; bool mark_btf_static; enum bpf_prog_type type; enum bpf_attach_type expected_attach_type; @@ -591,7 +597,6 @@ struct elf_state { size_t strtabidx; struct elf_sec_desc *secs; int sec_cnt; - int maps_shndx; int btf_maps_shndx; __u32 btf_maps_sec_btf_id; int text_shndx; @@ -751,6 +756,8 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, prog->autoload = true; } + prog->autoattach = true; + /* inherit object's log_level */ prog->log_level = obj->log_level; @@ -1272,7 +1279,6 @@ static struct bpf_object *bpf_object__new(const char *path, */ obj->efile.obj_buf = obj_buf; obj->efile.obj_buf_sz = obj_buf_sz; - obj->efile.maps_shndx = -1; obj->efile.btf_maps_shndx = -1; obj->efile.st_ops_shndx = -1; obj->kconfig_map_idx = -1; @@ -1642,6 +1648,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) { sec_desc = &obj->efile.secs[sec_idx]; + /* Skip recognized sections with size 0. */ + if (!sec_desc->data || sec_desc->data->d_size == 0) + continue; + switch (sec_desc->sec_type) { case SEC_DATA: sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx)); @@ -3359,7 +3369,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj) if (err) return err; } else if (strcmp(name, "maps") == 0) { - obj->efile.maps_shndx = idx; + pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n"); + return -ENOTSUP; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { obj->efile.btf_maps_shndx = idx; } else if (strcmp(name, BTF_ELF_SEC) == 0) { @@ -3891,8 +3902,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj, static bool bpf_object__shndx_is_maps(const struct bpf_object *obj, int shndx) { - return shndx == obj->efile.maps_shndx || - shndx == obj->efile.btf_maps_shndx; + return shndx == obj->efile.btf_maps_shndx; } static enum libbpf_map_type @@ -4277,11 +4287,12 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { - struct bpf_map_info info = {}; + struct bpf_map_info info; __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; + memset(&info, 0, len); err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(fd, &info); @@ -4408,14 +4419,23 @@ static int probe_fd(int fd) static int probe_kern_prog_name(void) { + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }; - int ret, insn_cnt = ARRAY_SIZE(insns); + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); /* make sure loading with name works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL); + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); return probe_fd(ret); } @@ -4430,7 +4450,7 @@ static int probe_kern_global_data(void) }; int ret, map, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4563,7 +4583,7 @@ static int probe_kern_array_mmap(void) LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); int fd; - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); return probe_fd(fd); } @@ -4610,7 +4630,7 @@ static int probe_prog_bind_map(void) }; int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL); + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); if (map < 0) { ret = -errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4814,13 +4834,12 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { - struct bpf_map_info map_info = {}; + struct bpf_map_info map_info; char msg[STRERR_BUFSIZE]; - __u32 map_info_len; + __u32 map_info_len = sizeof(map_info); int err; - map_info_len = sizeof(map_info); - + memset(&map_info, 0, map_info_len); err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); if (err && errno == EINVAL) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); @@ -7244,8 +7263,6 @@ static int bpf_object_unload(struct bpf_object *obj) return 0; } -int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload"))); - static int bpf_object__sanitize_maps(struct bpf_object *obj) { struct bpf_map *m; @@ -8298,6 +8315,16 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) return 0; } +bool bpf_program__autoattach(const struct bpf_program *prog) +{ + return prog->autoattach; +} + +void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach) +{ + prog->autoattach = autoattach; +} + const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog) { return prog->insns; @@ -8978,11 +9005,12 @@ int libbpf_find_vmlinux_btf_id(const char *name, static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); struct btf *btf; int err; + memset(&info, 0, info_len); err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n", @@ -9810,13 +9838,16 @@ static int determine_uprobe_retprobe_bit(void) static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, uint64_t offset, int pid, size_t ref_ctr_off) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int type, pfd; if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) return -EINVAL; + memset(&attr, 0, attr_sz); + type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { @@ -9837,7 +9868,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, } attr.config |= 1 << bit; } - attr.size = sizeof(attr); + attr.size = attr_sz; attr.type = type; attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT; attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */ @@ -9936,7 +9967,8 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, const char *kfunc_name, size_t offset, int pid) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int type, pfd, err; @@ -9955,7 +9987,9 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); goto err_clean_legacy; } - attr.size = sizeof(attr); + + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10412,6 +10446,7 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, const char *binary_path, size_t offset, int pid) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; int type, pfd, err; @@ -10429,8 +10464,8 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, goto err_clean_legacy; } - memset(&attr, 0, sizeof(attr)); - attr.size = sizeof(attr); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = type; attr.type = PERF_TYPE_TRACEPOINT; @@ -10662,15 +10697,17 @@ static const char *arch_specific_lib_paths(void) static int resolve_full_path(const char *file, char *result, size_t result_sz) { const char *search_paths[3] = {}; - int i; + int i, perm; if (str_has_sfx(file, ".so") || strstr(file, ".so.")) { search_paths[0] = getenv("LD_LIBRARY_PATH"); search_paths[1] = "/usr/lib64:/usr/lib"; search_paths[2] = arch_specific_lib_paths(); + perm = R_OK; } else { search_paths[0] = getenv("PATH"); search_paths[1] = "/usr/bin:/usr/sbin"; + perm = R_OK | X_OK; } for (i = 0; i < ARRAY_SIZE(search_paths); i++) { @@ -10689,8 +10726,8 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz) if (!seg_len) continue; snprintf(result, result_sz, "%.*s/%s", seg_len, s, file); - /* ensure it is an executable file/link */ - if (access(result, R_OK | X_OK) < 0) + /* ensure it has required permissions */ + if (access(result, perm) < 0) continue; pr_debug("resolved '%s' to '%s'\n", file, result); return 0; @@ -10967,7 +11004,8 @@ static int determine_tracepoint_id(const char *tp_category, static int perf_event_open_tracepoint(const char *tp_category, const char *tp_name) { - struct perf_event_attr attr = {}; + const size_t attr_sz = sizeof(struct perf_event_attr); + struct perf_event_attr attr; char errmsg[STRERR_BUFSIZE]; int tp_id, pfd, err; @@ -10979,8 +11017,9 @@ static int perf_event_open_tracepoint(const char *tp_category, return tp_id; } + memset(&attr, 0, attr_sz); attr.type = PERF_TYPE_TRACEPOINT; - attr.size = sizeof(attr); + attr.size = attr_sz; attr.config = tp_id; pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */, @@ -11600,12 +11639,15 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, void *ctx, const struct perf_buffer_opts *opts) { + const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_buffer_params p = {}; - struct perf_event_attr attr = {}; + struct perf_event_attr attr; if (!OPTS_VALID(opts, perf_buffer_opts)) return libbpf_err_ptr(-EINVAL); + memset(&attr, 0, attr_sz); + attr.size = attr_sz; attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; @@ -12328,7 +12370,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) struct bpf_program *prog = *s->progs[i].prog; struct bpf_link **link = s->progs[i].link; - if (!prog->autoload) + if (!prog->autoload || !prog->autoattach) continue; /* auto-attaching not supported for this program */ diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 61493c4cddac..88a1ac34b12a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -260,6 +260,8 @@ LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog); LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); +LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog); +LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach); struct bpf_insn; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 119e6e1ea7f1..2b928dc21af0 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -358,6 +358,8 @@ LIBBPF_1.0.0 { bpf_obj_get_opts; bpf_prog_query_opts; bpf_program__attach_ksyscall; + bpf_program__autoattach; + bpf_program__set_autoattach; btf__add_enum64; btf__add_enum64_value; libbpf_bpf_attach_type_str; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4135ae0a2bc3..377642ff51fc 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -573,4 +573,7 @@ static inline bool is_pow_of_2(size_t x) return x && (x & (x - 1)) == 0; } +#define PROG_LOAD_ATTEMPTS 5 +int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 5b7e0155db6a..1e1be467bede 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -125,6 +125,8 @@ struct bpf_map; struct btf; struct btf_ext; +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog); LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 0b5398786bf3..6d495656f554 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -193,7 +193,7 @@ static int probe_map_create(enum bpf_map_type map_type) LIBBPF_OPTS(bpf_map_create_opts, opts); int key_size, value_size, max_entries; __u32 btf_key_type_id = 0, btf_value_type_id = 0; - int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; + int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0; key_size = sizeof(__u32); value_size = sizeof(__u32); diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 6c013168032d..35104580870c 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -587,11 +587,12 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn, static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd) { - struct bpf_prog_info info = {}; + struct bpf_prog_info info; __u32 info_len = sizeof(info); char name[256]; int len, ret; + memset(&info, 0, info_len); ret = bpf_obj_get_info_by_fd(fd, &info, &info_len); if (ret < 0) return ret; diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 70adf7b119b9..00c5f94b43be 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -285,6 +285,8 @@ static inline int skel_link_create(int prog_fd, int target_fd, static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { + const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array); + const size_t test_run_attr_sz = offsetofend(union bpf_attr, test); int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; @@ -302,7 +304,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) goto out; } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, prog_load_attr_sz); attr.prog_type = BPF_PROG_TYPE_SYSCALL; attr.insns = (long) opts->insns; attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); @@ -313,18 +315,18 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.log_size = opts->ctx->log_size; attr.log_buf = opts->ctx->log_buf; attr.prog_flags = BPF_F_SLEEPABLE; - err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz); if (prog_fd < 0) { opts->errstr = "failed to load loader prog"; set_err; goto out; } - memset(&attr, 0, sizeof(attr)); + memset(&attr, 0, test_run_attr_sz); attr.test.prog_fd = prog_fd; attr.test.ctx_in = (long) opts->ctx; attr.test.ctx_size_in = opts->ctx->sz; - err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr)); + err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; if (err < 0) { diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h index 4f2adc0bd6ca..fdfd235e52c4 100644 --- a/tools/lib/bpf/usdt.bpf.h +++ b/tools/lib/bpf/usdt.bpf.h @@ -232,7 +232,7 @@ long bpf_usdt_cookie(struct pt_regs *ctx) */ #define BPF_USDT(name, args...) \ name(struct pt_regs *ctx); \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args); \ typeof(name(0)) name(struct pt_regs *ctx) \ { \ @@ -241,7 +241,7 @@ typeof(name(0)) name(struct pt_regs *ctx) \ return ____##name(___bpf_usdt_args(args)); \ _Pragma("GCC diagnostic pop") \ } \ -static __attribute__((always_inline)) typeof(name(0)) \ +static __always_inline typeof(name(0)) \ ____##name(struct pt_regs *ctx, ##args) #endif /* __USDT_BPF_H__ */ diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index e33cab34d22f..9d8de15e725e 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -43,7 +43,7 @@ test_bpffs # bpffs test failed 255 test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) test_ima # failed to auto-attach program 'ima': -524 (trampoline) test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) -test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_lsm # attach unexpected error: -524 (trampoline) test_overhead # attach_fentry unexpected error: -524 (trampoline) test_profiler # unknown func bpf_probe_read_str#45 (overlapping) timer # failed to auto-attach program 'test1': -524 (trampoline) diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index fabf0c014349..3fc46f9cfb22 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,9 +50,11 @@ CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_TARGET_CT=y CONFIG_NF_CONNTRACK=y +CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y CONFIG_RC_CORE=y diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 0b899d2d8ea7..9566d9d2f6ee 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -6,19 +6,19 @@ volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes"))); /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } /* attach point for byname uprobe */ -static void trigger_func2(void) +static noinline void trigger_func2(void) { asm volatile (""); } /* attach point for byname sleepable uprobe */ -static void trigger_func3(void) +static noinline void trigger_func3(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c new file mode 100644 index 000000000000..dc5e01d279bd --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include <test_progs.h> +#include "test_autoattach.skel.h" + +void test_autoattach(void) +{ + struct test_autoattach *skel; + + skel = test_autoattach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto cleanup; + + /* disable auto-attach for prog2 */ + bpf_program__set_autoattach(skel->progs.prog2, false); + ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1"); + ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2"); + if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach")) + goto cleanup; + + usleep(1); + + ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1"); + ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2"); + +cleanup: + test_autoattach__destroy(skel); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 2974b44f80fa..2be2d61954bc 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -13,7 +13,7 @@ #include "kprobe_multi.skel.h" /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index 7a74a1579076..544bf90ac2a7 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -24,10 +24,34 @@ enum { TEST_TC_BPF, }; +#define TIMEOUT_MS 3000 + +static int connect_to_server(int srv_fd) +{ + int fd = -1; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket")) + goto out; + + if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) { + close(fd); + fd = -1; + } +out: + return fd; +} + static void test_bpf_nf_ct(int mode) { + const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0"; + int srv_fd = -1, client_fd = -1, srv_client_fd = -1; + struct sockaddr_in peer_addr = {}; struct test_bpf_nf *skel; int prog_fd, err; + socklen_t len; + u16 srv_port; + char cmd[64]; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, .data_size_in = sizeof(pkt_v4), @@ -38,6 +62,32 @@ static void test_bpf_nf_ct(int mode) if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load")) return; + /* Enable connection tracking */ + snprintf(cmd, sizeof(cmd), iptables, "-A"); + if (!ASSERT_OK(system(cmd), "iptables")) + goto end; + + srv_port = (mode == TEST_XDP) ? 5005 : 5006; + srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS); + if (!ASSERT_GE(srv_fd, 0, "start_server")) + goto end; + + client_fd = connect_to_server(srv_fd); + if (!ASSERT_GE(client_fd, 0, "connect_to_server")) + goto end; + + len = sizeof(peer_addr); + srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len); + if (!ASSERT_GE(srv_client_fd, 0, "accept")) + goto end; + if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len")) + goto end; + + skel->bss->saddr = peer_addr.sin_addr.s_addr; + skel->bss->sport = peer_addr.sin_port; + skel->bss->daddr = peer_addr.sin_addr.s_addr; + skel->bss->dport = htons(srv_port); + if (mode == TEST_XDP) prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test); else @@ -63,7 +113,17 @@ static void test_bpf_nf_ct(int mode) ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); /* expected status is IPS_SEEN_REPLY */ ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); + ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup"); + ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark"); end: + if (srv_client_fd != -1) + close(srv_client_fd); + if (client_fd != -1) + close(client_fd); + if (srv_fd != -1) + close(srv_fd); + snprintf(cmd, sizeof(cmd), iptables, "-D"); + system(cmd); test_bpf_nf__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 3c7aa82b98e2..bcf80b9f7c27 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -22,7 +22,8 @@ static struct { {"add_dynptr_to_map2", "invalid indirect read from stack"}, {"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"}, {"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"}, - {"data_slice_use_after_release", "invalid mem access 'scalar'"}, + {"data_slice_use_after_release1", "invalid mem access 'scalar'"}, + {"data_slice_use_after_release2", "invalid mem access 'scalar'"}, {"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"}, {"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"}, {"invalid_helper1", "invalid indirect read from stack"}, diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index c00eb974eb85..351fafa006fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -5,6 +5,9 @@ #include "kfunc_call_test.lskel.h" #include "kfunc_call_test_subprog.skel.h" #include "kfunc_call_test_subprog.lskel.h" +#include "kfunc_call_destructive.skel.h" + +#include "cap_helpers.h" static void test_main(void) { @@ -86,6 +89,36 @@ static void test_subprog_lskel(void) kfunc_call_test_subprog_lskel__destroy(skel); } +static int test_destructive_open_and_load(void) +{ + struct kfunc_call_destructive *skel; + int err; + + skel = kfunc_call_destructive__open(); + if (!ASSERT_OK_PTR(skel, "prog_open")) + return -1; + + err = kfunc_call_destructive__load(skel); + + kfunc_call_destructive__destroy(skel); + + return err; +} + +static void test_destructive(void) +{ + __u64 save_caps = 0; + + ASSERT_OK(test_destructive_open_and_load(), "succesful_load"); + + if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps")) + return; + + ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure"); + + cap_enable_effective(save_caps, NULL); +} + void test_kfunc_call(void) { if (test__start_subtest("main")) @@ -96,4 +129,7 @@ void test_kfunc_call(void) if (test__start_subtest("subprog_lskel")) test_subprog_lskel(); + + if (test__start_subtest("destructive")) + test_destructive(); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 61935e7e056a..f000734a3d1f 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -4,7 +4,7 @@ #include "test_task_pt_regs.skel.h" /* uprobe attach point */ -static void trigger_func(void) +static noinline void trigger_func(void) { asm volatile (""); } diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c new file mode 100644 index 000000000000..a31119823666 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022 Linutronix GmbH */ + +#include <test_progs.h> +#include <network_helpers.h> + +#include "test_time_tai.skel.h" + +#include <time.h> +#include <stdint.h> + +#define TAI_THRESHOLD 1000000000ULL /* 1s */ +#define NSEC_PER_SEC 1000000000ULL + +static __u64 ts_to_ns(const struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + +void test_time_tai(void) +{ + struct __sk_buff skb = { + .cb[0] = 0, + .cb[1] = 0, + .tstamp = 0, + }; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + .ctx_out = &skb, + .ctx_size_out = sizeof(skb), + ); + struct test_time_tai *skel; + struct timespec now_tai; + __u64 ts1, ts2, now; + int ret, prog_fd; + + /* Open and load */ + skel = test_time_tai__open_and_load(); + if (!ASSERT_OK_PTR(skel, "tai_open")) + return; + + /* Run test program */ + prog_fd = bpf_program__fd(skel->progs.time_tai); + ret = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(ret, "test_run"); + + /* Retrieve generated TAI timestamps */ + ts1 = skb.tstamp; + ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32); + + /* TAI != 0 */ + ASSERT_NEQ(ts1, 0, "tai_ts1"); + ASSERT_NEQ(ts2, 0, "tai_ts2"); + + /* TAI is moving forward only */ + ASSERT_GT(ts2, ts1, "tai_forward"); + + /* Check for future */ + ret = clock_gettime(CLOCK_TAI, &now_tai); + ASSERT_EQ(ret, 0, "tai_gettime"); + now = ts_to_ns(&now_tai); + + ASSERT_TRUE(now > ts1, "tai_future_ts1"); + ASSERT_TRUE(now > ts2, "tai_future_ts2"); + + /* Check for reasonable range */ + ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1"); + ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2"); + + test_time_tai__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 0a26c243e6e9..b0f08ff024fb 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -65,7 +65,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr) /* Every bpf_ringbuf_reserve_dynptr call must have a corresponding * bpf_ringbuf_submit/discard_dynptr call */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release1(void *ctx) { struct bpf_dynptr ptr; @@ -77,7 +77,7 @@ int ringbuf_missing_release1(void *ctx) return 0; } -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release2(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -112,7 +112,7 @@ static int missing_release_callback_fn(__u32 index, void *data) } /* Any dynptr initialized within a callback must have bpf_dynptr_put called */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_missing_release_callback(void *ctx) { bpf_loop(10, missing_release_callback_fn, NULL, 0); @@ -120,7 +120,7 @@ int ringbuf_missing_release_callback(void *ctx) } /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_release_uninit_dynptr(void *ctx) { struct bpf_dynptr ptr; @@ -132,7 +132,7 @@ int ringbuf_release_uninit_dynptr(void *ctx) } /* A dynptr can't be used after it has been invalidated */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int use_after_invalid(void *ctx) { struct bpf_dynptr ptr; @@ -151,7 +151,7 @@ int use_after_invalid(void *ctx) } /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int ringbuf_invalid_api(void *ctx) { struct bpf_dynptr ptr; @@ -173,7 +173,7 @@ done: } /* Can't add a dynptr to a map */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int add_dynptr_to_map1(void *ctx) { struct bpf_dynptr ptr; @@ -190,7 +190,7 @@ int add_dynptr_to_map1(void *ctx) } /* Can't add a struct with an embedded dynptr to a map */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int add_dynptr_to_map2(void *ctx) { struct test_info x; @@ -207,7 +207,7 @@ int add_dynptr_to_map2(void *ctx) } /* A data slice can't be accessed out of bounds */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_out_of_bounds_ringbuf(void *ctx) { struct bpf_dynptr ptr; @@ -227,7 +227,7 @@ done: return 0; } -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_out_of_bounds_map_value(void *ctx) { __u32 key = 0, map_val; @@ -247,8 +247,8 @@ int data_slice_out_of_bounds_map_value(void *ctx) } /* A data slice can't be used after it has been released */ -SEC("?raw_tp/sys_nanosleep") -int data_slice_use_after_release(void *ctx) +SEC("?raw_tp") +int data_slice_use_after_release1(void *ctx) { struct bpf_dynptr ptr; struct sample *sample; @@ -272,8 +272,44 @@ done: return 0; } +/* A data slice can't be used after it has been released. + * + * This tests the case where the data slice tracks a dynptr (ptr2) + * that is at a non-zero offset from the frame pointer (ptr1 is at fp, + * ptr2 is at fp - 16). + */ +SEC("?raw_tp") +int data_slice_use_after_release2(void *ctx) +{ + struct bpf_dynptr ptr1, ptr2; + struct sample *sample; + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1); + bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2); + + sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample)); + if (!sample) + goto done; + + sample->pid = 23; + + bpf_ringbuf_submit_dynptr(&ptr2, 0); + + /* this should fail */ + sample->pid = 23; + + bpf_ringbuf_submit_dynptr(&ptr1, 0); + + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr2, 0); + bpf_ringbuf_discard_dynptr(&ptr1, 0); + return 0; +} + /* A data slice must be first checked for NULL */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_missing_null_check1(void *ctx) { struct bpf_dynptr ptr; @@ -293,7 +329,7 @@ int data_slice_missing_null_check1(void *ctx) } /* A data slice can't be dereferenced if it wasn't checked for null */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int data_slice_missing_null_check2(void *ctx) { struct bpf_dynptr ptr; @@ -315,7 +351,7 @@ done: /* Can't pass in a dynptr as an arg to a helper function that doesn't take in a * dynptr argument */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_helper1(void *ctx) { struct bpf_dynptr ptr; @@ -329,7 +365,7 @@ int invalid_helper1(void *ctx) } /* A dynptr can't be passed into a helper function at a non-zero offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_helper2(void *ctx) { struct bpf_dynptr ptr; @@ -344,7 +380,7 @@ int invalid_helper2(void *ctx) } /* A bpf_dynptr is invalidated if it's been written into */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write1(void *ctx) { struct bpf_dynptr ptr; @@ -365,7 +401,7 @@ int invalid_write1(void *ctx) * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed * offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write2(void *ctx) { struct bpf_dynptr ptr; @@ -388,7 +424,7 @@ int invalid_write2(void *ctx) * A bpf_dynptr can't be used as a dynptr if it has been written into at a * non-const offset */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write3(void *ctx) { struct bpf_dynptr ptr; @@ -419,7 +455,7 @@ static int invalid_write4_callback(__u32 index, void *data) /* If the dynptr is written into in a callback function, it should * be invalidated as a dynptr */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_write4(void *ctx) { struct bpf_dynptr ptr; @@ -436,7 +472,7 @@ int invalid_write4(void *ctx) /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */ struct bpf_dynptr global_dynptr; -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int global(void *ctx) { /* this should fail */ @@ -448,7 +484,7 @@ int global(void *ctx) } /* A direct read should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read1(void *ctx) { struct bpf_dynptr ptr; @@ -464,7 +500,7 @@ int invalid_read1(void *ctx) } /* A direct read at an offset should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read2(void *ctx) { struct bpf_dynptr ptr; @@ -479,7 +515,7 @@ int invalid_read2(void *ctx) } /* A direct read at an offset into the lower stack slot should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read3(void *ctx) { struct bpf_dynptr ptr1, ptr2; @@ -505,7 +541,7 @@ static int invalid_read4_callback(__u32 index, void *data) } /* A direct read within a callback function should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_read4(void *ctx) { struct bpf_dynptr ptr; @@ -520,7 +556,7 @@ int invalid_read4(void *ctx) } /* Initializing a dynptr on an offset should fail */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int invalid_offset(void *ctx) { struct bpf_dynptr ptr; @@ -534,7 +570,7 @@ int invalid_offset(void *ctx) } /* Can't release a dynptr twice */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int release_twice(void *ctx) { struct bpf_dynptr ptr; @@ -560,7 +596,7 @@ static int release_twice_callback_fn(__u32 index, void *data) /* Test that releasing a dynptr twice, where one of the releases happens * within a calback function, fails */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int release_twice_callback(void *ctx) { struct bpf_dynptr ptr; @@ -575,7 +611,7 @@ int release_twice_callback(void *ctx) } /* Reject unsupported local mem types for dynptr_from_mem API */ -SEC("?raw_tp/sys_nanosleep") +SEC("?raw_tp") int dynptr_from_mem_invalid_api(void *ctx) { struct bpf_dynptr ptr; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c new file mode 100644 index 000000000000..767472bc5a97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +extern void bpf_kfunc_call_test_destructive(void) __ksym; + +SEC("tc") +int kfunc_destructive_test(void) +{ + bpf_kfunc_call_test_destructive(); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index 33694ef8acfa..d8d8af623bc2 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -4,6 +4,7 @@ * Copyright 2020 Google LLC. */ +#include "bpf_misc.h" #include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -160,7 +161,7 @@ int BPF_PROG(test_task_free, struct task_struct *task) int copy_test = 0; -SEC("fentry.s/__x64_sys_setdomainname") +SEC("fentry.s/" SYS_PREFIX "sys_setdomainname") int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) { void *ptr = (void *)PT_REGS_PARM1(regs); diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c new file mode 100644 index 000000000000..11a44493ebce --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_autoattach.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Google */ + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> + +bool prog1_called = false; +bool prog2_called = false; + +SEC("raw_tp/sys_enter") +int prog1(const void *ctx) +{ + prog1_called = true; + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(const void *ctx) +{ + prog2_called = true; + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c index 22d0ac8709b4..5a3a80f751c4 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c @@ -28,14 +28,14 @@ static void update(void *ctx, __u64 *res) *res |= bpf_get_attach_cookie(ctx); } -SEC("kprobe/sys_nanosleep") +SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) { update(ctx, &kprobe_res); return 0; } -SEC("kretprobe/sys_nanosleep") +SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { update(ctx, &kretprobe_res); diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index 196cd8dfe42a..2722441850cc 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -23,6 +23,12 @@ int test_insert_entry = -EAFNOSUPPORT; int test_succ_lookup = -ENOENT; u32 test_delta_timeout = 0; u32 test_status = 0; +__be32 saddr = 0; +__be16 sport = 0; +__be32 daddr = 0; +__be16 dport = 0; +int test_exist_lookup = -ENOENT; +u32 test_exist_lookup_mark = 0; struct nf_conn; @@ -160,6 +166,21 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, } test_alloc_entry = 0; } + + bpf_tuple.ipv4.saddr = saddr; + bpf_tuple.ipv4.daddr = daddr; + bpf_tuple.ipv4.sport = sport; + bpf_tuple.ipv4.dport = dport; + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + test_exist_lookup = 0; + if (ct->mark == 42) + test_exist_lookup_mark = 43; + bpf_ct_release(ct); + } else { + test_exist_lookup = opts_def.error; + } } SEC("xdp") diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c index 20ef9d433b97..5715c569ec03 100644 --- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c +++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c @@ -72,7 +72,7 @@ int tp_timer(void *ctx) return 0; } -SEC("?kprobe/sys_nanosleep") +SEC("?kprobe") int kprobe_timer(void *ctx) { timer_work(); @@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx) return 0; } -SEC("?kprobe/sys_nanosleep") +SEC("?kprobe") int kprobe_spin_lock(void *ctx) { spin_lock_work(); diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c new file mode 100644 index 000000000000..7ea0863f3ddb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_time_tai.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022 Linutronix GmbH */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("tc") +int time_tai(struct __sk_buff *skb) +{ + __u64 ts1, ts2; + + /* Get TAI timestamps */ + ts1 = bpf_ktime_get_tai_ns(); + ts2 = bpf_ktime_get_tai_ns(); + + /* Save TAI timestamps (Note: skb->hwtstamp is read-only) */ + skb->tstamp = ts1; + skb->cb[0] = ts2 & 0xffffffff; + skb->cb[1] = ts2 >> 32; + + return 0; +} diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index b86ae4a2e5c5..a29aa05ebb3e 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -307,6 +307,20 @@ update_kconfig() fi } +catch() +{ + local exit_code=$1 + local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}" + # This is just a cleanup and the directory may + # have already been unmounted. So, don't let this + # clobber the error code we intend to return. + unmount_image || true + if [[ -f "${exit_status_file}" ]]; then + exit_code="$(cat ${exit_status_file})" + fi + exit ${exit_code} +} + main() { local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" @@ -319,7 +333,7 @@ main() local exit_command="poweroff -f" local debug_shell="no" - while getopts 'hskid:j:' opt; do + while getopts ':hskid:j:' opt; do case ${opt} in i) update_image="yes" @@ -353,6 +367,8 @@ main() done shift $((OPTIND -1)) + trap 'catch "$?"' EXIT + if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then echo "No command specified, will run ${DEFAULT_COMMAND} in the vm" else @@ -409,20 +425,4 @@ main() fi } -catch() -{ - local exit_code=$1 - local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}" - # This is just a cleanup and the directory may - # have already been unmounted. So, don't let this - # clobber the error code we intend to return. - unmount_image || true - if [[ -f "${exit_status_file}" ]]; then - exit_code="$(cat ${exit_status_file})" - fi - exit ${exit_code} -} - -trap 'catch "$?"' EXIT - main "$@" diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 74d56d971baf..14b4737b223c 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -244,6 +244,11 @@ static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); } +static bool is_umem_valid(struct ifobject *ifobj) +{ + return !!ifobj->umem->umem; +} + static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) { udp_hdr->check = 0; @@ -817,12 +822,13 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size) return TEST_PASS; } -static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) +static int receive_pkts(struct test_spec *test, struct pollfd *fds) { - struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0}; + struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; + struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; - struct pkt_stream *pkt_stream = ifobj->pkt_stream; - struct xsk_socket_info *xsk = ifobj->xsk; + struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; struct pkt *pkt; int ret; @@ -843,17 +849,28 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) } kick_rx(xsk); + if (ifobj->use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret < 0) + exit_with_error(-ret); + + if (!ret) { + if (!is_umem_valid(test->ifobj_tx)) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); + return TEST_FAILURE; - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); - if (!rcvd) { - if (xsk_ring_prod__needs_wakeup(&umem->fq)) { - ret = poll(fds, 1, POLL_TMOUT); - if (ret < 0) - exit_with_error(-ret); } - continue; + + if (!(fds->revents & POLLIN)) + continue; } + rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + if (!rcvd) + continue; + if (ifobj->use_fill_ring) { ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { @@ -900,13 +917,35 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds) return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) +static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, + bool timeout) { struct xsk_socket_info *xsk = ifobject->xsk; - u32 i, idx, valid_pkts = 0; + bool use_poll = ifobject->use_poll; + u32 i, idx = 0, ret, valid_pkts = 0; + + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { + if (use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (timeout) { + if (ret < 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, ret); + return TEST_FAILURE; + } + if (ret == 0) + return TEST_PASS; + break; + } + if (ret <= 0) { + ksft_print_msg("ERROR: [%s] Poll error %d\n", + __func__, ret); + return TEST_FAILURE; + } + } - while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) complete_pkts(xsk, BATCH_SIZE); + } for (i = 0; i < BATCH_SIZE; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); @@ -933,11 +972,27 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb) xsk_ring_prod__submit(&xsk->tx, i); xsk->outstanding_tx += valid_pkts; - if (complete_pkts(xsk, i)) - return TEST_FAILURE; - usleep(10); - return TEST_PASS; + if (use_poll) { + ret = poll(fds, 1, POLL_TMOUT); + if (ret <= 0) { + if (ret == 0 && timeout) + return TEST_PASS; + + ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret); + return TEST_FAILURE; + } + } + + if (!timeout) { + if (complete_pkts(xsk, i)) + return TEST_FAILURE; + + usleep(10); + return TEST_PASS; + } + + return TEST_CONTINUE; } static void wait_for_tx_completion(struct xsk_socket_info *xsk) @@ -948,29 +1003,19 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { + bool timeout = !is_umem_valid(test->ifobj_rx); struct pollfd fds = { }; - u32 pkt_cnt = 0; + u32 pkt_cnt = 0, ret; fds.fd = xsk_socket__fd(ifobject->xsk->xsk); fds.events = POLLOUT; while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - int err; - - if (ifobject->use_poll) { - int ret; - - ret = poll(&fds, 1, POLL_TMOUT); - if (ret <= 0) - continue; - - if (!(fds.revents & POLLOUT)) - continue; - } - - err = __send_pkts(ifobject, &pkt_cnt); - if (err || test->fail) + ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); + if ((ret || test->fail) && !timeout) return TEST_FAILURE; + else if (ret == TEST_PASS && timeout) + return ret; } wait_for_tx_completion(ifobject->xsk); @@ -1235,7 +1280,7 @@ static void *worker_testapp_validate_rx(void *arg) pthread_barrier_wait(&barr); - err = receive_pkts(ifobject, &fds); + err = receive_pkts(test, &fds); if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); @@ -1251,6 +1296,33 @@ static void *worker_testapp_validate_rx(void *arg) pthread_exit(NULL); } +static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj, + enum test_type type) +{ + pthread_t t0; + + if (pthread_barrier_init(&barr, NULL, 2)) + exit_with_error(errno); + + test->current_step++; + if (type == TEST_TYPE_POLL_RXQ_TMOUT) + pkt_stream_reset(ifobj->pkt_stream); + pkts_in_flight = 0; + + /*Spawn thread */ + pthread_create(&t0, NULL, ifobj->func_ptr, test); + + if (type != TEST_TYPE_POLL_TXQ_TMOUT) + pthread_barrier_wait(&barr); + + if (pthread_barrier_destroy(&barr)) + exit_with_error(errno); + + pthread_join(t0, NULL); + + return !!test->fail; +} + static int testapp_validate_traffic(struct test_spec *test) { struct ifobject *ifobj_tx = test->ifobj_tx; @@ -1548,12 +1620,30 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ pkt_stream_restore_default(test); break; - case TEST_TYPE_POLL: - test->ifobj_tx->use_poll = true; + case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; - test_spec_set_name(test, "POLL"); + test_spec_set_name(test, "POLL_RX"); testapp_validate_traffic(test); break; + case TEST_TYPE_TX_POLL: + test->ifobj_tx->use_poll = true; + test_spec_set_name(test, "POLL_TX"); + testapp_validate_traffic(test); + break; + case TEST_TYPE_POLL_TXQ_TMOUT: + test_spec_set_name(test, "POLL_TXQ_FULL"); + test->ifobj_tx->use_poll = true; + /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ + test->ifobj_tx->umem->frame_size = 2048; + pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); + testapp_validate_traffic_single_thread(test, test->ifobj_tx, type); + pkt_stream_restore_default(test); + break; + case TEST_TYPE_POLL_RXQ_TMOUT: + test_spec_set_name(test, "POLL_RXQ_EMPTY"); + test->ifobj_rx->use_poll = true; + testapp_validate_traffic_single_thread(test, test->ifobj_rx, type); + break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); testapp_invalid_desc(test); diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 3d17053f98e5..ee97576757a9 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -27,6 +27,7 @@ #define TEST_PASS 0 #define TEST_FAILURE -1 +#define TEST_CONTINUE 1 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 7 #define MAX_INTERFACES_NAMESPACE_CHARS 10 @@ -48,7 +49,7 @@ #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 #define POLL_TMOUT 1000 -#define RECV_TMOUT 3 +#define THREAD_TMOUT 3 #define DEFAULT_PKT_CNT (4 * 1024) #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE) @@ -68,7 +69,10 @@ enum test_type { TEST_TYPE_RUN_TO_COMPLETION, TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME, TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT, - TEST_TYPE_POLL, + TEST_TYPE_RX_POLL, + TEST_TYPE_TX_POLL, + TEST_TYPE_POLL_RXQ_TMOUT, + TEST_TYPE_POLL_TXQ_TMOUT, TEST_TYPE_UNALIGNED, TEST_TYPE_ALIGNED_INV_DESC, TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME, diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh new file mode 100755 index 000000000000..0cf9e47e3209 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh @@ -0,0 +1,273 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test VLAN classification after routing and verify that the order of +# configuration does not impact switch behavior. Verify that {RIF, Port}->VID +# mapping is added correctly for existing {Port, VID}->FID mapping and that +# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1 + + $swp2 | | +# | | | | | | +# | | $swp1.10 + + $swp2.10 | | +# | | | | +# | | br0 | | +# | | 192.0.2.2/28 | | +# | +--------------------------------------------------------+ | +# | | +# | $swp3.20 + | +# | 192.0.2.17/28 | | +# | | | +# | $swp3 + | +# +---------------|--------------------------------------------+ +# | +# +---------------|--+ +# | $h3 + | +# | | | +# | $h3.20 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_vid_map_rif + rif_port_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + vlan_create $h3 20 v$h3 192.0.2.18/28 + + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + + vlan_destroy $h3 20 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + tc qdisc add dev $swp1 clsact + + ip link add dev br0 type bridge mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + vlan_create $swp2 10 + ip link set dev $swp2.10 master br0 + + ip link set dev $swp3 up + vlan_create $swp3 20 "" 192.0.2.17/28 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20) +} + +switch_destroy() +{ + vlan_destroy $swp3 20 + ip link set dev $swp3 down + + ip link set dev $swp2.10 nomaster + vlan_destroy $swp2 10 + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + tc qdisc del dev $swp1 clsact + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br0 add 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + __addr_add_del br0 del 192.0.2.2/28 +} + +port_vid_map_rif() +{ + RET=0 + + # First add {port, VID}->FID for swp1.10, then add a RIF and verify that + # packets get the correct VID after routing. + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + bridge_rif_add + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10) + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing {port, VID}->FID mapping" + + tc filter del dev $swp1 egress + + bridge_rif_del + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 +} + +rif_port_vid_map() +{ + RET=0 + + # First add an address to the bridge, which will create a RIF on top of + # it, then add a new {port, VID}->FID mapping and verify that packets + # get the correct VID after routing. + bridge_rif_add + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10) + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add {port, VID}->FID mapping for FID with a RIF" + + tc filter del dev $swp1 egress + + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 + bridge_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh new file mode 100755 index 000000000000..df2b09966886 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing over bridge and verify that the order of configuration does not +# impact switch behavior. Verify that RIF is added correctly for existing +# mappings and that new mappings use the correct RIF. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1 + + $swp2 | | +# | | | | | | +# | | $swp1.10 + + $swp2.10 | | +# | | | | +# | | br0 | | +# | | 192.0.2.2/28 | | +# | +--------------------------------------------------------+ | +# | | +# | $swp3.10 + | +# | 192.0.2.17/28 | | +# | | | +# | $swp3 + | +# +---------------|--------------------------------------------+ +# | +# +---------------|--+ +# | $h3 + | +# | | | +# | $h3.10 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_vid_map_rif + rif_port_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + vlan_create $h3 10 v$h3 192.0.2.18/28 + + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + + vlan_destroy $h3 10 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + + ip link add dev br0 type bridge mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + vlan_create $swp2 10 + ip link set dev $swp2.10 master br0 + + ip link set dev $swp3 up + vlan_create $swp3 10 "" 192.0.2.17/28 + tc qdisc add dev $swp3 clsact + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10) +} + +switch_destroy() +{ + tc qdisc del dev $swp3 clsact + vlan_destroy $swp3 10 + ip link set dev $swp3 down + + ip link set dev $swp2.10 nomaster + vlan_destroy $swp2 10 + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br0 add 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + __addr_add_del br0 del 192.0.2.2/28 +} + +port_vid_map_rif() +{ + RET=0 + + # First add {port, VID}->FID for $swp1.10, then add a RIF and verify + # that packets can be routed via the existing mapping. + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + bridge_rif_add + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing {port, VID}->FID mapping" + + tc filter del dev $swp3 egress + + bridge_rif_del + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 +} + +rif_port_vid_map() +{ + RET=0 + + # First add an address to the bridge, which will create a RIF on top of + # it, then add a new {port, VID}->FID mapping and verify that packets + # can be routed via the new mapping. + bridge_rif_add + vlan_create $swp1 10 + ip link set dev $swp1.10 master br0 + + # The hardware matches on the first ethertype which is not VLAN, + # so the protocol should be IP. + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add {port, VID}->FID mapping for FID with a RIF" + + tc filter del dev $swp3 egress + + ip link set dev $swp1.10 nomaster + vlan_destroy $swp1 10 + bridge_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh new file mode 100755 index 000000000000..577293bab88b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh @@ -0,0 +1,264 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing over bridge and verify that the order of configuration does not +# impact switch behavior. Verify that RIF is added correctly for existing +# mapping and that packets can be routed via port which is added after the FID +# already has a RIF. + +# +-------------------+ +--------------------+ +# | H1 | | H2 | +# | | | | +# | $h1.10 + | | + $h2.10 | +# | 192.0.2.1/28 | | | | 192.0.2.3/28 | +# | | | | | | +# | $h1 + | | + $h2 | +# +----------------|--+ +--|-----------------+ +# | | +# +----------------|-------------------------|-----------------+ +# | SW | | | +# | +--------------|-------------------------|---------------+ | +# | | $swp1 + + $swp2 | | +# | | | | +# | | br0 | | +# | +--------------------------------------------------------+ | +# | | | +# | br0.10 | +# | 192.0.2.2/28 | +# | | +# | | +# | $swp3 + | +# | 192.0.2.17/28 | | +# +----------------|-------------------------------------------+ +# | +# +----------------|--+ +# | $h3 + | +# | 192.0.2.18/28 | +# | | +# | H3 | +# +-------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + vid_map_rif + rif_vid_map +" + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 + + ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 +} + +h1_destroy() +{ + ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 10 v$h2 192.0.2.3/28 +} + +h2_destroy() +{ + vlan_destroy $h2 10 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.18/28 + ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 +} + +h3_destroy() +{ + ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17 + simple_if_fini $h3 192.0.2.18/28 +} + +switch_create() +{ + ip link set dev $swp1 up + + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + # By default, a link-local address is generated when netdevice becomes + # up. Adding an address to the bridge will cause creating a RIF for it. + # Prevent generating link-local address to be able to control when the + # RIF is added. + sysctl_set net.ipv6.conf.br0.addr_gen_mode 1 + ip link set dev br0 up + + ip link set dev $swp2 up + ip link set dev $swp2 master br0 + bridge vlan add vid 10 dev $swp2 + + ip link set dev $swp3 up + __addr_add_del $swp3 add 192.0.2.17/28 + tc qdisc add dev $swp3 clsact + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3) +} + +switch_destroy() +{ + tc qdisc del dev $swp3 clsact + __addr_add_del $swp3 del 192.0.2.17/28 + ip link set dev $swp3 down + + bridge vlan del vid 10 dev $swp2 + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev br0 down + sysctl_restore net.ipv6.conf.br0.addr_gen_mode + ip link del dev br0 + + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + forwarding_enable + + h1_create + h2_create + h3_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h3_destroy + h2_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +bridge_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + vlan_create br0 10 "" 192.0.2.2/28 + rifs_occ_t1=$(devlink_resource_occ_get rifs) + + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + sleep 1 +} + +bridge_rif_del() +{ + vlan_destroy br0 10 +} + +vid_map_rif() +{ + RET=0 + + # First add VID->FID for vlan 10, then add a RIF and verify that + # packets can be routed via the existing mapping. + bridge vlan add vid 10 dev br0 self + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + bridge_rif_add + + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing VID->FID mapping" + + tc filter del dev $swp3 egress + + bridge_rif_del + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 nomaster + bridge vlan del vid 10 dev br0 self +} + +rif_vid_map() +{ + RET=0 + + # Using 802.1Q, there is only one VID->FID map for each VID. That means + # that we cannot really check adding a new map for existing FID with a + # RIF. Verify that packets can be routed via port which is added after + # the FID already has a RIF, although in practice there is no new + # mapping in the hardware. + bridge vlan add vid 10 dev br0 self + bridge_rif_add + + ip link set dev $swp1 master br0 + bridge vlan add vid 10 dev $swp1 + + tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.18 action pass + + ping_do $h1.10 192.0.2.18 + check_err $? "Ping failed" + + tc_check_at_least_x_packets "dev $swp3 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add port to VID->FID mapping for FID with a RIF" + + tc filter del dev $swp3 egress + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 nomaster + + bridge_rif_del + bridge vlan del vid 10 dev br0 self +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh new file mode 100755 index 000000000000..90450216a10d --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test routing after VXLAN decapsulation and verify that the order of +# configuration does not impact switch behavior. Verify that RIF is added +# correctly for existing mapping and that new mapping uses the correct RIF. + +# +---------------------------+ +# | H1 | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|----------------------+ +# | +# +----|----------------------------------------------------------------------+ +# | SW | | +# | +--|--------------------------------------------------------------------+ | +# | | + $swp1 br1 | | +# | | vid 10 pvid untagged | | +# | | | | +# | | | | +# | | + vx4001 | | +# | | local 192.0.2.17 | | +# | | remote 192.0.2.18 | | +# | | id 104001 | | +# | | dstport $VXPORT | | +# | | vid 4001 pvid untagged | | +# | | | | +# | +----------------------------------+------------------------------------+ | +# | | | +# | +----------------------------------|------------------------------------+ | +# | | | | | +# | | +-------------------------------+---------------------------------+ | | +# | | | | | | +# | | + vlan10 vlan4001 + | | +# | | 192.0.2.2/28 | | +# | | | | +# | | vrf-green | | +# | +-----------------------------------------------------------------------+ | +# | | +# | + $rp1 +lo | +# | | 198.51.100.1/24 192.0.2.17/32 | +# +----|----------------------------------------------------------------------+ +# | +# +----|--------------------------------------------------------+ +# | | v$rp2 | +# | + $rp2 | +# | 198.51.100.2/24 | +# | | +# +-------------------------------------------------------------+ + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + vni_fid_map_rif + rif_vni_fid_map +" + +NUM_NETIFS=4 +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh +source $lib_dir/devlink_lib.sh + +: ${VXPORT:=4789} +export VXPORT + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +switch_create() +{ + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip link set dev br1 address $(mac_get $swp1) + ip link set dev br1 up + + ip link set dev $rp1 up + ip address add dev $rp1 198.51.100.1/24 + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + bridge vlan add vid 10 dev $swp1 pvid untagged + + tc qdisc add dev $swp1 clsact + + ip link add name vx4001 type vxlan id 104001 \ + local 192.0.2.17 dstport $VXPORT \ + nolearning noudpcsum tos inherit ttl 100 + ip link set dev vx4001 up + + ip link set dev vx4001 master br1 + + ip address add 192.0.2.17/32 dev lo + + # Create SVIs. + vrf_create "vrf-green" + ip link set dev vrf-green up + + ip link add link br1 name vlan10 up master vrf-green type vlan id 10 + + # Replace neighbor to avoid 1 packet which is forwarded in software due + # to "unresolved neigh". + ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1) + + ip address add 192.0.2.2/28 dev vlan10 + + bridge vlan add vid 10 dev br1 self + bridge vlan add vid 4001 dev br1 self + + sysctl_set net.ipv4.conf.all.rp_filter 0 +} + +switch_destroy() +{ + sysctl_restore net.ipv4.conf.all.rp_filter + + bridge vlan del vid 4001 dev br1 self + bridge vlan del vid 10 dev br1 self + + ip link del dev vlan10 + + vrf_destroy "vrf-green" + + ip address del 192.0.2.17/32 dev lo + + tc qdisc del dev $swp1 clsact + + bridge vlan del vid 10 dev $swp1 + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link set dev vx4001 nomaster + + ip link set dev vx4001 down + ip link del dev vx4001 + + ip address del dev $rp1 198.51.100.1/24 + ip link set dev $rp1 down + + ip link set dev br1 down + ip link del dev br1 +} + +vrp2_create() +{ + simple_if_init $rp2 198.51.100.2/24 + + ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1 +} + +vrp2_destroy() +{ + ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1 + + simple_if_fini $rp2 198.51.100.2/24 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + forwarding_enable + + h1_create + switch_create + + vrp2_create +} + +cleanup() +{ + pre_cleanup + + vrp2_destroy + + switch_destroy + h1_destroy + + forwarding_restore + vrf_cleanup +} + +payload_get() +{ + local dest_mac=$(mac_get vlan4001) + local src_mac=$(mac_get $rp1) + + p=$(: + )"08:"$( : VXLAN flags + )"00:00:00:"$( : VXLAN reserved + )"01:96:41:"$( : VXLAN VNI : 104001 + )"00:"$( : VXLAN reserved + )"$dest_mac:"$( : ETH daddr + )"$src_mac:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"3f:49:"$( : IP identification + )"00:00:"$( : IP flags + frag off + )"3f:"$( : IP TTL + )"01:"$( : IP proto + )"50:21:"$( : IP header csum + )"c6:33:64:0a:"$( : IP saddr: 198.51.100.10 + )"c0:00:02:01:"$( : IP daddr: 192.0.2.1 + ) + echo $p +} + +vlan_rif_add() +{ + rifs_occ_t0=$(devlink_resource_occ_get rifs) + + ip link add link br1 name vlan4001 up master vrf-green \ + type vlan id 4001 + + rifs_occ_t1=$(devlink_resource_occ_get rifs) + expected_rifs=$((rifs_occ_t0 + 1)) + + [[ $expected_rifs -eq $rifs_occ_t1 ]] + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" +} + +vlan_rif_del() +{ + ip link del dev vlan4001 +} + +vni_fid_map_rif() +{ + local rp1_mac=$(mac_get $rp1) + + RET=0 + + # First add VNI->FID mapping to the FID of VLAN 4001 + bridge vlan add vid 4001 dev vx4001 pvid untagged + + # Add a RIF to the FID with VNI->FID mapping + vlan_rif_add + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + payload=$(payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -A 192.0.2.18 \ + -t udp sp=12345,dp=$VXPORT,p=$payload -q + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add RIF for existing VNI->FID mapping" + + tc filter del dev $swp1 egress + + bridge vlan del vid 4001 dev vx4001 pvid untagged + vlan_rif_del +} + +rif_vni_fid_map() +{ + local rp1_mac=$(mac_get $rp1) + + RET=0 + + # First add a RIF to the FID of VLAN 4001 + vlan_rif_add + + # Add VNI->FID mapping to FID with a RIF + bridge vlan add vid 4001 dev vx4001 pvid untagged + + tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \ + flower skip_sw dst_ip 192.0.2.1 action pass + + payload=$(payload_get) + ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \ + -B 192.0.2.17 -A 192.0.2.18 \ + -t udp sp=12345,dp=$VXPORT,p=$payload -q + + tc_check_at_least_x_packets "dev $swp1 egress" 101 10 + check_err $? "Packets were not routed in hardware" + + log_test "Add VNI->FID mapping for FID with a RIF" + + tc filter del dev $swp1 egress + + bridge vlan del vid 4001 dev vx4001 pvid untagged + vlan_rif_del +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS |