From 324bda9e6c5add86ba2e1066476481c48132aca0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- kernel/bpf/core.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 917cc04a0a94..6b49e1991ae7 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1381,6 +1381,37 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +static struct { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +} empty_prog_array = { + .null_prog = NULL, +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +{ + if (prog_cnt) + return kzalloc(sizeof(struct bpf_prog_array) + + sizeof(struct bpf_prog *) * (prog_cnt + 1), + flags); + + return &empty_prog_array.hdr; +} + +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +{ + if (!progs || + progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + return; + kfree_rcu(progs, rcu); +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; -- cgit From 468e2f64d220fe2dc11caa2bcb9b3a1e50fc7321 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:22 -0700 Subject: bpf: introduce BPF_PROG_QUERY command introduce BPF_PROG_QUERY command to retrieve a set of either attached programs to given cgroup or a set of effective programs that will execute for events within a cgroup Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- kernel/bpf/core.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6b49e1991ae7..eba966c09053 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1412,6 +1412,44 @@ void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) kfree_rcu(progs, rcu); } +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs) +{ + struct bpf_prog **prog; + u32 cnt = 0; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) + cnt++; + rcu_read_unlock(); + return cnt; +} + +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt) +{ + struct bpf_prog **prog; + u32 i = 0, id; + + rcu_read_lock(); + prog = rcu_dereference(progs)->progs; + for (; *prog; prog++) { + id = (*prog)->aux->id; + if (copy_to_user(prog_ids + i, &id, sizeof(id))) { + rcu_read_unlock(); + return -EFAULT; + } + if (++i == cnt) { + prog++; + break; + } + } + rcu_read_unlock(); + if (*prog) + return -ENOSPC; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; -- cgit From 368211fb920a0b789c238942c6af0414539f79d6 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:13 -0700 Subject: bpf: Append prog->aux->name in bpf_get_prog_name() This patch makes the bpf_prog's name available in kallsyms. The new format is bpf_prog_tag[_name]. Sample kallsyms from running selftests/bpf/test_progs: [root@arch-fb-vm1 ~]# egrep ' bpf_prog_[0-9a-fA-F]{16}' /proc/kallsyms ffffffffa0048000 t bpf_prog_dabf0207d1992486_test_obj_id ffffffffa0038000 t bpf_prog_a04f5eef06a7f555__123456789ABCDE ffffffffa0050000 t bpf_prog_a04f5eef06a7f555 Signed-off-by: Martin KaFai Lau Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index c6be15ae83ee..248961af2421 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -309,12 +309,25 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog, static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) { + const char *end = sym + KSYM_NAME_LEN; + BUILD_BUG_ON(sizeof("bpf_prog_") + - sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN); + sizeof(prog->tag) * 2 + + /* name has been null terminated. + * We should need +1 for the '_' preceding + * the name. However, the null character + * is double counted between the name and the + * sizeof("bpf_prog_") above, so we omit + * the +1 here. + */ + sizeof(prog->aux->name) > KSYM_NAME_LEN); sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); - *sym = 0; + if (prog->aux->name[0]) + snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); + else + *sym = 0; } static __always_inline unsigned long -- cgit From 9185a610f8f7f1b4e4d28c9de27d1969cf58e0f1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 12 Oct 2017 18:40:02 -0400 Subject: tracing: bpf: Hide bpf trace events when they are not used All the trace events defined in include/trace/events/bpf.h are only used when CONFIG_BPF_SYSCALL is defined. But this file gets included by include/linux/bpf_trace.h which is included by the networking code with CREATE_TRACE_POINTS defined. If a trace event is created but not used it still has data structures and functions created for its use, even though nothing is using them. To not waste space, do not define the BPF trace events in bpf.h unless CONFIG_BPF_SYSCALL is defined. Signed-off-by: Steven Rostedt (VMware) Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 248961af2421..8e7c8bf2b687 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1580,5 +1580,8 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); +/* These are only used within the BPF_SYSCALL code */ +#ifdef CONFIG_BPF_SYSCALL EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type); EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu); +#endif -- cgit From e87c6bc3852b981e71c757be20771546ce9f76f3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:08 -0700 Subject: bpf: permit multiple bpf attachments for a single perf event This patch enables multiple bpf attachments for a kprobe/uprobe/tracepoint single trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed based on the order of attachment. A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative will be introduce a mutex lock in every trace_event_call structure, but it takes a lot of extra memory. So a global bpf_event_mutex lock is a good compromise. The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace to-be-detached program in-place. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- kernel/bpf/core.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8e7c8bf2b687..7fe448799d76 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1394,6 +1394,20 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +static unsigned int __bpf_prog_ret1(const void *ctx, + const struct bpf_insn *insn) +{ + return 1; +} + +static struct bpf_prog_dummy { + struct bpf_prog prog; +} dummy_bpf_prog = { + .prog = { + .bpf_func = __bpf_prog_ret1, + }, +}; + /* to avoid allocating empty bpf_prog_array for cgroups that * don't have bpf program attached use one global 'empty_prog_array' * It will not be modified the caller of bpf_prog_array_alloc() @@ -1463,6 +1477,73 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, return 0; } +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog) +{ + struct bpf_prog **prog = progs->progs; + + for (; *prog; prog++) + if (*prog == old_prog) { + WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + break; + } +} + +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array) +{ + int new_prog_cnt, carry_prog_cnt = 0; + struct bpf_prog **existing_prog; + struct bpf_prog_array *array; + int new_prog_idx = 0; + + /* Figure out how many existing progs we need to carry over to + * the new array. + */ + if (old_array) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) { + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + carry_prog_cnt++; + if (*existing_prog == include_prog) + return -EEXIST; + } + } + + /* How many progs (not NULL) will be in the new array? */ + new_prog_cnt = carry_prog_cnt; + if (include_prog) + new_prog_cnt += 1; + + /* Do we have any prog (not NULL) in the new array? */ + if (!new_prog_cnt) { + *new_array = NULL; + return 0; + } + + /* +1 as the end of prog_array is marked with NULL */ + array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); + if (!array) + return -ENOMEM; + + /* Fill in the new prog array */ + if (carry_prog_cnt) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + array->progs[new_prog_idx++] = *existing_prog; + } + if (include_prog) + array->progs[new_prog_idx++] = include_prog; + array->progs[new_prog_idx] = NULL; + *new_array = array; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; -- cgit From ab3f0063c48c26c927851b6767824e35a716d878 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:17 -0700 Subject: bpf: offload: add infrastructure for loading programs for a specific netdev The fact that we don't know which device the program is going to be used on is quite limiting in current eBPF infrastructure. We have to reverse or limit the changes which kernel makes to the loaded bytecode if we want it to be offloaded to a networking device. We also have to invent new APIs for debugging and troubleshooting support. Make it possible to load programs for a specific netdev. This helps us to bring the debug information closer to the core eBPF infrastructure (e.g. we will be able to reuse the verifer log in device JIT). It allows device JITs to perform translation on the original bytecode. __bpf_prog_get() when called to get a reference for an attachment point will now refuse to give it if program has a device assigned. Following patches will add a version of that function which passes the expected netdev in. @type argument in __bpf_prog_get() is renamed to attach_type to make it clearer that it's only set on attachment. All calls to ndo_bpf are protected by rtnl, only verifier callbacks are not. We need a wait queue to make sure netdev doesn't get destroyed while verifier is still running and calling its driver. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- kernel/bpf/core.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7fe448799d76..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1380,7 +1380,13 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) * valid program, which in this case would simply not * be JITed, but falls back to the interpreter. */ - fp = bpf_int_jit_compile(fp); + if (!bpf_prog_is_dev_bound(fp->aux)) { + fp = bpf_int_jit_compile(fp); + } else { + *err = bpf_prog_offload_compile(fp); + if (*err) + return fp; + } bpf_prog_lock_ro(fp); /* The tail call compatibility check can only be done at @@ -1549,6 +1555,8 @@ static void bpf_prog_free_deferred(struct work_struct *work) struct bpf_prog_aux *aux; aux = container_of(work, struct bpf_prog_aux, work); + if (bpf_prog_is_dev_bound(aux)) + bpf_prog_offload_destroy(aux->prog); bpf_jit_free(aux->prog); } -- cgit From dd0bb688eaa241b5655d396d45366cba9225aed9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:42 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a6c37762330..271daad31f37 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,6 +1326,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. -- cgit From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: bpf: Revert bpf_overrid_function() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. Miller --- kernel/bpf/core.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/bpf/core.c') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 271daad31f37..8a6c37762330 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1326,9 +1326,6 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { - if (fp->kprobe_override) - return false; - if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. -- cgit