diff options
Diffstat (limited to 'net/ipv4/tcp_bpf.c')
-rw-r--r-- | net/ipv4/tcp_bpf.c | 272 |
1 files changed, 101 insertions, 171 deletions
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 8a01428f80c1..5a05327f97c1 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -10,38 +10,6 @@ #include <net/inet_common.h> #include <net/tls.h> -static bool tcp_bpf_stream_read(const struct sock *sk) -{ - struct sk_psock *psock; - bool empty = true; - - rcu_read_lock(); - psock = sk_psock(sk); - if (likely(psock)) - empty = list_empty(&psock->ingress_msg); - rcu_read_unlock(); - return !empty; -} - -static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, - int flags, long timeo, int *err) -{ - DEFINE_WAIT_FUNC(wait, woken_wake_function); - int ret = 0; - - if (!timeo) - return ret; - - add_wait_queue(sk_sleep(sk), &wait); - sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - ret = sk_wait_event(sk, &timeo, - !list_empty(&psock->ingress_msg) || - !skb_queue_empty(&sk->sk_receive_queue), &wait); - sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); - remove_wait_queue(sk_sleep(sk), &wait); - return ret; -} - int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, int len, int flags) { @@ -115,49 +83,6 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock, } EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg); -int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int nonblock, int flags, int *addr_len) -{ - struct sk_psock *psock; - int copied, ret; - - psock = sk_psock_get(sk); - if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); - if (!skb_queue_empty(&sk->sk_receive_queue) && - sk_psock_queue_empty(psock)) - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - lock_sock(sk); -msg_bytes_ready: - copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); - if (!copied) { - int data, err = 0; - long timeo; - - timeo = sock_rcvtimeo(sk, nonblock); - data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); - if (data) { - if (!sk_psock_queue_empty(psock)) - goto msg_bytes_ready; - release_sock(sk); - sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - } - if (err) { - ret = err; - goto out; - } - copied = -EAGAIN; - } - ret = copied; -out: - release_sock(sk); - sk_psock_put(sk, psock); - return ret; -} - static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, u32 apply_bytes, int flags) { @@ -298,6 +223,82 @@ int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, } EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir); +#ifdef CONFIG_BPF_STREAM_PARSER +static bool tcp_bpf_stream_read(const struct sock *sk) +{ + struct sk_psock *psock; + bool empty = true; + + rcu_read_lock(); + psock = sk_psock(sk); + if (likely(psock)) + empty = list_empty(&psock->ingress_msg); + rcu_read_unlock(); + return !empty; +} + +static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock, + int flags, long timeo, int *err) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} + +static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int nonblock, int flags, int *addr_len) +{ + struct sk_psock *psock; + int copied, ret; + + psock = sk_psock_get(sk); + if (unlikely(!psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + if (!skb_queue_empty(&sk->sk_receive_queue) && + sk_psock_queue_empty(psock)) + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + lock_sock(sk); +msg_bytes_ready: + copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); + if (!copied) { + int data, err = 0; + long timeo; + + timeo = sock_rcvtimeo(sk, nonblock); + data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err); + if (data) { + if (!sk_psock_queue_empty(psock)) + goto msg_bytes_ready; + release_sock(sk); + sk_psock_put(sk, psock); + return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + } + if (err) { + ret = err; + goto out; + } + copied = -EAGAIN; + } + ret = copied; +out: + release_sock(sk); + sk_psock_put(sk, psock); + return ret; +} + static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, struct sk_msg *msg, int *copied, int flags) { @@ -528,57 +529,6 @@ out_err: return copied ? copied : err; } -static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock) -{ - struct sk_psock_link *link; - - while ((link = sk_psock_link_pop(psock))) { - sk_psock_unlink(sk, link); - sk_psock_free_link(link); - } -} - -static void tcp_bpf_unhash(struct sock *sk) -{ - void (*saved_unhash)(struct sock *sk); - struct sk_psock *psock; - - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - if (sk->sk_prot->unhash) - sk->sk_prot->unhash(sk); - return; - } - - saved_unhash = psock->saved_unhash; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - saved_unhash(sk); -} - -static void tcp_bpf_close(struct sock *sk, long timeout) -{ - void (*saved_close)(struct sock *sk, long timeout); - struct sk_psock *psock; - - lock_sock(sk); - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - return sk->sk_prot->close(sk, timeout); - } - - saved_close = psock->saved_close; - tcp_bpf_remove(sk, psock); - rcu_read_unlock(); - release_sock(sk); - saved_close(sk, timeout); -} - enum { TCP_BPF_IPV4, TCP_BPF_IPV6, @@ -599,8 +549,8 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = tcp_bpf_unhash; - prot[TCP_BPF_BASE].close = tcp_bpf_close; + prot[TCP_BPF_BASE].unhash = sock_map_unhash; + prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; @@ -629,26 +579,6 @@ static int __init tcp_bpf_v4_build_proto(void) } core_initcall(tcp_bpf_v4_build_proto); -static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]); -} - -static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock) -{ - int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; - int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - - /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed - * or added requiring sk_prot hook updates. We keep original saved - * hooks in this case. - */ - sk->sk_prot = &tcp_bpf_prots[family][config]; -} - static int tcp_bpf_assert_proto_ops(struct proto *ops) { /* In order to avoid retpoline, we make assumptions when we call @@ -660,34 +590,34 @@ static int tcp_bpf_assert_proto_ops(struct proto *ops) ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP; } -void tcp_bpf_reinit(struct sock *sk) +struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock) { - struct sk_psock *psock; + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE; - sock_owned_by_me(sk); + if (!psock->sk_proto) { + struct proto *ops = READ_ONCE(sk->sk_prot); - rcu_read_lock(); - psock = sk_psock(sk); - tcp_bpf_reinit_sk_prot(sk, psock); - rcu_read_unlock(); + if (tcp_bpf_assert_proto_ops(ops)) + return ERR_PTR(-EINVAL); + + tcp_bpf_check_v6_needs_rebuild(sk, ops); + } + + return &tcp_bpf_prots[family][config]; } -int tcp_bpf_init(struct sock *sk) +/* If a child got cloned from a listening socket that had tcp_bpf + * protocol callbacks installed, we need to restore the callbacks to + * the default ones because the child does not inherit the psock state + * that tcp_bpf callbacks expect. + */ +void tcp_bpf_clone(const struct sock *sk, struct sock *newsk) { - struct proto *ops = READ_ONCE(sk->sk_prot); - struct sk_psock *psock; - - sock_owned_by_me(sk); + int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4; + struct proto *prot = newsk->sk_prot; - rcu_read_lock(); - psock = sk_psock(sk); - if (unlikely(!psock || psock->sk_proto || - tcp_bpf_assert_proto_ops(ops))) { - rcu_read_unlock(); - return -EINVAL; - } - tcp_bpf_check_v6_needs_rebuild(sk, ops); - tcp_bpf_update_sk_prot(sk, psock); - rcu_read_unlock(); - return 0; + if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE]) + newsk->sk_prot = sk->sk_prot_creator; } +#endif /* CONFIG_BPF_STREAM_PARSER */ |