From 773ba4fe9104a64a54d1c00f0fb6ffb95def2b03 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:26 +0100 Subject: ipv6: avoid partial copy for zc Even when zerocopy transmission is requested and possible, __ip6_append_data() will still copy a small chunk of data just because it allocated some extra linear space (e.g. 128 bytes). It wastes CPU cycles on copy and iter manipulations and also misaligns potentially aligned data. Avoid such copies. And as a bonus we can allocate a smaller skb. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 77e3f5970ce4..fc74ce3ed8cc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1464,6 +1464,7 @@ static int __ip6_append_data(struct sock *sk, int copy; int err; int offset = 0; + bool zc = false; u32 tskey = 0; struct rt6_info *rt = (struct rt6_info *)cork->dst; struct ipv6_txoptions *opt = v6_cork->opt; @@ -1549,6 +1550,7 @@ emsgsize: if (rt->dst.dev->features & NETIF_F_SG && csummode == CHECKSUM_PARTIAL) { paged = true; + zc = true; } else { uarg->zerocopy = 0; skb_zcopy_set(skb, uarg, &extra_uref); @@ -1630,9 +1632,12 @@ alloc_new_skb: (fraglen + alloc_extra < SKB_MAX_ALLOC || !(rt->dst.dev->features & NETIF_F_SG))) alloclen = fraglen; - else { + else if (!zc) { alloclen = min_t(int, fraglen, MAX_HEADER); pagedlen = fraglen - alloclen; + } else { + alloclen = fragheaderlen + transhdrlen; + pagedlen = datalen - transhdrlen; } alloclen += alloc_extra; -- cgit From 1fd3ae8c906c0f521238d436566323af3f0282e8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 12 Jul 2022 21:52:34 +0100 Subject: ipv6/udp: support externally provided ubufs Teach ipv6/udp how to use external ubuf_info provided in msghdr and also prepare it for managed frags by sprinkling skb_zcopy_downgrade_managed() when it could mix managed and 
not managed frags. Signed-off-by: Pavel Begunkov Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_output.c | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fc74ce3ed8cc..897ca4f9b791 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1542,18 +1542,35 @@ emsgsize: rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) csummode = CHECKSUM_PARTIAL; - if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) { - uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); - if (!uarg) - return -ENOBUFS; - extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ - if (rt->dst.dev->features & NETIF_F_SG && - csummode == CHECKSUM_PARTIAL) { - paged = true; - zc = true; - } else { - uarg->zerocopy = 0; - skb_zcopy_set(skb, uarg, &extra_uref); + if ((flags & MSG_ZEROCOPY) && length) { + struct msghdr *msg = from; + + if (getfrag == ip_generic_getfrag && msg->msg_ubuf) { + if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb)) + return -EINVAL; + + /* Leave uarg NULL if can't zerocopy, callers should + * be able to handle it. 
+ */ + if ((rt->dst.dev->features & NETIF_F_SG) && + csummode == CHECKSUM_PARTIAL) { + paged = true; + zc = true; + uarg = msg->msg_ubuf; + } + } else if (sock_flag(sk, SOCK_ZEROCOPY)) { + uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb)); + if (!uarg) + return -ENOBUFS; + extra_uref = !skb_zcopy(skb); /* only ref on new uarg */ + if (rt->dst.dev->features & NETIF_F_SG && + csummode == CHECKSUM_PARTIAL) { + paged = true; + zc = true; + } else { + uarg->zerocopy = 0; + skb_zcopy_set(skb, uarg, &extra_uref); + } } } @@ -1747,13 +1764,14 @@ alloc_new_skb: err = -EFAULT; goto error; } - } else if (!uarg || !uarg->zerocopy) { + } else if (!zc) { int i = skb_shinfo(skb)->nr_frags; err = -ENOMEM; if (!sk_page_frag_refill(sk, pfrag)) goto error; + skb_zcopy_downgrade_managed(skb); if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { err = -EMSGSIZE; -- cgit