diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 11:56:19 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-11-15 11:56:19 -0800 |
commit | 5bbcc0f595fadb4cac0eddc4401035ec0bd95b09 (patch) | |
tree | 3b65e490cc36a6c6fecac1fa24d9e0ac9ced4455 /net/ipv4/tcp_cong.c | |
parent | 892204e06cb9e89fbc4b299a678f9ca358e97cac (diff) | |
parent | 50895b9de1d3e0258e015e8e55128d835d9a9f19 (diff) | |
download | linux-5bbcc0f595fadb4cac0eddc4401035ec0bd95b09.tar.gz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Maintain the TCP retransmit queue using an rbtree, with 1GB
windows at 100Gb this really has become necessary. From Eric
Dumazet.
2) Multi-program support for cgroup+bpf, from Alexei Starovoitov.
3) Perform broadcast flooding in hardware in mv88e6xxx, from Andrew
Lunn.
4) Add meter action support to openvswitch, from Andy Zhou.
5) Add a data meta pointer for BPF accessible packets, from Daniel
Borkmann.
6) Namespace-ify almost all TCP sysctl knobs, from Eric Dumazet.
7) Turn on Broadcom Tags in b53 driver, from Florian Fainelli.
8) More work to move the RTNL mutex down, from Florian Westphal.
9) Add 'bpftool' utility, to help with bpf program introspection.
From Jakub Kicinski.
10) Add new 'cpumap' type for XDP_REDIRECT action, from Jesper
Dangaard Brouer.
11) Support 'blocks' of transformations in the packet scheduler which
can span multiple network devices, from Jiri Pirko.
12) TC flower offload support in cxgb4, from Kumar Sanghvi.
13) Priority based stream scheduler for SCTP, from Marcelo Ricardo
Leitner.
14) Thunderbolt networking driver, from Amir Levy and Mika Westerberg.
15) Add RED qdisc offloadability, and use it in mlxsw driver. From
Nogah Frankel.
16) eBPF based device controller for cgroup v2, from Roman Gushchin.
17) Add some fundamental tracepoints for TCP, from Song Liu.
18) Remove garbage collection from ipv6 route layer, this is a
significant accomplishment. From Wei Wang.
19) Add multicast route offload support to mlxsw, from Yotam Gigi"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2177 commits)
tcp: highest_sack fix
geneve: fix fill_info when link down
bpf: fix lockdep splat
net: cdc_ncm: GetNtbFormat endian fix
openvswitch: meter: fix NULL pointer dereference in ovs_meter_cmd_reply_start
netem: remove unnecessary 64 bit modulus
netem: use 64 bit divide by rate
tcp: Namespace-ify sysctl_tcp_default_congestion_control
net: Protect iterations over net::fib_notifier_ops in fib_seq_sum()
ipv6: set all.accept_dad to 0 by default
uapi: fix linux/tls.h userspace compilation error
usbnet: ipheth: prevent TX queue timeouts when device not ready
vhost_net: conditionally enable tx polling
uapi: fix linux/rxrpc.h userspace compilation errors
net: stmmac: fix LPI transitioning for dwmac4
atm: horizon: Fix irq release error
net-sysfs: trigger netlink notification on ifalias change via sysfs
openvswitch: Using kfree_rcu() to simplify the code
openvswitch: Make local function ovs_nsh_key_attr_size() static
openvswitch: Fix return value check in ovs_meter_cmd_features()
...
Diffstat (limited to 'net/ipv4/tcp_cong.c')
-rw-r--r-- | net/ipv4/tcp_cong.c | 76 |
1 files changed, 36 insertions, 40 deletions
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2f26124fd160..bc6c02f16243 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name)
 }
 
 /* Must be called with rcu lock held */
-static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name)
+static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net,
+						       const char *name)
 {
-	const struct tcp_congestion_ops *ca = tcp_ca_find(name);
+	struct tcp_congestion_ops *ca = tcp_ca_find(name);
+
 #ifdef CONFIG_MODULES
 	if (!ca && capable(CAP_NET_ADMIN)) {
 		rcu_read_unlock();
@@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca)
 }
 EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control);
 
-u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
+u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca)
 {
 	const struct tcp_congestion_ops *ca;
 	u32 key = TCP_CA_UNSPEC;
@@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca)
 	might_sleep();
 
 	rcu_read_lock();
-	ca = __tcp_ca_find_autoload(name);
+	ca = tcp_ca_find_autoload(net, name);
 	if (ca) {
 		key = ca->key;
 		*ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN;
@@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key);
 /* Assign choice of congestion control. */
 void tcp_assign_congestion_control(struct sock *sk)
 {
+	struct net *net = sock_net(sk);
 	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct tcp_congestion_ops *ca;
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
-		if (likely(try_module_get(ca->owner))) {
-			icsk->icsk_ca_ops = ca;
-			goto out;
-		}
-		/* Fallback to next available. The last really
-		 * guaranteed fallback is Reno from this list.
-		 */
-	}
-out:
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
+	if (unlikely(!try_module_get(ca->owner)))
+		ca = &tcp_reno;
+	icsk->icsk_ca_ops = ca;
 	rcu_read_unlock();
-	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 
+	memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
 	if (ca->flags & TCP_CONG_NEEDS_ECN)
 		INET_ECN_xmit(sk);
 	else
@@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk)
 }
 
 /* Used by sysctl to change default congestion control */
-int tcp_set_default_congestion_control(const char *name)
+int tcp_set_default_congestion_control(struct net *net, const char *name)
 {
 	struct tcp_congestion_ops *ca;
-	int ret = -ENOENT;
-
-	spin_lock(&tcp_cong_list_lock);
-	ca = tcp_ca_find(name);
-#ifdef CONFIG_MODULES
-	if (!ca && capable(CAP_NET_ADMIN)) {
-		spin_unlock(&tcp_cong_list_lock);
+	const struct tcp_congestion_ops *prev;
+	int ret;
 
-		request_module("tcp_%s", name);
-		spin_lock(&tcp_cong_list_lock);
-		ca = tcp_ca_find(name);
-	}
-#endif
+	rcu_read_lock();
+	ca = tcp_ca_find_autoload(net, name);
+	if (!ca) {
+		ret = -ENOENT;
+	} else if (!try_module_get(ca->owner)) {
+		ret = -EBUSY;
+	} else {
+		prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+		if (prev)
+			module_put(prev->owner);
 
-	if (ca) {
-		ca->flags |= TCP_CONG_NON_RESTRICTED;	/* default is always allowed */
-		list_move(&ca->list, &tcp_cong_list);
+		ca->flags |= TCP_CONG_NON_RESTRICTED;
 		ret = 0;
 	}
-	spin_unlock(&tcp_cong_list_lock);
+	rcu_read_unlock();
 
 	return ret;
 }
@@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name)
 /* Set default value from kernel configuration at bootup */
 static int __init tcp_congestion_default(void)
 {
-	return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG);
+	return tcp_set_default_congestion_control(&init_net,
+						  CONFIG_DEFAULT_TCP_CONG);
 }
 late_initcall(tcp_congestion_default);
 
@@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
 }
 
 /* Get current default congestion control */
-void tcp_get_default_congestion_control(char *name)
+void tcp_get_default_congestion_control(struct net *net, char *name)
 {
-	struct tcp_congestion_ops *ca;
-	/* We will always have reno... */
-	BUG_ON(list_empty(&tcp_cong_list));
+	const struct tcp_congestion_ops *ca;
 
 	rcu_read_lock();
-	ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list);
+	ca = rcu_dereference(net->ipv4.tcp_congestion_control);
 	strncpy(name, ca->name, TCP_CA_NAME_MAX);
 	rcu_read_unlock();
 }
@@ -351,12 +345,14 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, boo
 	if (!load)
 		ca = tcp_ca_find(name);
 	else
-		ca = __tcp_ca_find_autoload(name);
+		ca = tcp_ca_find_autoload(sock_net(sk), name);
+
 	/* No change asking for existing value */
 	if (ca == icsk->icsk_ca_ops) {
 		icsk->icsk_ca_setsockopt = 1;
 		goto out;
 	}
+
 	if (!ca) {
 		err = -ENOENT;
 	} else if (!load) {