From 7a9546ee354ec6f23af403992b8c07baa50a23d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 12 Nov 2008 00:54:20 -0800 Subject: net: ib_net pointer should depends on CONFIG_NET_NS We can shrink size of "struct inet_bind_bucket" by 50%, using read_pnet() and write_pnet() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 44981906fb91..be41ebbec4eb 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -35,7 +35,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC); if (tb != NULL) { - tb->ib_net = hold_net(net); + write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; INIT_HLIST_HEAD(&tb->owners); @@ -51,7 +51,7 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - release_net(tb->ib_net); + release_net(ib_net(tb)); kmem_cache_free(cachep, tb); } } @@ -449,7 +449,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, * unique enough. */ inet_bind_bucket_for_each(tb, node, &head->chain) { - if (tb->ib_net == net && tb->port == port) { + if (ib_net(tb) == net && tb->port == port) { WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; -- cgit From 3ab5aee7fe840b5b1b35a8d1ac11c3de5281e611 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Nov 2008 19:40:17 -0800 Subject: net: Convert TCP & DCCP hash tables to use RCU / hlist_nulls RCU was added to UDP lookups, using a fast infrastructure : - sockets kmem_cache use SLAB_DESTROY_BY_RCU and dont pay the price of call_rcu() at freeing time. - hlist_nulls permits to use few memory barriers. This patch uses same infrastructure for TCP/DCCP established and timewait sockets. Thanks to SLAB_DESTROY_BY_RCU, no slowdown for applications using short lived TCP connections. A followup patch, converting rwlocks to spinlocks will even speedup this case. __inet_lookup_established() is pretty fast now we dont have to dirty a contended cache line (read_lock/read_unlock) Only established and timewait hashtable are converted to RCU (bind table and listen table are still using traditional locking) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 78 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 23 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index be41ebbec4eb..fd269cfef0ec 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -223,35 +223,65 @@ struct sock * __inet_lookup_established(struct net *net, INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - const struct hlist_node *node; + const struct hlist_nulls_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); + unsigned int slot = hash & (hashinfo->ehash_size - 1); + struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; - prefetch(head->chain.first); - read_lock(lock); - sk_for_each(sk, node, &head->chain) { + rcu_read_lock(); +begin: + sk_nulls_for_each_rcu(sk, node, &head->chain) { if (INET_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; /* You sunk my battleship! */ + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) + goto begintw; + if (unlikely(!INET_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begin; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begin; +begintw: /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &head->twchain) { + sk_nulls_for_each_rcu(sk, node, &head->twchain) { if (INET_TW_MATCH(sk, net, hash, acookie, - saddr, daddr, ports, dif)) - goto hit; + saddr, daddr, ports, dif)) { + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { + sk = NULL; + goto out; + } + if (unlikely(!INET_TW_MATCH(sk, net, hash, acookie, + saddr, daddr, ports, dif))) { + sock_put(sk); + goto begintw; + } + goto out; + } } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != slot) + goto begintw; sk = NULL; out: - read_unlock(lock); + rcu_read_unlock(); return sk; -hit: - sock_hold(sk); - goto out; } EXPORT_SYMBOL_GPL(__inet_lookup_established); @@ -272,14 +302,14 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; - const struct hlist_node *node; + const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); write_lock(lock); /* Check TIME-WAIT sockets first. */ - sk_for_each(sk2, node, &head->twchain) { + sk_nulls_for_each(sk2, node, &head->twchain) { tw = inet_twsk(sk2); if (INET_TW_MATCH(sk2, net, hash, acookie, @@ -293,7 +323,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, tw = NULL; /* And established part... 
*/ - sk_for_each(sk2, node, &head->chain) { + sk_nulls_for_each(sk2, node, &head->chain) { if (INET_MATCH(sk2, net, hash, acookie, saddr, daddr, ports, dif)) goto not_unique; @@ -306,7 +336,7 @@ unique: inet->sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); - __sk_add_node(sk, &head->chain); + __sk_nulls_add_node_rcu(sk, &head->chain); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); @@ -338,7 +368,7 @@ static inline u32 inet_sk_port_offset(const struct sock *sk) void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; + struct hlist_nulls_head *list; rwlock_t *lock; struct inet_ehash_bucket *head; @@ -350,7 +380,7 @@ void __inet_hash_nolisten(struct sock *sk) lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); - __sk_add_node(sk, list); + __sk_nulls_add_node_rcu(sk, list); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock(lock); } @@ -400,13 +430,15 @@ void inet_unhash(struct sock *sk) local_bh_disable(); inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; + if (__sk_del_node_init(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } else { lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); + if (__sk_nulls_del_node_init_rcu(sk)) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(lock); out: if (sk->sk_state == TCP_LISTEN) -- cgit From 5caea4ea7088e80ac5410d04660346094608b909 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 00:40:07 -0800 Subject: net: listening_hash get a spinlock per bucket This patch prepares RCU migration of listening_hash table for TCP/DCCP protocols. listening_hash table being small (32 slots per protocol), we add a spinlock for each slot, instead of a single rwlock for whole table. This should reduce hold time of readers, and writers concurrency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 86 +++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 55 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fd269cfef0ec..377d004e5723 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,35 +110,6 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); -/* - * This lock without WQ_FLAG_EXCLUSIVE is good on UP and it can be very bad on SMP. - * Look, when several writers sleep and reader wakes them up, all but one - * immediately hit write lock and grab all the cpus. Exclusive sleep solves - * this, _but_ remember, it adds useless work on UP machines (wake up each - * exclusive lock release). It should be ifdefed really. - */ -void inet_listen_wlock(struct inet_hashinfo *hashinfo) - __acquires(hashinfo->lhash_lock) -{ - write_lock(&hashinfo->lhash_lock); - - if (atomic_read(&hashinfo->lhash_users)) { - DEFINE_WAIT(wait); - - for (;;) { - prepare_to_wait_exclusive(&hashinfo->lhash_wait, - &wait, TASK_UNINTERRUPTIBLE); - if (!atomic_read(&hashinfo->lhash_users)) - break; - write_unlock_bh(&hashinfo->lhash_lock); - schedule(); - write_lock_bh(&hashinfo->lhash_lock); - } - - finish_wait(&hashinfo->lhash_wait, &wait); - } -} - /* * Don't inline this cruft. Here are some nice properties to exploit here. 
The * BSD API does not allow a listening sock to specify the remote port nor the @@ -191,25 +162,25 @@ struct sock *__inet_lookup_listener(struct net *net, const int dif) { struct sock *sk = NULL; - const struct hlist_head *head; + struct inet_listen_hashbucket *ilb; - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; + spin_lock(&ilb->lock); + if (!hlist_empty(&ilb->head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); if (inet->num == hnum && !sk->sk_node.next && (!inet->rcv_saddr || inet->rcv_saddr == daddr) && (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) goto sherry_cache; - sk = inet_lookup_listener_slow(net, head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); } if (sk) { sherry_cache: sock_hold(sk); } - read_unlock(&hashinfo->lhash_lock); + spin_unlock(&ilb->lock); return sk; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -389,8 +360,7 @@ EXPORT_SYMBOL_GPL(__inet_hash_nolisten); static void __inet_hash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_head *list; - rwlock_t *lock; + struct inet_listen_hashbucket *ilb; if (sk->sk_state != TCP_LISTEN) { __inet_hash_nolisten(sk); @@ -398,14 +368,12 @@ static void __inet_hash(struct sock *sk) } WARN_ON(!sk_unhashed(sk)); - list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - lock = &hashinfo->lhash_lock; + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - inet_listen_wlock(hashinfo); - __sk_add_node(sk, list); + spin_lock(&ilb->lock); + __sk_add_node(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); - wake_up(&hashinfo->lhash_wait); + spin_unlock(&ilb->lock); } void inet_hash(struct sock *sk) @@ -420,29 +388,27 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - rwlock_t *lock; struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; if (sk_unhashed(sk)) - goto out; + return; if (sk->sk_state == TCP_LISTEN) { - local_bh_disable(); - inet_listen_wlock(hashinfo); - lock = &hashinfo->lhash_lock; + struct inet_listen_hashbucket *ilb; + + ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + spin_lock_bh(&ilb->lock); if (__sk_del_node_init(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(&ilb->lock); } else { - lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + write_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + write_unlock_bh(lock); } - - write_unlock_bh(lock); -out: - if (sk->sk_state == TCP_LISTEN) - wake_up(&hashinfo->lhash_wait); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -556,3 +522,13 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, } EXPORT_SYMBOL_GPL(inet_hash_connect); + +void inet_hashinfo_init(struct inet_hashinfo *h) +{ + int i; + + for (i = 0; i < INET_LHTABLE_SIZE; i++) + spin_lock_init(&h->listening_hash[i].lock); +} + +EXPORT_SYMBOL_GPL(inet_hashinfo_init); -- cgit From 9db66bdcc83749affe61c61eb8ff3cf08f42afec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Nov 2008 20:39:09 -0800 Subject: net: convert TCP/DCCP ehash rwlocks to spinlocks Now TCP & DCCP use RCU lookups, we can convert ehash rwlocks to 
spinlocks. /proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'. This should speedup writers, since spin_lock()/spin_unlock() only use one atomic operation instead of two for write_lock()/write_unlock() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 377d004e5723..4c273a9981a6 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); - rwlock_t *lock = inet_ehash_lockp(hinfo, hash); + spinlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_nulls_node *node; struct inet_timewait_sock *tw; - prefetch(head->chain.first); - write_lock(lock); + spin_lock(lock); /* Check TIME-WAIT sockets first. */ sk_nulls_for_each(sk2, node, &head->twchain) { @@ -308,8 +307,8 @@ unique: sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); if (twp) { *twp = tw; @@ -325,7 +324,7 @@ unique: return 0; not_unique: - write_unlock(lock); + spin_unlock(lock); return -EADDRNOTAVAIL; } @@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; - rwlock_t *lock; + spinlock_t *lock; struct inet_ehash_bucket *head; WARN_ON(!sk_unhashed(sk)); @@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk) list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock(lock); + spin_lock(lock); __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_hash_nolisten); @@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(&ilb->lock); } else { - rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); + spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - write_lock_bh(lock); + spin_lock_bh(lock); if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - write_unlock_bh(lock); + spin_unlock_bh(lock); } } EXPORT_SYMBOL_GPL(inet_unhash); -- cgit From c25eb3bfb97294d0543a81230fbc237046b4b84c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Nov 2008 17:22:55 -0800 Subject: net: Convert TCP/DCCP listening hash tables to use RCU This is the last step to be able to perform full RCU lookups in __inet_lookup() : After established/timewait tables, we add RCU lookups to listening hash table. The only trick here is that a socket of a given type (TCP ipv4, TCP ipv6, ...) can now flight between two different tables (established and listening) during a RCU grace period, so we must use different 'nulls' end-of-chain values for two tables. We define a large value : #define LISTENING_NULLS_BASE (1U << 29) So that slots in listening table are guaranteed to have different end-of-chain values than slots in established table. A reader can still detect it finished its lookup in the right chain. 
Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 148 ++++++++++++++++++++++----------------------- 1 file changed, 74 insertions(+), 74 deletions(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 4c273a9981a6..11fcb87a1fdd 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -110,78 +110,79 @@ void __inet_inherit_port(struct sock *sk, struct sock *child) EXPORT_SYMBOL_GPL(__inet_inherit_port); +static inline int compute_score(struct sock *sk, struct net *net, + const unsigned short hnum, const __be32 daddr, + const int dif) +{ + int score = -1; + struct inet_sock *inet = inet_sk(sk); + + if (net_eq(sock_net(sk), net) && inet->num == hnum && + !ipv6_only_sock(sk)) { + __be32 rcv_saddr = inet->rcv_saddr; + score = sk->sk_family == PF_INET ? 1 : 0; + if (rcv_saddr) { + if (rcv_saddr != daddr) + return -1; + score += 2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + return -1; + score += 2; + } + } + return score; +} + /* * Don't inline this cruft. Here are some nice properties to exploit here. The * BSD API does not allow a listening sock to specify the remote port nor the * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(struct net *net, - const struct hlist_head *head, - const __be32 daddr, - const unsigned short hnum, - const int dif) -{ - struct sock *result = NULL, *sk; - const struct hlist_node *node; - int hiscore = -1; - - sk_for_each(sk, node, head) { - const struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->num == hnum && - !ipv6_only_sock(sk)) { - const __be32 rcv_saddr = inet->rcv_saddr; - int score = sk->sk_family == PF_INET ? 1 : 0; - - if (rcv_saddr) { - if (rcv_saddr != daddr) - continue; - score += 2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score += 2; - } - if (score == 5) - return sk; - if (score > hiscore) { - hiscore = score; - result = sk; - } - } - } - return result; -} -/* Optimize the common listener case. */ + struct sock *__inet_lookup_listener(struct net *net, struct inet_hashinfo *hashinfo, const __be32 daddr, const unsigned short hnum, const int dif) { - struct sock *sk = NULL; - struct inet_listen_hashbucket *ilb; + struct sock *sk, *result; + struct hlist_nulls_node *node; + unsigned int hash = inet_lhashfn(net, hnum); + struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; + int score, hiscore; - ilb = &hashinfo->listening_hash[inet_lhashfn(net, hnum)]; - spin_lock(&ilb->lock); - if (!hlist_empty(&ilb->head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(&ilb->head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if && net_eq(sock_net(sk), net)) - goto sherry_cache; - sk = inet_lookup_listener_slow(net, &ilb->head, daddr, hnum, dif); + rcu_read_lock(); +begin: + result = NULL; + hiscore = -1; + sk_nulls_for_each_rcu(sk, node, &ilb->head) { + score = compute_score(sk, net, hnum, daddr, dif); + if (score > hiscore) { + result = sk; + hiscore = score; + } } - if (sk) { -sherry_cache: - sock_hold(sk); + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. 
+ * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(node) != hash + LISTENING_NULLS_BASE) + goto begin; + if (result) { + if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt))) + result = NULL; + else if (unlikely(compute_score(result, net, hnum, daddr, + dif) < hiscore)) { + sock_put(result); + goto begin; + } } - spin_unlock(&ilb->lock); - return sk; + rcu_read_unlock(); + return result; } EXPORT_SYMBOL_GPL(__inet_lookup_listener); @@ -370,7 +371,7 @@ static void __inet_hash(struct sock *sk) ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; spin_lock(&ilb->lock); - __sk_add_node(sk, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); spin_unlock(&ilb->lock); } @@ -388,26 +389,22 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + spinlock_t *lock; + int done; if (sk_unhashed(sk)) return; - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; + if (sk->sk_state == TCP_LISTEN) + lock = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)].lock; + else + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock_bh(&ilb->lock); - if (__sk_del_node_init(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(&ilb->lock); - } else { - spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); - - spin_lock_bh(lock); - if (__sk_nulls_del_node_init_rcu(sk)) - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock_bh(lock); - } + spin_lock_bh(lock); + done =__sk_nulls_del_node_init_rcu(sk); + spin_unlock_bh(lock); + if (done) + sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } EXPORT_SYMBOL_GPL(inet_unhash); @@ -526,8 +523,11 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; - for (i = 0; i < INET_LHTABLE_SIZE; i++) + for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, + i + LISTENING_NULLS_BASE); + } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); -- cgit From 920de804bca61f88643bc9171bcd06f1a56c6258 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 24 Nov 2008 00:09:29 -0800 Subject: net: Make sure BHs are disabled in sock_prot_inuse_add() The rule of calling sock_prot_inuse_add() is that BHs must be disabled. Some new calls were added where this was not true and this tiggers warnings as reported by Ilpo. Fix this by adding explicit BH disabling around those call sites, or moving sock_prot_inuse_add() call inside an existing BH disabled section. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/inet_hashtables.c') diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 11fcb87a1fdd..6a1045da48d2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -402,9 +402,9 @@ void inet_unhash(struct sock *sk) spin_lock_bh(lock); done =__sk_nulls_del_node_init_rcu(sk); - spin_unlock_bh(lock); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + spin_unlock_bh(lock); } EXPORT_SYMBOL_GPL(inet_unhash); -- cgit
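
Taken together, the series converges on one lockless lookup pattern: a SLAB_DESTROY_BY_RCU kmem_cache, hlist_nulls chains, a reference taken with atomic_inc_not_zero(), a re-check of the match keys once that reference is held, and a restart whenever the nulls value ending the chain is not the slot the walk started from. The sketch below is a minimal restatement of that pattern outside the socket code, along the lines of the kernel's Documentation/RCU/rculist_nulls; it is not part of the commits above, and `struct obj`, `TABLE_SIZE`, `hash_key()` and `obj_put()` are illustrative stand-ins rather than kernel APIs.

```c
/*
 * Illustrative sketch only -- not from the patches above.  It restates the
 * SLAB_DESTROY_BY_RCU + hlist_nulls lookup pattern that the series applies
 * to the established, timewait and listening hash tables.
 * TABLE_SIZE, hash_key() and obj_put() are made-up stand-ins.
 */
#include <linux/rculist_nulls.h>
#include <linux/rcupdate.h>
#include <linux/atomic.h>

#define TABLE_SIZE	32				/* must be a power of two */
#define hash_key(k)	((unsigned int)(k) * 2654435761U)

struct obj {
	struct hlist_nulls_node node;
	atomic_t		refcnt;
	int			key;
};

/*
 * Each head is set up with INIT_HLIST_NULLS_HEAD(&table[slot], slot), so the
 * 'nulls' marker that terminates a chain encodes which slot the chain belongs to.
 */
static struct hlist_nulls_head table[TABLE_SIZE];

static struct obj *obj_lookup(int key)
{
	unsigned int slot = hash_key(key) & (TABLE_SIZE - 1);
	struct hlist_nulls_node *node;
	struct obj *obj;

	rcu_read_lock();
begin:
	hlist_nulls_for_each_entry_rcu(obj, node, &table[slot], node) {
		if (obj->key != key)
			continue;
		/*
		 * With SLAB_DESTROY_BY_RCU the object can be freed and
		 * reused at any moment: take a reference only if it is
		 * still live, then re-check the key under that reference.
		 */
		if (unlikely(!atomic_inc_not_zero(&obj->refcnt)))
			goto begin;
		if (unlikely(obj->key != key)) {
			obj_put(obj);	/* hypothetical helper dropping the ref */
			goto begin;
		}
		goto out;
	}
	/*
	 * An object may have been freed and re-inserted into another chain
	 * while we walked this one.  The nulls value at the end of the chain
	 * tells us where the walk really finished; if it is not our slot,
	 * restart.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
	obj = NULL;
out:
	rcu_read_unlock();
	return obj;
}
```

In the patches this pattern appears three times (established chain, timewait chain, listening chain). The last commit's end-of-chain test becomes `get_nulls_value(node) != hash + LISTENING_NULLS_BASE`, so a socket that moves between the established and listening tables during a grace period can never terminate a walk with a nulls value that looks valid for the wrong table.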