#include <linux/module.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/vmalloc.h>
#include <linux/memblock.h>

#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/inet6_hashtables.h>
#endif
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/sock_reuseport.h>
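/* Compute the established-table hash of the 4-tuple, mixed with a
 * boot-time random secret and a per-namespace salt so the chain layout
 * is neither predictable from outside nor shared between netns.
 */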
static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
			const __u16 lport, const __be32 faddr,
			const __be16 fport)
{
	static u32 inet_ehash_secret __read_mostly;

	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));

	return __inet_ehashfn(laddr, lport, faddr, fport,
			      inet_ehash_secret + net_hash_mix(net));
}
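/* This function handles inet_sock, but also timewait and request
 * sockets, for IPv4 and IPv6; v4-mapped IPv6 destinations hash through
 * the IPv4 path.
 */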
static u32 sk_ehashfn(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		return inet6_ehashfn(sock_net(sk),
				     &sk->sk_v6_rcv_saddr, sk->sk_num,
				     &sk->sk_v6_daddr, sk->sk_dport);
#endif
	return inet_ehashfn(sock_net(sk),
			    sk->sk_rcv_saddr, sk->sk_num,
			    sk->sk_daddr, sk->sk_dport);
}
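/* Allocate a bind bucket for @snum and link it into @head. The caller
 * must hold the bucket's chain lock, hence the GFP_ATOMIC allocation.
 */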
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum,
						 int l3mdev)
{
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->l3mdev = l3mdev;
		tb->port = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		INIT_HLIST_HEAD(&tb->owners);
		hlist_add_head(&tb->node, &head->chain);
	}
	return tb;
}
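/* Free a bind bucket once no socket owns it. Caller must hold the
 * bucket's chain lock.
 */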
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		kmem_cache_free(cachep, tb);
	}
}
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    const unsigned short snum)
{
	inet_sk(sk)->inet_num = snum;
	sk_add_bind_node(sk, &tb->owners);
	inet_csk(sk)->icsk_bind_hash = tb;
}
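/* Release the local port held by @sk: unlink it from its bind bucket
 * and destroy the bucket if it became empty.
 */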
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
			hashinfo->bhash_size);
	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
	struct inet_bind_bucket *tb;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	__sk_del_bind_node(sk);
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;
	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
	spin_unlock(&head->lock);
}
void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);
int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
	unsigned short port = inet_sk(child)->inet_num;
	const int bhash = inet_bhashfn(sock_net(sk), port,
			table->bhash_size);
	struct inet_bind_hashbucket *head = &table->bhash[bhash];
	struct inet_bind_bucket *tb;
	int l3mdev;

	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	if (unlikely(!tb)) {
		spin_unlock(&head->lock);
		return -ENOENT;
	}
	if (tb->port != port) {
		l3mdev = inet_sk_bound_l3mdev(sk);
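		/* A tproxy-style redirect can deliver the child on a port
		 * different from the listener's, so the parent's bind
		 * bucket cannot be reused; look up or create one matching
		 * the child's port here.
		 */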
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (net_eq(ib_net(tb), sock_net(sk)) &&
			    tb->l3mdev == l3mdev && tb->port == port)
				break;
		}
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     sock_net(sk), head, port,
						     l3mdev);
			if (!tb) {
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
		}
		inet_csk_update_fastreuse(tb, child);
	}
	inet_bind_hash(child, tb, port);
	spin_unlock(&head->lock);

	return 0;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
static struct inet_listen_hashbucket *
inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
{
	u32 hash;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		hash = ipv6_portaddr_hash(sock_net(sk),
					  &sk->sk_v6_rcv_saddr,
					  inet_sk(sk)->inet_num);
	else
#endif
		hash = ipv4_portaddr_hash(sock_net(sk),
					  inet_sk(sk)->inet_rcv_saddr,
					  inet_sk(sk)->inet_num);
	return inet_lhash2_bucket(h, hash);
}
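/* Add a listener to the second listening hash, keyed by port and
 * address. IPv6 reuseport listeners are appended at the tail of the
 * chain, everything else at the head.
 */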
static void inet_hash2(struct inet_hashinfo *h, struct sock *sk)
{
	struct inet_listen_hashbucket *ilb2;

	if (!h->lhash2)
		return;

	ilb2 = inet_lhash2_bucket_sk(h, sk);

	spin_lock(&ilb2->lock);
	if (sk->sk_reuseport && sk->sk_family == AF_INET6)
		hlist_add_tail_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
				   &ilb2->head);
	else
		hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node,
				   &ilb2->head);
	ilb2->count++;
	spin_unlock(&ilb2->lock);
}
static void inet_unhash2(struct inet_hashinfo *h, struct sock *sk)
{
	struct inet_listen_hashbucket *ilb2;

	if (!h->lhash2 ||
	    WARN_ON_ONCE(hlist_unhashed(&inet_csk(sk)->icsk_listen_portaddr_node)))
		return;

	ilb2 = inet_lhash2_bucket_sk(h, sk);

	spin_lock(&ilb2->lock);
	hlist_del_init_rcu(&inet_csk(sk)->icsk_listen_portaddr_node);
	ilb2->count--;
	spin_unlock(&ilb2->lock);
}
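/* Score a listener against an incoming (daddr, hnum): a bound address
 * or bound device must match exactly (else -1) and each adds 4; an
 * AF_INET socket starts ahead of a v4-mapped IPv6 one; +1 if the
 * socket's incoming CPU is the current one.
 */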
static inline int compute_score(struct sock *sk, struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif, const int sdif)
{
	int score = -1;
	struct inet_sock *inet = inet_sk(sk);
	bool dev_match;

	if (net_eq(sock_net(sk), net) && inet->inet_num == hnum &&
	    !ipv6_only_sock(sk)) {
		__be32 rcv_saddr = inet->inet_rcv_saddr;
		score = sk->sk_family == PF_INET ? 2 : 1;
		if (rcv_saddr) {
			if (rcv_saddr != daddr)
				return -1;
			score += 4;
		}
		dev_match = inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if,
						 dif, sdif);
		if (!dev_match)
			return -1;
		score += 4;

		if (sk->sk_incoming_cpu == raw_smp_processor_id())
			score++;
	}
	return score;
}
static inline struct sock *lookup_reuseport(struct net *net, struct sock *sk,
					    struct sk_buff *skb, int doff,
					    __be32 saddr, __be16 sport,
					    __be32 daddr, unsigned short hnum)
{
	struct sock *reuse_sk = NULL;
	u32 phash;

	if (sk->sk_reuseport) {
		phash = inet_ehashfn(net, daddr, hnum, saddr, sport);
		reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
	}
	return reuse_sk;
}
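/* Scan one lhash2 bucket for the highest-scoring listener. The remote
 * address and port are never compared, since a listening socket cannot
 * be bound to them; the first scoring SO_REUSEPORT group member decides
 * the result via reuseport selection.
 */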
static struct sock *inet_lhash2_lookup(struct net *net,
				       struct inet_listen_hashbucket *ilb2,
				       struct sk_buff *skb, int doff,
				       const __be32 saddr, __be16 sport,
				       const __be32 daddr, const unsigned short hnum,
				       const int dif, const int sdif)
{
	struct inet_connection_sock *icsk;
	struct sock *sk, *result = NULL;
	int score, hiscore = 0;

	inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
		sk = (struct sock *)icsk;
		score = compute_score(sk, net, hnum, daddr, dif, sdif);
		if (score > hiscore) {
			result = lookup_reuseport(net, sk, skb, doff,
						  saddr, sport, daddr, hnum);
			if (result)
				return result;

			result = sk;
			hiscore = score;
		}
	}

	return result;
}
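/* Let a BPF sk_lookup program choose the receiving socket for TCP.
 * When the program selects a socket, reuseport group selection is
 * still applied on top of it unless the program disallowed it.
 */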
static inline struct sock *inet_lookup_run_bpf(struct net *net,
					       struct inet_hashinfo *hashinfo,
					       struct sk_buff *skb, int doff,
					       __be32 saddr, __be16 sport,
					       __be32 daddr, u16 hnum)
{
	struct sock *sk, *reuse_sk;
	bool no_reuseport;

	if (hashinfo != &tcp_hashinfo)
		return NULL;

	no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP,
					    saddr, sport, daddr, hnum, &sk);
	if (no_reuseport || IS_ERR_OR_NULL(sk))
		return sk;

	reuse_sk = lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum);
	if (reuse_sk)
		sk = reuse_sk;
	return sk;
}
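/* Listener lookup for an incoming connection: BPF override first, then
 * the (port, daddr) lhash2 bucket, then the (port, INADDR_ANY) bucket,
 * falling back to the plain per-port chain when it is short or lhash2
 * is not available.
 */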
struct sock *__inet_lookup_listener(struct net *net,
				    struct inet_hashinfo *hashinfo,
				    struct sk_buff *skb, int doff,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif, const int sdif)
{
	unsigned int hash = inet_lhashfn(net, hnum);
	struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
	struct inet_listen_hashbucket *ilb2;
	struct sock *sk, *result = NULL;
	int score, hiscore = 0;
	unsigned int hash2;
	u32 phash = 0;
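	/* Lookup redirect from BPF */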
	if (static_branch_unlikely(&bpf_sk_lookup_enabled)) {
		result = inet_lookup_run_bpf(net, hashinfo, skb, doff,
					     saddr, sport, daddr, hnum);
		if (result)
			goto done;
	}

	if (ilb->count <= 10 || !hashinfo->lhash2)
		goto port_lookup;
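	/* Too many sockets in the per-port bucket (hashed by port alone);
	 * try lhash2, which is hashed by port and address, instead.
	 */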
	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
	if (ilb2->count > ilb->count)
		goto port_lookup;

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, daddr, hnum,
				    dif, sdif);
	if (result)
		goto done;
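	/* Nothing bound to daddr: retry lhash2 with INADDR_ANY. */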
	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);
	if (ilb2->count > ilb->count)
		goto port_lookup;

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, daddr, hnum,
				    dif, sdif);
	goto done;

port_lookup:
	sk_for_each_rcu(sk, &ilb->head) {
		score = compute_score(sk, net, hnum, daddr,
				      dif, sdif);
		if (score > hiscore) {
			if (sk->sk_reuseport) {
				phash = inet_ehashfn(net, daddr, hnum,
						     saddr, sport);
				result = reuseport_select_sock(sk, phash,
							       skb, doff);
				if (result)
					goto done;
			}
			result = sk;
			hiscore = score;
		}
	}
done:
	if (unlikely(IS_ERR(result)))
		return NULL;
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);
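/* All socket types share the refcount, but each needs its own
 * destructor once the last reference goes away.
 */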
void sock_gen_put(struct sock *sk)
{
	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);

void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);
struct sock *__inet_lookup_established(struct net *net,
				       struct inet_hashinfo *hashinfo,
				       const __be32 saddr, const __be16 sport,
				       const __be32 daddr, const u16 hnum,
				       const int dif, const int sdif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
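	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */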
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash)
			continue;
		if (likely(INET_MATCH(sk, net, acookie,
				      saddr, daddr, ports, dif, sdif))) {
			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			if (unlikely(!INET_MATCH(sk, net, acookie,
						 saddr, daddr, ports,
						 dif, sdif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
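	/* If the nulls value we got at the end of this lookup is not the
	 * expected one, we must restart the lookup. We probably met an
	 * item that was moved to another chain.
	 */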
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);
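/* Called with local BHs disabled. Checks that the chosen source port
 * gives a unique 4-tuple in the established table, recycling a
 * TIME_WAIT entry when twsk_unique() allows it.
 */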
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	struct net *net = sock_net(sk);
	int sdif = l3mdev_master_ifindex_by_index(net, dif);
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;

	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(INET_MATCH(sk2, net, acookie,
				      saddr, daddr, ports, dif, sdif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}
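	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */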
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		sk_nulls_del_node_init_rcu((struct sock *)tw);
		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		*twp = tw;
	} else if (tw) {
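		/* The caller did not ask for the timewait socket back,
		 * so remove and release it right away.
		 */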
		inet_twsk_deschedule_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}
static u32 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}
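/* Search the ehash chain for an existing socket that matches the
 * 4-tuple of @sk. Returns true if a duplicate is found.
 */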
static bool inet_ehash_lookup_by_sk(struct sock *sk,
				    struct hlist_nulls_head *list)
{
	const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
	const int sdif = sk->sk_bound_dev_if;
	const int dif = sk->sk_bound_dev_if;
	const struct hlist_nulls_node *node;
	struct net *net = sock_net(sk);
	struct sock *esk;

	INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);

	sk_nulls_for_each_rcu(esk, node, list) {
		if (esk->sk_hash != sk->sk_hash)
			continue;
		if (sk->sk_family == AF_INET) {
			if (unlikely(INET_MATCH(esk, net, acookie,
						sk->sk_daddr,
						sk->sk_rcv_saddr,
						ports, dif, sdif))) {
				return true;
			}
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (sk->sk_family == AF_INET6) {
			if (unlikely(INET6_MATCH(esk, net,
						 &sk->sk_v6_daddr,
						 &sk->sk_v6_rcv_saddr,
						 ports, dif, sdif))) {
				return true;
			}
		}
#endif
	}
	return false;
}
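/* Insert a socket into ehash, and eventually remove another one
 * (the other one can be a SYN_RECV or TIMEWAIT); if a matching socket
 * already exists, @sk is not inserted and *found_dup_sk is set.
 */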
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct hlist_nulls_head *list;
	struct inet_ehash_bucket *head;
	spinlock_t *lock;
	bool ret = true;

	WARN_ON_ONCE(!sk_unhashed(sk));

	sk->sk_hash = sk_ehashfn(sk);
	head = inet_ehash_bucket(hashinfo, sk->sk_hash);
	list = &head->chain;
	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

	spin_lock(lock);
	if (osk) {
		WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
		ret = sk_nulls_del_node_init_rcu(osk);
	} else if (found_dup_sk) {
		*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
		if (*found_dup_sk)
			ret = false;
	}

	if (ret)
		__sk_nulls_add_node_rcu(sk, list);

	spin_unlock(lock);

	return ret;
}
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
	bool ok = inet_ehash_insert(sk, osk, found_dup_sk);

	if (ok) {
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	} else {
		percpu_counter_inc(sk->sk_prot->orphan_count);
		inet_sk_set_state(sk, TCP_CLOSE);
		sock_set_flag(sk, SOCK_DEAD);
		inet_csk_destroy_sock(sk);
	}
	return ok;
}
EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
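/* Join an existing reuseport group on this port when a compatible
 * listener (same family, device, bind bucket, owning uid and address)
 * exists, otherwise allocate a new group for @sk.
 */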
static int inet_reuseport_add_sock(struct sock *sk,
				   struct inet_listen_hashbucket *ilb)
{
	struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
	struct sock *sk2;
	kuid_t uid = sock_i_uid(sk);

	sk_for_each_rcu(sk2, &ilb->head) {
		if (sk2 != sk &&
		    sk2->sk_family == sk->sk_family &&
		    ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if &&
		    inet_csk(sk2)->icsk_bind_hash == tb &&
		    sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) &&
		    inet_rcv_saddr_equal(sk, sk2, false))
			return reuseport_add_sock(sk, sk2,
						  inet_rcv_saddr_any(sk));
	}

	return reuseport_alloc(sk, inet_rcv_saddr_any(sk));
}
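/* Hash a socket: non-listeners go straight to ehash; listeners are
 * added to the per-port listening chain and to lhash2 under the bucket
 * lock, joining a reuseport group first if requested.
 */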
int __inet_hash(struct sock *sk, struct sock *osk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_listen_hashbucket *ilb;
	int err = 0;

	if (sk->sk_state != TCP_LISTEN) {
		inet_ehash_nolisten(sk, osk, NULL);
		return 0;
	}
	WARN_ON(!sk_unhashed(sk));
	ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];

	spin_lock(&ilb->lock);
	if (sk->sk_reuseport) {
		err = inet_reuseport_add_sock(sk, ilb);
		if (err)
			goto unlock;
	}
	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
	    sk->sk_family == AF_INET6)
		hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
	else
		hlist_add_head_rcu(&sk->sk_node, &ilb->head);
	inet_hash2(hashinfo, sk);
	ilb->count++;
	sock_set_flag(sk, SOCK_RCU_FREE);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
	spin_unlock(&ilb->lock);

	return err;
}
EXPORT_SYMBOL(__inet_hash);
int inet_hash(struct sock *sk)
{
	int err = 0;

	if (sk->sk_state != TCP_CLOSE) {
		local_bh_disable();
		err = __inet_hash(sk, NULL);
		local_bh_enable();
	}

	return err;
}
EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
	struct inet_listen_hashbucket *ilb = NULL;
	spinlock_t *lock;

	if (sk_unhashed(sk))
		return;

	if (sk->sk_state == TCP_LISTEN) {
		ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &ilb->lock;
	} else {
		lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
	}
	spin_lock_bh(lock);
	if (sk_unhashed(sk))
		goto unlock;

	if (rcu_access_pointer(sk->sk_reuseport_cb))
		reuseport_detach_sock(sk);
	if (ilb) {
		inet_unhash2(hashinfo, sk);
		__sk_del_node_init(sk);
		ilb->count--;
	} else {
		__sk_nulls_del_node_init_rcu(sk);
	}
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
unlock:
	spin_unlock_bh(lock);
}
EXPORT_SYMBOL_GPL(inet_unhash);
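/* Pick and hash a source port for connect(). If the socket is already
 * bound, only the uniqueness check runs; otherwise scan the ephemeral
 * range from a per-destination offset until check_established()
 * accepts a port, possibly reclaiming a TIME_WAIT bucket entry.
 */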
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_timewait_sock *tw = NULL;
	struct inet_bind_hashbucket *head;
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int ret, i, low, high;
	static u32 hint;
	int l3mdev;

	if (port) {
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		tb = inet_csk(sk)->icsk_bind_hash;
		spin_lock_bh(&head->lock);
		if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
			inet_ehash_nolisten(sk, NULL, NULL);
			spin_unlock_bh(&head->lock);
			return 0;
		}
		spin_unlock(&head->lock);

		ret = check_established(death_row, sk, port, NULL);
		local_bh_enable();
		return ret;
	}

	l3mdev = inet_sk_bound_l3mdev(sk);

	inet_get_local_port_range(net, &low, &high);
	high++;
	remaining = high - low;
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = (hint + port_offset) % remaining;
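	/* In the first pass we try only ports of one parity (offset is
	 * rounded down to even); a second pass below covers the other
	 * parity.
	 */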
	offset &= ~1U;
other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);
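		/* Does not bother with rcv_saddr checks, because
		 * the established check is already unique enough.
		 */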
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
			    tb->port == port) {
				if (tb->fastreuse >= 0 ||
				    tb->fastreuseport >= 0)
					goto next_port;
				WARN_ON(hlist_empty(&tb->owners));
				if (!check_established(death_row, sk,
						       port, &tw))
					goto ok;
				goto next_port;
			}
		}

		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					     net, head, port, l3mdev);
		if (!tb) {
			spin_unlock_bh(&head->lock);
			return -ENOMEM;
		}
		tb->fastreuse = -1;
		tb->fastreuseport = -1;
		goto ok;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}
	offset++;
	if ((offset & 1) && remaining > 1)
		goto other_parity_scan;

	return -EADDRNOTAVAIL;
ok:
	hint += i + 2;
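	/* Head lock still held and BHs disabled. */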
	inet_bind_hash(sk, tb, port);
	if (sk_unhashed(sk)) {
		inet_sk(sk)->inet_sport = htons(port);
		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
	}
	if (tw)
		inet_twsk_bind_unhash(tw, hinfo);
	spin_unlock(&head->lock);
	if (tw)
		inet_twsk_deschedule_put(tw);
	local_bh_enable();
	return 0;
}
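/*
 * Bind a port for a connect operation and hash it.
 */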
int inet_hash_connect(struct inet_timewait_death_row *death_row,
		      struct sock *sk)
{
	u32 port_offset = 0;

	if (!inet_sk(sk)->inet_num)
		port_offset = inet_sk_port_offset(sk);
	return __inet_hash_connect(death_row, sk, port_offset,
				   __inet_check_established);
}
EXPORT_SYMBOL_GPL(inet_hash_connect);
void inet_hashinfo_init(struct inet_hashinfo *h)
{
	int i;

	for (i = 0; i < INET_LHTABLE_SIZE; i++) {
		spin_lock_init(&h->listening_hash[i].lock);
		INIT_HLIST_HEAD(&h->listening_hash[i].head);
		h->listening_hash[i].count = 0;
	}

	h->lhash2 = NULL;
}
EXPORT_SYMBOL_GPL(inet_hashinfo_init);
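/* Allocate and initialize the second listener hash (lhash2) at boot;
 * until this runs, h->lhash2 stays NULL and lookups use the per-port
 * chains only.
 */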
void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
				unsigned long numentries, int scale,
				unsigned long low_limit,
				unsigned long high_limit)
{
	unsigned int i;

	h->lhash2 = alloc_large_system_hash(name,
					    sizeof(*h->lhash2),
					    numentries,
					    scale,
					    0,
					    NULL,
					    &h->lhash2_mask,
					    low_limit,
					    high_limit);

	for (i = 0; i <= h->lhash2_mask; i++) {
		spin_lock_init(&h->lhash2[i].lock);
		INIT_HLIST_HEAD(&h->lhash2[i].head);
		h->lhash2[i].count = 0;
	}
}
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
	unsigned int locksz = sizeof(spinlock_t);
	unsigned int i, nblocks = 1;

	if (locksz != 0) {
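		/* Allocate about two cache lines' worth of spinlocks per
		 * possible CPU, rounded up to a power of two.
		 */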
		nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U);
		nblocks = roundup_pow_of_two(nblocks * num_possible_cpus());
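		/* No more locks than number of hash buckets. */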
		nblocks = min(nblocks, hashinfo->ehash_mask + 1);

		hashinfo->ehash_locks = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
		if (!hashinfo->ehash_locks)
			return -ENOMEM;

		for (i = 0; i < nblocks; i++)
			spin_lock_init(&hashinfo->ehash_locks[i]);
	}
	hashinfo->ehash_locks_mask = nblocks - 1;
	return 0;
}
EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);