1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/* Prefix every pr_*() message from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/errqueue.h>
97#include <linux/types.h>
98#include <linux/socket.h>
99#include <linux/in.h>
100#include <linux/kernel.h>
101#include <linux/module.h>
102#include <linux/proc_fs.h>
103#include <linux/seq_file.h>
104#include <linux/sched.h>
105#include <linux/sched/mm.h>
106#include <linux/timer.h>
107#include <linux/string.h>
108#include <linux/sockios.h>
109#include <linux/net.h>
110#include <linux/mm.h>
111#include <linux/slab.h>
112#include <linux/interrupt.h>
113#include <linux/poll.h>
114#include <linux/tcp.h>
115#include <linux/init.h>
116#include <linux/highmem.h>
117#include <linux/user_namespace.h>
118#include <linux/static_key.h>
119#include <linux/memcontrol.h>
120#include <linux/prefetch.h>
121
122#include <linux/uaccess.h>
123
124#include <linux/netdevice.h>
125#include <net/protocol.h>
126#include <linux/skbuff.h>
127#include <net/net_namespace.h>
128#include <net/request_sock.h>
129#include <net/sock.h>
130#include <linux/net_tstamp.h>
131#include <net/xfrm.h>
132#include <linux/ipsec.h>
133#include <net/cls_cgroup.h>
134#include <net/netprio_cgroup.h>
135#include <linux/sock_diag.h>
136
137#include <linux/filter.h>
138#include <net/sock_reuseport.h>
139
140#include <trace/events/sock.h>
141
142#include <net/tcp.h>
143#include <net/busy_poll.h>
144
/* Registry of all registered protocols; proto_list is guarded by the mutex. */
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
147
148
149
150
151
152
153
154
155
156
157
158bool sk_ns_capable(const struct sock *sk,
159 struct user_namespace *user_ns, int cap)
160{
161 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
162 ns_capable(user_ns, cap);
163}
164EXPORT_SYMBOL(sk_ns_capable);
165
166
167
168
169
170
171
172
173
174
/**
 * sk_capable - socket capability test against the initial user namespace
 * @sk: socket whose file credentials are tested
 * @cap: the capability to test for
 *
 * Convenience wrapper around sk_ns_capable() for &init_user_ns.
 */
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);
180
181
182
183
184
185
186
187
188
189
/**
 * sk_net_capable - socket capability test in the socket's network namespace
 * @sk: socket whose file credentials are tested
 * @cap: the capability to test for
 *
 * Like sk_capable(), but checked against the user namespace owning the
 * socket's network namespace.
 */
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);
195
196
197
198
199
200
/* One lockdep class per address family for sk_lock and its spinlock,
 * with separate classes for kernel-internal sockets so lockdep does
 * not conflate user-socket and kernel-socket locking rules. */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
205
206
207
208
209
210
211
/* Build an AF_MAX+1 string table by prefixing each address-family name
 * with x; used to give lockdep readable per-family lock class names.
 * Entries "27"/"28" are unassigned family numbers.
 * NOTE(review): "IUCV" lacks the AF_ prefix the other entries carry —
 * looks like a historical typo; left as-is since it is a lockdep
 * debug string. */
#define _sock_locks(x) \
  x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
  x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
  x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
  x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
  x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
  x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
  x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
  x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
  x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
  x "27" , x "28" , x "AF_CAN" , \
  x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
  x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
  x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
  x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
  x "AF_QIPCRTR", x "AF_SMC" , x "AF_MAX"
228
/* Lockdep class-name tables for sk_lock ("sk_lock-"), its spinlock
 * ("slock-") and sk_callback_lock ("clock-"); "k-" variants are used
 * for kernel-internal sockets. */
static const char *const af_family_key_strings[AF_MAX+1] = {
	_sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	_sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	_sock_locks("clock-")
};

static const char *const af_family_kern_key_strings[AF_MAX+1] = {
	_sock_locks("k-sk_lock-")
};
static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
	_sock_locks("k-slock-")
};
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
	_sock_locks("k-clock-")
};
/* Lockdep class names for the receive-queue spinlock, per family. */
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
  "rlock-AF_UNSPEC", "rlock-AF_UNIX" , "rlock-AF_INET" ,
  "rlock-AF_AX25" , "rlock-AF_IPX" , "rlock-AF_APPLETALK",
  "rlock-AF_NETROM", "rlock-AF_BRIDGE" , "rlock-AF_ATMPVC" ,
  "rlock-AF_X25" , "rlock-AF_INET6" , "rlock-AF_ROSE" ,
  "rlock-AF_DECnet", "rlock-AF_NETBEUI" , "rlock-AF_SECURITY" ,
  "rlock-AF_KEY" , "rlock-AF_NETLINK" , "rlock-AF_PACKET" ,
  "rlock-AF_ASH" , "rlock-AF_ECONET" , "rlock-AF_ATMSVC" ,
  "rlock-AF_RDS" , "rlock-AF_SNA" , "rlock-AF_IRDA" ,
  "rlock-AF_PPPOX" , "rlock-AF_WANPIPE" , "rlock-AF_LLC" ,
  "rlock-27" , "rlock-28" , "rlock-AF_CAN" ,
  "rlock-AF_TIPC" , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV" ,
  "rlock-AF_RXRPC" , "rlock-AF_ISDN" , "rlock-AF_PHONET" ,
  "rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG" ,
  "rlock-AF_NFC" , "rlock-AF_VSOCK" , "rlock-AF_KCM" ,
  "rlock-AF_QIPCRTR", "rlock-AF_SMC" , "rlock-AF_MAX"
};
/* Lockdep class names for the write-queue spinlock, per family. */
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
  "wlock-AF_UNSPEC", "wlock-AF_UNIX" , "wlock-AF_INET" ,
  "wlock-AF_AX25" , "wlock-AF_IPX" , "wlock-AF_APPLETALK",
  "wlock-AF_NETROM", "wlock-AF_BRIDGE" , "wlock-AF_ATMPVC" ,
  "wlock-AF_X25" , "wlock-AF_INET6" , "wlock-AF_ROSE" ,
  "wlock-AF_DECnet", "wlock-AF_NETBEUI" , "wlock-AF_SECURITY" ,
  "wlock-AF_KEY" , "wlock-AF_NETLINK" , "wlock-AF_PACKET" ,
  "wlock-AF_ASH" , "wlock-AF_ECONET" , "wlock-AF_ATMSVC" ,
  "wlock-AF_RDS" , "wlock-AF_SNA" , "wlock-AF_IRDA" ,
  "wlock-AF_PPPOX" , "wlock-AF_WANPIPE" , "wlock-AF_LLC" ,
  "wlock-27" , "wlock-28" , "wlock-AF_CAN" ,
  "wlock-AF_TIPC" , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV" ,
  "wlock-AF_RXRPC" , "wlock-AF_ISDN" , "wlock-AF_PHONET" ,
  "wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG" ,
  "wlock-AF_NFC" , "wlock-AF_VSOCK" , "wlock-AF_KCM" ,
  "wlock-AF_QIPCRTR", "wlock-AF_SMC" , "wlock-AF_MAX"
};
/* Lockdep class names for the error-queue spinlock, per family. */
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
  "elock-AF_UNSPEC", "elock-AF_UNIX" , "elock-AF_INET" ,
  "elock-AF_AX25" , "elock-AF_IPX" , "elock-AF_APPLETALK",
  "elock-AF_NETROM", "elock-AF_BRIDGE" , "elock-AF_ATMPVC" ,
  "elock-AF_X25" , "elock-AF_INET6" , "elock-AF_ROSE" ,
  "elock-AF_DECnet", "elock-AF_NETBEUI" , "elock-AF_SECURITY" ,
  "elock-AF_KEY" , "elock-AF_NETLINK" , "elock-AF_PACKET" ,
  "elock-AF_ASH" , "elock-AF_ECONET" , "elock-AF_ATMSVC" ,
  "elock-AF_RDS" , "elock-AF_SNA" , "elock-AF_IRDA" ,
  "elock-AF_PPPOX" , "elock-AF_WANPIPE" , "elock-AF_LLC" ,
  "elock-27" , "elock-28" , "elock-AF_CAN" ,
  "elock-AF_TIPC" , "elock-AF_BLUETOOTH", "elock-AF_IUCV" ,
  "elock-AF_RXRPC" , "elock-AF_ISDN" , "elock-AF_PHONET" ,
  "elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG" ,
  "elock-AF_NFC" , "elock-AF_VSOCK" , "elock-AF_KCM" ,
  "elock-AF_QIPCRTR", "elock-AF_SMC" , "elock-AF_MAX"
};
299
300
301
302
303
/* Per-family lockdep classes for sk_callback_lock and the receive,
 * write and error queue spinlocks (kernel sockets get their own
 * callback-lock classes). */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];
309
310
311
312
313
314
/* Default socket buffer limits: room for 256 packets of 256 bytes of
 * payload each, where SKB_TRUESIZE accounts for sk_buff overhead. */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
319
320
/* Run-time adjustable (sysctl) caps and defaults for send/receive
 * buffer sizes. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximum number of option memory buffers (ancillary data etc.). */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

/* Whether timestamped skbs may loop payload data back to the sender. */
int sysctl_tstamp_allow_data __read_mostly = 1;

/* Enabled while any SOCK_MEMALLOC socket exists; gates the PFMEMALLOC
 * fast paths in the stack. */
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
336
337
338
339
340
341
342
343
344
/**
 * sk_set_memalloc - mark @sk as able to use emergency reserves
 * @sk: socket to alter
 *
 * Flags the socket SOCK_MEMALLOC, lets its allocations dip into memory
 * reserves (__GFP_MEMALLOC) and bumps the global memalloc_socks key so
 * the stack honours PFMEMALLOC skbs.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);
352
/**
 * sk_clear_memalloc - undo sk_set_memalloc()
 * @sk: socket to alter
 *
 * Clears SOCK_MEMALLOC and the __GFP_MEMALLOC allocation bit and drops
 * the memalloc_socks static key reference.
 */
void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/* SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);
369
/* Deliver a backlogged skb for a SOCK_MEMALLOC socket with memory
 * reclaim disabled, so the receive path cannot recurse into reclaim
 * while servicing swap-over-network traffic. */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* These should have been dropped before queueing. */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	ret = sk->sk_backlog_rcv(sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
385
/* Parse a user-supplied struct timeval into a jiffies timeout for
 * SO_RCVTIMEO / SO_SNDTIMEO. {0,0} means "no timeout"
 * (MAX_SCHEDULE_TIMEOUT); a negative tv_sec is warned about and
 * treated as zero for backward compatibility. */
static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	/* Convert to jiffies, rounding the usec part up, but only when the
	 * result cannot overflow MAX_SCHEDULE_TIMEOUT. */
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
	return 0;
}
415
416static void sock_warn_obsolete_bsdism(const char *name)
417{
418 static int warned;
419 static char warncomm[TASK_COMM_LEN];
420 if (strcmp(warncomm, current->comm) && warned < 5) {
421 strcpy(warncomm, current->comm);
422 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
423 warncomm, name);
424 warned++;
425 }
426}
427
428static bool sock_needs_netstamp(const struct sock *sk)
429{
430 switch (sk->sk_family) {
431 case AF_UNSPEC:
432 case AF_UNIX:
433 return false;
434 default:
435 return true;
436 }
437}
438
/* Clear timestamp flag bits on @sk; if no timestamping flags remain and
 * the family uses netstamps, drop the global timestamp reference. */
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (sock_needs_netstamp(sk) &&
		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}
448
449
/* Queue @skb on @sk's receive queue, charging receive memory.
 * Returns -ENOMEM when sk_rcvbuf is exceeded, -ENOBUFS when memory
 * accounting refuses the charge; both count as drops. */
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	/* Notify readers unless the socket is already orphaned. */
	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);
484
/* Run the socket filter over @skb and, if it passes, queue it on the
 * receive queue; returns the filter error or the queueing result. */
int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err = sk_filter(sk, skb);

	return err ? err : __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);
496
/* Filter, then deliver @skb to @sk: directly via sk_backlog_rcv() when
 * the socket is not owned by a process, otherwise onto the backlog.
 * @nested selects the nested bh lock class, @trim_cap is the filter
 * trim allowance, @refcounted says whether we hold a sock reference to
 * drop on exit. */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/* Tell lockdep we hold the socket lock around the direct
		 * receive (trylock/unlock semantics).
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);
540
/* Validate the socket's cached dst (caller holds the socket lock).
 * An obsolete entry failing its ->check() is dropped from the socket
 * and released; returns the (possibly NULL) still-valid dst. */
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		sk->sk_dst_pending_confirm = 0;
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);
556
557struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
558{
559 struct dst_entry *dst = sk_dst_get(sk);
560
561 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
562 sk_dst_reset(sk);
563 dst_release(dst);
564 return NULL;
565 }
566
567 return dst;
568}
569EXPORT_SYMBOL(sk_dst_check);
570
/* SO_BINDTODEVICE: bind @sk to the interface named in @optval (an empty
 * name unbinds). Requires CAP_NET_RAW in the socket's net namespace.
 * Without CONFIG_NETDEVICES this always returns -ENOPROTOOPT. */
static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	/* Invalidate any cached route, it may go via another device now. */
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}
628
/* Getsockopt side of SO_BINDTODEVICE: copy the bound interface name to
 * userspace (length 0 when unbound) and report the length via @optlen. */
static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}
668
/* Translate an integer option value into setting or clearing a sock
 * flag bit. */
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (!valbool)
		sock_reset_flag(sk, bit);
	else
		sock_set_flag(sk, bit);
}
676
/* Should multicast sent on @sk be looped back locally? Defaults to
 * true for unknown sockets/families; false when already inside a
 * nested transmit to avoid loops. */
bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);
695
696
697
698
699
700
/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */
	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		/* Read-only options; only meaningful for getsockopt(). */
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		/* Doubled to leave room for sk_buff overhead; floored at
		 * SOCK_MIN_SNDBUF. */
		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		/* Privileged: skip the sysctl_wmem_max clamp. */
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Same hint semantics as SO_SNDBUF: clamp, never fail. */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}
		goto set_rcvbuf;

	case SO_KEEPALIVE:
		if (sk->sk_prot->keepalive)
			sk->sk_prot->keepalive(sk, valbool);
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check_tx = valbool;
		break;

	case SO_PRIORITY:
		/* Priorities 0..6 are unprivileged; higher need
		 * CAP_NET_ADMIN. */
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			/* Avoid jiffies overflow on 32-bit longs. */
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		/* Obsolete: just warn, keep accepting it. */
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool)  {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK) {
			ret = -EINVAL;
			break;
		}

		/* Initialize sk_tskey when OPT_ID is first enabled;
		 * for established TCP streams, base it on snd_una. */
		if (val & SOF_TIMESTAMPING_OPT_ID &&
		    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
			if (sk->sk_protocol == IPPROTO_TCP &&
			    sk->sk_type == SOCK_STREAM) {
				if ((1 << sk->sk_state) &
				    (TCPF_CLOSE | TCPF_LISTEN)) {
					ret = -EINVAL;
					break;
				}
				sk->sk_tskey = tcp_sk(sk)->snd_una;
			} else {
				sk->sk_tskey = 0;
			}
		}

		/* OPT_STATS is only valid together with OPT_TSONLY. */
		if (val & SOF_TIMESTAMPING_OPT_STATS &&
		    !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
			ret = -EINVAL;
			break;
		}

		sk->sk_tsflags = val;
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_ATTACH_BPF:
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_user(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_attach_bpf(ufd, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_CBPF:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_reuseport_attach_filter(&fprog, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_EBPF:
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_user(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_reuseport_attach_bpf(ufd, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		/* Once locked, the filter cannot be unlocked. */
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		/* allow unprivileged users to decrease the value */
		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else {
			if (val < 0)
				ret = -EINVAL;
			else
				sk->sk_ll_usec = val;
		}
		break;
#endif

	case SO_MAX_PACING_RATE:
		/* A finite rate request implies pacing is wanted. */
		if (val != ~0U)
			cmpxchg(&sk->sk_pacing_status,
				SK_PACING_NONE,
				SK_PACING_NEEDED);
		sk->sk_max_pacing_rate = val;
		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
					 sk->sk_max_pacing_rate);
		break;

	case SO_INCOMING_CPU:
		sk->sk_incoming_cpu = val;
		break;

	case SO_CNX_ADVICE:
		if (val == 1)
			dst_negative_advice(sk);
		break;
	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);
1066
1067
1068static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1069 struct ucred *ucred)
1070{
1071 ucred->pid = pid_vnr(pid);
1072 ucred->uid = ucred->gid = -1;
1073 if (cred) {
1074 struct user_namespace *current_ns = current_user_ns();
1075
1076 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1077 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1078 }
1079}
1080
/* Copy @src's supplementary groups to userspace at @dst, translating
 * each gid into the current task's user namespace. The caller must
 * have verified that @dst has room for src->ngroups entries. */
static int groups_to_user(gid_t __user *dst, const struct group_info *src)
{
	struct user_namespace *user_ns = current_user_ns();
	int i;

	for (i = 0; i < src->ngroups; i++)
		if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
			return -EFAULT;

	return 0;
}
1092
/* Generic SOL_SOCKET getsockopt(): fills @optval/@optlen from socket
 * state. Most options funnel through the union @v and the common
 * copy-out at the bottom; variable-size options jump to lenout. */
int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		u64 val64;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);	/* length of the value to copy out */
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		/* Reading sk_err clears it; fall back to the soft error. */
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check_tx;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv		= sizeof(v.ling);
		v.ling.l_onoff	= sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger	= sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
				!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = sk->sk_tsflags;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		/* MAX_SCHEDULE_TIMEOUT reads back as {0,0} = "no timeout". */
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		/* SO_SNDLOWAT is not tunable on Linux; always 1. */
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERGROUPS:
	{
		int ret, n;

		if (!sk->sk_peer_cred)
			return -ENODATA;

		n = sk->sk_peer_cred->group_info->ngroups;
		if (len < n * sizeof(gid_t)) {
			/* Too small: report the needed length. */
			len = n * sizeof(gid_t);
			return put_user(len, optlen) ? -EFAULT : -ERANGE;
		}
		len = n * sizeof(gid_t);

		ret = groups_to_user((gid_t __user *)optval,
				     sk->sk_peer_cred->group_info);
		if (ret)
			return ret;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_BPF_EXTENSIONS:
		v.val = bpf_tell_extensions();
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		v.val = sk->sk_ll_usec;
		break;
#endif

	case SO_MAX_PACING_RATE:
		v.val = sk->sk_max_pacing_rate;
		break;

	case SO_INCOMING_CPU:
		v.val = sk->sk_incoming_cpu;
		break;

	case SO_MEMINFO:
	{
		u32 meminfo[SK_MEMINFO_VARS];

		if (get_user(len, optlen))
			return -EFAULT;

		sk_get_meminfo(sk, meminfo);

		len = min_t(unsigned int, len, sizeof(meminfo));
		if (copy_to_user(optval, &meminfo, len))
			return -EFAULT;

		goto lenout;
	}

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_INCOMING_NAPI_ID:
		v.val = READ_ONCE(sk->sk_napi_id);

		/* aggregate non-NAPI IDs down to zero */
		if (v.val < MIN_NAPI_ID)
			v.val = 0;

		break;
#endif

	case SO_COOKIE:
		lv = sizeof(u64);
		if (len < lv)
			return -EINVAL;
		v.val64 = sock_gen_cookie(sk);
		break;

	default:
		/* We implement the SO_SNDLOWAT etc to not be settable
		 * (1003.1g 7).
		 */
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}
1402
1403
1404
1405
1406
1407
/* Initialize sk_lock with the lockdep class and name matching the
 * socket's address family, using the kernel-socket variants for
 * kernel-internal sockets. */
static inline void sock_lock_init(struct sock *sk)
{
	if (sk->sk_kern_sock)
		sock_lock_init_class_and_name(
			sk,
			af_family_kern_slock_key_strings[sk->sk_family],
			af_family_kern_slock_keys + sk->sk_family,
			af_family_kern_key_strings[sk->sk_family],
			af_family_kern_keys + sk->sk_family);
	else
		sock_lock_init_class_and_name(
			sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}
1425
1426
1427
1428
1429
1430
/* Copy @osk into @nsk, skipping the sk_dontcopy_begin..sk_dontcopy_end
 * window (locks, refcounts) and preserving @nsk's own security blob,
 * which is then cloned from @osk via the LSM hook. */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}
1446
/* Allocate a struct sock for @prot, from its slab cache when one
 * exists, otherwise via kmalloc. Performs LSM allocation and pins the
 * protocol module; unwinds both on failure. */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		/* Caller asked for zeroed memory: clear everything except
		 * the SLAB_TYPESAFE_BY_RCU-sensitive fields. */
		if (priority & __GFP_ZERO)
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}
1485
/*
 * Release a struct sock allocated by sk_prot_alloc(): drop cgroup/memcg
 * and LSM state, free the memory, and finally drop the module reference
 * taken at allocation time.  @prot is read before freeing since @sk is
 * no longer valid afterwards.
 */
static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	cgroup_sk_free(&sk->sk_cgrp_data);
	mem_cgroup_sk_free(sk);
	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}
1503
1504
1505
1506
1507
1508
1509
1510
1511
/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@kern: is this to be a kernel socket?
 *
 *	Returns a zero-initialized sock with family, lock classes, netns
 *	reference and cgroup state set up, or NULL on allocation failure.
 */
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/* Remember the creating proto: frees must go back to it
		 * even if sk_prot is later switched.
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		/* Kernel sockets do not hold a netns reference. */
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt))
			get_net(net);
		sock_net_set(sk, net);
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);
1542
1543
1544
1545
/*
 * Final teardown of a socket, possibly deferred through RCU (see
 * sk_destruct()).  Runs the protocol destructor, releases the BPF
 * filter and reuseport state, timestamping, page frag, peer
 * credentials and the netns reference, then frees the sock itself.
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	/* sk_wmem_alloc == 0 proves we are the last user of the filter. */
	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}
	if (rcu_access_pointer(sk->sk_reuseport_cb))
		reuseport_detach_sock(sk);

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	/* Any remaining option memory at this point is a leak. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	if (likely(sk->sk_net_refcnt))
		put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}
1581
/*
 * Destroy a socket, deferring through an RCU grace period when the
 * socket was marked SOCK_RCU_FREE (lockless readers may still hold it).
 */
void sk_destruct(struct sock *sk)
{
	if (sock_flag(sk, SOCK_RCU_FREE))
		call_rcu(&sk->sk_rcu, __sk_destruct);
	else
		__sk_destruct(&sk->sk_rcu);
}
1589
/*
 * Free path once sk_wmem_alloc reached zero.  If sock_diag listeners
 * want destroy notifications for user sockets, hand the socket off to
 * the diag broadcast (which destroys it later); otherwise destroy now.
 */
static void __sk_free(struct sock *sk)
{
	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
		sock_diag_broadcast_destroy(sk);
	else
		sk_destruct(sk);
}
1597
/*
 * Drop the "socket exists" reference held in sk_wmem_alloc (set to 1 in
 * sk_alloc()).  The socket is only truly freed once all in-flight
 * transmit skbs have released their wmem as well.
 */
void sk_free(struct sock *sk)
{
	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);
1608EXPORT_SYMBOL(sk_free);
1609
/*
 * Initialize the queues and callback lock shared by sock_init_data()
 * and sk_clone_lock(), with per-family lockdep classes for each lock.
 */
static void sk_init_common(struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
			af_rlock_keys + sk->sk_family,
			af_family_rlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
			af_wlock_keys + sk->sk_family,
			af_family_wlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
			af_elock_keys + sk->sk_family,
			af_family_elock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);
}
1630
1631
1632
1633
1634
1635
1636
1637
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk)).
 *	Returns the new socket locked (bh_lock_sock) with refcnt 2, or NULL
 *	on failure.
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;
	bool is_charged = true;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		/* sock_copy() duplicated sk_net_refcnt; take our own ref. */
		if (likely(newsk->sk_net_refcnt))
			get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);

		/* One "socket exists" reference, as in sk_alloc(). */
		refcount_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		sk_init_common(newsk);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_dst_pending_confirm = 0;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		atomic_set(&newsk->sk_drops, 0);
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);

		/* Charge the inherited BPF filter against the new socket. */
		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			is_charged = sk_filter_charge(newsk, filter);

		if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
			/* On charge failure the filter must not be
			 * uncharged on free: clear the pointer first.
			 */
			if (!is_charged)
				RCU_INIT_POINTER(newsk->sk_filter, NULL);
			sk_free_unlock_clone(newsk);
			newsk = NULL;
			goto out;
		}
		RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

		newsk->sk_err	   = 0;
		newsk->sk_err_soft = 0;
		newsk->sk_priority = 0;
		newsk->sk_incoming_cpu = raw_smp_processor_id();
		atomic64_set(&newsk->sk_cookie, 0);

		mem_cgroup_sk_alloc(newsk);
		cgroup_sk_alloc(&newsk->sk_cgrp_data);

		/* Publish all initialization before the refcount makes the
		 * socket visible: one ref for the caller/hash table, one for
		 * the returned (locked) pointer.
		 */
		smp_wmb();
		refcount_set(&newsk->sk_refcnt, 2);

		sk_refcnt_debug_inc(newsk);
		/* The clone has no struct socket / wait queue yet. */
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (sock_needs_netstamp(sk) &&
		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
1738EXPORT_SYMBOL_GPL(sk_clone_lock);
1739
/*
 * Error path helper for sk_clone_lock() callers: discard a half-built
 * clone.  sk_destruct must not run the (copied) protocol destructor,
 * since this socket was never fully set up.
 */
void sk_free_unlock_clone(struct sock *sk)
{
	sk->sk_destruct = NULL;
	bh_unlock_sock(sk);
	sk_free(sk);
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1749
/*
 * Attach @dst as the socket's route and derive the socket's offload
 * capabilities (sk_route_caps, GSO limits) from the output device,
 * masked by sk_route_nocaps.  Tunnel encapsulation (dst->header_len)
 * disables GSO.
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
1771
1772
1773
1774
1775
1776
1777
1778
1779
/*
 *	Write buffer destructor automatically called from kfree_skb.
 *	Releases the skb's truesize from sk_wmem_alloc and wakes writers.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* Drop all but one unit first so sk_write_space() can run
		 * with the socket still guaranteed alive; the final unit is
		 * released below (and may free the socket).
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/* If this was the last wmem reference, the socket goes away. */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
1802
1803
1804
1805
/*
 * Variant of sock_wfree() that performs no write-space wakeup; just
 * releases the skb's wmem and frees the socket on the last reference.
 */
void __sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
1813
/*
 * Charge @skb's truesize to @sk's write allocation and arrange for
 * sock_wfree() to uncharge on free.  Non-full sockets (request/timewait)
 * instead take a plain socket reference released by sock_edemux.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
	skb->sk = sk;
#ifdef CONFIG_INET
	if (unlikely(!sk_fullsock(sk))) {
		skb->destructor = sock_edemux;
		sock_hold(sk);
		return;
	}
#endif
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/* sk_wmem_alloc stays >= 1 while the socket exists, so this add
	 * can never be the transition from zero.
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);
1835
1836
1837
1838
1839
1840
1841
/*
 * Release the skb's wmem charge while keeping skb->sk valid, so packet
 * schedulers can still see the owning socket.  Pure TCP ACKs carry a
 * fake truesize and are left untouched.  If the socket reference cannot
 * be taken, or the destructor is not a wfree variant, fall back to a
 * full orphan.
 */
void skb_orphan_partial(struct sk_buff *skb)
{
	if (skb_is_tcp_pure_ack(skb))
		return;

	if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
	    || skb->destructor == tcp_wfree
#endif
		) {
		struct sock *sk = skb->sk;

		if (refcount_inc_not_zero(&sk->sk_refcnt)) {
			/* Uncharge wmem now; sock_efree drops the sk ref. */
			WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
			skb->destructor = sock_efree;
		}
	} else {
		skb_orphan(skb);
	}
}
EXPORT_SYMBOL(skb_orphan_partial);
1863
1864
1865
1866
/*
 *	Read buffer destructor automatically called from kfree_skb:
 *	uncharges the skb's truesize from rmem and the memory accounting.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);
1876
1877
1878
1879
1880
/*
 * Buffer destructor for skbs that hold only a plain socket reference
 * (no memory accounting) -- e.g. after skb_orphan_partial().
 */
void sock_efree(struct sk_buff *skb)
{
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);
1886
/*
 * Return the uid of the socket's inode, or GLOBAL_ROOT_UID if the sock
 * has been detached from its struct socket.  sk_callback_lock guards
 * against concurrent detach.
 */
kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);
1897
/*
 * Return the inode number of the socket's inode, or 0 if the sock is no
 * longer attached to a struct socket.
 */
unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);
1908
1909
1910
1911
/*
 * Allocate an skb charged to the socket's send buffer.  @force bypasses
 * the sndbuf limit check.  Returns NULL when over limit or when the
 * allocation fails.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);
1925
1926
1927
1928
/*
 * Allocate memory accounted against the socket's option memory
 * (sk_omem_alloc), bounded by sysctl_optmem_max.  The charge is made
 * before the allocation and rolled back on failure.  Returns NULL when
 * over limit or out of memory.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the lack of an atomic
		 * add-and-test.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);
1946
1947
1948
1949
1950
/*
 * Free memory allocated with sock_kmalloc() and uncharge sk_omem_alloc.
 * @nullify zeroes the memory before freeing (for key material etc.).
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kzfree(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}
1962
/* Free socket option memory allocated by sock_kmalloc(). */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);
1968
/* As sock_kfree_s(), but zeroes the memory first (sensitive data). */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);
1974
1975
1976
1977
/*
 * Sleep until the socket's write allocation drops below sndbuf, the
 * timeout expires, a signal arrives, the socket is shut down for
 * sending, or an error is pending.  Returns the remaining timeout.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
2001
2002
2003
2004
2005
2006
/*
 *	Generic send/receive buffer handlers:
 *	allocate an skb with @header_len linear bytes and @data_len in page
 *	frags (up to @max_page_order pages), blocking on sndbuf space unless
 *	@noblock.  On failure returns NULL with the error in *@errcode.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
			break;

		/* No sndbuf space: record that and wait (or bail). */
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
2050
/* Linear-only convenience wrapper around sock_alloc_send_pskb(). */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
2057
/*
 * Parse one SOL_SOCKET control message into @sockc.  Supports SO_MARK
 * (requires CAP_NET_ADMIN in the socket's netns) and SO_TIMESTAMPING
 * (tx record flags only); SCM_RIGHTS/SCM_CREDENTIALS are handled
 * elsewhere and accepted silently.  Returns 0 or a negative errno.
 */
int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		tsflags = *(u32 *)CMSG_DATA(cmsg);
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		/* Replace only the tx-record bits; keep the rest. */
		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are handled by scm_send() */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);
2092
/*
 * Walk all control messages of @msg, dispatching SOL_SOCKET ones to
 * __sock_cmsg_send().  Non-SOL_SOCKET levels are skipped here.  Stops at
 * the first malformed header or per-cmsg error.
 */
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
		   struct sockcm_cookie *sockc)
{
	struct cmsghdr *cmsg;
	int ret;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;
		ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
		if (ret)
			return ret;
	}
	return 0;
}
EXPORT_SYMBOL(sock_cmsg_send);
2111
2112static void sk_enter_memory_pressure(struct sock *sk)
2113{
2114 if (!sk->sk_prot->enter_memory_pressure)
2115 return;
2116
2117 sk->sk_prot->enter_memory_pressure(sk);
2118}
2119
/*
 * Clear memory pressure: use the protocol's hook if present, otherwise
 * clear the protocol's shared memory_pressure flag directly.
 */
static void sk_leave_memory_pressure(struct sock *sk)
{
	if (sk->sk_prot->leave_memory_pressure) {
		sk->sk_prot->leave_memory_pressure(sk);
	} else {
		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;

		if (memory_pressure && *memory_pressure)
			*memory_pressure = 0;
	}
}
2131
2132
/* Preferred order for page-frag pages: 32KB chunks when possible. */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

/**
 * skb_page_frag_refill - refill a page frag
 * @sz:       minimum size of the fragment we want to get
 * @pfrag:    pointer to page_frag
 * @gfp:      priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
 */
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* We are the sole user of this page: reuse it from offset 0. */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page. */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);
2177
/*
 * Refill the socket's page frag (needs at least 32 bytes); on failure
 * enter memory pressure and shrink the send buffer.
 */
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
		return true;

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);
2188
/*
 * Wait for the socket ownership (sk_lock.owned) to be released.
 * Called with the slock held; drops and re-takes it around each sleep,
 * returning with the slock held and the socket unowned.
 */
static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
2206
/*
 * Process the backlog of packets queued while the socket was owned.
 * The slock is dropped while running sk_backlog_rcv() on each skb, so
 * new packets may be appended concurrently; the outer loop re-checks.
 */
static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Detach the whole list, then process it unlocked. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/* Doing the zeroing here guarantees we can not miss a wakeup in
	 * sk_add_backlog()'s rmem check.
	 */
	sk->sk_backlog.len = 0;
}
2239
/* Drain the socket backlog under the slock (socket already owned). */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);
	spin_unlock_bh(&sk->sk_lock.slock);
}
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long (updated in place)
 * @skb:   last skb seen on sk_receive_queue (NULL if queue was empty)
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.  Returns the
 * sk_wait_event() result (nonzero if the queue tail changed).
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
2270EXPORT_SYMBOL(sk_wait_data);
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type (SK_MEM_RECV or SK_MEM_SEND)
 *
 *	Similar to __sk_mem_schedule(), but does not update
 *	sk_forward_alloc.  Returns 1 if the allocation is admitted,
 *	0 if suppressed (in which case the charge is rolled back).
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct proto *prot = sk->sk_prot;
	long allocated = sk_memory_allocated_add(sk, amt);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
	    !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
		goto suppress_allocation;

	/* Under limit. */
	if (allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. */
	if (allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit. */
	if (allocated > sk_prot_mem_limits(sk, 2))
		goto suppress_allocation;

	/* Guarantee minimum buffer size under pressure. */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (refcount_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
				return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		/* Admit if a fair per-socket share would still fit under
		 * the hard limit.
		 */
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Roll back the charge taken at function entry. */
	sk_memory_allocated_sub(sk, amt);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_raise_allocated);
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: SK_MEM_RECV or SK_MEM_SEND
 *
 *	Optimistically charges sk_forward_alloc, then asks
 *	__sk_mem_raise_allocated() for admission, undoing the charge if
 *	refused.  Returns 0 on refusal, 1 on success.
 */
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
2375
2376
2377
2378
2379
2380
2381
2382
/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta (pages) to reclaim
 *
 *	Similar to __sk_mem_reclaim(), but does not touch
 *	sk_forward_alloc.  Leaves memory pressure once back under the
 *	low limit.
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (mem_cgroup_sockets_enabled && sk->sk_memcg)
		mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2395
2396
2397
2398
2399
2400
/**
 *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@amount: bytes (rounded down to SK_MEM_QUANTUM multiples)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	amount >>= SK_MEM_QUANTUM_SHIFT;
	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
	__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
2408
2409int sk_set_peek_off(struct sock *sk, int val)
2410{
2411 if (val < 0)
2412 return -EINVAL;
2413
2414 sk->sk_peek_off = val;
2415 return 0;
2416}
2417EXPORT_SYMBOL_GPL(sk_set_peek_off);
2418
2419
2420
2421
2422
2423
2424
2425
/*
 * Default socket-ops stubs for protocols that do not support a given
 * operation.  Nearly all return -EOPNOTSUPP; exceptions are noted.
 */

int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
		   bool kern)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

/* Poll stub: reports no events rather than an error. */
unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

/*
 * Fallback sendpage: copies the page contents through a normal
 * kernel_sendmsg() instead of true zero-copy.
 */
ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);
2530
2531
2532
2533
2534
/*
 *	Default Socket Callbacks
 */

/* State change: wake everyone sleeping on the socket's wait queue. */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
2545
/* Error report: wake POLLERR waiters and send async POLL_ERR. */
static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}
2557
/* Data ready: wake readers (sync poll) and send async POLL_IN. */
static void sock_def_readable(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}
2570
/*
 * Write space available: wake writers only once at least half of the
 * send buffer is free, to avoid a wakeup storm.
 */
static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}
2593
/* Default sk_destruct: nothing to do. */
static void sock_def_destruct(struct sock *sk)
{
}

/* Deliver SIGURG to the socket's owner and raise async POLL_PRI. */
void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);
2605
/*
 * (Re)arm a socket timer.  If the timer was not already pending, take a
 * socket reference that the timer handler / sk_stop_timer() releases.
 */
void sk_reset_timer(struct sock *sk, struct timer_list* timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
2613
/*
 * Cancel a socket timer; if it was pending, drop the reference taken by
 * sk_reset_timer().
 */
void sk_stop_timer(struct sock *sk, struct timer_list* timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
2620
/*
 * Initialize a freshly allocated struct sock to its generic defaults
 * and (optionally) bind it to @sock.  @sock may be NULL for sockets
 * without a userspace-visible struct socket.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	sk_init_common(sk);
	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_wq	=	sock->wq;
		sock->sk	=	sk;
		sk->sk_uid	=	SOCK_INODE(sock)->i_uid;
	} else {
		sk->sk_wq	=	NULL;
		sk->sk_uid	=	make_kuid(sock_net(sk)->user_ns, 0);
	}

	/* Separate lockdep class for kernel sockets' callback lock. */
	rwlock_init(&sk->sk_callback_lock);
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(
			&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_frag.page	=	NULL;
	sk->sk_frag.offset	=	0;
	sk->sk_peek_off		=	-1;

	sk->sk_peer_pid 	=	NULL;
	sk->sk_peer_cred	=	NULL;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = SK_DEFAULT_STAMP;

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id		=	0;
	sk->sk_ll_usec		=	sysctl_net_busy_read;
#endif

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;
	sk->sk_incoming_cpu = -1;
	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	refcount_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
2694
/*
 * Acquire socket ownership (the "slow" process-context lock), sleeping
 * if another context currently owns the socket.  @subclass is the
 * lockdep nesting level.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
2710
/*
 * Release socket ownership: drain any backlog accumulated while owned,
 * run the protocol's release_cb, and wake the next lock waiter.
 */
void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small sections, where process
 * context holding the lock is only briefly needed.
 *
 * Return: false if the fast path (slock only, BH disabled) was taken --
 * unlock with unlock_sock_fast() passing false; true if the slow path
 * (full socket ownership) was needed.
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/* Fast path: nobody owns the socket; keep the slock. */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
2765
/*
 * SIOCGSTAMP: copy the last packet timestamp to userspace as a timeval,
 * enabling SOCK_TIMESTAMP on first use.  A zero stamp means the
 * timestamp was taken before timestamping was enabled; fall back to the
 * current time.
 */
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);
2781
/*
 * SIOCGSTAMPNS: same as sock_get_timestamp() but with nanosecond
 * resolution (struct timespec).
 */
int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);
2797
/*
 * Set a timestamping flag on the socket, and bump the global netstamp
 * enable count the first time any timestamp flag is turned on for this
 * socket.
 */
void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
2814
/*
 * Dequeue one skb from the socket error queue and deliver it to @msg,
 * including the attached sock_extended_err as a cmsg of the given
 * @level/@type.  Returns the number of bytes copied, -EAGAIN if the
 * queue is empty, or a copy error.
 */
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
		       int level, int type)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb;
	int copied, err;

	err = -EAGAIN;
	skb = sock_dequeue_err_skb(sk);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);
2850
2851
2852
2853
2854
2855
2856
2857
/*
 *	Get a socket option on an socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise whats the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);
2866
#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;
	const struct proto *prot = sk->sk_prot;

	/* Use the protocol's dedicated compat handler when it has one. */
	if (prot->compat_getsockopt)
		return prot->compat_getsockopt(sk, level, optname,
					       optval, optlen);
	return prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif
2880
2881int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2882 int flags)
2883{
2884 struct sock *sk = sock->sk;
2885 int addr_len = 0;
2886 int err;
2887
2888 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2889 flags & ~MSG_DONTWAIT, &addr_len);
2890 if (err >= 0)
2891 msg->msg_namelen = addr_len;
2892 return err;
2893}
2894EXPORT_SYMBOL(sock_common_recvmsg);
2895
2896
2897
2898
2899int sock_common_setsockopt(struct socket *sock, int level, int optname,
2900 char __user *optval, unsigned int optlen)
2901{
2902 struct sock *sk = sock->sk;
2903
2904 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2905}
2906EXPORT_SYMBOL(sock_common_setsockopt);
2907
#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	const struct proto *prot = sk->sk_prot;

	/* Use the protocol's dedicated compat handler when it has one. */
	if (prot->compat_setsockopt)
		return prot->compat_setsockopt(sk, level, optname,
					       optval, optlen);
	return prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif
2921
void sk_common_release(struct sock *sk)
{
	/* Give the protocol a chance to tear down its private state. */
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Unhash before orphaning: once the socket is out of the
	 * protocol's lookup tables no new lookups can find it, so it
	 * is safe to detach it from its struct socket below.
	 */
	sk->sk_prot->unhash(sk);

	/*
	 * Detach the sock from the owning struct socket / wait queue.
	 */
	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	/* Drop the caller's reference; may free the sock. */
	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
2958
/*
 * Fill @mem (an array of SK_MEMINFO_VARS u32 slots, e.g. for sock_diag)
 * with a snapshot of the socket's memory accounting counters.
 */
void sk_get_meminfo(const struct sock *sk, u32 *mem)
{
	memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);

	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
	mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
	mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
}
2973
2974#ifdef CONFIG_PROC_FS
/* Max number of distinct protocols tracked in /proc inuse counters. */
#define PROTO_INUSE_NR 64

/* Per-CPU in-use socket counters, indexed by proto->inuse_idx. */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

/* Which inuse_idx slots are currently assigned to a registered proto. */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2981
2982#ifdef CONFIG_NET_NS
/* Adjust this CPU's in-use count for @prot in namespace @net by @val. */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2988
2989int sock_prot_inuse_get(struct net *net, struct proto *prot)
2990{
2991 int cpu, idx = prot->inuse_idx;
2992 int res = 0;
2993
2994 for_each_possible_cpu(cpu)
2995 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2996
2997 return res >= 0 ? res : 0;
2998}
2999EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3000
3001static int __net_init sock_inuse_init_net(struct net *net)
3002{
3003 net->core.inuse = alloc_percpu(struct prot_inuse);
3004 return net->core.inuse ? 0 : -ENOMEM;
3005}
3006
/* Free the per-namespace in-use counters on namespace teardown. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}
3011
/* Per-network-namespace setup/teardown for the inuse counters. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
3016
3017static __init int net_inuse_init(void)
3018{
3019 if (register_pernet_subsys(&net_inuse_ops))
3020 panic("Cannot initialize net inuse counters");
3021
3022 return 0;
3023}
3024
3025core_initcall(net_inuse_init);
3026#else
/* !CONFIG_NET_NS: a single global set of per-CPU counters suffices. */
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

/* Adjust this CPU's in-use count for @prot by @val (@net unused here). */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3034
3035int sock_prot_inuse_get(struct net *net, struct proto *prot)
3036{
3037 int cpu, idx = prot->inuse_idx;
3038 int res = 0;
3039
3040 for_each_possible_cpu(cpu)
3041 res += per_cpu(prot_inuse, cpu).val[idx];
3042
3043 return res >= 0 ? res : 0;
3044}
3045EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3046#endif
3047
/*
 * Hand out a free inuse_idx slot to a newly registered proto.  The last
 * slot (PROTO_INUSE_NR - 1) is deliberately left as an overflow sentinel:
 * when the bitmap is otherwise full every extra proto shares it, its bit
 * is never set, and release_proto_idx() knows not to clear it.
 */
static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}
3059
/* Return the proto's slot, unless it holds the shared overflow sentinel. */
static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
3065#else
/* No-op stubs when /proc inuse accounting is compiled out. */
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
3073#endif
3074
/*
 * Tear down a protocol's request_sock slab cache and its name.
 * Safe on a NULL @rsk_prot or a partially initialized one
 * (kfree(NULL) and kmem_cache_destroy(NULL) are both no-ops).
 */
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
	if (!rsk_prot)
		return;
	kfree(rsk_prot->slab_name);
	rsk_prot->slab_name = NULL;
	kmem_cache_destroy(rsk_prot->slab);
	rsk_prot->slab = NULL;
}
3084
/*
 * Create the "request_sock_<proto>" slab cache for protocols that use
 * request socks.  Returns 0 on success (or when the proto has no
 * rsk_prot), -ENOMEM on allocation failure; on the slab-creation
 * failure path the allocated name is left for req_prot_cleanup().
 */
static int req_prot_init(const struct proto *prot)
{
	struct request_sock_ops *rsk_prot = prot->rsk_prot;

	if (!rsk_prot)
		return 0;

	rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
					prot->name);
	if (!rsk_prot->slab_name)
		return -ENOMEM;

	rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
					   rsk_prot->obj_size, 0,
					   prot->slab_flags, NULL);

	if (!rsk_prot->slab) {
		pr_crit("%s: Can't create request sock SLAB cache!\n",
			prot->name);
		return -ENOMEM;
	}
	return 0;
}
3108
/*
 * Register a protocol: optionally create its sock slab cache (plus the
 * request_sock and timewait-sock caches when the proto declares them),
 * then link it into proto_list and assign it an inuse slot.
 * Returns 0 on success, -ENOBUFS on any allocation failure, unwinding
 * everything created so far via the goto ladder below.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (req_prot_init(prot))
			goto out_free_request_sock_slab;

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	req_prot_cleanup(prot->rsk_prot);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
3159
/*
 * Undo proto_register(): unlink from proto_list, release the inuse
 * slot, and destroy the sock, request_sock and timewait-sock caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);
3179
3180#ifdef CONFIG_PROC_FS
/* seq_file start: hold proto_list_mutex for the whole iteration. */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
3187
/* seq_file next: advance to the following proto_list entry. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
3192
/* seq_file stop: drop the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
3198
/* Render a proto method pointer as 'y' (implemented) or 'n' (NULL). */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
/* Pages allocated by the proto, or -1 when it does no memory accounting. */
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}
3207
3208static char *sock_prot_memory_pressure(struct proto *proto)
3209{
3210 return proto->memory_pressure != NULL ?
3211 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3212}
3213
/*
 * Emit one /proc/net/protocols row for @proto.  Column order must stay
 * in sync with the header printed by proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
3247
/*
 * seq_file show: the list head sentinel prints the column header,
 * every real entry prints one protocol row.
 */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}
3265
/* Iterator callbacks for /proc/net/protocols. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
3272
/* Open /proc/net/protocols with per-netns seq_file private data. */
static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}
3278
/* File operations for /proc/net/protocols. */
static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3286
3287static __net_init int proto_init_net(struct net *net)
3288{
3289 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
3290 return -ENOMEM;
3291
3292 return 0;
3293}
3294
/* Remove the per-namespace /proc/net/protocols entry. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
3299
3300
/* Per-network-namespace lifecycle for /proc/net/protocols. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
3305
/* Register the /proc/net/protocols pernet ops at subsys initcall time. */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
3312
3313#endif
3314
3315#ifdef CONFIG_NET_RX_BUSY_POLL
3316bool sk_busy_loop_end(void *p, unsigned long start_time)
3317{
3318 struct sock *sk = p;
3319
3320 return !skb_queue_empty(&sk->sk_receive_queue) ||
3321 sk_busy_loop_timeout(sk, start_time);
3322}
3323EXPORT_SYMBOL(sk_busy_loop_end);
3324#endif
3325