#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/errqueue.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <linux/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
#include <linux/sock_diag.h>

#include <linux/filter.h>
#include <net/sock_reuseport.h>

#include <trace/events/sock.h>

#include <net/tcp.h>
#include <net/busy_poll.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);

static void sock_inuse_add(struct net *net, int val);
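
/**
 * sk_ns_capable - general socket capability test
 * @sk: socket whose opener's credentials are tested
 * @user_ns: user namespace in which the capability is checked
 * @cap: capability to test for
 *
 * Returns true if the task that opened @sk had the capability @cap in
 * @user_ns when the socket was created and the current task has it too.
 */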
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);

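/**
 * sk_capable - socket capability test in the initial user namespace
 * @sk: socket whose opener's credentials are tested
 * @cap: capability to test for
 *
 * Like sk_ns_capable(), but checks @cap in the initial user namespace.
 */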
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);

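/**
 * sk_net_capable - network namespace socket capability test
 * @sk: socket whose opener's credentials are tested
 * @cap: capability to test for
 *
 * Like sk_ns_capable(), but checks @cap in the user namespace that owns
 * the network namespace of @sk.
 */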
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);

/*
 * Each address family gets its own sk_lock and slock lockdep class,
 * with separate keys for kernel-internal and userspace sockets.
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_kern_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
static struct lock_class_key af_family_kern_slock_keys[AF_MAX];

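/*
 * Make lockdep output more readable: pre-construct one lock class name
 * per address family instead of building the strings at run time.
 */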
#define _sock_locks(x)						  \
  x "AF_UNSPEC",	x "AF_UNIX"     ,	x "AF_INET"     , \
  x "AF_AX25"  ,	x "AF_IPX"      ,	x "AF_APPLETALK", \
  x "AF_NETROM",	x "AF_BRIDGE"   ,	x "AF_ATMPVC"   , \
  x "AF_X25"   ,	x "AF_INET6"    ,	x "AF_ROSE"     , \
  x "AF_DECnet",	x "AF_NETBEUI"  ,	x "AF_SECURITY" , \
  x "AF_KEY"   ,	x "AF_NETLINK"  ,	x "AF_PACKET"   , \
  x "AF_ASH"   ,	x "AF_ECONET"   ,	x "AF_ATMSVC"   , \
  x "AF_RDS"   ,	x "AF_SNA"      ,	x "AF_IRDA"     , \
  x "AF_PPPOX" ,	x "AF_WANPIPE"  ,	x "AF_LLC"      , \
  x "27"       ,	x "28"          ,	x "AF_CAN"      , \
  x "AF_TIPC"  ,	x "AF_BLUETOOTH",	x "IUCV"        , \
  x "AF_RXRPC" ,	x "AF_ISDN"     ,	x "AF_PHONET"   , \
  x "AF_IEEE802154",	x "AF_CAIF"     ,	x "AF_ALG"      , \
  x "AF_NFC"   ,	x "AF_VSOCK"    ,	x "AF_KCM"      , \
  x "AF_QIPCRTR",	x "AF_SMC"      ,	x "AF_XDP"      , \
  x "AF_MAX"

static const char *const af_family_key_strings[AF_MAX+1] = {
	_sock_locks("sk_lock-")
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	_sock_locks("slock-")
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	_sock_locks("clock-")
};

static const char *const af_family_kern_key_strings[AF_MAX+1] = {
	_sock_locks("k-sk_lock-")
};
static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
	_sock_locks("k-slock-")
};
static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
	_sock_locks("k-clock-")
};
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
	"rlock-AF_UNSPEC", "rlock-AF_UNIX"     , "rlock-AF_INET"     ,
	"rlock-AF_AX25"  , "rlock-AF_IPX"      , "rlock-AF_APPLETALK",
	"rlock-AF_NETROM", "rlock-AF_BRIDGE"   , "rlock-AF_ATMPVC"   ,
	"rlock-AF_X25"   , "rlock-AF_INET6"    , "rlock-AF_ROSE"     ,
	"rlock-AF_DECnet", "rlock-AF_NETBEUI"  , "rlock-AF_SECURITY" ,
	"rlock-AF_KEY"   , "rlock-AF_NETLINK"  , "rlock-AF_PACKET"   ,
	"rlock-AF_ASH"   , "rlock-AF_ECONET"   , "rlock-AF_ATMSVC"   ,
	"rlock-AF_RDS"   , "rlock-AF_SNA"      , "rlock-AF_IRDA"     ,
	"rlock-AF_PPPOX" , "rlock-AF_WANPIPE"  , "rlock-AF_LLC"      ,
	"rlock-27"       , "rlock-28"          , "rlock-AF_CAN"      ,
	"rlock-AF_TIPC"  , "rlock-AF_BLUETOOTH", "rlock-AF_IUCV"     ,
	"rlock-AF_RXRPC" , "rlock-AF_ISDN"     , "rlock-AF_PHONET"   ,
	"rlock-AF_IEEE802154", "rlock-AF_CAIF" , "rlock-AF_ALG"      ,
	"rlock-AF_NFC"   , "rlock-AF_VSOCK"    , "rlock-AF_KCM"      ,
	"rlock-AF_QIPCRTR", "rlock-AF_SMC"     , "rlock-AF_XDP"      ,
	"rlock-AF_MAX"
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
	"wlock-AF_UNSPEC", "wlock-AF_UNIX"     , "wlock-AF_INET"     ,
	"wlock-AF_AX25"  , "wlock-AF_IPX"      , "wlock-AF_APPLETALK",
	"wlock-AF_NETROM", "wlock-AF_BRIDGE"   , "wlock-AF_ATMPVC"   ,
	"wlock-AF_X25"   , "wlock-AF_INET6"    , "wlock-AF_ROSE"     ,
	"wlock-AF_DECnet", "wlock-AF_NETBEUI"  , "wlock-AF_SECURITY" ,
	"wlock-AF_KEY"   , "wlock-AF_NETLINK"  , "wlock-AF_PACKET"   ,
	"wlock-AF_ASH"   , "wlock-AF_ECONET"   , "wlock-AF_ATMSVC"   ,
	"wlock-AF_RDS"   , "wlock-AF_SNA"      , "wlock-AF_IRDA"     ,
	"wlock-AF_PPPOX" , "wlock-AF_WANPIPE"  , "wlock-AF_LLC"      ,
	"wlock-27"       , "wlock-28"          , "wlock-AF_CAN"      ,
	"wlock-AF_TIPC"  , "wlock-AF_BLUETOOTH", "wlock-AF_IUCV"     ,
	"wlock-AF_RXRPC" , "wlock-AF_ISDN"     , "wlock-AF_PHONET"   ,
	"wlock-AF_IEEE802154", "wlock-AF_CAIF" , "wlock-AF_ALG"      ,
	"wlock-AF_NFC"   , "wlock-AF_VSOCK"    , "wlock-AF_KCM"      ,
	"wlock-AF_QIPCRTR", "wlock-AF_SMC"     , "wlock-AF_XDP"      ,
	"wlock-AF_MAX"
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
	"elock-AF_UNSPEC", "elock-AF_UNIX"     , "elock-AF_INET"     ,
	"elock-AF_AX25"  , "elock-AF_IPX"      , "elock-AF_APPLETALK",
	"elock-AF_NETROM", "elock-AF_BRIDGE"   , "elock-AF_ATMPVC"   ,
	"elock-AF_X25"   , "elock-AF_INET6"    , "elock-AF_ROSE"     ,
	"elock-AF_DECnet", "elock-AF_NETBEUI"  , "elock-AF_SECURITY" ,
	"elock-AF_KEY"   , "elock-AF_NETLINK"  , "elock-AF_PACKET"   ,
	"elock-AF_ASH"   , "elock-AF_ECONET"   , "elock-AF_ATMSVC"   ,
	"elock-AF_RDS"   , "elock-AF_SNA"      , "elock-AF_IRDA"     ,
	"elock-AF_PPPOX" , "elock-AF_WANPIPE"  , "elock-AF_LLC"      ,
	"elock-27"       , "elock-28"          , "elock-AF_CAN"      ,
	"elock-AF_TIPC"  , "elock-AF_BLUETOOTH", "elock-AF_IUCV"     ,
	"elock-AF_RXRPC" , "elock-AF_ISDN"     , "elock-AF_PHONET"   ,
	"elock-AF_IEEE802154", "elock-AF_CAIF" , "elock-AF_ALG"      ,
	"elock-AF_NFC"   , "elock-AF_VSOCK"    , "elock-AF_KCM"      ,
	"elock-AF_QIPCRTR", "elock-AF_SMC"     , "elock-AF_XDP"      ,
	"elock-AF_MAX"
};

/*
 * sk_callback_lock and the sk receive/write/error queue locks also get
 * their own per-address-family lockdep classes:
 */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

int sysctl_tstamp_allow_data __read_mostly = 1;

DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);
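
/**
 * sk_set_memalloc - mark a socket as %SOCK_MEMALLOC
 * @sk: socket to mark
 *
 * Allow @sk to dip into the emergency memory reserves by adding
 * __GFP_MEMALLOC to its allocation mask; intended for sockets that are
 * needed for memory reclaim to make progress (e.g. swap over network
 * storage). Undone by sk_clear_memalloc().
 */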
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_branch_inc(&memalloc_socks_key);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_branch_dec(&memalloc_socks_key);

	/*
	 * A SOCK_MEMALLOC socket may have exceeded its receive limits while
	 * the flag was set. Reclaim any outstanding reservations now so the
	 * socket obeys the normal rmem limits again.
	 */
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* These should have been dropped before queueing. */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	ret = sk->sk_backlog_rcv(sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
{
	struct timeval tv;

	if (optlen < sizeof(tv))
		return -EINVAL;
	if (copy_from_user(&tv, optval, sizeof(tv)))
		return -EFAULT;
	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		*timeo_p = 0;
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	*timeo_p = MAX_SCHEDULE_TIMEOUT;
	if (tv.tv_sec == 0 && tv.tv_usec == 0)
		return 0;
	if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
		*timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC / HZ);
	return 0;
}

static void sock_warn_obsolete_bsdism(const char *name)
{
	static int warned;
	static char warncomm[TASK_COMM_LEN];

	if (strcmp(warncomm, current->comm) && warned < 5) {
		strcpy(warncomm, current->comm);
		pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
			warncomm, name);
		warned++;
	}
}

static bool sock_needs_netstamp(const struct sock *sk)
{
	switch (sk->sk_family) {
	case AF_UNSPEC:
	case AF_UNIX:
		return false;
	default:
		return true;
	}
}

static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (sock_needs_netstamp(sk) &&
		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}

int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/*
	 * We are about to leave the RCU protected region: take an explicit
	 * reference on the dst so it cannot disappear under us.
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;

	err = sk_filter(sk, skb);
	if (err)
		return err;

	return __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter_trim_cap(sk, skb, trim_cap))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics, for lockdep's benefit:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		sk->sk_dst_pending_confirm = 0;
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Binding to a device is a privileged operation. */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0", as
	 * specified in the passed interface name. If the name is ""
	 * or the option length is zero the socket is not bound;
	 * copy at most IFNAMSIZ - 1 bytes of the name.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);
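
/*
 * This is meant for all protocols to use and covers goings on at the
 * socket level (SOL_SOCKET); everything here is generic.
 */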
697int sock_setsockopt(struct socket *sock, int level, int optname,
698 char __user *optval, unsigned int optlen)
699{
700 struct sock *sk = sock->sk;
701 int val;
702 int valbool;
703 struct linger ling;
704 int ret = 0;
705
706
707
708
709
710 if (optname == SO_BINDTODEVICE)
711 return sock_setbindtodevice(sk, optval, optlen);
712
713 if (optlen < sizeof(int))
714 return -EINVAL;
715
716 if (get_user(val, (int __user *)optval))
717 return -EFAULT;
718
719 valbool = val ? 1 : 0;
720
721 lock_sock(sk);
722
723 switch (optname) {
724 case SO_DEBUG:
725 if (val && !capable(CAP_NET_ADMIN))
726 ret = -EACCES;
727 else
728 sock_valbool_flag(sk, SOCK_DBG, valbool);
729 break;
730 case SO_REUSEADDR:
731 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
732 break;
733 case SO_REUSEPORT:
734 sk->sk_reuseport = valbool;
735 break;
736 case SO_TYPE:
737 case SO_PROTOCOL:
738 case SO_DOMAIN:
739 case SO_ERROR:
740 ret = -ENOPROTOOPT;
741 break;
742 case SO_DONTROUTE:
743 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
744 break;
745 case SO_BROADCAST:
746 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
747 break;
748 case SO_SNDBUF:
749
750
751
752
753
754 val = min_t(u32, val, sysctl_wmem_max);
755set_sndbuf:
756 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
757 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
758
759 sk->sk_write_space(sk);
760 break;
761
762 case SO_SNDBUFFORCE:
763 if (!capable(CAP_NET_ADMIN)) {
764 ret = -EPERM;
765 break;
766 }
767 goto set_sndbuf;
768
769 case SO_RCVBUF:
770
771
772
773
774
775 val = min_t(u32, val, sysctl_rmem_max);
776set_rcvbuf:
777 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
794 break;
795
796 case SO_RCVBUFFORCE:
797 if (!capable(CAP_NET_ADMIN)) {
798 ret = -EPERM;
799 break;
800 }
801 goto set_rcvbuf;
802
803 case SO_KEEPALIVE:
804 if (sk->sk_prot->keepalive)
805 sk->sk_prot->keepalive(sk, valbool);
806 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
807 break;
808
809 case SO_OOBINLINE:
810 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
811 break;
812
813 case SO_NO_CHECK:
814 sk->sk_no_check_tx = valbool;
815 break;
816
817 case SO_PRIORITY:
818 if ((val >= 0 && val <= 6) ||
819 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
820 sk->sk_priority = val;
821 else
822 ret = -EPERM;
823 break;
824
825 case SO_LINGER:
826 if (optlen < sizeof(ling)) {
827 ret = -EINVAL;
828 break;
829 }
830 if (copy_from_user(&ling, optval, sizeof(ling))) {
831 ret = -EFAULT;
832 break;
833 }
834 if (!ling.l_onoff)
835 sock_reset_flag(sk, SOCK_LINGER);
836 else {
837#if (BITS_PER_LONG == 32)
838 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
839 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
840 else
841#endif
842 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
843 sock_set_flag(sk, SOCK_LINGER);
844 }
845 break;
846
847 case SO_BSDCOMPAT:
848 sock_warn_obsolete_bsdism("setsockopt");
849 break;
850
851 case SO_PASSCRED:
852 if (valbool)
853 set_bit(SOCK_PASSCRED, &sock->flags);
854 else
855 clear_bit(SOCK_PASSCRED, &sock->flags);
856 break;
857
858 case SO_TIMESTAMP:
859 case SO_TIMESTAMPNS:
860 if (valbool) {
861 if (optname == SO_TIMESTAMP)
862 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
863 else
864 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
865 sock_set_flag(sk, SOCK_RCVTSTAMP);
866 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
867 } else {
868 sock_reset_flag(sk, SOCK_RCVTSTAMP);
869 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
870 }
871 break;
872
873 case SO_TIMESTAMPING:
874 if (val & ~SOF_TIMESTAMPING_MASK) {
875 ret = -EINVAL;
876 break;
877 }
878
879 if (val & SOF_TIMESTAMPING_OPT_ID &&
880 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
881 if (sk->sk_protocol == IPPROTO_TCP &&
882 sk->sk_type == SOCK_STREAM) {
883 if ((1 << sk->sk_state) &
884 (TCPF_CLOSE | TCPF_LISTEN)) {
885 ret = -EINVAL;
886 break;
887 }
888 sk->sk_tskey = tcp_sk(sk)->snd_una;
889 } else {
890 sk->sk_tskey = 0;
891 }
892 }
893
894 if (val & SOF_TIMESTAMPING_OPT_STATS &&
895 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
896 ret = -EINVAL;
897 break;
898 }
899
900 sk->sk_tsflags = val;
901 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
902 sock_enable_timestamp(sk,
903 SOCK_TIMESTAMPING_RX_SOFTWARE);
904 else
905 sock_disable_timestamp(sk,
906 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
907 break;
908
909 case SO_RCVLOWAT:
910 if (val < 0)
911 val = INT_MAX;
912 if (sock->ops->set_rcvlowat)
913 ret = sock->ops->set_rcvlowat(sk, val);
914 else
915 sk->sk_rcvlowat = val ? : 1;
916 break;
917
918 case SO_RCVTIMEO:
919 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
920 break;
921
922 case SO_SNDTIMEO:
923 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
924 break;
925
926 case SO_ATTACH_FILTER:
927 ret = -EINVAL;
928 if (optlen == sizeof(struct sock_fprog)) {
929 struct sock_fprog fprog;
930
931 ret = -EFAULT;
932 if (copy_from_user(&fprog, optval, sizeof(fprog)))
933 break;
934
935 ret = sk_attach_filter(&fprog, sk);
936 }
937 break;
938
939 case SO_ATTACH_BPF:
940 ret = -EINVAL;
941 if (optlen == sizeof(u32)) {
942 u32 ufd;
943
944 ret = -EFAULT;
945 if (copy_from_user(&ufd, optval, sizeof(ufd)))
946 break;
947
948 ret = sk_attach_bpf(ufd, sk);
949 }
950 break;
951
952 case SO_ATTACH_REUSEPORT_CBPF:
953 ret = -EINVAL;
954 if (optlen == sizeof(struct sock_fprog)) {
955 struct sock_fprog fprog;
956
957 ret = -EFAULT;
958 if (copy_from_user(&fprog, optval, sizeof(fprog)))
959 break;
960
961 ret = sk_reuseport_attach_filter(&fprog, sk);
962 }
963 break;
964
965 case SO_ATTACH_REUSEPORT_EBPF:
966 ret = -EINVAL;
967 if (optlen == sizeof(u32)) {
968 u32 ufd;
969
970 ret = -EFAULT;
971 if (copy_from_user(&ufd, optval, sizeof(ufd)))
972 break;
973
974 ret = sk_reuseport_attach_bpf(ufd, sk);
975 }
976 break;
977
978 case SO_DETACH_FILTER:
979 ret = sk_detach_filter(sk);
980 break;
981
982 case SO_LOCK_FILTER:
983 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
984 ret = -EPERM;
985 else
986 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
987 break;
988
989 case SO_PASSSEC:
990 if (valbool)
991 set_bit(SOCK_PASSSEC, &sock->flags);
992 else
993 clear_bit(SOCK_PASSSEC, &sock->flags);
994 break;
995 case SO_MARK:
996 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
997 ret = -EPERM;
998 else
999 sk->sk_mark = val;
1000 break;
1001
1002 case SO_RXQ_OVFL:
1003 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1004 break;
1005
1006 case SO_WIFI_STATUS:
1007 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1008 break;
1009
1010 case SO_PEEK_OFF:
1011 if (sock->ops->set_peek_off)
1012 ret = sock->ops->set_peek_off(sk, val);
1013 else
1014 ret = -EOPNOTSUPP;
1015 break;
1016
1017 case SO_NOFCS:
1018 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1019 break;
1020
1021 case SO_SELECT_ERR_QUEUE:
1022 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1023 break;
1024
1025#ifdef CONFIG_NET_RX_BUSY_POLL
1026 case SO_BUSY_POLL:
1027
1028 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1029 ret = -EPERM;
1030 else {
1031 if (val < 0)
1032 ret = -EINVAL;
1033 else
1034 sk->sk_ll_usec = val;
1035 }
1036 break;
1037#endif
1038
1039 case SO_MAX_PACING_RATE:
1040 if (val != ~0U)
1041 cmpxchg(&sk->sk_pacing_status,
1042 SK_PACING_NONE,
1043 SK_PACING_NEEDED);
1044 sk->sk_max_pacing_rate = val;
1045 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
1046 sk->sk_max_pacing_rate);
1047 break;
1048
1049 case SO_INCOMING_CPU:
1050 sk->sk_incoming_cpu = val;
1051 break;
1052
1053 case SO_CNX_ADVICE:
1054 if (val == 1)
1055 dst_negative_advice(sk);
1056 break;
1057
1058 case SO_ZEROCOPY:
1059 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1060 if (sk->sk_protocol != IPPROTO_TCP)
1061 ret = -ENOTSUPP;
1062 } else if (sk->sk_family != PF_RDS) {
1063 ret = -ENOTSUPP;
1064 }
1065 if (!ret) {
1066 if (val < 0 || val > 1)
1067 ret = -EINVAL;
1068 else
1069 sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1070 }
1071 break;
1072
1073 default:
1074 ret = -ENOPROTOOPT;
1075 break;
1076 }
1077 release_sock(sk);
1078 return ret;
1079}
1080EXPORT_SYMBOL(sock_setsockopt);
1081
1082
1083static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1084 struct ucred *ucred)
1085{
1086 ucred->pid = pid_vnr(pid);
1087 ucred->uid = ucred->gid = -1;
1088 if (cred) {
1089 struct user_namespace *current_ns = current_user_ns();
1090
1091 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1092 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1093 }
1094}
1095
1096static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1097{
1098 struct user_namespace *user_ns = current_user_ns();
1099 int i;
1100
1101 for (i = 0; i < src->ngroups; i++)
1102 if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1103 return -EFAULT;
1104
1105 return 0;
1106}
1107
1108int sock_getsockopt(struct socket *sock, int level, int optname,
1109 char __user *optval, int __user *optlen)
1110{
1111 struct sock *sk = sock->sk;
1112
1113 union {
1114 int val;
1115 u64 val64;
1116 struct linger ling;
1117 struct timeval tm;
1118 } v;
1119
1120 int lv = sizeof(int);
1121 int len;
1122
1123 if (get_user(len, optlen))
1124 return -EFAULT;
1125 if (len < 0)
1126 return -EINVAL;
1127
1128 memset(&v, 0, sizeof(v));
1129
1130 switch (optname) {
1131 case SO_DEBUG:
1132 v.val = sock_flag(sk, SOCK_DBG);
1133 break;
1134
1135 case SO_DONTROUTE:
1136 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1137 break;
1138
1139 case SO_BROADCAST:
1140 v.val = sock_flag(sk, SOCK_BROADCAST);
1141 break;
1142
1143 case SO_SNDBUF:
1144 v.val = sk->sk_sndbuf;
1145 break;
1146
1147 case SO_RCVBUF:
1148 v.val = sk->sk_rcvbuf;
1149 break;
1150
1151 case SO_REUSEADDR:
1152 v.val = sk->sk_reuse;
1153 break;
1154
1155 case SO_REUSEPORT:
1156 v.val = sk->sk_reuseport;
1157 break;
1158
1159 case SO_KEEPALIVE:
1160 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1161 break;
1162
1163 case SO_TYPE:
1164 v.val = sk->sk_type;
1165 break;
1166
1167 case SO_PROTOCOL:
1168 v.val = sk->sk_protocol;
1169 break;
1170
1171 case SO_DOMAIN:
1172 v.val = sk->sk_family;
1173 break;
1174
1175 case SO_ERROR:
1176 v.val = -sock_error(sk);
1177 if (v.val == 0)
1178 v.val = xchg(&sk->sk_err_soft, 0);
1179 break;
1180
1181 case SO_OOBINLINE:
1182 v.val = sock_flag(sk, SOCK_URGINLINE);
1183 break;
1184
1185 case SO_NO_CHECK:
1186 v.val = sk->sk_no_check_tx;
1187 break;
1188
1189 case SO_PRIORITY:
1190 v.val = sk->sk_priority;
1191 break;
1192
1193 case SO_LINGER:
1194 lv = sizeof(v.ling);
1195 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1196 v.ling.l_linger = sk->sk_lingertime / HZ;
1197 break;
1198
1199 case SO_BSDCOMPAT:
1200 sock_warn_obsolete_bsdism("getsockopt");
1201 break;
1202
1203 case SO_TIMESTAMP:
1204 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1205 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1206 break;
1207
1208 case SO_TIMESTAMPNS:
1209 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1210 break;
1211
1212 case SO_TIMESTAMPING:
1213 v.val = sk->sk_tsflags;
1214 break;
1215
1216 case SO_RCVTIMEO:
1217 lv = sizeof(struct timeval);
1218 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1219 v.tm.tv_sec = 0;
1220 v.tm.tv_usec = 0;
1221 } else {
1222 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1223 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * USEC_PER_SEC) / HZ;
1224 }
1225 break;
1226
1227 case SO_SNDTIMEO:
1228 lv = sizeof(struct timeval);
1229 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1230 v.tm.tv_sec = 0;
1231 v.tm.tv_usec = 0;
1232 } else {
1233 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1234 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * USEC_PER_SEC) / HZ;
1235 }
1236 break;
1237
1238 case SO_RCVLOWAT:
1239 v.val = sk->sk_rcvlowat;
1240 break;
1241
1242 case SO_SNDLOWAT:
1243 v.val = 1;
1244 break;
1245
1246 case SO_PASSCRED:
1247 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1248 break;
1249
1250 case SO_PEERCRED:
1251 {
1252 struct ucred peercred;
1253 if (len > sizeof(peercred))
1254 len = sizeof(peercred);
1255 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1256 if (copy_to_user(optval, &peercred, len))
1257 return -EFAULT;
1258 goto lenout;
1259 }
1260
1261 case SO_PEERGROUPS:
1262 {
1263 int ret, n;
1264
1265 if (!sk->sk_peer_cred)
1266 return -ENODATA;
1267
1268 n = sk->sk_peer_cred->group_info->ngroups;
1269 if (len < n * sizeof(gid_t)) {
1270 len = n * sizeof(gid_t);
1271 return put_user(len, optlen) ? -EFAULT : -ERANGE;
1272 }
1273 len = n * sizeof(gid_t);
1274
1275 ret = groups_to_user((gid_t __user *)optval,
1276 sk->sk_peer_cred->group_info);
1277 if (ret)
1278 return ret;
1279 goto lenout;
1280 }
1281
1282 case SO_PEERNAME:
1283 {
1284 char address[128];
1285
1286 lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1287 if (lv < 0)
1288 return -ENOTCONN;
1289 if (lv < len)
1290 return -EINVAL;
1291 if (copy_to_user(optval, address, len))
1292 return -EFAULT;
1293 goto lenout;
1294 }
1295
1296
1297
1298
1299 case SO_ACCEPTCONN:
1300 v.val = sk->sk_state == TCP_LISTEN;
1301 break;
1302
1303 case SO_PASSSEC:
1304 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1305 break;
1306
1307 case SO_PEERSEC:
1308 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1309
1310 case SO_MARK:
1311 v.val = sk->sk_mark;
1312 break;
1313
1314 case SO_RXQ_OVFL:
1315 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1316 break;
1317
1318 case SO_WIFI_STATUS:
1319 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1320 break;
1321
1322 case SO_PEEK_OFF:
1323 if (!sock->ops->set_peek_off)
1324 return -EOPNOTSUPP;
1325
1326 v.val = sk->sk_peek_off;
1327 break;
1328 case SO_NOFCS:
1329 v.val = sock_flag(sk, SOCK_NOFCS);
1330 break;
1331
1332 case SO_BINDTODEVICE:
1333 return sock_getbindtodevice(sk, optval, optlen, len);
1334
1335 case SO_GET_FILTER:
1336 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1337 if (len < 0)
1338 return len;
1339
1340 goto lenout;
1341
1342 case SO_LOCK_FILTER:
1343 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1344 break;
1345
1346 case SO_BPF_EXTENSIONS:
1347 v.val = bpf_tell_extensions();
1348 break;
1349
1350 case SO_SELECT_ERR_QUEUE:
1351 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1352 break;
1353
1354#ifdef CONFIG_NET_RX_BUSY_POLL
1355 case SO_BUSY_POLL:
1356 v.val = sk->sk_ll_usec;
1357 break;
1358#endif
1359
1360 case SO_MAX_PACING_RATE:
1361 v.val = sk->sk_max_pacing_rate;
1362 break;
1363
1364 case SO_INCOMING_CPU:
1365 v.val = sk->sk_incoming_cpu;
1366 break;
1367
1368 case SO_MEMINFO:
1369 {
1370 u32 meminfo[SK_MEMINFO_VARS];
1371
1372 if (get_user(len, optlen))
1373 return -EFAULT;
1374
1375 sk_get_meminfo(sk, meminfo);
1376
1377 len = min_t(unsigned int, len, sizeof(meminfo));
1378 if (copy_to_user(optval, &meminfo, len))
1379 return -EFAULT;
1380
1381 goto lenout;
1382 }
1383
1384#ifdef CONFIG_NET_RX_BUSY_POLL
1385 case SO_INCOMING_NAPI_ID:
1386 v.val = READ_ONCE(sk->sk_napi_id);
1387
1388
1389 if (v.val < MIN_NAPI_ID)
1390 v.val = 0;
1391
1392 break;
1393#endif
1394
1395 case SO_COOKIE:
1396 lv = sizeof(u64);
1397 if (len < lv)
1398 return -EINVAL;
1399 v.val64 = sock_gen_cookie(sk);
1400 break;
1401
1402 case SO_ZEROCOPY:
1403 v.val = sock_flag(sk, SOCK_ZEROCOPY);
1404 break;
1405
1406 default:
1407
1408
1409
1410 return -ENOPROTOOPT;
1411 }
1412
1413 if (len > lv)
1414 len = lv;
1415 if (copy_to_user(optval, &v, len))
1416 return -EFAULT;
1417lenout:
1418 if (put_user(len, optlen))
1419 return -EFAULT;
1420 return 0;
1421}
1422
1423
1424
1425
1426
1427
1428static inline void sock_lock_init(struct sock *sk)
1429{
1430 if (sk->sk_kern_sock)
1431 sock_lock_init_class_and_name(
1432 sk,
1433 af_family_kern_slock_key_strings[sk->sk_family],
1434 af_family_kern_slock_keys + sk->sk_family,
1435 af_family_kern_key_strings[sk->sk_family],
1436 af_family_kern_keys + sk->sk_family);
1437 else
1438 sock_lock_init_class_and_name(
1439 sk,
1440 af_family_slock_key_strings[sk->sk_family],
1441 af_family_slock_keys + sk->sk_family,
1442 af_family_key_strings[sk->sk_family],
1443 af_family_keys + sk->sk_family);
1444}
1445
1446
1447
1448
1449
1450
1451static void sock_copy(struct sock *nsk, const struct sock *osk)
1452{
1453#ifdef CONFIG_SECURITY_NETWORK
1454 void *sptr = nsk->sk_security;
1455#endif
1456 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1457
1458 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1459 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1460
1461#ifdef CONFIG_SECURITY_NETWORK
1462 nsk->sk_security = sptr;
1463 security_sk_clone(osk, nsk);
1464#endif
1465}
1466
1467static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1468 int family)
1469{
1470 struct sock *sk;
1471 struct kmem_cache *slab;
1472
1473 slab = prot->slab;
1474 if (slab != NULL) {
1475 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1476 if (!sk)
1477 return sk;
1478 if (priority & __GFP_ZERO)
1479 sk_prot_clear_nulls(sk, prot->obj_size);
1480 } else
1481 sk = kmalloc(prot->obj_size, priority);
1482
1483 if (sk != NULL) {
1484 if (security_sk_alloc(sk, family, priority))
1485 goto out_free;
1486
1487 if (!try_module_get(prot->owner))
1488 goto out_free_sec;
1489 sk_tx_queue_clear(sk);
1490 }
1491
1492 return sk;
1493
1494out_free_sec:
1495 security_sk_free(sk);
1496out_free:
1497 if (slab != NULL)
1498 kmem_cache_free(slab, sk);
1499 else
1500 kfree(sk);
1501 return NULL;
1502}
1503
1504static void sk_prot_free(struct proto *prot, struct sock *sk)
1505{
1506 struct kmem_cache *slab;
1507 struct module *owner;
1508
1509 owner = prot->owner;
1510 slab = prot->slab;
1511
1512 cgroup_sk_free(&sk->sk_cgrp_data);
1513 mem_cgroup_sk_free(sk);
1514 security_sk_free(sk);
1515 if (slab != NULL)
1516 kmem_cache_free(slab, sk);
1517 else
1518 kfree(sk);
1519 module_put(owner);
1520}
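
/**
 * sk_alloc - allocate and minimally initialise a new socket
 * @net: the applicable net namespace
 * @family: protocol family
 * @priority: allocation mask (%GFP_KERNEL, %GFP_ATOMIC, ...)
 * @prot: struct proto associated with this new sock instance
 * @kern: is this a kernel-internal socket?
 */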
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See the comment in the struct sock definition for why both
		 * sk_prot and sk_prot_creator are needed.
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt)) {
			get_net(net);
			sock_inuse_add(net, 1);
		}

		sock_net_set(sk, net);
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);
1563
1564
1565
1566
1567static void __sk_destruct(struct rcu_head *head)
1568{
1569 struct sock *sk = container_of(head, struct sock, sk_rcu);
1570 struct sk_filter *filter;
1571
1572 if (sk->sk_destruct)
1573 sk->sk_destruct(sk);
1574
1575 filter = rcu_dereference_check(sk->sk_filter,
1576 refcount_read(&sk->sk_wmem_alloc) == 0);
1577 if (filter) {
1578 sk_filter_uncharge(sk, filter);
1579 RCU_INIT_POINTER(sk->sk_filter, NULL);
1580 }
1581 if (rcu_access_pointer(sk->sk_reuseport_cb))
1582 reuseport_detach_sock(sk);
1583
1584 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1585
1586 if (atomic_read(&sk->sk_omem_alloc))
1587 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1588 __func__, atomic_read(&sk->sk_omem_alloc));
1589
1590 if (sk->sk_frag.page) {
1591 put_page(sk->sk_frag.page);
1592 sk->sk_frag.page = NULL;
1593 }
1594
1595 if (sk->sk_peer_cred)
1596 put_cred(sk->sk_peer_cred);
1597 put_pid(sk->sk_peer_pid);
1598 if (likely(sk->sk_net_refcnt))
1599 put_net(sock_net(sk));
1600 sk_prot_free(sk->sk_prot_creator, sk);
1601}
1602
1603void sk_destruct(struct sock *sk)
1604{
1605 if (sock_flag(sk, SOCK_RCU_FREE))
1606 call_rcu(&sk->sk_rcu, __sk_destruct);
1607 else
1608 __sk_destruct(&sk->sk_rcu);
1609}
1610
1611static void __sk_free(struct sock *sk)
1612{
1613 if (likely(sk->sk_net_refcnt))
1614 sock_inuse_add(sock_net(sk), -1);
1615
1616 if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1617 sock_diag_broadcast_destroy(sk);
1618 else
1619 sk_destruct(sk);
1620}
1621
1622void sk_free(struct sock *sk)
1623{
1624
1625
1626
1627
1628
1629 if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1630 __sk_free(sk);
1631}
1632EXPORT_SYMBOL(sk_free);
1633
1634static void sk_init_common(struct sock *sk)
1635{
1636 skb_queue_head_init(&sk->sk_receive_queue);
1637 skb_queue_head_init(&sk->sk_write_queue);
1638 skb_queue_head_init(&sk->sk_error_queue);
1639
1640 rwlock_init(&sk->sk_callback_lock);
1641 lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1642 af_rlock_keys + sk->sk_family,
1643 af_family_rlock_key_strings[sk->sk_family]);
1644 lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1645 af_wlock_keys + sk->sk_family,
1646 af_family_wlock_key_strings[sk->sk_family]);
1647 lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1648 af_elock_keys + sk->sk_family,
1649 af_family_elock_key_strings[sk->sk_family]);
1650 lockdep_set_class_and_name(&sk->sk_callback_lock,
1651 af_callback_keys + sk->sk_family,
1652 af_family_clock_key_strings[sk->sk_family]);
1653}
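
/**
 * sk_clone_lock - clone a socket, and lock its clone
 * @sk: the socket to clone
 * @priority: allocation mask (%GFP_KERNEL, %GFP_ATOMIC, ...)
 *
 * The new socket is returned bh-locked; on success the caller must
 * release it with bh_unlock_sock().
 */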
1662struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1663{
1664 struct sock *newsk;
1665 bool is_charged = true;
1666
1667 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1668 if (newsk != NULL) {
1669 struct sk_filter *filter;
1670
1671 sock_copy(newsk, sk);
1672
1673 newsk->sk_prot_creator = sk->sk_prot;
1674
1675
1676 if (likely(newsk->sk_net_refcnt))
1677 get_net(sock_net(newsk));
1678 sk_node_init(&newsk->sk_node);
1679 sock_lock_init(newsk);
1680 bh_lock_sock(newsk);
1681 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1682 newsk->sk_backlog.len = 0;
1683
1684 atomic_set(&newsk->sk_rmem_alloc, 0);
1685
1686
1687
1688 refcount_set(&newsk->sk_wmem_alloc, 1);
1689 atomic_set(&newsk->sk_omem_alloc, 0);
1690 sk_init_common(newsk);
1691
1692 newsk->sk_dst_cache = NULL;
1693 newsk->sk_dst_pending_confirm = 0;
1694 newsk->sk_wmem_queued = 0;
1695 newsk->sk_forward_alloc = 0;
1696 atomic_set(&newsk->sk_drops, 0);
1697 newsk->sk_send_head = NULL;
1698 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1699 atomic_set(&newsk->sk_zckey, 0);
1700
1701 sock_reset_flag(newsk, SOCK_DONE);
1702 mem_cgroup_sk_alloc(newsk);
1703 cgroup_sk_alloc(&newsk->sk_cgrp_data);
1704
1705 rcu_read_lock();
1706 filter = rcu_dereference(sk->sk_filter);
1707 if (filter != NULL)
1708
1709
1710
1711
1712 is_charged = sk_filter_charge(newsk, filter);
1713 RCU_INIT_POINTER(newsk->sk_filter, filter);
1714 rcu_read_unlock();
1715
1716 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1717
1718
1719
1720
1721 if (!is_charged)
1722 RCU_INIT_POINTER(newsk->sk_filter, NULL);
1723 sk_free_unlock_clone(newsk);
1724 newsk = NULL;
1725 goto out;
1726 }
1727 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1728
1729 newsk->sk_err = 0;
1730 newsk->sk_err_soft = 0;
1731 newsk->sk_priority = 0;
1732 newsk->sk_incoming_cpu = raw_smp_processor_id();
1733 atomic64_set(&newsk->sk_cookie, 0);
1734 if (likely(newsk->sk_net_refcnt))
1735 sock_inuse_add(sock_net(newsk), 1);
1736
1737
1738
1739
1740
1741 smp_wmb();
1742 refcount_set(&newsk->sk_refcnt, 2);
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755 sk_refcnt_debug_inc(newsk);
1756 sk_set_socket(newsk, NULL);
1757 newsk->sk_wq = NULL;
1758
1759 if (newsk->sk_prot->sockets_allocated)
1760 sk_sockets_allocated_inc(newsk);
1761
1762 if (sock_needs_netstamp(sk) &&
1763 newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1764 net_enable_timestamp();
1765 }
1766out:
1767 return newsk;
1768}
1769EXPORT_SYMBOL_GPL(sk_clone_lock);
1770
1771void sk_free_unlock_clone(struct sock *sk)
1772{
1773
1774
1775 sk->sk_destruct = NULL;
1776 bh_unlock_sock(sk);
1777 sk_free(sk);
1778}
1779EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
1780
1781void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1782{
1783 u32 max_segs = 1;
1784
1785 sk_dst_set(sk, dst);
1786 sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
1787 if (sk->sk_route_caps & NETIF_F_GSO)
1788 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1789 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1790 if (sk_can_gso(sk)) {
1791 if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
1792 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1793 } else {
1794 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1795 sk->sk_gso_max_size = dst->dev->gso_max_size;
1796 max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1797 }
1798 }
1799 sk->sk_gso_max_segs = max_segs;
1800}
1801EXPORT_SYMBOL_GPL(sk_setup_caps);
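
/*
 * Write-buffer destructor, automatically called from kfree_skb():
 * releases the skb's truesize from sk_wmem_alloc and, once that count
 * drops to zero, completes the socket free that sk_free() deferred.
 */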
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep one reference on sk_wmem_alloc; it is released only
		 * after the sk_write_space() call below.
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * If sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets.
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
1833
1834
1835
1836
1837void __sock_wfree(struct sk_buff *skb)
1838{
1839 struct sock *sk = skb->sk;
1840
1841 if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1842 __sk_free(sk);
1843}
1844
1845void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1846{
1847 skb_orphan(skb);
1848 skb->sk = sk;
1849#ifdef CONFIG_INET
1850 if (unlikely(!sk_fullsock(sk))) {
1851 skb->destructor = sock_edemux;
1852 sock_hold(sk);
1853 return;
1854 }
1855#endif
1856 skb->destructor = sock_wfree;
1857 skb_set_hash_from_sk(skb, sk);
1858
1859
1860
1861
1862
1863 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
1864}
1865EXPORT_SYMBOL(skb_set_owner_w);
1866
1867
1868
1869
1870
1871
1872
1873void skb_orphan_partial(struct sk_buff *skb)
1874{
1875 if (skb_is_tcp_pure_ack(skb))
1876 return;
1877
1878 if (skb->destructor == sock_wfree
1879#ifdef CONFIG_INET
1880 || skb->destructor == tcp_wfree
1881#endif
1882 ) {
1883 struct sock *sk = skb->sk;
1884
1885 if (refcount_inc_not_zero(&sk->sk_refcnt)) {
1886 WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
1887 skb->destructor = sock_efree;
1888 }
1889 } else {
1890 skb_orphan(skb);
1891 }
1892}
1893EXPORT_SYMBOL(skb_orphan_partial);
1894
1895
1896
1897
1898void sock_rfree(struct sk_buff *skb)
1899{
1900 struct sock *sk = skb->sk;
1901 unsigned int len = skb->truesize;
1902
1903 atomic_sub(len, &sk->sk_rmem_alloc);
1904 sk_mem_uncharge(sk, len);
1905}
1906EXPORT_SYMBOL(sock_rfree);
1907
1908
1909
1910
1911
1912void sock_efree(struct sk_buff *skb)
1913{
1914 sock_put(skb->sk);
1915}
1916EXPORT_SYMBOL(sock_efree);
1917
1918kuid_t sock_i_uid(struct sock *sk)
1919{
1920 kuid_t uid;
1921
1922 read_lock_bh(&sk->sk_callback_lock);
1923 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1924 read_unlock_bh(&sk->sk_callback_lock);
1925 return uid;
1926}
1927EXPORT_SYMBOL(sock_i_uid);
1928
1929unsigned long sock_i_ino(struct sock *sk)
1930{
1931 unsigned long ino;
1932
1933 read_lock_bh(&sk->sk_callback_lock);
1934 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1935 read_unlock_bh(&sk->sk_callback_lock);
1936 return ino;
1937}
1938EXPORT_SYMBOL(sock_i_ino);
1939
1940
1941
1942
1943struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1944 gfp_t priority)
1945{
1946 if (force || refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1947 struct sk_buff *skb = alloc_skb(size, priority);
1948 if (skb) {
1949 skb_set_owner_w(skb, sk);
1950 return skb;
1951 }
1952 }
1953 return NULL;
1954}
1955EXPORT_SYMBOL(sock_wmalloc);
1956
1957static void sock_ofree(struct sk_buff *skb)
1958{
1959 struct sock *sk = skb->sk;
1960
1961 atomic_sub(skb->truesize, &sk->sk_omem_alloc);
1962}
1963
1964struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
1965 gfp_t priority)
1966{
1967 struct sk_buff *skb;
1968
1969
1970 if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
1971 sysctl_optmem_max)
1972 return NULL;
1973
1974 skb = alloc_skb(size, priority);
1975 if (!skb)
1976 return NULL;
1977
1978 atomic_add(skb->truesize, &sk->sk_omem_alloc);
1979 skb->sk = sk;
1980 skb->destructor = sock_ofree;
1981 return skb;
1982}
1983
1984
1985
1986
1987void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1988{
1989 if ((unsigned int)size <= sysctl_optmem_max &&
1990 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1991 void *mem;
1992
1993
1994
1995 atomic_add(size, &sk->sk_omem_alloc);
1996 mem = kmalloc(size, priority);
1997 if (mem)
1998 return mem;
1999 atomic_sub(size, &sk->sk_omem_alloc);
2000 }
2001 return NULL;
2002}
2003EXPORT_SYMBOL(sock_kmalloc);
2004
2005
2006
2007
2008
2009static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2010 const bool nullify)
2011{
2012 if (WARN_ON_ONCE(!mem))
2013 return;
2014 if (nullify)
2015 kzfree(mem);
2016 else
2017 kfree(mem);
2018 atomic_sub(size, &sk->sk_omem_alloc);
2019}
2020
2021void sock_kfree_s(struct sock *sk, void *mem, int size)
2022{
2023 __sock_kfree_s(sk, mem, size, false);
2024}
2025EXPORT_SYMBOL(sock_kfree_s);
2026
2027void sock_kzfree_s(struct sock *sk, void *mem, int size)
2028{
2029 __sock_kfree_s(sk, mem, size, true);
2030}
2031EXPORT_SYMBOL(sock_kzfree_s);
2032
2033
2034
2035
2036static long sock_wait_for_wmem(struct sock *sk, long timeo)
2037{
2038 DEFINE_WAIT(wait);
2039
2040 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2041 for (;;) {
2042 if (!timeo)
2043 break;
2044 if (signal_pending(current))
2045 break;
2046 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2047 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2048 if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
2049 break;
2050 if (sk->sk_shutdown & SEND_SHUTDOWN)
2051 break;
2052 if (sk->sk_err)
2053 break;
2054 timeo = schedule_timeout(timeo);
2055 }
2056 finish_wait(sk_sleep(sk), &wait);
2057 return timeo;
2058}
2059
2060
2061
2062
2063
2064
2065struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2066 unsigned long data_len, int noblock,
2067 int *errcode, int max_page_order)
2068{
2069 struct sk_buff *skb;
2070 long timeo;
2071 int err;
2072
2073 timeo = sock_sndtimeo(sk, noblock);
2074 for (;;) {
2075 err = sock_error(sk);
2076 if (err != 0)
2077 goto failure;
2078
2079 err = -EPIPE;
2080 if (sk->sk_shutdown & SEND_SHUTDOWN)
2081 goto failure;
2082
2083 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
2084 break;
2085
2086 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2087 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2088 err = -EAGAIN;
2089 if (!timeo)
2090 goto failure;
2091 if (signal_pending(current))
2092 goto interrupted;
2093 timeo = sock_wait_for_wmem(sk, timeo);
2094 }
2095 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2096 errcode, sk->sk_allocation);
2097 if (skb)
2098 skb_set_owner_w(skb, sk);
2099 return skb;
2100
2101interrupted:
2102 err = sock_intr_errno(timeo);
2103failure:
2104 *errcode = err;
2105 return NULL;
2106}
2107EXPORT_SYMBOL(sock_alloc_send_pskb);
2108
2109struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2110 int noblock, int *errcode)
2111{
2112 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2113}
2114EXPORT_SYMBOL(sock_alloc_send_skb);
2115
2116int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2117 struct sockcm_cookie *sockc)
2118{
2119 u32 tsflags;
2120
2121 switch (cmsg->cmsg_type) {
2122 case SO_MARK:
2123 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2124 return -EPERM;
2125 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2126 return -EINVAL;
2127 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2128 break;
2129 case SO_TIMESTAMPING:
2130 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2131 return -EINVAL;
2132
2133 tsflags = *(u32 *)CMSG_DATA(cmsg);
2134 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2135 return -EINVAL;
2136
2137 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2138 sockc->tsflags |= tsflags;
2139 break;
2140
2141 case SCM_RIGHTS:
2142 case SCM_CREDENTIALS:
2143 break;
2144 default:
2145 return -EINVAL;
2146 }
2147 return 0;
2148}
2149EXPORT_SYMBOL(__sock_cmsg_send);
2150
2151int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2152 struct sockcm_cookie *sockc)
2153{
2154 struct cmsghdr *cmsg;
2155 int ret;
2156
2157 for_each_cmsghdr(cmsg, msg) {
2158 if (!CMSG_OK(msg, cmsg))
2159 return -EINVAL;
2160 if (cmsg->cmsg_level != SOL_SOCKET)
2161 continue;
2162 ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2163 if (ret)
2164 return ret;
2165 }
2166 return 0;
2167}
2168EXPORT_SYMBOL(sock_cmsg_send);
2169
2170static void sk_enter_memory_pressure(struct sock *sk)
2171{
2172 if (!sk->sk_prot->enter_memory_pressure)
2173 return;
2174
2175 sk->sk_prot->enter_memory_pressure(sk);
2176}
2177
2178static void sk_leave_memory_pressure(struct sock *sk)
2179{
2180 if (sk->sk_prot->leave_memory_pressure) {
2181 sk->sk_prot->leave_memory_pressure(sk);
2182 } else {
2183 unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2184
2185 if (memory_pressure && *memory_pressure)
2186 *memory_pressure = 0;
2187 }
2188}
2189
2190
2191#define SKB_FRAG_PAGE_ORDER get_order(32768)
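
/**
 * skb_page_frag_refill - ensure a page_frag has enough room
 * @sz: minimum number of bytes the caller wants available
 * @pfrag: page_frag to refill
 * @gfp: allocation mask
 *
 * Tries to reuse the current page, then a high-order page, then a single
 * page. There is no guarantee a high-order page is obtained, so @sz must
 * not exceed PAGE_SIZE.
 */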
2203bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2204{
2205 if (pfrag->page) {
2206 if (page_ref_count(pfrag->page) == 1) {
2207 pfrag->offset = 0;
2208 return true;
2209 }
2210 if (pfrag->offset + sz <= pfrag->size)
2211 return true;
2212 put_page(pfrag->page);
2213 }
2214
2215 pfrag->offset = 0;
2216 if (SKB_FRAG_PAGE_ORDER) {
2217
2218 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2219 __GFP_COMP | __GFP_NOWARN |
2220 __GFP_NORETRY,
2221 SKB_FRAG_PAGE_ORDER);
2222 if (likely(pfrag->page)) {
2223 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2224 return true;
2225 }
2226 }
2227 pfrag->page = alloc_page(gfp);
2228 if (likely(pfrag->page)) {
2229 pfrag->size = PAGE_SIZE;
2230 return true;
2231 }
2232 return false;
2233}
2234EXPORT_SYMBOL(skb_page_frag_refill);
2235
2236bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2237{
2238 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2239 return true;
2240
2241 sk_enter_memory_pressure(sk);
2242 sk_stream_moderate_sndbuf(sk);
2243 return false;
2244}
2245EXPORT_SYMBOL(sk_page_frag_refill);
2246
2247int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
2248 int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
2249 int first_coalesce)
2250{
2251 int sg_curr = *sg_curr_index, use = 0, rc = 0;
2252 unsigned int size = *sg_curr_size;
2253 struct page_frag *pfrag;
2254 struct scatterlist *sge;
2255
2256 len -= size;
2257 pfrag = sk_page_frag(sk);
2258
2259 while (len > 0) {
2260 unsigned int orig_offset;
2261
2262 if (!sk_page_frag_refill(sk, pfrag)) {
2263 rc = -ENOMEM;
2264 goto out;
2265 }
2266
2267 use = min_t(int, len, pfrag->size - pfrag->offset);
2268
2269 if (!sk_wmem_schedule(sk, use)) {
2270 rc = -ENOMEM;
2271 goto out;
2272 }
2273
2274 sk_mem_charge(sk, use);
2275 size += use;
2276 orig_offset = pfrag->offset;
2277 pfrag->offset += use;
2278
2279 sge = sg + sg_curr - 1;
2280 if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
2281 sge->offset + sge->length == orig_offset) {
2282 sge->length += use;
2283 } else {
2284 sge = sg + sg_curr;
2285 sg_unmark_end(sge);
2286 sg_set_page(sge, pfrag->page, use, orig_offset);
2287 get_page(pfrag->page);
2288 sg_curr++;
2289
2290 if (sg_curr == MAX_SKB_FRAGS)
2291 sg_curr = 0;
2292
2293 if (sg_curr == sg_start) {
2294 rc = -ENOSPC;
2295 break;
2296 }
2297 }
2298
2299 len -= use;
2300 }
2301out:
2302 *sg_curr_size = size;
2303 *sg_curr_index = sg_curr;
2304 return rc;
2305}
2306EXPORT_SYMBOL(sk_alloc_sg);
2307
2308static void __lock_sock(struct sock *sk)
2309 __releases(&sk->sk_lock.slock)
2310 __acquires(&sk->sk_lock.slock)
2311{
2312 DEFINE_WAIT(wait);
2313
2314 for (;;) {
2315 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2316 TASK_UNINTERRUPTIBLE);
2317 spin_unlock_bh(&sk->sk_lock.slock);
2318 schedule();
2319 spin_lock_bh(&sk->sk_lock.slock);
2320 if (!sock_owned_by_user(sk))
2321 break;
2322 }
2323 finish_wait(&sk->sk_lock.wq, &wait);
2324}
2325
2326static void __release_sock(struct sock *sk)
2327 __releases(&sk->sk_lock.slock)
2328 __acquires(&sk->sk_lock.slock)
2329{
2330 struct sk_buff *skb, *next;
2331
2332 while ((skb = sk->sk_backlog.head) != NULL) {
2333 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2334
2335 spin_unlock_bh(&sk->sk_lock.slock);
2336
2337 do {
2338 next = skb->next;
2339 prefetch(next);
2340 WARN_ON_ONCE(skb_dst_is_noref(skb));
2341 skb->next = NULL;
2342 sk_backlog_rcv(sk, skb);
2343
2344 cond_resched();
2345
2346 skb = next;
2347 } while (skb != NULL);
2348
2349 spin_lock_bh(&sk->sk_lock.slock);
2350 }
2351
2352
2353
2354
2355
2356 sk->sk_backlog.len = 0;
2357}
2358
2359void __sk_flush_backlog(struct sock *sk)
2360{
2361 spin_lock_bh(&sk->sk_lock.slock);
2362 __release_sock(sk);
2363 spin_unlock_bh(&sk->sk_lock.slock);
2364}
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2378{
2379 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2380 int rc;
2381
2382 add_wait_queue(sk_sleep(sk), &wait);
2383 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2384 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2385 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2386 remove_wait_queue(sk_sleep(sk), &wait);
2387 return rc;
2388}
2389EXPORT_SYMBOL(sk_wait_data);
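
/**
 * __sk_mem_raise_allocated - increase memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @amt: pages to charge
 * @kind: %SK_MEM_SEND or %SK_MEM_RECV
 *
 * Like __sk_mem_schedule(), but does not touch sk_forward_alloc.
 * Returns 1 if the charge is accepted, 0 if it must be suppressed.
 */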
2400int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2401{
2402 struct proto *prot = sk->sk_prot;
2403 long allocated = sk_memory_allocated_add(sk, amt);
2404
2405 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2406 !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
2407 goto suppress_allocation;
2408
2409
2410 if (allocated <= sk_prot_mem_limits(sk, 0)) {
2411 sk_leave_memory_pressure(sk);
2412 return 1;
2413 }
2414
2415
2416 if (allocated > sk_prot_mem_limits(sk, 1))
2417 sk_enter_memory_pressure(sk);
2418
2419
2420 if (allocated > sk_prot_mem_limits(sk, 2))
2421 goto suppress_allocation;
2422
2423
2424 if (kind == SK_MEM_RECV) {
2425 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2426 return 1;
2427
2428 } else {
2429 int wmem0 = sk_get_wmem0(sk, prot);
2430
2431 if (sk->sk_type == SOCK_STREAM) {
2432 if (sk->sk_wmem_queued < wmem0)
2433 return 1;
2434 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2435 return 1;
2436 }
2437 }
2438
2439 if (sk_has_memory_pressure(sk)) {
2440 int alloc;
2441
2442 if (!sk_under_memory_pressure(sk))
2443 return 1;
2444 alloc = sk_sockets_allocated_read_positive(sk);
2445 if (sk_prot_mem_limits(sk, 2) > alloc *
2446 sk_mem_pages(sk->sk_wmem_queued +
2447 atomic_read(&sk->sk_rmem_alloc) +
2448 sk->sk_forward_alloc))
2449 return 1;
2450 }
2451
2452suppress_allocation:
2453
2454 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2455 sk_stream_moderate_sndbuf(sk);
2456
2457
2458
2459
2460 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2461 return 1;
2462 }
2463
2464 trace_sock_exceed_buf_limit(sk, prot, allocated);
2465
2466 sk_memory_allocated_sub(sk, amt);
2467
2468 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2469 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2470
2471 return 0;
2472}
2473EXPORT_SYMBOL(__sk_mem_raise_allocated);
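
/**
 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 * @sk: socket
 * @size: memory size to allocate
 * @kind: %SK_MEM_SEND for write-buffer memory, otherwise read-buffer memory
 *
 * Returns 1 on success, 0 if the allocation had to be suppressed.
 */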
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
2496
2497
2498
2499
2500
2501
2502
2503
2504void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2505{
2506 sk_memory_allocated_sub(sk, amount);
2507
2508 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2509 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2510
2511 if (sk_under_memory_pressure(sk) &&
2512 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2513 sk_leave_memory_pressure(sk);
2514}
2515EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2516
2517
2518
2519
2520
2521
2522void __sk_mem_reclaim(struct sock *sk, int amount)
2523{
2524 amount >>= SK_MEM_QUANTUM_SHIFT;
2525 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2526 __sk_mem_reduce_allocated(sk, amount);
2527}
2528EXPORT_SYMBOL(__sk_mem_reclaim);
2529
2530int sk_set_peek_off(struct sock *sk, int val)
2531{
2532 sk->sk_peek_off = val;
2533 return 0;
2534}
2535EXPORT_SYMBOL_GPL(sk_set_peek_off);
2536
2537
2538
2539
2540
2541
2542
2543
2544int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2545{
2546 return -EOPNOTSUPP;
2547}
2548EXPORT_SYMBOL(sock_no_bind);
2549
2550int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2551 int len, int flags)
2552{
2553 return -EOPNOTSUPP;
2554}
2555EXPORT_SYMBOL(sock_no_connect);
2556
2557int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2558{
2559 return -EOPNOTSUPP;
2560}
2561EXPORT_SYMBOL(sock_no_socketpair);
2562
2563int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2564 bool kern)
2565{
2566 return -EOPNOTSUPP;
2567}
2568EXPORT_SYMBOL(sock_no_accept);
2569
2570int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2571 int peer)
2572{
2573 return -EOPNOTSUPP;
2574}
2575EXPORT_SYMBOL(sock_no_getname);
2576
2577int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2578{
2579 return -EOPNOTSUPP;
2580}
2581EXPORT_SYMBOL(sock_no_ioctl);
2582
2583int sock_no_listen(struct socket *sock, int backlog)
2584{
2585 return -EOPNOTSUPP;
2586}
2587EXPORT_SYMBOL(sock_no_listen);
2588
2589int sock_no_shutdown(struct socket *sock, int how)
2590{
2591 return -EOPNOTSUPP;
2592}
2593EXPORT_SYMBOL(sock_no_shutdown);
2594
2595int sock_no_setsockopt(struct socket *sock, int level, int optname,
2596 char __user *optval, unsigned int optlen)
2597{
2598 return -EOPNOTSUPP;
2599}
2600EXPORT_SYMBOL(sock_no_setsockopt);
2601
2602int sock_no_getsockopt(struct socket *sock, int level, int optname,
2603 char __user *optval, int __user *optlen)
2604{
2605 return -EOPNOTSUPP;
2606}
2607EXPORT_SYMBOL(sock_no_getsockopt);
2608
2609int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2610{
2611 return -EOPNOTSUPP;
2612}
2613EXPORT_SYMBOL(sock_no_sendmsg);
2614
2615int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
2616{
2617 return -EOPNOTSUPP;
2618}
2619EXPORT_SYMBOL(sock_no_sendmsg_locked);
2620
2621int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2622 int flags)
2623{
2624 return -EOPNOTSUPP;
2625}
2626EXPORT_SYMBOL(sock_no_recvmsg);
2627
2628int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2629{
	/* Mirror missing mmap method error code */
2631 return -ENODEV;
2632}
2633EXPORT_SYMBOL(sock_no_mmap);
2634
2635ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2636{
2637 ssize_t res;
2638 struct msghdr msg = {.msg_flags = flags};
2639 struct kvec iov;
2640 char *kaddr = kmap(page);
2641 iov.iov_base = kaddr + offset;
2642 iov.iov_len = size;
2643 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2644 kunmap(page);
2645 return res;
2646}
2647EXPORT_SYMBOL(sock_no_sendpage);
2648
2649ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2650 int offset, size_t size, int flags)
2651{
2652 ssize_t res;
2653 struct msghdr msg = {.msg_flags = flags};
2654 struct kvec iov;
2655 char *kaddr = kmap(page);
2656
2657 iov.iov_base = kaddr + offset;
2658 iov.iov_len = size;
2659 res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2660 kunmap(page);
2661 return res;
2662}
2663EXPORT_SYMBOL(sock_no_sendpage_locked);
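
/*
 * Illustrative sketch, not part of this file: a connection-less protocol can
 * point the proto_ops it does not implement at the sock_no_*() stubs above so
 * the corresponding system calls fail cleanly with -EOPNOTSUPP (or get a sane
 * default, as sock_no_sendpage() does via kernel_sendmsg()).
 * "hypothetical_dgram_ops" and the handlers noted as elided are assumptions
 * made up for this example.
 */
static const struct proto_ops hypothetical_dgram_ops = {
	.family		= PF_UNSPEC,		/* placeholder family */
	.owner		= THIS_MODULE,
	.listen		= sock_no_listen,	/* datagram socket: no listen() */
	.accept		= sock_no_accept,	/* ... and no accept() */
	.shutdown	= sock_no_shutdown,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,	/* generic fallback over sendmsg */
	/* release/bind/connect/sendmsg/recvmsg/poll/... would be real handlers */
};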
2664
/*
 *	Default Socket Callbacks
 */

2669static void sock_def_wakeup(struct sock *sk)
2670{
2671 struct socket_wq *wq;
2672
2673 rcu_read_lock();
2674 wq = rcu_dereference(sk->sk_wq);
2675 if (skwq_has_sleeper(wq))
2676 wake_up_interruptible_all(&wq->wait);
2677 rcu_read_unlock();
2678}
2679
2680static void sock_def_error_report(struct sock *sk)
2681{
2682 struct socket_wq *wq;
2683
2684 rcu_read_lock();
2685 wq = rcu_dereference(sk->sk_wq);
2686 if (skwq_has_sleeper(wq))
2687 wake_up_interruptible_poll(&wq->wait, EPOLLERR);
2688 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2689 rcu_read_unlock();
2690}
2691
2692static void sock_def_readable(struct sock *sk)
2693{
2694 struct socket_wq *wq;
2695
2696 rcu_read_lock();
2697 wq = rcu_dereference(sk->sk_wq);
2698 if (skwq_has_sleeper(wq))
2699 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
2700 EPOLLRDNORM | EPOLLRDBAND);
2701 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2702 rcu_read_unlock();
2703}
2704
2705static void sock_def_write_space(struct sock *sk)
2706{
2707 struct socket_wq *wq;
2708
2709 rcu_read_lock();

	/* Do not wake up a writer until it can make "significant"
	 * progress.
	 */
2714 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2715 wq = rcu_dereference(sk->sk_wq);
2716 if (skwq_has_sleeper(wq))
2717 wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
2718 EPOLLWRNORM | EPOLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
2721 if (sock_writeable(sk))
2722 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2723 }
2724
2725 rcu_read_unlock();
2726}
2727
2728static void sock_def_destruct(struct sock *sk)
2729{
2730}
2731
2732void sk_send_sigurg(struct sock *sk)
2733{
2734 if (sk->sk_socket && sk->sk_socket->file)
2735 if (send_sigurg(&sk->sk_socket->file->f_owner))
2736 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2737}
2738EXPORT_SYMBOL(sk_send_sigurg);
2739
2740void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2741 unsigned long expires)
2742{
2743 if (!mod_timer(timer, expires))
2744 sock_hold(sk);
2745}
2746EXPORT_SYMBOL(sk_reset_timer);
2747
2748void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2749{
2750 if (del_timer(timer))
2751 __sock_put(sk);
2752}
2753EXPORT_SYMBOL(sk_stop_timer);
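
/*
 * Illustrative sketch, not part of this file: the reference-counting contract
 * behind sk_reset_timer()/sk_stop_timer().  The hypothetical_* names are
 * assumptions made up for this example; a real protocol would install its
 * handler with timer_setup(&sk->sk_timer, hypothetical_retrans_timer, 0).
 */
static void hypothetical_retrans_timer(struct timer_list *t)
{
	struct sock *sk = from_timer(sk, t, sk_timer);

	bh_lock_sock(sk);
	/* ... protocol-specific timeout handling would go here ... */
	bh_unlock_sock(sk);

	/* Drop the reference sk_reset_timer() took when it armed the timer. */
	sock_put(sk);
}

static void hypothetical_arm_retrans(struct sock *sk, unsigned long timeout)
{
	/* Holds a reference on sk unless the timer was already pending. */
	sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
}

static void hypothetical_disarm_retrans(struct sock *sk)
{
	/* Releases the pending timer's reference via __sock_put(). */
	sk_stop_timer(sk, &sk->sk_timer);
}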
2754
2755void sock_init_data(struct socket *sock, struct sock *sk)
2756{
2757 sk_init_common(sk);
2758 sk->sk_send_head = NULL;
2759
2760 timer_setup(&sk->sk_timer, NULL, 0);
2761
2762 sk->sk_allocation = GFP_KERNEL;
2763 sk->sk_rcvbuf = sysctl_rmem_default;
2764 sk->sk_sndbuf = sysctl_wmem_default;
2765 sk->sk_state = TCP_CLOSE;
2766 sk_set_socket(sk, sock);
2767
2768 sock_set_flag(sk, SOCK_ZAPPED);
2769
2770 if (sock) {
2771 sk->sk_type = sock->type;
2772 sk->sk_wq = sock->wq;
2773 sock->sk = sk;
2774 sk->sk_uid = SOCK_INODE(sock)->i_uid;
2775 } else {
2776 sk->sk_wq = NULL;
2777 sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
2778 }
2779
2780 rwlock_init(&sk->sk_callback_lock);
2781 if (sk->sk_kern_sock)
2782 lockdep_set_class_and_name(
2783 &sk->sk_callback_lock,
2784 af_kern_callback_keys + sk->sk_family,
2785 af_family_kern_clock_key_strings[sk->sk_family]);
2786 else
2787 lockdep_set_class_and_name(
2788 &sk->sk_callback_lock,
2789 af_callback_keys + sk->sk_family,
2790 af_family_clock_key_strings[sk->sk_family]);
2791
2792 sk->sk_state_change = sock_def_wakeup;
2793 sk->sk_data_ready = sock_def_readable;
2794 sk->sk_write_space = sock_def_write_space;
2795 sk->sk_error_report = sock_def_error_report;
2796 sk->sk_destruct = sock_def_destruct;
2797
2798 sk->sk_frag.page = NULL;
2799 sk->sk_frag.offset = 0;
2800 sk->sk_peek_off = -1;
2801
2802 sk->sk_peer_pid = NULL;
2803 sk->sk_peer_cred = NULL;
2804 sk->sk_write_pending = 0;
2805 sk->sk_rcvlowat = 1;
2806 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2807 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2808
2809 sk->sk_stamp = SK_DEFAULT_STAMP;
2810 atomic_set(&sk->sk_zckey, 0);
2811
2812#ifdef CONFIG_NET_RX_BUSY_POLL
2813 sk->sk_napi_id = 0;
2814 sk->sk_ll_usec = sysctl_net_busy_read;
2815#endif
2816
2817 sk->sk_max_pacing_rate = ~0U;
2818 sk->sk_pacing_rate = ~0U;
2819 sk->sk_pacing_shift = 10;
2820 sk->sk_incoming_cpu = -1;
2821
	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (see Documentation/RCU/rculist_nulls.txt for details).
	 */
2825 smp_wmb();
2826 refcount_set(&sk->sk_refcnt, 1);
2827 atomic_set(&sk->sk_drops, 0);
2828}
2829EXPORT_SYMBOL(sock_init_data);
2830
2831void lock_sock_nested(struct sock *sk, int subclass)
2832{
2833 might_sleep();
2834 spin_lock_bh(&sk->sk_lock.slock);
2835 if (sk->sk_lock.owned)
2836 __lock_sock(sk);
2837 sk->sk_lock.owned = 1;
2838 spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
2842 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2843 local_bh_enable();
2844}
2845EXPORT_SYMBOL(lock_sock_nested);
2846
2847void release_sock(struct sock *sk)
2848{
2849 spin_lock_bh(&sk->sk_lock.slock);
2850 if (sk->sk_backlog.tail)
2851 __release_sock(sk);

	/* Warning: release_cb() might need to release sk ownership,
	 * i.e. call sock_release_ownership(sk) before us.
	 */
2856 if (sk->sk_prot->release_cb)
2857 sk->sk_prot->release_cb(sk);
2858
2859 sock_release_ownership(sk);
2860 if (waitqueue_active(&sk->sk_lock.wq))
2861 wake_up(&sk->sk_lock.wq);
2862 spin_unlock_bh(&sk->sk_lock.slock);
2863}
2864EXPORT_SYMBOL(release_sock);
2865
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small critical sections where the
 * process will not block.
 *
 * Returns false if the fast path was taken:
 *   sk_lock.slock locked, owned = 0, BH disabled
 * Returns true if the slow path was taken:
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
2879bool lock_sock_fast(struct sock *sk)
2880{
2881 might_sleep();
2882 spin_lock_bh(&sk->sk_lock.slock);
2883
2884 if (!sk->sk_lock.owned)
		/*
		 * Note: the fast path returns with BH still disabled.
		 */
2888 return false;
2889
2890 __lock_sock(sk);
2891 sk->sk_lock.owned = 1;
2892 spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
2896 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2897 local_bh_enable();
2898 return true;
2899}
2900EXPORT_SYMBOL(lock_sock_fast);
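
/*
 * Illustrative sketch, not part of this file: typical lock_sock_fast() use.
 * The returned bool must be passed to unlock_sock_fast() (an inline helper
 * from <net/sock.h>) so the matching unlock path runs.
 * hypothetical_peek_state() is an assumption made up for this example.
 */
static int hypothetical_peek_state(struct sock *sk)
{
	bool slow;
	int state;

	slow = lock_sock_fast(sk);	/* spinlock with BH off, or full lock */
	state = sk->sk_state;		/* keep the critical section tiny */
	unlock_sock_fast(sk, slow);	/* releases whichever lock was taken */

	return state;
}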
2901
2902int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2903{
2904 struct timeval tv;
2905 if (!sock_flag(sk, SOCK_TIMESTAMP))
2906 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2907 tv = ktime_to_timeval(sk->sk_stamp);
2908 if (tv.tv_sec == -1)
2909 return -ENOENT;
2910 if (tv.tv_sec == 0) {
2911 sk->sk_stamp = ktime_get_real();
2912 tv = ktime_to_timeval(sk->sk_stamp);
2913 }
2914 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2915}
2916EXPORT_SYMBOL(sock_get_timestamp);
2917
2918int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2919{
2920 struct timespec ts;
2921 if (!sock_flag(sk, SOCK_TIMESTAMP))
2922 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2923 ts = ktime_to_timespec(sk->sk_stamp);
2924 if (ts.tv_sec == -1)
2925 return -ENOENT;
2926 if (ts.tv_sec == 0) {
2927 sk->sk_stamp = ktime_get_real();
2928 ts = ktime_to_timespec(sk->sk_stamp);
2929 }
2930 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2931}
2932EXPORT_SYMBOL(sock_get_timestampns);
2933
2934void sock_enable_timestamp(struct sock *sk, int flag)
2935{
2936 if (!sock_flag(sk, flag)) {
2937 unsigned long previous_flags = sk->sk_flags;
2938
2939 sock_set_flag(sk, flag);
		/*
		 * We just set one of the two flags which require net
		 * timestamping, but timestamping might already have been
		 * enabled because of the other one.
		 */
2945 if (sock_needs_netstamp(sk) &&
2946 !(previous_flags & SK_FLAGS_TIMESTAMP))
2947 net_enable_timestamp();
2948 }
2949}
2950
2951int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2952 int level, int type)
2953{
2954 struct sock_exterr_skb *serr;
2955 struct sk_buff *skb;
2956 int copied, err;
2957
2958 err = -EAGAIN;
2959 skb = sock_dequeue_err_skb(sk);
2960 if (skb == NULL)
2961 goto out;
2962
2963 copied = skb->len;
2964 if (copied > len) {
2965 msg->msg_flags |= MSG_TRUNC;
2966 copied = len;
2967 }
2968 err = skb_copy_datagram_msg(skb, 0, msg, copied);
2969 if (err)
2970 goto out_free_skb;
2971
2972 sock_recv_timestamp(msg, sk, skb);
2973
2974 serr = SKB_EXT_ERR(skb);
2975 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2976
2977 msg->msg_flags |= MSG_ERRQUEUE;
2978 err = copied;
2979
2980out_free_skb:
2981 kfree_skb(skb);
2982out:
2983 return err;
2984}
2985EXPORT_SYMBOL(sock_recv_errqueue);
2986
/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
2994int sock_common_getsockopt(struct socket *sock, int level, int optname,
2995 char __user *optval, int __user *optlen)
2996{
2997 struct sock *sk = sock->sk;
2998
2999 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3000}
3001EXPORT_SYMBOL(sock_common_getsockopt);
3002
3003#ifdef CONFIG_COMPAT
3004int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
3005 char __user *optval, int __user *optlen)
3006{
3007 struct sock *sk = sock->sk;
3008
3009 if (sk->sk_prot->compat_getsockopt != NULL)
3010 return sk->sk_prot->compat_getsockopt(sk, level, optname,
3011 optval, optlen);
3012 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
3013}
3014EXPORT_SYMBOL(compat_sock_common_getsockopt);
3015#endif
3016
3017int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3018 int flags)
3019{
3020 struct sock *sk = sock->sk;
3021 int addr_len = 0;
3022 int err;
3023
3024 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3025 flags & ~MSG_DONTWAIT, &addr_len);
3026 if (err >= 0)
3027 msg->msg_namelen = addr_len;
3028 return err;
3029}
3030EXPORT_SYMBOL(sock_common_recvmsg);
3031
/*
 *	Set socket options on a socket.
 */
3035int sock_common_setsockopt(struct socket *sock, int level, int optname,
3036 char __user *optval, unsigned int optlen)
3037{
3038 struct sock *sk = sock->sk;
3039
3040 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3041}
3042EXPORT_SYMBOL(sock_common_setsockopt);
3043
3044#ifdef CONFIG_COMPAT
3045int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
3046 char __user *optval, unsigned int optlen)
3047{
3048 struct sock *sk = sock->sk;
3049
3050 if (sk->sk_prot->compat_setsockopt != NULL)
3051 return sk->sk_prot->compat_setsockopt(sk, level, optname,
3052 optval, optlen);
3053 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
3054}
3055EXPORT_SYMBOL(compat_sock_common_setsockopt);
3056#endif
3057
3058void sk_common_release(struct sock *sk)
3059{
3060 if (sk->sk_prot->destroy)
3061 sk->sk_prot->destroy(sk);
3062
	/*
	 * Observation: when sk_common_release() is called, processes no
	 * longer have access to the socket, but the network stack still does.
	 *
	 * Step one: detach it from networking:
	 * A. Remove it from the hash tables.
	 */
3070
3071 sk->sk_prot->unhash(sk);
3072
	/*
	 * At this point the socket cannot receive new packets, but packets
	 * may still be in flight: another CPU may have done the hash table
	 * lookup before we unhashed the socket. Such packets will reach the
	 * receive queue and be purged by the socket destructor.
	 *
	 * We may also still have packets pending on the receive queue and our
	 * own packets waiting in device queues. The destructor drains the
	 * receive queue, but transmitted packets delay socket destruction
	 * until the last reference is released.
	 */
3084
3085 sock_orphan(sk);
3086
3087 xfrm_sk_free_policy(sk);
3088
3089 sk_refcnt_debug_release(sk);
3090
3091 sock_put(sk);
3092}
3093EXPORT_SYMBOL(sk_common_release);
3094
3095void sk_get_meminfo(const struct sock *sk, u32 *mem)
3096{
3097 memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3098
3099 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3100 mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
3101 mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3102 mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
3103 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3104 mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
3105 mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3106 mem[SK_MEMINFO_BACKLOG] = sk->sk_backlog.len;
3107 mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3108}
3109
3110#ifdef CONFIG_PROC_FS
3111#define PROTO_INUSE_NR 64
3112struct prot_inuse {
3113 int val[PROTO_INUSE_NR];
3114};
3115
3116static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3117
3118void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3119{
3120 __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3121}
3122EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3123
3124int sock_prot_inuse_get(struct net *net, struct proto *prot)
3125{
3126 int cpu, idx = prot->inuse_idx;
3127 int res = 0;
3128
3129 for_each_possible_cpu(cpu)
3130 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3131
3132 return res >= 0 ? res : 0;
3133}
3134EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
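
/*
 * Illustrative sketch, not part of this file: protocols feed the "sockets"
 * column of /proc/net/protocols by calling sock_prot_inuse_add() with +1/-1
 * from their hash/unhash callbacks.  The hypothetical_* functions are
 * assumptions made up for this example.
 */
static int hypothetical_hash(struct sock *sk)
{
	/* ... insert sk into the protocol's lookup structures here ... */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	return 0;
}

static void hypothetical_unhash(struct sock *sk)
{
	/* ... remove sk from the lookup structures here ... */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
}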
3135
3136static void sock_inuse_add(struct net *net, int val)
3137{
3138 this_cpu_add(*net->core.sock_inuse, val);
3139}
3140
3141int sock_inuse_get(struct net *net)
3142{
3143 int cpu, res = 0;
3144
3145 for_each_possible_cpu(cpu)
3146 res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3147
3148 return res;
3149}
3150
3151EXPORT_SYMBOL_GPL(sock_inuse_get);
3152
3153static int __net_init sock_inuse_init_net(struct net *net)
3154{
3155 net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3156 if (net->core.prot_inuse == NULL)
3157 return -ENOMEM;
3158
3159 net->core.sock_inuse = alloc_percpu(int);
3160 if (net->core.sock_inuse == NULL)
3161 goto out;
3162
3163 return 0;
3164
3165out:
3166 free_percpu(net->core.prot_inuse);
3167 return -ENOMEM;
3168}
3169
3170static void __net_exit sock_inuse_exit_net(struct net *net)
3171{
3172 free_percpu(net->core.prot_inuse);
3173 free_percpu(net->core.sock_inuse);
3174}
3175
3176static struct pernet_operations net_inuse_ops = {
3177 .init = sock_inuse_init_net,
3178 .exit = sock_inuse_exit_net,
3179};
3180
3181static __init int net_inuse_init(void)
3182{
3183 if (register_pernet_subsys(&net_inuse_ops))
3184 panic("Cannot initialize net inuse counters");
3185
3186 return 0;
3187}
3188
3189core_initcall(net_inuse_init);
3190
3191static void assign_proto_idx(struct proto *prot)
3192{
3193 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3194
3195 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3196 pr_err("PROTO_INUSE_NR exhausted\n");
3197 return;
3198 }
3199
3200 set_bit(prot->inuse_idx, proto_inuse_idx);
3201}
3202
3203static void release_proto_idx(struct proto *prot)
3204{
3205 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3206 clear_bit(prot->inuse_idx, proto_inuse_idx);
3207}
3208#else
3209static inline void assign_proto_idx(struct proto *prot)
3210{
3211}
3212
3213static inline void release_proto_idx(struct proto *prot)
3214{
3215}
3216
3217static void sock_inuse_add(struct net *net, int val)
3218{
3219}
3220#endif
3221
3222static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3223{
3224 if (!rsk_prot)
3225 return;
3226 kfree(rsk_prot->slab_name);
3227 rsk_prot->slab_name = NULL;
3228 kmem_cache_destroy(rsk_prot->slab);
3229 rsk_prot->slab = NULL;
3230}
3231
3232static int req_prot_init(const struct proto *prot)
3233{
3234 struct request_sock_ops *rsk_prot = prot->rsk_prot;
3235
3236 if (!rsk_prot)
3237 return 0;
3238
3239 rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3240 prot->name);
3241 if (!rsk_prot->slab_name)
3242 return -ENOMEM;
3243
3244 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3245 rsk_prot->obj_size, 0,
3246 SLAB_ACCOUNT | prot->slab_flags,
3247 NULL);
3248
3249 if (!rsk_prot->slab) {
3250 pr_crit("%s: Can't create request sock SLAB cache!\n",
3251 prot->name);
3252 return -ENOMEM;
3253 }
3254 return 0;
3255}
3256
3257int proto_register(struct proto *prot, int alloc_slab)
3258{
3259 if (alloc_slab) {
3260 prot->slab = kmem_cache_create_usercopy(prot->name,
3261 prot->obj_size, 0,
3262 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3263 prot->slab_flags,
3264 prot->useroffset, prot->usersize,
3265 NULL);
3266
3267 if (prot->slab == NULL) {
3268 pr_crit("%s: Can't create sock SLAB cache!\n",
3269 prot->name);
3270 goto out;
3271 }
3272
3273 if (req_prot_init(prot))
3274 goto out_free_request_sock_slab;
3275
3276 if (prot->twsk_prot != NULL) {
3277 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3278
3279 if (prot->twsk_prot->twsk_slab_name == NULL)
3280 goto out_free_request_sock_slab;
3281
3282 prot->twsk_prot->twsk_slab =
3283 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3284 prot->twsk_prot->twsk_obj_size,
3285 0,
3286 SLAB_ACCOUNT |
3287 prot->slab_flags,
3288 NULL);
3289 if (prot->twsk_prot->twsk_slab == NULL)
3290 goto out_free_timewait_sock_slab_name;
3291 }
3292 }
3293
3294 mutex_lock(&proto_list_mutex);
3295 list_add(&prot->node, &proto_list);
3296 assign_proto_idx(prot);
3297 mutex_unlock(&proto_list_mutex);
3298 return 0;
3299
3300out_free_timewait_sock_slab_name:
3301 kfree(prot->twsk_prot->twsk_slab_name);
3302out_free_request_sock_slab:
3303 req_prot_cleanup(prot->rsk_prot);
3304
3305 kmem_cache_destroy(prot->slab);
3306 prot->slab = NULL;
3307out:
3308 return -ENOBUFS;
3309}
3310EXPORT_SYMBOL(proto_register);
3311
3312void proto_unregister(struct proto *prot)
3313{
3314 mutex_lock(&proto_list_mutex);
3315 release_proto_idx(prot);
3316 list_del(&prot->node);
3317 mutex_unlock(&proto_list_mutex);
3318
3319 kmem_cache_destroy(prot->slab);
3320 prot->slab = NULL;
3321
3322 req_prot_cleanup(prot->rsk_prot);
3323
3324 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
3325 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
3326 kfree(prot->twsk_prot->twsk_slab_name);
3327 prot->twsk_prot->twsk_slab = NULL;
3328 }
3329}
3330EXPORT_SYMBOL(proto_unregister);
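
/*
 * Illustrative sketch, not part of this file: the minimal proto_register()/
 * proto_unregister() pairing a protocol module performs.  Everything named
 * hypothetical_* is an assumption made up for this example; passing a
 * non-zero second argument asks proto_register() to create the dedicated
 * kmem_cache used when sockets of this protocol are allocated.
 */
struct hypothetical_sock {
	struct sock	sk;	/* must come first: common state, then private */
	int		private_state;
};

static struct proto hypothetical_proto = {
	.name		= "HYPOTHETICAL",
	.owner		= THIS_MODULE,
	.obj_size	= sizeof(struct hypothetical_sock),
};

static int __init hypothetical_init(void)
{
	return proto_register(&hypothetical_proto, 1);
}

static void __exit hypothetical_exit(void)
{
	proto_unregister(&hypothetical_proto);
}

module_init(hypothetical_init);
module_exit(hypothetical_exit);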
3331
3332int sock_load_diag_module(int family, int protocol)
3333{
3334 if (!protocol) {
3335 if (!sock_is_registered(family))
3336 return -ENOENT;
3337
3338 return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3339 NETLINK_SOCK_DIAG, family);
3340 }
3341
3342#ifdef CONFIG_INET
3343 if (family == AF_INET &&
3344 !rcu_access_pointer(inet_protos[protocol]))
3345 return -ENOENT;
3346#endif
3347
3348 return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3349 NETLINK_SOCK_DIAG, family, protocol);
3350}
3351EXPORT_SYMBOL(sock_load_diag_module);
3352
3353#ifdef CONFIG_PROC_FS
3354static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3355 __acquires(proto_list_mutex)
3356{
3357 mutex_lock(&proto_list_mutex);
3358 return seq_list_start_head(&proto_list, *pos);
3359}
3360
3361static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3362{
3363 return seq_list_next(v, &proto_list, pos);
3364}
3365
3366static void proto_seq_stop(struct seq_file *seq, void *v)
3367 __releases(proto_list_mutex)
3368{
3369 mutex_unlock(&proto_list_mutex);
3370}
3371
3372static char proto_method_implemented(const void *method)
3373{
3374 return method == NULL ? 'n' : 'y';
3375}
3376static long sock_prot_memory_allocated(struct proto *proto)
3377{
3378 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3379}
3380
3381static char *sock_prot_memory_pressure(struct proto *proto)
3382{
3383 return proto->memory_pressure != NULL ?
3384 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3385}
3386
3387static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3388{
3389
3390 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
3391 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3392 proto->name,
3393 proto->obj_size,
3394 sock_prot_inuse_get(seq_file_net(seq), proto),
3395 sock_prot_memory_allocated(proto),
3396 sock_prot_memory_pressure(proto),
3397 proto->max_header,
3398 proto->slab == NULL ? "no" : "yes",
3399 module_name(proto->owner),
3400 proto_method_implemented(proto->close),
3401 proto_method_implemented(proto->connect),
3402 proto_method_implemented(proto->disconnect),
3403 proto_method_implemented(proto->accept),
3404 proto_method_implemented(proto->ioctl),
3405 proto_method_implemented(proto->init),
3406 proto_method_implemented(proto->destroy),
3407 proto_method_implemented(proto->shutdown),
3408 proto_method_implemented(proto->setsockopt),
3409 proto_method_implemented(proto->getsockopt),
3410 proto_method_implemented(proto->sendmsg),
3411 proto_method_implemented(proto->recvmsg),
3412 proto_method_implemented(proto->sendpage),
3413 proto_method_implemented(proto->bind),
3414 proto_method_implemented(proto->backlog_rcv),
3415 proto_method_implemented(proto->hash),
3416 proto_method_implemented(proto->unhash),
3417 proto_method_implemented(proto->get_port),
3418 proto_method_implemented(proto->enter_memory_pressure));
3419}
3420
3421static int proto_seq_show(struct seq_file *seq, void *v)
3422{
3423 if (v == &proto_list)
3424 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3425 "protocol",
3426 "size",
3427 "sockets",
3428 "memory",
3429 "press",
3430 "maxhdr",
3431 "slab",
3432 "module",
3433 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3434 else
3435 proto_seq_printf(seq, list_entry(v, struct proto, node));
3436 return 0;
3437}
3438
3439static const struct seq_operations proto_seq_ops = {
3440 .start = proto_seq_start,
3441 .next = proto_seq_next,
3442 .stop = proto_seq_stop,
3443 .show = proto_seq_show,
3444};
3445
3446static __net_init int proto_init_net(struct net *net)
3447{
3448 if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3449 sizeof(struct seq_net_private)))
3450 return -ENOMEM;
3451
3452 return 0;
3453}
3454
3455static __net_exit void proto_exit_net(struct net *net)
3456{
3457 remove_proc_entry("protocols", net->proc_net);
3458}
3459
3460
3461static __net_initdata struct pernet_operations proto_net_ops = {
3462 .init = proto_init_net,
3463 .exit = proto_exit_net,
3464};
3465
3466static int __init proto_init(void)
3467{
3468 return register_pernet_subsys(&proto_net_ops);
3469}
3470
3471subsys_initcall(proto_init);
3472
3473#endif
3474
3475#ifdef CONFIG_NET_RX_BUSY_POLL
3476bool sk_busy_loop_end(void *p, unsigned long start_time)
3477{
3478 struct sock *sk = p;
3479
3480 return !skb_queue_empty(&sk->sk_receive_queue) ||
3481 sk_busy_loop_timeout(sk, start_time);
3482}
3483EXPORT_SYMBOL(sk_busy_loop_end);
3484#endif
3485