#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#ifdef CONFIG_INET
#include <net/tcp.h>
#endif

#include <net/busy_poll.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
146
147#ifdef CONFIG_MEMCG_KMEM
148int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
149{
150 struct proto *proto;
151 int ret = 0;
152
153 mutex_lock(&proto_list_mutex);
154 list_for_each_entry(proto, &proto_list, node) {
155 if (proto->init_cgroup) {
156 ret = proto->init_cgroup(memcg, ss);
157 if (ret)
158 goto out;
159 }
160 }
161
162 mutex_unlock(&proto_list_mutex);
163 return ret;
164out:
165 list_for_each_entry_continue_reverse(proto, &proto_list, node)
166 if (proto->destroy_cgroup)
167 proto->destroy_cgroup(memcg);
168 mutex_unlock(&proto_list_mutex);
169 return ret;
170}
171
172void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
173{
174 struct proto *proto;
175
176 mutex_lock(&proto_list_mutex);
177 list_for_each_entry_reverse(proto, &proto_list, node)
178 if (proto->destroy_cgroup)
179 proto->destroy_cgroup(memcg);
180 mutex_unlock(&proto_list_mutex);
181}
182#endif
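
/*
 * Each address family gets its own lockdep class key for the socket lock
 * and for sk_lock.slock, so lockdep can keep per-family locking rules
 * apart; the matching class names are in the string tables below.
 */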
188static struct lock_class_key af_family_keys[AF_MAX];
189static struct lock_class_key af_family_slock_keys[AF_MAX];
190
191#if defined(CONFIG_MEMCG_KMEM)
192struct static_key memcg_socket_limit_enabled;
193EXPORT_SYMBOL(memcg_socket_limit_enabled);
194#endif
195
196
197
198
199
200
201static const char *const af_family_key_strings[AF_MAX+1] = {
202 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
203 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
204 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
205 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
206 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
207 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
208 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
209 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
210 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
211 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
212 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
213 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
214 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
215 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_MAX"
216};
217static const char *const af_family_slock_key_strings[AF_MAX+1] = {
218 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
219 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
220 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
221 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
222 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
223 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
224 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
225 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
226 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
227 "slock-27" , "slock-28" , "slock-AF_CAN" ,
228 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
229 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
230 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
231 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
232};
233static const char *const af_family_clock_key_strings[AF_MAX+1] = {
234 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
235 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
236 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
237 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
238 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
239 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
240 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
241 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
242 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
243 "clock-27" , "clock-28" , "clock-AF_CAN" ,
244 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
245 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
246 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
247 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
248};
249
250
251
252
253
254static struct lock_class_key af_callback_keys[AF_MAX];
255
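/*
 * Default socket send/receive buffer space: room for 256 packets of 256
 * bytes each, where SKB_TRUESIZE() accounts for struct sk_buff and
 * padding overhead on top of the payload.
 */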
261#define _SK_MEM_PACKETS 256
262#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
263#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
264#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
265
266
267__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
268EXPORT_SYMBOL(sysctl_wmem_max);
269__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
270EXPORT_SYMBOL(sysctl_rmem_max);
271__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
272__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
273
274
275int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
276EXPORT_SYMBOL(sysctl_optmem_max);
277
278struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
279EXPORT_SYMBOL_GPL(memalloc_socks);
280
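/**
 * sk_set_memalloc - allow a socket to dip into emergency memory reserves
 * @sk: socket to flag
 *
 * Sets SOCK_MEMALLOC, adds __GFP_MEMALLOC to the socket's allocation mode
 * and bumps the memalloc_socks static key so the receive path knows such
 * sockets exist (e.g. for swap-over-network style setups).
 */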
289void sk_set_memalloc(struct sock *sk)
290{
291 sock_set_flag(sk, SOCK_MEMALLOC);
292 sk->sk_allocation |= __GFP_MEMALLOC;
293 static_key_slow_inc(&memalloc_socks);
294}
295EXPORT_SYMBOL_GPL(sk_set_memalloc);
296
297void sk_clear_memalloc(struct sock *sk)
298{
299 sock_reset_flag(sk, SOCK_MEMALLOC);
300 sk->sk_allocation &= ~__GFP_MEMALLOC;
301 static_key_slow_dec(&memalloc_socks);
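
	/*
	 * Any forward-allocated memory should already have been reclaimed
	 * by the caller (hence the WARN below); reclaim it here regardless
	 * so nothing stays charged once SOCK_MEMALLOC is cleared.
	 */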
312 if (WARN_ON(sk->sk_forward_alloc))
313 sk_mem_reclaim(sk);
314}
315EXPORT_SYMBOL_GPL(sk_clear_memalloc);
316
317int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
318{
319 int ret;
320 unsigned long pflags = current->flags;
321
322
323 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
324
325 current->flags |= PF_MEMALLOC;
326 ret = sk->sk_backlog_rcv(sk, skb);
327 tsk_restore_flags(current, pflags, PF_MEMALLOC);
328
329 return ret;
330}
331EXPORT_SYMBOL(__sk_backlog_rcv);
332
333static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
334{
335 struct timeval tv;
336
337 if (optlen < sizeof(tv))
338 return -EINVAL;
339 if (copy_from_user(&tv, optval, sizeof(tv)))
340 return -EFAULT;
341 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
342 return -EDOM;
343
344 if (tv.tv_sec < 0) {
345 static int warned __read_mostly;
346
347 *timeo_p = 0;
348 if (warned < 10 && net_ratelimit()) {
349 warned++;
350 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
351 __func__, current->comm, task_pid_nr(current));
352 }
353 return 0;
354 }
355 *timeo_p = MAX_SCHEDULE_TIMEOUT;
356 if (tv.tv_sec == 0 && tv.tv_usec == 0)
357 return 0;
358 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
359 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
360 return 0;
361}
362
363static void sock_warn_obsolete_bsdism(const char *name)
364{
365 static int warned;
366 static char warncomm[TASK_COMM_LEN];
367 if (strcmp(warncomm, current->comm) && warned < 5) {
368 strcpy(warncomm, current->comm);
369 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
370 warncomm, name);
371 warned++;
372 }
373}
374
375static bool sock_needs_netstamp(const struct sock *sk)
376{
377 switch (sk->sk_family) {
378 case AF_UNSPEC:
379 case AF_UNIX:
380 return false;
381 default:
382 return true;
383 }
384}
385
386static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
387{
388 if (sk->sk_flags & flags) {
389 sk->sk_flags &= ~flags;
390 if (sock_needs_netstamp(sk) &&
391 !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
392 net_disable_timestamp();
393 }
394}
395
396
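/*
 * Queue @skb on sk->sk_receive_queue and charge it to the socket's receive
 * accounting.  Returns -ENOMEM when sk_rcvbuf is already exceeded and
 * -ENOBUFS when the memory charge is refused; on failure the caller still
 * owns the skb.
 */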
397int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
398{
399 int skb_len;
400 unsigned long flags;
401 struct sk_buff_head *list = &sk->sk_receive_queue;
402
403 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
404 atomic_inc(&sk->sk_drops);
405 trace_sock_rcvqueue_full(sk, skb);
406 return -ENOMEM;
407 }
408
409 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
410 atomic_inc(&sk->sk_drops);
411 return -ENOBUFS;
412 }
413
414 skb->dev = NULL;
415 skb_set_owner_r(skb, sk);
416
422 skb_len = skb->len;
423
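	/*
	 * Pin the route entry: the skb may sit on the receive queue long
	 * after the caller's RCU read-side critical section has ended.
	 */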
427 skb_dst_force(skb);
428
429 spin_lock_irqsave(&list->lock, flags);
430 sock_skb_set_dropcount(sk, skb);
431 __skb_queue_tail(list, skb);
432 spin_unlock_irqrestore(&list->lock, flags);
433
434 if (!sock_flag(sk, SOCK_DEAD))
435 sk->sk_data_ready(sk, skb_len);
436 return 0;
437}
438EXPORT_SYMBOL(__sock_queue_rcv_skb);
439
440int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
441{
442 int err;
443
444 err = sk_filter(sk, skb);
445 if (err)
446 return err;
447
448 return __sock_queue_rcv_skb(sk, skb);
449}
450EXPORT_SYMBOL(sock_queue_rcv_skb);
451
452int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
453{
454 int rc = NET_RX_SUCCESS;
455
456 if (sk_filter(sk, skb))
457 goto discard_and_relse;
458
459 skb->dev = NULL;
460
461 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
462 atomic_inc(&sk->sk_drops);
463 goto discard_and_relse;
464 }
465 if (nested)
466 bh_lock_sock_nested(sk);
467 else
468 bh_lock_sock(sk);
469 if (!sock_owned_by_user(sk)) {
470
471
472
473 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
474
475 rc = sk_backlog_rcv(sk, skb);
476
477 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
478 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
479 bh_unlock_sock(sk);
480 atomic_inc(&sk->sk_drops);
481 goto discard_and_relse;
482 }
483
484 bh_unlock_sock(sk);
485out:
486 sock_put(sk);
487 return rc;
488discard_and_relse:
489 kfree_skb(skb);
490 goto out;
491}
492EXPORT_SYMBOL(sk_receive_skb);
493
494void sk_reset_txq(struct sock *sk)
495{
496 sk_tx_queue_clear(sk);
497}
498EXPORT_SYMBOL(sk_reset_txq);
499
500struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
501{
502 struct dst_entry *dst = __sk_dst_get(sk);
503
504 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
505 sk_tx_queue_clear(sk);
506 sk->sk_dst_pending_confirm = 0;
507 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
508 dst_release(dst);
509 return NULL;
510 }
511
512 return dst;
513}
514EXPORT_SYMBOL(__sk_dst_check);
515
516struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
517{
518 struct dst_entry *dst = sk_dst_get(sk);
519
520 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
521 sk_dst_reset(sk);
522 dst_release(dst);
523 return NULL;
524 }
525
526 return dst;
527}
528EXPORT_SYMBOL(sk_dst_check);
529
530static int sock_setbindtodevice(struct sock *sk, char __user *optval,
531 int optlen)
532{
533 int ret = -ENOPROTOOPT;
534#ifdef CONFIG_NETDEVICES
535 struct net *net = sock_net(sk);
536 char devname[IFNAMSIZ];
537 int index;
538
539
540 ret = -EPERM;
541 if (!ns_capable(net->user_ns, CAP_NET_RAW))
542 goto out;
543
544 ret = -EINVAL;
545 if (optlen < 0)
546 goto out;
547
548
549
550
551
552
553 if (optlen > IFNAMSIZ - 1)
554 optlen = IFNAMSIZ - 1;
555 memset(devname, 0, sizeof(devname));
556
557 ret = -EFAULT;
558 if (copy_from_user(devname, optval, optlen))
559 goto out;
560
561 index = 0;
562 if (devname[0] != '\0') {
563 struct net_device *dev;
564
565 rcu_read_lock();
566 dev = dev_get_by_name_rcu(net, devname);
567 if (dev)
568 index = dev->ifindex;
569 rcu_read_unlock();
570 ret = -ENODEV;
571 if (!dev)
572 goto out;
573 }
574
575 lock_sock(sk);
576 sk->sk_bound_dev_if = index;
577 sk_dst_reset(sk);
578 release_sock(sk);
579
580 ret = 0;
581
582out:
583#endif
584
585 return ret;
586}
587
588static int sock_getbindtodevice(struct sock *sk, char __user *optval,
589 int __user *optlen, int len)
590{
591 int ret = -ENOPROTOOPT;
592#ifdef CONFIG_NETDEVICES
593 struct net *net = sock_net(sk);
594 char devname[IFNAMSIZ];
595
596 if (sk->sk_bound_dev_if == 0) {
597 len = 0;
598 goto zero;
599 }
600
601 ret = -EINVAL;
602 if (len < IFNAMSIZ)
603 goto out;
604
605 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
606 if (ret)
607 goto out;
608
609 len = strlen(devname) + 1;
610
611 ret = -EFAULT;
612 if (copy_to_user(optval, devname, len))
613 goto out;
614
615zero:
616 ret = -EFAULT;
617 if (put_user(len, optlen))
618 goto out;
619
620 ret = 0;
621
622out:
623#endif
624
625 return ret;
626}
627
628static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
629{
630 if (valbool)
631 sock_set_flag(sk, bit);
632 else
633 sock_reset_flag(sk, bit);
634}
635
636bool sk_mc_loop(struct sock *sk)
637{
638 if (dev_recursion_level())
639 return false;
640 if (!sk)
641 return true;
642 switch (sk->sk_family) {
643 case AF_INET:
644 return inet_sk(sk)->mc_loop;
645#if IS_ENABLED(CONFIG_IPV6)
646 case AF_INET6:
647 return inet6_sk(sk)->mc_loop;
648#endif
649 }
650 WARN_ON(1);
651 return true;
652}
653EXPORT_SYMBOL(sk_mc_loop);
654
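/*
 * Protocol-independent setsockopt(): socket-level (SOL_SOCKET) options are
 * handled here, everything else goes through the protocol's own handler.
 */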
660int sock_setsockopt(struct socket *sock, int level, int optname,
661 char __user *optval, unsigned int optlen)
662{
663 struct sock *sk = sock->sk;
664 int val;
665 int valbool;
666 struct linger ling;
667 int ret = 0;
668
669
670
671
672
673 if (optname == SO_BINDTODEVICE)
674 return sock_setbindtodevice(sk, optval, optlen);
675
676 if (optlen < sizeof(int))
677 return -EINVAL;
678
679 if (get_user(val, (int __user *)optval))
680 return -EFAULT;
681
682 valbool = val ? 1 : 0;
683
684 lock_sock(sk);
685
686 switch (optname) {
687 case SO_DEBUG:
688 if (val && !capable(CAP_NET_ADMIN))
689 ret = -EACCES;
690 else
691 sock_valbool_flag(sk, SOCK_DBG, valbool);
692 break;
693 case SO_REUSEADDR:
694 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
695 break;
696 case SO_REUSEPORT:
697 sk->sk_reuseport = valbool;
698 break;
699 case SO_TYPE:
700 case SO_PROTOCOL:
701 case SO_DOMAIN:
702 case SO_ERROR:
703 ret = -ENOPROTOOPT;
704 break;
705 case SO_DONTROUTE:
706 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
707 break;
708 case SO_BROADCAST:
709 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
710 break;
711 case SO_SNDBUF:
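		/*
		 * Cap oversized requests at sysctl_wmem_max instead of
		 * returning an error (traditional BSD behaviour).
		 */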
717 val = min_t(u32, val, sysctl_wmem_max);
718set_sndbuf:
719 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
720 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
721
722 sk->sk_write_space(sk);
723 break;
724
725 case SO_SNDBUFFORCE:
726 if (!capable(CAP_NET_ADMIN)) {
727 ret = -EPERM;
728 break;
729 }
730 goto set_sndbuf;
731
732 case SO_RCVBUF:
733
734
735
736
737
738 val = min_t(u32, val, sysctl_rmem_max);
739set_rcvbuf:
740 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
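
		/*
		 * Store double the requested value so the limit covers
		 * struct sk_buff and bookkeeping overhead (truesize) as well
		 * as payload, e.g. a 64 kB request becomes a 128 kB limit,
		 * with SOCK_MIN_RCVBUF as the floor.
		 */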
756 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
757 break;
758
759 case SO_RCVBUFFORCE:
760 if (!capable(CAP_NET_ADMIN)) {
761 ret = -EPERM;
762 break;
763 }
764 goto set_rcvbuf;
765
766 case SO_KEEPALIVE:
767#ifdef CONFIG_INET
768 if (sk->sk_protocol == IPPROTO_TCP &&
769 sk->sk_type == SOCK_STREAM)
770 tcp_set_keepalive(sk, valbool);
771#endif
772 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
773 break;
774
775 case SO_OOBINLINE:
776 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
777 break;
778
779 case SO_NO_CHECK:
780 sk->sk_no_check_tx = valbool;
781 break;
782
783 case SO_PRIORITY:
784 if ((val >= 0 && val <= 6) ||
785 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
786 sk->sk_priority = val;
787 else
788 ret = -EPERM;
789 break;
790
791 case SO_LINGER:
792 if (optlen < sizeof(ling)) {
793 ret = -EINVAL;
794 break;
795 }
796 if (copy_from_user(&ling, optval, sizeof(ling))) {
797 ret = -EFAULT;
798 break;
799 }
800 if (!ling.l_onoff)
801 sock_reset_flag(sk, SOCK_LINGER);
802 else {
803#if (BITS_PER_LONG == 32)
804 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
805 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
806 else
807#endif
808 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
809 sock_set_flag(sk, SOCK_LINGER);
810 }
811 break;
812
813 case SO_BSDCOMPAT:
814 sock_warn_obsolete_bsdism("setsockopt");
815 break;
816
817 case SO_PASSCRED:
818 if (valbool)
819 set_bit(SOCK_PASSCRED, &sock->flags);
820 else
821 clear_bit(SOCK_PASSCRED, &sock->flags);
822 break;
823
824 case SO_TIMESTAMP:
825 case SO_TIMESTAMPNS:
826 if (valbool) {
827 if (optname == SO_TIMESTAMP)
828 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
829 else
830 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
831 sock_set_flag(sk, SOCK_RCVTSTAMP);
832 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
833 } else {
834 sock_reset_flag(sk, SOCK_RCVTSTAMP);
835 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
836 }
837 break;
838
839 case SO_TIMESTAMPING:
840 if (val & ~SOF_TIMESTAMPING_MASK ||
841 val & __RH_RESERVED_SOF_TIMESTAMPING_OPT_ID ||
842 val & __RH_RESERVED_SOF_TIMESTAMPING_TX_SCHED ||
843 val & __RH_RESERVED_SOF_TIMESTAMPING_TX_ACK) {
844 ret = -EINVAL;
845 break;
846 }
847 sk->sk_tsflags = val;
848 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
849 sock_enable_timestamp(sk,
850 SOCK_TIMESTAMPING_RX_SOFTWARE);
851 else
852 sock_disable_timestamp(sk,
853 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
854 break;
855
856 case SO_RCVLOWAT:
857 if (val < 0)
858 val = INT_MAX;
859 sk->sk_rcvlowat = val ? : 1;
860 break;
861
862 case SO_RCVTIMEO:
863 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
864 break;
865
866 case SO_SNDTIMEO:
867 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
868 break;
869
870 case SO_ATTACH_FILTER:
871 ret = -EINVAL;
872 if (optlen == sizeof(struct sock_fprog)) {
873 struct sock_fprog fprog;
874
875 ret = -EFAULT;
876 if (copy_from_user(&fprog, optval, sizeof(fprog)))
877 break;
878
879 ret = sk_attach_filter(&fprog, sk);
880 }
881 break;
882
883 case SO_DETACH_FILTER:
884 ret = sk_detach_filter(sk);
885 break;
886
887 case SO_LOCK_FILTER:
888 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
889 ret = -EPERM;
890 else
891 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
892 break;
893
894 case SO_PASSSEC:
895 if (valbool)
896 set_bit(SOCK_PASSSEC, &sock->flags);
897 else
898 clear_bit(SOCK_PASSSEC, &sock->flags);
899 break;
900 case SO_MARK:
901 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
902 ret = -EPERM;
903 else
904 sk->sk_mark = val;
905 break;
906
907
908
909 case SO_RXQ_OVFL:
910 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
911 break;
912
913 case SO_WIFI_STATUS:
914 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
915 break;
916
917 case SO_PEEK_OFF:
918 if (sock->ops->set_peek_off)
919 ret = sock->ops->set_peek_off(sk, val);
920 else
921 ret = -EOPNOTSUPP;
922 break;
923
924 case SO_NOFCS:
925 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
926 break;
927
928 case SO_SELECT_ERR_QUEUE:
929 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
930 break;
931
932#ifdef CONFIG_NET_RX_BUSY_POLL
933 case SO_BUSY_POLL:
934
935 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
936 ret = -EPERM;
937 else {
938 if (val < 0)
939 ret = -EINVAL;
940 else
941 sk->sk_ll_usec = val;
942 }
943 break;
944#endif
945
946 case SO_MAX_PACING_RATE:
947 sk->sk_max_pacing_rate = val;
948 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
949 sk->sk_max_pacing_rate);
950 break;
951
952 default:
953 ret = -ENOPROTOOPT;
954 break;
955 }
956 release_sock(sk);
957 return ret;
958}
959EXPORT_SYMBOL(sock_setsockopt);
960
961
962void cred_to_ucred(struct pid *pid, const struct cred *cred,
963 struct ucred *ucred)
964{
965 ucred->pid = pid_vnr(pid);
966 ucred->uid = ucred->gid = -1;
967 if (cred) {
968 struct user_namespace *current_ns = current_user_ns();
969
970 ucred->uid = from_kuid_munged(current_ns, cred->euid);
971 ucred->gid = from_kgid_munged(current_ns, cred->egid);
972 }
973}
974EXPORT_SYMBOL_GPL(cred_to_ucred);
975
976int sock_getsockopt(struct socket *sock, int level, int optname,
977 char __user *optval, int __user *optlen)
978{
979 struct sock *sk = sock->sk;
980
981 union {
982 int val;
983 struct linger ling;
984 struct timeval tm;
985 } v;
986
987 int lv = sizeof(int);
988 int len;
989
990 if (get_user(len, optlen))
991 return -EFAULT;
992 if (len < 0)
993 return -EINVAL;
994
995 memset(&v, 0, sizeof(v));
996
997 switch (optname) {
998 case SO_DEBUG:
999 v.val = sock_flag(sk, SOCK_DBG);
1000 break;
1001
1002 case SO_DONTROUTE:
1003 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1004 break;
1005
1006 case SO_BROADCAST:
1007 v.val = sock_flag(sk, SOCK_BROADCAST);
1008 break;
1009
1010 case SO_SNDBUF:
1011 v.val = sk->sk_sndbuf;
1012 break;
1013
1014 case SO_RCVBUF:
1015 v.val = sk->sk_rcvbuf;
1016 break;
1017
1018 case SO_REUSEADDR:
1019 v.val = sk->sk_reuse;
1020 break;
1021
1022 case SO_REUSEPORT:
1023 v.val = sk->sk_reuseport;
1024 break;
1025
1026 case SO_KEEPALIVE:
1027 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1028 break;
1029
1030 case SO_TYPE:
1031 v.val = sk->sk_type;
1032 break;
1033
1034 case SO_PROTOCOL:
1035 v.val = sk->sk_protocol;
1036 break;
1037
1038 case SO_DOMAIN:
1039 v.val = sk->sk_family;
1040 break;
1041
1042 case SO_ERROR:
1043 v.val = -sock_error(sk);
1044 if (v.val == 0)
1045 v.val = xchg(&sk->sk_err_soft, 0);
1046 break;
1047
1048 case SO_OOBINLINE:
1049 v.val = sock_flag(sk, SOCK_URGINLINE);
1050 break;
1051
1052 case SO_NO_CHECK:
1053 v.val = sk->sk_no_check_tx;
1054 break;
1055
1056 case SO_PRIORITY:
1057 v.val = sk->sk_priority;
1058 break;
1059
1060 case SO_LINGER:
1061 lv = sizeof(v.ling);
1062 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1063 v.ling.l_linger = sk->sk_lingertime / HZ;
1064 break;
1065
1066 case SO_BSDCOMPAT:
1067 sock_warn_obsolete_bsdism("getsockopt");
1068 break;
1069
1070 case SO_TIMESTAMP:
1071 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1072 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1073 break;
1074
1075 case SO_TIMESTAMPNS:
1076 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1077 break;
1078
1079 case SO_TIMESTAMPING:
1080 v.val = sk->sk_tsflags;
1081 break;
1082
1083 case SO_RCVTIMEO:
1084 lv = sizeof(struct timeval);
1085 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1086 v.tm.tv_sec = 0;
1087 v.tm.tv_usec = 0;
1088 } else {
1089 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1090 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1091 }
1092 break;
1093
1094 case SO_SNDTIMEO:
1095 lv = sizeof(struct timeval);
1096 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1097 v.tm.tv_sec = 0;
1098 v.tm.tv_usec = 0;
1099 } else {
1100 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1101 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1102 }
1103 break;
1104
1105 case SO_RCVLOWAT:
1106 v.val = sk->sk_rcvlowat;
1107 break;
1108
1109 case SO_SNDLOWAT:
1110 v.val = 1;
1111 break;
1112
1113 case SO_PASSCRED:
1114 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1115 break;
1116
1117 case SO_PEERCRED:
1118 {
1119 struct ucred peercred;
1120 if (len > sizeof(peercred))
1121 len = sizeof(peercred);
1122 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1123 if (copy_to_user(optval, &peercred, len))
1124 return -EFAULT;
1125 goto lenout;
1126 }
1127
1128 case SO_PEERNAME:
1129 {
1130 char address[128];
1131
1132 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1133 return -ENOTCONN;
1134 if (lv < len)
1135 return -EINVAL;
1136 if (copy_to_user(optval, address, len))
1137 return -EFAULT;
1138 goto lenout;
1139 }
1140
1141
1142
1143
1144 case SO_ACCEPTCONN:
1145 v.val = sk->sk_state == TCP_LISTEN;
1146 break;
1147
1148 case SO_PASSSEC:
1149 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1150 break;
1151
1152 case SO_PEERSEC:
1153 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1154
1155 case SO_MARK:
1156 v.val = sk->sk_mark;
1157 break;
1158
1159 case SO_RXQ_OVFL:
1160 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1161 break;
1162
1163 case SO_WIFI_STATUS:
1164 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1165 break;
1166
1167 case SO_PEEK_OFF:
1168 if (!sock->ops->set_peek_off)
1169 return -EOPNOTSUPP;
1170
1171 v.val = sk->sk_peek_off;
1172 break;
1173 case SO_NOFCS:
1174 v.val = sock_flag(sk, SOCK_NOFCS);
1175 break;
1176
1177 case SO_BINDTODEVICE:
1178 return sock_getbindtodevice(sk, optval, optlen, len);
1179
1180 case SO_GET_FILTER:
1181 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1182 if (len < 0)
1183 return len;
1184
1185 goto lenout;
1186
1187 case SO_LOCK_FILTER:
1188 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1189 break;
1190
1191 case SO_BPF_EXTENSIONS:
1192 v.val = bpf_tell_extensions();
1193 break;
1194
1195 case SO_SELECT_ERR_QUEUE:
1196 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1197 break;
1198
1199#ifdef CONFIG_NET_RX_BUSY_POLL
1200 case SO_BUSY_POLL:
1201 v.val = sk->sk_ll_usec;
1202 break;
1203#endif
1204
1205 case SO_MAX_PACING_RATE:
1206 v.val = sk->sk_max_pacing_rate;
1207 break;
1208
1209 default:
1210 return -ENOPROTOOPT;
1211 }
1212
1213 if (len > lv)
1214 len = lv;
1215 if (copy_to_user(optval, &v, len))
1216 return -EFAULT;
1217lenout:
1218 if (put_user(len, optlen))
1219 return -EFAULT;
1220 return 0;
1221}
1222
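/*
 * Initialise sk->sk_lock with the per-family lockdep class and name so
 * that lockdep reports identify which address family is involved.
 */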
1228static inline void sock_lock_init(struct sock *sk)
1229{
1230 sock_lock_init_class_and_name(sk,
1231 af_family_slock_key_strings[sk->sk_family],
1232 af_family_slock_keys + sk->sk_family,
1233 af_family_key_strings[sk->sk_family],
1234 af_family_keys + sk->sk_family);
1235}
1236
1237
1238
1239
1240
1241
1242static void sock_copy(struct sock *nsk, const struct sock *osk)
1243{
1244#ifdef CONFIG_SECURITY_NETWORK
1245 void *sptr = nsk->sk_security;
1246#endif
1247 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1248
1249 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1250 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1251
1252#ifdef CONFIG_SECURITY_NETWORK
1253 nsk->sk_security = sptr;
1254 security_sk_clone(osk, nsk);
1255#endif
1256}
1257
1258void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1259{
1260 unsigned long nulls1, nulls2;
1261
1262 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1263 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1264 if (nulls1 > nulls2)
1265 swap(nulls1, nulls2);
1266
1267 if (nulls1 != 0)
1268 memset((char *)sk, 0, nulls1);
1269 memset((char *)sk + nulls1 + sizeof(void *), 0,
1270 nulls2 - nulls1 - sizeof(void *));
1271 memset((char *)sk + nulls2 + sizeof(void *), 0,
1272 size - nulls2 - sizeof(void *));
1273}
1274EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1275
1276static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1277 int family)
1278{
1279 struct sock *sk;
1280 struct kmem_cache *slab;
1281
1282 slab = prot->slab;
1283 if (slab != NULL) {
1284 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1285 if (!sk)
1286 return sk;
1287 if (priority & __GFP_ZERO) {
1288 if (prot->clear_sk)
1289 prot->clear_sk(sk, prot->obj_size);
1290 else
1291 sk_prot_clear_nulls(sk, prot->obj_size);
1292 }
1293 } else
1294 sk = kmalloc(prot->obj_size, priority);
1295
1296 if (sk != NULL) {
1297 kmemcheck_annotate_bitfield(sk, flags);
1298
1299 if (security_sk_alloc(sk, family, priority))
1300 goto out_free;
1301
1302 if (!try_module_get(prot->owner))
1303 goto out_free_sec;
1304 sk_tx_queue_clear(sk);
1305 }
1306
1307 return sk;
1308
1309out_free_sec:
1310 security_sk_free(sk);
1311out_free:
1312 if (slab != NULL)
1313 kmem_cache_free(slab, sk);
1314 else
1315 kfree(sk);
1316 return NULL;
1317}
1318
1319static void sk_prot_free(struct proto *prot, struct sock *sk)
1320{
1321 struct kmem_cache *slab;
1322 struct module *owner;
1323
1324 owner = prot->owner;
1325 slab = prot->slab;
1326
1327 security_sk_free(sk);
1328 if (slab != NULL)
1329 kmem_cache_free(slab, sk);
1330 else
1331 kfree(sk);
1332 module_put(owner);
1333}
1334
1335#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1336void sock_update_classid(struct sock *sk)
1337{
1338 u32 classid;
1339
1340 classid = task_cls_classid(current);
1341 if (classid != sk->sk_classid)
1342 sk->sk_classid = classid;
1343}
1344EXPORT_SYMBOL(sock_update_classid);
1345#endif
1346
1347#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1348void sock_update_netprioidx(struct sock *sk)
1349{
1350 if (in_interrupt())
1351 return;
1352
1353 sk->sk_cgrp_prioidx = task_netprioidx(current);
1354}
1355EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1356#endif
1357
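/**
 * sk_alloc - allocate and minimally initialise a new socket
 * @net: network namespace the socket will belong to (a reference is taken)
 * @family: protocol family
 * @priority: allocation mode (GFP_KERNEL, GFP_ATOMIC, ...)
 * @prot: struct proto describing the protocol implementation
 */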
1365struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1366 struct proto *prot)
1367{
1368 struct sock *sk;
1369
1370 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1371 if (sk) {
1372 sk->sk_family = family;
1373
1374
1375
1376
1377 sk->sk_prot = sk->sk_prot_creator = prot;
1378 sock_lock_init(sk);
1379 sock_net_set(sk, get_net(net));
1380 atomic_set(&sk->sk_wmem_alloc, 1);
1381
1382 sock_update_classid(sk);
1383 sock_update_netprioidx(sk);
1384 }
1385
1386 return sk;
1387}
1388EXPORT_SYMBOL(sk_alloc);
1389
1390static void __sk_free(struct sock *sk)
1391{
1392 struct sk_filter *filter;
1393
1394 if (sk->sk_destruct)
1395 sk->sk_destruct(sk);
1396
1397 filter = rcu_dereference_check(sk->sk_filter,
1398 atomic_read(&sk->sk_wmem_alloc) == 0);
1399 if (filter) {
1400 sk_filter_uncharge(sk, filter);
1401 RCU_INIT_POINTER(sk->sk_filter, NULL);
1402 }
1403
1404 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1405
1406 if (atomic_read(&sk->sk_omem_alloc))
1407 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1408 __func__, atomic_read(&sk->sk_omem_alloc));
1409
1410 if (sk->sk_peer_cred)
1411 put_cred(sk->sk_peer_cred);
1412 put_pid(sk->sk_peer_pid);
1413 put_net(sock_net(sk));
1414 sk_prot_free(sk->sk_prot_creator, sk);
1415}
1416
1417void sk_free(struct sock *sk)
1418{
1419
1420
1421
1422
1423
1424 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1425 __sk_free(sk);
1426}
1427EXPORT_SYMBOL(sk_free);
1428
1429
1430
1431
1432
1433
1434
1435
1436void sk_release_kernel(struct sock *sk)
1437{
1438 if (sk == NULL || sk->sk_socket == NULL)
1439 return;
1440
1441 sock_hold(sk);
1442 sock_release(sk->sk_socket);
1443 sock_net_set(sk, get_net(&init_net));
1444 sock_put(sk);
1445}
1446EXPORT_SYMBOL(sk_release_kernel);
1447
1448static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1449{
1450 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1451 sock_update_memcg(newsk);
1452}
1453
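/**
 * sk_clone_lock - clone a socket, returning it locked
 * @sk: socket to clone
 * @priority: allocation mode
 *
 * The clone is returned bh-locked with sk_refcnt set to 2; the caller must
 * unlock it with bh_unlock_sock().  Returns NULL if allocation or xfrm
 * policy cloning fails.
 */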
1461struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1462{
1463 struct sock *newsk;
1464
1465 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1466 if (newsk != NULL) {
1467 struct sk_filter *filter;
1468
1469 sock_copy(newsk, sk);
1470
1471
1472 get_net(sock_net(newsk));
1473 sk_node_init(&newsk->sk_node);
1474 sock_lock_init(newsk);
1475 bh_lock_sock(newsk);
1476 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1477 newsk->sk_backlog.len = 0;
1478
1479 atomic_set(&newsk->sk_rmem_alloc, 0);
1480
1481
1482
1483 atomic_set(&newsk->sk_wmem_alloc, 1);
1484 atomic_set(&newsk->sk_omem_alloc, 0);
1485 skb_queue_head_init(&newsk->sk_receive_queue);
1486 skb_queue_head_init(&newsk->sk_write_queue);
1487
1488 rwlock_init(&newsk->sk_callback_lock);
1489 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1490 af_callback_keys + newsk->sk_family,
1491 af_family_clock_key_strings[newsk->sk_family]);
1492
1493 newsk->sk_dst_cache = NULL;
1494 newsk->sk_dst_pending_confirm = 0;
1495 newsk->sk_wmem_queued = 0;
1496 newsk->sk_forward_alloc = 0;
1497 newsk->sk_send_head = NULL;
1498 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1499
1500 sock_reset_flag(newsk, SOCK_DONE);
1501 skb_queue_head_init(&newsk->sk_error_queue);
1502
1503 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1504 if (filter != NULL)
1505 sk_filter_charge(newsk, filter);
1506
1507 if (unlikely(xfrm_sk_clone_policy(newsk))) {
1508
1509
1510 newsk->sk_destruct = NULL;
1511 bh_unlock_sock(newsk);
1512 sk_free(newsk);
1513 newsk = NULL;
1514 goto out;
1515 }
1516
1517 newsk->sk_err = 0;
1518 newsk->sk_priority = 0;
1519
1520
1521
1522
1523 smp_wmb();
1524 atomic_set(&newsk->sk_refcnt, 2);
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537 sk_refcnt_debug_inc(newsk);
1538 sk_set_socket(newsk, NULL);
1539 newsk->sk_wq = NULL;
1540
1541 sk_update_clone(sk, newsk);
1542
1543 if (newsk->sk_prot->sockets_allocated)
1544 sk_sockets_allocated_inc(newsk);
1545
1546 if (sock_needs_netstamp(sk) &&
1547 newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1548 net_enable_timestamp();
1549 }
1550out:
1551 return newsk;
1552}
1553EXPORT_SYMBOL_GPL(sk_clone_lock);
1554
1555void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1556{
1557 sk_dst_set(sk, dst);
1558 sk->sk_route_caps = dst->dev->features;
1559 if (sk->sk_route_caps & NETIF_F_GSO)
1560 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1561 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1562 if (sk_can_gso(sk)) {
1563 if (dst->header_len) {
1564 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1565 } else {
1566 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1567 sk->sk_gso_max_size = dst->dev->gso_max_size;
1568 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1569 }
1570 }
1571}
1572EXPORT_SYMBOL_GPL(sk_setup_caps);
1573
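/*
 * Write buffer destructor: uncharge the skb's truesize from sk_wmem_alloc,
 * wake writers via sk_write_space() unless the protocol handles write-space
 * itself (SOCK_USE_WRITE_QUEUE), and free the sock once the last write
 * reference is dropped.
 */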
1582void sock_wfree(struct sk_buff *skb)
1583{
1584 struct sock *sk = skb->sk;
1585 unsigned int len = skb->truesize;
1586
1587 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1588
1589
1590
1591
1592 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1593 sk->sk_write_space(sk);
1594 len = 1;
1595 }
1596
1597
1598
1599
1600 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1601 __sk_free(sk);
1602}
1603EXPORT_SYMBOL(sock_wfree);
1604
1605
1606
1607
1608void sock_rfree(struct sk_buff *skb)
1609{
1610 struct sock *sk = skb->sk;
1611 unsigned int len = skb->truesize;
1612
1613 atomic_sub(len, &sk->sk_rmem_alloc);
1614 sk_mem_uncharge(sk, len);
1615}
1616EXPORT_SYMBOL(sock_rfree);
1617
1618void sock_efree(struct sk_buff *skb)
1619{
1620 sock_put(skb->sk);
1621}
1622EXPORT_SYMBOL(sock_efree);
1623
1624#ifdef CONFIG_INET
1625void sock_edemux(struct sk_buff *skb)
1626{
1627 struct sock *sk = skb->sk;
1628
1629 if (sk->sk_state == TCP_TIME_WAIT)
1630 inet_twsk_put(inet_twsk(sk));
1631 else
1632 sock_put(sk);
1633}
1634EXPORT_SYMBOL(sock_edemux);
1635#endif
1636
1637kuid_t sock_i_uid(struct sock *sk)
1638{
1639 kuid_t uid;
1640
1641 read_lock_bh(&sk->sk_callback_lock);
1642 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1643 read_unlock_bh(&sk->sk_callback_lock);
1644 return uid;
1645}
1646EXPORT_SYMBOL(sock_i_uid);
1647
1648unsigned long sock_i_ino(struct sock *sk)
1649{
1650 unsigned long ino;
1651
1652 read_lock_bh(&sk->sk_callback_lock);
1653 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1654 read_unlock_bh(&sk->sk_callback_lock);
1655 return ino;
1656}
1657EXPORT_SYMBOL(sock_i_ino);
1658
1659
1660
1661
1662struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1663 gfp_t priority)
1664{
1665 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1666 struct sk_buff *skb = alloc_skb(size, priority);
1667 if (skb) {
1668 skb_set_owner_w(skb, sk);
1669 return skb;
1670 }
1671 }
1672 return NULL;
1673}
1674EXPORT_SYMBOL(sock_wmalloc);
1675
1676
1677
1678
1679struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1680 gfp_t priority)
1681{
1682 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1683 struct sk_buff *skb = alloc_skb(size, priority);
1684 if (skb) {
1685 skb_set_owner_r(skb, sk);
1686 return skb;
1687 }
1688 }
1689 return NULL;
1690}
1691
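/*
 * Allocate ancillary ("option") memory charged against the socket, bounded
 * overall by sysctl_optmem_max.
 */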
1695void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1696{
1697 if ((unsigned int)size <= sysctl_optmem_max &&
1698 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1699 void *mem;
1700
1701
1702
1703 atomic_add(size, &sk->sk_omem_alloc);
1704 mem = kmalloc(size, priority);
1705 if (mem)
1706 return mem;
1707 atomic_sub(size, &sk->sk_omem_alloc);
1708 }
1709 return NULL;
1710}
1711EXPORT_SYMBOL(sock_kmalloc);
1712
1713
1714
1715
1716void sock_kfree_s(struct sock *sk, void *mem, int size)
1717{
1718 kfree(mem);
1719 atomic_sub(size, &sk->sk_omem_alloc);
1720}
1721EXPORT_SYMBOL(sock_kfree_s);
1722
1723
1724
1725
1726static long sock_wait_for_wmem(struct sock *sk, long timeo)
1727{
1728 DEFINE_WAIT(wait);
1729
1730 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1731 for (;;) {
1732 if (!timeo)
1733 break;
1734 if (signal_pending(current))
1735 break;
1736 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1737 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1738 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1739 break;
1740 if (sk->sk_shutdown & SEND_SHUTDOWN)
1741 break;
1742 if (sk->sk_err)
1743 break;
1744 timeo = schedule_timeout(timeo);
1745 }
1746 finish_wait(sk_sleep(sk), &wait);
1747 return timeo;
1748}
1749
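/*
 * Generic send-buffer allocation: wait, up to the socket's send timeout,
 * until the pending write allocation fits under sk_sndbuf, then build an
 * skb with @header_len linear bytes and @data_len bytes in page fragments
 * of at most order @max_page_order.
 */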
1755struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1756 unsigned long data_len, int noblock,
1757 int *errcode, int max_page_order)
1758{
1759 struct sk_buff *skb;
1760 long timeo;
1761 int err;
1762
1763 timeo = sock_sndtimeo(sk, noblock);
1764 for (;;) {
1765 err = sock_error(sk);
1766 if (err != 0)
1767 goto failure;
1768
1769 err = -EPIPE;
1770 if (sk->sk_shutdown & SEND_SHUTDOWN)
1771 goto failure;
1772
1773 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1774 break;
1775
1776 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1777 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1778 err = -EAGAIN;
1779 if (!timeo)
1780 goto failure;
1781 if (signal_pending(current))
1782 goto interrupted;
1783 timeo = sock_wait_for_wmem(sk, timeo);
1784 }
1785 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1786 errcode, sk->sk_allocation);
1787 if (skb)
1788 skb_set_owner_w(skb, sk);
1789 return skb;
1790
1791interrupted:
1792 err = sock_intr_errno(timeo);
1793failure:
1794 *errcode = err;
1795 return NULL;
1796}
1797EXPORT_SYMBOL(sock_alloc_send_pskb);
1798
1799struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1800 int noblock, int *errcode)
1801{
1802 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1803}
1804EXPORT_SYMBOL(sock_alloc_send_skb);
1805
1806
1807#define SKB_FRAG_PAGE_ORDER get_order(32768)
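
/**
 * skb_page_frag_refill - make sure a page_frag has at least @sz bytes free
 * @sz: minimum number of bytes the caller needs
 * @pfrag: page_frag to refill
 * @gfp: allocation mode
 *
 * Reuses the current page when it is exclusively owned or still has room,
 * otherwise tries a high-order page and falls back to a single page, so
 * callers must not ask for more than PAGE_SIZE.
 */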
1819bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1820{
1821 if (pfrag->page) {
1822 if (page_ref_count(pfrag->page) == 1) {
1823 pfrag->offset = 0;
1824 return true;
1825 }
1826 if (pfrag->offset + sz <= pfrag->size)
1827 return true;
1828 put_page(pfrag->page);
1829 }
1830
1831 pfrag->offset = 0;
1832 if (SKB_FRAG_PAGE_ORDER) {
1833 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
1834 __GFP_NOWARN | __GFP_NORETRY,
1835 SKB_FRAG_PAGE_ORDER);
1836 if (likely(pfrag->page)) {
1837 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
1838 return true;
1839 }
1840 }
1841 pfrag->page = alloc_page(gfp);
1842 if (likely(pfrag->page)) {
1843 pfrag->size = PAGE_SIZE;
1844 return true;
1845 }
1846 return false;
1847}
1848EXPORT_SYMBOL(skb_page_frag_refill);
1849
1850bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1851{
1852 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
1853 return true;
1854
1855 sk_enter_memory_pressure(sk);
1856 sk_stream_moderate_sndbuf(sk);
1857 return false;
1858}
1859EXPORT_SYMBOL(sk_page_frag_refill);
1860
1861static void __lock_sock(struct sock *sk)
1862 __releases(&sk->sk_lock.slock)
1863 __acquires(&sk->sk_lock.slock)
1864{
1865 DEFINE_WAIT(wait);
1866
1867 for (;;) {
1868 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1869 TASK_UNINTERRUPTIBLE);
1870 spin_unlock_bh(&sk->sk_lock.slock);
1871 schedule();
1872 spin_lock_bh(&sk->sk_lock.slock);
1873 if (!sock_owned_by_user(sk))
1874 break;
1875 }
1876 finish_wait(&sk->sk_lock.wq, &wait);
1877}
1878
1879static void __release_sock(struct sock *sk)
1880 __releases(&sk->sk_lock.slock)
1881 __acquires(&sk->sk_lock.slock)
1882{
1883 struct sk_buff *skb = sk->sk_backlog.head;
1884
1885 do {
1886 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1887 bh_unlock_sock(sk);
1888
1889 do {
1890 struct sk_buff *next = skb->next;
1891
1892 prefetch(next);
1893 WARN_ON_ONCE(skb_dst_is_noref(skb));
1894 skb->next = NULL;
1895 sk_backlog_rcv(sk, skb);
1896
1897
1898
1899
1900
1901
1902
1903 cond_resched_softirq();
1904
1905 skb = next;
1906 } while (skb != NULL);
1907
1908 bh_lock_sock(sk);
1909 } while ((skb = sk->sk_backlog.head) != NULL);
1910
1911
1912
1913
1914
1915 sk->sk_backlog.len = 0;
1916}
1917
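/**
 * sk_wait_data - wait for data to arrive on the receive queue
 * @sk: socket to wait on (the caller holds the socket lock)
 * @timeo: maximum time to wait, updated with the time remaining
 * @skb: last skb already seen on sk_receive_queue, or NULL
 *
 * Sleeps until the tail of sk->sk_receive_queue differs from @skb, the
 * timeout expires or a signal is pending; returns the result of the
 * condition check.
 */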
1929int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
1930{
1931 int rc;
1932 DEFINE_WAIT(wait);
1933
1934 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1935 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1936 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
1937 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1938 finish_wait(sk_sleep(sk), &wait);
1939 return rc;
1940}
1941EXPORT_SYMBOL(sk_wait_data);
1942
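/*
 * Charge @amt pages to the protocol's global memory accounting and decide
 * whether the allocation may proceed.  Returns 1 to allow it, 0 to suppress
 * it; unlike __sk_mem_schedule() this leaves sk_forward_alloc untouched.
 */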
1952int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
1953{
1954 struct proto *prot = sk->sk_prot;
1955 int parent_status = UNDER_LIMIT;
1956 long allocated = sk_memory_allocated_add(sk, amt, &parent_status);
1957
1958
1959 if (parent_status == UNDER_LIMIT &&
1960 allocated <= sk_prot_mem_limits(sk, 0)) {
1961 sk_leave_memory_pressure(sk);
1962 return 1;
1963 }
1964
1965
1966 if ((parent_status > SOFT_LIMIT) ||
1967 allocated > sk_prot_mem_limits(sk, 1))
1968 sk_enter_memory_pressure(sk);
1969
1970
1971 if ((parent_status == OVER_LIMIT) ||
1972 (allocated > sk_prot_mem_limits(sk, 2)))
1973 goto suppress_allocation;
1974
1975
1976 if (kind == SK_MEM_RECV) {
1977 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1978 return 1;
1979
1980 } else {
1981 if (sk->sk_type == SOCK_STREAM) {
1982 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1983 return 1;
1984 } else if (atomic_read(&sk->sk_wmem_alloc) <
1985 prot->sysctl_wmem[0])
1986 return 1;
1987 }
1988
1989 if (sk_has_memory_pressure(sk)) {
1990 int alloc;
1991
1992 if (!sk_under_memory_pressure(sk))
1993 return 1;
1994 alloc = sk_sockets_allocated_read_positive(sk);
1995 if (sk_prot_mem_limits(sk, 2) > alloc *
1996 sk_mem_pages(sk->sk_wmem_queued +
1997 atomic_read(&sk->sk_rmem_alloc) +
1998 sk->sk_forward_alloc))
1999 return 1;
2000 }
2001
2002suppress_allocation:
2003
2004 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2005 sk_stream_moderate_sndbuf(sk);
2006
2007
2008
2009
2010 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2011 return 1;
2012 }
2013
2014 trace_sock_exceed_buf_limit(sk, prot, allocated);
2015
2016 sk_memory_allocated_sub(sk, amt);
2017
2018 return 0;
2019}
2020EXPORT_SYMBOL(__sk_mem_raise_allocated);
2021
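/**
 * __sk_mem_schedule - charge memory to a socket's forward allocation
 * @sk: socket
 * @size: number of bytes being allocated
 * @kind: SK_MEM_SEND for write buffers, SK_MEM_RECV for receive buffers
 *
 * Rounds @size up to whole SK_MEM_QUANTUM pages, optimistically adds them
 * to sk_forward_alloc and backs the charge out if __sk_mem_raise_allocated()
 * refuses it.  Returns 1 on success, 0 otherwise.
 */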
2032int __sk_mem_schedule(struct sock *sk, int size, int kind)
2033{
2034 int ret, amt = sk_mem_pages(size);
2035
2036 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2037 ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2038 if (!ret)
2039 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2040 return ret;
2041}
2042EXPORT_SYMBOL(__sk_mem_schedule);
2043
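/*
 * Return @amount pages to the protocol's global accounting and leave memory
 * pressure once usage drops back under the lowest limit.
 */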
2051void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2052{
2053 sk_memory_allocated_sub(sk, amount);
2054
2055 if (sk_under_memory_pressure(sk) &&
2056 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2057 sk_leave_memory_pressure(sk);
2058}
2059EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2060
2061
2062
2063
2064
2065
2066void __sk_mem_reclaim(struct sock *sk, int amount)
2067{
2068 amount >>= SK_MEM_QUANTUM_SHIFT;
2069 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2070 __sk_mem_reduce_allocated(sk, amount);
2071}
2072EXPORT_SYMBOL(__sk_mem_reclaim);
2073
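/*
 * Default proto_ops stubs for operations a protocol does not support; each
 * simply reports the appropriate error.  A protocol points its proto_ops at
 * them, for example (illustrative sketch only, PF_EXAMPLE is made up):
 *
 *	static const struct proto_ops example_ops = {
 *		.family		= PF_EXAMPLE,
 *		.mmap		= sock_no_mmap,
 *		.socketpair	= sock_no_socketpair,
 *	};
 */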
2082int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2083{
2084 return -EOPNOTSUPP;
2085}
2086EXPORT_SYMBOL(sock_no_bind);
2087
2088int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2089 int len, int flags)
2090{
2091 return -EOPNOTSUPP;
2092}
2093EXPORT_SYMBOL(sock_no_connect);
2094
2095int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2096{
2097 return -EOPNOTSUPP;
2098}
2099EXPORT_SYMBOL(sock_no_socketpair);
2100
2101int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2102{
2103 return -EOPNOTSUPP;
2104}
2105EXPORT_SYMBOL(sock_no_accept);
2106
2107int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2108 int *len, int peer)
2109{
2110 return -EOPNOTSUPP;
2111}
2112EXPORT_SYMBOL(sock_no_getname);
2113
2114unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2115{
2116 return 0;
2117}
2118EXPORT_SYMBOL(sock_no_poll);
2119
2120int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2121{
2122 return -EOPNOTSUPP;
2123}
2124EXPORT_SYMBOL(sock_no_ioctl);
2125
2126int sock_no_listen(struct socket *sock, int backlog)
2127{
2128 return -EOPNOTSUPP;
2129}
2130EXPORT_SYMBOL(sock_no_listen);
2131
2132int sock_no_shutdown(struct socket *sock, int how)
2133{
2134 return -EOPNOTSUPP;
2135}
2136EXPORT_SYMBOL(sock_no_shutdown);
2137
2138int sock_no_setsockopt(struct socket *sock, int level, int optname,
2139 char __user *optval, unsigned int optlen)
2140{
2141 return -EOPNOTSUPP;
2142}
2143EXPORT_SYMBOL(sock_no_setsockopt);
2144
2145int sock_no_getsockopt(struct socket *sock, int level, int optname,
2146 char __user *optval, int __user *optlen)
2147{
2148 return -EOPNOTSUPP;
2149}
2150EXPORT_SYMBOL(sock_no_getsockopt);
2151
2152int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2153 size_t len)
2154{
2155 return -EOPNOTSUPP;
2156}
2157EXPORT_SYMBOL(sock_no_sendmsg);
2158
2159int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
2160 size_t len, int flags)
2161{
2162 return -EOPNOTSUPP;
2163}
2164EXPORT_SYMBOL(sock_no_recvmsg);
2165
2166int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2167{
2168
2169 return -ENODEV;
2170}
2171EXPORT_SYMBOL(sock_no_mmap);
2172
2173ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2174{
2175 ssize_t res;
2176 struct msghdr msg = {.msg_flags = flags};
2177 struct kvec iov;
2178 char *kaddr = kmap(page);
2179 iov.iov_base = kaddr + offset;
2180 iov.iov_len = size;
2181 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2182 kunmap(page);
2183 return res;
2184}
2185EXPORT_SYMBOL(sock_no_sendpage);
2186
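/*
 * Default socket callbacks: wake whatever is sleeping on the socket's wait
 * queue and deliver SIGIO/SIGURG where asynchronous notification was
 * requested.
 */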
2191static void sock_def_wakeup(struct sock *sk)
2192{
2193 struct socket_wq *wq;
2194
2195 rcu_read_lock();
2196 wq = rcu_dereference(sk->sk_wq);
2197 if (wq_has_sleeper(wq))
2198 wake_up_interruptible_all(&wq->wait);
2199 rcu_read_unlock();
2200}
2201
2202static void sock_def_error_report(struct sock *sk)
2203{
2204 struct socket_wq *wq;
2205
2206 rcu_read_lock();
2207 wq = rcu_dereference(sk->sk_wq);
2208 if (wq_has_sleeper(wq))
2209 wake_up_interruptible_poll(&wq->wait, POLLERR);
2210 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2211 rcu_read_unlock();
2212}
2213
2214static void sock_def_readable(struct sock *sk, int len)
2215{
2216 struct socket_wq *wq;
2217
2218 rcu_read_lock();
2219 wq = rcu_dereference(sk->sk_wq);
2220 if (wq_has_sleeper(wq))
2221 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2222 POLLRDNORM | POLLRDBAND);
2223 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2224 rcu_read_unlock();
2225}
2226
2227static void sock_def_write_space(struct sock *sk)
2228{
2229 struct socket_wq *wq;
2230
2231 rcu_read_lock();
2232
2233
2234
2235
2236 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2237 wq = rcu_dereference(sk->sk_wq);
2238 if (wq_has_sleeper(wq))
2239 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2240 POLLWRNORM | POLLWRBAND);
2241
2242
2243 if (sock_writeable(sk))
2244 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2245 }
2246
2247 rcu_read_unlock();
2248}
2249
2250static void sock_def_destruct(struct sock *sk)
2251{
2252 kfree(sk->sk_protinfo);
2253}
2254
2255void sk_send_sigurg(struct sock *sk)
2256{
2257 if (sk->sk_socket && sk->sk_socket->file)
2258 if (send_sigurg(&sk->sk_socket->file->f_owner))
2259 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2260}
2261EXPORT_SYMBOL(sk_send_sigurg);
2262
2263void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2264 unsigned long expires)
2265{
2266 if (!mod_timer(timer, expires))
2267 sock_hold(sk);
2268}
2269EXPORT_SYMBOL(sk_reset_timer);
2270
2271void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2272{
2273 if (del_timer(timer))
2274 __sock_put(sk);
2275}
2276EXPORT_SYMBOL(sk_stop_timer);
2277
2278void sock_init_data(struct socket *sock, struct sock *sk)
2279{
2280 skb_queue_head_init(&sk->sk_receive_queue);
2281 skb_queue_head_init(&sk->sk_write_queue);
2282 skb_queue_head_init(&sk->sk_error_queue);
2283
2284 sk->sk_send_head = NULL;
2285
2286 init_timer(&sk->sk_timer);
2287
2288 sk->sk_allocation = GFP_KERNEL;
2289 sk->sk_rcvbuf = sysctl_rmem_default;
2290 sk->sk_sndbuf = sysctl_wmem_default;
2291 sk->sk_state = TCP_CLOSE;
2292 sk_set_socket(sk, sock);
2293
2294 sock_set_flag(sk, SOCK_ZAPPED);
2295
2296 if (sock) {
2297 sk->sk_type = sock->type;
2298 sk->sk_wq = sock->wq;
2299 sock->sk = sk;
2300 } else
2301 sk->sk_wq = NULL;
2302
2303 rwlock_init(&sk->sk_callback_lock);
2304 lockdep_set_class_and_name(&sk->sk_callback_lock,
2305 af_callback_keys + sk->sk_family,
2306 af_family_clock_key_strings[sk->sk_family]);
2307
2308 sk->sk_state_change = sock_def_wakeup;
2309 sk->sk_data_ready = sock_def_readable;
2310 sk->sk_write_space = sock_def_write_space;
2311 sk->sk_error_report = sock_def_error_report;
2312 sk->sk_destruct = sock_def_destruct;
2313
2314 sk->sk_frag.page = NULL;
2315 sk->sk_frag.offset = 0;
2316 sk->sk_peek_off = -1;
2317
2318 sk->sk_peer_pid = NULL;
2319 sk->sk_peer_cred = NULL;
2320 sk->sk_write_pending = 0;
2321 sk->sk_rcvlowat = 1;
2322 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2323 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2324
2325 sk->sk_stamp = ktime_set(-1L, 0);
2326
2327#ifdef CONFIG_NET_RX_BUSY_POLL
2328 sk->sk_napi_id = 0;
2329 sk->sk_ll_usec = sysctl_net_busy_read;
2330#endif
2331
2332 sk->sk_max_pacing_rate = ~0U;
2333 sk->sk_pacing_rate = ~0U;
2334
2335
2336
2337
2338 smp_wmb();
2339 atomic_set(&sk->sk_refcnt, 1);
2340 atomic_set(&sk->sk_drops, 0);
2341}
2342EXPORT_SYMBOL(sock_init_data);
2343
2344void lock_sock_nested(struct sock *sk, int subclass)
2345{
2346 might_sleep();
2347 spin_lock_bh(&sk->sk_lock.slock);
2348 if (sk->sk_lock.owned)
2349 __lock_sock(sk);
2350 sk->sk_lock.owned = 1;
2351 spin_unlock(&sk->sk_lock.slock);
2352
2353
2354
2355 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2356 local_bh_enable();
2357}
2358EXPORT_SYMBOL(lock_sock_nested);
2359
2360void release_sock(struct sock *sk)
2361{
2362
2363
2364
2365 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2366
2367 spin_lock_bh(&sk->sk_lock.slock);
2368 if (sk->sk_backlog.tail)
2369 __release_sock(sk);
2370
2371
2372
2373
2374 if (sk->sk_prot->release_cb)
2375 sk->sk_prot->release_cb(sk);
2376
2377 sock_release_ownership(sk);
2378 if (waitqueue_active(&sk->sk_lock.wq))
2379 wake_up(&sk->sk_lock.wq);
2380 spin_unlock_bh(&sk->sk_lock.slock);
2381}
2382EXPORT_SYMBOL(release_sock);
2383
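/**
 * lock_sock_fast - lock the socket, preferring the cheap spinlock path
 * @sk: socket to lock
 *
 * Returns false when the fast path was taken (sk_lock.slock held, bottom
 * halves disabled) and true when the full, sleeping socket lock had to be
 * taken instead; the caller passes the result to unlock_sock_fast() so the
 * right unlock is performed.
 */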
2394bool lock_sock_fast(struct sock *sk)
2395{
2396 might_sleep();
2397 spin_lock_bh(&sk->sk_lock.slock);
2398
2399 if (!sk->sk_lock.owned)
2400
2401
2402
2403 return false;
2404
2405 __lock_sock(sk);
2406 sk->sk_lock.owned = 1;
2407 spin_unlock(&sk->sk_lock.slock);
2408
2409
2410
2411 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2412 local_bh_enable();
2413 return true;
2414}
2415EXPORT_SYMBOL(lock_sock_fast);
2416
2417int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2418{
2419 struct timeval tv;
2420 if (!sock_flag(sk, SOCK_TIMESTAMP))
2421 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2422 tv = ktime_to_timeval(sk->sk_stamp);
2423 if (tv.tv_sec == -1)
2424 return -ENOENT;
2425 if (tv.tv_sec == 0) {
2426 sk->sk_stamp = ktime_get_real();
2427 tv = ktime_to_timeval(sk->sk_stamp);
2428 }
2429 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2430}
2431EXPORT_SYMBOL(sock_get_timestamp);
2432
2433int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2434{
2435 struct timespec ts;
2436 if (!sock_flag(sk, SOCK_TIMESTAMP))
2437 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2438 ts = ktime_to_timespec(sk->sk_stamp);
2439 if (ts.tv_sec == -1)
2440 return -ENOENT;
2441 if (ts.tv_sec == 0) {
2442 sk->sk_stamp = ktime_get_real();
2443 ts = ktime_to_timespec(sk->sk_stamp);
2444 }
2445 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2446}
2447EXPORT_SYMBOL(sock_get_timestampns);
2448
2449void sock_enable_timestamp(struct sock *sk, int flag)
2450{
2451 if (!sock_flag(sk, flag)) {
2452 unsigned long previous_flags = sk->sk_flags;
2453
2454 sock_set_flag(sk, flag);
2455
2456
2457
2458
2459
2460 if (sock_needs_netstamp(sk) &&
2461 !(previous_flags & SK_FLAGS_TIMESTAMP))
2462 net_enable_timestamp();
2463 }
2464}
2465
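/*
 * Thin wrappers that forward socket-level getsockopt/setsockopt/recvmsg to
 * the underlying struct proto, for protocols that need nothing more.
 */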
2473int sock_common_getsockopt(struct socket *sock, int level, int optname,
2474 char __user *optval, int __user *optlen)
2475{
2476 struct sock *sk = sock->sk;
2477
2478 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2479}
2480EXPORT_SYMBOL(sock_common_getsockopt);
2481
2482#ifdef CONFIG_COMPAT
2483int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2484 char __user *optval, int __user *optlen)
2485{
2486 struct sock *sk = sock->sk;
2487
2488 if (sk->sk_prot->compat_getsockopt != NULL)
2489 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2490 optval, optlen);
2491 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2492}
2493EXPORT_SYMBOL(compat_sock_common_getsockopt);
2494#endif
2495
2496int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2497 struct msghdr *msg, size_t size, int flags)
2498{
2499 struct sock *sk = sock->sk;
2500 int addr_len = 0;
2501 int err;
2502
2503 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2504 flags & ~MSG_DONTWAIT, &addr_len);
2505 if (err >= 0)
2506 msg->msg_namelen = addr_len;
2507 return err;
2508}
2509EXPORT_SYMBOL(sock_common_recvmsg);
2510
2511
2512
2513
2514int sock_common_setsockopt(struct socket *sock, int level, int optname,
2515 char __user *optval, unsigned int optlen)
2516{
2517 struct sock *sk = sock->sk;
2518
2519 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2520}
2521EXPORT_SYMBOL(sock_common_setsockopt);
2522
2523#ifdef CONFIG_COMPAT
2524int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2525 char __user *optval, unsigned int optlen)
2526{
2527 struct sock *sk = sock->sk;
2528
2529 if (sk->sk_prot->compat_setsockopt != NULL)
2530 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2531 optval, optlen);
2532 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2533}
2534EXPORT_SYMBOL(compat_sock_common_setsockopt);
2535#endif
2536
2537void sk_common_release(struct sock *sk)
2538{
2539 if (sk->sk_prot->destroy)
2540 sk->sk_prot->destroy(sk);
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550 sk->sk_prot->unhash(sk);
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564 sock_orphan(sk);
2565
2566 xfrm_sk_free_policy(sk);
2567
2568 sk_refcnt_debug_release(sk);
2569
2570 if (sk->sk_frag.page) {
2571 put_page(sk->sk_frag.page);
2572 sk->sk_frag.page = NULL;
2573 }
2574
2575 sock_put(sk);
2576}
2577EXPORT_SYMBOL(sk_common_release);
2578
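/*
 * Per-protocol socket-count ("inuse") accounting. Each registered protocol
 * is assigned a slot in a small per-cpu counter array -- one array per
 * network namespace when CONFIG_NET_NS is enabled, a single static one
 * otherwise. Updates are lock-free via __this_cpu_add(); reads sum the
 * counters over all possible CPUs, which is why the result is clamped to 0.
 */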
2579#ifdef CONFIG_PROC_FS
2580#define PROTO_INUSE_NR 64
2581struct prot_inuse {
2582 int val[PROTO_INUSE_NR];
2583};
2584
2585static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2586
2587#ifdef CONFIG_NET_NS
2588void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2589{
2590 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2591}
2592EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2593
2594int sock_prot_inuse_get(struct net *net, struct proto *prot)
2595{
2596 int cpu, idx = prot->inuse_idx;
2597 int res = 0;
2598
2599 for_each_possible_cpu(cpu)
2600 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2601
2602 return res >= 0 ? res : 0;
2603}
2604EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2605
2606static int __net_init sock_inuse_init_net(struct net *net)
2607{
2608 net->core.inuse = alloc_percpu(struct prot_inuse);
2609 return net->core.inuse ? 0 : -ENOMEM;
2610}
2611
2612static void __net_exit sock_inuse_exit_net(struct net *net)
2613{
2614 free_percpu(net->core.inuse);
2615}
2616
2617static struct pernet_operations net_inuse_ops = {
2618 .init = sock_inuse_init_net,
2619 .exit = sock_inuse_exit_net,
2620};
2621
2622static __init int net_inuse_init(void)
2623{
2624 if (register_pernet_subsys(&net_inuse_ops))
2625 panic("Cannot initialize net inuse counters");
2626
2627 return 0;
2628}
2629
2630core_initcall(net_inuse_init);
2631#else
2632static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2633
2634void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2635{
2636 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2637}
2638EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2639
2640int sock_prot_inuse_get(struct net *net, struct proto *prot)
2641{
2642 int cpu, idx = prot->inuse_idx;
2643 int res = 0;
2644
2645 for_each_possible_cpu(cpu)
2646 res += per_cpu(prot_inuse, cpu).val[idx];
2647
2648 return res >= 0 ? res : 0;
2649}
2650EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2651#endif
2652
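/*
 * Index allocation for the inuse counters above. Indices come from a bitmap;
 * the last slot (PROTO_INUSE_NR - 1) acts as an overflow sentinel: when it is
 * the only index left, exhaustion is reported and the bit is never set or
 * cleared, so any further protocols simply share that slot.
 */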
2653static void assign_proto_idx(struct proto *prot)
2654{
2655 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2656
2657 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2658 pr_err("PROTO_INUSE_NR exhausted\n");
2659 return;
2660 }
2661
2662 set_bit(prot->inuse_idx, proto_inuse_idx);
2663}
2664
2665static void release_proto_idx(struct proto *prot)
2666{
2667 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2668 clear_bit(prot->inuse_idx, proto_inuse_idx);
2669}
2670#else
2671static inline void assign_proto_idx(struct proto *prot)
2672{
2673}
2674
2675static inline void release_proto_idx(struct proto *prot)
2676{
2677}
2678#endif
2679
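/*
 * Register a protocol with the socket core: optionally create slab caches
 * for the protocol's sock objects and, when descriptors are supplied, for
 * its request_sock and timewait_sock objects, then add it to proto_list and
 * assign an inuse-counter index.
 *
 * Minimal usage sketch (illustrative only -- the "foo" protocol, its sock
 * structure and module hooks are hypothetical, not part of this file):
 *
 *	static struct proto foo_prot = {
 *		.name		= "FOO",
 *		.owner		= THIS_MODULE,
 *		.obj_size	= sizeof(struct foo_sock),
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return proto_register(&foo_prot, 1);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		proto_unregister(&foo_prot);
 *	}
 */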
2680int proto_register(struct proto *prot, int alloc_slab)
2681{
2682 if (alloc_slab) {
2683 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2684 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2685 NULL);
2686
2687 if (prot->slab == NULL) {
2688 pr_crit("%s: Can't create sock SLAB cache!\n",
2689 prot->name);
2690 goto out;
2691 }
2692
2693 if (prot->rsk_prot != NULL) {
2694 prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
2695 if (prot->rsk_prot->slab_name == NULL)
2696 goto out_free_sock_slab;
2697
2698 prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
2699 prot->rsk_prot->obj_size, 0,
2700 SLAB_HWCACHE_ALIGN, NULL);
2701
2702 if (prot->rsk_prot->slab == NULL) {
2703 pr_crit("%s: Can't create request sock SLAB cache!\n",
2704 prot->name);
2705 goto out_free_request_sock_slab_name;
2706 }
2707 }
2708
2709 if (prot->twsk_prot != NULL) {
2710 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2711
2712 if (prot->twsk_prot->twsk_slab_name == NULL)
2713 goto out_free_request_sock_slab;
2714
2715 prot->twsk_prot->twsk_slab =
2716 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2717 prot->twsk_prot->twsk_obj_size,
2718 0,
2719 SLAB_HWCACHE_ALIGN |
2720 prot->slab_flags,
2721 NULL);
2722 if (prot->twsk_prot->twsk_slab == NULL)
2723 goto out_free_timewait_sock_slab_name;
2724 }
2725 }
2726
2727 mutex_lock(&proto_list_mutex);
2728 list_add(&prot->node, &proto_list);
2729 assign_proto_idx(prot);
2730 mutex_unlock(&proto_list_mutex);
2731 return 0;
2732
2733out_free_timewait_sock_slab_name:
2734 kfree(prot->twsk_prot->twsk_slab_name);
2735out_free_request_sock_slab:
2736 if (prot->rsk_prot && prot->rsk_prot->slab) {
2737 kmem_cache_destroy(prot->rsk_prot->slab);
2738 prot->rsk_prot->slab = NULL;
2739 }
2740out_free_request_sock_slab_name:
2741 if (prot->rsk_prot)
2742 kfree(prot->rsk_prot->slab_name);
2743out_free_sock_slab:
2744 kmem_cache_destroy(prot->slab);
2745 prot->slab = NULL;
2746out:
2747 return -ENOBUFS;
2748}
2749EXPORT_SYMBOL(proto_register);
2750
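/*
 * Undo proto_register(): release the inuse-counter index, unlink the protocol
 * from proto_list and destroy whichever slab caches were created for it.
 */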
2751void proto_unregister(struct proto *prot)
2752{
2753 mutex_lock(&proto_list_mutex);
2754 release_proto_idx(prot);
2755 list_del(&prot->node);
2756 mutex_unlock(&proto_list_mutex);
2757
2758 if (prot->slab != NULL) {
2759 kmem_cache_destroy(prot->slab);
2760 prot->slab = NULL;
2761 }
2762
2763 if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
2764 kmem_cache_destroy(prot->rsk_prot->slab);
2765 kfree(prot->rsk_prot->slab_name);
2766 prot->rsk_prot->slab = NULL;
2767 }
2768
2769 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2770 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2771 kfree(prot->twsk_prot->twsk_slab_name);
2772 prot->twsk_prot->twsk_slab = NULL;
2773 }
2774}
2775EXPORT_SYMBOL(proto_unregister);
2776
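/*
 * /proc/net/protocols: one line per registered protocol listing object size,
 * live socket count, memory accounting and pressure state, max header size,
 * slab usage, owning module, and a y/n matrix of which struct proto methods
 * are implemented. The proto_list mutex is held across the whole traversal.
 */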
2777#ifdef CONFIG_PROC_FS
2778static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2779 __acquires(proto_list_mutex)
2780{
2781 mutex_lock(&proto_list_mutex);
2782 return seq_list_start_head(&proto_list, *pos);
2783}
2784
2785static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2786{
2787 return seq_list_next(v, &proto_list, pos);
2788}
2789
2790static void proto_seq_stop(struct seq_file *seq, void *v)
2791 __releases(proto_list_mutex)
2792{
2793 mutex_unlock(&proto_list_mutex);
2794}
2795
2796static char proto_method_implemented(const void *method)
2797{
2798 return method == NULL ? 'n' : 'y';
2799}
2800static long sock_prot_memory_allocated(struct proto *proto)
2801{
2802 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2803}
2804
2805static char *sock_prot_memory_pressure(struct proto *proto)
2806{
2807 return proto->memory_pressure != NULL ?
2808 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2809}
2810
2811static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2812{
2813
2814 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2815 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2816 proto->name,
2817 proto->obj_size,
2818 sock_prot_inuse_get(seq_file_net(seq), proto),
2819 sock_prot_memory_allocated(proto),
2820 sock_prot_memory_pressure(proto),
2821 proto->max_header,
2822 proto->slab == NULL ? "no" : "yes",
2823 module_name(proto->owner),
2824 proto_method_implemented(proto->close),
2825 proto_method_implemented(proto->connect),
2826 proto_method_implemented(proto->disconnect),
2827 proto_method_implemented(proto->accept),
2828 proto_method_implemented(proto->ioctl),
2829 proto_method_implemented(proto->init),
2830 proto_method_implemented(proto->destroy),
2831 proto_method_implemented(proto->shutdown),
2832 proto_method_implemented(proto->setsockopt),
2833 proto_method_implemented(proto->getsockopt),
2834 proto_method_implemented(proto->sendmsg),
2835 proto_method_implemented(proto->recvmsg),
2836 proto_method_implemented(proto->sendpage),
2837 proto_method_implemented(proto->bind),
2838 proto_method_implemented(proto->backlog_rcv),
2839 proto_method_implemented(proto->hash),
2840 proto_method_implemented(proto->unhash),
2841 proto_method_implemented(proto->get_port),
2842 proto_method_implemented(proto->enter_memory_pressure));
2843}
2844
2845static int proto_seq_show(struct seq_file *seq, void *v)
2846{
2847 if (v == &proto_list)
2848 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2849 "protocol",
2850 "size",
2851 "sockets",
2852 "memory",
2853 "press",
2854 "maxhdr",
2855 "slab",
2856 "module",
2857 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2858 else
2859 proto_seq_printf(seq, list_entry(v, struct proto, node));
2860 return 0;
2861}
2862
2863static const struct seq_operations proto_seq_ops = {
2864 .start = proto_seq_start,
2865 .next = proto_seq_next,
2866 .stop = proto_seq_stop,
2867 .show = proto_seq_show,
2868};
2869
2870static int proto_seq_open(struct inode *inode, struct file *file)
2871{
2872 return seq_open_net(inode, file, &proto_seq_ops,
2873 sizeof(struct seq_net_private));
2874}
2875
2876static const struct file_operations proto_seq_fops = {
2877 .owner = THIS_MODULE,
2878 .open = proto_seq_open,
2879 .read = seq_read,
2880 .llseek = seq_lseek,
2881 .release = seq_release_net,
2882};
2883
2884static __net_init int proto_init_net(struct net *net)
2885{
2886 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2887 return -ENOMEM;
2888
2889 return 0;
2890}
2891
2892static __net_exit void proto_exit_net(struct net *net)
2893{
2894 remove_proc_entry("protocols", net->proc_net);
2895}
2896
2897
2898static __net_initdata struct pernet_operations proto_net_ops = {
2899 .init = proto_init_net,
2900 .exit = proto_exit_net,
2901};
2902
2903static int __init proto_init(void)
2904{
2905 return register_pernet_subsys(&proto_net_ops);
2906}
2907
2908subsys_initcall(proto_init);
2909
2910#endif
2911