1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/types.h>
97#include <linux/socket.h>
98#include <linux/in.h>
99#include <linux/kernel.h>
100#include <linux/module.h>
101#include <linux/proc_fs.h>
102#include <linux/seq_file.h>
103#include <linux/sched.h>
104#include <linux/timer.h>
105#include <linux/string.h>
106#include <linux/sockios.h>
107#include <linux/net.h>
108#include <linux/mm.h>
109#include <linux/slab.h>
110#include <linux/interrupt.h>
111#include <linux/poll.h>
112#include <linux/tcp.h>
113#include <linux/init.h>
114#include <linux/highmem.h>
115#include <linux/user_namespace.h>
116#include <linux/static_key.h>
117#include <linux/memcontrol.h>
118#include <linux/prefetch.h>
119
120#include <asm/uaccess.h>
121
122#include <linux/netdevice.h>
123#include <net/protocol.h>
124#include <linux/skbuff.h>
125#include <net/net_namespace.h>
126#include <net/request_sock.h>
127#include <net/sock.h>
128#include <linux/net_tstamp.h>
129#include <net/xfrm.h>
130#include <linux/ipsec.h>
131#include <net/cls_cgroup.h>
132#include <net/netprio_cgroup.h>
133
134#include <linux/filter.h>
135
136#include <trace/events/sock.h>
137
138#ifdef CONFIG_INET
139#include <net/tcp.h>
140#endif
141
/*
 * proto_list links struct proto entries through their ->node member.
 * Every walk of the list visible in this file is done with
 * proto_list_mutex held.
 */
142static DEFINE_MUTEX(proto_list_mutex);
143static LIST_HEAD(proto_list);
144
145#ifdef CONFIG_MEMCG_KMEM
146int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
147{
148 struct proto *proto;
149 int ret = 0;
150
151 mutex_lock(&proto_list_mutex);
152 list_for_each_entry(proto, &proto_list, node) {
153 if (proto->init_cgroup) {
154 ret = proto->init_cgroup(memcg, ss);
155 if (ret)
156 goto out;
157 }
158 }
159
160 mutex_unlock(&proto_list_mutex);
161 return ret;
162out:
163 list_for_each_entry_continue_reverse(proto, &proto_list, node)
164 if (proto->destroy_cgroup)
165 proto->destroy_cgroup(memcg);
166 mutex_unlock(&proto_list_mutex);
167 return ret;
168}
169
170void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
171{
172 struct proto *proto;
173
174 mutex_lock(&proto_list_mutex);
175 list_for_each_entry_reverse(proto, &proto_list, node)
176 if (proto->destroy_cgroup)
177 proto->destroy_cgroup(memcg);
178 mutex_unlock(&proto_list_mutex);
179}
180#endif
181
182
183
184
185
/*
 * Lock class keys, one per address family.  sock_lock_init() indexes
 * both arrays with sk->sk_family and passes the entries, together with
 * the matching name strings, to sock_lock_init_class_and_name().
 */
186static struct lock_class_key af_family_keys[AF_MAX];
187static struct lock_class_key af_family_slock_keys[AF_MAX];
188
189#if defined(CONFIG_MEMCG_KMEM)
/*
 * Static key exported to other code; only built with CONFIG_MEMCG_KMEM.
 * NOTE(review): nothing in this part of the file toggles or tests it -
 * presumably it gates the memcg socket accounting paths; confirm against
 * the users.
 */
190struct static_key memcg_socket_limit_enabled;
191EXPORT_SYMBOL(memcg_socket_limit_enabled);
192#endif
193
194
195
196
197
198
/*
 * Name tables for the per-address-family lock classes.  Each table is
 * indexed by address family number (sock_lock_init() and sk_clone_lock()
 * index them with sk->sk_family), so entry N must name family N.
 *
 *  - af_family_key_strings:       names used together with af_family_keys
 *  - af_family_slock_key_strings: names used together with af_family_slock_keys
 *  - af_family_clock_key_strings: names used together with af_callback_keys
 *
 * Entries 27 and 28 carry only their number.  All three tables must stay
 * in step; entry 32 of the first table used to read "sk_lock-IUCV",
 * without the AF_ prefix that the other two tables use for the same
 * family.
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX", "sk_lock-AF_INET",
	"sk_lock-AF_AX25", "sk_lock-AF_IPX", "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE", "sk_lock-AF_ATMPVC",
	"sk_lock-AF_X25", "sk_lock-AF_INET6", "sk_lock-AF_ROSE",
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI", "sk_lock-AF_SECURITY",
	"sk_lock-AF_KEY", "sk_lock-AF_NETLINK", "sk_lock-AF_PACKET",
	"sk_lock-AF_ASH", "sk_lock-AF_ECONET", "sk_lock-AF_ATMSVC",
	"sk_lock-AF_RDS", "sk_lock-AF_SNA", "sk_lock-AF_IRDA",
	"sk_lock-AF_PPPOX", "sk_lock-AF_WANPIPE", "sk_lock-AF_LLC",
	"sk_lock-27", "sk_lock-28", "sk_lock-AF_CAN",
	"sk_lock-AF_TIPC", "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV",
	"sk_lock-AF_RXRPC", "sk_lock-AF_ISDN", "sk_lock-AF_PHONET",
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF", "sk_lock-AF_ALG",
	"sk_lock-AF_NFC", "sk_lock-AF_VSOCK", "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX", "slock-AF_INET",
	"slock-AF_AX25", "slock-AF_IPX", "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE", "slock-AF_ATMPVC",
	"slock-AF_X25", "slock-AF_INET6", "slock-AF_ROSE",
	"slock-AF_DECnet", "slock-AF_NETBEUI", "slock-AF_SECURITY",
	"slock-AF_KEY", "slock-AF_NETLINK", "slock-AF_PACKET",
	"slock-AF_ASH", "slock-AF_ECONET", "slock-AF_ATMSVC",
	"slock-AF_RDS", "slock-AF_SNA", "slock-AF_IRDA",
	"slock-AF_PPPOX", "slock-AF_WANPIPE", "slock-AF_LLC",
	"slock-27", "slock-28", "slock-AF_CAN",
	"slock-AF_TIPC", "slock-AF_BLUETOOTH", "slock-AF_IUCV",
	"slock-AF_RXRPC", "slock-AF_ISDN", "slock-AF_PHONET",
	"slock-AF_IEEE802154", "slock-AF_CAIF", "slock-AF_ALG",
	"slock-AF_NFC", "slock-AF_VSOCK", "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX", "clock-AF_INET",
	"clock-AF_AX25", "clock-AF_IPX", "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE", "clock-AF_ATMPVC",
	"clock-AF_X25", "clock-AF_INET6", "clock-AF_ROSE",
	"clock-AF_DECnet", "clock-AF_NETBEUI", "clock-AF_SECURITY",
	"clock-AF_KEY", "clock-AF_NETLINK", "clock-AF_PACKET",
	"clock-AF_ASH", "clock-AF_ECONET", "clock-AF_ATMSVC",
	"clock-AF_RDS", "clock-AF_SNA", "clock-AF_IRDA",
	"clock-AF_PPPOX", "clock-AF_WANPIPE", "clock-AF_LLC",
	"clock-27", "clock-28", "clock-AF_CAN",
	"clock-AF_TIPC", "clock-AF_BLUETOOTH", "clock-AF_IUCV",
	"clock-AF_RXRPC", "clock-AF_ISDN", "clock-AF_PHONET",
	"clock-AF_IEEE802154", "clock-AF_CAIF", "clock-AF_ALG",
	"clock-AF_NFC", "clock-AF_VSOCK", "clock-AF_MAX"
};
247
248
249
250
251
/*
 * Lock class keys, one per address family, for sk_callback_lock.
 * sk_clone_lock() indexes this array with the socket's sk_family and
 * pairs it with af_family_clock_key_strings[].
 */
252static struct lock_class_key af_callback_keys[AF_MAX];
253
254
255
256
257
258
/*
 * Initial socket buffer limits: SKB_TRUESIZE(256) multiplied by 256.
 * SK_WMEM_MAX and SK_RMEM_MAX are the initial values of the
 * sysctl_{w,r}mem_{max,default} variables defined in this file.
 * NOTE(review): presumably "256 packets of 256 payload bytes each,
 * including per-skb overhead" - confirm against SKB_TRUESIZE().
 */
259#define _SK_MEM_PACKETS 256
260#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
261#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
262#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
263
264
/*
 * Socket buffer size tunables.  sock_setsockopt() caps SO_SNDBUF requests
 * at sysctl_wmem_max and SO_RCVBUF requests at sysctl_rmem_max.  The
 * *_default values are not read in this part of the file.
 */
265__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
266EXPORT_SYMBOL(sysctl_wmem_max);
267__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
268EXPORT_SYMBOL(sysctl_rmem_max);
269__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
270__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
271
272
/*
 * Initial value is sizeof(unsigned long) * (2 * UIO_MAXIOV + 512).
 * NOTE(review): not read in this part of the file - presumably the upper
 * bound on per-socket ancillary/option memory; confirm against its users.
 */
273int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
274EXPORT_SYMBOL(sysctl_optmem_max);
275
/*
 * Static key that starts out false.  sk_set_memalloc() increments it and
 * sk_clear_memalloc() decrements it, so it tracks how many
 * set/clear calls are currently outstanding.
 */
276struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
277EXPORT_SYMBOL_GPL(memalloc_socks);
278
279
280
281
282
283
284
285
286
287void sk_set_memalloc(struct sock *sk)
288{
289 sock_set_flag(sk, SOCK_MEMALLOC);
290 sk->sk_allocation |= __GFP_MEMALLOC;
291 static_key_slow_inc(&memalloc_socks);
292}
293EXPORT_SYMBOL_GPL(sk_set_memalloc);
294
295void sk_clear_memalloc(struct sock *sk)
296{
297 sock_reset_flag(sk, SOCK_MEMALLOC);
298 sk->sk_allocation &= ~__GFP_MEMALLOC;
299 static_key_slow_dec(&memalloc_socks);
300
301
302
303
304
305
306
307
308
309
310 if (WARN_ON(sk->sk_forward_alloc))
311 sk_mem_reclaim(sk);
312}
313EXPORT_SYMBOL_GPL(sk_clear_memalloc);
314
315int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
316{
317 int ret;
318 unsigned long pflags = current->flags;
319
320
321 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
322
323 current->flags |= PF_MEMALLOC;
324 ret = sk->sk_backlog_rcv(sk, skb);
325 tsk_restore_flags(current, pflags, PF_MEMALLOC);
326
327 return ret;
328}
329EXPORT_SYMBOL(__sk_backlog_rcv);
330
331static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
332{
333 struct timeval tv;
334
335 if (optlen < sizeof(tv))
336 return -EINVAL;
337 if (copy_from_user(&tv, optval, sizeof(tv)))
338 return -EFAULT;
339 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
340 return -EDOM;
341
342 if (tv.tv_sec < 0) {
343 static int warned __read_mostly;
344
345 *timeo_p = 0;
346 if (warned < 10 && net_ratelimit()) {
347 warned++;
348 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
349 __func__, current->comm, task_pid_nr(current));
350 }
351 return 0;
352 }
353 *timeo_p = MAX_SCHEDULE_TIMEOUT;
354 if (tv.tv_sec == 0 && tv.tv_usec == 0)
355 return 0;
356 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
357 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
358 return 0;
359}
360
361static void sock_warn_obsolete_bsdism(const char *name)
362{
363 static int warned;
364 static char warncomm[TASK_COMM_LEN];
365 if (strcmp(warncomm, current->comm) && warned < 5) {
366 strcpy(warncomm, current->comm);
367 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
368 warncomm, name);
369 warned++;
370 }
371}
372
/*
 * Mask of the sk_flags bits SOCK_TIMESTAMP and
 * SOCK_TIMESTAMPING_RX_SOFTWARE.  sock_disable_timestamp() calls
 * net_disable_timestamp() once neither bit is left set, and
 * sk_clone_lock() calls net_enable_timestamp() if either is set on the
 * clone.
 */
373#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
374
375static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
376{
377 if (sk->sk_flags & flags) {
378 sk->sk_flags &= ~flags;
379 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
380 net_disable_timestamp();
381 }
382}
383
384
385int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
386{
387 int err;
388 int skb_len;
389 unsigned long flags;
390 struct sk_buff_head *list = &sk->sk_receive_queue;
391
392 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
393 atomic_inc(&sk->sk_drops);
394 trace_sock_rcvqueue_full(sk, skb);
395 return -ENOMEM;
396 }
397
398 err = sk_filter(sk, skb);
399 if (err)
400 return err;
401
402 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
403 atomic_inc(&sk->sk_drops);
404 return -ENOBUFS;
405 }
406
407 skb->dev = NULL;
408 skb_set_owner_r(skb, sk);
409
410
411
412
413
414
415 skb_len = skb->len;
416
417
418
419
420 skb_dst_force(skb);
421
422 spin_lock_irqsave(&list->lock, flags);
423 skb->dropcount = atomic_read(&sk->sk_drops);
424 __skb_queue_tail(list, skb);
425 spin_unlock_irqrestore(&list->lock, flags);
426
427 if (!sock_flag(sk, SOCK_DEAD))
428 sk->sk_data_ready(sk, skb_len);
429 return 0;
430}
431EXPORT_SYMBOL(sock_queue_rcv_skb);
432
433int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
434{
435 int rc = NET_RX_SUCCESS;
436
437 if (sk_filter(sk, skb))
438 goto discard_and_relse;
439
440 skb->dev = NULL;
441
442 if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
443 atomic_inc(&sk->sk_drops);
444 goto discard_and_relse;
445 }
446 if (nested)
447 bh_lock_sock_nested(sk);
448 else
449 bh_lock_sock(sk);
450 if (!sock_owned_by_user(sk)) {
451
452
453
454 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
455
456 rc = sk_backlog_rcv(sk, skb);
457
458 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
459 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
460 bh_unlock_sock(sk);
461 atomic_inc(&sk->sk_drops);
462 goto discard_and_relse;
463 }
464
465 bh_unlock_sock(sk);
466out:
467 sock_put(sk);
468 return rc;
469discard_and_relse:
470 kfree_skb(skb);
471 goto out;
472}
473EXPORT_SYMBOL(sk_receive_skb);
474
/**
 * sk_reset_txq - exported wrapper around sk_tx_queue_clear()
 * @sk: socket whose transmit queue selection is cleared
 */
void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
479EXPORT_SYMBOL(sk_reset_txq);
480
481struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
482{
483 struct dst_entry *dst = __sk_dst_get(sk);
484
485 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
486 sk_tx_queue_clear(sk);
487 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
488 dst_release(dst);
489 return NULL;
490 }
491
492 return dst;
493}
494EXPORT_SYMBOL(__sk_dst_check);
495
496struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
497{
498 struct dst_entry *dst = sk_dst_get(sk);
499
500 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
501 sk_dst_reset(sk);
502 dst_release(dst);
503 return NULL;
504 }
505
506 return dst;
507}
508EXPORT_SYMBOL(sk_dst_check);
509
510static int sock_setbindtodevice(struct sock *sk, char __user *optval,
511 int optlen)
512{
513 int ret = -ENOPROTOOPT;
514#ifdef CONFIG_NETDEVICES
515 struct net *net = sock_net(sk);
516 char devname[IFNAMSIZ];
517 int index;
518
519
520 ret = -EPERM;
521 if (!ns_capable(net->user_ns, CAP_NET_RAW))
522 goto out;
523
524 ret = -EINVAL;
525 if (optlen < 0)
526 goto out;
527
528
529
530
531
532
533 if (optlen > IFNAMSIZ - 1)
534 optlen = IFNAMSIZ - 1;
535 memset(devname, 0, sizeof(devname));
536
537 ret = -EFAULT;
538 if (copy_from_user(devname, optval, optlen))
539 goto out;
540
541 index = 0;
542 if (devname[0] != '\0') {
543 struct net_device *dev;
544
545 rcu_read_lock();
546 dev = dev_get_by_name_rcu(net, devname);
547 if (dev)
548 index = dev->ifindex;
549 rcu_read_unlock();
550 ret = -ENODEV;
551 if (!dev)
552 goto out;
553 }
554
555 lock_sock(sk);
556 sk->sk_bound_dev_if = index;
557 sk_dst_reset(sk);
558 release_sock(sk);
559
560 ret = 0;
561
562out:
563#endif
564
565 return ret;
566}
567
568static int sock_getbindtodevice(struct sock *sk, char __user *optval,
569 int __user *optlen, int len)
570{
571 int ret = -ENOPROTOOPT;
572#ifdef CONFIG_NETDEVICES
573 struct net *net = sock_net(sk);
574 char devname[IFNAMSIZ];
575
576 if (sk->sk_bound_dev_if == 0) {
577 len = 0;
578 goto zero;
579 }
580
581 ret = -EINVAL;
582 if (len < IFNAMSIZ)
583 goto out;
584
585 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
586 if (ret)
587 goto out;
588
589 len = strlen(devname) + 1;
590
591 ret = -EFAULT;
592 if (copy_to_user(optval, devname, len))
593 goto out;
594
595zero:
596 ret = -EFAULT;
597 if (put_user(len, optlen))
598 goto out;
599
600 ret = 0;
601
602out:
603#endif
604
605 return ret;
606}
607
/*
 * sock_valbool_flag - set or clear a socket flag from a boolean
 * @sk: socket to update
 * @bit: flag to change
 * @valbool: non-zero sets the flag, zero clears it
 */
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (!valbool) {
		sock_reset_flag(sk, bit);
		return;
	}

	sock_set_flag(sk, bit);
}
615
616
617
618
619
620
621int sock_setsockopt(struct socket *sock, int level, int optname,
622 char __user *optval, unsigned int optlen)
623{
624 struct sock *sk = sock->sk;
625 int val;
626 int valbool;
627 struct linger ling;
628 int ret = 0;
629
630
631
632
633
634 if (optname == SO_BINDTODEVICE)
635 return sock_setbindtodevice(sk, optval, optlen);
636
637 if (optlen < sizeof(int))
638 return -EINVAL;
639
640 if (get_user(val, (int __user *)optval))
641 return -EFAULT;
642
643 valbool = val ? 1 : 0;
644
645 lock_sock(sk);
646
647 switch (optname) {
648 case SO_DEBUG:
649 if (val && !capable(CAP_NET_ADMIN))
650 ret = -EACCES;
651 else
652 sock_valbool_flag(sk, SOCK_DBG, valbool);
653 break;
654 case SO_REUSEADDR:
655 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
656 break;
657 case SO_REUSEPORT:
658 sk->sk_reuseport = valbool;
659 break;
660 case SO_TYPE:
661 case SO_PROTOCOL:
662 case SO_DOMAIN:
663 case SO_ERROR:
664 ret = -ENOPROTOOPT;
665 break;
666 case SO_DONTROUTE:
667 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
668 break;
669 case SO_BROADCAST:
670 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
671 break;
672 case SO_SNDBUF:
673
674
675
676
677
678 val = min_t(u32, val, sysctl_wmem_max);
679set_sndbuf:
680 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
681 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
682
683 sk->sk_write_space(sk);
684 break;
685
686 case SO_SNDBUFFORCE:
687 if (!capable(CAP_NET_ADMIN)) {
688 ret = -EPERM;
689 break;
690 }
691 goto set_sndbuf;
692
693 case SO_RCVBUF:
694
695
696
697
698
699 val = min_t(u32, val, sysctl_rmem_max);
700set_rcvbuf:
701 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
718 break;
719
720 case SO_RCVBUFFORCE:
721 if (!capable(CAP_NET_ADMIN)) {
722 ret = -EPERM;
723 break;
724 }
725 goto set_rcvbuf;
726
727 case SO_KEEPALIVE:
728#ifdef CONFIG_INET
729 if (sk->sk_protocol == IPPROTO_TCP &&
730 sk->sk_type == SOCK_STREAM)
731 tcp_set_keepalive(sk, valbool);
732#endif
733 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
734 break;
735
736 case SO_OOBINLINE:
737 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
738 break;
739
740 case SO_NO_CHECK:
741 sk->sk_no_check = valbool;
742 break;
743
744 case SO_PRIORITY:
745 if ((val >= 0 && val <= 6) ||
746 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
747 sk->sk_priority = val;
748 else
749 ret = -EPERM;
750 break;
751
752 case SO_LINGER:
753 if (optlen < sizeof(ling)) {
754 ret = -EINVAL;
755 break;
756 }
757 if (copy_from_user(&ling, optval, sizeof(ling))) {
758 ret = -EFAULT;
759 break;
760 }
761 if (!ling.l_onoff)
762 sock_reset_flag(sk, SOCK_LINGER);
763 else {
764#if (BITS_PER_LONG == 32)
765 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
766 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
767 else
768#endif
769 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
770 sock_set_flag(sk, SOCK_LINGER);
771 }
772 break;
773
774 case SO_BSDCOMPAT:
775 sock_warn_obsolete_bsdism("setsockopt");
776 break;
777
778 case SO_PASSCRED:
779 if (valbool)
780 set_bit(SOCK_PASSCRED, &sock->flags);
781 else
782 clear_bit(SOCK_PASSCRED, &sock->flags);
783 break;
784
785 case SO_TIMESTAMP:
786 case SO_TIMESTAMPNS:
787 if (valbool) {
788 if (optname == SO_TIMESTAMP)
789 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
790 else
791 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
792 sock_set_flag(sk, SOCK_RCVTSTAMP);
793 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
794 } else {
795 sock_reset_flag(sk, SOCK_RCVTSTAMP);
796 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
797 }
798 break;
799
800 case SO_TIMESTAMPING:
801 if (val & ~SOF_TIMESTAMPING_MASK) {
802 ret = -EINVAL;
803 break;
804 }
805 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
806 val & SOF_TIMESTAMPING_TX_HARDWARE);
807 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
808 val & SOF_TIMESTAMPING_TX_SOFTWARE);
809 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
810 val & SOF_TIMESTAMPING_RX_HARDWARE);
811 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
812 sock_enable_timestamp(sk,
813 SOCK_TIMESTAMPING_RX_SOFTWARE);
814 else
815 sock_disable_timestamp(sk,
816 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
817 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
818 val & SOF_TIMESTAMPING_SOFTWARE);
819 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
820 val & SOF_TIMESTAMPING_SYS_HARDWARE);
821 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
822 val & SOF_TIMESTAMPING_RAW_HARDWARE);
823 break;
824
825 case SO_RCVLOWAT:
826 if (val < 0)
827 val = INT_MAX;
828 sk->sk_rcvlowat = val ? : 1;
829 break;
830
831 case SO_RCVTIMEO:
832 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
833 break;
834
835 case SO_SNDTIMEO:
836 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
837 break;
838
839 case SO_ATTACH_FILTER:
840 ret = -EINVAL;
841 if (optlen == sizeof(struct sock_fprog)) {
842 struct sock_fprog fprog;
843
844 ret = -EFAULT;
845 if (copy_from_user(&fprog, optval, sizeof(fprog)))
846 break;
847
848 ret = sk_attach_filter(&fprog, sk);
849 }
850 break;
851
852 case SO_DETACH_FILTER:
853 ret = sk_detach_filter(sk);
854 break;
855
856 case SO_LOCK_FILTER:
857 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
858 ret = -EPERM;
859 else
860 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
861 break;
862
863 case SO_PASSSEC:
864 if (valbool)
865 set_bit(SOCK_PASSSEC, &sock->flags);
866 else
867 clear_bit(SOCK_PASSSEC, &sock->flags);
868 break;
869 case SO_MARK:
870 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
871 ret = -EPERM;
872 else
873 sk->sk_mark = val;
874 break;
875
876
877
878 case SO_RXQ_OVFL:
879 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
880 break;
881
882 case SO_WIFI_STATUS:
883 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
884 break;
885
886 case SO_PEEK_OFF:
887 if (sock->ops->set_peek_off)
888 sock->ops->set_peek_off(sk, val);
889 else
890 ret = -EOPNOTSUPP;
891 break;
892
893 case SO_NOFCS:
894 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
895 break;
896
897 case SO_SELECT_ERR_QUEUE:
898 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
899 break;
900
901 default:
902 ret = -ENOPROTOOPT;
903 break;
904 }
905 release_sock(sk);
906 return ret;
907}
908EXPORT_SYMBOL(sock_setsockopt);
909
910
911void cred_to_ucred(struct pid *pid, const struct cred *cred,
912 struct ucred *ucred)
913{
914 ucred->pid = pid_vnr(pid);
915 ucred->uid = ucred->gid = -1;
916 if (cred) {
917 struct user_namespace *current_ns = current_user_ns();
918
919 ucred->uid = from_kuid_munged(current_ns, cred->euid);
920 ucred->gid = from_kgid_munged(current_ns, cred->egid);
921 }
922}
923EXPORT_SYMBOL_GPL(cred_to_ucred);
924
925int sock_getsockopt(struct socket *sock, int level, int optname,
926 char __user *optval, int __user *optlen)
927{
928 struct sock *sk = sock->sk;
929
930 union {
931 int val;
932 struct linger ling;
933 struct timeval tm;
934 } v;
935
936 int lv = sizeof(int);
937 int len;
938
939 if (get_user(len, optlen))
940 return -EFAULT;
941 if (len < 0)
942 return -EINVAL;
943
944 memset(&v, 0, sizeof(v));
945
946 switch (optname) {
947 case SO_DEBUG:
948 v.val = sock_flag(sk, SOCK_DBG);
949 break;
950
951 case SO_DONTROUTE:
952 v.val = sock_flag(sk, SOCK_LOCALROUTE);
953 break;
954
955 case SO_BROADCAST:
956 v.val = sock_flag(sk, SOCK_BROADCAST);
957 break;
958
959 case SO_SNDBUF:
960 v.val = sk->sk_sndbuf;
961 break;
962
963 case SO_RCVBUF:
964 v.val = sk->sk_rcvbuf;
965 break;
966
967 case SO_REUSEADDR:
968 v.val = sk->sk_reuse;
969 break;
970
971 case SO_REUSEPORT:
972 v.val = sk->sk_reuseport;
973 break;
974
975 case SO_KEEPALIVE:
976 v.val = sock_flag(sk, SOCK_KEEPOPEN);
977 break;
978
979 case SO_TYPE:
980 v.val = sk->sk_type;
981 break;
982
983 case SO_PROTOCOL:
984 v.val = sk->sk_protocol;
985 break;
986
987 case SO_DOMAIN:
988 v.val = sk->sk_family;
989 break;
990
991 case SO_ERROR:
992 v.val = -sock_error(sk);
993 if (v.val == 0)
994 v.val = xchg(&sk->sk_err_soft, 0);
995 break;
996
997 case SO_OOBINLINE:
998 v.val = sock_flag(sk, SOCK_URGINLINE);
999 break;
1000
1001 case SO_NO_CHECK:
1002 v.val = sk->sk_no_check;
1003 break;
1004
1005 case SO_PRIORITY:
1006 v.val = sk->sk_priority;
1007 break;
1008
1009 case SO_LINGER:
1010 lv = sizeof(v.ling);
1011 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1012 v.ling.l_linger = sk->sk_lingertime / HZ;
1013 break;
1014
1015 case SO_BSDCOMPAT:
1016 sock_warn_obsolete_bsdism("getsockopt");
1017 break;
1018
1019 case SO_TIMESTAMP:
1020 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1021 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1022 break;
1023
1024 case SO_TIMESTAMPNS:
1025 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1026 break;
1027
1028 case SO_TIMESTAMPING:
1029 v.val = 0;
1030 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
1031 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
1032 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
1033 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
1034 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
1035 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
1036 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
1037 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
1038 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
1039 v.val |= SOF_TIMESTAMPING_SOFTWARE;
1040 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
1041 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
1042 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
1043 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
1044 break;
1045
1046 case SO_RCVTIMEO:
1047 lv = sizeof(struct timeval);
1048 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1049 v.tm.tv_sec = 0;
1050 v.tm.tv_usec = 0;
1051 } else {
1052 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1053 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1054 }
1055 break;
1056
1057 case SO_SNDTIMEO:
1058 lv = sizeof(struct timeval);
1059 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1060 v.tm.tv_sec = 0;
1061 v.tm.tv_usec = 0;
1062 } else {
1063 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1064 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1065 }
1066 break;
1067
1068 case SO_RCVLOWAT:
1069 v.val = sk->sk_rcvlowat;
1070 break;
1071
1072 case SO_SNDLOWAT:
1073 v.val = 1;
1074 break;
1075
1076 case SO_PASSCRED:
1077 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1078 break;
1079
1080 case SO_PEERCRED:
1081 {
1082 struct ucred peercred;
1083 if (len > sizeof(peercred))
1084 len = sizeof(peercred);
1085 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1086 if (copy_to_user(optval, &peercred, len))
1087 return -EFAULT;
1088 goto lenout;
1089 }
1090
1091 case SO_PEERNAME:
1092 {
1093 char address[128];
1094
1095 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1096 return -ENOTCONN;
1097 if (lv < len)
1098 return -EINVAL;
1099 if (copy_to_user(optval, address, len))
1100 return -EFAULT;
1101 goto lenout;
1102 }
1103
1104
1105
1106
1107 case SO_ACCEPTCONN:
1108 v.val = sk->sk_state == TCP_LISTEN;
1109 break;
1110
1111 case SO_PASSSEC:
1112 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1113 break;
1114
1115 case SO_PEERSEC:
1116 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1117
1118 case SO_MARK:
1119 v.val = sk->sk_mark;
1120 break;
1121
1122 case SO_RXQ_OVFL:
1123 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1124 break;
1125
1126 case SO_WIFI_STATUS:
1127 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1128 break;
1129
1130 case SO_PEEK_OFF:
1131 if (!sock->ops->set_peek_off)
1132 return -EOPNOTSUPP;
1133
1134 v.val = sk->sk_peek_off;
1135 break;
1136 case SO_NOFCS:
1137 v.val = sock_flag(sk, SOCK_NOFCS);
1138 break;
1139
1140 case SO_BINDTODEVICE:
1141 return sock_getbindtodevice(sk, optval, optlen, len);
1142
1143 case SO_GET_FILTER:
1144 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1145 if (len < 0)
1146 return len;
1147
1148 goto lenout;
1149
1150 case SO_LOCK_FILTER:
1151 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1152 break;
1153
1154 case SO_SELECT_ERR_QUEUE:
1155 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1156 break;
1157
1158 default:
1159 return -ENOPROTOOPT;
1160 }
1161
1162 if (len > lv)
1163 len = lv;
1164 if (copy_to_user(optval, &v, len))
1165 return -EFAULT;
1166lenout:
1167 if (put_user(len, optlen))
1168 return -EFAULT;
1169 return 0;
1170}
1171
1172
1173
1174
1175
1176
1177static inline void sock_lock_init(struct sock *sk)
1178{
1179 sock_lock_init_class_and_name(sk,
1180 af_family_slock_key_strings[sk->sk_family],
1181 af_family_slock_keys + sk->sk_family,
1182 af_family_key_strings[sk->sk_family],
1183 af_family_keys + sk->sk_family);
1184}
1185
1186
1187
1188
1189
1190
1191static void sock_copy(struct sock *nsk, const struct sock *osk)
1192{
1193#ifdef CONFIG_SECURITY_NETWORK
1194 void *sptr = nsk->sk_security;
1195#endif
1196 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1197
1198 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1199 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1200
1201#ifdef CONFIG_SECURITY_NETWORK
1202 nsk->sk_security = sptr;
1203 security_sk_clone(osk, nsk);
1204#endif
1205}
1206
1207void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1208{
1209 unsigned long nulls1, nulls2;
1210
1211 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1212 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1213 if (nulls1 > nulls2)
1214 swap(nulls1, nulls2);
1215
1216 if (nulls1 != 0)
1217 memset((char *)sk, 0, nulls1);
1218 memset((char *)sk + nulls1 + sizeof(void *), 0,
1219 nulls2 - nulls1 - sizeof(void *));
1220 memset((char *)sk + nulls2 + sizeof(void *), 0,
1221 size - nulls2 - sizeof(void *));
1222}
1223EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1224
1225static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1226 int family)
1227{
1228 struct sock *sk;
1229 struct kmem_cache *slab;
1230
1231 slab = prot->slab;
1232 if (slab != NULL) {
1233 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1234 if (!sk)
1235 return sk;
1236 if (priority & __GFP_ZERO) {
1237 if (prot->clear_sk)
1238 prot->clear_sk(sk, prot->obj_size);
1239 else
1240 sk_prot_clear_nulls(sk, prot->obj_size);
1241 }
1242 } else
1243 sk = kmalloc(prot->obj_size, priority);
1244
1245 if (sk != NULL) {
1246 kmemcheck_annotate_bitfield(sk, flags);
1247
1248 if (security_sk_alloc(sk, family, priority))
1249 goto out_free;
1250
1251 if (!try_module_get(prot->owner))
1252 goto out_free_sec;
1253 sk_tx_queue_clear(sk);
1254 }
1255
1256 return sk;
1257
1258out_free_sec:
1259 security_sk_free(sk);
1260out_free:
1261 if (slab != NULL)
1262 kmem_cache_free(slab, sk);
1263 else
1264 kfree(sk);
1265 return NULL;
1266}
1267
1268static void sk_prot_free(struct proto *prot, struct sock *sk)
1269{
1270 struct kmem_cache *slab;
1271 struct module *owner;
1272
1273 owner = prot->owner;
1274 slab = prot->slab;
1275
1276 security_sk_free(sk);
1277 if (slab != NULL)
1278 kmem_cache_free(slab, sk);
1279 else
1280 kfree(sk);
1281 module_put(owner);
1282}
1283
1284#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
1285void sock_update_classid(struct sock *sk)
1286{
1287 u32 classid;
1288
1289 classid = task_cls_classid(current);
1290 if (classid != sk->sk_classid)
1291 sk->sk_classid = classid;
1292}
1293EXPORT_SYMBOL(sock_update_classid);
1294#endif
1295
1296#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
1297void sock_update_netprioidx(struct sock *sk)
1298{
1299 if (in_interrupt())
1300 return;
1301
1302 sk->sk_cgrp_prioidx = task_netprioidx(current);
1303}
1304EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1305#endif
1306
1307
1308
1309
1310
1311
1312
1313
1314struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1315 struct proto *prot)
1316{
1317 struct sock *sk;
1318
1319 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1320 if (sk) {
1321 sk->sk_family = family;
1322
1323
1324
1325
1326 sk->sk_prot = sk->sk_prot_creator = prot;
1327 sock_lock_init(sk);
1328 sock_net_set(sk, get_net(net));
1329 atomic_set(&sk->sk_wmem_alloc, 1);
1330
1331 sock_update_classid(sk);
1332 sock_update_netprioidx(sk);
1333 }
1334
1335 return sk;
1336}
1337EXPORT_SYMBOL(sk_alloc);
1338
1339static void __sk_free(struct sock *sk)
1340{
1341 struct sk_filter *filter;
1342
1343 if (sk->sk_destruct)
1344 sk->sk_destruct(sk);
1345
1346 filter = rcu_dereference_check(sk->sk_filter,
1347 atomic_read(&sk->sk_wmem_alloc) == 0);
1348 if (filter) {
1349 sk_filter_uncharge(sk, filter);
1350 RCU_INIT_POINTER(sk->sk_filter, NULL);
1351 }
1352
1353 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1354
1355 if (atomic_read(&sk->sk_omem_alloc))
1356 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1357 __func__, atomic_read(&sk->sk_omem_alloc));
1358
1359 if (sk->sk_peer_cred)
1360 put_cred(sk->sk_peer_cred);
1361 put_pid(sk->sk_peer_pid);
1362 put_net(sock_net(sk));
1363 sk_prot_free(sk->sk_prot_creator, sk);
1364}
1365
1366void sk_free(struct sock *sk)
1367{
1368
1369
1370
1371
1372
1373 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1374 __sk_free(sk);
1375}
1376EXPORT_SYMBOL(sk_free);
1377
1378
1379
1380
1381
1382
1383
1384
1385void sk_release_kernel(struct sock *sk)
1386{
1387 if (sk == NULL || sk->sk_socket == NULL)
1388 return;
1389
1390 sock_hold(sk);
1391 sock_release(sk->sk_socket);
1392 release_net(sock_net(sk));
1393 sock_net_set(sk, get_net(&init_net));
1394 sock_put(sk);
1395}
1396EXPORT_SYMBOL(sk_release_kernel);
1397
1398static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1399{
1400 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1401 sock_update_memcg(newsk);
1402}
1403
1404
1405
1406
1407
1408
1409
1410
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: allocation flags passed to sk_prot_alloc()
 *
 *	Returns the clone with bh_lock_sock() held (it is taken below and
 *	not released on the success path), or NULL if allocation or xfrm
 *	policy cloning fails.
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		/* Start from the parent's state; per-socket fields are reset below. */
		sock_copy(newsk, sk);

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc starts at one: that unit is the one sk_free()
		 * drops (see sk_free() and sock_wfree()).
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);
#ifdef CONFIG_NET_DMA
		skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif

		spin_lock_init(&newsk->sk_dst_lock);
		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache	= NULL;
		newsk->sk_wmem_queued	= 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head	= NULL;
		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		/* The clone shares the parent's filter; charge it to newsk too. */
		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/*
			 * newsk is still a raw copy of the parent, so clear the
			 * inherited destructor and do a plain sk_free().
			 */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err	   = 0;
		newsk->sk_priority = 0;
		/*
		 * Commit all prior initialisation to memory before publishing
		 * a non-zero sk_refcnt.
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * The debug counter is bumped through newsk->sk_prot, which was
		 * copied from the parent, so it lands in the parent's protocol.
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		/* Timestamp flags were inherited; account for this extra user. */
		if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
1506
1507void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1508{
1509 __sk_dst_set(sk, dst);
1510 sk->sk_route_caps = dst->dev->features;
1511 if (sk->sk_route_caps & NETIF_F_GSO)
1512 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1513 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1514 if (sk_can_gso(sk)) {
1515 if (dst->header_len) {
1516 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1517 } else {
1518 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1519 sk->sk_gso_max_size = dst->dev->gso_max_size;
1520 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1521 }
1522 }
1523}
1524EXPORT_SYMBOL_GPL(sk_setup_caps);
1525
1526
1527
1528
1529
1530
1531
1532
1533
/*
 * Write buffer destructor: uncharge skb->truesize from the owning
 * socket's sk_wmem_alloc and free the socket if nothing is left.
 */
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep one unit on sk_wmem_alloc while sk_write_space() runs
		 * so the counter cannot reach zero during the callback; the
		 * remaining unit is released below.
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * If sk_wmem_alloc reaches zero here, finish the free that sk_free()
	 * could not do while buffers were still charged.
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);
1556
1557
1558
1559
1560void sock_rfree(struct sk_buff *skb)
1561{
1562 struct sock *sk = skb->sk;
1563 unsigned int len = skb->truesize;
1564
1565 atomic_sub(len, &sk->sk_rmem_alloc);
1566 sk_mem_uncharge(sk, len);
1567}
1568EXPORT_SYMBOL(sock_rfree);
1569
1570void sock_edemux(struct sk_buff *skb)
1571{
1572 struct sock *sk = skb->sk;
1573
1574#ifdef CONFIG_INET
1575 if (sk->sk_state == TCP_TIME_WAIT)
1576 inet_twsk_put(inet_twsk(sk));
1577 else
1578#endif
1579 sock_put(sk);
1580}
1581EXPORT_SYMBOL(sock_edemux);
1582
1583kuid_t sock_i_uid(struct sock *sk)
1584{
1585 kuid_t uid;
1586
1587 read_lock_bh(&sk->sk_callback_lock);
1588 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1589 read_unlock_bh(&sk->sk_callback_lock);
1590 return uid;
1591}
1592EXPORT_SYMBOL(sock_i_uid);
1593
1594unsigned long sock_i_ino(struct sock *sk)
1595{
1596 unsigned long ino;
1597
1598 read_lock_bh(&sk->sk_callback_lock);
1599 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1600 read_unlock_bh(&sk->sk_callback_lock);
1601 return ino;
1602}
1603EXPORT_SYMBOL(sock_i_ino);
1604
1605
1606
1607
1608struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1609 gfp_t priority)
1610{
1611 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1612 struct sk_buff *skb = alloc_skb(size, priority);
1613 if (skb) {
1614 skb_set_owner_w(skb, sk);
1615 return skb;
1616 }
1617 }
1618 return NULL;
1619}
1620EXPORT_SYMBOL(sock_wmalloc);
1621
1622
1623
1624
1625struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
1626 gfp_t priority)
1627{
1628 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1629 struct sk_buff *skb = alloc_skb(size, priority);
1630 if (skb) {
1631 skb_set_owner_r(skb, sk);
1632 return skb;
1633 }
1634 }
1635 return NULL;
1636}
1637
1638
1639
1640
1641void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1642{
1643 if ((unsigned int)size <= sysctl_optmem_max &&
1644 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1645 void *mem;
1646
1647
1648
1649 atomic_add(size, &sk->sk_omem_alloc);
1650 mem = kmalloc(size, priority);
1651 if (mem)
1652 return mem;
1653 atomic_sub(size, &sk->sk_omem_alloc);
1654 }
1655 return NULL;
1656}
1657EXPORT_SYMBOL(sock_kmalloc);
1658
1659
1660
1661
1662void sock_kfree_s(struct sock *sk, void *mem, int size)
1663{
1664 kfree(mem);
1665 atomic_sub(size, &sk->sk_omem_alloc);
1666}
1667EXPORT_SYMBOL(sock_kfree_s);
1668
1669
1670
1671
/*
 * Sleep until write memory may be available on @sk.
 *
 * Loops until the timeout runs out, a signal is pending, sk_wmem_alloc
 * drops below sk_sndbuf, the send side is shut down, or sk_err is set.
 * Returns the remaining timeout (0 if it expired).
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		/* Queue ourselves before re-testing the wake-up conditions. */
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
1695
1696
1697
1698
1699
1700
1701struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1702 unsigned long data_len, int noblock,
1703 int *errcode)
1704{
1705 struct sk_buff *skb;
1706 gfp_t gfp_mask;
1707 long timeo;
1708 int err;
1709 int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
1710
1711 err = -EMSGSIZE;
1712 if (npages > MAX_SKB_FRAGS)
1713 goto failure;
1714
1715 gfp_mask = sk->sk_allocation;
1716 if (gfp_mask & __GFP_WAIT)
1717 gfp_mask |= __GFP_REPEAT;
1718
1719 timeo = sock_sndtimeo(sk, noblock);
1720 while (1) {
1721 err = sock_error(sk);
1722 if (err != 0)
1723 goto failure;
1724
1725 err = -EPIPE;
1726 if (sk->sk_shutdown & SEND_SHUTDOWN)
1727 goto failure;
1728
1729 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1730 skb = alloc_skb(header_len, gfp_mask);
1731 if (skb) {
1732 int i;
1733
1734
1735 if (!data_len)
1736 break;
1737
1738 skb->truesize += data_len;
1739 skb_shinfo(skb)->nr_frags = npages;
1740 for (i = 0; i < npages; i++) {
1741 struct page *page;
1742
1743 page = alloc_pages(sk->sk_allocation, 0);
1744 if (!page) {
1745 err = -ENOBUFS;
1746 skb_shinfo(skb)->nr_frags = i;
1747 kfree_skb(skb);
1748 goto failure;
1749 }
1750
1751 __skb_fill_page_desc(skb, i,
1752 page, 0,
1753 (data_len >= PAGE_SIZE ?
1754 PAGE_SIZE :
1755 data_len));
1756 data_len -= PAGE_SIZE;
1757 }
1758
1759
1760 break;
1761 }
1762 err = -ENOBUFS;
1763 goto failure;
1764 }
1765 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1766 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1767 err = -EAGAIN;
1768 if (!timeo)
1769 goto failure;
1770 if (signal_pending(current))
1771 goto interrupted;
1772 timeo = sock_wait_for_wmem(sk, timeo);
1773 }
1774
1775 skb_set_owner_w(skb, sk);
1776 return skb;
1777
1778interrupted:
1779 err = sock_intr_errno(timeo);
1780failure:
1781 *errcode = err;
1782 return NULL;
1783}
1784EXPORT_SYMBOL(sock_alloc_send_pskb);
1785
/*
 * Linear-only variant of sock_alloc_send_pskb(): @size bytes of header
 * area and no paged data.  Return value and *@errcode are those of
 * sock_alloc_send_pskb().
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode);
}
EXPORT_SYMBOL(sock_alloc_send_skb);
1792
1793
/* Largest page order tried for a fragment: enough pages to hold 32768 bytes. */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)

/*
 * Make sure @pfrag has room for more data.
 *
 * Returns true if the current page can be reused or a new one was
 * allocated.  Returns false if no page could be allocated at any order,
 * after signalling memory pressure and moderating the send buffer.
 */
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	int order;

	if (pfrag->page) {
		/* We hold the only reference: recycle the page from offset 0. */
		if (atomic_read(&pfrag->page->_count) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset < pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	/* Only try high-order pages when the allocation is allowed to wait. */
	order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0;

	/* Fall back one order at a time down to a single page. */
	do {
		gfp_t gfp = sk->sk_allocation;

		if (order)
			gfp |= __GFP_COMP | __GFP_NOWARN;
		pfrag->page = alloc_pages(gfp, order);
		if (likely(pfrag->page)) {
			pfrag->offset = 0;
			pfrag->size = PAGE_SIZE << order;
			return true;
		}
	} while (--order >= 0);

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);
1831
/*
 * Wait until the socket is no longer owned by a user context.
 *
 * Entered and left with sk_lock.slock held (BH disabled); the spinlock is
 * dropped only around schedule().
 */
static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* Queue as an exclusive waiter before dropping the spinlock. */
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}
1849
/*
 * Process every skb queued on the socket backlog.
 *
 * Entered and left with the socket spinlock held.  Each pass detaches the
 * whole backlog list under the lock, then feeds it to sk_backlog_rcv()
 * with the lock dropped; the outer loop repeats if new skbs were queued
 * in the meantime.
 */
static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		/* Take the list private: producers now start a fresh backlog. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * The list being walked is private, so it is safe to
			 * give other tasks a chance to run between skbs.
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * The length is reset only here, after the backlog has been fully
	 * drained, not once per pass.
	 */
	sk->sk_backlog.len = 0;
}
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: in/out timeout, handed to sk_wait_event()
 *
 * SOCK_ASYNC_WAITDATA is set on the socket for the duration of the wait.
 * The wake-up condition is a non-empty sk_receive_queue.  Returns
 * whatever sk_wait_event() evaluates to.
 */
int sk_wait_data(struct sock *sk, long *timeo)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923int __sk_mem_schedule(struct sock *sk, int size, int kind)
1924{
1925 struct proto *prot = sk->sk_prot;
1926 int amt = sk_mem_pages(size);
1927 long allocated;
1928 int parent_status = UNDER_LIMIT;
1929
1930 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
1931
1932 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
1933
1934
1935 if (parent_status == UNDER_LIMIT &&
1936 allocated <= sk_prot_mem_limits(sk, 0)) {
1937 sk_leave_memory_pressure(sk);
1938 return 1;
1939 }
1940
1941
1942 if ((parent_status > SOFT_LIMIT) ||
1943 allocated > sk_prot_mem_limits(sk, 1))
1944 sk_enter_memory_pressure(sk);
1945
1946
1947 if ((parent_status == OVER_LIMIT) ||
1948 (allocated > sk_prot_mem_limits(sk, 2)))
1949 goto suppress_allocation;
1950
1951
1952 if (kind == SK_MEM_RECV) {
1953 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
1954 return 1;
1955
1956 } else {
1957 if (sk->sk_type == SOCK_STREAM) {
1958 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
1959 return 1;
1960 } else if (atomic_read(&sk->sk_wmem_alloc) <
1961 prot->sysctl_wmem[0])
1962 return 1;
1963 }
1964
1965 if (sk_has_memory_pressure(sk)) {
1966 int alloc;
1967
1968 if (!sk_under_memory_pressure(sk))
1969 return 1;
1970 alloc = sk_sockets_allocated_read_positive(sk);
1971 if (sk_prot_mem_limits(sk, 2) > alloc *
1972 sk_mem_pages(sk->sk_wmem_queued +
1973 atomic_read(&sk->sk_rmem_alloc) +
1974 sk->sk_forward_alloc))
1975 return 1;
1976 }
1977
1978suppress_allocation:
1979
1980 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
1981 sk_stream_moderate_sndbuf(sk);
1982
1983
1984
1985
1986 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
1987 return 1;
1988 }
1989
1990 trace_sock_exceed_buf_limit(sk, prot, allocated);
1991
1992
1993 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
1994
1995 sk_memory_allocated_sub(sk, amt);
1996
1997 return 0;
1998}
1999EXPORT_SYMBOL(__sk_mem_schedule);
2000
2001
2002
2003
2004
2005void __sk_mem_reclaim(struct sock *sk)
2006{
2007 sk_memory_allocated_sub(sk,
2008 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
2009 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2010
2011 if (sk_under_memory_pressure(sk) &&
2012 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2013 sk_leave_memory_pressure(sk);
2014}
2015EXPORT_SYMBOL(__sk_mem_reclaim);
2016
2017
2018
2019
2020
2021
2022
2023
2024
/* Stub bind handler: ignores its arguments and always returns -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);
2030
/* Stub connect handler: always returns -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);
2037
/* Stub socketpair handler: always returns -EOPNOTSUPP. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);
2043
/* Stub accept handler: always returns -EOPNOTSUPP. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);
2049
/* Stub getname handler: always returns -EOPNOTSUPP; @saddr and @len are untouched. */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);
2056
/* Stub poll handler: reports an empty event mask (0) and never touches @pt. */
unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);
2062
/* Stub ioctl handler: always returns -EOPNOTSUPP. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);
2068
/* Stub listen handler: always returns -EOPNOTSUPP. */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);
2074
/* Stub shutdown handler: always returns -EOPNOTSUPP. */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);
2080
/* Stub setsockopt handler: always returns -EOPNOTSUPP; @optval is never read. */
int sock_no_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);
2087
/* Stub getsockopt handler: always returns -EOPNOTSUPP; nothing is written to user space. */
int sock_no_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);
2094
/* Stub sendmsg handler: always returns -EOPNOTSUPP. */
int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);
2101
/* Stub recvmsg handler: always returns -EOPNOTSUPP. */
int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);
2108
/* Stub mmap handler: always fails, with -ENODEV rather than -EOPNOTSUPP. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Unlike the other stubs, this one reports -ENODEV. */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);
2115
2116ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2117{
2118 ssize_t res;
2119 struct msghdr msg = {.msg_flags = flags};
2120 struct kvec iov;
2121 char *kaddr = kmap(page);
2122 iov.iov_base = kaddr + offset;
2123 iov.iov_len = size;
2124 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2125 kunmap(page);
2126 return res;
2127}
2128EXPORT_SYMBOL(sock_no_sendpage);
2129
2130
2131
2132
2133
/*
 * Default sk_state_change callback (installed by sock_init_data()):
 * wake every interruptible sleeper on the socket's wait queue.
 */
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	/* sk_wq is RCU-managed; hold the read side while using it. */
	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}
2144
/*
 * Default sk_error_report callback: wake pollers with POLLERR and send
 * the POLL_ERR async notification.
 */
static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	/* Async notification is sent whether or not anyone was sleeping. */
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}
2156
/*
 * Default sk_data_ready callback: wake pollers waiting for input and send
 * the POLL_IN async notification.  @len is not used here.
 */
static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}
2169
/*
 * Default sk_write_space callback: wake writers once enough send space
 * has been freed.
 */
static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/*
	 * Only wake writers once sk_wmem_alloc has dropped to at most half
	 * of sk_sndbuf, so a woken writer can make meaningful progress.
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Async notification is further gated on sock_writeable(). */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}
2192
/* Default sk_destruct callback: free the sk_protinfo allocation, if any. */
static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}
2197
2198void sk_send_sigurg(struct sock *sk)
2199{
2200 if (sk->sk_socket && sk->sk_socket->file)
2201 if (send_sigurg(&sk->sk_socket->file->f_owner))
2202 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2203}
2204EXPORT_SYMBOL(sk_send_sigurg);
2205
/*
 * (Re)arm @timer to fire at @expires.  When mod_timer() returns zero, an
 * extra reference is taken on @sk; sk_stop_timer() drops it when it
 * deletes the timer.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires))
		return;

	sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);
2213
/*
 * Cancel @timer.  When del_timer() returns non-zero, drop a reference on
 * @sk through __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (!del_timer(timer))
		return;

	__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);
2220
/*
 * Initialise the generic part of @sk and, if @sock is non-NULL, link the
 * two together.
 *
 * Sets default buffer sizes, timeouts and callbacks, initialises the
 * queues and locks, and finally publishes the sock with sk_refcnt == 1.
 */
void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);
#ifdef CONFIG_NET_DMA
	skb_queue_head_init(&sk->sk_async_wait_queue);
#endif

	sk->sk_send_head	=	NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation	=	GFP_KERNEL;
	sk->sk_rcvbuf		=	sysctl_rmem_default;
	sk->sk_sndbuf		=	sysctl_wmem_default;
	sk->sk_state		=	TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	/* Inherit type and wait queue from the socket when there is one. */
	if (sock) {
		sk->sk_type	=	sock->type;
		sk->sk_wq	=	sock->wq;
		sock->sk	=	sk;
	} else
		sk->sk_wq	=	NULL;

	spin_lock_init(&sk->sk_dst_lock);
	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	/* Default callbacks. */
	sk->sk_state_change	=	sock_def_wakeup;
	sk->sk_data_ready	=	sock_def_readable;
	sk->sk_write_space	=	sock_def_write_space;
	sk->sk_error_report	=	sock_def_error_report;
	sk->sk_destruct		=	sock_def_destruct;

	sk->sk_frag.page	=	NULL;
	sk->sk_frag.offset	=	0;
	sk->sk_peek_off		=	-1;

	sk->sk_peer_pid 	=	NULL;
	sk->sk_peer_cred	=	NULL;
	sk->sk_write_pending	=	0;
	sk->sk_rcvlowat		=	1;
	sk->sk_rcvtimeo		=	MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo		=	MAX_SCHEDULE_TIMEOUT;

	/* tv_sec == -1: sock_get_timestamp() reports -ENOENT for this value. */
	sk->sk_stamp = ktime_set(-1L, 0);

	/*
	 * Commit all prior initialisation to memory before publishing a
	 * non-zero sk_refcnt.
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);
2283
/*
 * Take ownership of the socket lock from process context; may sleep.
 *
 * On return sk_lock.owned is 1, the spinlock is released and bottom
 * halves are enabled.  @subclass is passed to lockdep.
 */
void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	/* Plain unlock: BH stays disabled until local_bh_enable() below. */
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);
2299
/*
 * Give up ownership of the socket lock.
 *
 * Under the spinlock: drains the backlog if anything was queued, runs the
 * protocol's release_cb if it has one, clears the owned flag, and wakes a
 * waiter if one is queued on sk_lock.wq.
 */
void release_sock(struct sock *sk)
{
	/*
	 * The sk_lock has mutex_unlock() semantics:
	 */
	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);

	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sk->sk_lock.owned = 0;
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * Returns false when nobody owned the socket: the caller then holds
 * sk_lock.slock with bottom halves disabled and sk_lock.owned still 0.
 *
 * Returns true on the slow path: the caller owns the socket
 * (sk_lock.owned == 1), the spinlock is released and bottom halves are
 * enabled, exactly as after lock_sock_nested().
 */
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Fast path: return with the spinlock held and BH disabled.
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);
2353
2354int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2355{
2356 struct timeval tv;
2357 if (!sock_flag(sk, SOCK_TIMESTAMP))
2358 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2359 tv = ktime_to_timeval(sk->sk_stamp);
2360 if (tv.tv_sec == -1)
2361 return -ENOENT;
2362 if (tv.tv_sec == 0) {
2363 sk->sk_stamp = ktime_get_real();
2364 tv = ktime_to_timeval(sk->sk_stamp);
2365 }
2366 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2367}
2368EXPORT_SYMBOL(sock_get_timestamp);
2369
2370int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2371{
2372 struct timespec ts;
2373 if (!sock_flag(sk, SOCK_TIMESTAMP))
2374 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2375 ts = ktime_to_timespec(sk->sk_stamp);
2376 if (ts.tv_sec == -1)
2377 return -ENOENT;
2378 if (ts.tv_sec == 0) {
2379 sk->sk_stamp = ktime_get_real();
2380 ts = ktime_to_timespec(sk->sk_stamp);
2381 }
2382 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2383}
2384EXPORT_SYMBOL(sock_get_timestampns);
2385
2386void sock_enable_timestamp(struct sock *sk, int flag)
2387{
2388 if (!sock_flag(sk, flag)) {
2389 unsigned long previous_flags = sk->sk_flags;
2390
2391 sock_set_flag(sk, flag);
2392
2393
2394
2395
2396
2397 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2398 net_enable_timestamp();
2399 }
2400}
2401
2402
2403
2404
2405
2406
2407
2408
2409int sock_common_getsockopt(struct socket *sock, int level, int optname,
2410 char __user *optval, int __user *optlen)
2411{
2412 struct sock *sk = sock->sk;
2413
2414 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2415}
2416EXPORT_SYMBOL(sock_common_getsockopt);
2417
2418#ifdef CONFIG_COMPAT
2419int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2420 char __user *optval, int __user *optlen)
2421{
2422 struct sock *sk = sock->sk;
2423
2424 if (sk->sk_prot->compat_getsockopt != NULL)
2425 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2426 optval, optlen);
2427 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2428}
2429EXPORT_SYMBOL(compat_sock_common_getsockopt);
2430#endif
2431
2432int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
2433 struct msghdr *msg, size_t size, int flags)
2434{
2435 struct sock *sk = sock->sk;
2436 int addr_len = 0;
2437 int err;
2438
2439 err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
2440 flags & ~MSG_DONTWAIT, &addr_len);
2441 if (err >= 0)
2442 msg->msg_namelen = addr_len;
2443 return err;
2444}
2445EXPORT_SYMBOL(sock_common_recvmsg);
2446
2447
2448
2449
2450int sock_common_setsockopt(struct socket *sock, int level, int optname,
2451 char __user *optval, unsigned int optlen)
2452{
2453 struct sock *sk = sock->sk;
2454
2455 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2456}
2457EXPORT_SYMBOL(sock_common_setsockopt);
2458
2459#ifdef CONFIG_COMPAT
2460int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2461 char __user *optval, unsigned int optlen)
2462{
2463 struct sock *sk = sock->sk;
2464
2465 if (sk->sk_prot->compat_setsockopt != NULL)
2466 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2467 optval, optlen);
2468 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2469}
2470EXPORT_SYMBOL(compat_sock_common_setsockopt);
2471#endif
2472
/*
 * Common socket teardown: run the protocol's destroy hook (if any),
 * unhash the sock, orphan it, free its xfrm policy, release the cached
 * page fragment and drop a reference.
 */
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Unhash before orphaning.  NOTE(review): packets already being
	 * delivered by a lookup that raced with the unhash are presumably
	 * dealt with later by the destructor - confirm.
	 */
	sk->sk_prot->unhash(sk);

	/*
	 * From here on, only the reference counts keep the sock alive; the
	 * sock_put() at the end is not necessarily the last one.
	 */
	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	/* Drop the cached page fragment, if one is held. */
	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
2514
2515#ifdef CONFIG_PROC_FS
/* Number of per-protocol "in use" counter slots. */
#define PROTO_INUSE_NR	64	/* should be enough for the first time */
/* One counter per protocol slot, indexed by proto->inuse_idx. */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

/* Bitmap of allocated slot indices; see assign_proto_idx(). */
static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2522
2523#ifdef CONFIG_NET_NS
/*
 * Add @val (may be negative) to this CPU's in-use counter for @prot in
 * namespace @net.
 */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2529
2530int sock_prot_inuse_get(struct net *net, struct proto *prot)
2531{
2532 int cpu, idx = prot->inuse_idx;
2533 int res = 0;
2534
2535 for_each_possible_cpu(cpu)
2536 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2537
2538 return res >= 0 ? res : 0;
2539}
2540EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2541
2542static int __net_init sock_inuse_init_net(struct net *net)
2543{
2544 net->core.inuse = alloc_percpu(struct prot_inuse);
2545 return net->core.inuse ? 0 : -ENOMEM;
2546}
2547
/* Per-namespace exit: free the per-CPU in-use counters. */
static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}
2552
/* Per-namespace hooks that create and destroy the in-use counters. */
static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};
2557
2558static __init int net_inuse_init(void)
2559{
2560 if (register_pernet_subsys(&net_inuse_ops))
2561 panic("Cannot initialize net inuse counters");
2562
2563 return 0;
2564}
2565
2566core_initcall(net_inuse_init);
2567#else
/* Without CONFIG_NET_NS a single global per-CPU counter set is used. */
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

/*
 * Add @val (may be negative) to this CPU's in-use counter for @prot.
 * @net is not used in this configuration.
 */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2575
2576int sock_prot_inuse_get(struct net *net, struct proto *prot)
2577{
2578 int cpu, idx = prot->inuse_idx;
2579 int res = 0;
2580
2581 for_each_possible_cpu(cpu)
2582 res += per_cpu(prot_inuse, cpu).val[idx];
2583
2584 return res >= 0 ? res : 0;
2585}
2586EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2587#endif
2588
2589static void assign_proto_idx(struct proto *prot)
2590{
2591 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2592
2593 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2594 pr_err("PROTO_INUSE_NR exhausted\n");
2595 return;
2596 }
2597
2598 set_bit(prot->inuse_idx, proto_inuse_idx);
2599}
2600
2601static void release_proto_idx(struct proto *prot)
2602{
2603 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2604 clear_bit(prot->inuse_idx, proto_inuse_idx);
2605}
2606#else
/* No-op when CONFIG_PROC_FS is off: there are no in-use counters to index. */
static inline void assign_proto_idx(struct proto *prot)
{
}
2610
/* No-op when CONFIG_PROC_FS is off: no slot was ever assigned. */
static inline void release_proto_idx(struct proto *prot)
{
}
2614#endif
2615
/*
 * Register protocol @prot.
 *
 * When @alloc_slab is non-zero, a slab cache is created for the protocol's
 * socks and, if the protocol has them, for its request socks and timewait
 * socks.  The protocol is then added to proto_list and given an in-use
 * counter index.
 *
 * Returns 0 on success.  Any allocation failure unwinds what was created
 * so far and returns -ENOBUFS.
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			/* The cache name is heap-allocated and freed on unwind/unregister. */
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

	/* Unwind in reverse order of creation; each label falls through. */
out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	/* rsk_prot is optional, so it may never have been set up. */
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);
2686
/*
 * Unregister protocol @prot: release its in-use counter index, remove it
 * from proto_list, and destroy whichever slab caches proto_register()
 * created for it.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	/* The cache name is freed only after the cache itself is destroyed. */
	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);
2712
2713#ifdef CONFIG_PROC_FS
/*
 * seq_file start: take proto_list_mutex (released in proto_seq_stop())
 * and position at *@pos, with the list head itself counting as an entry.
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}
2720
/* seq_file next: advance to the following entry of proto_list. */
static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}
2725
/* seq_file stop: release the mutex taken in proto_seq_start(). */
static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}
2731
/* Map a method pointer to 'y' (set) or 'n' (NULL) for the proc listing. */
static char proto_method_implemented(const void *method)
{
	if (method != NULL)
		return 'y';

	return 'n';
}
2736static long sock_prot_memory_allocated(struct proto *proto)
2737{
2738 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2739}
2740
2741static char *sock_prot_memory_pressure(struct proto *proto)
2742{
2743 return proto->memory_pressure != NULL ?
2744 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2745}
2746
/*
 * Emit one line of the "protocols" proc file for @proto: name, object
 * size, sockets in use, memory accounting, max header, slab and module,
 * followed by one y/n column per protocol method.  The column order
 * matches the header printed by proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}
2780
2781static int proto_seq_show(struct seq_file *seq, void *v)
2782{
2783 if (v == &proto_list)
2784 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2785 "protocol",
2786 "size",
2787 "sockets",
2788 "memory",
2789 "press",
2790 "maxhdr",
2791 "slab",
2792 "module",
2793 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2794 else
2795 proto_seq_printf(seq, list_entry(v, struct proto, node));
2796 return 0;
2797}
2798
/* Iterator callbacks behind the "protocols" proc file. */
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};
2805
/* open() for the "protocols" proc file: a namespace-aware seq_file. */
static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}
2811
/* File operations for the "protocols" proc file. */
static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2819
2820static __net_init int proto_init_net(struct net *net)
2821{
2822 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2823 return -ENOMEM;
2824
2825 return 0;
2826}
2827
/* Per-namespace exit: remove the "protocols" proc entry. */
static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}
2832
2833
/* Per-namespace hooks that create and remove the "protocols" proc file. */
static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};
2838
/*
 * Register the "protocols" proc file hooks for every network namespace;
 * run at subsys_initcall time.  Returns register_pernet_subsys()'s result.
 */
static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);
2845
2846#endif
2847