1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/errqueue.h>
97#include <linux/types.h>
98#include <linux/socket.h>
99#include <linux/in.h>
100#include <linux/kernel.h>
101#include <linux/module.h>
102#include <linux/proc_fs.h>
103#include <linux/seq_file.h>
104#include <linux/sched.h>
105#include <linux/timer.h>
106#include <linux/string.h>
107#include <linux/sockios.h>
108#include <linux/net.h>
109#include <linux/mm.h>
110#include <linux/slab.h>
111#include <linux/interrupt.h>
112#include <linux/poll.h>
113#include <linux/tcp.h>
114#include <linux/init.h>
115#include <linux/highmem.h>
116#include <linux/user_namespace.h>
117#include <linux/static_key.h>
118#include <linux/memcontrol.h>
119#include <linux/prefetch.h>
120
121#include <asm/uaccess.h>
122
123#include <linux/netdevice.h>
124#include <net/protocol.h>
125#include <linux/skbuff.h>
126#include <net/net_namespace.h>
127#include <net/request_sock.h>
128#include <net/sock.h>
129#include <linux/net_tstamp.h>
130#include <net/xfrm.h>
131#include <linux/ipsec.h>
132#include <net/cls_cgroup.h>
133#include <net/netprio_cgroup.h>
134
135#include <linux/filter.h>
136
137#include <trace/events/sock.h>
138
139#ifdef CONFIG_INET
140#include <net/tcp.h>
141#endif
142
143#include <net/busy_poll.h>
144
/*
 * List of protocol descriptors (struct proto, linked through ->node) and the
 * mutex that the memcg init/destroy helpers in this file hold while walking
 * it.
 */
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
147
148
149
150
151
152
153
154
155
156
157
158bool sk_ns_capable(const struct sock *sk,
159 struct user_namespace *user_ns, int cap)
160{
161 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
162 ns_capable(user_ns, cap);
163}
164EXPORT_SYMBOL(sk_ns_capable);
165
166
167
168
169
170
171
172
173
174
175bool sk_capable(const struct sock *sk, int cap)
176{
177 return sk_ns_capable(sk, &init_user_ns, cap);
178}
179EXPORT_SYMBOL(sk_capable);
180
181
182
183
184
185
186
187
188
189
190bool sk_net_capable(const struct sock *sk, int cap)
191{
192 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
193}
194EXPORT_SYMBOL(sk_net_capable);
195
196
197#ifdef CONFIG_MEMCG_KMEM
198int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
199{
200 struct proto *proto;
201 int ret = 0;
202
203 mutex_lock(&proto_list_mutex);
204 list_for_each_entry(proto, &proto_list, node) {
205 if (proto->init_cgroup) {
206 ret = proto->init_cgroup(memcg, ss);
207 if (ret)
208 goto out;
209 }
210 }
211
212 mutex_unlock(&proto_list_mutex);
213 return ret;
214out:
215 list_for_each_entry_continue_reverse(proto, &proto_list, node)
216 if (proto->destroy_cgroup)
217 proto->destroy_cgroup(memcg);
218 mutex_unlock(&proto_list_mutex);
219 return ret;
220}
221
222void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
223{
224 struct proto *proto;
225
226 mutex_lock(&proto_list_mutex);
227 list_for_each_entry_reverse(proto, &proto_list, node)
228 if (proto->destroy_cgroup)
229 proto->destroy_cgroup(memcg);
230 mutex_unlock(&proto_list_mutex);
231}
232#endif
233
234
235
236
237
/*
 * One lock class key per address family, indexed by sk->sk_family in
 * sock_lock_init(): af_family_keys is handed over together with the
 * "sk_lock-*" names, af_family_slock_keys together with the "slock-*" names.
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];
240
#if defined(CONFIG_MEMCG_KMEM)
/*
 * Static key exported to modules; only built with CONFIG_MEMCG_KMEM.
 * NOTE(review): presumably gates the per-memcg socket memory accounting
 * paths - its users are outside this part of the file, confirm there.
 */
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif
245
246
247
248
249
250
/*
 * Lock class names for the per-socket "sk_lock", indexed by address family
 * (sk->sk_family, see sock_lock_init()).  Entry N must name family N; the
 * last entry is the AF_MAX sentinel.
 *
 * The AF_IUCV entry used to read "sk_lock-IUCV"; it now carries the AF_
 * prefix like every other entry and like the slock-/clock- tables.
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC",     "sk_lock-AF_UNIX",      "sk_lock-AF_INET",
	"sk_lock-AF_AX25",       "sk_lock-AF_IPX",       "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM",     "sk_lock-AF_BRIDGE",    "sk_lock-AF_ATMPVC",
	"sk_lock-AF_X25",        "sk_lock-AF_INET6",     "sk_lock-AF_ROSE",
	"sk_lock-AF_DECnet",     "sk_lock-AF_NETBEUI",   "sk_lock-AF_SECURITY",
	"sk_lock-AF_KEY",        "sk_lock-AF_NETLINK",   "sk_lock-AF_PACKET",
	"sk_lock-AF_ASH",        "sk_lock-AF_ECONET",    "sk_lock-AF_ATMSVC",
	"sk_lock-AF_RDS",        "sk_lock-AF_SNA",       "sk_lock-AF_IRDA",
	"sk_lock-AF_PPPOX",      "sk_lock-AF_WANPIPE",   "sk_lock-AF_LLC",
	"sk_lock-27",            "sk_lock-28",           "sk_lock-AF_CAN",
	"sk_lock-AF_TIPC",       "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV",
	"sk_lock-AF_RXRPC",      "sk_lock-AF_ISDN",      "sk_lock-AF_PHONET",
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF",      "sk_lock-AF_ALG",
	"sk_lock-AF_NFC",        "sk_lock-AF_VSOCK",     "sk_lock-AF_MAX"
};
/*
 * Lock class names with the "slock-" prefix, indexed by address family
 * (sk->sk_family); sock_lock_init() passes the entry for the socket's
 * family together with af_family_slock_keys.
 */
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
	"slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
	"slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
	"slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
	"slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
	"slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
	"slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
	"slock-27" , "slock-28" , "slock-AF_CAN" ,
	"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
	"slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
	"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
	"slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_MAX"
};
/*
 * Lock class names with the "clock-" prefix, indexed by address family
 * (sk->sk_family); sk_clone_lock() uses the entry for the socket's family
 * when setting the lockdep class of sk_callback_lock.
 */
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
	"clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
	"clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
	"clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
	"clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
	"clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
	"clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
	"clock-27" , "clock-28" , "clock-AF_CAN" ,
	"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
	"clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
	"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
	"clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_MAX"
};
299
300
301
302
303
/*
 * Per-address-family lock class keys for sk_callback_lock; indexed by
 * sk->sk_family (see the lockdep_set_class_and_name() call in
 * sk_clone_lock()).
 */
static struct lock_class_key af_callback_keys[AF_MAX];
305
306
307
308
309
310
/*
 * Initial values for the socket buffer size sysctls: _SK_MEM_PACKETS buffers
 * of SKB_TRUESIZE(256) bytes each.
 * NOTE(review): SKB_TRUESIZE() presumably adds the per-skb bookkeeping
 * overhead to a 256 byte payload - confirm against its definition.
 */
#define _SK_MEM_PACKETS 256
#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)


/*
 * sysctl_wmem_max / sysctl_rmem_max cap the value an SO_SNDBUF / SO_RCVBUF
 * request may ask for (see sock_setsockopt()); the *FORCE variants bypass
 * them.
 */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
/* NOTE(review): presumably the initial per-socket buffer sizes - users are not in this part of the file. */
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
323
324
/*
 * NOTE(review): presumably the maximum ancillary/option memory a socket may
 * allocate - the users are outside this part of the file, confirm there.
 */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

/* Enabled (1) by default; NOTE(review): consumers are not visible here. */
int sysctl_tstamp_allow_data __read_mostly = 1;

/*
 * Static key, initially false, incremented by sk_set_memalloc() and
 * decremented by sk_clear_memalloc().
 */
struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
332
333
334
335
336
337
338
339
340
341void sk_set_memalloc(struct sock *sk)
342{
343 sock_set_flag(sk, SOCK_MEMALLOC);
344 sk->sk_allocation |= __GFP_MEMALLOC;
345 static_key_slow_inc(&memalloc_socks);
346}
347EXPORT_SYMBOL_GPL(sk_set_memalloc);
348
349void sk_clear_memalloc(struct sock *sk)
350{
351 sock_reset_flag(sk, SOCK_MEMALLOC);
352 sk->sk_allocation &= ~__GFP_MEMALLOC;
353 static_key_slow_dec(&memalloc_socks);
354
355
356
357
358
359
360
361
362 sk_mem_reclaim(sk);
363}
364EXPORT_SYMBOL_GPL(sk_clear_memalloc);
365
366int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
367{
368 int ret;
369 unsigned long pflags = current->flags;
370
371
372 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
373
374 current->flags |= PF_MEMALLOC;
375 ret = sk->sk_backlog_rcv(sk, skb);
376 tsk_restore_flags(current, pflags, PF_MEMALLOC);
377
378 return ret;
379}
380EXPORT_SYMBOL(__sk_backlog_rcv);
381
382static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
383{
384 struct timeval tv;
385
386 if (optlen < sizeof(tv))
387 return -EINVAL;
388 if (copy_from_user(&tv, optval, sizeof(tv)))
389 return -EFAULT;
390 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
391 return -EDOM;
392
393 if (tv.tv_sec < 0) {
394 static int warned __read_mostly;
395
396 *timeo_p = 0;
397 if (warned < 10 && net_ratelimit()) {
398 warned++;
399 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
400 __func__, current->comm, task_pid_nr(current));
401 }
402 return 0;
403 }
404 *timeo_p = MAX_SCHEDULE_TIMEOUT;
405 if (tv.tv_sec == 0 && tv.tv_usec == 0)
406 return 0;
407 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
408 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
409 return 0;
410}
411
412static void sock_warn_obsolete_bsdism(const char *name)
413{
414 static int warned;
415 static char warncomm[TASK_COMM_LEN];
416 if (strcmp(warncomm, current->comm) && warned < 5) {
417 strcpy(warncomm, current->comm);
418 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
419 warncomm, name);
420 warned++;
421 }
422}
423
/*
 * Mask of the sk_flags bits SOCK_TIMESTAMP and SOCK_TIMESTAMPING_RX_SOFTWARE.
 * sock_disable_timestamp() calls net_disable_timestamp() once none of these
 * bits remain set on the socket.
 */
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
425
426static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
427{
428 if (sk->sk_flags & flags) {
429 sk->sk_flags &= ~flags;
430 if (!(sk->sk_flags & SK_FLAGS_TIMESTAMP))
431 net_disable_timestamp();
432 }
433}
434
435
436int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
437{
438 int err;
439 unsigned long flags;
440 struct sk_buff_head *list = &sk->sk_receive_queue;
441
442 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
443 atomic_inc(&sk->sk_drops);
444 trace_sock_rcvqueue_full(sk, skb);
445 return -ENOMEM;
446 }
447
448 err = sk_filter(sk, skb);
449 if (err)
450 return err;
451
452 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
453 atomic_inc(&sk->sk_drops);
454 return -ENOBUFS;
455 }
456
457 skb->dev = NULL;
458 skb_set_owner_r(skb, sk);
459
460
461
462
463 skb_dst_force(skb);
464
465 spin_lock_irqsave(&list->lock, flags);
466 sock_skb_set_dropcount(sk, skb);
467 __skb_queue_tail(list, skb);
468 spin_unlock_irqrestore(&list->lock, flags);
469
470 if (!sock_flag(sk, SOCK_DEAD))
471 sk->sk_data_ready(sk);
472 return 0;
473}
474EXPORT_SYMBOL(sock_queue_rcv_skb);
475
476int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
477{
478 int rc = NET_RX_SUCCESS;
479
480 if (sk_filter(sk, skb))
481 goto discard_and_relse;
482
483 skb->dev = NULL;
484
485 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
486 atomic_inc(&sk->sk_drops);
487 goto discard_and_relse;
488 }
489 if (nested)
490 bh_lock_sock_nested(sk);
491 else
492 bh_lock_sock(sk);
493 if (!sock_owned_by_user(sk)) {
494
495
496
497 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
498
499 rc = sk_backlog_rcv(sk, skb);
500
501 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
502 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
503 bh_unlock_sock(sk);
504 atomic_inc(&sk->sk_drops);
505 goto discard_and_relse;
506 }
507
508 bh_unlock_sock(sk);
509out:
510 sock_put(sk);
511 return rc;
512discard_and_relse:
513 kfree_skb(skb);
514 goto out;
515}
516EXPORT_SYMBOL(sk_receive_skb);
517
518struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
519{
520 struct dst_entry *dst = __sk_dst_get(sk);
521
522 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
523 sk_tx_queue_clear(sk);
524 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
525 dst_release(dst);
526 return NULL;
527 }
528
529 return dst;
530}
531EXPORT_SYMBOL(__sk_dst_check);
532
533struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
534{
535 struct dst_entry *dst = sk_dst_get(sk);
536
537 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
538 sk_dst_reset(sk);
539 dst_release(dst);
540 return NULL;
541 }
542
543 return dst;
544}
545EXPORT_SYMBOL(sk_dst_check);
546
547static int sock_setbindtodevice(struct sock *sk, char __user *optval,
548 int optlen)
549{
550 int ret = -ENOPROTOOPT;
551#ifdef CONFIG_NETDEVICES
552 struct net *net = sock_net(sk);
553 char devname[IFNAMSIZ];
554 int index;
555
556
557 ret = -EPERM;
558 if (!ns_capable(net->user_ns, CAP_NET_RAW))
559 goto out;
560
561 ret = -EINVAL;
562 if (optlen < 0)
563 goto out;
564
565
566
567
568
569
570 if (optlen > IFNAMSIZ - 1)
571 optlen = IFNAMSIZ - 1;
572 memset(devname, 0, sizeof(devname));
573
574 ret = -EFAULT;
575 if (copy_from_user(devname, optval, optlen))
576 goto out;
577
578 index = 0;
579 if (devname[0] != '\0') {
580 struct net_device *dev;
581
582 rcu_read_lock();
583 dev = dev_get_by_name_rcu(net, devname);
584 if (dev)
585 index = dev->ifindex;
586 rcu_read_unlock();
587 ret = -ENODEV;
588 if (!dev)
589 goto out;
590 }
591
592 lock_sock(sk);
593 sk->sk_bound_dev_if = index;
594 sk_dst_reset(sk);
595 release_sock(sk);
596
597 ret = 0;
598
599out:
600#endif
601
602 return ret;
603}
604
605static int sock_getbindtodevice(struct sock *sk, char __user *optval,
606 int __user *optlen, int len)
607{
608 int ret = -ENOPROTOOPT;
609#ifdef CONFIG_NETDEVICES
610 struct net *net = sock_net(sk);
611 char devname[IFNAMSIZ];
612
613 if (sk->sk_bound_dev_if == 0) {
614 len = 0;
615 goto zero;
616 }
617
618 ret = -EINVAL;
619 if (len < IFNAMSIZ)
620 goto out;
621
622 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
623 if (ret)
624 goto out;
625
626 len = strlen(devname) + 1;
627
628 ret = -EFAULT;
629 if (copy_to_user(optval, devname, len))
630 goto out;
631
632zero:
633 ret = -EFAULT;
634 if (put_user(len, optlen))
635 goto out;
636
637 ret = 0;
638
639out:
640#endif
641
642 return ret;
643}
644
/* Set socket flag @bit when @valbool is non-zero, clear it otherwise. */
static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (!valbool) {
		sock_reset_flag(sk, bit);
		return;
	}

	sock_set_flag(sk, bit);
}
652
653bool sk_mc_loop(struct sock *sk)
654{
655 if (dev_recursion_level())
656 return false;
657 if (!sk)
658 return true;
659 switch (sk->sk_family) {
660 case AF_INET:
661 return inet_sk(sk)->mc_loop;
662#if IS_ENABLED(CONFIG_IPV6)
663 case AF_INET6:
664 return inet6_sk(sk)->mc_loop;
665#endif
666 }
667 WARN_ON(1);
668 return true;
669}
670EXPORT_SYMBOL(sk_mc_loop);
671
672
673
674
675
676
677int sock_setsockopt(struct socket *sock, int level, int optname,
678 char __user *optval, unsigned int optlen)
679{
680 struct sock *sk = sock->sk;
681 int val;
682 int valbool;
683 struct linger ling;
684 int ret = 0;
685
686
687
688
689
690 if (optname == SO_BINDTODEVICE)
691 return sock_setbindtodevice(sk, optval, optlen);
692
693 if (optlen < sizeof(int))
694 return -EINVAL;
695
696 if (get_user(val, (int __user *)optval))
697 return -EFAULT;
698
699 valbool = val ? 1 : 0;
700
701 lock_sock(sk);
702
703 switch (optname) {
704 case SO_DEBUG:
705 if (val && !capable(CAP_NET_ADMIN))
706 ret = -EACCES;
707 else
708 sock_valbool_flag(sk, SOCK_DBG, valbool);
709 break;
710 case SO_REUSEADDR:
711 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
712 break;
713 case SO_REUSEPORT:
714 sk->sk_reuseport = valbool;
715 break;
716 case SO_TYPE:
717 case SO_PROTOCOL:
718 case SO_DOMAIN:
719 case SO_ERROR:
720 ret = -ENOPROTOOPT;
721 break;
722 case SO_DONTROUTE:
723 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
724 break;
725 case SO_BROADCAST:
726 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
727 break;
728 case SO_SNDBUF:
729
730
731
732
733
734 val = min_t(u32, val, sysctl_wmem_max);
735set_sndbuf:
736 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
737 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
738
739 sk->sk_write_space(sk);
740 break;
741
742 case SO_SNDBUFFORCE:
743 if (!capable(CAP_NET_ADMIN)) {
744 ret = -EPERM;
745 break;
746 }
747 goto set_sndbuf;
748
749 case SO_RCVBUF:
750
751
752
753
754
755 val = min_t(u32, val, sysctl_rmem_max);
756set_rcvbuf:
757 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
774 break;
775
776 case SO_RCVBUFFORCE:
777 if (!capable(CAP_NET_ADMIN)) {
778 ret = -EPERM;
779 break;
780 }
781 goto set_rcvbuf;
782
783 case SO_KEEPALIVE:
784#ifdef CONFIG_INET
785 if (sk->sk_protocol == IPPROTO_TCP &&
786 sk->sk_type == SOCK_STREAM)
787 tcp_set_keepalive(sk, valbool);
788#endif
789 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
790 break;
791
792 case SO_OOBINLINE:
793 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
794 break;
795
796 case SO_NO_CHECK:
797 sk->sk_no_check_tx = valbool;
798 break;
799
800 case SO_PRIORITY:
801 if ((val >= 0 && val <= 6) ||
802 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
803 sk->sk_priority = val;
804 else
805 ret = -EPERM;
806 break;
807
808 case SO_LINGER:
809 if (optlen < sizeof(ling)) {
810 ret = -EINVAL;
811 break;
812 }
813 if (copy_from_user(&ling, optval, sizeof(ling))) {
814 ret = -EFAULT;
815 break;
816 }
817 if (!ling.l_onoff)
818 sock_reset_flag(sk, SOCK_LINGER);
819 else {
820#if (BITS_PER_LONG == 32)
821 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
822 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
823 else
824#endif
825 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
826 sock_set_flag(sk, SOCK_LINGER);
827 }
828 break;
829
830 case SO_BSDCOMPAT:
831 sock_warn_obsolete_bsdism("setsockopt");
832 break;
833
834 case SO_PASSCRED:
835 if (valbool)
836 set_bit(SOCK_PASSCRED, &sock->flags);
837 else
838 clear_bit(SOCK_PASSCRED, &sock->flags);
839 break;
840
841 case SO_TIMESTAMP:
842 case SO_TIMESTAMPNS:
843 if (valbool) {
844 if (optname == SO_TIMESTAMP)
845 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
846 else
847 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
848 sock_set_flag(sk, SOCK_RCVTSTAMP);
849 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
850 } else {
851 sock_reset_flag(sk, SOCK_RCVTSTAMP);
852 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
853 }
854 break;
855
856 case SO_TIMESTAMPING:
857 if (val & ~SOF_TIMESTAMPING_MASK) {
858 ret = -EINVAL;
859 break;
860 }
861
862 if (val & SOF_TIMESTAMPING_OPT_ID &&
863 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
864 if (sk->sk_protocol == IPPROTO_TCP) {
865 if (sk->sk_state != TCP_ESTABLISHED) {
866 ret = -EINVAL;
867 break;
868 }
869 sk->sk_tskey = tcp_sk(sk)->snd_una;
870 } else {
871 sk->sk_tskey = 0;
872 }
873 }
874 sk->sk_tsflags = val;
875 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
876 sock_enable_timestamp(sk,
877 SOCK_TIMESTAMPING_RX_SOFTWARE);
878 else
879 sock_disable_timestamp(sk,
880 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
881 break;
882
883 case SO_RCVLOWAT:
884 if (val < 0)
885 val = INT_MAX;
886 sk->sk_rcvlowat = val ? : 1;
887 break;
888
889 case SO_RCVTIMEO:
890 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
891 break;
892
893 case SO_SNDTIMEO:
894 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
895 break;
896
897 case SO_ATTACH_FILTER:
898 ret = -EINVAL;
899 if (optlen == sizeof(struct sock_fprog)) {
900 struct sock_fprog fprog;
901
902 ret = -EFAULT;
903 if (copy_from_user(&fprog, optval, sizeof(fprog)))
904 break;
905
906 ret = sk_attach_filter(&fprog, sk);
907 }
908 break;
909
910 case SO_ATTACH_BPF:
911 ret = -EINVAL;
912 if (optlen == sizeof(u32)) {
913 u32 ufd;
914
915 ret = -EFAULT;
916 if (copy_from_user(&ufd, optval, sizeof(ufd)))
917 break;
918
919 ret = sk_attach_bpf(ufd, sk);
920 }
921 break;
922
923 case SO_DETACH_FILTER:
924 ret = sk_detach_filter(sk);
925 break;
926
927 case SO_LOCK_FILTER:
928 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
929 ret = -EPERM;
930 else
931 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
932 break;
933
934 case SO_PASSSEC:
935 if (valbool)
936 set_bit(SOCK_PASSSEC, &sock->flags);
937 else
938 clear_bit(SOCK_PASSSEC, &sock->flags);
939 break;
940 case SO_MARK:
941 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
942 ret = -EPERM;
943 else
944 sk->sk_mark = val;
945 break;
946
947 case SO_RXQ_OVFL:
948 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
949 break;
950
951 case SO_WIFI_STATUS:
952 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
953 break;
954
955 case SO_PEEK_OFF:
956 if (sock->ops->set_peek_off)
957 ret = sock->ops->set_peek_off(sk, val);
958 else
959 ret = -EOPNOTSUPP;
960 break;
961
962 case SO_NOFCS:
963 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
964 break;
965
966 case SO_SELECT_ERR_QUEUE:
967 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
968 break;
969
970#ifdef CONFIG_NET_RX_BUSY_POLL
971 case SO_BUSY_POLL:
972
973 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
974 ret = -EPERM;
975 else {
976 if (val < 0)
977 ret = -EINVAL;
978 else
979 sk->sk_ll_usec = val;
980 }
981 break;
982#endif
983
984 case SO_MAX_PACING_RATE:
985 sk->sk_max_pacing_rate = val;
986 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
987 sk->sk_max_pacing_rate);
988 break;
989
990 default:
991 ret = -ENOPROTOOPT;
992 break;
993 }
994 release_sock(sk);
995 return ret;
996}
997EXPORT_SYMBOL(sock_setsockopt);
998
999
1000static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1001 struct ucred *ucred)
1002{
1003 ucred->pid = pid_vnr(pid);
1004 ucred->uid = ucred->gid = -1;
1005 if (cred) {
1006 struct user_namespace *current_ns = current_user_ns();
1007
1008 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1009 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1010 }
1011}
1012
1013int sock_getsockopt(struct socket *sock, int level, int optname,
1014 char __user *optval, int __user *optlen)
1015{
1016 struct sock *sk = sock->sk;
1017
1018 union {
1019 int val;
1020 struct linger ling;
1021 struct timeval tm;
1022 } v;
1023
1024 int lv = sizeof(int);
1025 int len;
1026
1027 if (get_user(len, optlen))
1028 return -EFAULT;
1029 if (len < 0)
1030 return -EINVAL;
1031
1032 memset(&v, 0, sizeof(v));
1033
1034 switch (optname) {
1035 case SO_DEBUG:
1036 v.val = sock_flag(sk, SOCK_DBG);
1037 break;
1038
1039 case SO_DONTROUTE:
1040 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1041 break;
1042
1043 case SO_BROADCAST:
1044 v.val = sock_flag(sk, SOCK_BROADCAST);
1045 break;
1046
1047 case SO_SNDBUF:
1048 v.val = sk->sk_sndbuf;
1049 break;
1050
1051 case SO_RCVBUF:
1052 v.val = sk->sk_rcvbuf;
1053 break;
1054
1055 case SO_REUSEADDR:
1056 v.val = sk->sk_reuse;
1057 break;
1058
1059 case SO_REUSEPORT:
1060 v.val = sk->sk_reuseport;
1061 break;
1062
1063 case SO_KEEPALIVE:
1064 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1065 break;
1066
1067 case SO_TYPE:
1068 v.val = sk->sk_type;
1069 break;
1070
1071 case SO_PROTOCOL:
1072 v.val = sk->sk_protocol;
1073 break;
1074
1075 case SO_DOMAIN:
1076 v.val = sk->sk_family;
1077 break;
1078
1079 case SO_ERROR:
1080 v.val = -sock_error(sk);
1081 if (v.val == 0)
1082 v.val = xchg(&sk->sk_err_soft, 0);
1083 break;
1084
1085 case SO_OOBINLINE:
1086 v.val = sock_flag(sk, SOCK_URGINLINE);
1087 break;
1088
1089 case SO_NO_CHECK:
1090 v.val = sk->sk_no_check_tx;
1091 break;
1092
1093 case SO_PRIORITY:
1094 v.val = sk->sk_priority;
1095 break;
1096
1097 case SO_LINGER:
1098 lv = sizeof(v.ling);
1099 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1100 v.ling.l_linger = sk->sk_lingertime / HZ;
1101 break;
1102
1103 case SO_BSDCOMPAT:
1104 sock_warn_obsolete_bsdism("getsockopt");
1105 break;
1106
1107 case SO_TIMESTAMP:
1108 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1109 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1110 break;
1111
1112 case SO_TIMESTAMPNS:
1113 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1114 break;
1115
1116 case SO_TIMESTAMPING:
1117 v.val = sk->sk_tsflags;
1118 break;
1119
1120 case SO_RCVTIMEO:
1121 lv = sizeof(struct timeval);
1122 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1123 v.tm.tv_sec = 0;
1124 v.tm.tv_usec = 0;
1125 } else {
1126 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1127 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1128 }
1129 break;
1130
1131 case SO_SNDTIMEO:
1132 lv = sizeof(struct timeval);
1133 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1134 v.tm.tv_sec = 0;
1135 v.tm.tv_usec = 0;
1136 } else {
1137 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1138 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1139 }
1140 break;
1141
1142 case SO_RCVLOWAT:
1143 v.val = sk->sk_rcvlowat;
1144 break;
1145
1146 case SO_SNDLOWAT:
1147 v.val = 1;
1148 break;
1149
1150 case SO_PASSCRED:
1151 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1152 break;
1153
1154 case SO_PEERCRED:
1155 {
1156 struct ucred peercred;
1157 if (len > sizeof(peercred))
1158 len = sizeof(peercred);
1159 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1160 if (copy_to_user(optval, &peercred, len))
1161 return -EFAULT;
1162 goto lenout;
1163 }
1164
1165 case SO_PEERNAME:
1166 {
1167 char address[128];
1168
1169 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1170 return -ENOTCONN;
1171 if (lv < len)
1172 return -EINVAL;
1173 if (copy_to_user(optval, address, len))
1174 return -EFAULT;
1175 goto lenout;
1176 }
1177
1178
1179
1180
1181 case SO_ACCEPTCONN:
1182 v.val = sk->sk_state == TCP_LISTEN;
1183 break;
1184
1185 case SO_PASSSEC:
1186 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1187 break;
1188
1189 case SO_PEERSEC:
1190 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1191
1192 case SO_MARK:
1193 v.val = sk->sk_mark;
1194 break;
1195
1196 case SO_RXQ_OVFL:
1197 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1198 break;
1199
1200 case SO_WIFI_STATUS:
1201 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1202 break;
1203
1204 case SO_PEEK_OFF:
1205 if (!sock->ops->set_peek_off)
1206 return -EOPNOTSUPP;
1207
1208 v.val = sk->sk_peek_off;
1209 break;
1210 case SO_NOFCS:
1211 v.val = sock_flag(sk, SOCK_NOFCS);
1212 break;
1213
1214 case SO_BINDTODEVICE:
1215 return sock_getbindtodevice(sk, optval, optlen, len);
1216
1217 case SO_GET_FILTER:
1218 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1219 if (len < 0)
1220 return len;
1221
1222 goto lenout;
1223
1224 case SO_LOCK_FILTER:
1225 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1226 break;
1227
1228 case SO_BPF_EXTENSIONS:
1229 v.val = bpf_tell_extensions();
1230 break;
1231
1232 case SO_SELECT_ERR_QUEUE:
1233 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1234 break;
1235
1236#ifdef CONFIG_NET_RX_BUSY_POLL
1237 case SO_BUSY_POLL:
1238 v.val = sk->sk_ll_usec;
1239 break;
1240#endif
1241
1242 case SO_MAX_PACING_RATE:
1243 v.val = sk->sk_max_pacing_rate;
1244 break;
1245
1246 case SO_INCOMING_CPU:
1247 v.val = sk->sk_incoming_cpu;
1248 break;
1249
1250 default:
1251
1252
1253
1254 return -ENOPROTOOPT;
1255 }
1256
1257 if (len > lv)
1258 len = lv;
1259 if (copy_to_user(optval, &v, len))
1260 return -EFAULT;
1261lenout:
1262 if (put_user(len, optlen))
1263 return -EFAULT;
1264 return 0;
1265}
1266
1267
1268
1269
1270
1271
1272static inline void sock_lock_init(struct sock *sk)
1273{
1274 sock_lock_init_class_and_name(sk,
1275 af_family_slock_key_strings[sk->sk_family],
1276 af_family_slock_keys + sk->sk_family,
1277 af_family_key_strings[sk->sk_family],
1278 af_family_keys + sk->sk_family);
1279}
1280
1281
1282
1283
1284
1285
1286static void sock_copy(struct sock *nsk, const struct sock *osk)
1287{
1288#ifdef CONFIG_SECURITY_NETWORK
1289 void *sptr = nsk->sk_security;
1290#endif
1291 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1292
1293 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1294 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1295
1296#ifdef CONFIG_SECURITY_NETWORK
1297 nsk->sk_security = sptr;
1298 security_sk_clone(osk, nsk);
1299#endif
1300}
1301
1302void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1303{
1304 unsigned long nulls1, nulls2;
1305
1306 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1307 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1308 if (nulls1 > nulls2)
1309 swap(nulls1, nulls2);
1310
1311 if (nulls1 != 0)
1312 memset((char *)sk, 0, nulls1);
1313 memset((char *)sk + nulls1 + sizeof(void *), 0,
1314 nulls2 - nulls1 - sizeof(void *));
1315 memset((char *)sk + nulls2 + sizeof(void *), 0,
1316 size - nulls2 - sizeof(void *));
1317}
1318EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1319
1320static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1321 int family)
1322{
1323 struct sock *sk;
1324 struct kmem_cache *slab;
1325
1326 slab = prot->slab;
1327 if (slab != NULL) {
1328 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1329 if (!sk)
1330 return sk;
1331 if (priority & __GFP_ZERO) {
1332 if (prot->clear_sk)
1333 prot->clear_sk(sk, prot->obj_size);
1334 else
1335 sk_prot_clear_nulls(sk, prot->obj_size);
1336 }
1337 } else
1338 sk = kmalloc(prot->obj_size, priority);
1339
1340 if (sk != NULL) {
1341 kmemcheck_annotate_bitfield(sk, flags);
1342
1343 if (security_sk_alloc(sk, family, priority))
1344 goto out_free;
1345
1346 if (!try_module_get(prot->owner))
1347 goto out_free_sec;
1348 sk_tx_queue_clear(sk);
1349 }
1350
1351 return sk;
1352
1353out_free_sec:
1354 security_sk_free(sk);
1355out_free:
1356 if (slab != NULL)
1357 kmem_cache_free(slab, sk);
1358 else
1359 kfree(sk);
1360 return NULL;
1361}
1362
1363static void sk_prot_free(struct proto *prot, struct sock *sk)
1364{
1365 struct kmem_cache *slab;
1366 struct module *owner;
1367
1368 owner = prot->owner;
1369 slab = prot->slab;
1370
1371 security_sk_free(sk);
1372 if (slab != NULL)
1373 kmem_cache_free(slab, sk);
1374 else
1375 kfree(sk);
1376 module_put(owner);
1377}
1378
1379#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
1380void sock_update_netprioidx(struct sock *sk)
1381{
1382 if (in_interrupt())
1383 return;
1384
1385 sk->sk_cgrp_prioidx = task_netprioidx(current);
1386}
1387EXPORT_SYMBOL_GPL(sock_update_netprioidx);
1388#endif
1389
1390
1391
1392
1393
1394
1395
1396
1397struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1398 struct proto *prot)
1399{
1400 struct sock *sk;
1401
1402 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1403 if (sk) {
1404 sk->sk_family = family;
1405
1406
1407
1408
1409 sk->sk_prot = sk->sk_prot_creator = prot;
1410 sock_lock_init(sk);
1411 sock_net_set(sk, get_net(net));
1412 atomic_set(&sk->sk_wmem_alloc, 1);
1413
1414 sock_update_classid(sk);
1415 sock_update_netprioidx(sk);
1416 }
1417
1418 return sk;
1419}
1420EXPORT_SYMBOL(sk_alloc);
1421
1422static void __sk_free(struct sock *sk)
1423{
1424 struct sk_filter *filter;
1425
1426 if (sk->sk_destruct)
1427 sk->sk_destruct(sk);
1428
1429 filter = rcu_dereference_check(sk->sk_filter,
1430 atomic_read(&sk->sk_wmem_alloc) == 0);
1431 if (filter) {
1432 sk_filter_uncharge(sk, filter);
1433 RCU_INIT_POINTER(sk->sk_filter, NULL);
1434 }
1435
1436 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1437
1438 if (atomic_read(&sk->sk_omem_alloc))
1439 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1440 __func__, atomic_read(&sk->sk_omem_alloc));
1441
1442 if (sk->sk_peer_cred)
1443 put_cred(sk->sk_peer_cred);
1444 put_pid(sk->sk_peer_pid);
1445 put_net(sock_net(sk));
1446 sk_prot_free(sk->sk_prot_creator, sk);
1447}
1448
1449void sk_free(struct sock *sk)
1450{
1451
1452
1453
1454
1455
1456 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1457 __sk_free(sk);
1458}
1459EXPORT_SYMBOL(sk_free);
1460
1461
1462
1463
1464
1465
1466
1467
1468void sk_release_kernel(struct sock *sk)
1469{
1470 if (sk == NULL || sk->sk_socket == NULL)
1471 return;
1472
1473 sock_hold(sk);
1474 sock_release(sk->sk_socket);
1475 sock_net_set(sk, get_net(&init_net));
1476 sock_put(sk);
1477}
1478EXPORT_SYMBOL(sk_release_kernel);
1479
1480static void sk_update_clone(const struct sock *sk, struct sock *newsk)
1481{
1482 if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
1483 sock_update_memcg(newsk);
1484}
1485
1486
1487
1488
1489
1490
1491
1492
1493struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1494{
1495 struct sock *newsk;
1496 bool is_charged = true;
1497
1498 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1499 if (newsk != NULL) {
1500 struct sk_filter *filter;
1501
1502 sock_copy(newsk, sk);
1503
1504
1505 get_net(sock_net(newsk));
1506 sk_node_init(&newsk->sk_node);
1507 sock_lock_init(newsk);
1508 bh_lock_sock(newsk);
1509 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1510 newsk->sk_backlog.len = 0;
1511
1512 atomic_set(&newsk->sk_rmem_alloc, 0);
1513
1514
1515
1516 atomic_set(&newsk->sk_wmem_alloc, 1);
1517 atomic_set(&newsk->sk_omem_alloc, 0);
1518 skb_queue_head_init(&newsk->sk_receive_queue);
1519 skb_queue_head_init(&newsk->sk_write_queue);
1520
1521 spin_lock_init(&newsk->sk_dst_lock);
1522 rwlock_init(&newsk->sk_callback_lock);
1523 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1524 af_callback_keys + newsk->sk_family,
1525 af_family_clock_key_strings[newsk->sk_family]);
1526
1527 newsk->sk_dst_cache = NULL;
1528 newsk->sk_wmem_queued = 0;
1529 newsk->sk_forward_alloc = 0;
1530 newsk->sk_send_head = NULL;
1531 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1532
1533 sock_reset_flag(newsk, SOCK_DONE);
1534 skb_queue_head_init(&newsk->sk_error_queue);
1535
1536 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1537 if (filter != NULL)
1538
1539
1540
1541
1542 is_charged = sk_filter_charge(newsk, filter);
1543
1544 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk))) {
1545
1546
1547 newsk->sk_destruct = NULL;
1548 bh_unlock_sock(newsk);
1549 sk_free(newsk);
1550 newsk = NULL;
1551 goto out;
1552 }
1553
1554 newsk->sk_err = 0;
1555 newsk->sk_priority = 0;
1556 newsk->sk_incoming_cpu = raw_smp_processor_id();
1557 atomic64_set(&newsk->sk_cookie, 0);
1558
1559
1560
1561
1562 smp_wmb();
1563 atomic_set(&newsk->sk_refcnt, 2);
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576 sk_refcnt_debug_inc(newsk);
1577 sk_set_socket(newsk, NULL);
1578 newsk->sk_wq = NULL;
1579
1580 sk_update_clone(sk, newsk);
1581
1582 if (newsk->sk_prot->sockets_allocated)
1583 sk_sockets_allocated_inc(newsk);
1584
1585 if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1586 net_enable_timestamp();
1587 }
1588out:
1589 return newsk;
1590}
1591EXPORT_SYMBOL_GPL(sk_clone_lock);
1592
1593void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1594{
1595 __sk_dst_set(sk, dst);
1596 sk->sk_route_caps = dst->dev->features;
1597 if (sk->sk_route_caps & NETIF_F_GSO)
1598 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1599 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1600 if (sk_can_gso(sk)) {
1601 if (dst->header_len) {
1602 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1603 } else {
1604 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1605 sk->sk_gso_max_size = dst->dev->gso_max_size;
1606 sk->sk_gso_max_segs = dst->dev->gso_max_segs;
1607 }
1608 }
1609}
1610EXPORT_SYMBOL_GPL(sk_setup_caps);
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620void sock_wfree(struct sk_buff *skb)
1621{
1622 struct sock *sk = skb->sk;
1623 unsigned int len = skb->truesize;
1624
1625 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
1626
1627
1628
1629
1630 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1631 sk->sk_write_space(sk);
1632 len = 1;
1633 }
1634
1635
1636
1637
1638 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1639 __sk_free(sk);
1640}
1641EXPORT_SYMBOL(sock_wfree);
1642
1643void skb_orphan_partial(struct sk_buff *skb)
1644{
1645
1646
1647
1648
1649 if (skb->destructor == sock_wfree
1650#ifdef CONFIG_INET
1651 || skb->destructor == tcp_wfree
1652#endif
1653 ) {
1654 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1655 skb->truesize = 1;
1656 } else {
1657 skb_orphan(skb);
1658 }
1659}
1660EXPORT_SYMBOL(skb_orphan_partial);
1661
1662
1663
1664
1665void sock_rfree(struct sk_buff *skb)
1666{
1667 struct sock *sk = skb->sk;
1668 unsigned int len = skb->truesize;
1669
1670 atomic_sub(len, &sk->sk_rmem_alloc);
1671 sk_mem_uncharge(sk, len);
1672}
1673EXPORT_SYMBOL(sock_rfree);
1674
1675
1676
1677
1678
1679void sock_efree(struct sk_buff *skb)
1680{
1681 sock_put(skb->sk);
1682}
1683EXPORT_SYMBOL(sock_efree);
1684
1685kuid_t sock_i_uid(struct sock *sk)
1686{
1687 kuid_t uid;
1688
1689 read_lock_bh(&sk->sk_callback_lock);
1690 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1691 read_unlock_bh(&sk->sk_callback_lock);
1692 return uid;
1693}
1694EXPORT_SYMBOL(sock_i_uid);
1695
1696unsigned long sock_i_ino(struct sock *sk)
1697{
1698 unsigned long ino;
1699
1700 read_lock_bh(&sk->sk_callback_lock);
1701 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1702 read_unlock_bh(&sk->sk_callback_lock);
1703 return ino;
1704}
1705EXPORT_SYMBOL(sock_i_ino);
1706
1707
1708
1709
1710struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1711 gfp_t priority)
1712{
1713 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1714 struct sk_buff *skb = alloc_skb(size, priority);
1715 if (skb) {
1716 skb_set_owner_w(skb, sk);
1717 return skb;
1718 }
1719 }
1720 return NULL;
1721}
1722EXPORT_SYMBOL(sock_wmalloc);
1723
1724
1725
1726
1727void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1728{
1729 if ((unsigned int)size <= sysctl_optmem_max &&
1730 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1731 void *mem;
1732
1733
1734
1735 atomic_add(size, &sk->sk_omem_alloc);
1736 mem = kmalloc(size, priority);
1737 if (mem)
1738 return mem;
1739 atomic_sub(size, &sk->sk_omem_alloc);
1740 }
1741 return NULL;
1742}
1743EXPORT_SYMBOL(sock_kmalloc);
1744
1745
1746
1747
1748
1749static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1750 const bool nullify)
1751{
1752 if (WARN_ON_ONCE(!mem))
1753 return;
1754 if (nullify)
1755 kzfree(mem);
1756 else
1757 kfree(mem);
1758 atomic_sub(size, &sk->sk_omem_alloc);
1759}
1760
1761void sock_kfree_s(struct sock *sk, void *mem, int size)
1762{
1763 __sock_kfree_s(sk, mem, size, false);
1764}
1765EXPORT_SYMBOL(sock_kfree_s);
1766
1767void sock_kzfree_s(struct sock *sk, void *mem, int size)
1768{
1769 __sock_kfree_s(sk, mem, size, true);
1770}
1771EXPORT_SYMBOL(sock_kzfree_s);
1772
1773
1774
1775
1776static long sock_wait_for_wmem(struct sock *sk, long timeo)
1777{
1778 DEFINE_WAIT(wait);
1779
1780 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1781 for (;;) {
1782 if (!timeo)
1783 break;
1784 if (signal_pending(current))
1785 break;
1786 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1787 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1788 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1789 break;
1790 if (sk->sk_shutdown & SEND_SHUTDOWN)
1791 break;
1792 if (sk->sk_err)
1793 break;
1794 timeo = schedule_timeout(timeo);
1795 }
1796 finish_wait(sk_sleep(sk), &wait);
1797 return timeo;
1798}
1799
1800
1801
1802
1803
1804
1805struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1806 unsigned long data_len, int noblock,
1807 int *errcode, int max_page_order)
1808{
1809 struct sk_buff *skb;
1810 long timeo;
1811 int err;
1812
1813 timeo = sock_sndtimeo(sk, noblock);
1814 for (;;) {
1815 err = sock_error(sk);
1816 if (err != 0)
1817 goto failure;
1818
1819 err = -EPIPE;
1820 if (sk->sk_shutdown & SEND_SHUTDOWN)
1821 goto failure;
1822
1823 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1824 break;
1825
1826 set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
1827 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1828 err = -EAGAIN;
1829 if (!timeo)
1830 goto failure;
1831 if (signal_pending(current))
1832 goto interrupted;
1833 timeo = sock_wait_for_wmem(sk, timeo);
1834 }
1835 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1836 errcode, sk->sk_allocation);
1837 if (skb)
1838 skb_set_owner_w(skb, sk);
1839 return skb;
1840
1841interrupted:
1842 err = sock_intr_errno(timeo);
1843failure:
1844 *errcode = err;
1845 return NULL;
1846}
1847EXPORT_SYMBOL(sock_alloc_send_pskb);
1848
/*
 * Convenience wrapper around sock_alloc_send_pskb() for a purely linear
 * skb: no fragment data and a maximum page order of 0.
 */
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
1854EXPORT_SYMBOL(sock_alloc_send_skb);
1855
1856
1857#define SKB_FRAG_PAGE_ORDER get_order(32768)
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1870{
1871 if (pfrag->page) {
1872 if (atomic_read(&pfrag->page->_count) == 1) {
1873 pfrag->offset = 0;
1874 return true;
1875 }
1876 if (pfrag->offset + sz <= pfrag->size)
1877 return true;
1878 put_page(pfrag->page);
1879 }
1880
1881 pfrag->offset = 0;
1882 if (SKB_FRAG_PAGE_ORDER) {
1883 pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
1884 __GFP_NOWARN | __GFP_NORETRY,
1885 SKB_FRAG_PAGE_ORDER);
1886 if (likely(pfrag->page)) {
1887 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
1888 return true;
1889 }
1890 }
1891 pfrag->page = alloc_page(gfp);
1892 if (likely(pfrag->page)) {
1893 pfrag->size = PAGE_SIZE;
1894 return true;
1895 }
1896 return false;
1897}
1898EXPORT_SYMBOL(skb_page_frag_refill);
1899
1900bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
1901{
1902 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
1903 return true;
1904
1905 sk_enter_memory_pressure(sk);
1906 sk_stream_moderate_sndbuf(sk);
1907 return false;
1908}
1909EXPORT_SYMBOL(sk_page_frag_refill);
1910
1911static void __lock_sock(struct sock *sk)
1912 __releases(&sk->sk_lock.slock)
1913 __acquires(&sk->sk_lock.slock)
1914{
1915 DEFINE_WAIT(wait);
1916
1917 for (;;) {
1918 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1919 TASK_UNINTERRUPTIBLE);
1920 spin_unlock_bh(&sk->sk_lock.slock);
1921 schedule();
1922 spin_lock_bh(&sk->sk_lock.slock);
1923 if (!sock_owned_by_user(sk))
1924 break;
1925 }
1926 finish_wait(&sk->sk_lock.wq, &wait);
1927}
1928
1929static void __release_sock(struct sock *sk)
1930 __releases(&sk->sk_lock.slock)
1931 __acquires(&sk->sk_lock.slock)
1932{
1933 struct sk_buff *skb = sk->sk_backlog.head;
1934
1935 do {
1936 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1937 bh_unlock_sock(sk);
1938
1939 do {
1940 struct sk_buff *next = skb->next;
1941
1942 prefetch(next);
1943 WARN_ON_ONCE(skb_dst_is_noref(skb));
1944 skb->next = NULL;
1945 sk_backlog_rcv(sk, skb);
1946
1947
1948
1949
1950
1951
1952
1953 cond_resched_softirq();
1954
1955 skb = next;
1956 } while (skb != NULL);
1957
1958 bh_lock_sock(sk);
1959 } while ((skb = sk->sk_backlog.head) != NULL);
1960
1961
1962
1963
1964
1965 sk->sk_backlog.len = 0;
1966}
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978int sk_wait_data(struct sock *sk, long *timeo)
1979{
1980 int rc;
1981 DEFINE_WAIT(wait);
1982
1983 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1984 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1985 rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
1986 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1987 finish_wait(sk_sleep(sk), &wait);
1988 return rc;
1989}
1990EXPORT_SYMBOL(sk_wait_data);
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002int __sk_mem_schedule(struct sock *sk, int size, int kind)
2003{
2004 struct proto *prot = sk->sk_prot;
2005 int amt = sk_mem_pages(size);
2006 long allocated;
2007 int parent_status = UNDER_LIMIT;
2008
2009 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2010
2011 allocated = sk_memory_allocated_add(sk, amt, &parent_status);
2012
2013
2014 if (parent_status == UNDER_LIMIT &&
2015 allocated <= sk_prot_mem_limits(sk, 0)) {
2016 sk_leave_memory_pressure(sk);
2017 return 1;
2018 }
2019
2020
2021 if ((parent_status > SOFT_LIMIT) ||
2022 allocated > sk_prot_mem_limits(sk, 1))
2023 sk_enter_memory_pressure(sk);
2024
2025
2026 if ((parent_status == OVER_LIMIT) ||
2027 (allocated > sk_prot_mem_limits(sk, 2)))
2028 goto suppress_allocation;
2029
2030
2031 if (kind == SK_MEM_RECV) {
2032 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2033 return 1;
2034
2035 } else {
2036 if (sk->sk_type == SOCK_STREAM) {
2037 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2038 return 1;
2039 } else if (atomic_read(&sk->sk_wmem_alloc) <
2040 prot->sysctl_wmem[0])
2041 return 1;
2042 }
2043
2044 if (sk_has_memory_pressure(sk)) {
2045 int alloc;
2046
2047 if (!sk_under_memory_pressure(sk))
2048 return 1;
2049 alloc = sk_sockets_allocated_read_positive(sk);
2050 if (sk_prot_mem_limits(sk, 2) > alloc *
2051 sk_mem_pages(sk->sk_wmem_queued +
2052 atomic_read(&sk->sk_rmem_alloc) +
2053 sk->sk_forward_alloc))
2054 return 1;
2055 }
2056
2057suppress_allocation:
2058
2059 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2060 sk_stream_moderate_sndbuf(sk);
2061
2062
2063
2064
2065 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2066 return 1;
2067 }
2068
2069 trace_sock_exceed_buf_limit(sk, prot, allocated);
2070
2071
2072 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2073
2074 sk_memory_allocated_sub(sk, amt);
2075
2076 return 0;
2077}
2078EXPORT_SYMBOL(__sk_mem_schedule);
2079
2080
2081
2082
2083
2084void __sk_mem_reclaim(struct sock *sk)
2085{
2086 sk_memory_allocated_sub(sk,
2087 sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
2088 sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
2089
2090 if (sk_under_memory_pressure(sk) &&
2091 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2092 sk_leave_memory_pressure(sk);
2093}
2094EXPORT_SYMBOL(__sk_mem_reclaim);
2095
2096
2097
2098
2099
2100
2101
2102
2103
/* bind() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
2108EXPORT_SYMBOL(sock_no_bind);
2109
/* connect() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
2115EXPORT_SYMBOL(sock_no_connect);
2116
/* socketpair() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
2121EXPORT_SYMBOL(sock_no_socketpair);
2122
/* accept() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
2127EXPORT_SYMBOL(sock_no_accept);
2128
/* getname() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
2134EXPORT_SYMBOL(sock_no_getname);
2135
2136unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2137{
2138 return 0;
2139}
2140EXPORT_SYMBOL(sock_no_poll);
2141
/* ioctl() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
2146EXPORT_SYMBOL(sock_no_ioctl);
2147
/* listen() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
2152EXPORT_SYMBOL(sock_no_listen);
2153
/* shutdown() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
2158EXPORT_SYMBOL(sock_no_shutdown);
2159
2160int sock_no_setsockopt(struct socket *sock, int level, int optname,
2161 char __user *optval, unsigned int optlen)
2162{
2163 return -EOPNOTSUPP;
2164}
2165EXPORT_SYMBOL(sock_no_setsockopt);
2166
2167int sock_no_getsockopt(struct socket *sock, int level, int optname,
2168 char __user *optval, int __user *optlen)
2169{
2170 return -EOPNOTSUPP;
2171}
2172EXPORT_SYMBOL(sock_no_getsockopt);
2173
/* sendmsg() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
{
	return -EOPNOTSUPP;
}
2178EXPORT_SYMBOL(sock_no_sendmsg);
2179
/* recvmsg() stub: unconditionally fails with -EOPNOTSUPP. */
int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
		    int flags)
{
	return -EOPNOTSUPP;
}
2185EXPORT_SYMBOL(sock_no_recvmsg);
2186
/* mmap() stub: unconditionally fails, with -ENODEV rather than -EOPNOTSUPP. */
int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	return -ENODEV;
}
2192EXPORT_SYMBOL(sock_no_mmap);
2193
2194ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2195{
2196 ssize_t res;
2197 struct msghdr msg = {.msg_flags = flags};
2198 struct kvec iov;
2199 char *kaddr = kmap(page);
2200 iov.iov_base = kaddr + offset;
2201 iov.iov_len = size;
2202 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2203 kunmap(page);
2204 return res;
2205}
2206EXPORT_SYMBOL(sock_no_sendpage);
2207
2208
2209
2210
2211
2212static void sock_def_wakeup(struct sock *sk)
2213{
2214 struct socket_wq *wq;
2215
2216 rcu_read_lock();
2217 wq = rcu_dereference(sk->sk_wq);
2218 if (wq_has_sleeper(wq))
2219 wake_up_interruptible_all(&wq->wait);
2220 rcu_read_unlock();
2221}
2222
2223static void sock_def_error_report(struct sock *sk)
2224{
2225 struct socket_wq *wq;
2226
2227 rcu_read_lock();
2228 wq = rcu_dereference(sk->sk_wq);
2229 if (wq_has_sleeper(wq))
2230 wake_up_interruptible_poll(&wq->wait, POLLERR);
2231 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2232 rcu_read_unlock();
2233}
2234
2235static void sock_def_readable(struct sock *sk)
2236{
2237 struct socket_wq *wq;
2238
2239 rcu_read_lock();
2240 wq = rcu_dereference(sk->sk_wq);
2241 if (wq_has_sleeper(wq))
2242 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2243 POLLRDNORM | POLLRDBAND);
2244 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2245 rcu_read_unlock();
2246}
2247
2248static void sock_def_write_space(struct sock *sk)
2249{
2250 struct socket_wq *wq;
2251
2252 rcu_read_lock();
2253
2254
2255
2256
2257 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2258 wq = rcu_dereference(sk->sk_wq);
2259 if (wq_has_sleeper(wq))
2260 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2261 POLLWRNORM | POLLWRBAND);
2262
2263
2264 if (sock_writeable(sk))
2265 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2266 }
2267
2268 rcu_read_unlock();
2269}
2270
2271static void sock_def_destruct(struct sock *sk)
2272{
2273 kfree(sk->sk_protinfo);
2274}
2275
2276void sk_send_sigurg(struct sock *sk)
2277{
2278 if (sk->sk_socket && sk->sk_socket->file)
2279 if (send_sigurg(&sk->sk_socket->file->f_owner))
2280 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2281}
2282EXPORT_SYMBOL(sk_send_sigurg);
2283
/*
 * (Re)arm @timer to fire at @expires.  A socket reference is taken when
 * mod_timer() returns 0.
 */
void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (mod_timer(timer, expires))
		return;

	sock_hold(sk);
}
2290EXPORT_SYMBOL(sk_reset_timer);
2291
/*
 * Cancel @timer.  When del_timer() returns non-zero, a socket reference
 * is dropped via __sock_put().
 */
void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (!del_timer(timer))
		return;

	__sock_put(sk);
}
2297EXPORT_SYMBOL(sk_stop_timer);
2298
2299void sock_init_data(struct socket *sock, struct sock *sk)
2300{
2301 skb_queue_head_init(&sk->sk_receive_queue);
2302 skb_queue_head_init(&sk->sk_write_queue);
2303 skb_queue_head_init(&sk->sk_error_queue);
2304
2305 sk->sk_send_head = NULL;
2306
2307 init_timer(&sk->sk_timer);
2308
2309 sk->sk_allocation = GFP_KERNEL;
2310 sk->sk_rcvbuf = sysctl_rmem_default;
2311 sk->sk_sndbuf = sysctl_wmem_default;
2312 sk->sk_state = TCP_CLOSE;
2313 sk_set_socket(sk, sock);
2314
2315 sock_set_flag(sk, SOCK_ZAPPED);
2316
2317 if (sock) {
2318 sk->sk_type = sock->type;
2319 sk->sk_wq = sock->wq;
2320 sock->sk = sk;
2321 } else
2322 sk->sk_wq = NULL;
2323
2324 spin_lock_init(&sk->sk_dst_lock);
2325 rwlock_init(&sk->sk_callback_lock);
2326 lockdep_set_class_and_name(&sk->sk_callback_lock,
2327 af_callback_keys + sk->sk_family,
2328 af_family_clock_key_strings[sk->sk_family]);
2329
2330 sk->sk_state_change = sock_def_wakeup;
2331 sk->sk_data_ready = sock_def_readable;
2332 sk->sk_write_space = sock_def_write_space;
2333 sk->sk_error_report = sock_def_error_report;
2334 sk->sk_destruct = sock_def_destruct;
2335
2336 sk->sk_frag.page = NULL;
2337 sk->sk_frag.offset = 0;
2338 sk->sk_peek_off = -1;
2339
2340 sk->sk_peer_pid = NULL;
2341 sk->sk_peer_cred = NULL;
2342 sk->sk_write_pending = 0;
2343 sk->sk_rcvlowat = 1;
2344 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2345 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2346
2347 sk->sk_stamp = ktime_set(-1L, 0);
2348
2349#ifdef CONFIG_NET_RX_BUSY_POLL
2350 sk->sk_napi_id = 0;
2351 sk->sk_ll_usec = sysctl_net_busy_read;
2352#endif
2353
2354 sk->sk_max_pacing_rate = ~0U;
2355 sk->sk_pacing_rate = ~0U;
2356
2357
2358
2359
2360 smp_wmb();
2361 atomic_set(&sk->sk_refcnt, 1);
2362 atomic_set(&sk->sk_drops, 0);
2363}
2364EXPORT_SYMBOL(sock_init_data);
2365
2366void lock_sock_nested(struct sock *sk, int subclass)
2367{
2368 might_sleep();
2369 spin_lock_bh(&sk->sk_lock.slock);
2370 if (sk->sk_lock.owned)
2371 __lock_sock(sk);
2372 sk->sk_lock.owned = 1;
2373 spin_unlock(&sk->sk_lock.slock);
2374
2375
2376
2377 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2378 local_bh_enable();
2379}
2380EXPORT_SYMBOL(lock_sock_nested);
2381
2382void release_sock(struct sock *sk)
2383{
2384
2385
2386
2387 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2388
2389 spin_lock_bh(&sk->sk_lock.slock);
2390 if (sk->sk_backlog.tail)
2391 __release_sock(sk);
2392
2393
2394
2395
2396 if (sk->sk_prot->release_cb)
2397 sk->sk_prot->release_cb(sk);
2398
2399 sock_release_ownership(sk);
2400 if (waitqueue_active(&sk->sk_lock.wq))
2401 wake_up(&sk->sk_lock.wq);
2402 spin_unlock_bh(&sk->sk_lock.slock);
2403}
2404EXPORT_SYMBOL(release_sock);
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416bool lock_sock_fast(struct sock *sk)
2417{
2418 might_sleep();
2419 spin_lock_bh(&sk->sk_lock.slock);
2420
2421 if (!sk->sk_lock.owned)
2422
2423
2424
2425 return false;
2426
2427 __lock_sock(sk);
2428 sk->sk_lock.owned = 1;
2429 spin_unlock(&sk->sk_lock.slock);
2430
2431
2432
2433 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2434 local_bh_enable();
2435 return true;
2436}
2437EXPORT_SYMBOL(lock_sock_fast);
2438
2439int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
2440{
2441 struct timeval tv;
2442 if (!sock_flag(sk, SOCK_TIMESTAMP))
2443 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2444 tv = ktime_to_timeval(sk->sk_stamp);
2445 if (tv.tv_sec == -1)
2446 return -ENOENT;
2447 if (tv.tv_sec == 0) {
2448 sk->sk_stamp = ktime_get_real();
2449 tv = ktime_to_timeval(sk->sk_stamp);
2450 }
2451 return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
2452}
2453EXPORT_SYMBOL(sock_get_timestamp);
2454
2455int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
2456{
2457 struct timespec ts;
2458 if (!sock_flag(sk, SOCK_TIMESTAMP))
2459 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
2460 ts = ktime_to_timespec(sk->sk_stamp);
2461 if (ts.tv_sec == -1)
2462 return -ENOENT;
2463 if (ts.tv_sec == 0) {
2464 sk->sk_stamp = ktime_get_real();
2465 ts = ktime_to_timespec(sk->sk_stamp);
2466 }
2467 return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
2468}
2469EXPORT_SYMBOL(sock_get_timestampns);
2470
2471void sock_enable_timestamp(struct sock *sk, int flag)
2472{
2473 if (!sock_flag(sk, flag)) {
2474 unsigned long previous_flags = sk->sk_flags;
2475
2476 sock_set_flag(sk, flag);
2477
2478
2479
2480
2481
2482 if (!(previous_flags & SK_FLAGS_TIMESTAMP))
2483 net_enable_timestamp();
2484 }
2485}
2486
2487int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
2488 int level, int type)
2489{
2490 struct sock_exterr_skb *serr;
2491 struct sk_buff *skb;
2492 int copied, err;
2493
2494 err = -EAGAIN;
2495 skb = sock_dequeue_err_skb(sk);
2496 if (skb == NULL)
2497 goto out;
2498
2499 copied = skb->len;
2500 if (copied > len) {
2501 msg->msg_flags |= MSG_TRUNC;
2502 copied = len;
2503 }
2504 err = skb_copy_datagram_msg(skb, 0, msg, copied);
2505 if (err)
2506 goto out_free_skb;
2507
2508 sock_recv_timestamp(msg, sk, skb);
2509
2510 serr = SKB_EXT_ERR(skb);
2511 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
2512
2513 msg->msg_flags |= MSG_ERRQUEUE;
2514 err = copied;
2515
2516out_free_skb:
2517 kfree_skb(skb);
2518out:
2519 return err;
2520}
2521EXPORT_SYMBOL(sock_recv_errqueue);
2522
2523
2524
2525
2526
2527
2528
2529
2530int sock_common_getsockopt(struct socket *sock, int level, int optname,
2531 char __user *optval, int __user *optlen)
2532{
2533 struct sock *sk = sock->sk;
2534
2535 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2536}
2537EXPORT_SYMBOL(sock_common_getsockopt);
2538
2539#ifdef CONFIG_COMPAT
2540int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
2541 char __user *optval, int __user *optlen)
2542{
2543 struct sock *sk = sock->sk;
2544
2545 if (sk->sk_prot->compat_getsockopt != NULL)
2546 return sk->sk_prot->compat_getsockopt(sk, level, optname,
2547 optval, optlen);
2548 return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
2549}
2550EXPORT_SYMBOL(compat_sock_common_getsockopt);
2551#endif
2552
2553int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
2554 int flags)
2555{
2556 struct sock *sk = sock->sk;
2557 int addr_len = 0;
2558 int err;
2559
2560 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
2561 flags & ~MSG_DONTWAIT, &addr_len);
2562 if (err >= 0)
2563 msg->msg_namelen = addr_len;
2564 return err;
2565}
2566EXPORT_SYMBOL(sock_common_recvmsg);
2567
2568
2569
2570
2571int sock_common_setsockopt(struct socket *sock, int level, int optname,
2572 char __user *optval, unsigned int optlen)
2573{
2574 struct sock *sk = sock->sk;
2575
2576 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2577}
2578EXPORT_SYMBOL(sock_common_setsockopt);
2579
2580#ifdef CONFIG_COMPAT
2581int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
2582 char __user *optval, unsigned int optlen)
2583{
2584 struct sock *sk = sock->sk;
2585
2586 if (sk->sk_prot->compat_setsockopt != NULL)
2587 return sk->sk_prot->compat_setsockopt(sk, level, optname,
2588 optval, optlen);
2589 return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
2590}
2591EXPORT_SYMBOL(compat_sock_common_setsockopt);
2592#endif
2593
2594void sk_common_release(struct sock *sk)
2595{
2596 if (sk->sk_prot->destroy)
2597 sk->sk_prot->destroy(sk);
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607 sk->sk_prot->unhash(sk);
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621 sock_orphan(sk);
2622
2623 xfrm_sk_free_policy(sk);
2624
2625 sk_refcnt_debug_release(sk);
2626
2627 if (sk->sk_frag.page) {
2628 put_page(sk->sk_frag.page);
2629 sk->sk_frag.page = NULL;
2630 }
2631
2632 sock_put(sk);
2633}
2634EXPORT_SYMBOL(sk_common_release);
2635
2636#ifdef CONFIG_PROC_FS
2637#define PROTO_INUSE_NR 64
2638struct prot_inuse {
2639 int val[PROTO_INUSE_NR];
2640};
2641
2642static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
2643
2644#ifdef CONFIG_NET_NS
2645void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2646{
2647 __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
2648}
2649EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2650
2651int sock_prot_inuse_get(struct net *net, struct proto *prot)
2652{
2653 int cpu, idx = prot->inuse_idx;
2654 int res = 0;
2655
2656 for_each_possible_cpu(cpu)
2657 res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
2658
2659 return res >= 0 ? res : 0;
2660}
2661EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2662
2663static int __net_init sock_inuse_init_net(struct net *net)
2664{
2665 net->core.inuse = alloc_percpu(struct prot_inuse);
2666 return net->core.inuse ? 0 : -ENOMEM;
2667}
2668
2669static void __net_exit sock_inuse_exit_net(struct net *net)
2670{
2671 free_percpu(net->core.inuse);
2672}
2673
2674static struct pernet_operations net_inuse_ops = {
2675 .init = sock_inuse_init_net,
2676 .exit = sock_inuse_exit_net,
2677};
2678
2679static __init int net_inuse_init(void)
2680{
2681 if (register_pernet_subsys(&net_inuse_ops))
2682 panic("Cannot initialize net inuse counters");
2683
2684 return 0;
2685}
2686
2687core_initcall(net_inuse_init);
2688#else
2689static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);
2690
2691void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
2692{
2693 __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
2694}
2695EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
2696
2697int sock_prot_inuse_get(struct net *net, struct proto *prot)
2698{
2699 int cpu, idx = prot->inuse_idx;
2700 int res = 0;
2701
2702 for_each_possible_cpu(cpu)
2703 res += per_cpu(prot_inuse, cpu).val[idx];
2704
2705 return res >= 0 ? res : 0;
2706}
2707EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
2708#endif
2709
2710static void assign_proto_idx(struct proto *prot)
2711{
2712 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
2713
2714 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
2715 pr_err("PROTO_INUSE_NR exhausted\n");
2716 return;
2717 }
2718
2719 set_bit(prot->inuse_idx, proto_inuse_idx);
2720}
2721
2722static void release_proto_idx(struct proto *prot)
2723{
2724 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
2725 clear_bit(prot->inuse_idx, proto_inuse_idx);
2726}
2727#else
/* No-op variant used when CONFIG_PROC_FS is not set. */
static inline void assign_proto_idx(struct proto *prot)
{
}
2731
/* No-op variant used when CONFIG_PROC_FS is not set. */
static inline void release_proto_idx(struct proto *prot)
{
}
2735#endif
2736
2737static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
2738{
2739 if (!rsk_prot)
2740 return;
2741 kfree(rsk_prot->slab_name);
2742 rsk_prot->slab_name = NULL;
2743 if (rsk_prot->slab) {
2744 kmem_cache_destroy(rsk_prot->slab);
2745 rsk_prot->slab = NULL;
2746 }
2747}
2748
2749static int req_prot_init(const struct proto *prot)
2750{
2751 struct request_sock_ops *rsk_prot = prot->rsk_prot;
2752
2753 if (!rsk_prot)
2754 return 0;
2755
2756 rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
2757 prot->name);
2758 if (!rsk_prot->slab_name)
2759 return -ENOMEM;
2760
2761 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
2762 rsk_prot->obj_size, 0,
2763 0, NULL);
2764
2765 if (!rsk_prot->slab) {
2766 pr_crit("%s: Can't create request sock SLAB cache!\n",
2767 prot->name);
2768 return -ENOMEM;
2769 }
2770 return 0;
2771}
2772
2773int proto_register(struct proto *prot, int alloc_slab)
2774{
2775 if (alloc_slab) {
2776 prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
2777 SLAB_HWCACHE_ALIGN | prot->slab_flags,
2778 NULL);
2779
2780 if (prot->slab == NULL) {
2781 pr_crit("%s: Can't create sock SLAB cache!\n",
2782 prot->name);
2783 goto out;
2784 }
2785
2786 if (req_prot_init(prot))
2787 goto out_free_request_sock_slab;
2788
2789 if (prot->twsk_prot != NULL) {
2790 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
2791
2792 if (prot->twsk_prot->twsk_slab_name == NULL)
2793 goto out_free_request_sock_slab;
2794
2795 prot->twsk_prot->twsk_slab =
2796 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
2797 prot->twsk_prot->twsk_obj_size,
2798 0,
2799 prot->slab_flags,
2800 NULL);
2801 if (prot->twsk_prot->twsk_slab == NULL)
2802 goto out_free_timewait_sock_slab_name;
2803 }
2804 }
2805
2806 mutex_lock(&proto_list_mutex);
2807 list_add(&prot->node, &proto_list);
2808 assign_proto_idx(prot);
2809 mutex_unlock(&proto_list_mutex);
2810 return 0;
2811
2812out_free_timewait_sock_slab_name:
2813 kfree(prot->twsk_prot->twsk_slab_name);
2814out_free_request_sock_slab:
2815 req_prot_cleanup(prot->rsk_prot);
2816
2817 kmem_cache_destroy(prot->slab);
2818 prot->slab = NULL;
2819out:
2820 return -ENOBUFS;
2821}
2822EXPORT_SYMBOL(proto_register);
2823
2824void proto_unregister(struct proto *prot)
2825{
2826 mutex_lock(&proto_list_mutex);
2827 release_proto_idx(prot);
2828 list_del(&prot->node);
2829 mutex_unlock(&proto_list_mutex);
2830
2831 if (prot->slab != NULL) {
2832 kmem_cache_destroy(prot->slab);
2833 prot->slab = NULL;
2834 }
2835
2836 req_prot_cleanup(prot->rsk_prot);
2837
2838 if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
2839 kmem_cache_destroy(prot->twsk_prot->twsk_slab);
2840 kfree(prot->twsk_prot->twsk_slab_name);
2841 prot->twsk_prot->twsk_slab = NULL;
2842 }
2843}
2844EXPORT_SYMBOL(proto_unregister);
2845
2846#ifdef CONFIG_PROC_FS
2847static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
2848 __acquires(proto_list_mutex)
2849{
2850 mutex_lock(&proto_list_mutex);
2851 return seq_list_start_head(&proto_list, *pos);
2852}
2853
2854static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2855{
2856 return seq_list_next(v, &proto_list, pos);
2857}
2858
2859static void proto_seq_stop(struct seq_file *seq, void *v)
2860 __releases(proto_list_mutex)
2861{
2862 mutex_unlock(&proto_list_mutex);
2863}
2864
/* Map a method pointer to 'y' (set) or 'n' (NULL) for the /proc listing. */
static char proto_method_implemented(const void *method)
{
	return method ? 'y' : 'n';
}
2869static long sock_prot_memory_allocated(struct proto *proto)
2870{
2871 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
2872}
2873
2874static char *sock_prot_memory_pressure(struct proto *proto)
2875{
2876 return proto->memory_pressure != NULL ?
2877 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
2878}
2879
2880static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
2881{
2882
2883 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
2884 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
2885 proto->name,
2886 proto->obj_size,
2887 sock_prot_inuse_get(seq_file_net(seq), proto),
2888 sock_prot_memory_allocated(proto),
2889 sock_prot_memory_pressure(proto),
2890 proto->max_header,
2891 proto->slab == NULL ? "no" : "yes",
2892 module_name(proto->owner),
2893 proto_method_implemented(proto->close),
2894 proto_method_implemented(proto->connect),
2895 proto_method_implemented(proto->disconnect),
2896 proto_method_implemented(proto->accept),
2897 proto_method_implemented(proto->ioctl),
2898 proto_method_implemented(proto->init),
2899 proto_method_implemented(proto->destroy),
2900 proto_method_implemented(proto->shutdown),
2901 proto_method_implemented(proto->setsockopt),
2902 proto_method_implemented(proto->getsockopt),
2903 proto_method_implemented(proto->sendmsg),
2904 proto_method_implemented(proto->recvmsg),
2905 proto_method_implemented(proto->sendpage),
2906 proto_method_implemented(proto->bind),
2907 proto_method_implemented(proto->backlog_rcv),
2908 proto_method_implemented(proto->hash),
2909 proto_method_implemented(proto->unhash),
2910 proto_method_implemented(proto->get_port),
2911 proto_method_implemented(proto->enter_memory_pressure));
2912}
2913
2914static int proto_seq_show(struct seq_file *seq, void *v)
2915{
2916 if (v == &proto_list)
2917 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
2918 "protocol",
2919 "size",
2920 "sockets",
2921 "memory",
2922 "press",
2923 "maxhdr",
2924 "slab",
2925 "module",
2926 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
2927 else
2928 proto_seq_printf(seq, list_entry(v, struct proto, node));
2929 return 0;
2930}
2931
2932static const struct seq_operations proto_seq_ops = {
2933 .start = proto_seq_start,
2934 .next = proto_seq_next,
2935 .stop = proto_seq_stop,
2936 .show = proto_seq_show,
2937};
2938
2939static int proto_seq_open(struct inode *inode, struct file *file)
2940{
2941 return seq_open_net(inode, file, &proto_seq_ops,
2942 sizeof(struct seq_net_private));
2943}
2944
2945static const struct file_operations proto_seq_fops = {
2946 .owner = THIS_MODULE,
2947 .open = proto_seq_open,
2948 .read = seq_read,
2949 .llseek = seq_lseek,
2950 .release = seq_release_net,
2951};
2952
2953static __net_init int proto_init_net(struct net *net)
2954{
2955 if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
2956 return -ENOMEM;
2957
2958 return 0;
2959}
2960
2961static __net_exit void proto_exit_net(struct net *net)
2962{
2963 remove_proc_entry("protocols", net->proc_net);
2964}
2965
2966
2967static __net_initdata struct pernet_operations proto_net_ops = {
2968 .init = proto_init_net,
2969 .exit = proto_exit_net,
2970};
2971
2972static int __init proto_init(void)
2973{
2974 return register_pernet_subsys(&proto_net_ops);
2975}
2976
2977subsys_initcall(proto_init);
2978
2979#endif
2980