/*
 * Generic socket support routines. Memory allocators, socket lock/release
 * handler for protocols to use and generic option handler.
 */

92#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
93
94#include <linux/capability.h>
95#include <linux/errno.h>
96#include <linux/errqueue.h>
97#include <linux/types.h>
98#include <linux/socket.h>
99#include <linux/in.h>
100#include <linux/kernel.h>
101#include <linux/module.h>
102#include <linux/proc_fs.h>
103#include <linux/seq_file.h>
104#include <linux/sched.h>
105#include <linux/timer.h>
106#include <linux/string.h>
107#include <linux/sockios.h>
108#include <linux/net.h>
109#include <linux/mm.h>
110#include <linux/slab.h>
111#include <linux/interrupt.h>
112#include <linux/poll.h>
113#include <linux/tcp.h>
114#include <linux/init.h>
115#include <linux/highmem.h>
116#include <linux/user_namespace.h>
117#include <linux/static_key.h>
118#include <linux/memcontrol.h>
119#include <linux/prefetch.h>
120
121#include <asm/uaccess.h>
122
123#include <linux/netdevice.h>
124#include <net/protocol.h>
125#include <linux/skbuff.h>
126#include <net/net_namespace.h>
127#include <net/request_sock.h>
128#include <net/sock.h>
129#include <linux/net_tstamp.h>
130#include <net/xfrm.h>
131#include <linux/ipsec.h>
132#include <net/cls_cgroup.h>
133#include <net/netprio_cgroup.h>
134#include <linux/sock_diag.h>
135
136#include <linux/filter.h>
137#include <net/sock_reuseport.h>
138
139#include <trace/events/sock.h>
140
141#ifdef CONFIG_INET
142#include <net/tcp.h>
143#endif
144
145#include <net/busy_poll.h>
146
147static DEFINE_MUTEX(proto_list_mutex);
148static LIST_HEAD(proto_list);
149
/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in the
 * user namespace @user_ns when the socket was created and whether the
 * current process has it as well.
 */
160bool sk_ns_capable(const struct sock *sk,
161 struct user_namespace *user_ns, int cap)
162{
163 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
164 ns_capable(user_ns, cap);
165}
166EXPORT_SYMBOL(sk_ns_capable);
167
/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had the capability @cap in the
 * initial user namespace and whether the current process has it as well.
 */
177bool sk_capable(const struct sock *sk, int cap)
178{
179 return sk_ns_capable(sk, &init_user_ns, cap);
180}
181EXPORT_SYMBOL(sk_capable);
182
/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had the capability @cap over
 * the network namespace the socket is a member of and whether the
 * current process has it as well.
 */
192bool sk_net_capable(const struct sock *sk, int cap)
193{
194 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
195}
196EXPORT_SYMBOL(sk_net_capable);
197
/*
 * Each address family gets its own lockdep class key for the socket lock
 * and its slock, so the lock validator can keep the per-family locking
 * rules apart.
 */
202static struct lock_class_key af_family_keys[AF_MAX];
203static struct lock_class_key af_family_slock_keys[AF_MAX];
204
/*
 * Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime initialization of socket
 * locks is fast):
 */
210static const char *const af_family_key_strings[AF_MAX+1] = {
211 "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" ,
212 "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK",
213 "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" ,
214 "sk_lock-AF_X25" , "sk_lock-AF_INET6" , "sk_lock-AF_ROSE" ,
215 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
216 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
217 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
218 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
219 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
220 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
221 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
222 "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" ,
223 "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" ,
224 "sk_lock-AF_NFC" , "sk_lock-AF_VSOCK" , "sk_lock-AF_KCM" ,
225 "sk_lock-AF_MAX"
226};
227static const char *const af_family_slock_key_strings[AF_MAX+1] = {
228 "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" ,
229 "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK",
230 "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" ,
231 "slock-AF_X25" , "slock-AF_INET6" , "slock-AF_ROSE" ,
232 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
233 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
234 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
235 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
236 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
237 "slock-27" , "slock-28" , "slock-AF_CAN" ,
238 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
239 "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" ,
240 "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" ,
241 "slock-AF_NFC" , "slock-AF_VSOCK" ,"slock-AF_KCM" ,
242 "slock-AF_MAX"
243};
244static const char *const af_family_clock_key_strings[AF_MAX+1] = {
245 "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" ,
246 "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK",
247 "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" ,
248 "clock-AF_X25" , "clock-AF_INET6" , "clock-AF_ROSE" ,
249 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
250 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
251 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
252 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
253 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
254 "clock-27" , "clock-28" , "clock-AF_CAN" ,
255 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
256 "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" ,
257 "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" ,
258 "clock-AF_NFC" , "clock-AF_VSOCK" , "clock-AF_KCM" ,
259 "clock-AF_MAX"
260};
261
/*
 * sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
266static struct lock_class_key af_callback_keys[AF_MAX];
267
/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
273#define _SK_MEM_PACKETS 256
274#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256)
275#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
276#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
277
/* Run time adjustable parameters. */
279__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
280EXPORT_SYMBOL(sysctl_wmem_max);
281__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
282EXPORT_SYMBOL(sysctl_rmem_max);
283__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
284__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
285
/* Maximal space eaten by iovec or ancillary data plus some space */
287int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
288EXPORT_SYMBOL(sysctl_optmem_max);
289
290int sysctl_tstamp_allow_data __read_mostly = 1;
291
292struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
293EXPORT_SYMBOL_GPL(memalloc_socks);
294
/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket and increase the static key counter.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements.
 */
303void sk_set_memalloc(struct sock *sk)
304{
305 sock_set_flag(sk, SOCK_MEMALLOC);
306 sk->sk_allocation |= __GFP_MEMALLOC;
307 static_key_slow_inc(&memalloc_socks);
308}
309EXPORT_SYMBOL_GPL(sk_set_memalloc);
310
311void sk_clear_memalloc(struct sock *sk)
312{
313 sock_reset_flag(sk, SOCK_MEMALLOC);
314 sk->sk_allocation &= ~__GFP_MEMALLOC;
315 static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it still has rmem allocations, so there is a risk the socket is
	 * unusable due to exceeding the rmem limits. Reclaim the reserves
	 * now that the flag has been dropped.
	 */
324 sk_mem_reclaim(sk);
325}
326EXPORT_SYMBOL_GPL(sk_clear_memalloc);
327
328int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
329{
330 int ret;
331 unsigned long pflags = current->flags;
332
	/* these should have been dropped before queueing */
334 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
335
336 current->flags |= PF_MEMALLOC;
337 ret = sk->sk_backlog_rcv(sk, skb);
338 tsk_restore_flags(current, pflags, PF_MEMALLOC);
339
340 return ret;
341}
342EXPORT_SYMBOL(__sk_backlog_rcv);
343
344static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
345{
346 struct timeval tv;
347
348 if (optlen < sizeof(tv))
349 return -EINVAL;
350 if (copy_from_user(&tv, optval, sizeof(tv)))
351 return -EFAULT;
352 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
353 return -EDOM;
354
355 if (tv.tv_sec < 0) {
356 static int warned __read_mostly;
357
358 *timeo_p = 0;
359 if (warned < 10 && net_ratelimit()) {
360 warned++;
361 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
362 __func__, current->comm, task_pid_nr(current));
363 }
364 return 0;
365 }
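	/* A zero timeval means "wait forever"; otherwise convert it to
	 * jiffies, rounding the microseconds up, and saturate very large
	 * values at MAX_SCHEDULE_TIMEOUT.
	 */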
366 *timeo_p = MAX_SCHEDULE_TIMEOUT;
367 if (tv.tv_sec == 0 && tv.tv_usec == 0)
368 return 0;
369 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
370 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
371 return 0;
372}
373
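/* Warn (a handful of times at most) about processes still using the
 * obsolete SO_BSDCOMPAT option.
 */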
374static void sock_warn_obsolete_bsdism(const char *name)
375{
376 static int warned;
377 static char warncomm[TASK_COMM_LEN];
378 if (strcmp(warncomm, current->comm) && warned < 5) {
379 strcpy(warncomm, current->comm);
380 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
381 warncomm, name);
382 warned++;
383 }
384}
385
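/* Purely local address families do not need the network timestamp
 * machinery enabled on their behalf.
 */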
386static bool sock_needs_netstamp(const struct sock *sk)
387{
388 switch (sk->sk_family) {
389 case AF_UNSPEC:
390 case AF_UNIX:
391 return false;
392 default:
393 return true;
394 }
395}
396
397static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
398{
399 if (sk->sk_flags & flags) {
400 sk->sk_flags &= ~flags;
401 if (sock_needs_netstamp(sk) &&
402 !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
403 net_disable_timestamp();
404 }
405}
406
407
408int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
409{
410 unsigned long flags;
411 struct sk_buff_head *list = &sk->sk_receive_queue;
412
413 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
414 atomic_inc(&sk->sk_drops);
415 trace_sock_rcvqueue_full(sk, skb);
416 return -ENOMEM;
417 }
418
419 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
420 atomic_inc(&sk->sk_drops);
421 return -ENOBUFS;
422 }
423
424 skb->dev = NULL;
425 skb_set_owner_r(skb, sk);
426
	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
430 skb_dst_force(skb);
431
432 spin_lock_irqsave(&list->lock, flags);
433 sock_skb_set_dropcount(sk, skb);
434 __skb_queue_tail(list, skb);
435 spin_unlock_irqrestore(&list->lock, flags);
436
437 if (!sock_flag(sk, SOCK_DEAD))
438 sk->sk_data_ready(sk);
439 return 0;
440}
441EXPORT_SYMBOL(__sock_queue_rcv_skb);
442
443int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
444{
445 int err;
446
447 err = sk_filter(sk, skb);
448 if (err)
449 return err;
450
451 return __sock_queue_rcv_skb(sk, skb);
452}
453EXPORT_SYMBOL(sock_queue_rcv_skb);
454
455int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
456 const int nested, unsigned int trim_cap)
457{
458 int rc = NET_RX_SUCCESS;
459
460 if (sk_filter_trim_cap(sk, skb, trim_cap))
461 goto discard_and_relse;
462
463 skb->dev = NULL;
464
465 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
466 atomic_inc(&sk->sk_drops);
467 goto discard_and_relse;
468 }
469 if (nested)
470 bh_lock_sock_nested(sk);
471 else
472 bh_lock_sock(sk);
473 if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
477 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
478
479 rc = sk_backlog_rcv(sk, skb);
480
481 mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
482 } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
483 bh_unlock_sock(sk);
484 atomic_inc(&sk->sk_drops);
485 goto discard_and_relse;
486 }
487
488 bh_unlock_sock(sk);
489out:
490 sock_put(sk);
491 return rc;
492discard_and_relse:
493 kfree_skb(skb);
494 goto out;
495}
496EXPORT_SYMBOL(__sk_receive_skb);
497
498struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
499{
500 struct dst_entry *dst = __sk_dst_get(sk);
501
502 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
503 sk_tx_queue_clear(sk);
504 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
505 dst_release(dst);
506 return NULL;
507 }
508
509 return dst;
510}
511EXPORT_SYMBOL(__sk_dst_check);
512
513struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
514{
515 struct dst_entry *dst = sk_dst_get(sk);
516
517 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
518 sk_dst_reset(sk);
519 dst_release(dst);
520 return NULL;
521 }
522
523 return dst;
524}
525EXPORT_SYMBOL(sk_dst_check);
526
527static int sock_setbindtodevice(struct sock *sk, char __user *optval,
528 int optlen)
529{
530 int ret = -ENOPROTOOPT;
531#ifdef CONFIG_NETDEVICES
532 struct net *net = sock_net(sk);
533 char devname[IFNAMSIZ];
534 int index;
535
536
537 ret = -EPERM;
538 if (!ns_capable(net->user_ns, CAP_NET_RAW))
539 goto out;
540
541 ret = -EINVAL;
542 if (optlen < 0)
543 goto out;
544
	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
550 if (optlen > IFNAMSIZ - 1)
551 optlen = IFNAMSIZ - 1;
552 memset(devname, 0, sizeof(devname));
553
554 ret = -EFAULT;
555 if (copy_from_user(devname, optval, optlen))
556 goto out;
557
558 index = 0;
559 if (devname[0] != '\0') {
560 struct net_device *dev;
561
562 rcu_read_lock();
563 dev = dev_get_by_name_rcu(net, devname);
564 if (dev)
565 index = dev->ifindex;
566 rcu_read_unlock();
567 ret = -ENODEV;
568 if (!dev)
569 goto out;
570 }
571
572 lock_sock(sk);
573 sk->sk_bound_dev_if = index;
574 sk_dst_reset(sk);
575 release_sock(sk);
576
577 ret = 0;
578
579out:
580#endif
581
582 return ret;
583}
584
585static int sock_getbindtodevice(struct sock *sk, char __user *optval,
586 int __user *optlen, int len)
587{
588 int ret = -ENOPROTOOPT;
589#ifdef CONFIG_NETDEVICES
590 struct net *net = sock_net(sk);
591 char devname[IFNAMSIZ];
592
593 if (sk->sk_bound_dev_if == 0) {
594 len = 0;
595 goto zero;
596 }
597
598 ret = -EINVAL;
599 if (len < IFNAMSIZ)
600 goto out;
601
602 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
603 if (ret)
604 goto out;
605
606 len = strlen(devname) + 1;
607
608 ret = -EFAULT;
609 if (copy_to_user(optval, devname, len))
610 goto out;
611
612zero:
613 ret = -EFAULT;
614 if (put_user(len, optlen))
615 goto out;
616
617 ret = 0;
618
619out:
620#endif
621
622 return ret;
623}
624
625static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
626{
627 if (valbool)
628 sock_set_flag(sk, bit);
629 else
630 sock_reset_flag(sk, bit);
631}
632
633bool sk_mc_loop(struct sock *sk)
634{
635 if (dev_recursion_level())
636 return false;
637 if (!sk)
638 return true;
639 switch (sk->sk_family) {
640 case AF_INET:
641 return inet_sk(sk)->mc_loop;
642#if IS_ENABLED(CONFIG_IPV6)
643 case AF_INET6:
644 return inet6_sk(sk)->mc_loop;
645#endif
646 }
647 WARN_ON(1);
648 return true;
649}
650EXPORT_SYMBOL(sk_mc_loop);
651
652
653
654
655
656
657int sock_setsockopt(struct socket *sock, int level, int optname,
658 char __user *optval, unsigned int optlen)
659{
660 struct sock *sk = sock->sk;
661 int val;
662 int valbool;
663 struct linger ling;
664 int ret = 0;
665
	/*
	 *	Options without arguments
	 */

670 if (optname == SO_BINDTODEVICE)
671 return sock_setbindtodevice(sk, optval, optlen);
672
673 if (optlen < sizeof(int))
674 return -EINVAL;
675
676 if (get_user(val, (int __user *)optval))
677 return -EFAULT;
678
679 valbool = val ? 1 : 0;
680
681 lock_sock(sk);
682
683 switch (optname) {
684 case SO_DEBUG:
685 if (val && !capable(CAP_NET_ADMIN))
686 ret = -EACCES;
687 else
688 sock_valbool_flag(sk, SOCK_DBG, valbool);
689 break;
690 case SO_REUSEADDR:
691 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
692 break;
693 case SO_REUSEPORT:
694 sk->sk_reuseport = valbool;
695 break;
696 case SO_TYPE:
697 case SO_PROTOCOL:
698 case SO_DOMAIN:
699 case SO_ERROR:
700 ret = -ENOPROTOOPT;
701 break;
702 case SO_DONTROUTE:
703 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
704 break;
705 case SO_BROADCAST:
706 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
707 break;
708 case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
714 val = min_t(u32, val, sysctl_wmem_max);
715set_sndbuf:
716 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
717 sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF);
718
719 sk->sk_write_space(sk);
720 break;
721
722 case SO_SNDBUFFORCE:
723 if (!capable(CAP_NET_ADMIN)) {
724 ret = -EPERM;
725 break;
726 }
727 goto set_sndbuf;
728
729 case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
735 val = min_t(u32, val, sysctl_rmem_max);
736set_rcvbuf:
737 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
753 sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF);
754 break;
755
756 case SO_RCVBUFFORCE:
757 if (!capable(CAP_NET_ADMIN)) {
758 ret = -EPERM;
759 break;
760 }
761 goto set_rcvbuf;
762
763 case SO_KEEPALIVE:
764#ifdef CONFIG_INET
765 if (sk->sk_protocol == IPPROTO_TCP &&
766 sk->sk_type == SOCK_STREAM)
767 tcp_set_keepalive(sk, valbool);
768#endif
769 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
770 break;
771
772 case SO_OOBINLINE:
773 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
774 break;
775
776 case SO_NO_CHECK:
777 sk->sk_no_check_tx = valbool;
778 break;
779
780 case SO_PRIORITY:
781 if ((val >= 0 && val <= 6) ||
782 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
783 sk->sk_priority = val;
784 else
785 ret = -EPERM;
786 break;
787
788 case SO_LINGER:
789 if (optlen < sizeof(ling)) {
790 ret = -EINVAL;
791 break;
792 }
793 if (copy_from_user(&ling, optval, sizeof(ling))) {
794 ret = -EFAULT;
795 break;
796 }
797 if (!ling.l_onoff)
798 sock_reset_flag(sk, SOCK_LINGER);
799 else {
800#if (BITS_PER_LONG == 32)
801 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
802 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
803 else
804#endif
805 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
806 sock_set_flag(sk, SOCK_LINGER);
807 }
808 break;
809
810 case SO_BSDCOMPAT:
811 sock_warn_obsolete_bsdism("setsockopt");
812 break;
813
814 case SO_PASSCRED:
815 if (valbool)
816 set_bit(SOCK_PASSCRED, &sock->flags);
817 else
818 clear_bit(SOCK_PASSCRED, &sock->flags);
819 break;
820
821 case SO_TIMESTAMP:
822 case SO_TIMESTAMPNS:
823 if (valbool) {
824 if (optname == SO_TIMESTAMP)
825 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
826 else
827 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
828 sock_set_flag(sk, SOCK_RCVTSTAMP);
829 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
830 } else {
831 sock_reset_flag(sk, SOCK_RCVTSTAMP);
832 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
833 }
834 break;
835
836 case SO_TIMESTAMPING:
837 if (val & ~SOF_TIMESTAMPING_MASK) {
838 ret = -EINVAL;
839 break;
840 }
841
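		/* For TCP, the OPT_ID key is derived from snd_una, which is
		 * only meaningful on a connection, so refuse CLOSE/LISTEN
		 * sockets.
		 */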
842 if (val & SOF_TIMESTAMPING_OPT_ID &&
843 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
844 if (sk->sk_protocol == IPPROTO_TCP &&
845 sk->sk_type == SOCK_STREAM) {
846 if ((1 << sk->sk_state) &
847 (TCPF_CLOSE | TCPF_LISTEN)) {
848 ret = -EINVAL;
849 break;
850 }
851 sk->sk_tskey = tcp_sk(sk)->snd_una;
852 } else {
853 sk->sk_tskey = 0;
854 }
855 }
856 sk->sk_tsflags = val;
857 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
858 sock_enable_timestamp(sk,
859 SOCK_TIMESTAMPING_RX_SOFTWARE);
860 else
861 sock_disable_timestamp(sk,
862 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
863 break;
864
865 case SO_RCVLOWAT:
866 if (val < 0)
867 val = INT_MAX;
868 sk->sk_rcvlowat = val ? : 1;
869 break;
870
871 case SO_RCVTIMEO:
872 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
873 break;
874
875 case SO_SNDTIMEO:
876 ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
877 break;
878
879 case SO_ATTACH_FILTER:
880 ret = -EINVAL;
881 if (optlen == sizeof(struct sock_fprog)) {
882 struct sock_fprog fprog;
883
884 ret = -EFAULT;
885 if (copy_from_user(&fprog, optval, sizeof(fprog)))
886 break;
887
888 ret = sk_attach_filter(&fprog, sk);
889 }
890 break;
891
892 case SO_ATTACH_BPF:
893 ret = -EINVAL;
894 if (optlen == sizeof(u32)) {
895 u32 ufd;
896
897 ret = -EFAULT;
898 if (copy_from_user(&ufd, optval, sizeof(ufd)))
899 break;
900
901 ret = sk_attach_bpf(ufd, sk);
902 }
903 break;
904
905 case SO_ATTACH_REUSEPORT_CBPF:
906 ret = -EINVAL;
907 if (optlen == sizeof(struct sock_fprog)) {
908 struct sock_fprog fprog;
909
910 ret = -EFAULT;
911 if (copy_from_user(&fprog, optval, sizeof(fprog)))
912 break;
913
914 ret = sk_reuseport_attach_filter(&fprog, sk);
915 }
916 break;
917
918 case SO_ATTACH_REUSEPORT_EBPF:
919 ret = -EINVAL;
920 if (optlen == sizeof(u32)) {
921 u32 ufd;
922
923 ret = -EFAULT;
924 if (copy_from_user(&ufd, optval, sizeof(ufd)))
925 break;
926
927 ret = sk_reuseport_attach_bpf(ufd, sk);
928 }
929 break;
930
931 case SO_DETACH_FILTER:
932 ret = sk_detach_filter(sk);
933 break;
934
935 case SO_LOCK_FILTER:
936 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
937 ret = -EPERM;
938 else
939 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
940 break;
941
942 case SO_PASSSEC:
943 if (valbool)
944 set_bit(SOCK_PASSSEC, &sock->flags);
945 else
946 clear_bit(SOCK_PASSSEC, &sock->flags);
947 break;
948 case SO_MARK:
949 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
950 ret = -EPERM;
951 else
952 sk->sk_mark = val;
953 break;
954
955 case SO_RXQ_OVFL:
956 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
957 break;
958
959 case SO_WIFI_STATUS:
960 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
961 break;
962
963 case SO_PEEK_OFF:
964 if (sock->ops->set_peek_off)
965 ret = sock->ops->set_peek_off(sk, val);
966 else
967 ret = -EOPNOTSUPP;
968 break;
969
970 case SO_NOFCS:
971 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
972 break;
973
974 case SO_SELECT_ERR_QUEUE:
975 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
976 break;
977
978#ifdef CONFIG_NET_RX_BUSY_POLL
979 case SO_BUSY_POLL:
		/* allow unprivileged users to decrease the value */
981 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
982 ret = -EPERM;
983 else {
984 if (val < 0)
985 ret = -EINVAL;
986 else
987 sk->sk_ll_usec = val;
988 }
989 break;
990#endif
991
992 case SO_MAX_PACING_RATE:
993 sk->sk_max_pacing_rate = val;
994 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
995 sk->sk_max_pacing_rate);
996 break;
997
998 case SO_INCOMING_CPU:
999 sk->sk_incoming_cpu = val;
1000 break;
1001
1002 case SO_CNX_ADVICE:
1003 if (val == 1)
1004 dst_negative_advice(sk);
1005 break;
1006 default:
1007 ret = -ENOPROTOOPT;
1008 break;
1009 }
1010 release_sock(sk);
1011 return ret;
1012}
1013EXPORT_SYMBOL(sock_setsockopt);
1014
1015
1016static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1017 struct ucred *ucred)
1018{
1019 ucred->pid = pid_vnr(pid);
1020 ucred->uid = ucred->gid = -1;
1021 if (cred) {
1022 struct user_namespace *current_ns = current_user_ns();
1023
1024 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1025 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1026 }
1027}
1028
1029int sock_getsockopt(struct socket *sock, int level, int optname,
1030 char __user *optval, int __user *optlen)
1031{
1032 struct sock *sk = sock->sk;
1033
1034 union {
1035 int val;
1036 struct linger ling;
1037 struct timeval tm;
1038 } v;
1039
1040 int lv = sizeof(int);
1041 int len;
1042
1043 if (get_user(len, optlen))
1044 return -EFAULT;
1045 if (len < 0)
1046 return -EINVAL;
1047
1048 memset(&v, 0, sizeof(v));
1049
1050 switch (optname) {
1051 case SO_DEBUG:
1052 v.val = sock_flag(sk, SOCK_DBG);
1053 break;
1054
1055 case SO_DONTROUTE:
1056 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1057 break;
1058
1059 case SO_BROADCAST:
1060 v.val = sock_flag(sk, SOCK_BROADCAST);
1061 break;
1062
1063 case SO_SNDBUF:
1064 v.val = sk->sk_sndbuf;
1065 break;
1066
1067 case SO_RCVBUF:
1068 v.val = sk->sk_rcvbuf;
1069 break;
1070
1071 case SO_REUSEADDR:
1072 v.val = sk->sk_reuse;
1073 break;
1074
1075 case SO_REUSEPORT:
1076 v.val = sk->sk_reuseport;
1077 break;
1078
1079 case SO_KEEPALIVE:
1080 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1081 break;
1082
1083 case SO_TYPE:
1084 v.val = sk->sk_type;
1085 break;
1086
1087 case SO_PROTOCOL:
1088 v.val = sk->sk_protocol;
1089 break;
1090
1091 case SO_DOMAIN:
1092 v.val = sk->sk_family;
1093 break;
1094
1095 case SO_ERROR:
1096 v.val = -sock_error(sk);
1097 if (v.val == 0)
1098 v.val = xchg(&sk->sk_err_soft, 0);
1099 break;
1100
1101 case SO_OOBINLINE:
1102 v.val = sock_flag(sk, SOCK_URGINLINE);
1103 break;
1104
1105 case SO_NO_CHECK:
1106 v.val = sk->sk_no_check_tx;
1107 break;
1108
1109 case SO_PRIORITY:
1110 v.val = sk->sk_priority;
1111 break;
1112
1113 case SO_LINGER:
1114 lv = sizeof(v.ling);
1115 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1116 v.ling.l_linger = sk->sk_lingertime / HZ;
1117 break;
1118
1119 case SO_BSDCOMPAT:
1120 sock_warn_obsolete_bsdism("getsockopt");
1121 break;
1122
1123 case SO_TIMESTAMP:
1124 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1125 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1126 break;
1127
1128 case SO_TIMESTAMPNS:
1129 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
1130 break;
1131
1132 case SO_TIMESTAMPING:
1133 v.val = sk->sk_tsflags;
1134 break;
1135
1136 case SO_RCVTIMEO:
1137 lv = sizeof(struct timeval);
1138 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
1139 v.tm.tv_sec = 0;
1140 v.tm.tv_usec = 0;
1141 } else {
1142 v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
1143 v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
1144 }
1145 break;
1146
1147 case SO_SNDTIMEO:
1148 lv = sizeof(struct timeval);
1149 if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
1150 v.tm.tv_sec = 0;
1151 v.tm.tv_usec = 0;
1152 } else {
1153 v.tm.tv_sec = sk->sk_sndtimeo / HZ;
1154 v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
1155 }
1156 break;
1157
1158 case SO_RCVLOWAT:
1159 v.val = sk->sk_rcvlowat;
1160 break;
1161
1162 case SO_SNDLOWAT:
1163 v.val = 1;
1164 break;
1165
1166 case SO_PASSCRED:
1167 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1168 break;
1169
1170 case SO_PEERCRED:
1171 {
1172 struct ucred peercred;
1173 if (len > sizeof(peercred))
1174 len = sizeof(peercred);
1175 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1176 if (copy_to_user(optval, &peercred, len))
1177 return -EFAULT;
1178 goto lenout;
1179 }
1180
1181 case SO_PEERNAME:
1182 {
1183 char address[128];
1184
1185 if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
1186 return -ENOTCONN;
1187 if (lv < len)
1188 return -EINVAL;
1189 if (copy_to_user(optval, address, len))
1190 return -EFAULT;
1191 goto lenout;
1192 }
1193
	/* Report whether the socket is in the listening state
	 * (required by the standards, rarely used otherwise).
	 */
1197 case SO_ACCEPTCONN:
1198 v.val = sk->sk_state == TCP_LISTEN;
1199 break;
1200
1201 case SO_PASSSEC:
1202 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1203 break;
1204
1205 case SO_PEERSEC:
1206 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1207
1208 case SO_MARK:
1209 v.val = sk->sk_mark;
1210 break;
1211
1212 case SO_RXQ_OVFL:
1213 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1214 break;
1215
1216 case SO_WIFI_STATUS:
1217 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1218 break;
1219
1220 case SO_PEEK_OFF:
1221 if (!sock->ops->set_peek_off)
1222 return -EOPNOTSUPP;
1223
1224 v.val = sk->sk_peek_off;
1225 break;
1226 case SO_NOFCS:
1227 v.val = sock_flag(sk, SOCK_NOFCS);
1228 break;
1229
1230 case SO_BINDTODEVICE:
1231 return sock_getbindtodevice(sk, optval, optlen, len);
1232
1233 case SO_GET_FILTER:
1234 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1235 if (len < 0)
1236 return len;
1237
1238 goto lenout;
1239
1240 case SO_LOCK_FILTER:
1241 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1242 break;
1243
1244 case SO_BPF_EXTENSIONS:
1245 v.val = bpf_tell_extensions();
1246 break;
1247
1248 case SO_SELECT_ERR_QUEUE:
1249 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1250 break;
1251
1252#ifdef CONFIG_NET_RX_BUSY_POLL
1253 case SO_BUSY_POLL:
1254 v.val = sk->sk_ll_usec;
1255 break;
1256#endif
1257
1258 case SO_MAX_PACING_RATE:
1259 v.val = sk->sk_max_pacing_rate;
1260 break;
1261
1262 case SO_INCOMING_CPU:
1263 v.val = sk->sk_incoming_cpu;
1264 break;
1265
1266 default:
1267
1268
1269
1270 return -ENOPROTOOPT;
1271 }
1272
1273 if (len > lv)
1274 len = lv;
1275 if (copy_to_user(optval, &v, len))
1276 return -EFAULT;
1277lenout:
1278 if (put_user(len, optlen))
1279 return -EFAULT;
1280 return 0;
1281}
1282
/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
1288static inline void sock_lock_init(struct sock *sk)
1289{
1290 sock_lock_init_class_and_name(sk,
1291 af_family_slock_key_strings[sk->sk_family],
1292 af_family_slock_keys + sk->sk_family,
1293 af_family_key_strings[sk->sk_family],
1294 af_family_keys + sk->sk_family);
1295}
1296
/*
 * Copy all fields from osk to nsk, skipping the area between
 * sk_dontcopy_begin and sk_dontcopy_end and preserving nsk's
 * security pointer.
 */
1302static void sock_copy(struct sock *nsk, const struct sock *osk)
1303{
1304#ifdef CONFIG_SECURITY_NETWORK
1305 void *sptr = nsk->sk_security;
1306#endif
1307 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1308
1309 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1310 osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1311
1312#ifdef CONFIG_SECURITY_NETWORK
1313 nsk->sk_security = sptr;
1314 security_sk_clone(osk, nsk);
1315#endif
1316}
1317
1318void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
1319{
1320 unsigned long nulls1, nulls2;
1321
1322 nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
1323 nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
1324 if (nulls1 > nulls2)
1325 swap(nulls1, nulls2);
1326
1327 if (nulls1 != 0)
1328 memset((char *)sk, 0, nulls1);
1329 memset((char *)sk + nulls1 + sizeof(void *), 0,
1330 nulls2 - nulls1 - sizeof(void *));
1331 memset((char *)sk + nulls2 + sizeof(void *), 0,
1332 size - nulls2 - sizeof(void *));
1333}
1334EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
1335
1336static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1337 int family)
1338{
1339 struct sock *sk;
1340 struct kmem_cache *slab;
1341
1342 slab = prot->slab;
1343 if (slab != NULL) {
1344 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1345 if (!sk)
1346 return sk;
1347 if (priority & __GFP_ZERO) {
1348 if (prot->clear_sk)
1349 prot->clear_sk(sk, prot->obj_size);
1350 else
1351 sk_prot_clear_nulls(sk, prot->obj_size);
1352 }
1353 } else
1354 sk = kmalloc(prot->obj_size, priority);
1355
1356 if (sk != NULL) {
1357 kmemcheck_annotate_bitfield(sk, flags);
1358
1359 if (security_sk_alloc(sk, family, priority))
1360 goto out_free;
1361
1362 if (!try_module_get(prot->owner))
1363 goto out_free_sec;
1364 sk_tx_queue_clear(sk);
1365 }
1366
1367 return sk;
1368
1369out_free_sec:
1370 security_sk_free(sk);
1371out_free:
1372 if (slab != NULL)
1373 kmem_cache_free(slab, sk);
1374 else
1375 kfree(sk);
1376 return NULL;
1377}
1378
1379static void sk_prot_free(struct proto *prot, struct sock *sk)
1380{
1381 struct kmem_cache *slab;
1382 struct module *owner;
1383
1384 owner = prot->owner;
1385 slab = prot->slab;
1386
1387 cgroup_sk_free(&sk->sk_cgrp_data);
1388 security_sk_free(sk);
1389 if (slab != NULL)
1390 kmem_cache_free(slab, sk);
1391 else
1392 kfree(sk);
1393 module_put(owner);
1394}
1395
/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@kern: is this to be a kernel socket?
 */
1404struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1405 struct proto *prot, int kern)
1406{
1407 struct sock *sk;
1408
1409 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1410 if (sk) {
1411 sk->sk_family = family;
		/*
		 * sk_prot_creator records the proto used for allocation so
		 * the object is later freed from the matching cache
		 * (see sk_prot_free()).
		 */
1416 sk->sk_prot = sk->sk_prot_creator = prot;
1417 sock_lock_init(sk);
1418 sk->sk_net_refcnt = kern ? 0 : 1;
1419 if (likely(sk->sk_net_refcnt))
1420 get_net(net);
1421 sock_net_set(sk, net);
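		/* sk_wmem_alloc starts at one so sk_free() can tell whether
		 * packets are still in flight (see sock_wfree()).
		 */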
1422 atomic_set(&sk->sk_wmem_alloc, 1);
1423
1424 cgroup_sk_alloc(&sk->sk_cgrp_data);
1425 sock_update_classid(&sk->sk_cgrp_data);
1426 sock_update_netprioidx(&sk->sk_cgrp_data);
1427 }
1428
1429 return sk;
1430}
1431EXPORT_SYMBOL(sk_alloc);
1432
/* Final teardown: run the destructor, drop filter and reuseport state,
 * release the namespace reference and free the sock object.
 */
1436static void __sk_destruct(struct rcu_head *head)
1437{
1438 struct sock *sk = container_of(head, struct sock, sk_rcu);
1439 struct sk_filter *filter;
1440
1441 if (sk->sk_destruct)
1442 sk->sk_destruct(sk);
1443
1444 filter = rcu_dereference_check(sk->sk_filter,
1445 atomic_read(&sk->sk_wmem_alloc) == 0);
1446 if (filter) {
1447 sk_filter_uncharge(sk, filter);
1448 RCU_INIT_POINTER(sk->sk_filter, NULL);
1449 }
1450 if (rcu_access_pointer(sk->sk_reuseport_cb))
1451 reuseport_detach_sock(sk);
1452
1453 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1454
1455 if (atomic_read(&sk->sk_omem_alloc))
1456 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1457 __func__, atomic_read(&sk->sk_omem_alloc));
1458
1459 if (sk->sk_peer_cred)
1460 put_cred(sk->sk_peer_cred);
1461 put_pid(sk->sk_peer_pid);
1462 if (likely(sk->sk_net_refcnt))
1463 put_net(sock_net(sk));
1464 sk_prot_free(sk->sk_prot_creator, sk);
1465}
1466
1467void sk_destruct(struct sock *sk)
1468{
1469 if (sock_flag(sk, SOCK_RCU_FREE))
1470 call_rcu(&sk->sk_rcu, __sk_destruct);
1471 else
1472 __sk_destruct(&sk->sk_rcu);
1473}
1474
1475static void __sk_free(struct sock *sk)
1476{
1477 if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
1478 sock_diag_broadcast_destroy(sk);
1479 else
1480 sk_destruct(sk);
1481}
1482
1483void sk_free(struct sock *sk)
1484{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
1490 if (atomic_dec_and_test(&sk->sk_wmem_alloc))
1491 __sk_free(sk);
1492}
1493EXPORT_SYMBOL(sk_free);
1494
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */
1502struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1503{
1504 struct sock *newsk;
1505 bool is_charged = true;
1506
1507 newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
1508 if (newsk != NULL) {
1509 struct sk_filter *filter;
1510
1511 sock_copy(newsk, sk);
1512
1513
1514 if (likely(newsk->sk_net_refcnt))
1515 get_net(sock_net(newsk));
1516 sk_node_init(&newsk->sk_node);
1517 sock_lock_init(newsk);
1518 bh_lock_sock(newsk);
1519 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1520 newsk->sk_backlog.len = 0;
1521
1522 atomic_set(&newsk->sk_rmem_alloc, 0);

		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
1526 atomic_set(&newsk->sk_wmem_alloc, 1);
1527 atomic_set(&newsk->sk_omem_alloc, 0);
1528 skb_queue_head_init(&newsk->sk_receive_queue);
1529 skb_queue_head_init(&newsk->sk_write_queue);
1530
1531 rwlock_init(&newsk->sk_callback_lock);
1532 lockdep_set_class_and_name(&newsk->sk_callback_lock,
1533 af_callback_keys + newsk->sk_family,
1534 af_family_clock_key_strings[newsk->sk_family]);
1535
1536 newsk->sk_dst_cache = NULL;
1537 newsk->sk_wmem_queued = 0;
1538 newsk->sk_forward_alloc = 0;
1539 atomic_set(&newsk->sk_drops, 0);
1540 newsk->sk_send_head = NULL;
1541 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1542
1543 sock_reset_flag(newsk, SOCK_DONE);
1544 skb_queue_head_init(&newsk->sk_error_queue);
1545
1546 filter = rcu_dereference_protected(newsk->sk_filter, 1);
1547 if (filter != NULL)
			/* though it's an empty new sock, the charging may fail
			 * if sysctl_optmem_max was changed between creation of
			 * original socket and cloning
			 */
1552 is_charged = sk_filter_charge(newsk, filter);
1553
1554 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
1557 newsk->sk_destruct = NULL;
1558 bh_unlock_sock(newsk);
1559 sk_free(newsk);
1560 newsk = NULL;
1561 goto out;
1562 }
1563 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1564
1565 newsk->sk_err = 0;
1566 newsk->sk_priority = 0;
1567 newsk->sk_incoming_cpu = raw_smp_processor_id();
1568 atomic64_set(&newsk->sk_cookie, 0);
1569
1570 cgroup_sk_alloc(&newsk->sk_cgrp_data);
1571
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
1576 smp_wmb();
1577 atomic_set(&newsk->sk_refcnt, 2);
1578
		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
		 * to be taken into account in all callers. -acme
		 */
1590 sk_refcnt_debug_inc(newsk);
1591 sk_set_socket(newsk, NULL);
1592 newsk->sk_wq = NULL;
1593
1594 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1595 sock_update_memcg(newsk);
1596
1597 if (newsk->sk_prot->sockets_allocated)
1598 sk_sockets_allocated_inc(newsk);
1599
1600 if (sock_needs_netstamp(sk) &&
1601 newsk->sk_flags & SK_FLAGS_TIMESTAMP)
1602 net_enable_timestamp();
1603 }
1604out:
1605 return newsk;
1606}
1607EXPORT_SYMBOL_GPL(sk_clone_lock);
1608
1609void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
1610{
1611 u32 max_segs = 1;
1612
1613 sk_dst_set(sk, dst);
1614 sk->sk_route_caps = dst->dev->features;
1615 if (sk->sk_route_caps & NETIF_F_GSO)
1616 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
1617 sk->sk_route_caps &= ~sk->sk_route_nocaps;
1618 if (sk_can_gso(sk)) {
1619 if (dst->header_len) {
1620 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
1621 } else {
1622 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
1623 sk->sk_gso_max_size = dst->dev->gso_max_size;
1624 max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
1625 }
1626 }
1627 sk->sk_gso_max_segs = max_segs;
1628}
1629EXPORT_SYMBOL_GPL(sk_setup_caps);
1630
/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
 */
1639void sock_wfree(struct sk_buff *skb)
1640{
1641 struct sock *sk = skb->sk;
1642 unsigned int len = skb->truesize;
1643
1644 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
1649 atomic_sub(len - 1, &sk->sk_wmem_alloc);
1650 sk->sk_write_space(sk);
1651 len = 1;
1652 }
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
1657 if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
1658 __sk_free(sk);
1659}
1660EXPORT_SYMBOL(sock_wfree);
1661
/* This variant of sock_wfree() is used by TCP,
 * since it sets SOCK_USE_WRITE_QUEUE.
 */
1665void __sock_wfree(struct sk_buff *skb)
1666{
1667 struct sock *sk = skb->sk;
1668
1669 if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
1670 __sk_free(sk);
1671}
1672
1673void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
1674{
1675 skb_orphan(skb);
1676 skb->sk = sk;
1677#ifdef CONFIG_INET
1678 if (unlikely(!sk_fullsock(sk))) {
1679 skb->destructor = sock_edemux;
1680 sock_hold(sk);
1681 return;
1682 }
1683#endif
1684 skb->destructor = sock_wfree;
1685 skb_set_hash_from_sk(skb, sk);
	/* The truesize charged to sk_wmem_alloc below also acts as a
	 * reference on the socket: it keeps sk alive until sock_wfree()
	 * (or __sock_wfree()) releases the charge for this skb.
	 */
1691 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1692}
1693EXPORT_SYMBOL(skb_set_owner_w);
1694
/* This helper is used by netem, as it can hold packets in its
 * delay queue. We want to allow the owner socket to send more
 * packets, as if they were already TX completed by a typical driver.
 * But we also want to keep skb->sk set because some packet schedulers
 * rely on it (sch_fq for example).
 */
1702void skb_orphan_partial(struct sk_buff *skb)
1703{
	/* If this skb is a TCP pure ACK or already went here,
	 * we have nothing to do. 2 is already a very small truesize.
	 */
1707 if (skb->truesize <= 2)
1708 return;
1709
	/* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
	 * so we do not completely orphan skb, but transfert all
	 * accounted bytes but one, to avoid unexpected reorders.
	 */
1714 if (skb->destructor == sock_wfree
1715#ifdef CONFIG_INET
1716 || skb->destructor == tcp_wfree
1717#endif
1718 ) {
1719 atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
1720 skb->truesize = 1;
1721 } else {
1722 skb_orphan(skb);
1723 }
1724}
1725EXPORT_SYMBOL(skb_orphan_partial);
1726
/*
 * Read buffer destructor automatically called from kfree_skb.
 */
1730void sock_rfree(struct sk_buff *skb)
1731{
1732 struct sock *sk = skb->sk;
1733 unsigned int len = skb->truesize;
1734
1735 atomic_sub(len, &sk->sk_rmem_alloc);
1736 sk_mem_uncharge(sk, len);
1737}
1738EXPORT_SYMBOL(sock_rfree);
1739
/*
 * Buffer destructor for skbs that are not used directly in read or write
 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
 */
1744void sock_efree(struct sk_buff *skb)
1745{
1746 sock_put(skb->sk);
1747}
1748EXPORT_SYMBOL(sock_efree);
1749
1750kuid_t sock_i_uid(struct sock *sk)
1751{
1752 kuid_t uid;
1753
1754 read_lock_bh(&sk->sk_callback_lock);
1755 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
1756 read_unlock_bh(&sk->sk_callback_lock);
1757 return uid;
1758}
1759EXPORT_SYMBOL(sock_i_uid);
1760
1761unsigned long sock_i_ino(struct sock *sk)
1762{
1763 unsigned long ino;
1764
1765 read_lock_bh(&sk->sk_callback_lock);
1766 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
1767 read_unlock_bh(&sk->sk_callback_lock);
1768 return ino;
1769}
1770EXPORT_SYMBOL(sock_i_ino);
1771
/*
 * Allocate a skb from the socket's send buffer.
 */
1775struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
1776 gfp_t priority)
1777{
1778 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
1779 struct sk_buff *skb = alloc_skb(size, priority);
1780 if (skb) {
1781 skb_set_owner_w(skb, sk);
1782 return skb;
1783 }
1784 }
1785 return NULL;
1786}
1787EXPORT_SYMBOL(sock_wmalloc);
1788
/*
 * Allocate a memory block from the socket's option memory buffer.
 */
1792void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
1793{
1794 if ((unsigned int)size <= sysctl_optmem_max &&
1795 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
1796 void *mem;
1797
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
1800 atomic_add(size, &sk->sk_omem_alloc);
1801 mem = kmalloc(size, priority);
1802 if (mem)
1803 return mem;
1804 atomic_sub(size, &sk->sk_omem_alloc);
1805 }
1806 return NULL;
1807}
1808EXPORT_SYMBOL(sock_kmalloc);
1809
/* Free an option memory block. Note, we actually want the inline
 * here as this allows gcc to detect the nullify and fold away the
 * condition entirely.
 */
1814static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
1815 const bool nullify)
1816{
1817 if (WARN_ON_ONCE(!mem))
1818 return;
1819 if (nullify)
1820 kzfree(mem);
1821 else
1822 kfree(mem);
1823 atomic_sub(size, &sk->sk_omem_alloc);
1824}
1825
1826void sock_kfree_s(struct sock *sk, void *mem, int size)
1827{
1828 __sock_kfree_s(sk, mem, size, false);
1829}
1830EXPORT_SYMBOL(sock_kfree_s);
1831
1832void sock_kzfree_s(struct sock *sk, void *mem, int size)
1833{
1834 __sock_kfree_s(sk, mem, size, true);
1835}
1836EXPORT_SYMBOL(sock_kzfree_s);
1837
/*
 * Wait for more send buffer space, or until the timeout expires, a signal
 * is pending, the socket is shut down, or an error has been set.
 */
1841static long sock_wait_for_wmem(struct sock *sk, long timeo)
1842{
1843 DEFINE_WAIT(wait);
1844
1845 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1846 for (;;) {
1847 if (!timeo)
1848 break;
1849 if (signal_pending(current))
1850 break;
1851 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1852 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
1853 if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
1854 break;
1855 if (sk->sk_shutdown & SEND_SHUTDOWN)
1856 break;
1857 if (sk->sk_err)
1858 break;
1859 timeo = schedule_timeout(timeo);
1860 }
1861 finish_wait(sk_sleep(sk), &wait);
1862 return timeo;
1863}
1864
/*
 *	Generic send/receive buffer handlers
 */

1870struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1871 unsigned long data_len, int noblock,
1872 int *errcode, int max_page_order)
1873{
1874 struct sk_buff *skb;
1875 long timeo;
1876 int err;
1877
1878 timeo = sock_sndtimeo(sk, noblock);
1879 for (;;) {
1880 err = sock_error(sk);
1881 if (err != 0)
1882 goto failure;
1883
1884 err = -EPIPE;
1885 if (sk->sk_shutdown & SEND_SHUTDOWN)
1886 goto failure;
1887
1888 if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
1889 break;
1890
1891 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
1892 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
1893 err = -EAGAIN;
1894 if (!timeo)
1895 goto failure;
1896 if (signal_pending(current))
1897 goto interrupted;
1898 timeo = sock_wait_for_wmem(sk, timeo);
1899 }
1900 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
1901 errcode, sk->sk_allocation);
1902 if (skb)
1903 skb_set_owner_w(skb, sk);
1904 return skb;
1905
1906interrupted:
1907 err = sock_intr_errno(timeo);
1908failure:
1909 *errcode = err;
1910 return NULL;
1911}
1912EXPORT_SYMBOL(sock_alloc_send_pskb);
1913
1914struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1915 int noblock, int *errcode)
1916{
1917 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
1918}
1919EXPORT_SYMBOL(sock_alloc_send_skb);
1920
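/* Interpret a single SOL_SOCKET control message and record the result
 * in the sockcm_cookie; unknown cmsg types are rejected with -EINVAL.
 */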
1921int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
1922 struct sockcm_cookie *sockc)
1923{
1924 u32 tsflags;
1925
1926 switch (cmsg->cmsg_type) {
1927 case SO_MARK:
1928 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1929 return -EPERM;
1930 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
1931 return -EINVAL;
1932 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
1933 break;
1934 case SO_TIMESTAMPING:
1935 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
1936 return -EINVAL;
1937
1938 tsflags = *(u32 *)CMSG_DATA(cmsg);
1939 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
1940 return -EINVAL;
1941
1942 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
1943 sockc->tsflags |= tsflags;
1944 break;
1945
1946 case SCM_RIGHTS:
1947 case SCM_CREDENTIALS:
1948 break;
1949 default:
1950 return -EINVAL;
1951 }
1952 return 0;
1953}
1954EXPORT_SYMBOL(__sock_cmsg_send);
1955
1956int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
1957 struct sockcm_cookie *sockc)
1958{
1959 struct cmsghdr *cmsg;
1960 int ret;
1961
1962 for_each_cmsghdr(cmsg, msg) {
1963 if (!CMSG_OK(msg, cmsg))
1964 return -EINVAL;
1965 if (cmsg->cmsg_level != SOL_SOCKET)
1966 continue;
1967 ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
1968 if (ret)
1969 return ret;
1970 }
1971 return 0;
1972}
1973EXPORT_SYMBOL(sock_cmsg_send);
1974
/* On 32bit arches, an skb frag is limited to 2^15 */
1976#define SKB_FRAG_PAGE_ORDER get_order(32768)
1977
/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
 */
1988bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
1989{
1990 if (pfrag->page) {
1991 if (page_ref_count(pfrag->page) == 1) {
1992 pfrag->offset = 0;
1993 return true;
1994 }
1995 if (pfrag->offset + sz <= pfrag->size)
1996 return true;
1997 put_page(pfrag->page);
1998 }
1999
2000 pfrag->offset = 0;
2001 if (SKB_FRAG_PAGE_ORDER) {
		/* Avoid direct reclaim but allow kcompactd to wake */
2003 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2004 __GFP_COMP | __GFP_NOWARN |
2005 __GFP_NORETRY,
2006 SKB_FRAG_PAGE_ORDER);
2007 if (likely(pfrag->page)) {
2008 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2009 return true;
2010 }
2011 }
2012 pfrag->page = alloc_page(gfp);
2013 if (likely(pfrag->page)) {
2014 pfrag->size = PAGE_SIZE;
2015 return true;
2016 }
2017 return false;
2018}
2019EXPORT_SYMBOL(skb_page_frag_refill);
2020
2021bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2022{
2023 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2024 return true;
2025
2026 sk_enter_memory_pressure(sk);
2027 sk_stream_moderate_sndbuf(sk);
2028 return false;
2029}
2030EXPORT_SYMBOL(sk_page_frag_refill);
2031
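/* Sleep until the socket lock owner releases ownership; called and
 * returns with sk_lock.slock held.
 */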
2032static void __lock_sock(struct sock *sk)
2033 __releases(&sk->sk_lock.slock)
2034 __acquires(&sk->sk_lock.slock)
2035{
2036 DEFINE_WAIT(wait);
2037
2038 for (;;) {
2039 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2040 TASK_UNINTERRUPTIBLE);
2041 spin_unlock_bh(&sk->sk_lock.slock);
2042 schedule();
2043 spin_lock_bh(&sk->sk_lock.slock);
2044 if (!sock_owned_by_user(sk))
2045 break;
2046 }
2047 finish_wait(&sk->sk_lock.wq, &wait);
2048}
2049
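/* Run the backlog queue; sk_lock.slock is dropped while each queued
 * skb is passed to sk_backlog_rcv().
 */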
2050static void __release_sock(struct sock *sk)
2051 __releases(&sk->sk_lock.slock)
2052 __acquires(&sk->sk_lock.slock)
2053{
2054 struct sk_buff *skb, *next;
2055
2056 while ((skb = sk->sk_backlog.head) != NULL) {
2057 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2058
2059 spin_unlock_bh(&sk->sk_lock.slock);
2060
2061 do {
2062 next = skb->next;
2063 prefetch(next);
2064 WARN_ON_ONCE(skb_dst_is_noref(skb));
2065 skb->next = NULL;
2066 sk_backlog_rcv(sk, skb);
2067
2068 cond_resched();
2069
2070 skb = next;
2071 } while (skb != NULL);
2072
2073 spin_lock_bh(&sk->sk_lock.slock);
2074 }
2075
	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
2080 sk->sk_backlog.len = 0;
2081}
2082
2083void __sk_flush_backlog(struct sock *sk)
2084{
2085 spin_lock_bh(&sk->sk_lock.slock);
2086 __release_sock(sk);
2087 spin_unlock_bh(&sk->sk_lock.slock);
2088}
2089
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */
2101int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2102{
2103 int rc;
2104 DEFINE_WAIT(wait);
2105
2106 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2107 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2108 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
2109 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2110 finish_wait(sk_sleep(sk), &wait);
2111 return rc;
2112}
2113EXPORT_SYMBOL(sk_wait_data);
2114
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */
2125int __sk_mem_schedule(struct sock *sk, int size, int kind)
2126{
2127 struct proto *prot = sk->sk_prot;
2128 int amt = sk_mem_pages(size);
2129 long allocated;
2130
2131 sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
2132
2133 allocated = sk_memory_allocated_add(sk, amt);
2134
2135 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2136 !mem_cgroup_charge_skmem(sk->sk_memcg, amt))
2137 goto suppress_allocation;
2138
	/* Under limit. */
2140 if (allocated <= sk_prot_mem_limits(sk, 0)) {
2141 sk_leave_memory_pressure(sk);
2142 return 1;
2143 }
2144
	/* Under pressure. */
2146 if (allocated > sk_prot_mem_limits(sk, 1))
2147 sk_enter_memory_pressure(sk);
2148
	/* Over hard limit. */
2150 if (allocated > sk_prot_mem_limits(sk, 2))
2151 goto suppress_allocation;
2152
	/* guarantee minimum buffer size under pressure */
2154 if (kind == SK_MEM_RECV) {
2155 if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
2156 return 1;
2157
2158 } else {
2159 if (sk->sk_type == SOCK_STREAM) {
2160 if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
2161 return 1;
2162 } else if (atomic_read(&sk->sk_wmem_alloc) <
2163 prot->sysctl_wmem[0])
2164 return 1;
2165 }
2166
2167 if (sk_has_memory_pressure(sk)) {
2168 int alloc;
2169
2170 if (!sk_under_memory_pressure(sk))
2171 return 1;
2172 alloc = sk_sockets_allocated_read_positive(sk);
2173 if (sk_prot_mem_limits(sk, 2) > alloc *
2174 sk_mem_pages(sk->sk_wmem_queued +
2175 atomic_read(&sk->sk_rmem_alloc) +
2176 sk->sk_forward_alloc))
2177 return 1;
2178 }
2179
2180suppress_allocation:
2181
2182 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2183 sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
2188 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2189 return 1;
2190 }
2191
2192 trace_sock_exceed_buf_limit(sk, prot, allocated);
2193
	/* Alas. Undo changes. */
2195 sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
2196
2197 sk_memory_allocated_sub(sk, amt);
2198
2199 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2200 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2201
2202 return 0;
2203}
2204EXPORT_SYMBOL(__sk_mem_schedule);
2205
/**
 *	__sk_mem_reclaim - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
 */
2211void __sk_mem_reclaim(struct sock *sk, int amount)
2212{
2213 amount >>= SK_MEM_QUANTUM_SHIFT;
2214 sk_memory_allocated_sub(sk, amount);
2215 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2216
2217 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2218 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2219
2220 if (sk_under_memory_pressure(sk) &&
2221 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2222 sk_leave_memory_pressure(sk);
2223}
2224EXPORT_SYMBOL(__sk_mem_reclaim);
2225
2226int sk_set_peek_off(struct sock *sk, int val)
2227{
2228 if (val < 0)
2229 return -EINVAL;
2230
2231 sk->sk_peek_off = val;
2232 return 0;
2233}
2234EXPORT_SYMBOL_GPL(sk_set_peek_off);
2235
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */

2243int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2244{
2245 return -EOPNOTSUPP;
2246}
2247EXPORT_SYMBOL(sock_no_bind);
2248
2249int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2250 int len, int flags)
2251{
2252 return -EOPNOTSUPP;
2253}
2254EXPORT_SYMBOL(sock_no_connect);
2255
2256int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2257{
2258 return -EOPNOTSUPP;
2259}
2260EXPORT_SYMBOL(sock_no_socketpair);
2261
2262int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
2263{
2264 return -EOPNOTSUPP;
2265}
2266EXPORT_SYMBOL(sock_no_accept);
2267
2268int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2269 int *len, int peer)
2270{
2271 return -EOPNOTSUPP;
2272}
2273EXPORT_SYMBOL(sock_no_getname);
2274
2275unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
2276{
2277 return 0;
2278}
2279EXPORT_SYMBOL(sock_no_poll);
2280
2281int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2282{
2283 return -EOPNOTSUPP;
2284}
2285EXPORT_SYMBOL(sock_no_ioctl);
2286
2287int sock_no_listen(struct socket *sock, int backlog)
2288{
2289 return -EOPNOTSUPP;
2290}
2291EXPORT_SYMBOL(sock_no_listen);
2292
2293int sock_no_shutdown(struct socket *sock, int how)
2294{
2295 return -EOPNOTSUPP;
2296}
2297EXPORT_SYMBOL(sock_no_shutdown);
2298
2299int sock_no_setsockopt(struct socket *sock, int level, int optname,
2300 char __user *optval, unsigned int optlen)
2301{
2302 return -EOPNOTSUPP;
2303}
2304EXPORT_SYMBOL(sock_no_setsockopt);
2305
2306int sock_no_getsockopt(struct socket *sock, int level, int optname,
2307 char __user *optval, int __user *optlen)
2308{
2309 return -EOPNOTSUPP;
2310}
2311EXPORT_SYMBOL(sock_no_getsockopt);
2312
2313int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2314{
2315 return -EOPNOTSUPP;
2316}
2317EXPORT_SYMBOL(sock_no_sendmsg);
2318
2319int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2320 int flags)
2321{
2322 return -EOPNOTSUPP;
2323}
2324EXPORT_SYMBOL(sock_no_recvmsg);
2325
2326int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2327{
	/* Mirror missing mmap method error code */
2329 return -ENODEV;
2330}
2331EXPORT_SYMBOL(sock_no_mmap);
2332
2333ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2334{
2335 ssize_t res;
2336 struct msghdr msg = {.msg_flags = flags};
2337 struct kvec iov;
2338 char *kaddr = kmap(page);
2339 iov.iov_base = kaddr + offset;
2340 iov.iov_len = size;
2341 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2342 kunmap(page);
2343 return res;
2344}
2345EXPORT_SYMBOL(sock_no_sendpage);
2346
/*
 *	Default Socket Callbacks
 */

2351static void sock_def_wakeup(struct sock *sk)
2352{
2353 struct socket_wq *wq;
2354
2355 rcu_read_lock();
2356 wq = rcu_dereference(sk->sk_wq);
2357 if (skwq_has_sleeper(wq))
2358 wake_up_interruptible_all(&wq->wait);
2359 rcu_read_unlock();
2360}
2361
2362static void sock_def_error_report(struct sock *sk)
2363{
2364 struct socket_wq *wq;
2365
2366 rcu_read_lock();
2367 wq = rcu_dereference(sk->sk_wq);
2368 if (skwq_has_sleeper(wq))
2369 wake_up_interruptible_poll(&wq->wait, POLLERR);
2370 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2371 rcu_read_unlock();
2372}
2373
2374static void sock_def_readable(struct sock *sk)
2375{
2376 struct socket_wq *wq;
2377
2378 rcu_read_lock();
2379 wq = rcu_dereference(sk->sk_wq);
2380 if (skwq_has_sleeper(wq))
2381 wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
2382 POLLRDNORM | POLLRDBAND);
2383 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2384 rcu_read_unlock();
2385}
2386
2387static void sock_def_write_space(struct sock *sk)
2388{
2389 struct socket_wq *wq;
2390
2391 rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
2396 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
2397 wq = rcu_dereference(sk->sk_wq);
2398 if (skwq_has_sleeper(wq))
2399 wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
2400 POLLWRNORM | POLLWRBAND);
2401

		/* Should agree with poll, otherwise some programs break */
2403 if (sock_writeable(sk))
2404 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2405 }
2406
2407 rcu_read_unlock();
2408}
2409
2410static void sock_def_destruct(struct sock *sk)
2411{
2412}
2413
2414void sk_send_sigurg(struct sock *sk)
2415{
2416 if (sk->sk_socket && sk->sk_socket->file)
2417 if (send_sigurg(&sk->sk_socket->file->f_owner))
2418 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2419}
2420EXPORT_SYMBOL(sk_send_sigurg);
2421
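/* (Re)arm a socket timer, taking a reference on the socket if the timer
 * was not already pending; sk_stop_timer() drops it again.
 */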
2422void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2423 unsigned long expires)
2424{
2425 if (!mod_timer(timer, expires))
2426 sock_hold(sk);
2427}
2428EXPORT_SYMBOL(sk_reset_timer);
2429
2430void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2431{
2432 if (del_timer(timer))
2433 __sock_put(sk);
2434}
2435EXPORT_SYMBOL(sk_stop_timer);
2436
2437void sock_init_data(struct socket *sock, struct sock *sk)
2438{
2439 skb_queue_head_init(&sk->sk_receive_queue);
2440 skb_queue_head_init(&sk->sk_write_queue);
2441 skb_queue_head_init(&sk->sk_error_queue);
2442
2443 sk->sk_send_head = NULL;
2444
2445 init_timer(&sk->sk_timer);
2446
2447 sk->sk_allocation = GFP_KERNEL;
2448 sk->sk_rcvbuf = sysctl_rmem_default;
2449 sk->sk_sndbuf = sysctl_wmem_default;
2450 sk->sk_state = TCP_CLOSE;
2451 sk_set_socket(sk, sock);
2452
2453 sock_set_flag(sk, SOCK_ZAPPED);
2454
2455 if (sock) {
2456 sk->sk_type = sock->type;
2457 sk->sk_wq = sock->wq;
2458 sock->sk = sk;
2459 } else
2460 sk->sk_wq = NULL;
2461
2462 rwlock_init(&sk->sk_callback_lock);
2463 lockdep_set_class_and_name(&sk->sk_callback_lock,
2464 af_callback_keys + sk->sk_family,
2465 af_family_clock_key_strings[sk->sk_family]);
2466
2467 sk->sk_state_change = sock_def_wakeup;
2468 sk->sk_data_ready = sock_def_readable;
2469 sk->sk_write_space = sock_def_write_space;
2470 sk->sk_error_report = sock_def_error_report;
2471 sk->sk_destruct = sock_def_destruct;
2472
2473 sk->sk_frag.page = NULL;
2474 sk->sk_frag.offset = 0;
2475 sk->sk_peek_off = -1;
2476
2477 sk->sk_peer_pid = NULL;
2478 sk->sk_peer_cred = NULL;
2479 sk->sk_write_pending = 0;
2480 sk->sk_rcvlowat = 1;
2481 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
2482 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
2483
2484 sk->sk_stamp = ktime_set(-1L, 0);
2485
2486#ifdef CONFIG_NET_RX_BUSY_POLL
2487 sk->sk_napi_id = 0;
2488 sk->sk_ll_usec = sysctl_net_busy_read;
2489#endif
2490
2491 sk->sk_max_pacing_rate = ~0U;
2492 sk->sk_pacing_rate = ~0U;
2493 sk->sk_incoming_cpu = -1;
2494
	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
2498 smp_wmb();
2499 atomic_set(&sk->sk_refcnt, 1);
2500 atomic_set(&sk->sk_drops, 0);
2501}
2502EXPORT_SYMBOL(sock_init_data);
2503
2504void lock_sock_nested(struct sock *sk, int subclass)
2505{
2506 might_sleep();
2507 spin_lock_bh(&sk->sk_lock.slock);
2508 if (sk->sk_lock.owned)
2509 __lock_sock(sk);
2510 sk->sk_lock.owned = 1;
2511 spin_unlock(&sk->sk_lock.slock);
2512
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
2515 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2516 local_bh_enable();
2517}
2518EXPORT_SYMBOL(lock_sock_nested);
2519
2520void release_sock(struct sock *sk)
2521{
2522 spin_lock_bh(&sk->sk_lock.slock);
2523 if (sk->sk_backlog.tail)
2524 __release_sock(sk);
2525
	/* Give the protocol a chance to run deferred work (for example
	 * TCP's release_cb) while we still own the socket.
	 */
2529 if (sk->sk_prot->release_cb)
2530 sk->sk_prot->release_cb(sk);
2531
2532 sock_release_ownership(sk);
2533 if (waitqueue_active(&sk->sk_lock.wq))
2534 wake_up(&sk->sk_lock.wq);
2535 spin_unlock_bh(&sk->sk_lock.slock);
2536}
2537EXPORT_SYMBOL(release_sock);
2538
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small section, where process wont block
 * return false if fast path is taken
 *   sk_lock.slock locked, owned = 0, BH disabled
 * return true if slow path is taken
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */
2549bool lock_sock_fast(struct sock *sk)
2550{
2551 might_sleep();
2552 spin_lock_bh(&sk->sk_lock.slock);
2553
2554 if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
2558 return false;
2559
2560 __lock_sock(sk);
2561 sk->sk_lock.owned = 1;
2562 spin_unlock(&sk->sk_lock.slock);
2563
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
2566 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
2567 local_bh_enable();
2568 return true;
2569}
2570EXPORT_SYMBOL(lock_sock_fast);
2571
int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
        struct timeval tv;

        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        tv = ktime_to_timeval(sk->sk_stamp);
        if (tv.tv_sec == -1)
                return -ENOENT;
        if (tv.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                tv = ktime_to_timeval(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
        struct timespec ts;

        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk, SOCK_TIMESTAMP);
        ts = ktime_to_timespec(sk->sk_stamp);
        if (ts.tv_sec == -1)
                return -ENOENT;
        if (ts.tv_sec == 0) {
                sk->sk_stamp = ktime_get_real();
                ts = ktime_to_timespec(sk->sk_stamp);
        }
        return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
        if (!sock_flag(sk, flag)) {
                unsigned long previous_flags = sk->sk_flags;

                sock_set_flag(sk, flag);
                /*
                 * Only bump the global timestamping count if no
                 * timestamping flag was set before: the other flag may
                 * already have enabled it.
                 */
                if (sock_needs_netstamp(sk) &&
                    !(previous_flags & SK_FLAGS_TIMESTAMP))
                        net_enable_timestamp();
        }
}

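/*
 * Dequeue one skb from the socket error queue and copy it to the user's
 * msghdr: the payload goes to the iov (MSG_TRUNC is set if it does not
 * fit), the extended error is attached as a cmsg of the given level/type,
 * and MSG_ERRQUEUE is set in msg_flags.  Returns the number of bytes
 * copied, or -EAGAIN if the error queue is empty.
 */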
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
                       int level, int type)
{
        struct sock_exterr_skb *serr;
        struct sk_buff *skb;
        int copied, err;

        err = -EAGAIN;
        skb = sock_dequeue_err_skb(sk);
        if (skb == NULL)
                goto out;

        copied = skb->len;
        if (copied > len) {
                msg->msg_flags |= MSG_TRUNC;
                copied = len;
        }
        err = skb_copy_datagram_msg(skb, 0, msg, copied);
        if (err)
                goto out_free_skb;

        sock_recv_timestamp(msg, sk, skb);

        serr = SKB_EXT_ERR(skb);
        put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);

        msg->msg_flags |= MSG_ERRQUEUE;
        err = copied;

out_free_skb:
        kfree_skb(skb);
out:
        return err;
}
EXPORT_SYMBOL(sock_recv_errqueue);

/*
 * Generic getsockopt: hand the request straight to the protocol's own
 * handler.
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, int __user *optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_getsockopt != NULL)
                return sk->sk_prot->compat_getsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

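/*
 * Generic recvmsg: call the protocol's recvmsg handler and, on success,
 * propagate the source address length back through msg_namelen.
 */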
int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        int flags)
{
        struct sock *sk = sock->sk;
        int addr_len = 0;
        int err;

        err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
                                   flags & ~MSG_DONTWAIT, &addr_len);
        if (err >= 0)
                msg->msg_namelen = addr_len;
        return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 * Generic setsockopt: hand the request straight to the protocol's own
 * handler.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
                           char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;

        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;

        if (sk->sk_prot->compat_setsockopt != NULL)
                return sk->sk_prot->compat_setsockopt(sk, level, optname,
                                                      optval, optlen);
        return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

void sk_common_release(struct sock *sk)
{
        if (sk->sk_prot->destroy)
                sk->sk_prot->destroy(sk);

        /*
         * The protocol has dropped its own state for this socket; take it
         * out of the protocol's lookup tables so that no new packets can be
         * steered to it.
         */
        sk->sk_prot->unhash(sk);

        /*
         * Packets already in flight (looked up before the unhash above) may
         * still reach the receive queue.  They, and anything still queued,
         * are purged by the socket destructor once the last reference is
         * dropped; transmitted packets pending in device queues likewise
         * delay final destruction until their references go away.
         */
        sock_orphan(sk);

        xfrm_sk_free_policy(sk);

        sk_refcnt_debug_release(sk);

        if (sk->sk_frag.page) {
                put_page(sk->sk_frag.page);
                sk->sk_frag.page = NULL;
        }

        sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

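/*
 * Per-protocol "sockets in use" accounting, reported through
 * /proc/net/protocols.  Counters are kept per CPU (and per network
 * namespace when CONFIG_NET_NS is enabled) and summed on read.
 */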
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64
struct prot_inuse {
        int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
        __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
        int cpu, idx = prot->inuse_idx;
        int res = 0;

        for_each_possible_cpu(cpu)
                res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

        return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

static int __net_init sock_inuse_init_net(struct net *net)
{
        net->core.inuse = alloc_percpu(struct prot_inuse);
        return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
        free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
        .init = sock_inuse_init_net,
        .exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
        if (register_pernet_subsys(&net_inuse_ops))
                panic("Cannot initialize net inuse counters");

        return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
        __this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
        int cpu, idx = prot->inuse_idx;
        int res = 0;

        for_each_possible_cpu(cpu)
                res += per_cpu(prot_inuse, cpu).val[idx];

        return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

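/*
 * Hand out slots in the inuse counter array.  The last slot is never
 * assigned: it doubles as the "no index" value when the bitmap is
 * exhausted, which is why release_proto_idx() skips it.
 */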
static void assign_proto_idx(struct proto *prot)
{
        prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

        if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
                pr_err("PROTO_INUSE_NR exhausted\n");
                return;
        }

        set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
        if (prot->inuse_idx != PROTO_INUSE_NR - 1)
                clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

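/*
 * Optional kmem cache for a protocol's request socks (embryonic
 * connection-request state).  Created from proto_register() when the
 * protocol supplies rsk_prot, and torn down on unregister or on error.
 */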
static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
{
        if (!rsk_prot)
                return;
        kfree(rsk_prot->slab_name);
        rsk_prot->slab_name = NULL;
        kmem_cache_destroy(rsk_prot->slab);
        rsk_prot->slab = NULL;
}

static int req_prot_init(const struct proto *prot)
{
        struct request_sock_ops *rsk_prot = prot->rsk_prot;

        if (!rsk_prot)
                return 0;

        rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
                                        prot->name);
        if (!rsk_prot->slab_name)
                return -ENOMEM;

        rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
                                           rsk_prot->obj_size, 0,
                                           prot->slab_flags, NULL);

        if (!rsk_prot->slab) {
                pr_crit("%s: Can't create request sock SLAB cache!\n",
                        prot->name);
                return -ENOMEM;
        }
        return 0;
}

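/*
 * Register a transport protocol with the core.  When @alloc_slab is set,
 * dedicated kmem caches are created for the protocol's socks and, if
 * provided, for its request socks and timewait socks; the protocol is then
 * added to proto_list and given an inuse counter slot.  Returns 0, or
 * -ENOBUFS if a cache could not be created.
 *
 * Illustrative sketch only (hypothetical protocol, not part of this file):
 *
 *	static struct proto foo_prot = {
 *		.name		= "FOO",
 *		.owner		= THIS_MODULE,
 *		.obj_size	= sizeof(struct foo_sock),
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return proto_register(&foo_prot, 1);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		proto_unregister(&foo_prot);
 *	}
 */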
int proto_register(struct proto *prot, int alloc_slab)
{
        if (alloc_slab) {
                prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
                                        SLAB_HWCACHE_ALIGN | prot->slab_flags,
                                        NULL);

                if (prot->slab == NULL) {
                        pr_crit("%s: Can't create sock SLAB cache!\n",
                                prot->name);
                        goto out;
                }

                if (req_prot_init(prot))
                        goto out_free_request_sock_slab;

                if (prot->twsk_prot != NULL) {
                        prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);

                        if (prot->twsk_prot->twsk_slab_name == NULL)
                                goto out_free_request_sock_slab;

                        prot->twsk_prot->twsk_slab =
                                kmem_cache_create(prot->twsk_prot->twsk_slab_name,
                                                  prot->twsk_prot->twsk_obj_size,
                                                  0,
                                                  prot->slab_flags,
                                                  NULL);
                        if (prot->twsk_prot->twsk_slab == NULL)
                                goto out_free_timewait_sock_slab_name;
                }
        }

        mutex_lock(&proto_list_mutex);
        list_add(&prot->node, &proto_list);
        assign_proto_idx(prot);
        mutex_unlock(&proto_list_mutex);
        return 0;

out_free_timewait_sock_slab_name:
        kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
        req_prot_cleanup(prot->rsk_prot);

        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;
out:
        return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

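/*
 * Undo proto_register(): drop the protocol from proto_list, release its
 * inuse counter slot and destroy any caches that were created for it.
 */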
void proto_unregister(struct proto *prot)
{
        mutex_lock(&proto_list_mutex);
        release_proto_idx(prot);
        list_del(&prot->node);
        mutex_unlock(&proto_list_mutex);

        kmem_cache_destroy(prot->slab);
        prot->slab = NULL;

        req_prot_cleanup(prot->rsk_prot);

        if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
                kmem_cache_destroy(prot->twsk_prot->twsk_slab);
                kfree(prot->twsk_prot->twsk_slab_name);
                prot->twsk_prot->twsk_slab = NULL;
        }
}
EXPORT_SYMBOL(proto_unregister);

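/*
 * seq_file implementation behind /proc/net/protocols: one header line
 * followed by one line per registered protocol, walked under
 * proto_list_mutex.
 */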
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(proto_list_mutex)
{
        mutex_lock(&proto_list_mutex);
        return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
        __releases(proto_list_mutex)
{
        mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
        return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
        return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
        return proto->memory_pressure != NULL ?
        proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
        seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
                        "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
                   proto->name,
                   proto->obj_size,
                   sock_prot_inuse_get(seq_file_net(seq), proto),
                   sock_prot_memory_allocated(proto),
                   sock_prot_memory_pressure(proto),
                   proto->max_header,
                   proto->slab == NULL ? "no" : "yes",
                   module_name(proto->owner),
                   proto_method_implemented(proto->close),
                   proto_method_implemented(proto->connect),
                   proto_method_implemented(proto->disconnect),
                   proto_method_implemented(proto->accept),
                   proto_method_implemented(proto->ioctl),
                   proto_method_implemented(proto->init),
                   proto_method_implemented(proto->destroy),
                   proto_method_implemented(proto->shutdown),
                   proto_method_implemented(proto->setsockopt),
                   proto_method_implemented(proto->getsockopt),
                   proto_method_implemented(proto->sendmsg),
                   proto_method_implemented(proto->recvmsg),
                   proto_method_implemented(proto->sendpage),
                   proto_method_implemented(proto->bind),
                   proto_method_implemented(proto->backlog_rcv),
                   proto_method_implemented(proto->hash),
                   proto_method_implemented(proto->unhash),
                   proto_method_implemented(proto->get_port),
                   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
        if (v == &proto_list)
                seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
                           "protocol",
                           "size",
                           "sockets",
                           "memory",
                           "press",
                           "maxhdr",
                           "slab",
                           "module",
                           "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
        else
                proto_seq_printf(seq, list_entry(v, struct proto, node));
        return 0;
}

static const struct seq_operations proto_seq_ops = {
        .start  = proto_seq_start,
        .next   = proto_seq_next,
        .stop   = proto_seq_stop,
        .show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &proto_seq_ops,
                            sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
        .owner          = THIS_MODULE,
        .open           = proto_seq_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
        if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
                return -ENOMEM;

        return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
        remove_proc_entry("protocols", net->proc_net);
}

static __net_initdata struct pernet_operations proto_net_ops = {
        .init = proto_init_net,
        .exit = proto_exit_net,
};

static int __init proto_init(void)
{
        return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif