#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
#include <linux/static_key.h>
#include <linux/memcontrol.h>
#include <linux/prefetch.h>

#include <asm/uaccess.h>

#include <linux/netdevice.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>

#include <linux/filter.h>

#include <trace/events/sock.h>

#include <net/tcp.h>
#include <net/busy_poll.h>

static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
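
/**
 * sk_ns_capable - General socket capability test
 * @sk: Socket to use a capability on or through
 * @user_ns: The user namespace of the capability to use
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had when the socket was
 * created and the current process has the capability @cap in the user
 * namespace @user_ns.
 */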
bool sk_ns_capable(const struct sock *sk,
		   struct user_namespace *user_ns, int cap)
{
	return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
		ns_capable(user_ns, cap);
}
EXPORT_SYMBOL(sk_ns_capable);
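
/**
 * sk_capable - Socket global capability test
 * @sk: Socket to use a capability on or through
 * @cap: The global capability to use
 *
 * Test to see if the opener of the socket had when the socket was
 * created and the current process has the capability @cap in all user
 * namespaces.
 */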
bool sk_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, &init_user_ns, cap);
}
EXPORT_SYMBOL(sk_capable);
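
/**
 * sk_net_capable - Network namespace socket capability test
 * @sk: Socket to use a capability on or through
 * @cap: The capability to use
 *
 * Test to see if the opener of the socket had when the socket was created
 * and the current process has the capability @cap over the network namespace
 * the socket is a member of.
 */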
bool sk_net_capable(const struct sock *sk, int cap)
{
	return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
}
EXPORT_SYMBOL(sk_net_capable);

#ifdef CONFIG_MEMCG_KMEM
int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	struct proto *proto;
	int ret = 0;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry(proto, &proto_list, node) {
		if (proto->init_cgroup) {
			ret = proto->init_cgroup(memcg, ss);
			if (ret)
				goto out;
		}
	}

	mutex_unlock(&proto_list_mutex);
	return ret;
out:
	list_for_each_entry_continue_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
	return ret;
}

void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
{
	struct proto *proto;

	mutex_lock(&proto_list_mutex);
	list_for_each_entry_reverse(proto, &proto_list, node)
		if (proto->destroy_cgroup)
			proto->destroy_cgroup(memcg);
	mutex_unlock(&proto_list_mutex);
}
#endif

/* Each address family might have different locking rules, so we have
 * one slock key per address family:
 */
static struct lock_class_key af_family_keys[AF_MAX];
static struct lock_class_key af_family_slock_keys[AF_MAX];

#if defined(CONFIG_MEMCG_KMEM)
struct static_key memcg_socket_limit_enabled;
EXPORT_SYMBOL(memcg_socket_limit_enabled);
#endif

/* Make lock validator output more readable. (we pre-construct these
 * strings build-time, so that runtime allocation of socket
 * names would be impossible)
 */
static const char *const af_family_key_strings[AF_MAX+1] = {
	"sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX"     , "sk_lock-AF_INET"     ,
	"sk_lock-AF_AX25"  , "sk_lock-AF_IPX"      , "sk_lock-AF_APPLETALK",
	"sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE"   , "sk_lock-AF_ATMPVC"   ,
	"sk_lock-AF_X25"   , "sk_lock-AF_INET6"    , "sk_lock-AF_ROSE"     ,
	"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI"  , "sk_lock-AF_SECURITY" ,
	"sk_lock-AF_KEY"   , "sk_lock-AF_NETLINK"  , "sk_lock-AF_PACKET"   ,
	"sk_lock-AF_ASH"   , "sk_lock-AF_ECONET"   , "sk_lock-AF_ATMSVC"   ,
	"sk_lock-AF_RDS"   , "sk_lock-AF_SNA"      , "sk_lock-AF_IRDA"     ,
	"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE"  , "sk_lock-AF_LLC"      ,
	"sk_lock-27"       , "sk_lock-28"          , "sk_lock-AF_CAN"      ,
	"sk_lock-AF_TIPC"  , "sk_lock-AF_BLUETOOTH", "sk_lock-AF_IUCV"     ,
	"sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN"     , "sk_lock-AF_PHONET"   ,
	"sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG"      ,
	"sk_lock-AF_NFC"   , "sk_lock-AF_VSOCK"    , "sk_lock-AF_MAX"
};
static const char *const af_family_slock_key_strings[AF_MAX+1] = {
	"slock-AF_UNSPEC", "slock-AF_UNIX"     , "slock-AF_INET"     ,
	"slock-AF_AX25"  , "slock-AF_IPX"      , "slock-AF_APPLETALK",
	"slock-AF_NETROM", "slock-AF_BRIDGE"   , "slock-AF_ATMPVC"   ,
	"slock-AF_X25"   , "slock-AF_INET6"    , "slock-AF_ROSE"     ,
	"slock-AF_DECnet", "slock-AF_NETBEUI"  , "slock-AF_SECURITY" ,
	"slock-AF_KEY"   , "slock-AF_NETLINK"  , "slock-AF_PACKET"   ,
	"slock-AF_ASH"   , "slock-AF_ECONET"   , "slock-AF_ATMSVC"   ,
	"slock-AF_RDS"   , "slock-AF_SNA"      , "slock-AF_IRDA"     ,
	"slock-AF_PPPOX" , "slock-AF_WANPIPE"  , "slock-AF_LLC"      ,
	"slock-27"       , "slock-28"          , "slock-AF_CAN"      ,
	"slock-AF_TIPC"  , "slock-AF_BLUETOOTH", "slock-AF_IUCV"     ,
	"slock-AF_RXRPC" , "slock-AF_ISDN"     , "slock-AF_PHONET"   ,
	"slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG"      ,
	"slock-AF_NFC"   , "slock-AF_VSOCK"    , "slock-AF_MAX"
};
static const char *const af_family_clock_key_strings[AF_MAX+1] = {
	"clock-AF_UNSPEC", "clock-AF_UNIX"     , "clock-AF_INET"     ,
	"clock-AF_AX25"  , "clock-AF_IPX"      , "clock-AF_APPLETALK",
	"clock-AF_NETROM", "clock-AF_BRIDGE"   , "clock-AF_ATMPVC"   ,
	"clock-AF_X25"   , "clock-AF_INET6"    , "clock-AF_ROSE"     ,
	"clock-AF_DECnet", "clock-AF_NETBEUI"  , "clock-AF_SECURITY" ,
	"clock-AF_KEY"   , "clock-AF_NETLINK"  , "clock-AF_PACKET"   ,
	"clock-AF_ASH"   , "clock-AF_ECONET"   , "clock-AF_ATMSVC"   ,
	"clock-AF_RDS"   , "clock-AF_SNA"      , "clock-AF_IRDA"     ,
	"clock-AF_PPPOX" , "clock-AF_WANPIPE"  , "clock-AF_LLC"      ,
	"clock-27"       , "clock-28"          , "clock-AF_CAN"      ,
	"clock-AF_TIPC"  , "clock-AF_BLUETOOTH", "clock-AF_IUCV"     ,
	"clock-AF_RXRPC" , "clock-AF_ISDN"     , "clock-AF_PHONET"   ,
	"clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG"      ,
	"clock-AF_NFC"   , "clock-AF_VSOCK"    , "clock-AF_MAX"
};

/* sk_callback_lock locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];

/* Take into consideration the size of the struct sk_buff overhead in the
 * determination of these values, since that is non-constant across
 * platforms.  This makes socket queueing behavior and performance
 * not depend upon such differences.
 */
#define _SK_MEM_PACKETS		256
#define _SK_MEM_OVERHEAD	SKB_TRUESIZE(256)
#define SK_WMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)
#define SK_RMEM_MAX		(_SK_MEM_OVERHEAD * _SK_MEM_PACKETS)

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Maximal space eaten by iovec or ancillary data plus some space */
int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
EXPORT_SYMBOL(sysctl_optmem_max);

struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(memalloc_socks);
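
/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 */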
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_key_slow_inc(&memalloc_socks);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_key_slow_dec(&memalloc_socks);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. However, if SOCK_MEMALLOC is cleared while
	 * it has rmem allocations there is a risk that the user of the
	 * socket cannot make forward progress due to exceeding the rmem
	 * limits. By rights, sk_clear_memalloc() should only be called
	 * on sockets being torn down but warn and reset the accounting if
	 * that assumption breaks.
	 */
	if (WARN_ON(sk->sk_forward_alloc))
		sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned long pflags = current->flags;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	current->flags |= PF_MEMALLOC;
	ret = sk->sk_backlog_rcv(sk, skb);
	tsk_restore_flags(current, pflags, PF_MEMALLOC);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);
378
379static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
380{
381 struct timeval tv;
382
383 if (optlen < sizeof(tv))
384 return -EINVAL;
385 if (copy_from_user(&tv, optval, sizeof(tv)))
386 return -EFAULT;
387 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
388 return -EDOM;
389
390 if (tv.tv_sec < 0) {
391 static int warned __read_mostly;
392
393 *timeo_p = 0;
394 if (warned < 10 && net_ratelimit()) {
395 warned++;
396 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
397 __func__, current->comm, task_pid_nr(current));
398 }
399 return 0;
400 }
401 *timeo_p = MAX_SCHEDULE_TIMEOUT;
402 if (tv.tv_sec == 0 && tv.tv_usec == 0)
403 return 0;
404 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT/HZ - 1))
405 *timeo_p = tv.tv_sec*HZ + (tv.tv_usec+(1000000/HZ-1))/(1000000/HZ);
406 return 0;
407}
408
409static void sock_warn_obsolete_bsdism(const char *name)
410{
411 static int warned;
412 static char warncomm[TASK_COMM_LEN];
413 if (strcmp(warncomm, current->comm) && warned < 5) {
414 strcpy(warncomm, current->comm);
415 pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n",
416 warncomm, name);
417 warned++;
418 }
419}
420
421static bool sock_needs_netstamp(const struct sock *sk)
422{
423 switch (sk->sk_family) {
424 case AF_UNSPEC:
425 case AF_UNIX:
426 return false;
427 default:
428 return true;
429 }
430}
431
432static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
433{
434 if (sk->sk_flags & flags) {
435 sk->sk_flags &= ~flags;
436 if (sock_needs_netstamp(sk) &&
437 !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
438 net_disable_timestamp();
439 }
440}
441

int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int skb_len;
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
	skb_set_owner_r(skb, sk);

	/* Cache the SKB length before we tack it onto the receive
	 * queue. Once it is added it no longer belongs to us and
	 * may be freed by other threads of control pulling packets
	 * from the queue.
	 */
	skb_len = skb->len;

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk, skb_len);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);

int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	int err;

	err = sk_filter(sk, skb);
	if (err)
		return err;

	return __sock_queue_rcv_skb(sk, skb);
}
EXPORT_SYMBOL(sock_queue_rcv_skb);

int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
{
	int rc = NET_RX_SUCCESS;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	sock_put(sk);
	return rc;
discard_and_relse:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(sk_receive_skb);

void sk_reset_txq(struct sock *sk)
{
	sk_tx_queue_clear(sk);
}
EXPORT_SYMBOL(sk_reset_txq);

struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		sk->sk_dst_pending_confirm = 0;
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

static int sock_setbindtodevice(struct sock *sk, char __user *optval,
				int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	/* Sorry... */
	ret = -EPERM;
	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_user(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	lock_sock(sk);
	sk->sk_bound_dev_if = index;
	sk_dst_reset(sk);
	release_sock(sk);

	ret = 0;

out:
#endif

	return ret;
}

static int sock_getbindtodevice(struct sock *sk, char __user *optval,
				int __user *optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (sk->sk_bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_user(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (put_user(len, optlen))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
{
	if (valbool)
		sock_set_flag(sk, bit);
	else
		sock_reset_flag(sk, bit);
}

bool sk_mc_loop(struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return true;
	switch (sk->sk_family) {
	case AF_INET:
		return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_sk(sk)->mc_loop;
#endif
	}
	WARN_ON(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 */

int sock_setsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */
	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	valbool = val ? 1 : 0;

	lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		sk->sk_reuseport = valbool;
		break;
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		ret = -ENOPROTOOPT;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_wmem_max);
set_sndbuf:
		/* Ensure val * 2 fits into an int, to prevent max_t()
		 * from treating it as a negative value.
		 */
		val = min_t(int, val, INT_MAX / 2);
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
		 */
		if (val < 0)
			val = 0;
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, sysctl_rmem_max);
set_rcvbuf:
		/* Ensure val * 2 fits into an int, to prevent max_t()
		 * from treating it as a negative value.
		 */
		val = min_t(int, val, INT_MAX / 2);
		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
		/*
		 * We double it on the way in to account for
		 * "struct sk_buff" etc. overhead.   Applications
		 * assume that the SO_RCVBUF setting they make will
		 * allow that much actual data to be received on that
		 * socket.
		 *
		 * Applications are unaware that "struct sk_buff" and
		 * other overheads allocate from the receive buffer
		 * during socket buffer allocation.
		 *
		 * And after considering the possible alternatives,
		 * returning the value we actually used in getsockopt
		 * is the most desirable behavior.
		 */
		sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
		break;

	case SO_RCVBUFFORCE:
		if (!capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
		 */
		if (val < 0)
			val = 0;
		goto set_rcvbuf;

	case SO_KEEPALIVE:
#ifdef CONFIG_INET
		if (sk->sk_protocol == IPPROTO_TCP &&
		    sk->sk_type == SOCK_STREAM)
			tcp_set_keepalive(sk, valbool);
#endif
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check_tx = valbool;
		break;

	case SO_PRIORITY:
		if ((val >= 0 && val <= 6) ||
		    ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			sk->sk_priority = val;
		else
			ret = -EPERM;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_user(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff)
			sock_reset_flag(sk, SOCK_LINGER);
		else {
#if (BITS_PER_LONG == 32)
			if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
				sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
			else
#endif
				sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("setsockopt");
		break;

	case SO_PASSCRED:
		if (valbool)
			set_bit(SOCK_PASSCRED, &sock->flags);
		else
			clear_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_TIMESTAMP:
	case SO_TIMESTAMPNS:
		if (valbool) {
			if (optname == SO_TIMESTAMP)
				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
			else
				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
			sock_set_flag(sk, SOCK_RCVTSTAMP);
			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
		} else {
			sock_reset_flag(sk, SOCK_RCVTSTAMP);
			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
		}
		break;

	case SO_TIMESTAMPING:
		if (val & ~SOF_TIMESTAMPING_MASK ||
		    val & __RH_RESERVED_SOF_TIMESTAMPING_OPT_ID ||
		    val & __RH_RESERVED_SOF_TIMESTAMPING_TX_SCHED ||
		    val & __RH_RESERVED_SOF_TIMESTAMPING_TX_ACK ||
		    val & __RH_RESERVED_SOF_TIMESTAMPING_OPT_TSONLY ||
		    val & __RH_RESERVED_SOF_TIMESTAMPING_OPT_STATS) {
			ret = -EINVAL;
			break;
		}
		sk->sk_tsflags = val;
		if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
			sock_enable_timestamp(sk,
					      SOCK_TIMESTAMPING_RX_SOFTWARE);
		else
			sock_disable_timestamp(sk,
					       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
		break;

	case SO_RCVLOWAT:
		if (val < 0)
			val = INT_MAX;
		sk->sk_rcvlowat = val ? : 1;
		break;

	case SO_RCVTIMEO:
		ret = sock_set_timeout(&sk->sk_rcvtimeo, optval, optlen);
		break;

	case SO_SNDTIMEO:
		ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen);
		break;

	case SO_ATTACH_FILTER:
		ret = -EINVAL;
		if (optlen == sizeof(struct sock_fprog)) {
			struct sock_fprog fprog;

			ret = -EFAULT;
			if (copy_from_user(&fprog, optval, sizeof(fprog)))
				break;

			ret = sk_attach_filter(&fprog, sk);
		}
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_PASSSEC:
		if (valbool)
			set_bit(SOCK_PASSSEC, &sock->flags);
		else
			clear_bit(SOCK_PASSSEC, &sock->flags);
		break;
	case SO_MARK:
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			ret = -EPERM;
		else
			sk->sk_mark = val;
		break;

		/* We implement the SO_SNDLOWAT etc to
		 * not be settable (1003.1g 5.4.1.1)
		 */
	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_PEEK_OFF:
		if (sock->ops->set_peek_off)
			ret = sock->ops->set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		/* allow unprivileged users to decrease the value */
		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
			ret = -EPERM;
		else {
			if (val < 0)
				ret = -EINVAL;
			else
				sk->sk_ll_usec = val;
		}
		break;
#endif

	case SO_MAX_PACING_RATE:
		sk->sk_max_pacing_rate = val;
		sk->sk_pacing_rate = min(sk->sk_pacing_rate,
					 sk->sk_max_pacing_rate);
		break;

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);
	return ret;
}
EXPORT_SYMBOL(sock_setsockopt);

void cred_to_ucred(struct pid *pid, const struct cred *cred,
		   struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}
EXPORT_SYMBOL_GPL(cred_to_ucred);

int sock_getsockopt(struct socket *sock, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	union {
		int val;
		struct linger ling;
		struct timeval tm;
	} v;

	int lv = sizeof(int);
	int len;

	if (get_user(len, optlen))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = sk->sk_sndbuf;
		break;

	case SO_RCVBUF:
		v.val = sk->sk_rcvbuf;
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val = sk->sk_type;
		break;

	case SO_PROTOCOL:
		v.val = sk->sk_protocol;
		break;

	case SO_DOMAIN:
		v.val = sk->sk_family;
		break;

	case SO_ERROR:
		v.val = -sock_error(sk);
		if (v.val == 0)
			v.val = xchg(&sk->sk_err_soft, 0);
		break;

	case SO_OOBINLINE:
		v.val = sock_flag(sk, SOCK_URGINLINE);
		break;

	case SO_NO_CHECK:
		v.val = sk->sk_no_check_tx;
		break;

	case SO_PRIORITY:
		v.val = sk->sk_priority;
		break;

	case SO_LINGER:
		lv = sizeof(v.ling);
		v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
		v.ling.l_linger = sk->sk_lingertime / HZ;
		break;

	case SO_BSDCOMPAT:
		sock_warn_obsolete_bsdism("getsockopt");
		break;

	case SO_TIMESTAMP:
		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
			!sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPNS:
		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
		break;

	case SO_TIMESTAMPING:
		v.val = sk->sk_tsflags;
		break;

	case SO_RCVTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_rcvtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_rcvtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_SNDTIMEO:
		lv = sizeof(struct timeval);
		if (sk->sk_sndtimeo == MAX_SCHEDULE_TIMEOUT) {
			v.tm.tv_sec = 0;
			v.tm.tv_usec = 0;
		} else {
			v.tm.tv_sec = sk->sk_sndtimeo / HZ;
			v.tm.tv_usec = ((sk->sk_sndtimeo % HZ) * 1000000) / HZ;
		}
		break;

	case SO_RCVLOWAT:
		v.val = sk->sk_rcvlowat;
		break;

	case SO_SNDLOWAT:
		v.val = 1;
		break;

	case SO_PASSCRED:
		v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
		break;

	case SO_PEERCRED:
	{
		struct ucred peercred;
		if (len > sizeof(peercred))
			len = sizeof(peercred);
		cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
		if (copy_to_user(optval, &peercred, len))
			return -EFAULT;
		goto lenout;
	}

	case SO_PEERNAME:
	{
		char address[128];

		if (sock->ops->getname(sock, (struct sockaddr *)address, &lv, 2))
			return -ENOTCONN;
		if (lv < len)
			return -EINVAL;
		if (copy_to_user(optval, address, len))
			return -EFAULT;
		goto lenout;
	}

	/* Dubious BSD thing... Probably nobody even uses it, but
	 * the UNIX standard wants it for whatever reason... -DaveM
	 */
	case SO_ACCEPTCONN:
		v.val = sk->sk_state == TCP_LISTEN;
		break;

	case SO_PASSSEC:
		v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
		break;

	case SO_PEERSEC:
		return security_socket_getpeersec_stream(sock, optval, optlen, len);

	case SO_MARK:
		v.val = sk->sk_mark;
		break;

	case SO_RXQ_OVFL:
		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
		break;

	case SO_WIFI_STATUS:
		v.val = sock_flag(sk, SOCK_WIFI_STATUS);
		break;

	case SO_PEEK_OFF:
		if (!sock->ops->set_peek_off)
			return -EOPNOTSUPP;

		v.val = sk->sk_peek_off;
		break;
	case SO_NOFCS:
		v.val = sock_flag(sk, SOCK_NOFCS);
		break;

	case SO_BINDTODEVICE:
		return sock_getbindtodevice(sk, optval, optlen, len);

	case SO_GET_FILTER:
		len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
		if (len < 0)
			return len;

		goto lenout;

	case SO_LOCK_FILTER:
		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
		break;

	case SO_BPF_EXTENSIONS:
		v.val = bpf_tell_extensions();
		break;

	case SO_SELECT_ERR_QUEUE:
		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
		break;

#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		v.val = sk->sk_ll_usec;
		break;
#endif

	case SO_MAX_PACING_RATE:
		v.val = sk->sk_max_pacing_rate;
		break;

	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (copy_to_user(optval, &v, len))
		return -EFAULT;
lenout:
	if (put_user(len, optlen))
		return -EFAULT;
	return 0;
}

/*
 * Initialize an sk_lock.
 *
 * (We also register the sk_lock with the lock validator.)
 */
static inline void sock_lock_init(struct sock *sk)
{
	sock_lock_init_class_and_name(sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
#ifdef CONFIG_SECURITY_NETWORK
	void *sptr = nsk->sk_security;
#endif
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
	       osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
{
	unsigned long nulls1, nulls2;

	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
	if (nulls1 > nulls2)
		swap(nulls1, nulls2);

	if (nulls1 != 0)
		memset((char *)sk, 0, nulls1);
	memset((char *)sk + nulls1 + sizeof(void *), 0,
	       nulls2 - nulls1 - sizeof(void *));
	memset((char *)sk + nulls2 + sizeof(void *), 0,
	       size - nulls2 - sizeof(void *));
}
EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);

static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
				  int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (priority & __GFP_ZERO) {
			if (prot->clear_sk)
				prot->clear_sk(sk, prot->obj_size);
			else
				sk_prot_clear_nulls(sk, prot->obj_size);
		}
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		kmemcheck_annotate_bitfield(sk, flags);

		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
		sk_tx_queue_clear(sk);
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	security_sk_free(sk);
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
void sock_update_classid(struct sock *sk)
{
	u32 classid;

	classid = task_cls_classid(current);
	if (classid != sk->sk_classid)
		sk->sk_classid = classid;
}
EXPORT_SYMBOL(sock_update_classid);
#endif

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
void sock_update_netprioidx(struct sock *sk)
{
	if (in_interrupt())
		return;

	sk->sk_cgrp_prioidx = task_netprioidx(current);
}
EXPORT_SYMBOL_GPL(sock_update_netprioidx);
#endif
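
/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 */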
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot)
{
	struct sock *sk;

	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sock_lock_init(sk);
		sock_net_set(sk, get_net(net));
		atomic_set(&sk->sk_wmem_alloc, 1);

		sock_update_classid(sk);
		sock_update_netprioidx(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

/* Sockets having SOCK_RCU_FREE will call this function after one RCU
 * grace period. This is the case for UDP sockets and TCP listeners.
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       atomic_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);
	put_net(sock_net(sk));
	sk_prot_free(sk->sk_prot_creator, sk);
}

void sk_destruct(struct sock *sk)
{
	if (sock_flag(sk, SOCK_RCU_FREE))
		call_rcu(&sk->sk_rcu, __sk_destruct);
	else
		__sk_destruct(&sk->sk_rcu);
}

static void __sk_free(struct sock *sk)
{
	sk_destruct(sk);
}

void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
	if (atomic_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/*
 * Last sock_put should drop reference to sk->sk_net. It has already
 * been dropped in sk_change_net. Taking reference to stopping namespace
 * is not an option.
 * Take reference to a socket to remove it from hash _alive_ and after that
 * destroy it in the context of init_net.
 */
void sk_release_kernel(struct sock *sk)
{
	if (sk == NULL || sk->sk_socket == NULL)
		return;

	sock_hold(sk);
	sock_release(sk->sk_socket);
	sock_net_set(sk, get_net(&init_net));
	sock_put(sk);
}
EXPORT_SYMBOL(sk_release_kernel);

static void sk_update_clone(const struct sock *sk, struct sock *newsk)
{
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
		sock_update_memcg(newsk);
}
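
/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 */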
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct sock *newsk;

	newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
	if (newsk != NULL) {
		struct sk_filter *filter;

		sock_copy(newsk, sk);

		newsk->sk_prot_creator = sk->sk_prot;

		/* SANITY */
		get_net(sock_net(newsk));
		sk_node_init(&newsk->sk_node);
		sock_lock_init(newsk);
		bh_lock_sock(newsk);
		newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
		newsk->sk_backlog.len = 0;

		atomic_set(&newsk->sk_rmem_alloc, 0);
		/*
		 * sk_wmem_alloc set to one (see sk_free() and sock_wfree())
		 */
		atomic_set(&newsk->sk_wmem_alloc, 1);
		atomic_set(&newsk->sk_omem_alloc, 0);
		skb_queue_head_init(&newsk->sk_receive_queue);
		skb_queue_head_init(&newsk->sk_write_queue);

		rwlock_init(&newsk->sk_callback_lock);
		lockdep_set_class_and_name(&newsk->sk_callback_lock,
				af_callback_keys + newsk->sk_family,
				af_family_clock_key_strings[newsk->sk_family]);

		newsk->sk_dst_cache = NULL;
		newsk->sk_dst_pending_confirm = 0;
		newsk->sk_wmem_queued = 0;
		newsk->sk_forward_alloc = 0;
		newsk->sk_send_head = NULL;
		newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;

		sock_reset_flag(newsk, SOCK_DONE);
		skb_queue_head_init(&newsk->sk_error_queue);

		filter = rcu_dereference_protected(newsk->sk_filter, 1);
		if (filter != NULL)
			sk_filter_charge(newsk, filter);

		if (unlikely(xfrm_sk_clone_policy(newsk))) {
			/* It is still raw copy of parent, so invalidate
			 * destructor and make plain sk_free() */
			newsk->sk_destruct = NULL;
			bh_unlock_sock(newsk);
			sk_free(newsk);
			newsk = NULL;
			goto out;
		}

		newsk->sk_err = 0;
		newsk->sk_priority = 0;
		/*
		 * Before updating sk_refcnt, we must commit prior changes to memory
		 * (Documentation/RCU/rculist_nulls.txt for details)
		 */
		smp_wmb();
		atomic_set(&newsk->sk_refcnt, 2);

		/*
		 * Increment the counter in the same struct proto as the master
		 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
		 * is the same as sk->sk_prot->socks, as this field was copied
		 * with memcpy).
		 *
		 * This _changes_ the previous behaviour, where
		 * tcp_create_openreq_child always was incrementing the
		 * equivalent to tcp_prot->socks (inet_sock_nr), so this have
		 * to be taken into account in all callers. -acme
		 */
		sk_refcnt_debug_inc(newsk);
		sk_set_socket(newsk, NULL);
		newsk->sk_wq = NULL;

		sk_update_clone(sk, newsk);

		if (newsk->sk_prot->sockets_allocated)
			sk_sockets_allocated_inc(newsk);

		if (sock_needs_netstamp(sk) &&
		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
			net_enable_timestamp();
	}
out:
	return newsk;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	sk_dst_set(sk, dst);
	sk->sk_route_caps = dst->dev->features;
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	sk->sk_route_caps &= ~sk->sk_route_nocaps;
	if (sk_can_gso(sk)) {
		if (dst->header_len) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = dst->dev->gso_max_size;
			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
		}
	}
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
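
/*
 *	Simple resource managers for sockets.
 */

/*
 * Write buffer destructor automatically called from kfree_skb.
 */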
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		atomic_sub(len - 1, &sk->sk_wmem_alloc);
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (atomic_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/* This variant of sock_wfree() is used by TCP,
 * since it sets SOCK_USE_WRITE_QUEUE.
 */
void __sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (atomic_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}

/* This helper is used by netem, as it can hold packets in its
 * delay queue. We want to allow the owner socket to send more
 * packets, as if they were already TX completed by a typical driver.
 * But we also want to keep skb->sk set because some packet schedulers
 * rely on it (sch_fq for example).
 */
void skb_orphan_partial(struct sk_buff *skb)
{
	if (skb_is_tcp_pure_ack(skb))
		return;

	if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
	    || skb->destructor == tcp_wfree
#endif
	    ) {
		struct sock *sk = skb->sk;

		if (atomic_inc_not_zero(&sk->sk_refcnt)) {
			atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
			skb->destructor = sock_efree;
		}
	} else {
		skb_orphan(skb);
	}
}
EXPORT_SYMBOL(skb_orphan_partial);

/*
 * Read buffer destructor automatically called from kfree_skb.
 */
void sock_rfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;

	atomic_sub(len, &sk->sk_rmem_alloc);
	sk_mem_uncharge(sk, len);
}
EXPORT_SYMBOL(sock_rfree);

void sock_efree(struct sk_buff *skb)
{
	sock_put(skb->sk);
}
EXPORT_SYMBOL(sock_efree);

#ifdef CONFIG_INET
void sock_edemux(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_put(inet_twsk(sk));
	else
		sock_put(sk);
}
EXPORT_SYMBOL(sock_edemux);
#endif

kuid_t sock_i_uid(struct sock *sk)
{
	kuid_t uid;

	read_lock_bh(&sk->sk_callback_lock);
	uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
	read_unlock_bh(&sk->sk_callback_lock);
	return uid;
}
EXPORT_SYMBOL(sock_i_uid);

unsigned long sock_i_ino(struct sock *sk)
{
	unsigned long ino;

	read_lock_bh(&sk->sk_callback_lock);
	ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
	read_unlock_bh(&sk->sk_callback_lock);
	return ino;
}
EXPORT_SYMBOL(sock_i_ino);

/*
 * Allocate a skb from the socket's send buffer.
 */
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_w(skb, sk);
			return skb;
		}
	}
	return NULL;
}
EXPORT_SYMBOL(sock_wmalloc);

/*
 * Allocate a skb from the socket's receive buffer.
 */
struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
			     gfp_t priority)
{
	if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
		struct sk_buff *skb = alloc_skb(size, priority);
		if (skb) {
			skb_set_owner_r(skb, sk);
			return skb;
		}
	}
	return NULL;
}

/*
 * Allocate a memory block from the socket's option memory buffer.
 */
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
	if ((unsigned int)size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
		void *mem;
		/* First do the add, to avoid the race if kmalloc
		 * might sleep.
		 */
		atomic_add(size, &sk->sk_omem_alloc);
		mem = kmalloc(size, priority);
		if (mem)
			return mem;
		atomic_sub(size, &sk->sk_omem_alloc);
	}
	return NULL;
}
EXPORT_SYMBOL(sock_kmalloc);

/* Free an option memory block. Note, we actually want the inline
 * here as this allows gcc to detect the nullify and fold away the
 * condition entirely.
 */
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kzfree(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}

void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);

void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);

/* It is almost wait_for_wmem, but we do not sleep if we have
 * received a signal.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
			break;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			break;
		if (sk->sk_err)
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}
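
/*
 *	Generic send/receive buffer handlers
 */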
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (sk->sk_shutdown & SEND_SHUTDOWN)
			goto failure;

		if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
			break;

		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
				    int noblock, int *errcode)
{
	return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
}
EXPORT_SYMBOL(sock_alloc_send_skb);

/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER	get_order(32768)
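
/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
 */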
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER) {
		pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
					  __GFP_NOWARN | __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);

bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
		return true;

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);

static void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					  TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

static void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb = sk->sk_backlog.head;

	do {
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
		bh_unlock_sock(sk);

		do {
			struct sk_buff *next = skb->next;

			prefetch(next);
			WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb->next = NULL;
			sk_backlog_rcv(sk, skb);

			/*
			 * We are in process context here with softirqs
			 * disabled, use cond_resched_softirq() to preempt.
			 * This is safe to do because we've taken the backlog
			 * queue private:
			 */
			cond_resched_softirq();

			skb = next;
		} while (skb != NULL);

		bh_lock_sock(sk);
	} while ((skb = sk->sk_backlog.head) != NULL);

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}
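
/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 */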
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	int rc;
	DEFINE_WAIT(wait);

	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb);
	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
	finish_wait(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);
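
/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 */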
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct proto *prot = sk->sk_prot;
	int parent_status = UNDER_LIMIT;
	long allocated = sk_memory_allocated_add(sk, amt, &parent_status);

	/* Under limit. */
	if (parent_status == UNDER_LIMIT &&
	    allocated <= sk_prot_mem_limits(sk, 0)) {
		sk_leave_memory_pressure(sk);
		return 1;
	}

	/* Under pressure. (we or our parents) */
	if ((parent_status > SOFT_LIMIT) ||
	    allocated > sk_prot_mem_limits(sk, 1))
		sk_enter_memory_pressure(sk);

	/* Over hard limit (we or our parents) */
	if ((parent_status == OVER_LIMIT) ||
	    (allocated > sk_prot_mem_limits(sk, 2)))
		goto suppress_allocation;

	/* guarantee minimum buffer size under pressure */
	if (kind == SK_MEM_RECV) {
		if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0])
			return 1;

	} else { /* SK_MEM_SEND */
		if (sk->sk_type == SOCK_STREAM) {
			if (sk->sk_wmem_queued < prot->sysctl_wmem[0])
				return 1;
		} else if (atomic_read(&sk->sk_wmem_alloc) <
			   prot->sysctl_wmem[0])
			return 1;
	}

	if (sk_has_memory_pressure(sk)) {
		int alloc;

		if (!sk_under_memory_pressure(sk))
			return 1;
		alloc = sk_sockets_allocated_read_positive(sk);
		if (sk_prot_mem_limits(sk, 2) > alloc *
		    sk_mem_pages(sk->sk_wmem_queued +
				 atomic_read(&sk->sk_rmem_alloc) +
				 sk->sk_forward_alloc))
			return 1;
	}

suppress_allocation:

	if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
		sk_stream_moderate_sndbuf(sk);

		/* Fail only if socket is _under_ its sndbuf.
		 * In this case we cannot block, so that we have to fail.
		 */
		if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
			return 1;
	}

	trace_sock_exceed_buf_limit(sk, prot, allocated);

	/* Alas. Undo changes. */
	sk_memory_allocated_sub(sk, amt);

	return 0;
}
EXPORT_SYMBOL(__sk_mem_raise_allocated);
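
/**
 *	__sk_mem_schedule - increase sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@kind: allocation type
 *
 *	If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
 *	rmem allocation. This function assumes that protocols which have
 *	memory_pressure use sk_wmem_queued as write buffer accounting.
 */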
int __sk_mem_schedule(struct sock *sk, int size, int kind)
{
	int ret, amt = sk_mem_pages(size);

	sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
	ret = __sk_mem_raise_allocated(sk, size, amt, kind);
	if (!ret)
		sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
	return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);

/**
 *	__sk_mem_reduce_allocated - reclaim memory_allocated
 *	@sk: socket
 *	@amount: number of quanta
 *
 *	Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc.
 */
void __sk_mem_reduce_allocated(struct sock *sk, int amount)
{
	sk_memory_allocated_sub(sk, amount);

	if (sk_under_memory_pressure(sk) &&
	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
		sk_leave_memory_pressure(sk);
}
EXPORT_SYMBOL(__sk_mem_reduce_allocated);

/**
 *	__sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
 *	@sk: socket
 *	@amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
 */
void __sk_mem_reclaim(struct sock *sk, int amount)
{
	amount >>= SK_MEM_QUANTUM_SHIFT;
	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
	__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
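
/*
 * Set of default routines for initialising struct proto_ops when
 * the protocol does not support a particular function. In certain
 * cases where it makes no sense for a protocol to have a "do nothing"
 * function, some default processing is provided.
 */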
int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_bind);

int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
		    int len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_connect);

int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_socketpair);

int sock_no_accept(struct socket *sock, struct socket *newsock, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_accept);

int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
		    int *len, int peer)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getname);

unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
{
	return 0;
}
EXPORT_SYMBOL(sock_no_poll);

int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_ioctl);

int sock_no_listen(struct socket *sock, int backlog)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_listen);

int sock_no_shutdown(struct socket *sock, int how)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_shutdown);

int sock_no_setsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, unsigned int optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_setsockopt);

int sock_no_getsockopt(struct socket *sock, int level, int optname,
		       char __user *optval, int __user *optlen)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_getsockopt);

int sock_no_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_sendmsg);

int sock_no_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
		    size_t len, int flags)
{
	return -EOPNOTSUPP;
}
EXPORT_SYMBOL(sock_no_recvmsg);

int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	/* Mirror missing mmap method error code */
	return -ENODEV;
}
EXPORT_SYMBOL(sock_no_mmap);

ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
{
	ssize_t res;
	struct msghdr msg = {.msg_flags = flags};
	struct kvec iov;
	char *kaddr = kmap(page);
	iov.iov_base = kaddr + offset;
	iov.iov_len = size;
	res = kernel_sendmsg(sock, &msg, &iov, 1, size);
	kunmap(page);
	return res;
}
EXPORT_SYMBOL(sock_no_sendpage);
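
/*
 *	Default Socket Callbacks
 */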
static void sock_def_wakeup(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static void sock_def_error_report(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_poll(&wq->wait, POLLERR);
	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
	rcu_read_unlock();
}

static void sock_def_readable(struct sock *sk, int len)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (wq_has_sleeper(wq))
		wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
						POLLRDNORM | POLLRDBAND);
	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	rcu_read_unlock();
}

static void sock_def_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();

	/* Do not wake up a writer until he can make "significant"
	 * progress.  --DaveM
	 */
	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
		wq = rcu_dereference(sk->sk_wq);
		if (wq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
						POLLWRNORM | POLLWRBAND);

		/* Should agree with poll, otherwise some programs break */
		if (sock_writeable(sk))
			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}

	rcu_read_unlock();
}

static void sock_def_destruct(struct sock *sk)
{
	kfree(sk->sk_protinfo);
}

void sk_send_sigurg(struct sock *sk)
{
	if (sk->sk_socket && sk->sk_socket->file)
		if (send_sigurg(&sk->sk_socket->file->f_owner))
			sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
}
EXPORT_SYMBOL(sk_send_sigurg);

void sk_reset_timer(struct sock *sk, struct timer_list *timer,
		    unsigned long expires)
{
	if (!mod_timer(timer, expires))
		sock_hold(sk);
}
EXPORT_SYMBOL(sk_reset_timer);

void sk_stop_timer(struct sock *sk, struct timer_list *timer)
{
	if (del_timer(timer))
		__sock_put(sk);
}
EXPORT_SYMBOL(sk_stop_timer);

void sock_init_data(struct socket *sock, struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	sk->sk_send_head = NULL;

	init_timer(&sk->sk_timer);

	sk->sk_allocation = GFP_KERNEL;
	sk->sk_rcvbuf = sysctl_rmem_default;
	sk->sk_sndbuf = sysctl_wmem_default;
	sk->sk_state = TCP_CLOSE;
	sk_set_socket(sk, sock);

	sock_set_flag(sk, SOCK_ZAPPED);

	if (sock) {
		sk->sk_type = sock->type;
		sk->sk_wq = sock->wq;
		sock->sk = sk;
	} else
		sk->sk_wq = NULL;

	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);

	sk->sk_state_change = sock_def_wakeup;
	sk->sk_data_ready = sock_def_readable;
	sk->sk_write_space = sock_def_write_space;
	sk->sk_error_report = sock_def_error_report;
	sk->sk_destruct = sock_def_destruct;

	sk->sk_frag.page = NULL;
	sk->sk_frag.offset = 0;
	sk->sk_peek_off = -1;

	sk->sk_peer_pid = NULL;
	sk->sk_peer_cred = NULL;
	sk->sk_write_pending = 0;
	sk->sk_rcvlowat = 1;
	sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;

	sk->sk_stamp = ktime_set(-1L, 0);

#ifdef CONFIG_NET_RX_BUSY_POLL
	sk->sk_napi_id = 0;
	sk->sk_ll_usec = sysctl_net_busy_read;
#endif

	sk->sk_max_pacing_rate = ~0U;
	sk->sk_pacing_rate = ~0U;

	/*
	 * Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.txt for details)
	 */
	smp_wmb();
	atomic_set(&sk->sk_refcnt, 1);
	atomic_set(&sk->sk_drops, 0);
}
EXPORT_SYMBOL(sock_init_data);

void lock_sock_nested(struct sock *sk, int subclass)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_lock.owned)
		__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
	local_bh_enable();
}
EXPORT_SYMBOL(lock_sock_nested);

void release_sock(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	if (sk->sk_backlog.tail)
		__release_sock(sk);

	/* Warning : release_cb() might need to release sk ownership,
	 * ie call sock_release_ownership(sk) before us.
	 */
	if (sk->sk_prot->release_cb)
		sk->sk_prot->release_cb(sk);

	sock_release_ownership(sk);
	if (waitqueue_active(&sk->sk_lock.wq))
		wake_up(&sk->sk_lock.wq);
	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(release_sock);
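
/**
 * lock_sock_fast - fast version of lock_sock
 * @sk: socket
 *
 * This version should be used for very small section, where process wont block
 * return false if fast path is taken
 *   sk_lock.slock locked, owned = 0, BH disabled
 * return true if slow path is taken
 *   sk_lock.slock unlocked, owned = 1, BH enabled
 */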
bool lock_sock_fast(struct sock *sk)
{
	might_sleep();
	spin_lock_bh(&sk->sk_lock.slock);

	if (!sk->sk_lock.owned)
		/*
		 * Note : We must disable BH
		 */
		return false;

	__lock_sock(sk);
	sk->sk_lock.owned = 1;
	spin_unlock(&sk->sk_lock.slock);
	/*
	 * The sk_lock has mutex_lock() semantics here:
	 */
	mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
	local_bh_enable();
	return true;
}
EXPORT_SYMBOL(lock_sock_fast);

int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
	struct timeval tv;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	tv = ktime_to_timeval(sk->sk_stamp);
	if (tv.tv_sec == -1)
		return -ENOENT;
	if (tv.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		tv = ktime_to_timeval(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestamp);

int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
	struct timespec ts;
	if (!sock_flag(sk, SOCK_TIMESTAMP))
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	ts = ktime_to_timespec(sk->sk_stamp);
	if (ts.tv_sec == -1)
		return -ENOENT;
	if (ts.tv_sec == 0) {
		sk->sk_stamp = ktime_get_real();
		ts = ktime_to_timespec(sk->sk_stamp);
	}
	return copy_to_user(userstamp, &ts, sizeof(ts)) ? -EFAULT : 0;
}
EXPORT_SYMBOL(sock_get_timestampns);

void sock_enable_timestamp(struct sock *sk, int flag)
{
	if (!sock_flag(sk, flag)) {
		unsigned long previous_flags = sk->sk_flags;

		sock_set_flag(sk, flag);
		/*
		 * we just set one of the two flags which require net
		 * time stamping, but time stamping might have been on
		 * already because of the other one
		 */
		if (sock_needs_netstamp(sk) &&
		    !(previous_flags & SK_FLAGS_TIMESTAMP))
			net_enable_timestamp();
	}
}
2609
/*
 *	Get a socket option on a socket.
 *
 *	FIX: POSIX 1003.1g is very ambiguous here. It states that
 *	asynchronous errors should be reported by getsockopt. We assume
 *	this means if you specify SO_ERROR (otherwise what's the point of it).
 */
int sock_common_getsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_getsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_getsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, int __user *optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_getsockopt != NULL)
		return sk->sk_prot->compat_getsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_getsockopt);
#endif

int sock_common_recvmsg(struct kiocb *iocb, struct socket *sock,
			struct msghdr *msg, size_t size, int flags)
{
	struct sock *sk = sock->sk;
	int addr_len = 0;
	int err;

	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
				   flags & ~MSG_DONTWAIT, &addr_len);
	if (err >= 0)
		msg->msg_namelen = addr_len;
	return err;
}
EXPORT_SYMBOL(sock_common_recvmsg);

/*
 *	Set socket options on a socket.
 */
int sock_common_setsockopt(struct socket *sock, int level, int optname,
			   char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(sock_common_setsockopt);

#ifdef CONFIG_COMPAT
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;

	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}
EXPORT_SYMBOL(compat_sock_common_setsockopt);
#endif

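/*
 * Illustrative sketch (not part of this file): protocols that keep their
 * option handling in struct proto can point their proto_ops at the
 * common trampolines above, as TCP does; "my_proto_ops" is hypothetical.
 *
 *	static const struct proto_ops my_proto_ops = {
 *		.family     = PF_INET,
 *		.owner      = THIS_MODULE,
 *		.setsockopt = sock_common_setsockopt,
 *		.getsockopt = sock_common_getsockopt,
 *		.recvmsg    = sock_common_recvmsg,
 *		// ...
 *	};
 */
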
void sk_common_release(struct sock *sk)
{
	if (sk->sk_prot->destroy)
		sk->sk_prot->destroy(sk);

	/*
	 * Observation: when sk_common_release is called, processes have
	 * no access to the socket, but the network stack still has.
	 *
	 * Step one: detach it from networking.
	 *
	 * A. Remove it from the protocol's hash tables, so that no new
	 *    incoming packets can be matched to it.
	 */
	sk->sk_prot->unhash(sk);

	/*
	 * At this point the socket cannot receive new packets.
	 *
	 * B. Detach it from its owning struct socket (sock_orphan), free
	 *    any attached xfrm policies, and drop the initial reference.
	 *    The memory is only freed once the final sock_put() brings
	 *    sk_refcnt down to zero.
	 */
	sock_orphan(sk);

	xfrm_sk_free_policy(sk);

	sk_refcnt_debug_release(sk);

	sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);

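/*
 * Illustrative sketch (not part of this file): datagram-style protocols
 * commonly use sk_common_release() as the body of their ->close() hook,
 * the way UDP does:
 *
 *	void udp_lib_close(struct sock *sk, long timeout)
 *	{
 *		sk_common_release(sk);
 *	}
 */
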
#ifdef CONFIG_PROC_FS
#define PROTO_INUSE_NR	64	/* should suffice for now */
struct prot_inuse {
	int val[PROTO_INUSE_NR];
};

static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);

#ifdef CONFIG_NET_NS
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);

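/*
 * Illustrative sketch (not part of this file): protocols adjust the
 * per-cpu counter from their hash/unhash callbacks, e.g.:
 *
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);	// on hash
 *	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);	// on unhash
 *
 * sock_prot_inuse_get() then sums all CPUs for /proc/net/protocols.
 */
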
static int __net_init sock_inuse_init_net(struct net *net)
{
	net->core.inuse = alloc_percpu(struct prot_inuse);
	return net->core.inuse ? 0 : -ENOMEM;
}

static void __net_exit sock_inuse_exit_net(struct net *net)
{
	free_percpu(net->core.inuse);
}

static struct pernet_operations net_inuse_ops = {
	.init = sock_inuse_init_net,
	.exit = sock_inuse_exit_net,
};

static __init int net_inuse_init(void)
{
	if (register_pernet_subsys(&net_inuse_ops))
		panic("Cannot initialize net inuse counters");

	return 0;
}

core_initcall(net_inuse_init);
#else
static DEFINE_PER_CPU(struct prot_inuse, prot_inuse);

void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
{
	__this_cpu_add(prot_inuse.val[prot->inuse_idx], val);
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_add);

int sock_prot_inuse_get(struct net *net, struct proto *prot)
{
	int cpu, idx = prot->inuse_idx;
	int res = 0;

	for_each_possible_cpu(cpu)
		res += per_cpu(prot_inuse, cpu).val[idx];

	return res >= 0 ? res : 0;
}
EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
#endif

static void assign_proto_idx(struct proto *prot)
{
	prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);

	/* The last slot doubles as an overflow sentinel: it is never marked
	 * used, so a full bitmap always yields PROTO_INUSE_NR - 1 and the
	 * protocol simply goes without an inuse counter.
	 */
	if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
		pr_err("PROTO_INUSE_NR exhausted\n");
		return;
	}

	set_bit(prot->inuse_idx, proto_inuse_idx);
}

static void release_proto_idx(struct proto *prot)
{
	if (prot->inuse_idx != PROTO_INUSE_NR - 1)
		clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
static inline void assign_proto_idx(struct proto *prot)
{
}

static inline void release_proto_idx(struct proto *prot)
{
}
#endif

int proto_register(struct proto *prot, int alloc_slab)
{
	if (alloc_slab) {
		prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | prot->slab_flags,
					NULL);

		if (prot->slab == NULL) {
			pr_crit("%s: Can't create sock SLAB cache!\n",
				prot->name);
			goto out;
		}

		if (prot->rsk_prot != NULL) {
			prot->rsk_prot->slab_name = kasprintf(GFP_KERNEL,
							      "request_sock_%s",
							      prot->name);
			if (prot->rsk_prot->slab_name == NULL)
				goto out_free_sock_slab;

			prot->rsk_prot->slab = kmem_cache_create(prot->rsk_prot->slab_name,
								 prot->rsk_prot->obj_size, 0,
								 SLAB_HWCACHE_ALIGN, NULL);

			if (prot->rsk_prot->slab == NULL) {
				pr_crit("%s: Can't create request sock SLAB cache!\n",
					prot->name);
				goto out_free_request_sock_slab_name;
			}
		}

		if (prot->twsk_prot != NULL) {
			prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL,
								    "tw_sock_%s",
								    prot->name);
			if (prot->twsk_prot->twsk_slab_name == NULL)
				goto out_free_request_sock_slab;

			prot->twsk_prot->twsk_slab =
				kmem_cache_create(prot->twsk_prot->twsk_slab_name,
						  prot->twsk_prot->twsk_obj_size,
						  0,
						  SLAB_HWCACHE_ALIGN |
							prot->slab_flags,
						  NULL);
			if (prot->twsk_prot->twsk_slab == NULL)
				goto out_free_timewait_sock_slab_name;
		}
	}

	mutex_lock(&proto_list_mutex);
	list_add(&prot->node, &proto_list);
	assign_proto_idx(prot);
	mutex_unlock(&proto_list_mutex);
	return 0;

out_free_timewait_sock_slab_name:
	kfree(prot->twsk_prot->twsk_slab_name);
out_free_request_sock_slab:
	if (prot->rsk_prot && prot->rsk_prot->slab) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		prot->rsk_prot->slab = NULL;
	}
out_free_request_sock_slab_name:
	if (prot->rsk_prot)
		kfree(prot->rsk_prot->slab_name);
out_free_sock_slab:
	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;
out:
	return -ENOBUFS;
}
EXPORT_SYMBOL(proto_register);

void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	if (prot->slab != NULL) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}

	if (prot->rsk_prot != NULL && prot->rsk_prot->slab != NULL) {
		kmem_cache_destroy(prot->rsk_prot->slab);
		kfree(prot->rsk_prot->slab_name);
		prot->rsk_prot->slab = NULL;
	}

	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
		kfree(prot->twsk_prot->twsk_slab_name);
		prot->twsk_prot->twsk_slab = NULL;
	}
}
EXPORT_SYMBOL(proto_unregister);

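/*
 * Illustrative sketch (not part of this file): a protocol module pairs
 * proto_register() with proto_unregister(); "my_proto" is hypothetical.
 *
 *	static int __init my_proto_init(void)
 *	{
 *		return proto_register(&my_proto, 1);	// 1 => allocate a slab
 *	}
 *
 *	static void __exit my_proto_exit(void)
 *	{
 *		proto_unregister(&my_proto);
 *	}
 */
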
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	if (family == AF_INET &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);

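/*
 * Illustrative sketch (not part of this file): diag modules match the
 * request_module() patterns above through module aliases; inet_diag,
 * for instance, declares (2 == AF_INET):
 *
 *	MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2);
 */
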
#ifdef CONFIG_PROC_FS
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ?
	       proto_memory_allocated(proto) : -1L;
}

static char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	       proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{
	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->sendpage),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

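/*
 * Illustrative sketch (not part of this file): together, the header and
 * proto_seq_printf() produce /proc/net/protocols lines roughly like
 * (values invented for illustration):
 *
 *	protocol  size sockets  memory press maxhdr  slab module     cl co di ...
 *	TCP       1984      4       1   no     320   yes  kernel      y  y  y ...
 */
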
static const struct seq_operations proto_seq_ops = {
	.start  = proto_seq_start,
	.next   = proto_seq_next,
	.stop   = proto_seq_stop,
	.show   = proto_seq_show,
};

static int proto_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &proto_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations proto_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= proto_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create("protocols", S_IRUGO, net->proc_net, &proto_seq_fops))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}

static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif