1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37#include <linux/module.h>
38#include <linux/types.h>
39#include <linux/kernel.h>
40#include <linux/uaccess.h>
41#include <linux/mm.h>
42#include <linux/interrupt.h>
43#include <linux/errno.h>
44#include <linux/sched.h>
45#include <linux/inet.h>
46#include <linux/netdevice.h>
47#include <linux/rtnetlink.h>
48#include <linux/poll.h>
49#include <linux/highmem.h>
50#include <linux/spinlock.h>
51#include <linux/slab.h>
52#include <linux/pagemap.h>
53#include <linux/uio.h>
54#include <linux/indirect_call_wrapper.h>
55
56#include <net/protocol.h>
57#include <linux/skbuff.h>
58
59#include <net/checksum.h>
60#include <net/sock.h>
61#include <net/tcp_states.h>
62#include <trace/events/skb.h>
63#include <net/busy_poll.h>
64
65#include "datagram.h"
66
67
68
69
70static inline int connection_based(struct sock *sk)
71{
72 return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
73}
74
75static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
76 void *key)
77{
78
79
80
81 if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
82 return 0;
83 return autoremove_wake_function(wait, mode, sync, key);
84}
85
86
87
/*
 * Wait (interruptibly, for at most *@timeo_p) for something to change on
 * @queue. @skb is the queue tail the caller last observed; if queue->prev
 * differs from it, new packets have arrived and we return immediately.
 *
 * Return: 0 if the caller should re-check the queue, 1 on RCV_SHUTDOWN
 * (*err set to 0), or a negative value mirrored into *err on error
 * (socket error, -ENOTCONN, or signal).
 */
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
				int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	/* Register on the wait queue BEFORE re-checking state, so a
	 * concurrent wakeup between the checks and schedule_timeout()
	 * cannot be missed.
	 */
	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	/* Queue tail moved since the caller sampled it: new data arrived. */
	if (READ_ONCE(queue->prev) != skb)
		goto out;

	/* Socket shut down for receive? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Connection-based sockets may become disconnected while we
	 * sleep; report that rather than waiting forever.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* Handle signals before sleeping. */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	/* Sleep; *timeo_p is updated with the remaining timeout. */
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);
137
/*
 * Mark @skb as peeked. A shared skb must not be modified in place, so
 * it is first replaced in its queue by a private clone.
 *
 * Return: the (possibly replacement) skb, or ERR_PTR(-ENOMEM) if the
 * clone failed. Caller holds the queue lock.
 */
static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	/* Splice the clone into the queue in place of the original. */
	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	/* Drop our reference to the original; the queue now holds nskb. */
	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}
166
/*
 * Find the first suitable skb on @queue. Caller must hold the queue lock.
 *
 * With MSG_PEEK and a non-negative *off, skbs wholly below the peek
 * offset are skipped (decrementing the local offset); the matching skb
 * has its refcount raised and stays queued. Without MSG_PEEK the skb
 * is unlinked from the queue.
 *
 * On success *off is set to the remaining offset within the returned
 * skb. *last is always set to the queue tail sampled before the walk,
 * for use by the caller's wait logic. Returns NULL with *err set if
 * skb_set_peeked() failed, or NULL with *err untouched when no skb
 * matched.
 */
struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			/* Skip skbs entirely below the peek offset; a
			 * zero-length skb is only skipped once it has been
			 * peeked (see the !skb->len handling below).
			 */
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			/* Flag empty skbs as peeked so the skip logic
			 * above can pass over them on the next peek.
			 */
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
/*
 *	__skb_try_recv_datagram - Receive a datagram skbuff (non-blocking)
 *	@sk: socket
 *	@queue: socket queue from which to receive
 *	@flags: MSG_* flags
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Tries once (or, with busy polling enabled, repeatedly) to pull a
 *	datagram off @queue without sleeping. Returns the skb, or NULL
 *	with *err set (-EAGAIN when the queue is simply empty).
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
					struct sk_buff_head *queue,
					unsigned int flags, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff *skb;
	unsigned long cpu_flags;

	/* Report pending socket errors before looking at the queue. */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* The queue is manipulated from interrupt context as
		 * well, so take the lock irqsave for the walk.
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
						last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		/* Busy-poll the device, then retry if the tail moved. */
		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(queue->prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);
286
/*
 * Receive a datagram from @sk_queue, sleeping up to the socket receive
 * timeout (none with MSG_DONTWAIT) until one arrives. Returns the skb,
 * or NULL with *err set (-EAGAIN if the timeout expired empty-handed).
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk,
				    struct sk_buff_head *sk_queue,
				    unsigned int flags, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
					      &last);
		if (skb)
			return skb;

		/* Anything other than "queue empty" is a real error. */
		if (*err != -EAGAIN)
			break;
		/* Wait against the tail we sampled (last); the wait
		 * returns 0 when the queue may have changed.
		 */
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, sk_queue, err,
					      &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);
311
312struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
313 int noblock, int *err)
314{
315 int off = 0;
316
317 return __skb_recv_datagram(sk, &sk->sk_receive_queue,
318 flags | (noblock ? MSG_DONTWAIT : 0),
319 &off, err);
320}
321EXPORT_SYMBOL(skb_recv_datagram);
322
/*
 * Release a datagram skb obtained from skb_recv_datagram(), then trim
 * the socket's forward-allocated memory.
 */
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);
329
/*
 * Free a received datagram skb, taking the socket lock (fast variant)
 * only when dropping the last reference. @len bytes were consumed; the
 * peek offset is wound back by that amount in either case.
 */
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	/* Not the last reference: only the peek offset needs adjusting. */
	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is orphaned and unreferenced: free it outside the lock. */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
349
/*
 * Drop @skb after a receive error, accounting it in sk_drops. With
 * MSG_PEEK the skb is still linked on @sk_queue holding an extra
 * reference: unlink it and drop that reference only if it is still
 * queued (skb->next non-NULL under the queue lock), invoking
 * @destructor (may be NULL) on removal.
 *
 * Return: 0 on success, -ENOENT if a peeked skb had already left the
 * queue.
 */
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
/*
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_* flags, matching those used for the receive
 *
 *	Frees a datagram skbuff received via skb_recv_datagram(). If
 *	MSG_PEEK is set and the packet is still on the socket's receive
 *	queue, it is taken off the queue before being freed.
 *
 *	Return: 0, or -ENOENT if a peeked skb was no longer queued.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
406
/* Forward declaration so INDIRECT_CALL_1() below can name the common case. */
INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
							    size_t bytes,
							    void *data __always_unused,
							    struct iov_iter *i));

/*
 * Copy @len bytes of @skb starting at @offset into @to, applying @cb
 * (plain copy, csum-and-copy, or hash-and-copy; @data is cb-private
 * state) to each contiguous chunk: the linear head first, then each
 * page frag (kmap'ed), then recursively every frag_list skb.
 *
 * A short copy from @cb is fatal when @fault_short is set (the
 * checksumming callers); otherwise it only fails if the iterator still
 * had room. On failure the iterator is reverted to its position at
 * entry and -EFAULT is returned; on success 0.
 */
static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy from the linear header area. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
				    skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy from the paged fragments. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
					vaddr + skb_frag_off(frag) + offset - start,
					copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	/* Recurse into the frag_list chain. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb, data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	/* Undo everything copied so far so the caller sees no progress. */
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	/* Short copy is fine for the checksumming callers (fault_short)
	 * only as a failure; otherwise it is only fatal if the iterator
	 * still had room left.
	 */
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}
497
498
499
500
501
502
503
504
505
506
/*
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *		and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@hash: hash request to update
 *
 *	Return: 0 on success, -EFAULT on a short copy (fault_short=true).
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);
515
/* Plain-copy callback for __skb_datagram_iter(); @data is unused. */
static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused, struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}
521
522
523
524
525
526
527
528
/*
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Return: 0 on success, -EFAULT if the copy could not complete.
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
537
538
539
540
541
542
543
544
545
546
/*
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy to
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Mirror of __skb_datagram_iter() in the write direction: fills the
 *	linear head, then the page frags, then recursively the frag_list.
 *
 *	Return: 0 on success, -EFAULT on any short copy.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy into the linear header area. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy into the paged fragments. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					  skb_frag_off(frag) + offset - start,
					  copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	/* Recurse into the frag_list chain. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
618
/*
 * Pin up to @length bytes of user pages from @from and append them to
 * @skb as page frags (zerocopy transmit path). Adjacent pieces that
 * land on the same page right after the previous frag are coalesced
 * into that frag. Accounts truesize to @sk when given (stream sockets)
 * or to skb->sk's write allocation otherwise.
 *
 * Return: 0 on success, -EMSGSIZE if the skb ran out of frag slots,
 * -EFAULT if pinning pages failed. On error the skb keeps whatever
 * was attached so far; the caller is expected to clean up.
 */
int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		struct page *last_head = NULL;
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int refs, n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		/* Account the pinned pages against the skb/socket. */
		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			if (!skb_zcopy_pure(skb))
				sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		/* Turn the pinned pages into frags, coalescing pieces
		 * that continue the previous frag on the same page.
		 */
		for (refs = 0; copied != 0; start = 0) {
			int size = min_t(int, copied, PAGE_SIZE - start);
			struct page *head = compound_head(pages[n]);

			start += (pages[n] - head) << PAGE_SHIFT;
			copied -= size;
			n++;
			if (frag) {
				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];

				if (head == skb_frag_page(last) &&
				    start == skb_frag_off(last) + skb_frag_size(last)) {
					skb_frag_size_add(last, size);
					/* We combined this page, we need
					 * to release a reference. Since
					 * compound pages refcount is
					 * shared, batch the release under
					 * last_head.
					 */
					last_head = head;
					refs++;
					continue;
				}
			}
			/* Flush batched references before switching pages. */
			if (refs) {
				page_ref_sub(last_head, refs);
				refs = 0;
			}
			skb_fill_page_desc(skb, frag++, head, start, size);
		}
		if (refs)
			page_ref_sub(last_head, refs);
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);
689
690
691
692
693
694
695
696
697
698
699
/*
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	First copies up to skb_headlen() bytes into the linear area, then
 *	pins the remaining userspace pages and attaches them as frags.
 *
 *	Return: 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
711
712
713
714
715
716
717
718
719
720
/*
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *		and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@csump: checksum pointer; folded into the running csum and
 *		updated only on success
 *
 *	Return: 0 on success, -EFAULT on fault/short copy (the iterator
 *	is reverted by __skb_datagram_iter in that case).
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	struct csum_state csdata = { .csum = *csump };
	int ret;

	ret = __skb_datagram_iter(skb, offset, to, len, true,
				  csum_and_copy_to_iter, &csdata);
	if (ret)
		return ret;

	*csump = csdata.csum;
	return 0;
}
736
737
738
739
740
741
742
743
744
745
746
747
748
/*
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length (bytes of header already checksummed)
 *	@msg: destination message
 *
 *	Return:  0       - success.
 *		-EINVAL - checksum failure.
 *		-EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		/* Partial read: we cannot checksum what we copy, so
		 * verify the whole packet's checksum first, then do a
		 * plain copy.
		 */
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		/* Full read: checksum while copying. */
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			/* Bad checksum: unwind the copy so the caller can
			 * retry or drop cleanly.
			 */
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		/* Hardware claimed a valid CHECKSUM_COMPLETE but software
		 * disagreed: report the device.
		 */
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798__poll_t datagram_poll(struct file *file, struct socket *sock,
799 poll_table *wait)
800{
801 struct sock *sk = sock->sk;
802 __poll_t mask;
803
804 sock_poll_wait(file, sock, wait);
805 mask = 0;
806
807
808 if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
809 mask |= EPOLLERR |
810 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
811
812 if (sk->sk_shutdown & RCV_SHUTDOWN)
813 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
814 if (sk->sk_shutdown == SHUTDOWN_MASK)
815 mask |= EPOLLHUP;
816
817
818 if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
819 mask |= EPOLLIN | EPOLLRDNORM;
820
821
822 if (connection_based(sk)) {
823 if (sk->sk_state == TCP_CLOSE)
824 mask |= EPOLLHUP;
825
826 if (sk->sk_state == TCP_SYN_SENT)
827 return mask;
828 }
829
830
831 if (sock_writeable(sk))
832 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
833 else
834 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
835
836 return mask;
837}
838EXPORT_SYMBOL(datagram_poll);
839