#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>
#include <linux/indirect_call_wrapper.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

#include "datagram.h"
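
/*
 *	Is a socket 'connection oriented' ?
 */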
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode,
				  int sync, void *key)
{
	/* Avoid a wakeup if the event is not interesting for us. */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}
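
/*
 * Wait until the tail of @queue changes from @skb, or until an error,
 * shutdown or pending signal ends the wait.
 */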
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
				int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (READ_ONCE(queue->prev) != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}
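
/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@queue: socket queue from which to receive
 *	@flags: MSG_* flags
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Try to receive a datagram without sleeping. Understands MSG_PEEK
 *	(taking an extra reference on the peeked skb) and busy polling.
 *	Returns NULL with *@err set to -EAGAIN when the queue is empty,
 *	or to the pending socket error if one is set.
 */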
struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
					struct sk_buff_head *queue,
					unsigned int flags, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram().
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
						last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (READ_ONCE(queue->prev) != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk,
				    struct sk_buff_head *sk_queue,
				    unsigned int flags, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, sk_queue, flags, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, sk_queue, err,
					      &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, &sk->sk_receive_queue,
				   flags | (noblock ? MSG_DONTWAIT : 0),
				   &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);
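
	/* skb is now orphaned, can be freed outside of locked section */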
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);
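
/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_* flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function only disables BH when acquiring the
 *	sk_receive_queue lock, so it must not be used in a context
 *	where that lock could also be taken from hard IRQ context.
 */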
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

INDIRECT_CALLABLE_DECLARE(static size_t simple_copy_to_iter(const void *addr,
					size_t bytes,
					void *data __always_unused,
					struct iov_iter *i));

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
				    skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = INDIRECT_CALL_1(cb, simple_copy_to_iter,
					vaddr + skb_frag_off(frag) + offset - start,
					copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb, data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;
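
	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb: the requested length
	 * exceeds the data actually present.
	 */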
fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}
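
/**
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *	    and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@hash: hash request to update
 */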
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused,
				  struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}
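
/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */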
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
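
/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */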
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					  skb_frag_off(frag) + offset - start,
					  copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		struct page *last_head = NULL;
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int refs, n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk_wmem_queued_add(sk, truesize);
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		for (refs = 0; copied != 0; start = 0) {
			int size = min_t(int, copied, PAGE_SIZE - start);
			struct page *head = compound_head(pages[n]);

			start += (pages[n] - head) << PAGE_SHIFT;
			copied -= size;
			n++;
			if (frag) {
				skb_frag_t *last = &skb_shinfo(skb)->frags[frag - 1];

				if (head == skb_frag_page(last) &&
				    start == skb_frag_off(last) + skb_frag_size(last)) {
					skb_frag_size_add(last, size);
					/* We combined this page, we need to release
					 * a reference. Since compound pages refcount
					 * is shared among many pages, batch the refcount
					 * adjustments to limit false sharing.
					 */
					last_head = head;
					refs++;
					continue;
				}
			}
			if (refs) {
				page_ref_sub(last_head, refs);
				refs = 0;
			}
			skb_fill_page_desc(skb, frag++, head, start, size);
		}
		if (refs)
			page_ref_sub(last_head, refs);
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);
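
/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */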
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
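
/**
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *	    and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@csump: checksum pointer
 */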
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   csum_and_copy_to_iter, csump);
}
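
/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */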
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
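
/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Generic poll implementation for datagram sockets: reports error,
 *	hangup, readable and writable state, and handles connection-based
 *	socket types (SOCK_STREAM/SOCK_SEQPACKET) via their TCP_* states.
 */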
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);