/*
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on top of these would
 *	make sense. Not tonight however 8-).
 *
 *	Shared by the datagram-style protocols (UDP, RAW, PACKET, ...),
 *	which all use the same poll and recvmsg helpers found below.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

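/* Pull the next matching skb off @queue (or, with MSG_PEEK, take an extra
 * reference to it and mark it peeked).  Callers must hold the queue lock.
 * With MSG_PEEK and a non-negative *off, skbs that lie wholly before the
 * peek offset are skipped and *off is rewritten to the offset inside the
 * returned skb.  *last is set to the current queue tail so that the wait
 * path can tell whether anything new arrived.  Returns NULL (with *err
 * set only if marking an skb as peeked failed) when nothing was dequeued.
 */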
struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  void (*destructor)(struct sock *sk,
							     struct sk_buff *skb),
					  int *peeked, int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (skb->len) {
				skb = skb_set_peeked(skb);
				if (unlikely(IS_ERR(skb))) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			*peeked = 1;
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
			if (destructor)
				destructor(sk, skb);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@destructor: invoked under the receive lock on successful dequeue
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX, AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	Returns NULL with *err set to -EAGAIN if no data was available, or
 *	to some other negative value if an error was detected.
 *
 *	The order of the tests when we find no data waiting are
 *	specified quite explicitly by POSIX 1003.1g, don't change them
 *	without having the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					void (*destructor)(struct sock *sk,
							   struct sk_buff *skb),
					int *peeked, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	*peeked = 0;
	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
						peeked, off, &error, last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (!skb_queue_empty(&sk->sk_receive_queue));

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

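/* Blocking front end for __skb_try_recv_datagram(): keep retrying until a
 * packet is available, the receive timeout expires (zero with MSG_DONTWAIT),
 * or an error other than -EAGAIN is reported.
 */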
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    void (*destructor)(struct sock *sk,
						       struct sk_buff *skb),
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, destructor, peeked,
					      off, err, &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

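/* Convenience wrapper used by most protocols: maps @noblock onto
 * MSG_DONTWAIT and discards the peeked/offset bookkeeping.
 */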
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   NULL, &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);

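/* Release an skb obtained from skb_recv_datagram() and return any
 * forward-allocated receive memory to the socket's accounting.
 */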
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

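/* Free an skb while rewinding the socket's peek offset by @len.  When this
 * drops the last reference, the socket is locked (fast path) so orphaning
 * and memory reclaim happen consistently; the skb itself is freed outside
 * the locked section.
 */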
void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

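/* Drop @skb from @sk_queue.  For MSG_PEEK the skb may still be queued and
 * must be unlinked under the queue lock; -ENOENT is returned if another
 * reader already took it off the queue.  sk_drops is bumped either way.
 */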
int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = copy_to_iter(skb->data + offset, copy, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			n = copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb.  We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					frag->page_offset + offset - start,
					copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

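/* Pin user pages from @from and attach them to @skb as page frags, up to
 * @length bytes, charging the added truesize to @sk for stream sockets or
 * to the skb's own socket write allocation otherwise.  Fails with -EMSGSIZE
 * once all MAX_SKB_FRAGS slots are in use.
 */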
int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk->sk_wmem_queued += truesize;
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);
			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);

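/* Copy @len bytes of @skb starting at @offset into @to while accumulating a
 * checksum of the copied data into *csump; used by the copy-and-checksum
 * receive path below.
 */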
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		offset += n;
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;
}

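/* Checksum the first @len bytes of @skb in software, seeded with skb->csum.
 * Returns 0 when the packet checksums correctly and, for unshared skbs,
 * records the result in skb->csum_valid.
 */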
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

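/* Software fallback for full-packet checksum verification.  Returns 0 when
 * the packet checksums correctly and, for unshared skbs, caches the computed
 * checksum so later callers can skip the recalculation.
 */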
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds pseudo checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	if (!skb_shared(skb)) {
		/* Save full packet checksum */
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum_complete_sw = 1;
		skb->csum_valid = !sum;
	}

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);