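/*
 *	Generic datagram handling routines, shared by datagram-oriented
 *	protocols: waiting for packets on a socket receive queue, peeking
 *	and dequeueing skbs, copying skb data to and from iovecs (with
 *	optional checksum-and-copy), and datagram poll() support.
 *
 *	A protocol's recvmsg() is expected to use these helpers roughly as
 *	follows (sketch only, error handling and msg details elided):
 *
 *		skb = skb_recv_datagram(sk, flags, noblock, &err);
 *		if (!skb)
 *			return err;
 *		skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 *		skb_free_datagram(sk, skb);
 */
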
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

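/*
 *	Is a socket 'connection oriented' ?
 */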
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if the event is not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb,
 * i.e. for a new packet to arrive on the receive queue.
 */
static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				 const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

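/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understanding the peeking, nonblocking wakeups
 *	and possible races. With MSG_PEEK the skb stays on the receive queue
 *	and its reference count is bumped; otherwise it is unlinked from the
 *	queue. On error or timeout, NULL is returned and *err is set.
 */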
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Only user-level code calls this function, so nothing at
		 * interrupt level will suddenly eat the receive_queue.
		 */
		unsigned long cpu_flags;
		struct sk_buff_head *queue = &sk->sk_receive_queue;
		int _off = *off;

		last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		if (sk_can_busy_loop(sk) &&
		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
			continue;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_more_packets(sk, err, &timeo, last));

	return NULL;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

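/*
 *	skb_recv_datagram - wrapper around __skb_recv_datagram() that maps
 *	@noblock onto MSG_DONTWAIT; the peek state and offset are not
 *	reported back to the caller.
 */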
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);

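/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function only disables BH when acquiring the
 *	sk_receive_queue lock, so it must not be used where that lock
 *	may be taken from hard interrupt context.
 *
 *	Returns 0 if the packet was removed by us; -ENOENT if MSG_PEEK
 *	was set but the packet was no longer at the head of the queue.
 */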
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);

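/**
 *	skb_copy_datagram_iovec - Copy a datagram to an iovec
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is modified during the copy.
 */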
int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
			    struct iovec *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovec(to, skb->data + offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovec(to, vaddr + frag->page_offset +
					     offset - start, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iovec(frag_iter,
						    offset - start,
						    to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_iovec);

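/**
 *	skb_copy_datagram_const_iovec - Copy a datagram to an iovec
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: io vector to copy to
 *	@to_offset: offset in the io vector to start copying to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */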
int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
				  const struct iovec *to, int to_offset,
				  int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to_offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_toiovecend(to, vaddr + frag->page_offset +
						offset - start, to_offset, copy);
			kunmap(page);
			if (err)
				goto fault;
			if (!(len -= copy))
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_const_iovec(frag_iter,
							  offset - start,
							  to, to_offset,
							  copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_const_iovec);

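/**
 *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec
 *	@skb: buffer to copy to
 *	@offset: offset in the buffer to start copying to
 *	@from: io vector to copy from
 *	@from_offset: offset in the io vector to start copying from
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 *	Note: the iovec is not modified during the copy.
 */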
int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
				 const struct iovec *from, int from_offset,
				 int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
					copy))
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		from_offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			int err;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			err = memcpy_fromiovecend(vaddr + frag->page_offset +
						  offset - start,
						  from, from_offset, copy);
			kunmap(page);
			if (err)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iovec(frag_iter,
							 offset - start,
							 from,
							 from_offset,
							 copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			from_offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iovec);

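/**
 *	zerocopy_sg_from_iovec - Build a zerocopy datagram from an iovec
 *	@skb: buffer to copy to
 *	@from: io vector to copy from
 *	@offset: offset in the io vector to start copying from
 *	@count: number of iovec entries
 *
 *	First copies up to skb_headlen() bytes into the linear area, then
 *	pins the remaining userspace pages with get_user_pages_fast() and
 *	attaches them to the skb as page fragments.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */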
int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
			   int offset, size_t count)
{
	int len = iov_length(from, count) - offset;
	int copy = min_t(int, skb_headlen(skb), len);
	int size;
	int i = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iovec(skb, 0, from, offset, copy))
		return -EFAULT;

	if (len == copy)
		return 0;

	offset += copy;
	while (count--) {
		struct page *page[MAX_SKB_FRAGS];
		int num_pages;
		unsigned long base;
		unsigned long truesize;

		/* Skip over from offset */
		if (offset >= from->iov_len) {
			offset -= from->iov_len;
			++from;
			continue;
		}
		len = from->iov_len - offset;
		base = (unsigned long)from->iov_base + offset;
		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
		if (i + size > MAX_SKB_FRAGS)
			return -EMSGSIZE;
		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
		if (num_pages != size) {
			release_pages(&page[i], num_pages, 0);
			return -EFAULT;
		}
		truesize = size * PAGE_SIZE;
		skb->data_len += len;
		skb->len += len;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (len) {
			int off = base & ~PAGE_MASK;
			int size = min_t(int, len, PAGE_SIZE - off);
			skb_fill_page_desc(skb, i, page[i], off, size);
			base += size;
			len -= size;
			i++;
		}
		offset = 0;
		++from;
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iovec);

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      u8 __user *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;

	/* Copy header. */
	if (copy > 0) {
		int err = 0;
		if (copy > len)
			copy = len;
		*csump = csum_and_copy_to_user(skb->data + offset, to, copy,
					       *csump, &err);
		if (err)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		to += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2;
			int err = 0;
			u8 *vaddr;
			struct page *page = skb_frag_page(frag);

			if (copy > len)
				copy = len;
			vaddr = kmap(page);
			csum2 = csum_and_copy_to_user(vaddr +
						      frag->page_offset +
						      offset - start,
						      to, copy, 0, &err);
			kunmap(page);
			if (err)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			to += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	return __skb_checksum_complete_head(skb, skb->len);
}
EXPORT_SYMBOL(__skb_checksum_complete);

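/**
 *	skb_copy_and_csum_datagram_iovec - Copy and checksum skb to user iovec
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@iov: io vector
 *
 *	Caller _must_ check that the skb will fit into this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy. Beware, in this case the iovec
 *			   may already have been modified!
 */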
int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
				     int hlen, struct iovec *iov)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	/* Skip filled elements.
	 * Pure optimisation, it is not a requirement.
	 */
	while (!iov->iov_len)
		iov++;

	if (iov->iov_len < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_iovec(skb, hlen, iov, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, iov->iov_base,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		iov->iov_len -= chunk;
		iov->iov_base += chunk;
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);

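/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: this is totally generic. It also handles sequenced
 *	packet sockets provided the socket receive queue only ever holds
 *	data ready to receive.
 *
 *	Note: if you do not use this routine for a protocol but use a
 *	different write policy from sock_writeable(), supply your own
 *	write_space callback.
 */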
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);