/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Even pppoe uses them.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}
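
/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understanding the peeking, nonblocking
 *	wakeups and possible races. It does not take the socket lock;
 *	the receive queue spinlock is enough, since this helper is used
 *	only with datagram sockets. Returns NULL with *err set to
 *	-EAGAIN if no data was available, or to another negative value
 *	if an error was detected.
 */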
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					int *peeked, int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		int _off = *off;

		*last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			*last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}

				skb = skb_set_peeked(skb);
				error = PTR_ERR(skb);
				if (IS_ERR(skb)) {
					spin_unlock_irqrestore(&queue->lock,
							       cpu_flags);
					goto no_packet;
				}

				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}

		spin_unlock_irqrestore(&queue->lock, cpu_flags);
	} while (sk_can_busy_loop(sk) &&
		 sk_busy_loop(sk, flags & MSG_DONTWAIT));

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
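
/* Minimal usage sketch for a protocol's recvmsg path (illustrative only;
 * real callers also clamp the copied length to the iov size and fill in
 * msg_name):
 *
 *	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *	err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
 *	skb_free_datagram(sk, skb);
 */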

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users))) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);
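
/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	Returns 0 on success, or -ENOENT if MSG_PEEK was set but the
 *	skb was no longer at the head of the receive queue.
 */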
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
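
/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *
 *	Returns 0 or -EFAULT.
 */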
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_to_iter(skb->data + offset, copy, to) != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to) != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb.  We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
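
/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */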
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged fragments. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     frag->page_offset + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
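
/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */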
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int len = iov_iter_count(from);
	int copy = min_t(int, skb_headlen(skb), len);
	int frag = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	while (iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, ~0U,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;

			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds pseudo checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	if (!skb_shared(skb)) {
		/* Save full packet checksum */
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum_complete_sw = 1;
		skb->csum_valid = !sum;
	}

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
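
/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */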
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
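
/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */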
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);