/*
 *	Generic datagram handling routines.  These are generic for all
 *	protocols.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode,
				  int sync, void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if the event is not interesting for us
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb,
 * i.e. for more packets to arrive on the receive queue.
 */
static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				 const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

/* Mark the skb as peeked; if it is shared we have to clone it first so
 * that the peeked flag does not leak to other users of the queue.
 */
static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understanding the peeking, nonblocking
 *	wakeups and possible races.  With MSG_PEEK the skb stays on the
 *	receive queue (marked peeked) and has its reference count bumped;
 *	otherwise it is unlinked from the queue.  Without MSG_DONTWAIT the
 *	call blocks for up to the socket's receive timeout waiting for
 *	data to arrive.
 *
 *	Returns an skb on success, or NULL with *err set on failure
 *	(-EAGAIN when the socket is non-blocking and no data is queued).
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb, *last;
	unsigned long cpu_flags;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before skb_recv_datagram()
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Only user-level code removes packets from the receive
		 * queue, so the queue cannot shrink behind our back while
		 * we hold the queue lock and walk it.
		 */
		int _off = *off;

		last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}

				skb = skb_set_peeked(skb);
				error = PTR_ERR(skb);
				if (IS_ERR(skb))
					goto unlock_err;

				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		if (sk_can_busy_loop(sk) &&
		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
			continue;

		/* User doesn't want to wait */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_more_packets(sk, err, &timeo, last));

	return NULL;

unlock_err:
	spin_unlock_irqrestore(&queue->lock, cpu_flags);
no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
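
/*
 * Typical use from a datagram protocol's ->recvmsg() handler (an
 * illustrative sketch, not code from this file; "msg", "len", "flags",
 * "noblock" and "copied" stand for the caller's recvmsg() state):
 *
 *	skb = skb_recv_datagram(sk, flags, noblock, &err);
 *	if (!skb)
 *		return err;
 *	copied = min_t(size_t, skb->len, len);
 *	err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *	skb_free_datagram(sk, skb);
 *	return err ? err : copied;
 */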

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);
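
/*
 * UDP's recvmsg path, for example, uses this variant: the socket lock is
 * taken briefly (with lock_sock_fast) only to orphan the skb and reclaim
 * forward-allocated memory before the final free.
 */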

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed from the queue (or was
 *	not peeked), and -ENOENT if a peeked packet is no longer at the
 *	head of the receive queue.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
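
/*
 * UDP, for instance, uses skb_kill_datagram() to drop a datagram whose
 * checksum turns out to be bad only after it has already been handed to
 * a MSG_PEEK caller.
 */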

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_to_iter(skb->data + offset, copy, to) != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to) != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb.  We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
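
/*
 * Most protocol code reaches this through the skb_copy_datagram_msg()
 * wrapper in <linux/skbuff.h>, which simply passes &msg->msg_iter as
 * the iterator.
 */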

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     frag->page_offset + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the
 *	userspace pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int len = iov_iter_count(from);
	int copy = min_t(int, skb_headlen(skb), len);
	int frag = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	while (iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, ~0U,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);
			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
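
/*
 * Note for callers (e.g. tun/macvtap zero-copy transmit): the pages
 * pinned above by iov_iter_get_pages() are only released when the skb's
 * frags are freed, so the user buffer should not be reused until the skb
 * has been consumed.
 */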

/* Like skb_copy_datagram_iter(), but also accumulates a checksum of the
 * copied data into *csump.  Used by skb_copy_and_csum_datagram_msg() below.
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;

			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	if (!skb_shared(skb))
		skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds pseudo checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	if (!skb_shared(skb)) {
		/* Save full packet checksum */
		skb->csum = csum;
		skb->ip_summed = CHECKSUM_COMPLETE;
		skb->csum_complete_sw = 1;
		skb->csum_valid = !sum;
	}

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);

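/*
 * Datagram protocols normally wire this up directly as the ->poll member
 * of their struct proto_ops, e.g. (illustrative sketch, the ops name is
 * made up):
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		...
 *		.poll	= datagram_poll,
 *		...
 *	};
 */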