// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP version on all of these
 *	would make sense ...
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

#include "datagram.h"

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_entry_t *wait, unsigned int mode,
				  int sync, void *key)
{
	/*
	 * Avoid a wakeup if event not interesting for us
	 */
	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from skb
 */
int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}
EXPORT_SYMBOL(__skb_wait_for_more_packets);

static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
{
	struct sk_buff *nskb;

	if (skb->peeked)
		return skb;

	/* We have to unshare an skb before modifying it. */
	if (!skb_shared(skb))
		goto done;

	nskb = skb_clone(skb, GFP_ATOMIC);
	if (!nskb)
		return ERR_PTR(-ENOMEM);

	skb->prev->next = nskb;
	skb->next->prev = nskb;
	nskb->prev = skb->prev;
	nskb->next = skb->next;

	consume_skb(skb);
	skb = nskb;

done:
	skb->peeked = 1;

	return skb;
}

struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
					  struct sk_buff_head *queue,
					  unsigned int flags,
					  void (*destructor)(struct sock *sk,
							     struct sk_buff *skb),
					  int *off, int *err,
					  struct sk_buff **last)
{
	bool peek_at_off = false;
	struct sk_buff *skb;
	int _off = 0;

	if (unlikely(flags & MSG_PEEK && *off >= 0)) {
		peek_at_off = true;
		_off = *off;
	}

	*last = queue->prev;
	skb_queue_walk(queue, skb) {
		if (flags & MSG_PEEK) {
			if (peek_at_off && _off >= skb->len &&
			    (_off || skb->peeked)) {
				_off -= skb->len;
				continue;
			}
			if (!skb->len) {
				skb = skb_set_peeked(skb);
				if (IS_ERR(skb)) {
					*err = PTR_ERR(skb);
					return NULL;
				}
			}
			refcount_inc(&skb->users);
		} else {
			__skb_unlink(skb, queue);
			if (destructor)
				destructor(sk, skb);
		}
		*off = _off;
		return skb;
	}
	return NULL;
}

/**
 *	__skb_try_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@destructor: invoked under the receive lock on successful dequeue
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *	@last: set to last peeked message to inform the wait function
 *	       what to look for when peeking
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This replaces identical code in packet, raw and
 *	udp, as well as the IPX AX.25 and Appletalk. It also finally fixes
 *	the long standing peek and read race for datagram sockets. If you
 *	alter this routine remember it must be re-entrant.
 *
 *	This function does not take the socket lock and is free of race
 *	conditions. Returns NULL with @err set to -EAGAIN if no data was
 *	available, or to some other negative value if a different error
 *	was detected.
 *
 *	The order of the tests when we find no data waiting are
 *	specified quite explicitly by POSIX 1003.1g, don't change them
 *	without having the standard around please.
 */
struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
					void (*destructor)(struct sock *sk,
							   struct sk_buff *skb),
					int *off, int *err,
					struct sk_buff **last)
{
	struct sk_buff_head *queue = &sk->sk_receive_queue;
	struct sk_buff *skb;
	unsigned long cpu_flags;
	/*
	 * Caller is allowed not to check sk->sk_err before calling.
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	do {
		/* Again only user level code calls this function, so nothing
		 * interrupt level will suddenly eat the receive_queue.
		 *
		 * Look at current nfs client by the way...
		 * However, this function was correct in any case. 8)
		 */
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb = __skb_try_recv_from_queue(sk, queue, flags, destructor,
						off, &error, last);
		spin_unlock_irqrestore(&queue->lock, cpu_flags);
		if (error)
			goto no_packet;
		if (skb)
			return skb;

		if (!sk_can_busy_loop(sk))
			break;

		sk_busy_loop(sk, flags & MSG_DONTWAIT);
	} while (sk->sk_receive_queue.prev != *last);

	error = -EAGAIN;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_try_recv_datagram);

struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    void (*destructor)(struct sock *sk,
						       struct sk_buff *skb),
				    int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		skb = __skb_try_recv_datagram(sk, flags, destructor, off, err,
					      &last);
		if (skb)
			return skb;

		if (*err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, err, &timeo, last));

	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   NULL, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
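
/*
 * Typical use of skb_recv_datagram() in a protocol's recvmsg handler.
 * A minimal sketch, not taken from this file: the "foo" proto name and
 * the assumption that the payload starts at skb->data are illustrative.
 *
 *	int foo_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 *			int flags)
 *	{
 *		struct sock *sk = sock->sk;
 *		struct sk_buff *skb;
 *		int err, copied;
 *
 *		skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 *		if (!skb)
 *			return err;
 *
 *		copied = min_t(int, skb->len, len);
 *		if (copied < skb->len)
 *			msg->msg_flags |= MSG_TRUNC;
 *		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *
 *		skb_free_datagram(sk, skb);
 *		return err ? err : copied;
 *	}
 */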

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void __skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb, int len)
{
	bool slow;

	if (!skb_unref(skb)) {
		sk_peek_offset_bwd(sk, len);
		return;
	}

	slow = lock_sock_fast(sk);
	sk_peek_offset_bwd(sk, len);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned, can be freed outside of locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(__skb_free_datagram_locked);

int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
			struct sk_buff *skb, unsigned int flags,
			void (*destructor)(struct sock *sk,
					   struct sk_buff *skb))
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk_queue->lock);
		if (skb->next) {
			__skb_unlink(skb, sk_queue);
			refcount_dec(&skb->users);
			if (destructor)
				destructor(sk, skb);
			err = 0;
		}
		spin_unlock_bh(&sk_queue->lock);
	}

	atomic_inc(&sk->sk_drops);
	return err;
}
EXPORT_SYMBOL(__sk_queue_drop_skb);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram.  The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock.  Therefore it must not be used in a
 *	context where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = __sk_queue_drop_skb(sk, &sk->sk_receive_queue, skb, flags,
				      NULL);

	kfree_skb(skb);
	sk_mem_reclaim_partial(sk);
	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
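
/*
 * A hedged sketch of the usual error path: if copying a received or
 * peeked datagram to user space fails, the packet is dropped with
 * skb_kill_datagram() using the same flags that were passed to
 * skb_recv_datagram(). Variable names are illustrative only.
 *
 *	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		return err;
 *	if (skb_copy_datagram_msg(skb, 0, msg, copied)) {
 *		skb_kill_datagram(sk, skb, flags);
 *		return -EFAULT;
 *	}
 */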

static int __skb_datagram_iter(const struct sk_buff *skb, int offset,
			       struct iov_iter *to, int len, bool fault_short,
			       size_t (*cb)(const void *, size_t, void *,
					    struct iov_iter *), void *data)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset, start_off = offset, n;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = cb(skb->data + offset, copy, data, to);
		offset += n;
		if (n != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = cb(vaddr + frag->page_offset +
			       offset - start, copy, data, to);
			kunmap(page);
			offset += n;
			if (n != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (__skb_datagram_iter(frag_iter, offset - start,
						to, copy, fault_short, cb,
						data))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb.  We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	iov_iter_revert(to, offset - start_off);
	return -EFAULT;

short_copy:
	if (fault_short || iov_iter_count(to))
		goto fault;

	return 0;
}

/**
 *	skb_copy_and_hash_datagram_iter - Copy datagram to an iovec iterator
 *	    and update a hash.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@hash: hash request to update
 */
int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset,
				    struct iov_iter *to, int len,
				    struct ahash_request *hash)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   hash_and_copy_to_iter, hash);
}
EXPORT_SYMBOL(skb_copy_and_hash_datagram_iter);

static size_t simple_copy_to_iter(const void *addr, size_t bytes,
				  void *data __always_unused,
				  struct iov_iter *i)
{
	return copy_to_iter(addr, bytes, i);
}

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	trace_skb_copy_datagram_iovec(skb, len);
	return __skb_datagram_iter(skb, offset, to, len, false,
				   simple_copy_to_iter, NULL);
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
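
/*
 * Callers usually reach this through the skb_copy_datagram_msg() wrapper
 * (a static inline in <linux/skbuff.h>), which passes &msg->msg_iter.
 * Illustrative sketch only:
 *
 *	err = skb_copy_datagram_iter(skb, 0, &msg->msg_iter, skb->len);
 *	if (err)
 *		return err;	// -EFAULT on an unwritable user buffer
 */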

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. Hmm... why does this look so complicated? */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
					frag->page_offset + offset - start,
					copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
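
/*
 * On the transmit side a sendmsg handler typically allocates the skb
 * and then pulls the payload out of the message iterator. A minimal,
 * hypothetical sketch (error handling trimmed):
 *
 *	skb = sock_alloc_send_skb(sk, len, msg->msg_flags & MSG_DONTWAIT,
 *				  &err);
 *	if (!skb)
 *		return err;
 *	skb_put(skb, len);
 *	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;
 *	}
 */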

int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *from, size_t length)
{
	int frag = skb_shinfo(skb)->nr_frags;

	while (length && iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, length,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);
		length -= copied;

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		if (sk && sk->sk_type == SOCK_STREAM) {
			sk->sk_wmem_queued += truesize;
			sk_mem_charge(sk, truesize);
		} else {
			refcount_add(truesize, &skb->sk->sk_wmem_alloc);
		}
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(__zerocopy_sg_from_iter);

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the userspace
 *	pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int copy = min_t(int, skb_headlen(skb), iov_iter_count(from));

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	return __zerocopy_sg_from_iter(NULL, skb, from, ~0U);
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
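
/*
 * A hedged sketch of a zerocopy transmit path using this helper: copy
 * what fits in the linear area, then pin the remaining user pages as
 * frags. The MSG_ZEROCOPY completion/accounting setup is omitted, and
 * the error label is illustrative.
 *
 *	err = zerocopy_sg_from_iter(skb, &msg->msg_iter);
 *	if (err)
 *		goto drop;	// -EFAULT or -EMSGSIZE from the helpers
 */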

/**
 *	skb_copy_and_csum_datagram - Copy datagram to an iovec iterator
 *	    and update a checksum.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 *	@csump: checksum pointer
 */
static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	return __skb_datagram_iter(skb, offset, to, len, true,
				   csum_and_copy_to_iter, csump);
}

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			return -EINVAL;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;

		if (csum_fold(csum)) {
			iov_iter_revert(&msg->msg_iter, chunk);
			return -EINVAL;
		}

		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(NULL, skb);
	}
	return 0;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
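
/*
 * UDP-style receive paths use this when the checksum has not been
 * verified yet: copy to the user iovec and fold the checksum in one
 * pass. Illustrative sketch, assuming the copy covers the whole
 * payload (off is the transport header length):
 *
 *	if (skb_csum_unnecessary(skb)) {
 *		err = skb_copy_datagram_msg(skb, off, msg, copied);
 *	} else {
 *		err = skb_copy_and_csum_datagram_msg(skb, off, msg);
 *		if (err == -EINVAL)	// checksum failure
 *			goto csum_error;
 *	}
 */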

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: Again totally generic. This also handles
 *	sequenced packet sockets providing the socket receive queue
 *	is only ever holding data ready to receive.
 *
 *	Note: when you *don't* use this routine for this protocol,
 *	and you use a different write policy from sock_writeable()
 *	then please supply your own write_space callback.
 */
__poll_t datagram_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
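
/*
 * datagram_poll() is normally wired straight into a protocol's
 * proto_ops. A hedged sketch for a hypothetical "foo" protocol
 * (PF_FOO and foo_recvmsg are illustrative names):
 *
 *	static const struct proto_ops foo_ops = {
 *		.family		= PF_FOO,
 *		.owner		= THIS_MODULE,
 *		.poll		= datagram_poll,
 *		.recvmsg	= foo_recvmsg,
 *		...
 *	};
 */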