/*
 *	SUCS NET3:
 *
 *	Generic datagram handling routines. These are generic for all
 *	protocols. Possibly a generic IP regathering function could be made up
 *	here. For the moment, I'll try to keep any surplus in the skb.
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <asm/uaccess.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/poll.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/uio.h>

#include <net/protocol.h>
#include <linux/skbuff.h>

#include <net/checksum.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <trace/events/skb.h>
#include <net/busy_poll.h>

/*
 *	Is a socket 'connection oriented' ?
 */
static inline int connection_based(struct sock *sk)
{
	return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
}

static int receiver_wake_function(wait_queue_t *wait, unsigned int mode, int sync,
				  void *key)
{
	unsigned long bits = (unsigned long)key;

	/*
	 * Avoid a wakeup if the events being signalled are not ones a
	 * receiver cares about (data arrival or error).
	 */
	if (bits && !(bits & (POLLIN | POLLERR)))
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

/*
 * Wait for the last received packet to be different from @skb,
 * i.e. for more data to arrive on the receive queue.
 */
static int wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
				 const struct sk_buff *skb)
{
	int error;
	DEFINE_WAIT_FUNC(wait, receiver_wake_function);

	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);

	/* Socket errors? */
	error = sock_error(sk);
	if (error)
		goto out_err;

	if (sk->sk_receive_queue.prev != skb)
		goto out;

	/* Socket shut down? */
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		goto out_noerr;

	/* Sequenced packets can come disconnected.
	 * If so we report the problem.
	 */
	error = -ENOTCONN;
	if (connection_based(sk) &&
	    !(sk->sk_state == TCP_ESTABLISHED || sk->sk_state == TCP_LISTEN))
		goto out_err;

	/* handle signals */
	if (signal_pending(current))
		goto interrupted;

	error = 0;
	*timeo_p = schedule_timeout(*timeo_p);
out:
	finish_wait(sk_sleep(sk), &wait);
	return error;
interrupted:
	error = sock_intr_errno(*timeo_p);
out_err:
	*err = error;
	goto out;
out_noerr:
	*err = 0;
	error = 1;
	goto out;
}

/**
 *	__skb_recv_datagram - Receive a datagram skbuff
 *	@sk: socket
 *	@flags: MSG_ flags
 *	@peeked: returns non-zero if this packet has been seen before
 *	@off: an offset in bytes to peek skb from. Returns an offset
 *	      within an skb where data actually starts
 *	@err: error code returned
 *
 *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
 *	and possible races. This routine is shared by all the datagram
 *	protocols; it is deliberately lockless, so if you alter it remember
 *	it must stay re-entrant.
 *
 *	The order of the tests when we find no data waiting is specified
 *	quite explicitly by POSIX 1003.1g; don't change it without having
 *	the standard around.
 */
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
				    int *peeked, int *off, int *err)
{
	struct sk_buff *skb, *last;
	long timeo;
	/*
	 * Caller is allowed not to check sk->sk_err before calling.
	 */
	int error = sock_error(sk);

	if (error)
		goto no_packet;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	do {
		/* Only user-level code calls this function, so nothing at
		 * interrupt level will suddenly eat the receive_queue;
		 * the queue spinlock below is sufficient.
		 */
		unsigned long cpu_flags;
		struct sk_buff_head *queue = &sk->sk_receive_queue;
		int _off = *off;

		last = (struct sk_buff *)queue;
		spin_lock_irqsave(&queue->lock, cpu_flags);
		skb_queue_walk(queue, skb) {
			last = skb;
			*peeked = skb->peeked;
			if (flags & MSG_PEEK) {
				if (_off >= skb->len && (skb->len || _off ||
							 skb->peeked)) {
					_off -= skb->len;
					continue;
				}
				skb->peeked = 1;
				atomic_inc(&skb->users);
			} else
				__skb_unlink(skb, queue);

			spin_unlock_irqrestore(&queue->lock, cpu_flags);
			*off = _off;
			return skb;
		}
		spin_unlock_irqrestore(&queue->lock, cpu_flags);

		if (sk_can_busy_loop(sk) &&
		    sk_busy_loop(sk, flags & MSG_DONTWAIT))
			continue;

		/* User doesn't want to wait. */
		error = -EAGAIN;
		if (!timeo)
			goto no_packet;

	} while (!wait_for_more_packets(sk, err, &timeo, last));

	return NULL;

no_packet:
	*err = error;
	return NULL;
}
EXPORT_SYMBOL(__skb_recv_datagram);

struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
				  int noblock, int *err)
{
	int peeked, off = 0;

	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
				   &peeked, &off, err);
}
EXPORT_SYMBOL(skb_recv_datagram);
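
/*
 * Example: the canonical receive path for a datagram protocol. This is an
 * illustrative sketch only (the function name and locals below are
 * hypothetical, not part of this file); it mirrors the pattern used by
 * protocols such as UDP: dequeue with skb_recv_datagram(), copy with
 * skb_copy_datagram_msg(), then release with skb_free_datagram().
 *
 *	static int example_recvmsg(struct sock *sk, struct msghdr *msg,
 *				   size_t len, int noblock, int flags)
 *	{
 *		struct sk_buff *skb;
 *		int err, copied;
 *
 *		skb = skb_recv_datagram(sk, flags, noblock, &err);
 *		if (!skb)
 *			return err;
 *
 *		copied = skb->len;
 *		if (copied > len) {
 *			copied = len;
 *			msg->msg_flags |= MSG_TRUNC;
 *		}
 *
 *		err = skb_copy_datagram_msg(skb, 0, msg, copied);
 *
 *		skb_free_datagram(sk, skb);
 *		return err ? err : copied;
 *	}
 */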

void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
	consume_skb(skb);
	sk_mem_reclaim_partial(sk);
}
EXPORT_SYMBOL(skb_free_datagram);

void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
{
	bool slow;

	if (likely(atomic_read(&skb->users) == 1))
		smp_rmb();
	else if (likely(!atomic_dec_and_test(&skb->users)))
		return;

	slow = lock_sock_fast(sk);
	skb_orphan(skb);
	sk_mem_reclaim_partial(sk);
	unlock_sock_fast(sk, slow);

	/* skb is now orphaned; it can be freed outside of the locked section */
	__kfree_skb(skb);
}
EXPORT_SYMBOL(skb_free_datagram_locked);

/**
 *	skb_kill_datagram - Free a datagram skbuff forcibly
 *	@sk: socket
 *	@skb: datagram skbuff
 *	@flags: MSG_ flags
 *
 *	This function frees a datagram skbuff that was received by
 *	skb_recv_datagram. The flags argument must match the one
 *	used for skb_recv_datagram.
 *
 *	If the MSG_PEEK flag is set, and the packet is still on the
 *	receive queue of the socket, it will be taken off the queue
 *	before it is freed.
 *
 *	This function currently only disables BH when acquiring the
 *	sk_receive_queue lock, so it must not be used in a context
 *	where that lock is acquired in an IRQ context.
 *
 *	It returns 0 if the packet was removed by us.
 */
int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
{
	int err = 0;

	if (flags & MSG_PEEK) {
		err = -ENOENT;
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (skb == skb_peek(&sk->sk_receive_queue)) {
			__skb_unlink(skb, &sk->sk_receive_queue);
			atomic_dec(&skb->users);
			err = 0;
		}
		spin_unlock_bh(&sk->sk_receive_queue.lock);
	}

	kfree_skb(skb);
	atomic_inc(&sk->sk_drops);
	sk_mem_reclaim_partial(sk);

	return err;
}
EXPORT_SYMBOL(skb_kill_datagram);
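
/*
 * Example (hypothetical sketch): discarding a peeked datagram that turned
 * out to be unwanted. The flags passed to skb_kill_datagram() must match the
 * ones used for skb_recv_datagram(); with MSG_PEEK set the skb is also
 * unlinked from the receive queue, unless another reader got there first
 * (in which case -ENOENT is returned and only our reference is dropped):
 *
 *	skb = skb_recv_datagram(sk, MSG_PEEK, 0, &err);
 *	if (skb && !example_want_datagram(skb))	// hypothetical predicate
 *		skb_kill_datagram(sk, skb, MSG_PEEK);
 */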

/**
 *	skb_copy_datagram_iter - Copy a datagram to an iovec iterator.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying from
 *	@to: iovec iterator to copy to
 *	@len: amount of data to copy from buffer to iovec
 */
int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
			   struct iov_iter *to, int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	trace_skb_copy_datagram_iovec(skb, len);

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_to_iter(skb->data + offset, copy, to) != copy)
			goto short_copy;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (copy_page_to_iter(skb_frag_page(frag),
					      frag->page_offset + offset -
					      start, copy, to) != copy)
				goto short_copy;
			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	/* Copy any frag list, recursing into each fragment skb. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_iter(frag_iter, offset - start,
						   to, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

	/* This is not really a user copy fault, but rather someone
	 * gave us a bogus length on the skb. We should probably
	 * print a warning here as it may indicate a kernel bug.
	 */

fault:
	return -EFAULT;

short_copy:
	if (iov_iter_count(to))
		goto fault;

	return 0;
}
EXPORT_SYMBOL(skb_copy_datagram_iter);
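
/*
 * Note: most callers reach this function through the skb_copy_datagram_msg()
 * wrapper (a static inline in <linux/skbuff.h>), which simply passes the
 * iterator embedded in a struct msghdr:
 *
 *	err = skb_copy_datagram_msg(skb, offset, msg, copied);
 *	// equivalent to:
 *	err = skb_copy_datagram_iter(skb, offset, &msg->msg_iter, copied);
 */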

/**
 *	skb_copy_datagram_from_iter - Copy a datagram from an iov_iter.
 *	@skb: buffer to copy
 *	@offset: offset in the buffer to start copying to
 *	@from: the copy source
 *	@len: amount of data to copy to buffer from iovec
 *
 *	Returns 0 or -EFAULT.
 */
int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset,
				struct iov_iter *from,
				int len)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		if (copy_from_iter(skb->data + offset, copy, from) != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
	}

	/* Copy paged appendix. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			size_t copied;

			if (copy > len)
				copy = len;
			copied = copy_page_from_iter(skb_frag_page(frag),
						     frag->page_offset + offset - start,
						     copy, from);
			if (copied != copy)
				goto fault;

			if (!(len -= copy))
				return 0;
			offset += copy;
		}
		start = end;
	}

	/* Copy any frag list, recursing into each fragment skb. */
	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			if (copy > len)
				copy = len;
			if (skb_copy_datagram_from_iter(frag_iter,
							offset - start,
							from, copy))
				goto fault;
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_datagram_from_iter);
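
/*
 * Example (hypothetical sketch): filling a freshly allocated skb with user
 * data on the send path. A protocol's sendmsg() typically allocates the skb,
 * reserves headroom, then copies the payload from msg->msg_iter; hlen, len
 * and noblock below are hypothetical locals of such a sendmsg():
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len, noblock, &err);
 *	if (!skb)
 *		return err;
 *	skb_reserve(skb, hlen);
 *	skb_put(skb, len);
 *	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;
 *	}
 */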

/**
 *	zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter
 *	@skb: buffer to copy
 *	@from: the source to copy from
 *
 *	The function will first copy up to headlen, and then pin the
 *	userspace pages and build frags through them.
 *
 *	Returns 0, -EFAULT or -EMSGSIZE.
 */
int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from)
{
	int len = iov_iter_count(from);
	int copy = min_t(int, skb_headlen(skb), len);
	int frag = 0;

	/* copy up to skb headlen */
	if (skb_copy_datagram_from_iter(skb, 0, from, copy))
		return -EFAULT;

	while (iov_iter_count(from)) {
		struct page *pages[MAX_SKB_FRAGS];
		size_t start;
		ssize_t copied;
		unsigned long truesize;
		int n = 0;

		if (frag == MAX_SKB_FRAGS)
			return -EMSGSIZE;

		copied = iov_iter_get_pages(from, pages, ~0U,
					    MAX_SKB_FRAGS - frag, &start);
		if (copied < 0)
			return -EFAULT;

		iov_iter_advance(from, copied);

		truesize = PAGE_ALIGN(copied + start);
		skb->data_len += copied;
		skb->len += copied;
		skb->truesize += truesize;
		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
		while (copied) {
			int size = min_t(int, copied, PAGE_SIZE - start);

			skb_fill_page_desc(skb, frag++, pages[n], start, size);
			start = 0;
			copied -= size;
			n++;
		}
	}
	return 0;
}
EXPORT_SYMBOL(zerocopy_sg_from_iter);
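
/*
 * Example (hypothetical sketch): building a zerocopy skb from a user iov,
 * roughly as zerocopy transmit paths such as tun's do. Only the linear area
 * (here @linear bytes, a hypothetical local) is filled by copying; the
 * remainder of the iterator is pinned in place as page frags, so the user
 * pages must stay stable until transmission completes:
 *
 *	skb = sock_alloc_send_skb(sk, linear, noblock, &err);
 *	if (!skb)
 *		return err;
 *	skb_put(skb, linear);
 *	err = zerocopy_sg_from_iter(skb, &msg->msg_iter);
 *	if (err) {
 *		kfree_skb(skb);
 *		return err;
 *	}
 */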

static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
				      struct iov_iter *to, int len,
				      __wsum *csump)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;
	struct sk_buff *frag_iter;
	int pos = 0;
	int n;

	/* Copy header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
		if (n != copy)
			goto fault;
		if ((len -= copy) == 0)
			return 0;
		offset += copy;
		pos = copy;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(frag);
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;
			struct page *page = skb_frag_page(frag);
			u8 *vaddr = kmap(page);

			if (copy > len)
				copy = len;
			n = csum_and_copy_to_iter(vaddr + frag->page_offset +
						  offset - start, copy,
						  &csum2, to);
			kunmap(page);
			if (n != copy)
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if (!(len -= copy))
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}

	skb_walk_frags(skb, frag_iter) {
		int end;

		WARN_ON(start > offset + len);

		end = start + frag_iter->len;
		if ((copy = end - offset) > 0) {
			__wsum csum2 = 0;

			if (copy > len)
				copy = len;
			if (skb_copy_and_csum_datagram(frag_iter,
						       offset - start,
						       to, copy,
						       &csum2))
				goto fault;
			*csump = csum_block_add(*csump, csum2, pos);
			if ((len -= copy) == 0)
				return 0;
			offset += copy;
			pos += copy;
		}
		start = end;
	}
	if (!len)
		return 0;

fault:
	return -EFAULT;
}

__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}
	skb->csum_valid = !sum;
	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);

__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	__wsum csum;
	__sum16 sum;

	csum = skb_checksum(skb, 0, skb->len, 0);

	/* skb->csum holds the pseudo-header checksum */
	sum = csum_fold(csum_add(skb->csum, csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
		    !skb->csum_complete_sw)
			netdev_rx_csum_fault(skb->dev);
	}

	/* Save the full packet checksum */
	skb->csum = csum;
	skb->ip_summed = CHECKSUM_COMPLETE;
	skb->csum_complete_sw = 1;
	skb->csum_valid = !sum;

	return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete);
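
/*
 * Example: how receive paths consume these helpers. Protocols normally go
 * through the skb_checksum_complete() wrapper (in <linux/skbuff.h>), which
 * returns 0 when the hardware already validated the checksum
 * (skb_csum_unnecessary()) and only falls back to the software computation
 * above otherwise. Illustrative sketch:
 *
 *	if (skb_checksum_complete(skb)) {
 *		// bad checksum: typically drop and bump error counters
 *		kfree_skb(skb);
 *		return 0;
 *	}
 */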

/**
 *	skb_copy_and_csum_datagram_msg - Copy and checksum skb to user iovec.
 *	@skb: skbuff
 *	@hlen: hardware length
 *	@msg: destination
 *
 *	Caller _must_ check that skb will fit to this iovec.
 *
 *	Returns: 0       - success.
 *		 -EINVAL - checksum failure.
 *		 -EFAULT - fault during copy.
 */
int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
				   int hlen, struct msghdr *msg)
{
	__wsum csum;
	int chunk = skb->len - hlen;

	if (!chunk)
		return 0;

	if (msg_data_left(msg) < chunk) {
		if (__skb_checksum_complete(skb))
			goto csum_error;
		if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
			goto fault;
	} else {
		csum = csum_partial(skb->data, hlen, skb->csum);
		if (skb_copy_and_csum_datagram(skb, hlen, &msg->msg_iter,
					       chunk, &csum))
			goto fault;
		if (csum_fold(csum))
			goto csum_error;
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
	}
	return 0;
csum_error:
	return -EINVAL;
fault:
	return -EFAULT;
}
EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
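
/*
 * Example (hypothetical sketch): the UDP-style receive path. Copy and
 * checksum in one pass; a checksum failure (-EINVAL) means the datagram
 * should be dropped and the receive retried, while -EFAULT is reported to
 * the caller:
 *
 *	err = skb_copy_and_csum_datagram_msg(skb, sizeof(struct udphdr), msg);
 *	if (err == -EINVAL) {
 *		skb_kill_datagram(sk, skb, flags);
 *		goto try_again;		// hypothetical label
 *	}
 */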

/**
 *	datagram_poll - generic datagram poll
 *	@file: file struct
 *	@sock: socket
 *	@wait: poll table
 *
 *	Datagram poll: again totally generic. This also handles
 *	sequenced packet sockets, provided the socket receive queue
 *	only ever holds data ready to receive.
 *
 *	Note: when you _don't_ use this routine for this protocol,
 *	and you use a different write policy from sock_writeable(),
 *	then please supply your own write_space callback.
 */
unsigned int datagram_poll(struct file *file, struct socket *sock,
			   poll_table *wait)
{
	struct sock *sk = sock->sk;
	unsigned int mask;

	sock_poll_wait(file, sk_sleep(sk), wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
		mask |= POLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= POLLHUP;

	/* readable? */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;

	/* Connection-based sockets need to check for termination and startup */
	if (connection_based(sk)) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= POLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* writable? */
	if (sock_writeable(sk))
		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
	else
		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);

	return mask;
}
EXPORT_SYMBOL(datagram_poll);
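
/*
 * Example (hypothetical sketch): a datagram protocol normally wires this
 * helper straight into its struct proto_ops rather than rolling its own
 * poll logic; PF_EXAMPLE below is a hypothetical address family:
 *
 *	static const struct proto_ops example_dgram_ops = {
 *		.family	= PF_EXAMPLE,
 *		.poll	= datagram_poll,
 *		// ... remaining ops elided ...
 *	};
 */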