1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51#include <linux/types.h>
52#include <linux/mm.h>
53#include <linux/capability.h>
54#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
61#include <linux/kernel.h>
62#include <linux/kmod.h>
63#include <linux/slab.h>
64#include <linux/vmalloc.h>
65#include <net/net_namespace.h>
66#include <net/ip.h>
67#include <net/protocol.h>
68#include <linux/skbuff.h>
69#include <net/sock.h>
70#include <linux/errno.h>
71#include <linux/timer.h>
72#include <asm/system.h>
73#include <asm/uaccess.h>
74#include <asm/ioctls.h>
75#include <asm/page.h>
76#include <asm/cacheflush.h>
77#include <asm/io.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80#include <linux/poll.h>
81#include <linux/module.h>
82#include <linux/init.h>
83#include <linux/mutex.h>
84#include <linux/if_vlan.h>
85#include <linux/virtio_net.h>
86#include <linux/errqueue.h>
87#include <linux/net_tstamp.h>
88
89#ifdef CONFIG_INET
90#include <net/inet_common.h>
91#endif
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146struct packet_mclist {
147 struct packet_mclist *next;
148 int ifindex;
149 int count;
150 unsigned short type;
151 unsigned short alen;
152 unsigned char addr[MAX_ADDR_LEN];
153};
154
155
156
157struct packet_mreq_max {
158 int mr_ifindex;
159 unsigned short mr_type;
160 unsigned short mr_alen;
161 unsigned char mr_address[MAX_ADDR_LEN];
162};
163
/* Forward declaration; the definition is not in this part of the file. */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
			   int closing, int tx_ring);

#define PGV_FROM_VMALLOC 1

/*
 * One entry of a ring's block vector: the start address of that block's
 * memory.  pgv_to_page() handles both vmalloc and directly mapped
 * addresses, so the buffer may come from either.
 */
struct pgv {
	char *buffer;
};
171
172struct packet_ring_buffer {
173 struct pgv *pg_vec;
174 unsigned int head;
175 unsigned int frames_per_block;
176 unsigned int frame_size;
177 unsigned int frame_max;
178
179 unsigned int pg_vec_order;
180 unsigned int pg_vec_pages;
181 unsigned int pg_vec_len;
182
183 atomic_t pending;
184};
185
186struct packet_sock;
187static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
188
189static void packet_flush_mclist(struct sock *sk);
190
191struct packet_sock {
192
193 struct sock sk;
194 struct tpacket_stats stats;
195 struct packet_ring_buffer rx_ring;
196 struct packet_ring_buffer tx_ring;
197 int copy_thresh;
198 spinlock_t bind_lock;
199 struct mutex pg_vec_lock;
200 unsigned int running:1,
201 auxdata:1,
202 origdev:1,
203 has_vnet_hdr:1;
204 int ifindex;
205 __be16 num;
206 struct packet_mclist *mclist;
207 atomic_t mapped;
208 enum tpacket_versions tp_version;
209 unsigned int tp_hdrlen;
210 unsigned int tp_reserve;
211 unsigned int tp_loss:1;
212 unsigned int tp_tstamp;
213 struct packet_type prot_hook ____cacheline_aligned_in_smp;
214};
215
216struct packet_skb_cb {
217 unsigned int origlen;
218 union {
219 struct sockaddr_pkt pkt;
220 struct sockaddr_ll ll;
221 } sa;
222};
223
224#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
225
226static inline __pure struct page *pgv_to_page(void *addr)
227{
228 if (is_vmalloc_addr(addr))
229 return vmalloc_to_page(addr);
230 return virt_to_page(addr);
231}
232
233static void __packet_set_status(struct packet_sock *po, void *frame, int status)
234{
235 union {
236 struct tpacket_hdr *h1;
237 struct tpacket2_hdr *h2;
238 void *raw;
239 } h;
240
241 h.raw = frame;
242 switch (po->tp_version) {
243 case TPACKET_V1:
244 h.h1->tp_status = status;
245 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
246 break;
247 case TPACKET_V2:
248 h.h2->tp_status = status;
249 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
250 break;
251 default:
252 pr_err("TPACKET version not supported\n");
253 BUG();
254 }
255
256 smp_wmb();
257}
258
259static int __packet_get_status(struct packet_sock *po, void *frame)
260{
261 union {
262 struct tpacket_hdr *h1;
263 struct tpacket2_hdr *h2;
264 void *raw;
265 } h;
266
267 smp_rmb();
268
269 h.raw = frame;
270 switch (po->tp_version) {
271 case TPACKET_V1:
272 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
273 return h.h1->tp_status;
274 case TPACKET_V2:
275 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
276 return h.h2->tp_status;
277 default:
278 pr_err("TPACKET version not supported\n");
279 BUG();
280 return 0;
281 }
282}
283
284static void *packet_lookup_frame(struct packet_sock *po,
285 struct packet_ring_buffer *rb,
286 unsigned int position,
287 int status)
288{
289 unsigned int pg_vec_pos, frame_offset;
290 union {
291 struct tpacket_hdr *h1;
292 struct tpacket2_hdr *h2;
293 void *raw;
294 } h;
295
296 pg_vec_pos = position / rb->frames_per_block;
297 frame_offset = position % rb->frames_per_block;
298
299 h.raw = rb->pg_vec[pg_vec_pos].buffer +
300 (frame_offset * rb->frame_size);
301
302 if (status != __packet_get_status(po, h.raw))
303 return NULL;
304
305 return h.raw;
306}
307
308static inline void *packet_current_frame(struct packet_sock *po,
309 struct packet_ring_buffer *rb,
310 int status)
311{
312 return packet_lookup_frame(po, rb, rb->head, status);
313}
314
315static inline void *packet_previous_frame(struct packet_sock *po,
316 struct packet_ring_buffer *rb,
317 int status)
318{
319 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
320 return packet_lookup_frame(po, rb, previous, status);
321}
322
323static inline void packet_increment_head(struct packet_ring_buffer *buff)
324{
325 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
326}
327
/*
 * Convert a generic socket pointer into the packet socket that embeds it.
 * This is a plain cast, so it relies on struct sock being the first
 * member of struct packet_sock.
 */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	struct packet_sock *po = (struct packet_sock *)sk;

	return po;
}
332
333static void packet_sock_destruct(struct sock *sk)
334{
335 skb_queue_purge(&sk->sk_error_queue);
336
337 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
338 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
339
340 if (!sock_flag(sk, SOCK_DEAD)) {
341 pr_err("Attempt to release alive packet socket: %p\n", sk);
342 return;
343 }
344
345 sk_refcnt_debug_dec(sk);
346}
347
348
349static const struct proto_ops packet_ops;
350
351static const struct proto_ops packet_ops_spkt;
352
353static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
354 struct packet_type *pt, struct net_device *orig_dev)
355{
356 struct sock *sk;
357 struct sockaddr_pkt *spkt;
358
359
360
361
362
363
364 sk = pt->af_packet_priv;
365
366
367
368
369
370
371
372
373
374
375
376
377 if (skb->pkt_type == PACKET_LOOPBACK)
378 goto out;
379
380 if (!net_eq(dev_net(dev), sock_net(sk)))
381 goto out;
382
383 skb = skb_share_check(skb, GFP_ATOMIC);
384 if (skb == NULL)
385 goto oom;
386
387
388 skb_dst_drop(skb);
389
390
391 nf_reset(skb);
392
393 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
394
395 skb_push(skb, skb->data - skb_mac_header(skb));
396
397
398
399
400
401 spkt->spkt_family = dev->type;
402 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
403 spkt->spkt_protocol = skb->protocol;
404
405
406
407
408
409
410 if (sock_queue_rcv_skb(sk, skb) == 0)
411 return 0;
412
413out:
414 kfree_skb(skb);
415oom:
416 return 0;
417}
418
419
420
421
422
423
424
425static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
426 struct msghdr *msg, size_t len)
427{
428 struct sock *sk = sock->sk;
429 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
430 struct sk_buff *skb = NULL;
431 struct net_device *dev;
432 __be16 proto = 0;
433 int err;
434
435
436
437
438
439 if (saddr) {
440 if (msg->msg_namelen < sizeof(struct sockaddr))
441 return -EINVAL;
442 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
443 proto = saddr->spkt_protocol;
444 } else
445 return -ENOTCONN;
446
447
448
449
450
451 saddr->spkt_device[13] = 0;
452retry:
453 rcu_read_lock();
454 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
455 err = -ENODEV;
456 if (dev == NULL)
457 goto out_unlock;
458
459 err = -ENETDOWN;
460 if (!(dev->flags & IFF_UP))
461 goto out_unlock;
462
463
464
465
466
467
468 err = -EMSGSIZE;
469 if (len > dev->mtu + dev->hard_header_len)
470 goto out_unlock;
471
472 if (!skb) {
473 size_t reserved = LL_RESERVED_SPACE(dev);
474 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
475
476 rcu_read_unlock();
477 skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
478 if (skb == NULL)
479 return -ENOBUFS;
480
481
482
483
484 skb_reserve(skb, reserved);
485 skb_reset_network_header(skb);
486
487
488 if (hhlen) {
489 skb->data -= hhlen;
490 skb->tail -= hhlen;
491 if (len < hhlen)
492 skb_reset_network_header(skb);
493 }
494 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
495 if (err)
496 goto out_free;
497 goto retry;
498 }
499
500
501 skb->protocol = proto;
502 skb->dev = dev;
503 skb->priority = sk->sk_priority;
504 skb->mark = sk->sk_mark;
505 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
506 if (err < 0)
507 goto out_unlock;
508
509 dev_queue_xmit(skb);
510 rcu_read_unlock();
511 return len;
512
513out_unlock:
514 rcu_read_unlock();
515out_free:
516 kfree_skb(skb);
517 return err;
518}
519
520static inline unsigned int run_filter(const struct sk_buff *skb,
521 const struct sock *sk,
522 unsigned int res)
523{
524 struct sk_filter *filter;
525
526 rcu_read_lock_bh();
527 filter = rcu_dereference_bh(sk->sk_filter);
528 if (filter != NULL)
529 res = sk_run_filter(skb, filter->insns);
530 rcu_read_unlock_bh();
531
532 return res;
533}
534
535
536
537
538
539
540
541
542
543
544
545
546
547static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
548 struct packet_type *pt, struct net_device *orig_dev)
549{
550 struct sock *sk;
551 struct sockaddr_ll *sll;
552 struct packet_sock *po;
553 u8 *skb_head = skb->data;
554 int skb_len = skb->len;
555 unsigned int snaplen, res;
556
557 if (skb->pkt_type == PACKET_LOOPBACK)
558 goto drop;
559
560 sk = pt->af_packet_priv;
561 po = pkt_sk(sk);
562
563 if (!net_eq(dev_net(dev), sock_net(sk)))
564 goto drop;
565
566 skb->dev = dev;
567
568 if (dev->header_ops) {
569
570
571
572
573
574
575
576 if (sk->sk_type != SOCK_DGRAM)
577 skb_push(skb, skb->data - skb_mac_header(skb));
578 else if (skb->pkt_type == PACKET_OUTGOING) {
579
580 skb_pull(skb, skb_network_offset(skb));
581 }
582 }
583
584 snaplen = skb->len;
585
586 res = run_filter(skb, sk, snaplen);
587 if (!res)
588 goto drop_n_restore;
589 if (snaplen > res)
590 snaplen = res;
591
592 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
593 (unsigned)sk->sk_rcvbuf)
594 goto drop_n_acct;
595
596 if (skb_shared(skb)) {
597 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
598 if (nskb == NULL)
599 goto drop_n_acct;
600
601 if (skb_head != skb->data) {
602 skb->data = skb_head;
603 skb->len = skb_len;
604 }
605 kfree_skb(skb);
606 skb = nskb;
607 }
608
609 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
610 sizeof(skb->cb));
611
612 sll = &PACKET_SKB_CB(skb)->sa.ll;
613 sll->sll_family = AF_PACKET;
614 sll->sll_hatype = dev->type;
615 sll->sll_protocol = skb->protocol;
616 sll->sll_pkttype = skb->pkt_type;
617 if (unlikely(po->origdev))
618 sll->sll_ifindex = orig_dev->ifindex;
619 else
620 sll->sll_ifindex = dev->ifindex;
621
622 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
623
624 PACKET_SKB_CB(skb)->origlen = skb->len;
625
626 if (pskb_trim(skb, snaplen))
627 goto drop_n_acct;
628
629 skb_set_owner_r(skb, sk);
630 skb->dev = NULL;
631 skb_dst_drop(skb);
632
633
634 nf_reset(skb);
635
636 spin_lock(&sk->sk_receive_queue.lock);
637 po->stats.tp_packets++;
638 skb->dropcount = atomic_read(&sk->sk_drops);
639 __skb_queue_tail(&sk->sk_receive_queue, skb);
640 spin_unlock(&sk->sk_receive_queue.lock);
641 sk->sk_data_ready(sk, skb->len);
642 return 0;
643
644drop_n_acct:
645 po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
646
647drop_n_restore:
648 if (skb_head != skb->data && skb_shared(skb)) {
649 skb->data = skb_head;
650 skb->len = skb_len;
651 }
652drop:
653 consume_skb(skb);
654 return 0;
655}
656
657static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
658 struct packet_type *pt, struct net_device *orig_dev)
659{
660 struct sock *sk;
661 struct packet_sock *po;
662 struct sockaddr_ll *sll;
663 union {
664 struct tpacket_hdr *h1;
665 struct tpacket2_hdr *h2;
666 void *raw;
667 } h;
668 u8 *skb_head = skb->data;
669 int skb_len = skb->len;
670 unsigned int snaplen, res;
671 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
672 unsigned short macoff, netoff, hdrlen;
673 struct sk_buff *copy_skb = NULL;
674 struct timeval tv;
675 struct timespec ts;
676 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
677
678 if (skb->pkt_type == PACKET_LOOPBACK)
679 goto drop;
680
681 sk = pt->af_packet_priv;
682 po = pkt_sk(sk);
683
684 if (!net_eq(dev_net(dev), sock_net(sk)))
685 goto drop;
686
687 if (dev->header_ops) {
688 if (sk->sk_type != SOCK_DGRAM)
689 skb_push(skb, skb->data - skb_mac_header(skb));
690 else if (skb->pkt_type == PACKET_OUTGOING) {
691
692 skb_pull(skb, skb_network_offset(skb));
693 }
694 }
695
696 if (skb->ip_summed == CHECKSUM_PARTIAL)
697 status |= TP_STATUS_CSUMNOTREADY;
698
699 snaplen = skb->len;
700
701 res = run_filter(skb, sk, snaplen);
702 if (!res)
703 goto drop_n_restore;
704 if (snaplen > res)
705 snaplen = res;
706
707 if (sk->sk_type == SOCK_DGRAM) {
708 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
709 po->tp_reserve;
710 } else {
711 unsigned maclen = skb_network_offset(skb);
712 netoff = TPACKET_ALIGN(po->tp_hdrlen +
713 (maclen < 16 ? 16 : maclen)) +
714 po->tp_reserve;
715 macoff = netoff - maclen;
716 }
717
718 if (macoff + snaplen > po->rx_ring.frame_size) {
719 if (po->copy_thresh &&
720 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
721 (unsigned)sk->sk_rcvbuf) {
722 if (skb_shared(skb)) {
723 copy_skb = skb_clone(skb, GFP_ATOMIC);
724 } else {
725 copy_skb = skb_get(skb);
726 skb_head = skb->data;
727 }
728 if (copy_skb)
729 skb_set_owner_r(copy_skb, sk);
730 }
731 snaplen = po->rx_ring.frame_size - macoff;
732 if ((int)snaplen < 0)
733 snaplen = 0;
734 }
735
736 spin_lock(&sk->sk_receive_queue.lock);
737 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
738 if (!h.raw)
739 goto ring_is_full;
740 packet_increment_head(&po->rx_ring);
741 po->stats.tp_packets++;
742 if (copy_skb) {
743 status |= TP_STATUS_COPY;
744 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
745 }
746 if (!po->stats.tp_drops)
747 status &= ~TP_STATUS_LOSING;
748 spin_unlock(&sk->sk_receive_queue.lock);
749
750 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
751
752 switch (po->tp_version) {
753 case TPACKET_V1:
754 h.h1->tp_len = skb->len;
755 h.h1->tp_snaplen = snaplen;
756 h.h1->tp_mac = macoff;
757 h.h1->tp_net = netoff;
758 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
759 && shhwtstamps->syststamp.tv64)
760 tv = ktime_to_timeval(shhwtstamps->syststamp);
761 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
762 && shhwtstamps->hwtstamp.tv64)
763 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
764 else if (skb->tstamp.tv64)
765 tv = ktime_to_timeval(skb->tstamp);
766 else
767 do_gettimeofday(&tv);
768 h.h1->tp_sec = tv.tv_sec;
769 h.h1->tp_usec = tv.tv_usec;
770 hdrlen = sizeof(*h.h1);
771 break;
772 case TPACKET_V2:
773 h.h2->tp_len = skb->len;
774 h.h2->tp_snaplen = snaplen;
775 h.h2->tp_mac = macoff;
776 h.h2->tp_net = netoff;
777 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
778 && shhwtstamps->syststamp.tv64)
779 ts = ktime_to_timespec(shhwtstamps->syststamp);
780 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
781 && shhwtstamps->hwtstamp.tv64)
782 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
783 else if (skb->tstamp.tv64)
784 ts = ktime_to_timespec(skb->tstamp);
785 else
786 getnstimeofday(&ts);
787 h.h2->tp_sec = ts.tv_sec;
788 h.h2->tp_nsec = ts.tv_nsec;
789 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
790 hdrlen = sizeof(*h.h2);
791 break;
792 default:
793 BUG();
794 }
795
796 sll = h.raw + TPACKET_ALIGN(hdrlen);
797 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
798 sll->sll_family = AF_PACKET;
799 sll->sll_hatype = dev->type;
800 sll->sll_protocol = skb->protocol;
801 sll->sll_pkttype = skb->pkt_type;
802 if (unlikely(po->origdev))
803 sll->sll_ifindex = orig_dev->ifindex;
804 else
805 sll->sll_ifindex = dev->ifindex;
806
807 __packet_set_status(po, h.raw, status);
808 smp_mb();
809#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
810 {
811 u8 *start, *end;
812
813 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
814 for (start = h.raw; start < end; start += PAGE_SIZE)
815 flush_dcache_page(pgv_to_page(start));
816 }
817#endif
818
819 sk->sk_data_ready(sk, 0);
820
821drop_n_restore:
822 if (skb_head != skb->data && skb_shared(skb)) {
823 skb->data = skb_head;
824 skb->len = skb_len;
825 }
826drop:
827 kfree_skb(skb);
828 return 0;
829
830ring_is_full:
831 po->stats.tp_drops++;
832 spin_unlock(&sk->sk_receive_queue.lock);
833
834 sk->sk_data_ready(sk, 0);
835 kfree_skb(copy_skb);
836 goto drop_n_restore;
837}
838
839static void tpacket_destruct_skb(struct sk_buff *skb)
840{
841 struct packet_sock *po = pkt_sk(skb->sk);
842 void *ph;
843
844 BUG_ON(skb == NULL);
845
846 if (likely(po->tx_ring.pg_vec)) {
847 ph = skb_shinfo(skb)->destructor_arg;
848 BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
849 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
850 atomic_dec(&po->tx_ring.pending);
851 __packet_set_status(po, ph, TP_STATUS_AVAILABLE);
852 }
853
854 sock_wfree(skb);
855}
856
857static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
858 void *frame, struct net_device *dev, int size_max,
859 __be16 proto, unsigned char *addr)
860{
861 union {
862 struct tpacket_hdr *h1;
863 struct tpacket2_hdr *h2;
864 void *raw;
865 } ph;
866 int to_write, offset, len, tp_len, nr_frags, len_max;
867 struct socket *sock = po->sk.sk_socket;
868 struct page *page;
869 void *data;
870 int err;
871
872 ph.raw = frame;
873
874 skb->protocol = proto;
875 skb->dev = dev;
876 skb->priority = po->sk.sk_priority;
877 skb->mark = po->sk.sk_mark;
878 skb_shinfo(skb)->destructor_arg = ph.raw;
879
880 switch (po->tp_version) {
881 case TPACKET_V2:
882 tp_len = ph.h2->tp_len;
883 break;
884 default:
885 tp_len = ph.h1->tp_len;
886 break;
887 }
888 if (unlikely(tp_len > size_max)) {
889 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
890 return -EMSGSIZE;
891 }
892
893 skb_reserve(skb, LL_RESERVED_SPACE(dev));
894 skb_reset_network_header(skb);
895
896 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
897 to_write = tp_len;
898
899 if (sock->type == SOCK_DGRAM) {
900 err = dev_hard_header(skb, dev, ntohs(proto), addr,
901 NULL, tp_len);
902 if (unlikely(err < 0))
903 return -EINVAL;
904 } else if (dev->hard_header_len) {
905
906 if (unlikely(tp_len <= dev->hard_header_len)) {
907 pr_err("packet size is too short (%d < %d)\n",
908 tp_len, dev->hard_header_len);
909 return -EINVAL;
910 }
911
912 skb_push(skb, dev->hard_header_len);
913 err = skb_store_bits(skb, 0, data,
914 dev->hard_header_len);
915 if (unlikely(err))
916 return err;
917
918 data += dev->hard_header_len;
919 to_write -= dev->hard_header_len;
920 }
921
922 err = -EFAULT;
923 offset = offset_in_page(data);
924 len_max = PAGE_SIZE - offset;
925 len = ((to_write > len_max) ? len_max : to_write);
926
927 skb->data_len = to_write;
928 skb->len += to_write;
929 skb->truesize += to_write;
930 atomic_add(to_write, &po->sk.sk_wmem_alloc);
931
932 while (likely(to_write)) {
933 nr_frags = skb_shinfo(skb)->nr_frags;
934
935 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
936 pr_err("Packet exceed the number of skb frags(%lu)\n",
937 MAX_SKB_FRAGS);
938 return -EFAULT;
939 }
940
941 page = pgv_to_page(data);
942 data += len;
943 flush_dcache_page(page);
944 get_page(page);
945 skb_fill_page_desc(skb, nr_frags, page, offset, len);
946 to_write -= len;
947 offset = 0;
948 len_max = PAGE_SIZE;
949 len = ((to_write > len_max) ? len_max : to_write);
950 }
951
952 return tp_len;
953}
954
955static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
956{
957 struct socket *sock;
958 struct sk_buff *skb;
959 struct net_device *dev;
960 __be16 proto;
961 int ifindex, err, reserve = 0;
962 void *ph;
963 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
964 int tp_len, size_max;
965 unsigned char *addr;
966 int len_sum = 0;
967 int status = 0;
968
969 sock = po->sk.sk_socket;
970
971 mutex_lock(&po->pg_vec_lock);
972
973 err = -EBUSY;
974 if (saddr == NULL) {
975 ifindex = po->ifindex;
976 proto = po->num;
977 addr = NULL;
978 } else {
979 err = -EINVAL;
980 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
981 goto out;
982 if (msg->msg_namelen < (saddr->sll_halen
983 + offsetof(struct sockaddr_ll,
984 sll_addr)))
985 goto out;
986 ifindex = saddr->sll_ifindex;
987 proto = saddr->sll_protocol;
988 addr = saddr->sll_addr;
989 }
990
991 dev = dev_get_by_index(sock_net(&po->sk), ifindex);
992 err = -ENXIO;
993 if (unlikely(dev == NULL))
994 goto out;
995
996 reserve = dev->hard_header_len;
997
998 err = -ENETDOWN;
999 if (unlikely(!(dev->flags & IFF_UP)))
1000 goto out_put;
1001
1002 size_max = po->tx_ring.frame_size
1003 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
1004
1005 if (size_max > dev->mtu + reserve)
1006 size_max = dev->mtu + reserve;
1007
1008 do {
1009 ph = packet_current_frame(po, &po->tx_ring,
1010 TP_STATUS_SEND_REQUEST);
1011
1012 if (unlikely(ph == NULL)) {
1013 schedule();
1014 continue;
1015 }
1016
1017 status = TP_STATUS_SEND_REQUEST;
1018 skb = sock_alloc_send_skb(&po->sk,
1019 LL_ALLOCATED_SPACE(dev)
1020 + sizeof(struct sockaddr_ll),
1021 0, &err);
1022
1023 if (unlikely(skb == NULL))
1024 goto out_status;
1025
1026 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
1027 addr);
1028
1029 if (unlikely(tp_len < 0)) {
1030 if (po->tp_loss) {
1031 __packet_set_status(po, ph,
1032 TP_STATUS_AVAILABLE);
1033 packet_increment_head(&po->tx_ring);
1034 kfree_skb(skb);
1035 continue;
1036 } else {
1037 status = TP_STATUS_WRONG_FORMAT;
1038 err = tp_len;
1039 goto out_status;
1040 }
1041 }
1042
1043 skb->destructor = tpacket_destruct_skb;
1044 __packet_set_status(po, ph, TP_STATUS_SENDING);
1045 atomic_inc(&po->tx_ring.pending);
1046
1047 status = TP_STATUS_SEND_REQUEST;
1048 err = dev_queue_xmit(skb);
1049 if (unlikely(err > 0)) {
1050 err = net_xmit_errno(err);
1051 if (err && __packet_get_status(po, ph) ==
1052 TP_STATUS_AVAILABLE) {
1053
1054 skb = NULL;
1055 goto out_status;
1056 }
1057
1058
1059
1060
1061 err = 0;
1062 }
1063 packet_increment_head(&po->tx_ring);
1064 len_sum += tp_len;
1065 } while (likely((ph != NULL) ||
1066 ((!(msg->msg_flags & MSG_DONTWAIT)) &&
1067 (atomic_read(&po->tx_ring.pending))))
1068 );
1069
1070 err = len_sum;
1071 goto out_put;
1072
1073out_status:
1074 __packet_set_status(po, ph, status);
1075 kfree_skb(skb);
1076out_put:
1077 dev_put(dev);
1078out:
1079 mutex_unlock(&po->pg_vec_lock);
1080 return err;
1081}
1082
1083static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1084 size_t reserve, size_t len,
1085 size_t linear, int noblock,
1086 int *err)
1087{
1088 struct sk_buff *skb;
1089
1090
1091 if (prepad + len < PAGE_SIZE || !linear)
1092 linear = len;
1093
1094 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1095 err);
1096 if (!skb)
1097 return NULL;
1098
1099 skb_reserve(skb, reserve);
1100 skb_put(skb, linear);
1101 skb->data_len = len - linear;
1102 skb->len += len - linear;
1103
1104 return skb;
1105}
1106
1107static int packet_snd(struct socket *sock,
1108 struct msghdr *msg, size_t len)
1109{
1110 struct sock *sk = sock->sk;
1111 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
1112 struct sk_buff *skb;
1113 struct net_device *dev;
1114 __be16 proto;
1115 unsigned char *addr;
1116 int ifindex, err, reserve = 0;
1117 struct virtio_net_hdr vnet_hdr = { 0 };
1118 int offset = 0;
1119 int vnet_hdr_len;
1120 struct packet_sock *po = pkt_sk(sk);
1121 unsigned short gso_type = 0;
1122
1123
1124
1125
1126
1127 if (saddr == NULL) {
1128 ifindex = po->ifindex;
1129 proto = po->num;
1130 addr = NULL;
1131 } else {
1132 err = -EINVAL;
1133 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
1134 goto out;
1135 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
1136 goto out;
1137 ifindex = saddr->sll_ifindex;
1138 proto = saddr->sll_protocol;
1139 addr = saddr->sll_addr;
1140 }
1141
1142
1143 dev = dev_get_by_index(sock_net(sk), ifindex);
1144 err = -ENXIO;
1145 if (dev == NULL)
1146 goto out_unlock;
1147 if (sock->type == SOCK_RAW)
1148 reserve = dev->hard_header_len;
1149
1150 err = -ENETDOWN;
1151 if (!(dev->flags & IFF_UP))
1152 goto out_unlock;
1153
1154 if (po->has_vnet_hdr) {
1155 vnet_hdr_len = sizeof(vnet_hdr);
1156
1157 err = -EINVAL;
1158 if (len < vnet_hdr_len)
1159 goto out_unlock;
1160
1161 len -= vnet_hdr_len;
1162
1163 err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
1164 vnet_hdr_len);
1165 if (err < 0)
1166 goto out_unlock;
1167
1168 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1169 (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
1170 vnet_hdr.hdr_len))
1171 vnet_hdr.hdr_len = vnet_hdr.csum_start +
1172 vnet_hdr.csum_offset + 2;
1173
1174 err = -EINVAL;
1175 if (vnet_hdr.hdr_len > len)
1176 goto out_unlock;
1177
1178 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1179 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1180 case VIRTIO_NET_HDR_GSO_TCPV4:
1181 gso_type = SKB_GSO_TCPV4;
1182 break;
1183 case VIRTIO_NET_HDR_GSO_TCPV6:
1184 gso_type = SKB_GSO_TCPV6;
1185 break;
1186 case VIRTIO_NET_HDR_GSO_UDP:
1187 gso_type = SKB_GSO_UDP;
1188 break;
1189 default:
1190 goto out_unlock;
1191 }
1192
1193 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
1194 gso_type |= SKB_GSO_TCP_ECN;
1195
1196 if (vnet_hdr.gso_size == 0)
1197 goto out_unlock;
1198
1199 }
1200 }
1201
1202 err = -EMSGSIZE;
1203 if (!gso_type && (len > dev->mtu+reserve))
1204 goto out_unlock;
1205
1206 err = -ENOBUFS;
1207 skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
1208 LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
1209 msg->msg_flags & MSG_DONTWAIT, &err);
1210 if (skb == NULL)
1211 goto out_unlock;
1212
1213 skb_set_network_header(skb, reserve);
1214
1215 err = -EINVAL;
1216 if (sock->type == SOCK_DGRAM &&
1217 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
1218 goto out_free;
1219
1220
1221 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1222 if (err)
1223 goto out_free;
1224 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1225 if (err < 0)
1226 goto out_free;
1227
1228 skb->protocol = proto;
1229 skb->dev = dev;
1230 skb->priority = sk->sk_priority;
1231 skb->mark = sk->sk_mark;
1232
1233 if (po->has_vnet_hdr) {
1234 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1235 if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
1236 vnet_hdr.csum_offset)) {
1237 err = -EINVAL;
1238 goto out_free;
1239 }
1240 }
1241
1242 skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
1243 skb_shinfo(skb)->gso_type = gso_type;
1244
1245
1246 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1247 skb_shinfo(skb)->gso_segs = 0;
1248
1249 len += vnet_hdr_len;
1250 }
1251
1252
1253
1254
1255
1256 err = dev_queue_xmit(skb);
1257 if (err > 0 && (err = net_xmit_errno(err)) != 0)
1258 goto out_unlock;
1259
1260 dev_put(dev);
1261
1262 return len;
1263
1264out_free:
1265 kfree_skb(skb);
1266out_unlock:
1267 if (dev)
1268 dev_put(dev);
1269out:
1270 return err;
1271}
1272
1273static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1274 struct msghdr *msg, size_t len)
1275{
1276 struct sock *sk = sock->sk;
1277 struct packet_sock *po = pkt_sk(sk);
1278 if (po->tx_ring.pg_vec)
1279 return tpacket_snd(po, msg);
1280 else
1281 return packet_snd(sock, msg, len);
1282}
1283
1284
1285
1286
1287
1288
1289static int packet_release(struct socket *sock)
1290{
1291 struct sock *sk = sock->sk;
1292 struct packet_sock *po;
1293 struct net *net;
1294 struct tpacket_req req;
1295
1296 if (!sk)
1297 return 0;
1298
1299 net = sock_net(sk);
1300 po = pkt_sk(sk);
1301
1302 spin_lock_bh(&net->packet.sklist_lock);
1303 sk_del_node_init_rcu(sk);
1304 sock_prot_inuse_add(net, sk->sk_prot, -1);
1305 spin_unlock_bh(&net->packet.sklist_lock);
1306
1307 spin_lock(&po->bind_lock);
1308 if (po->running) {
1309
1310
1311
1312 po->running = 0;
1313 po->num = 0;
1314 __dev_remove_pack(&po->prot_hook);
1315 __sock_put(sk);
1316 }
1317 spin_unlock(&po->bind_lock);
1318
1319 packet_flush_mclist(sk);
1320
1321 memset(&req, 0, sizeof(req));
1322
1323 if (po->rx_ring.pg_vec)
1324 packet_set_ring(sk, &req, 1, 0);
1325
1326 if (po->tx_ring.pg_vec)
1327 packet_set_ring(sk, &req, 1, 1);
1328
1329 synchronize_net();
1330
1331
1332
1333 sock_orphan(sk);
1334 sock->sk = NULL;
1335
1336
1337
1338 skb_queue_purge(&sk->sk_receive_queue);
1339 sk_refcnt_debug_release(sk);
1340
1341 sock_put(sk);
1342 return 0;
1343}
1344
1345
1346
1347
1348
1349static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
1350{
1351 struct packet_sock *po = pkt_sk(sk);
1352
1353
1354
1355
1356 lock_sock(sk);
1357
1358 spin_lock(&po->bind_lock);
1359 if (po->running) {
1360 __sock_put(sk);
1361 po->running = 0;
1362 po->num = 0;
1363 spin_unlock(&po->bind_lock);
1364 dev_remove_pack(&po->prot_hook);
1365 spin_lock(&po->bind_lock);
1366 }
1367
1368 po->num = protocol;
1369 po->prot_hook.type = protocol;
1370 po->prot_hook.dev = dev;
1371
1372 po->ifindex = dev ? dev->ifindex : 0;
1373
1374 if (protocol == 0)
1375 goto out_unlock;
1376
1377 if (!dev || (dev->flags & IFF_UP)) {
1378 dev_add_pack(&po->prot_hook);
1379 sock_hold(sk);
1380 po->running = 1;
1381 } else {
1382 sk->sk_err = ENETDOWN;
1383 if (!sock_flag(sk, SOCK_DEAD))
1384 sk->sk_error_report(sk);
1385 }
1386
1387out_unlock:
1388 spin_unlock(&po->bind_lock);
1389 release_sock(sk);
1390 return 0;
1391}
1392
1393
1394
1395
1396
1397static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1398 int addr_len)
1399{
1400 struct sock *sk = sock->sk;
1401 char name[15];
1402 struct net_device *dev;
1403 int err = -ENODEV;
1404
1405
1406
1407
1408
1409 if (addr_len != sizeof(struct sockaddr))
1410 return -EINVAL;
1411 strlcpy(name, uaddr->sa_data, sizeof(name));
1412
1413 dev = dev_get_by_name(sock_net(sk), name);
1414 if (dev) {
1415 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1416 dev_put(dev);
1417 }
1418 return err;
1419}
1420
1421static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1422{
1423 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1424 struct sock *sk = sock->sk;
1425 struct net_device *dev = NULL;
1426 int err;
1427
1428
1429
1430
1431
1432
1433 if (addr_len < sizeof(struct sockaddr_ll))
1434 return -EINVAL;
1435 if (sll->sll_family != AF_PACKET)
1436 return -EINVAL;
1437
1438 if (sll->sll_ifindex) {
1439 err = -ENODEV;
1440 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
1441 if (dev == NULL)
1442 goto out;
1443 }
1444 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1445 if (dev)
1446 dev_put(dev);
1447
1448out:
1449 return err;
1450}
1451
1452static struct proto packet_proto = {
1453 .name = "PACKET",
1454 .owner = THIS_MODULE,
1455 .obj_size = sizeof(struct packet_sock),
1456};
1457
1458
1459
1460
1461
1462static int packet_create(struct net *net, struct socket *sock, int protocol,
1463 int kern)
1464{
1465 struct sock *sk;
1466 struct packet_sock *po;
1467 __be16 proto = (__force __be16)protocol;
1468 int err;
1469
1470 if (!capable(CAP_NET_RAW))
1471 return -EPERM;
1472 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
1473 sock->type != SOCK_PACKET)
1474 return -ESOCKTNOSUPPORT;
1475
1476 sock->state = SS_UNCONNECTED;
1477
1478 err = -ENOBUFS;
1479 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
1480 if (sk == NULL)
1481 goto out;
1482
1483 sock->ops = &packet_ops;
1484 if (sock->type == SOCK_PACKET)
1485 sock->ops = &packet_ops_spkt;
1486
1487 sock_init_data(sock, sk);
1488
1489 po = pkt_sk(sk);
1490 sk->sk_family = PF_PACKET;
1491 po->num = proto;
1492
1493 sk->sk_destruct = packet_sock_destruct;
1494 sk_refcnt_debug_inc(sk);
1495
1496
1497
1498
1499
1500 spin_lock_init(&po->bind_lock);
1501 mutex_init(&po->pg_vec_lock);
1502 po->prot_hook.func = packet_rcv;
1503
1504 if (sock->type == SOCK_PACKET)
1505 po->prot_hook.func = packet_rcv_spkt;
1506
1507 po->prot_hook.af_packet_priv = sk;
1508
1509 if (proto) {
1510 po->prot_hook.type = proto;
1511 dev_add_pack(&po->prot_hook);
1512 sock_hold(sk);
1513 po->running = 1;
1514 }
1515
1516 spin_lock_bh(&net->packet.sklist_lock);
1517 sk_add_node_rcu(sk, &net->packet.sklist);
1518 sock_prot_inuse_add(net, &packet_proto, 1);
1519 spin_unlock_bh(&net->packet.sklist_lock);
1520
1521 return 0;
1522out:
1523 return err;
1524}
1525
1526static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
1527{
1528 struct sock_exterr_skb *serr;
1529 struct sk_buff *skb, *skb2;
1530 int copied, err;
1531
1532 err = -EAGAIN;
1533 skb = skb_dequeue(&sk->sk_error_queue);
1534 if (skb == NULL)
1535 goto out;
1536
1537 copied = skb->len;
1538 if (copied > len) {
1539 msg->msg_flags |= MSG_TRUNC;
1540 copied = len;
1541 }
1542 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1543 if (err)
1544 goto out_free_skb;
1545
1546 sock_recv_timestamp(msg, sk, skb);
1547
1548 serr = SKB_EXT_ERR(skb);
1549 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
1550 sizeof(serr->ee), &serr->ee);
1551
1552 msg->msg_flags |= MSG_ERRQUEUE;
1553 err = copied;
1554
1555
1556 spin_lock_bh(&sk->sk_error_queue.lock);
1557 sk->sk_err = 0;
1558 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
1559 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
1560 spin_unlock_bh(&sk->sk_error_queue.lock);
1561 sk->sk_error_report(sk);
1562 } else
1563 spin_unlock_bh(&sk->sk_error_queue.lock);
1564
1565out_free_skb:
1566 kfree_skb(skb);
1567out:
1568 return err;
1569}
1570
1571
1572
1573
1574
1575
1576static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1577 struct msghdr *msg, size_t len, int flags)
1578{
1579 struct sock *sk = sock->sk;
1580 struct sk_buff *skb;
1581 int copied, err;
1582 struct sockaddr_ll *sll;
1583 int vnet_hdr_len = 0;
1584
1585 err = -EINVAL;
1586 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
1587 goto out;
1588
1589#if 0
1590
1591 if (pkt_sk(sk)->ifindex < 0)
1592 return -ENODEV;
1593#endif
1594
1595 if (flags & MSG_ERRQUEUE) {
1596 err = packet_recv_error(sk, msg, len);
1597 goto out;
1598 }
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
1610
1611
1612
1613
1614
1615
1616
1617 if (skb == NULL)
1618 goto out;
1619
1620 if (pkt_sk(sk)->has_vnet_hdr) {
1621 struct virtio_net_hdr vnet_hdr = { 0 };
1622
1623 err = -EINVAL;
1624 vnet_hdr_len = sizeof(vnet_hdr);
1625 if (len < vnet_hdr_len)
1626 goto out_free;
1627
1628 len -= vnet_hdr_len;
1629
1630 if (skb_is_gso(skb)) {
1631 struct skb_shared_info *sinfo = skb_shinfo(skb);
1632
1633
1634 vnet_hdr.hdr_len = skb_headlen(skb);
1635 vnet_hdr.gso_size = sinfo->gso_size;
1636 if (sinfo->gso_type & SKB_GSO_TCPV4)
1637 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1638 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1639 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1640 else if (sinfo->gso_type & SKB_GSO_UDP)
1641 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1642 else if (sinfo->gso_type & SKB_GSO_FCOE)
1643 goto out_free;
1644 else
1645 BUG();
1646 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1647 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1648 } else
1649 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1650
1651 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1652 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1653 vnet_hdr.csum_start = skb_checksum_start_offset(skb);
1654 vnet_hdr.csum_offset = skb->csum_offset;
1655 }
1656
1657 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1658 vnet_hdr_len);
1659 if (err < 0)
1660 goto out_free;
1661 }
1662
1663
1664
1665
1666
1667
1668 sll = &PACKET_SKB_CB(skb)->sa.ll;
1669 if (sock->type == SOCK_PACKET)
1670 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1671 else
1672 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1673
1674
1675
1676
1677
1678
1679 copied = skb->len;
1680 if (copied > len) {
1681 copied = len;
1682 msg->msg_flags |= MSG_TRUNC;
1683 }
1684
1685 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1686 if (err)
1687 goto out_free;
1688
1689 sock_recv_ts_and_drops(msg, sk, skb);
1690
1691 if (msg->msg_name)
1692 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1693 msg->msg_namelen);
1694
1695 if (pkt_sk(sk)->auxdata) {
1696 struct tpacket_auxdata aux;
1697
1698 aux.tp_status = TP_STATUS_USER;
1699 if (skb->ip_summed == CHECKSUM_PARTIAL)
1700 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1701 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1702 aux.tp_snaplen = skb->len;
1703 aux.tp_mac = 0;
1704 aux.tp_net = skb_network_offset(skb);
1705 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
1706
1707 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
1708 }
1709
1710
1711
1712
1713
1714 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
1715
1716out_free:
1717 skb_free_datagram(sk, skb);
1718out:
1719 return err;
1720}
1721
1722static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1723 int *uaddr_len, int peer)
1724{
1725 struct net_device *dev;
1726 struct sock *sk = sock->sk;
1727
1728 if (peer)
1729 return -EOPNOTSUPP;
1730
1731 uaddr->sa_family = AF_PACKET;
1732 rcu_read_lock();
1733 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1734 if (dev)
1735 strncpy(uaddr->sa_data, dev->name, 14);
1736 else
1737 memset(uaddr->sa_data, 0, 14);
1738 rcu_read_unlock();
1739 *uaddr_len = sizeof(*uaddr);
1740
1741 return 0;
1742}
1743
1744static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1745 int *uaddr_len, int peer)
1746{
1747 struct net_device *dev;
1748 struct sock *sk = sock->sk;
1749 struct packet_sock *po = pkt_sk(sk);
1750 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
1751
1752 if (peer)
1753 return -EOPNOTSUPP;
1754
1755 sll->sll_family = AF_PACKET;
1756 sll->sll_ifindex = po->ifindex;
1757 sll->sll_protocol = po->num;
1758 sll->sll_pkttype = 0;
1759 rcu_read_lock();
1760 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
1761 if (dev) {
1762 sll->sll_hatype = dev->type;
1763 sll->sll_halen = dev->addr_len;
1764 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1765 } else {
1766 sll->sll_hatype = 0;
1767 sll->sll_halen = 0;
1768 }
1769 rcu_read_unlock();
1770 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
1771
1772 return 0;
1773}
1774
1775static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1776 int what)
1777{
1778 switch (i->type) {
1779 case PACKET_MR_MULTICAST:
1780 if (i->alen != dev->addr_len)
1781 return -EINVAL;
1782 if (what > 0)
1783 return dev_mc_add(dev, i->addr);
1784 else
1785 return dev_mc_del(dev, i->addr);
1786 break;
1787 case PACKET_MR_PROMISC:
1788 return dev_set_promiscuity(dev, what);
1789 break;
1790 case PACKET_MR_ALLMULTI:
1791 return dev_set_allmulti(dev, what);
1792 break;
1793 case PACKET_MR_UNICAST:
1794 if (i->alen != dev->addr_len)
1795 return -EINVAL;
1796 if (what > 0)
1797 return dev_uc_add(dev, i->addr);
1798 else
1799 return dev_uc_del(dev, i->addr);
1800 break;
1801 default:
1802 break;
1803 }
1804 return 0;
1805}
1806
1807static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1808{
1809 for ( ; i; i = i->next) {
1810 if (i->ifindex == dev->ifindex)
1811 packet_dev_mc(dev, i, what);
1812 }
1813}
1814
1815static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
1816{
1817 struct packet_sock *po = pkt_sk(sk);
1818 struct packet_mclist *ml, *i;
1819 struct net_device *dev;
1820 int err;
1821
1822 rtnl_lock();
1823
1824 err = -ENODEV;
1825 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
1826 if (!dev)
1827 goto done;
1828
1829 err = -EINVAL;
1830 if (mreq->mr_alen > dev->addr_len)
1831 goto done;
1832
1833 err = -ENOBUFS;
1834 i = kmalloc(sizeof(*i), GFP_KERNEL);
1835 if (i == NULL)
1836 goto done;
1837
1838 err = 0;
1839 for (ml = po->mclist; ml; ml = ml->next) {
1840 if (ml->ifindex == mreq->mr_ifindex &&
1841 ml->type == mreq->mr_type &&
1842 ml->alen == mreq->mr_alen &&
1843 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1844 ml->count++;
1845
1846 kfree(i);
1847 goto done;
1848 }
1849 }
1850
1851 i->type = mreq->mr_type;
1852 i->ifindex = mreq->mr_ifindex;
1853 i->alen = mreq->mr_alen;
1854 memcpy(i->addr, mreq->mr_address, i->alen);
1855 i->count = 1;
1856 i->next = po->mclist;
1857 po->mclist = i;
1858 err = packet_dev_mc(dev, i, 1);
1859 if (err) {
1860 po->mclist = i->next;
1861 kfree(i);
1862 }
1863
1864done:
1865 rtnl_unlock();
1866 return err;
1867}
1868
1869static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
1870{
1871 struct packet_mclist *ml, **mlp;
1872
1873 rtnl_lock();
1874
1875 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1876 if (ml->ifindex == mreq->mr_ifindex &&
1877 ml->type == mreq->mr_type &&
1878 ml->alen == mreq->mr_alen &&
1879 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1880 if (--ml->count == 0) {
1881 struct net_device *dev;
1882 *mlp = ml->next;
1883 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1884 if (dev)
1885 packet_dev_mc(dev, ml, -1);
1886 kfree(ml);
1887 }
1888 rtnl_unlock();
1889 return 0;
1890 }
1891 }
1892 rtnl_unlock();
1893 return -EADDRNOTAVAIL;
1894}
1895
1896static void packet_flush_mclist(struct sock *sk)
1897{
1898 struct packet_sock *po = pkt_sk(sk);
1899 struct packet_mclist *ml;
1900
1901 if (!po->mclist)
1902 return;
1903
1904 rtnl_lock();
1905 while ((ml = po->mclist) != NULL) {
1906 struct net_device *dev;
1907
1908 po->mclist = ml->next;
1909 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1910 if (dev != NULL)
1911 packet_dev_mc(dev, ml, -1);
1912 kfree(ml);
1913 }
1914 rtnl_unlock();
1915}
1916
1917static int
1918packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
1919{
1920 struct sock *sk = sock->sk;
1921 struct packet_sock *po = pkt_sk(sk);
1922 int ret;
1923
1924 if (level != SOL_PACKET)
1925 return -ENOPROTOOPT;
1926
1927 switch (optname) {
1928 case PACKET_ADD_MEMBERSHIP:
1929 case PACKET_DROP_MEMBERSHIP:
1930 {
1931 struct packet_mreq_max mreq;
1932 int len = optlen;
1933 memset(&mreq, 0, sizeof(mreq));
1934 if (len < sizeof(struct packet_mreq))
1935 return -EINVAL;
1936 if (len > sizeof(mreq))
1937 len = sizeof(mreq);
1938 if (copy_from_user(&mreq, optval, len))
1939 return -EFAULT;
1940 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1941 return -EINVAL;
1942 if (optname == PACKET_ADD_MEMBERSHIP)
1943 ret = packet_mc_add(sk, &mreq);
1944 else
1945 ret = packet_mc_drop(sk, &mreq);
1946 return ret;
1947 }
1948
1949 case PACKET_RX_RING:
1950 case PACKET_TX_RING:
1951 {
1952 struct tpacket_req req;
1953
1954 if (optlen < sizeof(req))
1955 return -EINVAL;
1956 if (pkt_sk(sk)->has_vnet_hdr)
1957 return -EINVAL;
1958 if (copy_from_user(&req, optval, sizeof(req)))
1959 return -EFAULT;
1960 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
1961 }
1962 case PACKET_COPY_THRESH:
1963 {
1964 int val;
1965
1966 if (optlen != sizeof(val))
1967 return -EINVAL;
1968 if (copy_from_user(&val, optval, sizeof(val)))
1969 return -EFAULT;
1970
1971 pkt_sk(sk)->copy_thresh = val;
1972 return 0;
1973 }
1974 case PACKET_VERSION:
1975 {
1976 int val;
1977
1978 if (optlen != sizeof(val))
1979 return -EINVAL;
1980 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1981 return -EBUSY;
1982 if (copy_from_user(&val, optval, sizeof(val)))
1983 return -EFAULT;
1984 switch (val) {
1985 case TPACKET_V1:
1986 case TPACKET_V2:
1987 po->tp_version = val;
1988 return 0;
1989 default:
1990 return -EINVAL;
1991 }
1992 }
1993 case PACKET_RESERVE:
1994 {
1995 unsigned int val;
1996
1997 if (optlen != sizeof(val))
1998 return -EINVAL;
1999 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2000 return -EBUSY;
2001 if (copy_from_user(&val, optval, sizeof(val)))
2002 return -EFAULT;
2003 po->tp_reserve = val;
2004 return 0;
2005 }
2006 case PACKET_LOSS:
2007 {
2008 unsigned int val;
2009
2010 if (optlen != sizeof(val))
2011 return -EINVAL;
2012 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2013 return -EBUSY;
2014 if (copy_from_user(&val, optval, sizeof(val)))
2015 return -EFAULT;
2016 po->tp_loss = !!val;
2017 return 0;
2018 }
2019 case PACKET_AUXDATA:
2020 {
2021 int val;
2022
2023 if (optlen < sizeof(val))
2024 return -EINVAL;
2025 if (copy_from_user(&val, optval, sizeof(val)))
2026 return -EFAULT;
2027
2028 po->auxdata = !!val;
2029 return 0;
2030 }
2031 case PACKET_ORIGDEV:
2032 {
2033 int val;
2034
2035 if (optlen < sizeof(val))
2036 return -EINVAL;
2037 if (copy_from_user(&val, optval, sizeof(val)))
2038 return -EFAULT;
2039
2040 po->origdev = !!val;
2041 return 0;
2042 }
2043 case PACKET_VNET_HDR:
2044 {
2045 int val;
2046
2047 if (sock->type != SOCK_RAW)
2048 return -EINVAL;
2049 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2050 return -EBUSY;
2051 if (optlen < sizeof(val))
2052 return -EINVAL;
2053 if (copy_from_user(&val, optval, sizeof(val)))
2054 return -EFAULT;
2055
2056 po->has_vnet_hdr = !!val;
2057 return 0;
2058 }
2059 case PACKET_TIMESTAMP:
2060 {
2061 int val;
2062
2063 if (optlen != sizeof(val))
2064 return -EINVAL;
2065 if (copy_from_user(&val, optval, sizeof(val)))
2066 return -EFAULT;
2067
2068 po->tp_tstamp = val;
2069 return 0;
2070 }
2071 default:
2072 return -ENOPROTOOPT;
2073 }
2074}
2075
2076static int packet_getsockopt(struct socket *sock, int level, int optname,
2077 char __user *optval, int __user *optlen)
2078{
2079 int len;
2080 int val;
2081 struct sock *sk = sock->sk;
2082 struct packet_sock *po = pkt_sk(sk);
2083 void *data;
2084 struct tpacket_stats st;
2085
2086 if (level != SOL_PACKET)
2087 return -ENOPROTOOPT;
2088
2089 if (get_user(len, optlen))
2090 return -EFAULT;
2091
2092 if (len < 0)
2093 return -EINVAL;
2094
2095 switch (optname) {
2096 case PACKET_STATISTICS:
2097 if (len > sizeof(struct tpacket_stats))
2098 len = sizeof(struct tpacket_stats);
2099 spin_lock_bh(&sk->sk_receive_queue.lock);
2100 st = po->stats;
2101 memset(&po->stats, 0, sizeof(st));
2102 spin_unlock_bh(&sk->sk_receive_queue.lock);
2103 st.tp_packets += st.tp_drops;
2104
2105 data = &st;
2106 break;
2107 case PACKET_AUXDATA:
2108 if (len > sizeof(int))
2109 len = sizeof(int);
2110 val = po->auxdata;
2111
2112 data = &val;
2113 break;
2114 case PACKET_ORIGDEV:
2115 if (len > sizeof(int))
2116 len = sizeof(int);
2117 val = po->origdev;
2118
2119 data = &val;
2120 break;
2121 case PACKET_VNET_HDR:
2122 if (len > sizeof(int))
2123 len = sizeof(int);
2124 val = po->has_vnet_hdr;
2125
2126 data = &val;
2127 break;
2128 case PACKET_VERSION:
2129 if (len > sizeof(int))
2130 len = sizeof(int);
2131 val = po->tp_version;
2132 data = &val;
2133 break;
2134 case PACKET_HDRLEN:
2135 if (len > sizeof(int))
2136 len = sizeof(int);
2137 if (copy_from_user(&val, optval, len))
2138 return -EFAULT;
2139 switch (val) {
2140 case TPACKET_V1:
2141 val = sizeof(struct tpacket_hdr);
2142 break;
2143 case TPACKET_V2:
2144 val = sizeof(struct tpacket2_hdr);
2145 break;
2146 default:
2147 return -EINVAL;
2148 }
2149 data = &val;
2150 break;
2151 case PACKET_RESERVE:
2152 if (len > sizeof(unsigned int))
2153 len = sizeof(unsigned int);
2154 val = po->tp_reserve;
2155 data = &val;
2156 break;
2157 case PACKET_LOSS:
2158 if (len > sizeof(unsigned int))
2159 len = sizeof(unsigned int);
2160 val = po->tp_loss;
2161 data = &val;
2162 break;
2163 case PACKET_TIMESTAMP:
2164 if (len > sizeof(int))
2165 len = sizeof(int);
2166 val = po->tp_tstamp;
2167 data = &val;
2168 break;
2169 default:
2170 return -ENOPROTOOPT;
2171 }
2172
2173 if (put_user(len, optlen))
2174 return -EFAULT;
2175 if (copy_to_user(optval, data, len))
2176 return -EFAULT;
2177 return 0;
2178}
2179
2180
2181static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2182{
2183 struct sock *sk;
2184 struct hlist_node *node;
2185 struct net_device *dev = data;
2186 struct net *net = dev_net(dev);
2187
2188 rcu_read_lock();
2189 sk_for_each_rcu(sk, node, &net->packet.sklist) {
2190 struct packet_sock *po = pkt_sk(sk);
2191
2192 switch (msg) {
2193 case NETDEV_UNREGISTER:
2194 if (po->mclist)
2195 packet_dev_mclist(dev, po->mclist, -1);
2196
2197
2198 case NETDEV_DOWN:
2199 if (dev->ifindex == po->ifindex) {
2200 spin_lock(&po->bind_lock);
2201 if (po->running) {
2202 __dev_remove_pack(&po->prot_hook);
2203 __sock_put(sk);
2204 po->running = 0;
2205 sk->sk_err = ENETDOWN;
2206 if (!sock_flag(sk, SOCK_DEAD))
2207 sk->sk_error_report(sk);
2208 }
2209 if (msg == NETDEV_UNREGISTER) {
2210 po->ifindex = -1;
2211 po->prot_hook.dev = NULL;
2212 }
2213 spin_unlock(&po->bind_lock);
2214 }
2215 break;
2216 case NETDEV_UP:
2217 if (dev->ifindex == po->ifindex) {
2218 spin_lock(&po->bind_lock);
2219 if (po->num && !po->running) {
2220 dev_add_pack(&po->prot_hook);
2221 sock_hold(sk);
2222 po->running = 1;
2223 }
2224 spin_unlock(&po->bind_lock);
2225 }
2226 break;
2227 }
2228 }
2229 rcu_read_unlock();
2230 return NOTIFY_DONE;
2231}
2232
2233
2234static int packet_ioctl(struct socket *sock, unsigned int cmd,
2235 unsigned long arg)
2236{
2237 struct sock *sk = sock->sk;
2238
2239 switch (cmd) {
2240 case SIOCOUTQ:
2241 {
2242 int amount = sk_wmem_alloc_get(sk);
2243
2244 return put_user(amount, (int __user *)arg);
2245 }
2246 case SIOCINQ:
2247 {
2248 struct sk_buff *skb;
2249 int amount = 0;
2250
2251 spin_lock_bh(&sk->sk_receive_queue.lock);
2252 skb = skb_peek(&sk->sk_receive_queue);
2253 if (skb)
2254 amount = skb->len;
2255 spin_unlock_bh(&sk->sk_receive_queue.lock);
2256 return put_user(amount, (int __user *)arg);
2257 }
2258 case SIOCGSTAMP:
2259 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2260 case SIOCGSTAMPNS:
2261 return sock_get_timestampns(sk, (struct timespec __user *)arg);
2262
2263#ifdef CONFIG_INET
2264 case SIOCADDRT:
2265 case SIOCDELRT:
2266 case SIOCDARP:
2267 case SIOCGARP:
2268 case SIOCSARP:
2269 case SIOCGIFADDR:
2270 case SIOCSIFADDR:
2271 case SIOCGIFBRDADDR:
2272 case SIOCSIFBRDADDR:
2273 case SIOCGIFNETMASK:
2274 case SIOCSIFNETMASK:
2275 case SIOCGIFDSTADDR:
2276 case SIOCSIFDSTADDR:
2277 case SIOCSIFFLAGS:
2278 return inet_dgram_ops.ioctl(sock, cmd, arg);
2279#endif
2280
2281 default:
2282 return -ENOIOCTLCMD;
2283 }
2284 return 0;
2285}
2286
2287static unsigned int packet_poll(struct file *file, struct socket *sock,
2288 poll_table *wait)
2289{
2290 struct sock *sk = sock->sk;
2291 struct packet_sock *po = pkt_sk(sk);
2292 unsigned int mask = datagram_poll(file, sock, wait);
2293
2294 spin_lock_bh(&sk->sk_receive_queue.lock);
2295 if (po->rx_ring.pg_vec) {
2296 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
2297 mask |= POLLIN | POLLRDNORM;
2298 }
2299 spin_unlock_bh(&sk->sk_receive_queue.lock);
2300 spin_lock_bh(&sk->sk_write_queue.lock);
2301 if (po->tx_ring.pg_vec) {
2302 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2303 mask |= POLLOUT | POLLWRNORM;
2304 }
2305 spin_unlock_bh(&sk->sk_write_queue.lock);
2306 return mask;
2307}
2308
2309
2310
2311
2312
2313
2314static void packet_mm_open(struct vm_area_struct *vma)
2315{
2316 struct file *file = vma->vm_file;
2317 struct socket *sock = file->private_data;
2318 struct sock *sk = sock->sk;
2319
2320 if (sk)
2321 atomic_inc(&pkt_sk(sk)->mapped);
2322}
2323
2324static void packet_mm_close(struct vm_area_struct *vma)
2325{
2326 struct file *file = vma->vm_file;
2327 struct socket *sock = file->private_data;
2328 struct sock *sk = sock->sk;
2329
2330 if (sk)
2331 atomic_dec(&pkt_sk(sk)->mapped);
2332}
2333
2334static const struct vm_operations_struct packet_mmap_ops = {
2335 .open = packet_mm_open,
2336 .close = packet_mm_close,
2337};
2338
2339static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
2340 unsigned int len)
2341{
2342 int i;
2343
2344 for (i = 0; i < len; i++) {
2345 if (likely(pg_vec[i].buffer)) {
2346 if (is_vmalloc_addr(pg_vec[i].buffer))
2347 vfree(pg_vec[i].buffer);
2348 else
2349 free_pages((unsigned long)pg_vec[i].buffer,
2350 order);
2351 pg_vec[i].buffer = NULL;
2352 }
2353 }
2354 kfree(pg_vec);
2355}
2356
2357static inline char *alloc_one_pg_vec_page(unsigned long order)
2358{
2359 char *buffer = NULL;
2360 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2361 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
2362
2363 buffer = (char *) __get_free_pages(gfp_flags, order);
2364
2365 if (buffer)
2366 return buffer;
2367
2368
2369
2370
2371 buffer = vzalloc((1 << order) * PAGE_SIZE);
2372
2373 if (buffer)
2374 return buffer;
2375
2376
2377
2378
2379 gfp_flags &= ~__GFP_NORETRY;
2380 buffer = (char *)__get_free_pages(gfp_flags, order);
2381 if (buffer)
2382 return buffer;
2383
2384
2385
2386
2387 return NULL;
2388}
2389
2390static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
2391{
2392 unsigned int block_nr = req->tp_block_nr;
2393 struct pgv *pg_vec;
2394 int i;
2395
2396 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
2397 if (unlikely(!pg_vec))
2398 goto out;
2399
2400 for (i = 0; i < block_nr; i++) {
2401 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
2402 if (unlikely(!pg_vec[i].buffer))
2403 goto out_free_pgvec;
2404 }
2405
2406out:
2407 return pg_vec;
2408
2409out_free_pgvec:
2410 free_pg_vec(pg_vec, order, block_nr);
2411 pg_vec = NULL;
2412 goto out;
2413}
2414
2415static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2416 int closing, int tx_ring)
2417{
2418 struct pgv *pg_vec = NULL;
2419 struct packet_sock *po = pkt_sk(sk);
2420 int was_running, order = 0;
2421 struct packet_ring_buffer *rb;
2422 struct sk_buff_head *rb_queue;
2423 __be16 num;
2424 int err;
2425
2426 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2427 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2428
2429 err = -EBUSY;
2430 if (!closing) {
2431 if (atomic_read(&po->mapped))
2432 goto out;
2433 if (atomic_read(&rb->pending))
2434 goto out;
2435 }
2436
2437 if (req->tp_block_nr) {
2438
2439 err = -EBUSY;
2440 if (unlikely(rb->pg_vec))
2441 goto out;
2442
2443 switch (po->tp_version) {
2444 case TPACKET_V1:
2445 po->tp_hdrlen = TPACKET_HDRLEN;
2446 break;
2447 case TPACKET_V2:
2448 po->tp_hdrlen = TPACKET2_HDRLEN;
2449 break;
2450 }
2451
2452 err = -EINVAL;
2453 if (unlikely((int)req->tp_block_size <= 0))
2454 goto out;
2455 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
2456 goto out;
2457 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
2458 po->tp_reserve))
2459 goto out;
2460 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
2461 goto out;
2462
2463 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2464 if (unlikely(rb->frames_per_block <= 0))
2465 goto out;
2466 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2467 req->tp_frame_nr))
2468 goto out;
2469
2470 err = -ENOMEM;
2471 order = get_order(req->tp_block_size);
2472 pg_vec = alloc_pg_vec(req, order);
2473 if (unlikely(!pg_vec))
2474 goto out;
2475 }
2476
2477 else {
2478 err = -EINVAL;
2479 if (unlikely(req->tp_frame_nr))
2480 goto out;
2481 }
2482
2483 lock_sock(sk);
2484
2485
2486 spin_lock(&po->bind_lock);
2487 was_running = po->running;
2488 num = po->num;
2489 if (was_running) {
2490 __dev_remove_pack(&po->prot_hook);
2491 po->num = 0;
2492 po->running = 0;
2493 __sock_put(sk);
2494 }
2495 spin_unlock(&po->bind_lock);
2496
2497 synchronize_net();
2498
2499 err = -EBUSY;
2500 mutex_lock(&po->pg_vec_lock);
2501 if (closing || atomic_read(&po->mapped) == 0) {
2502 err = 0;
2503 spin_lock_bh(&rb_queue->lock);
2504 swap(rb->pg_vec, pg_vec);
2505 rb->frame_max = (req->tp_frame_nr - 1);
2506 rb->head = 0;
2507 rb->frame_size = req->tp_frame_size;
2508 spin_unlock_bh(&rb_queue->lock);
2509
2510 swap(rb->pg_vec_order, order);
2511 swap(rb->pg_vec_len, req->tp_block_nr);
2512
2513 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2514 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2515 tpacket_rcv : packet_rcv;
2516 skb_queue_purge(rb_queue);
2517 if (atomic_read(&po->mapped))
2518 pr_err("packet_mmap: vma is busy: %d\n",
2519 atomic_read(&po->mapped));
2520 }
2521 mutex_unlock(&po->pg_vec_lock);
2522
2523 spin_lock(&po->bind_lock);
2524 if (was_running && !po->running) {
2525 sock_hold(sk);
2526 po->running = 1;
2527 po->num = num;
2528 dev_add_pack(&po->prot_hook);
2529 }
2530 spin_unlock(&po->bind_lock);
2531
2532 release_sock(sk);
2533
2534 if (pg_vec)
2535 free_pg_vec(pg_vec, order, req->tp_block_nr);
2536out:
2537 return err;
2538}
2539
2540static int packet_mmap(struct file *file, struct socket *sock,
2541 struct vm_area_struct *vma)
2542{
2543 struct sock *sk = sock->sk;
2544 struct packet_sock *po = pkt_sk(sk);
2545 unsigned long size, expected_size;
2546 struct packet_ring_buffer *rb;
2547 unsigned long start;
2548 int err = -EINVAL;
2549 int i;
2550
2551 if (vma->vm_pgoff)
2552 return -EINVAL;
2553
2554 mutex_lock(&po->pg_vec_lock);
2555
2556 expected_size = 0;
2557 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2558 if (rb->pg_vec) {
2559 expected_size += rb->pg_vec_len
2560 * rb->pg_vec_pages
2561 * PAGE_SIZE;
2562 }
2563 }
2564
2565 if (expected_size == 0)
2566 goto out;
2567
2568 size = vma->vm_end - vma->vm_start;
2569 if (size != expected_size)
2570 goto out;
2571
2572 start = vma->vm_start;
2573 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
2574 if (rb->pg_vec == NULL)
2575 continue;
2576
2577 for (i = 0; i < rb->pg_vec_len; i++) {
2578 struct page *page;
2579 void *kaddr = rb->pg_vec[i].buffer;
2580 int pg_num;
2581
2582 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
2583 page = pgv_to_page(kaddr);
2584 err = vm_insert_page(vma, start, page);
2585 if (unlikely(err))
2586 goto out;
2587 start += PAGE_SIZE;
2588 kaddr += PAGE_SIZE;
2589 }
2590 }
2591 }
2592
2593 atomic_inc(&po->mapped);
2594 vma->vm_ops = &packet_mmap_ops;
2595 err = 0;
2596
2597out:
2598 mutex_unlock(&po->pg_vec_lock);
2599 return err;
2600}
2601
2602static const struct proto_ops packet_ops_spkt = {
2603 .family = PF_PACKET,
2604 .owner = THIS_MODULE,
2605 .release = packet_release,
2606 .bind = packet_bind_spkt,
2607 .connect = sock_no_connect,
2608 .socketpair = sock_no_socketpair,
2609 .accept = sock_no_accept,
2610 .getname = packet_getname_spkt,
2611 .poll = datagram_poll,
2612 .ioctl = packet_ioctl,
2613 .listen = sock_no_listen,
2614 .shutdown = sock_no_shutdown,
2615 .setsockopt = sock_no_setsockopt,
2616 .getsockopt = sock_no_getsockopt,
2617 .sendmsg = packet_sendmsg_spkt,
2618 .recvmsg = packet_recvmsg,
2619 .mmap = sock_no_mmap,
2620 .sendpage = sock_no_sendpage,
2621};
2622
2623static const struct proto_ops packet_ops = {
2624 .family = PF_PACKET,
2625 .owner = THIS_MODULE,
2626 .release = packet_release,
2627 .bind = packet_bind,
2628 .connect = sock_no_connect,
2629 .socketpair = sock_no_socketpair,
2630 .accept = sock_no_accept,
2631 .getname = packet_getname,
2632 .poll = packet_poll,
2633 .ioctl = packet_ioctl,
2634 .listen = sock_no_listen,
2635 .shutdown = sock_no_shutdown,
2636 .setsockopt = packet_setsockopt,
2637 .getsockopt = packet_getsockopt,
2638 .sendmsg = packet_sendmsg,
2639 .recvmsg = packet_recvmsg,
2640 .mmap = packet_mmap,
2641 .sendpage = sock_no_sendpage,
2642};
2643
2644static const struct net_proto_family packet_family_ops = {
2645 .family = PF_PACKET,
2646 .create = packet_create,
2647 .owner = THIS_MODULE,
2648};
2649
2650static struct notifier_block packet_netdev_notifier = {
2651 .notifier_call = packet_notifier,
2652};
2653
2654#ifdef CONFIG_PROC_FS
2655
2656static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2657 __acquires(RCU)
2658{
2659 struct net *net = seq_file_net(seq);
2660
2661 rcu_read_lock();
2662 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
2663}
2664
2665static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2666{
2667 struct net *net = seq_file_net(seq);
2668 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
2669}
2670
2671static void packet_seq_stop(struct seq_file *seq, void *v)
2672 __releases(RCU)
2673{
2674 rcu_read_unlock();
2675}
2676
2677static int packet_seq_show(struct seq_file *seq, void *v)
2678{
2679 if (v == SEQ_START_TOKEN)
2680 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2681 else {
2682 struct sock *s = sk_entry(v);
2683 const struct packet_sock *po = pkt_sk(s);
2684
2685 seq_printf(seq,
2686 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
2687 s,
2688 atomic_read(&s->sk_refcnt),
2689 s->sk_type,
2690 ntohs(po->num),
2691 po->ifindex,
2692 po->running,
2693 atomic_read(&s->sk_rmem_alloc),
2694 sock_i_uid(s),
2695 sock_i_ino(s));
2696 }
2697
2698 return 0;
2699}
2700
2701static const struct seq_operations packet_seq_ops = {
2702 .start = packet_seq_start,
2703 .next = packet_seq_next,
2704 .stop = packet_seq_stop,
2705 .show = packet_seq_show,
2706};
2707
2708static int packet_seq_open(struct inode *inode, struct file *file)
2709{
2710 return seq_open_net(inode, file, &packet_seq_ops,
2711 sizeof(struct seq_net_private));
2712}
2713
2714static const struct file_operations packet_seq_fops = {
2715 .owner = THIS_MODULE,
2716 .open = packet_seq_open,
2717 .read = seq_read,
2718 .llseek = seq_lseek,
2719 .release = seq_release_net,
2720};
2721
2722#endif
2723
2724static int __net_init packet_net_init(struct net *net)
2725{
2726 spin_lock_init(&net->packet.sklist_lock);
2727 INIT_HLIST_HEAD(&net->packet.sklist);
2728
2729 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
2730 return -ENOMEM;
2731
2732 return 0;
2733}
2734
2735static void __net_exit packet_net_exit(struct net *net)
2736{
2737 proc_net_remove(net, "packet");
2738}
2739
2740static struct pernet_operations packet_net_ops = {
2741 .init = packet_net_init,
2742 .exit = packet_net_exit,
2743};
2744
2745
2746static void __exit packet_exit(void)
2747{
2748 unregister_netdevice_notifier(&packet_netdev_notifier);
2749 unregister_pernet_subsys(&packet_net_ops);
2750 sock_unregister(PF_PACKET);
2751 proto_unregister(&packet_proto);
2752}
2753
2754static int __init packet_init(void)
2755{
2756 int rc = proto_register(&packet_proto, 0);
2757
2758 if (rc != 0)
2759 goto out;
2760
2761 sock_register(&packet_family_ops);
2762 register_pernet_subsys(&packet_net_ops);
2763 register_netdevice_notifier(&packet_netdev_notifier);
2764out:
2765 return rc;
2766}
2767
2768module_init(packet_init);
2769module_exit(packet_exit);
2770MODULE_LICENSE("GPL");
2771MODULE_ALIAS_NETPROTO(PF_PACKET);
2772