#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif

#include "internal.h"

struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

#define PGV_FROM_VMALLOC 1

#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)
#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))

struct packet_sock;
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
			struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(unsigned long);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_init_blk_timer(struct packet_sock *,
		struct tpacket_kbdq_core *,
		void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);

struct packet_skb_cb {
	unsigned int origlen;
	union {
		struct sockaddr_pkt pkt;
		struct sockaddr_ll ll;
	} sa;
};

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

/* register_prot_hook must be invoked with the po->bind_lock held,
 * or from a context in which asynchronous accesses to the packet
 * socket is not possible (packet_create()).
 */
static void register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	if (!po->running) {
		if (po->fanout)
			__fanout_link(sk, po);
		else
			dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}
}

/* {,__}unregister_prot_hook() must be invoked with the po->bind_lock
 * held.   If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * caller's responsibility to take care of this.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	po->running = 0;
	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);
	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}

static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		__unregister_prot_hook(sk, sync);
}

static inline __pure struct page *pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(struct packet_sock *po, void *frame)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return 0;
	}
}
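
/* Note on the V1/V2 status handshake above: the kernel hands a frame to
 * user space by setting TP_STATUS_USER in tp_status, and user space hands
 * it back by resetting it to TP_STATUS_KERNEL.  The smp_wmb()/smp_rmb()
 * pairing in __packet_set_status()/__packet_get_status() orders the
 * status word against the frame payload, so a reader never observes a
 * "done" status while the data is still being written.
 */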

static void *packet_lookup_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		unsigned int position,
		int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}

static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
		int tx_ring,
		struct sk_buff_head *rb_queue)
{
	struct tpacket_kbdq_core *pkc;

	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;

	spin_lock(&rb_queue->lock);
	pkc->delete_blk_timer = 1;
	spin_unlock(&rb_queue->lock);

	prb_del_retire_blk_timer(pkc);
}

static void prb_init_blk_timer(struct packet_sock *po,
		struct tpacket_kbdq_core *pkc,
		void (*func) (unsigned long))
{
	init_timer(&pkc->retire_blk_timer);
	pkc->retire_blk_timer.data = (long)po;
	pkc->retire_blk_timer.function = func;
	pkc->retire_blk_timer.expires = jiffies;
}

static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
{
	struct tpacket_kbdq_core *pkc;

	if (tx_ring)
		BUG();

	pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
	prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
}

static int prb_calc_retire_blk_tmo(struct packet_sock *po,
				int blk_size_in_bytes)
{
	struct net_device *dev;
	unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
	struct ethtool_cmd ecmd;
	int err;
	u32 speed;

	rtnl_lock();
	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
	if (unlikely(!dev)) {
		rtnl_unlock();
		return DEFAULT_PRB_RETIRE_TOV;
	}
	err = __ethtool_get_settings(dev, &ecmd);
	speed = ethtool_cmd_speed(&ecmd);
	rtnl_unlock();
	if (!err) {
		/*
		 * If the link speed is so slow you don't really
		 * need to worry about perf anyways
		 */
		if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) {
			return DEFAULT_PRB_RETIRE_TOV;
		} else {
			msec = 1;
			div = speed / 1000;
		}
	}

	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);

	if (div)
		mbits /= div;

	tmo = mbits * msec;

	if (div)
		return tmo+1;
	return tmo;
}
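
/* The timeout above approximates how long the link needs to fill one
 * block at line rate: mbits is the block size in megabits, div scales
 * a one-millisecond budget by the link speed in gigabits.  E.g. a
 * 4 MiB block on a 10 Gbit/s link gives mbits = 32, div = 10, so the
 * block retires after roughly 32/10 + 1 = 4 ms if user space has not
 * consumed it sooner.
 */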

static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
			union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}

static void init_prb_bdqc(struct packet_sock *po,
			struct packet_ring_buffer *rb,
			struct pgv *pg_vec,
			union tpacket_req_u *req_u, int tx_ring)
{
	struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
	struct tpacket_block_desc *pbd;

	memset(p1, 0x0, sizeof(*p1));

	p1->knxt_seq_num = 1;
	p1->pkbdq = pg_vec;
	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
	p1->pkblk_start	= pg_vec[0].buffer;
	p1->kblk_size = req_u->req3.tp_block_size;
	p1->knum_blocks	= req_u->req3.tp_block_nr;
	p1->hdrlen = po->tp_hdrlen;
	p1->version = po->tp_version;
	p1->last_kactive_blk_num = 0;
	po->stats_u.stats3.tp_freeze_q_cnt = 0;
	if (req_u->req3.tp_retire_blk_tov)
		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
	else
		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
						req_u->req3.tp_block_size);
	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;

	prb_init_ft_ops(p1, req_u);
	prb_setup_retire_blk_timer(po, tx_ring);
	prb_open_block(p1, pbd);
}

/*
 * Do NOT update the last_kactive_blk_num first.
 * Assumes sk_buff_head lock is held.
 */
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	mod_timer(&pkc->retire_blk_timer,
			jiffies + pkc->tov_in_jiffies);
	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}
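
/* Retire-block timer, in brief: the timer is armed when a block is
 * opened and re-armed on every refresh.  If it fires while the current
 * block still holds packets that user space has not consumed, the block
 * is force-closed with TP_STATUS_BLK_TMO so the data becomes visible
 * without waiting for the block to fill.  If the queue was frozen
 * because user space fell behind, the handler either just re-arms the
 * timer (block still in use) or re-opens the block (user space caught
 * up while the link was idle).
 */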
static void prb_retire_rx_blk_timer_expired(unsigned long data)
{
	struct packet_sock *po = (struct packet_sock *)data;
	struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
	unsigned int frozen;
	struct tpacket_block_desc *pbd;

	spin_lock(&po->sk.sk_receive_queue.lock);

	frozen = prb_queue_frozen(pkc);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	if (unlikely(pkc->delete_blk_timer))
		goto out;

	/* We only need to plug the race when the block is partially filled.
	 * tpacket_rcv:
	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
	 *		copy_bits() is in progress ...
	 *		timer fires on other cpu:
	 *		we can't retire the current block because copy_bits
	 *		is in progress.
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		while (atomic_read(&pkc->blk_fill_in_prog)) {
			/* Waiting for skb_copy_bits to finish... */
			cpu_relax();
		}
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/* Case 1. Queue was frozen because user-space was
			 *	   lagging behind.
			 */
			if (prb_curr_blk_in_use(pkc, pbd)) {
				/*
				 * Ok, user-space is still behind.
				 * So just refresh the timer.
				 */
				goto refresh_timer;
			} else {
			       /* Case 2. queue was frozen, user-space caught up,
				* now the link went idle && the timer fired.
				* We don't have a block to close. So we open
				* this block and restart the timer.
				* opening a block thaws the queue, restarts timer.
				* Thawing/timer-refresh is a side effect.
				*/
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}

refresh_timer:
	_prb_refresh_rx_retire_blk_timer(pkc);

out:
	spin_unlock(&po->sk.sk_receive_queue.lock);
}

static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* Skip the block header (we know header WILL fit in 4K) */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}
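
/* The two-phase flush above is deliberate: the packet data must be
 * flushed and ordered (write barrier) before the block status is
 * updated, otherwise a user-space reader polling block_status could
 * observe TP_STATUS_USER while the payload pages are still stale in
 * its cache.
 */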

/*
 * Side effect of closing a block:
 *
 * 1) flush the block
 * 2) Increment active_blk_num
 *
 * Note: We DONT refresh the timer on purpose.
 *	Because almost always the next block will be opened.
 */
static void prb_close_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1,
		struct packet_sock *po, unsigned int stat)
{
	__u32 status = TP_STATUS_USER | stat;

	struct tpacket3_hdr *last_pkt;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	if (po->stats.tp_drops)
		status |= TP_STATUS_LOSING;

	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
	last_pkt->tp_next_offset = 0;

	/* Get the ts of the last pkt */
	if (BLOCK_NUM_PKTS(pbd1)) {
		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
	} else {
		/* Ok, we tmo'd - so get the current time */
		struct timespec ts;
		getnstimeofday(&ts);
		h1->ts_last_pkt.ts_sec = ts.tv_sec;
		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
	}

	smp_wmb();

	/* Flush the block */
	prb_flush_block(pkc1, pbd1, status);

	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
}

static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
{
	pkc->reset_pending_on_curr_blk = 0;
}

/*
 * Side effect of opening a block:
 *
 * 1) prb_queue is thawed.
 * 2) retire_blk_timer is refreshed.
 *
 */
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
	struct tpacket_block_desc *pbd1)
{
	struct timespec ts;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	smp_rmb();

	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {

		/* We could have just memset this but we will lose the
		 * flexibility of making the priv area sticky
		 */
		BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
		BLOCK_NUM_PKTS(pbd1) = 0;
		BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		getnstimeofday(&ts);
		h1->ts_first_pkt.ts_sec = ts.tv_sec;
		h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
		pkc1->pkblk_start = (char *)pbd1;
		pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
		BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
		pbd1->version = pkc1->version;
		pkc1->prev = pkc1->nxt_offset;
		pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
		prb_thaw_queue(pkc1);
		_prb_refresh_rx_retire_blk_timer(pkc1);

		smp_wmb();

		return;
	}

	WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
		pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
	dump_stack();
	BUG();
}
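
/* Queue freeze, in brief: when the kernel wraps around the block ring
 * and finds the next block still marked TP_STATUS_USER, user space has
 * fallen behind.  We "freeze" the queue (reset_pending_on_curr_blk) and
 * drop incoming packets until either user space releases that block (a
 * later packet re-opens it, which also thaws the queue) or the retire
 * timer re-opens it on an idle link.
 */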
static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
				  struct packet_sock *po)
{
	pkc->reset_pending_on_curr_blk = 1;
	po->stats_u.stats3.tp_freeze_q_cnt++;
}

#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))

/*
 * If the next block is free then we will dispatch it
 * and return a good offset.
 * Else, we will freeze the queue.
 * So, caller must check the return value.
 */
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po)
{
	struct tpacket_block_desc *pbd;

	smp_rmb();

	/* 1. Get current block num */
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* 2. If this block is currently in_use then freeze the queue */
	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
		prb_freeze_queue(pkc, po);
		return NULL;
	}

	/*
	 * 3.
	 * open this block and return the offset where the first packet
	 * starts.
	 */
	prb_open_block(pkc, pbd);
	return (void *)pkc->nxt_offset;
}

static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* retire/close the current block */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/*
		 * Plug the case where copy_bits() is in progress on
		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
		 * have space to copy the pkt in the current block and
		 * called prb_retire_current_block()
		 *
		 * We don't need to worry about the TMO case because
		 * the timer-handler already handled this case.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			while (atomic_read(&pkc->blk_fill_in_prog)) {
				/* Waiting for skb_copy_bits to finish... */
				cpu_relax();
			}
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}

	WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
	dump_stack();
	BUG();
}

static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
				      struct tpacket_block_desc *pbd)
{
	return TP_STATUS_USER & BLOCK_STATUS(pbd);
}

static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
{
	return pkc->reset_pending_on_curr_blk;
}

static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	atomic_dec(&pkc->blk_fill_in_prog);
}

static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
}

static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = 0;
}

static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	if (vlan_tx_tag_present(pkc->skb)) {
		ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
		ppd->tp_status = TP_STATUS_VLAN_VALID;
	} else {
		ppd->hv1.tp_vlan_tci = 0;
		ppd->tp_status = TP_STATUS_AVAILABLE;
	}
}

static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	prb_fill_vlan_info(pkc, ppd);

	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
		prb_fill_rxhash(pkc, ppd);
	else
		prb_clear_rxhash(pkc, ppd);
}

static void prb_fill_curr_block(char *curr,
				struct tpacket_kbdq_core *pkc,
				struct tpacket_block_desc *pbd,
				unsigned int len)
{
	struct tpacket3_hdr *ppd;

	ppd = (struct tpacket3_hdr *)curr;
	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
	pkc->prev = curr;
	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_NUM_PKTS(pbd) += 1;
	atomic_inc(&pkc->blk_fill_in_prog);
	prb_run_all_ft_ops(pkc, ppd);
}

/* Assumes caller has the sk->rx_queue.lock */
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status,
					    unsigned int len)
{
	struct tpacket_kbdq_core *pkc;
	struct tpacket_block_desc *pbd;
	char *curr, *end;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Queue is frozen when user space is lagging behind */
	if (prb_queue_frozen(pkc)) {
		/*
		 * Check if that last block which caused the queue to freeze,
		 * is still in_use by user-space.
		 */
		if (prb_curr_blk_in_use(pkc, pbd)) {
			/* Can't record this packet */
			return NULL;
		} else {
			/*
			 * Ok, the block was released by user-space.
			 * Now let's open that block.
			 * opening a block also thaws the queue.
			 * Thawing is a side effect.
			 */
			prb_open_block(pkc, pbd);
		}
	}

	smp_mb();
	curr = pkc->nxt_offset;
	pkc->skb = skb;
	end = (char *)pbd + pkc->kblk_size;

	/* first try the current block */
	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* Ok, close the current block */
	prb_retire_current_block(pkc, po, 0);

	/* Now, try to dispatch the next block */
	curr = (char *)prb_dispatch_next_block(pkc, po);
	if (curr) {
		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/*
	 * No free blocks are available. user_space hasn't caught up yet.
	 * Queue was just frozen and now this packet will get dropped.
	 */
	return NULL;
}

static void *packet_current_rx_frame(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status, unsigned int len)
{
	char *curr = NULL;
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		curr = packet_lookup_frame(po, &po->rx_ring,
					po->rx_ring.head, status);
		return curr;
	case TPACKET_V3:
		return __packet_lookup_frame_in_block(po, skb, status, len);
	default:
		WARN(1, "TPACKET version not supported\n");
		BUG();
		return NULL;
	}
}

static void *prb_lookup_block(struct packet_sock *po,
				     struct packet_ring_buffer *rb,
				     unsigned int previous,
				     int status)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, previous);

	if (status != BLOCK_STATUS(pbd))
		return NULL;
	return pbd;
}

static int prb_previous_blk_num(struct packet_ring_buffer *rb)
{
	unsigned int prev;
	if (rb->prb_bdqc.kactive_blk_num)
		prev = rb->prb_bdqc.kactive_blk_num-1;
	else
		prev = rb->prb_bdqc.knum_blocks-1;
	return prev;
}

/* Assumes caller has held the rx_queue.lock */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	unsigned int previous = prb_previous_blk_num(rb);
	return prb_lookup_block(po, rb, previous, status);
}

static void *packet_previous_rx_frame(struct packet_sock *po,
					     struct packet_ring_buffer *rb,
					     int status)
{
	if (po->tp_version <= TPACKET_V2)
		return packet_previous_frame(po, rb, status);

	return __prb_previous_block(po, rb, status);
}

static void packet_increment_rx_head(struct packet_sock *po,
					    struct packet_ring_buffer *rb)
{
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		return packet_increment_head(rb);
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return;
	}
}

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(atomic_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
{
	int x = atomic_read(&f->rr_cur) + 1;

	if (x >= num)
		x = 0;

	return x;
}

static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
	u32 idx, hash = skb->rxhash;

	idx = ((u64)hash * num) >> 32;

	return f->arr[idx];
}

static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
	int cur, old;

	cur = atomic_read(&f->rr_cur);
	while ((old = atomic_cmpxchg(&f->rr_cur, cur,
				     fanout_rr_next(f, num))) != cur)
		cur = old;
	return f->arr[cur];
}

static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
	unsigned int cpu = smp_processor_id();

	return f->arr[cpu % num];
}
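
/* The three demux flavors above pick the member socket of a fanout
 * group: _hash maps skb->rxhash onto [0, num) without a division by
 * taking the high 32 bits of hash * num (a 32.32 fixed-point scale),
 * _lb rotates round-robin via a lock-free cmpxchg loop on rr_cur, and
 * _cpu simply keys on the CPU handling the packet.
 */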

static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
			     struct packet_type *pt, struct net_device *orig_dev)
{
	struct packet_fanout *f = pt->af_packet_priv;
	unsigned int num = f->num_members;
	struct packet_sock *po;
	struct sock *sk;

	if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
	    !num) {
		kfree_skb(skb);
		return 0;
	}

	switch (f->type) {
	case PACKET_FANOUT_HASH:
	default:
		if (f->defrag) {
			skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
			if (!skb)
				return 0;
		}
		skb_get_rxhash(skb);
		sk = fanout_demux_hash(f, skb, num);
		break;
	case PACKET_FANOUT_LB:
		sk = fanout_demux_lb(f, skb, num);
		break;
	case PACKET_FANOUT_CPU:
		sk = fanout_demux_cpu(f, skb, num);
		break;
	}

	po = pkt_sk(sk);

	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}

DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);

static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;

	spin_lock(&f->lock);
	f->arr[f->num_members] = sk;
	smp_wmb();
	f->num_members++;
	spin_unlock(&f->lock);
}

static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;
	int i;

	spin_lock(&f->lock);
	for (i = 0; i < f->num_members; i++) {
		if (f->arr[i] == sk)
			break;
	}
	BUG_ON(i >= f->num_members);
	f->arr[i] = f->arr[f->num_members - 1];
	f->num_members--;
	spin_unlock(&f->lock);
}

static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
{
	if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout)
		return true;

	return false;
}

static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f, *match;
	u8 type = type_flags & 0xff;
	u8 defrag = (type_flags & PACKET_FANOUT_FLAG_DEFRAG) ? 1 : 0;
	int err;

	switch (type) {
	case PACKET_FANOUT_HASH:
	case PACKET_FANOUT_LB:
	case PACKET_FANOUT_CPU:
		break;
	default:
		return -EINVAL;
	}

	if (!po->running)
		return -EINVAL;

	if (po->fanout)
		return -EALREADY;

	mutex_lock(&fanout_mutex);
	match = NULL;
	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			match = f;
			break;
		}
	}
	err = -EINVAL;
	if (match && match->defrag != defrag)
		goto out;
	if (!match) {
		err = -ENOMEM;
		match = kzalloc(sizeof(*match), GFP_KERNEL);
		if (!match)
			goto out;
		write_pnet(&match->net, sock_net(sk));
		match->id = id;
		match->type = type;
		match->defrag = defrag;
		atomic_set(&match->rr_cur, 0);
		INIT_LIST_HEAD(&match->list);
		spin_lock_init(&match->lock);
		atomic_set(&match->sk_ref, 0);
		match->prot_hook.type = po->prot_hook.type;
		match->prot_hook.dev = po->prot_hook.dev;
		match->prot_hook.func = packet_rcv_fanout;
		match->prot_hook.af_packet_priv = match;
		match->prot_hook.id_match = match_fanout_group;
		dev_add_pack(&match->prot_hook);
		list_add(&match->list, &fanout_list);
	}
	err = -EINVAL;
	if (match->type == type &&
	    match->prot_hook.type == po->prot_hook.type &&
	    match->prot_hook.dev == po->prot_hook.dev) {
		err = -ENOSPC;
		if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
			__dev_remove_pack(&po->prot_hook);
			po->fanout = match;
			atomic_inc(&match->sk_ref);
			__fanout_link(sk, po);
			err = 0;
		}
	}
out:
	mutex_unlock(&fanout_mutex);
	return err;
}

static void fanout_release(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f;

	f = po->fanout;
	if (!f)
		return;

	mutex_lock(&fanout_mutex);
	po->fanout = NULL;

	if (atomic_dec_and_test(&f->sk_ref)) {
		list_del(&f->list);
		dev_remove_pack(&f->prot_hook);
		kfree(f);
	}
	mutex_unlock(&fanout_mutex);
}

static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;
	int extra_len = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
		if (!netif_supports_nofcs(dev)) {
			err = -EPROTONOSUPPORT;
			goto out_unlock;
		}
		extra_len = 4; /* We're doing our own CRC */
	}

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		int tlen = dev->needed_tailroom;
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		goto retry;
	}

	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_unlock;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_unlock;

	if (unlikely(extra_len == 4))
		skb->no_fcs = 1;

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

static unsigned int run_filter(const struct sk_buff *skb,
				      const struct sock *sk,
				      unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = SK_RUN_FILTER(filter, skb);
	rcu_read_unlock();

	return res;
}

/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->tail
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by the current CPU: output packets are
 * cloned by dev_queue_xmit_nit() and input packets are processed
 * serially by netif_receive_skb, so the mangling is undone before
 * anyone else sees the skb.
 */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev->header_ops) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets
			 * have ll header removed
			 */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		consume_skb(skb);
		skb = nskb;
	}

	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	skb->dropcount = atomic_read(&sk->sk_drops);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	atomic_inc(&sk->sk_drops);
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	consume_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		struct tpacket3_hdr *h3;
		void *raw;
	} h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_USER;
	unsigned short macoff, netoff, hdrlen;
	struct sk_buff *copy_skb = NULL;
	struct timeval tv;
	struct timespec ts;
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev->header_ops) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets
			 * have ll header removed
			 */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned int maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
			po->tp_reserve;
		macoff = netoff - maclen;
	}
	if (po->tp_version <= TPACKET_V2) {
		if (macoff + snaplen > po->rx_ring.frame_size) {
			if (po->copy_thresh &&
			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
				if (skb_shared(skb)) {
					copy_skb = skb_clone(skb, GFP_ATOMIC);
				} else {
					copy_skb = skb_get(skb);
					skb_head = skb->data;
				}
				if (copy_skb)
					skb_set_owner_r(copy_skb, sk);
			}
			snaplen = po->rx_ring.frame_size - macoff;
			if ((int)snaplen < 0)
				snaplen = 0;
		}
	}
	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_rx_frame(po, skb,
					TP_STATUS_KERNEL, (macoff+snaplen));
	if (!h.raw)
		goto ring_is_full;
	if (po->tp_version <= TPACKET_V2) {
		packet_increment_rx_head(po, &po->rx_ring);
		/*
		 * LOSING will be reported till you read the stats,
		 * because it's COR - Clear On Read.
		 * Anyways, moving it for V1/V2 only as V3 doesn't need this
		 * at packet level.
		 */
		if (po->stats.tp_drops)
			status |= TP_STATUS_LOSING;
	}
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			tv = ktime_to_timeval(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			tv = ktime_to_timeval(skb->tstamp);
		else
			do_gettimeofday(&tv);
		h.h1->tp_sec = tv.tv_sec;
		h.h1->tp_usec = tv.tv_usec;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		if (vlan_tx_tag_present(skb)) {
			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
			status |= TP_STATUS_VLAN_VALID;
		} else {
			h.h2->tp_vlan_tci = 0;
		}
		h.h2->tp_padding = 0;
		hdrlen = sizeof(*h.h2);
		break;
	case TPACKET_V3:
		/* tp_nxt_offset,vlan are already populated above.
		 * So DONT clear those fields here
		 */
		h.h3->tp_status |= status;
		h.h3->tp_len = skb->len;
		h.h3->tp_snaplen = snaplen;
		h.h3->tp_mac = macoff;
		h.h3->tp_net = netoff;
		if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
				&& shhwtstamps->syststamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->syststamp);
		else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
				&& shhwtstamps->hwtstamp.tv64)
			ts = ktime_to_timespec(shhwtstamps->hwtstamp);
		else if (skb->tstamp.tv64)
			ts = ktime_to_timespec(skb->tstamp);
		else
			getnstimeofday(&ts);
		h.h3->tp_sec = ts.tv_sec;
		h.h3->tp_nsec = ts.tv_nsec;
		hdrlen = sizeof(*h.h3);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	smp_mb();
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	{
		u8 *start, *end;

		if (po->tp_version <= TPACKET_V2) {
			end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
				+ macoff + snaplen);
			for (start = h.raw; start < end; start += PAGE_SIZE)
				flush_dcache_page(pgv_to_page(start));
		}
		smp_wmb();
	}
#endif
	if (po->tp_version <= TPACKET_V2)
		__packet_set_status(po, h.raw, status);
	else
		prb_clear_blk_fill_status(&po->rx_ring);

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}

static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	if (likely(po->tx_ring.pg_vec)) {
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}

static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr, int hlen)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	skb_shinfo(skb)->destructor_arg = ph.raw;

	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, hlen);
	skb_reset_network_header(skb);

	if (po->tp_tx_has_off) {
		int off_min, off_max, off;
		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
		off_max = po->tx_ring.frame_size - tp_len;
		if (sock->type == SOCK_DGRAM) {
			switch (po->tp_version) {
			case TPACKET_V2:
				off = ph.h2->tp_net;
				break;
			default:
				off = ph.h1->tp_net;
				break;
			}
		} else {
			switch (po->tp_version) {
			case TPACKET_V2:
				off = ph.h2->tp_mac;
				break;
			default:
				off = ph.h1->tp_mac;
				break;
			}
		}
		if (unlikely((off < off_min) || (off_max < off)))
			return -EINVAL;
		data = ph.raw + off;
	} else {
		data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	}
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
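
/* Note how tpacket_fill_skb() avoids copying the payload: beyond the
 * (optional) hard header, the TX ring pages holding the frame are
 * attached to the skb as page fragments via skb_fill_page_desc(), with
 * get_page() taking a reference per fragment.  The ring frame therefore
 * stays TP_STATUS_SENDING until tpacket_destruct_skb() releases it.
 */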

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	bool need_rls_dev = false;
	int err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = TP_STATUS_AVAILABLE;
	int hlen, tlen;

	mutex_lock(&po->pg_vec_lock);

	if (saddr == NULL) {
		dev = po->prot_hook.dev;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
		need_rls_dev = true;
	}

	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		hlen = LL_RESERVED_SPACE(dev);
		tlen = dev->needed_tailroom;
		skb = sock_alloc_send_skb(&po->sk,
				hlen + tlen + sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr, hlen);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	if (need_rls_dev)
		dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
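
/* The send loop above keeps scanning the TX ring for frames marked
 * TP_STATUS_SEND_REQUEST.  It exits once a scan finds no such frame
 * and, unless MSG_DONTWAIT was given, all previously queued frames
 * have completed (tx_ring.pending has drained); the return value is
 * the total payload length handed to the driver.
 */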

static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
					size_t reserve, size_t len,
					size_t linear, int noblock,
					int *err)
{
	struct sk_buff *skb;

	/* Under a page?  Don't bother with paged skb. */
	if (prepad + len < PAGE_SIZE || !linear)
		linear = len;

	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
				   err);
	if (!skb)
		return NULL;

	skb_reserve(skb, reserve);
	skb_put(skb, linear);
	skb->data_len = len - linear;
	skb->len += len - linear;

	return skb;
}
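
/* packet_alloc_skb() splits the buffer into a linear head of "linear"
 * bytes (the caller passes vnet_hdr.hdr_len as the hint) and a paged
 * tail for the rest, but falls back to a fully linear skb for sub-page
 * payloads where paging buys nothing.
 */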

static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	bool need_rls_dev = false;
	unsigned char *addr;
	int err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;
	int hlen, tlen;
	int extra_len = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		dev = po->prot_hook.dev;
		proto = po->num;
		addr = NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		proto = saddr->sll_protocol;
		addr = saddr->sll_addr;
		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
		need_rls_dev = true;
	}

	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
		if (!netif_supports_nofcs(dev)) {
			err = -EPROTONOSUPPORT;
			goto out_unlock;
		}
		extra_len = 4; /* We're doing our own CRC */
	}

	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
		goto out_unlock;

	err = -ENOBUFS;
	hlen = LL_RESERVED_SPACE(dev);
	tlen = dev->needed_tailroom;
	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
		/* Earlier code assumed this would be a VLAN pkt,
		 * double-check this now that we have the actual
		 * packet in hand.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_free;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		len += vnet_hdr_len;
	}

	if (unlikely(extra_len == 4))
		skb->no_fcs = 1;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	if (need_rls_dev)
		dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev && need_rls_dev)
		dev_put(dev);
out:
	return err;
}

static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
		struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);

	if (po->tx_ring.pg_vec)
		return tpacket_snd(po, msg);
	else
		return packet_snd(sock, msg, len);
}

/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	union tpacket_req_u req_u;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	mutex_lock(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->packet.sklist_lock);

	preempt_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	preempt_enable();

	spin_lock(&po->bind_lock);
	unregister_prot_hook(sk, false);
	if (po->prot_hook.dev) {
		dev_put(po->prot_hook.dev);
		po->prot_hook.dev = NULL;
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	if (po->rx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 0);
	}

	if (po->tx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 1);
	}

	fanout_release(sk);

	synchronize_net();
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}

/*
 *	Attach a packet hook.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->fanout) {
		if (dev)
			dev_put(dev);

		return -EINVAL;
	}

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	unregister_prot_hook(sk, true);
	po->num = protocol;
	po->prot_hook.type = protocol;
	if (po->prot_hook.dev)
		dev_put(po->prot_hook.dev);
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		register_prot_hook(sk);
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}

/*
 *	Bind a packet socket to a device
 */
static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
			    int addr_len)
{
	struct sock *sk = sock->sk;
	char name[15];
	struct net_device *dev;
	int err = -ENODEV;

	/*
	 *	Check legality
	 */

	if (addr_len != sizeof(struct sockaddr))
		return -EINVAL;
	strlcpy(name, uaddr->sa_data, sizeof(name));

	dev = dev_get_by_name(sock_net(sk), name);
	if (dev)
		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
	return err;
}
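
/* SOCK_PACKET binds carry the interface name in sockaddr.sa_data, which
 * is only 14 bytes; hence the 15-byte local buffer (room for the NUL)
 * and the strict addr_len == sizeof(struct sockaddr) check above.
 */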

static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
	struct sock *sk = sock->sk;
	struct net_device *dev = NULL;
	int err;

	/*
	 *	Check legality
	 */

	if (addr_len < sizeof(struct sockaddr_ll))
		return -EINVAL;
	if (sll->sll_family != AF_PACKET)
		return -EINVAL;

	if (sll->sll_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
		if (dev == NULL)
			goto out;
	}
	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);

out:
	return err;
}
2490
2491static struct proto packet_proto = {
2492 .name = "PACKET",
2493 .owner = THIS_MODULE,
2494 .obj_size = sizeof(struct packet_sock),
2495};
2496
2497
2498
2499
2500
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		register_prot_hook(sk);
	}

	mutex_lock(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	mutex_unlock(&net->packet.sklist_lock);

	preempt_disable();
	sock_prot_inuse_add(net, &packet_proto, 1);
	preempt_enable();

	return 0;
out:
	return err;
}
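
/*
 * Illustrative userspace sketch (assumptions noted): the three socket
 * types accepted by packet_create(). A non-zero protocol registers the
 * protocol hook immediately; protocol 0 defers capture until a later
 * bind() with a non-zero protocol. All three require CAP_NET_RAW in
 * the socket's user namespace.
 *
 *	#include <sys/socket.h>
 *	#include <linux/if_ether.h>
 *	#include <arpa/inet.h>
 *
 *	int raw  = socket(AF_PACKET, SOCK_RAW,   htons(ETH_P_ALL)); // link headers included
 *	int dgrm = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));  // link headers removed
 *	int idle = socket(AF_PACKET, SOCK_RAW,   0);                // no capture until bind
 */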

static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
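
/*
 * Illustrative userspace sketch (assumptions noted): draining the error
 * queue that packet_recv_error() serves. With SO_TIMESTAMPING TX flags
 * enabled on the socket, each sent frame is echoed back with a
 * PACKET_TX_TIMESTAMP control message carrying a sock_extended_err.
 *
 *	char data[2048], ctrl[512];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
 *	};
 *	if (recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT) >= 0) {
 *		struct cmsghdr *cm;
 *		for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
 *			if (cm->cmsg_level == SOL_PACKET &&
 *			    cm->cmsg_type == PACKET_TX_TIMESTAMP)
 *				;	// CMSG_DATA(cm) is a struct sock_extended_err
 *	}
 */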

/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */

static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* This is a hint as to how much should be linear. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
		}

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 *	If the address length field is there to be filled in, we
	 *	fill it in now.
	 */

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries
	 *	a user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		if (vlan_tx_tag_present(skb)) {
			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
			aux.tp_status |= TP_STATUS_VLAN_VALID;
		} else {
			aux.tp_vlan_tci = 0;
		}
		aux.tp_padding = 0;
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
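
/*
 * Illustrative userspace sketch (assumptions noted): enabling
 * PACKET_AUXDATA and reading the struct tpacket_auxdata control
 * message that packet_recvmsg() attaches, e.g. to recover a VLAN tag
 * stripped by the NIC.
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *
 *	char data[2048];
 *	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
 *	union {
 *		struct cmsghdr cm;
 *		char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	} cbuf;
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = &cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	recvmsg(fd, &msg, 0);
 *	struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);
 *	if (cm && cm->cmsg_level == SOL_PACKET &&
 *	    cm->cmsg_type == PACKET_AUXDATA) {
 *		struct tpacket_auxdata *aux = (void *)CMSG_DATA(cm);
 *		if (aux->tp_status & TP_STATUS_VLAN_VALID)
 *			;	// aux->tp_vlan_tci holds the 802.1Q TCI
 *	}
 */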

static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
			       int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;

	if (peer)
		return -EOPNOTSUPP;

	uaddr->sa_family = AF_PACKET;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
	if (dev)
		strncpy(uaddr->sa_data, dev->name, 14);
	else
		memset(uaddr->sa_data, 0, 14);
	rcu_read_unlock();
	*uaddr_len = sizeof(*uaddr);

	return 0;
}

static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	sll->sll_pkttype = 0;
	rcu_read_lock();
	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	rcu_read_unlock();
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}

static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
			 int what)
{
	switch (i->type) {
	case PACKET_MR_MULTICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_mc_add(dev, i->addr);
		else
			return dev_mc_del(dev, i->addr);
		break;
	case PACKET_MR_PROMISC:
		return dev_set_promiscuity(dev, what);
		break;
	case PACKET_MR_ALLMULTI:
		return dev_set_allmulti(dev, what);
		break;
	case PACKET_MR_UNICAST:
		if (i->alen != dev->addr_len)
			return -EINVAL;
		if (what > 0)
			return dev_uc_add(dev, i->addr);
		else
			return dev_uc_del(dev, i->addr);
		break;
	default:
		break;
	}
	return 0;
}

static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
{
	for ( ; i; i = i->next) {
		if (i->ifindex == dev->ifindex)
			packet_dev_mc(dev, i, what);
	}
}

static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	err = packet_dev_mc(dev, i, 1);
	if (err) {
		po->mclist = i->next;
		kfree(i);
	}

done:
	rtnl_unlock();
	return err;
}

static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
				if (dev)
					packet_dev_mc(dev, ml, -1);
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}

static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
		if (dev != NULL)
			packet_dev_mc(dev, ml, -1);
		kfree(ml);
	}
	rtnl_unlock();
}
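
/*
 * Illustrative userspace sketch (assumptions noted): the
 * PACKET_ADD_MEMBERSHIP path into packet_mc_add(). PACKET_MR_PROMISC
 * is the reference-counted alternative to toggling IFF_PROMISC by
 * hand; "eth0" is a placeholder interface name.
 *
 *	struct packet_mreq mreq;
 *	memset(&mreq, 0, sizeof(mreq));
 *	mreq.mr_ifindex = if_nametoindex("eth0");
 *	mreq.mr_type    = PACKET_MR_PROMISC;
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 *	// ... and PACKET_DROP_MEMBERSHIP with the same mreq to undo it.
 */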

static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq, optval, len))
			return -EFAULT;
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

	case PACKET_RX_RING:
	case PACKET_TX_RING:
	{
		union tpacket_req_u req_u;
		int len;

		switch (po->tp_version) {
		case TPACKET_V1:
		case TPACKET_V2:
			len = sizeof(req_u.req);
			break;
		case TPACKET_V3:
		default:
			len = sizeof(req_u.req3);
			break;
		}
		if (optlen < len)
			return -EINVAL;
		if (pkt_sk(sk)->has_vnet_hdr)
			return -EINVAL;
		if (copy_from_user(&req_u.req, optval, len))
			return -EFAULT;
		return packet_set_ring(sk, &req_u, 0,
			optname == PACKET_TX_RING);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
	case PACKET_VERSION:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
		case TPACKET_V2:
		case TPACKET_V3:
			po->tp_version = val;
			return 0;
		default:
			return -EINVAL;
		}
	}
	case PACKET_RESERVE:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_reserve = val;
		return 0;
	}
	case PACKET_LOSS:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_loss = !!val;
		return 0;
	}
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	case PACKET_VNET_HDR:
	{
		int val;

		if (sock->type != SOCK_RAW)
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->has_vnet_hdr = !!val;
		return 0;
	}
	case PACKET_TIMESTAMP:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->tp_tstamp = val;
		return 0;
	}
	case PACKET_FANOUT:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		return fanout_add(sk, val & 0xffff, val >> 16);
	}
	case PACKET_TX_HAS_OFF:
	{
		unsigned int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
			return -EBUSY;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;
		po->tp_tx_has_off = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
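
/*
 * Illustrative userspace sketch (assumptions noted): joining a fanout
 * group via the PACKET_FANOUT case above. The low 16 bits carry the
 * group id (1234 here is an arbitrary example), the high 16 bits the
 * policy. Repeating this on several sockets bound to the same
 * protocol/device spreads received packets across them by flow hash.
 *
 *	int val = 1234 | (PACKET_FANOUT_HASH << 16);
 *	setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val));
 */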

static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val, lv = sizeof(val);
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data = &val;
	struct tpacket_stats st;
	union tpacket_stats_u st_u;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch (optname) {
	case PACKET_STATISTICS:
		spin_lock_bh(&sk->sk_receive_queue.lock);
		if (po->tp_version == TPACKET_V3) {
			lv = sizeof(struct tpacket_stats_v3);
			memcpy(&st_u.stats3, &po->stats,
			       sizeof(struct tpacket_stats));
			st_u.stats3.tp_freeze_q_cnt =
					po->stats_u.stats3.tp_freeze_q_cnt;
			st_u.stats3.tp_packets += po->stats.tp_drops;
			data = &st_u.stats3;
		} else {
			lv = sizeof(struct tpacket_stats);
			st = po->stats;
			st.tp_packets += st.tp_drops;
			data = &st;
		}
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		break;
	case PACKET_AUXDATA:
		val = po->auxdata;
		break;
	case PACKET_ORIGDEV:
		val = po->origdev;
		break;
	case PACKET_VNET_HDR:
		val = po->has_vnet_hdr;
		break;
	case PACKET_VERSION:
		val = po->tp_version;
		break;
	case PACKET_HDRLEN:
		if (len > sizeof(int))
			len = sizeof(int);
		if (copy_from_user(&val, optval, len))
			return -EFAULT;
		switch (val) {
		case TPACKET_V1:
			val = sizeof(struct tpacket_hdr);
			break;
		case TPACKET_V2:
			val = sizeof(struct tpacket2_hdr);
			break;
		case TPACKET_V3:
			val = sizeof(struct tpacket3_hdr);
			break;
		default:
			return -EINVAL;
		}
		break;
	case PACKET_RESERVE:
		val = po->tp_reserve;
		break;
	case PACKET_LOSS:
		val = po->tp_loss;
		break;
	case PACKET_TIMESTAMP:
		val = po->tp_tstamp;
		break;
	case PACKET_FANOUT:
		val = (po->fanout ?
		       ((u32)po->fanout->id |
			((u32)po->fanout->type << 16)) :
		       0);
		break;
	case PACKET_TX_HAS_OFF:
		val = po->tp_tx_has_off;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (len > lv)
		len = lv;
	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
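
/*
 * Illustrative userspace sketch (assumptions noted): PACKET_HDRLEN is
 * an in/out option - the caller passes the TPACKET version it intends
 * to use and gets back the per-frame header size for ring layout math.
 *
 *	int val = TPACKET_V2;
 *	socklen_t len = sizeof(val);
 *	getsockopt(fd, SOL_PACKET, PACKET_HDRLEN, &val, &len);
 *	// val now holds sizeof(struct tpacket2_hdr)
 */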


static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;
	struct net *net = dev_net(dev);

	rcu_read_lock();
	sk_for_each_rcu(sk, node, &net->packet.sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__unregister_prot_hook(sk, false);
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					po->ifindex = -1;
					if (po->prot_hook.dev)
						dev_put(po->prot_hook.dev);
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->num)
					register_prot_hook(sk);
				spin_unlock(&po->bind_lock);
			}
			break;
		}
	}
	rcu_read_unlock();
	return NOTIFY_DONE;
}


static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch (cmd) {
	case SIOCOUTQ:
	{
		int amount = sk_wmem_alloc_get(sk);

		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
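
/*
 * Illustrative userspace sketch (assumptions noted): SIOCINQ on a
 * packet socket reports the length of the next queued frame, not the
 * total queue size - see the skb_peek() above.
 *
 *	int next_len = 0;
 *	ioctl(fd, SIOCINQ, &next_len);
 */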

static unsigned int packet_poll(struct file *file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->rx_ring.pg_vec) {
		if (!packet_previous_rx_frame(po, &po->rx_ring,
			TP_STATUS_KERNEL))
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	spin_lock_bh(&sk->sk_write_queue.lock);
	if (po->tx_ring.pg_vec) {
		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
			mask |= POLLOUT | POLLWRNORM;
	}
	spin_unlock_bh(&sk->sk_write_queue.lock);
	return mask;
}


/* Dirty? Well, I still did not learn better way to account
 * for user mmaps.
 */

static void packet_mm_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_inc(&pkt_sk(sk)->mapped);
}

static void packet_mm_close(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct socket *sock = file->private_data;
	struct sock *sk = sock->sk;

	if (sk)
		atomic_dec(&pkt_sk(sk)->mapped);
}

static const struct vm_operations_struct packet_mmap_ops = {
	.open	= packet_mm_open,
	.close	= packet_mm_close,
};

static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
			unsigned int len)
{
	int i;

	for (i = 0; i < len; i++) {
		if (likely(pg_vec[i].buffer)) {
			if (is_vmalloc_addr(pg_vec[i].buffer))
				vfree(pg_vec[i].buffer);
			else
				free_pages((unsigned long)pg_vec[i].buffer,
					   order);
			pg_vec[i].buffer = NULL;
		}
	}
	kfree(pg_vec);
}

static char *alloc_one_pg_vec_page(unsigned long order)
{
	char *buffer = NULL;
	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;

	buffer = (char *) __get_free_pages(gfp_flags, order);

	if (buffer)
		return buffer;

	/*
	 * __get_free_pages failed, fall back to vmalloc
	 */
	buffer = vzalloc((1 << order) * PAGE_SIZE);

	if (buffer)
		return buffer;

	/*
	 * vmalloc failed, lets dig into swap here
	 */
	gfp_flags &= ~__GFP_NORETRY;
	buffer = (char *)__get_free_pages(gfp_flags, order);
	if (buffer)
		return buffer;

	/*
	 * complete and utter failure
	 */
	return NULL;
}

static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	struct pgv *pg_vec;
	int i;

	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i].buffer))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring)
{
	struct pgv *pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	struct packet_ring_buffer *rb;
	struct sk_buff_head *rb_queue;
	__be16 num;
	int err = -EINVAL;
	/* Added to avoid minimal code churn */
	struct tpacket_req *req = &req_u->req;

	/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
	if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
		WARN(1, "Tx-ring is not supported.\n");
		goto out;
	}

	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;

	err = -EBUSY;
	if (!closing) {
		if (atomic_read(&po->mapped))
			goto out;
		if (atomic_read(&rb->pending))
			goto out;
	}

	if (req->tp_block_nr) {
		/* Sanity tests and some calculations */
		err = -EBUSY;
		if (unlikely(rb->pg_vec))
			goto out;

		switch (po->tp_version) {
		case TPACKET_V1:
			po->tp_hdrlen = TPACKET_HDRLEN;
			break;
		case TPACKET_V2:
			po->tp_hdrlen = TPACKET2_HDRLEN;
			break;
		case TPACKET_V3:
			po->tp_hdrlen = TPACKET3_HDRLEN;
			break;
		}

		err = -EINVAL;
		if (unlikely((int)req->tp_block_size <= 0))
			goto out;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			goto out;
		if (unlikely(req->tp_frame_size < po->tp_hdrlen +
					po->tp_reserve))
			goto out;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			goto out;

		rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(rb->frames_per_block <= 0))
			goto out;
		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
					req->tp_frame_nr))
			goto out;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;
		switch (po->tp_version) {
		case TPACKET_V3:
		/* Transmit path is not supported. We checked
		 * it above but just being paranoid
		 */
			if (!tx_ring)
				init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
			break;
		default:
			break;
		}
	}
	/* Done */
	else {
		err = -EINVAL;
		if (unlikely(req->tp_frame_nr))
			goto out;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		po->num = 0;
		__unregister_prot_hook(sk, false);
	}
	spin_unlock(&po->bind_lock);

	synchronize_net();

	err = -EBUSY;
	mutex_lock(&po->pg_vec_lock);
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
		spin_lock_bh(&rb_queue->lock);
		swap(rb->pg_vec, pg_vec);
		rb->frame_max = (req->tp_frame_nr - 1);
		rb->head = 0;
		rb->frame_size = req->tp_frame_size;
		spin_unlock_bh(&rb_queue->lock);

		swap(rb->pg_vec_order, order);
		swap(rb->pg_vec_len, req->tp_block_nr);

		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		po->prot_hook.func = (po->rx_ring.pg_vec) ?
						tpacket_rcv : packet_rcv;
		skb_queue_purge(rb_queue);
		if (atomic_read(&po->mapped))
			pr_err("packet_mmap: vma is busy: %d\n",
			       atomic_read(&po->mapped));
	}
	mutex_unlock(&po->pg_vec_lock);

	spin_lock(&po->bind_lock);
	if (was_running) {
		po->num = num;
		register_prot_hook(sk);
	}
	spin_unlock(&po->bind_lock);
	if (closing && (po->tp_version > TPACKET_V2)) {
		/* Because we don't support block-based V3 on tx-ring */
		if (!tx_ring)
			prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
	}
	release_sock(sk);

	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
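
/*
 * Illustrative userspace sketch (assumptions noted) of a tpacket_req
 * that satisfies the sanity tests above: tp_block_size must be a
 * multiple of PAGE_SIZE, tp_frame_size must be TPACKET_ALIGNMENT
 * aligned and at least tp_hdrlen + tp_reserve, and tp_frame_nr must
 * equal frames_per_block * tp_block_nr exactly.
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,	// == PAGE_SIZE on most systems
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,	// 2 frames per block
 *		.tp_frame_nr   = 2 * 64,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */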

static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	if (expected_size == 0)
		goto out;

	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
				page = pgv_to_page(kaddr);
				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
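
/*
 * Illustrative userspace sketch (assumptions noted): mapping the ring
 * configured above and walking TPACKET_V2 frames. Both rings, if
 * configured, are mapped back to back in one contiguous area, which is
 * why packet_mmap() insists the vma length equal their combined size.
 * Linear frame indexing works here because tp_frame_size divides
 * tp_block_size exactly in the example request.
 *
 *	size_t ring_sz = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 *	for (unsigned int i = 0; ; i = (i + 1) % req.tp_frame_nr) {
 *		struct tpacket2_hdr *hdr =
 *			(void *)((char *)ring + (size_t)i * req.tp_frame_size);
 *		while (!(hdr->tp_status & TP_STATUS_USER)) {
 *			struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *			poll(&pfd, 1, -1);	// wakes via packet_poll()
 *		}
 *		// frame payload starts at (char *)hdr + hdr->tp_mac
 *		hdr->tp_status = TP_STATUS_KERNEL;	// hand back to kernel
 *	}
 */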

static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};

static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner =	THIS_MODULE,
};

static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};

#ifdef CONFIG_PROC_FS

static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}

static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
}

static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
			   sock_i_ino(s));
	}

	return 0;
}

static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};

static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

#endif

static int __net_init packet_net_init(struct net *net)
{
	mutex_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}

static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}

static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};


static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}

static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_pernet_subsys(&packet_net_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
out:
	return rc;
}

module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);