1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55#include <linux/types.h>
56#include <linux/mm.h>
57#include <linux/capability.h>
58#include <linux/fcntl.h>
59#include <linux/socket.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_packet.h>
64#include <linux/wireless.h>
65#include <linux/kernel.h>
66#include <linux/kmod.h>
67#include <linux/slab.h>
68#include <linux/vmalloc.h>
69#include <net/net_namespace.h>
70#include <net/ip.h>
71#include <net/protocol.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <linux/errno.h>
75#include <linux/timer.h>
76#include <asm/uaccess.h>
77#include <asm/ioctls.h>
78#include <asm/page.h>
79#include <asm/cacheflush.h>
80#include <asm/io.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
83#include <linux/poll.h>
84#include <linux/module.h>
85#include <linux/init.h>
86#include <linux/mutex.h>
87#include <linux/if_vlan.h>
88#include <linux/virtio_net.h>
89#include <linux/errqueue.h>
90#include <linux/net_tstamp.h>
91
92#ifdef CONFIG_INET
93#include <net/inet_common.h>
94#endif
95
96#include "internal.h"
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154struct packet_mreq_max {
155 int mr_ifindex;
156 unsigned short mr_type;
157 unsigned short mr_alen;
158 unsigned char mr_address[MAX_ADDR_LEN];
159};
160
/*
 * A single frame address viewed through each TPACKET header layout.  The
 * status/timestamp helpers in this file store the frame pointer in @raw and
 * then use @h1 for TPACKET_V1 and @h2 for TPACKET_V2.
 */
union tpacket_uhdr {
	struct tpacket_hdr	*h1;
	struct tpacket2_hdr	*h2;
	struct tpacket3_hdr	*h3;	/* presumably the TPACKET_V3 view */
	void			*raw;	/* untyped frame address */
};
167
168static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
169 int closing, int tx_ring);
170
/* Alignment (in bytes) used for block headers and per-packet lengths. */
#define V3_ALIGNMENT	(8)

/* sizeof(struct tpacket_block_desc) rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

/* Aligned block header followed by an aligned private area of @sz_of_priv. */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

#define PGV_FROM_VMALLOC 1

/* Field accessors; @x is a struct tpacket_block_desc pointer. */
#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)
/* Address of the private area: block start plus the stored offset. */
#define BLOCK_PRIV(x)		((void *)((char *)(x) + BLOCK_O2PRIV(x)))
187
/*
 * Forward declarations for helpers that are used before their definition.
 * Parameter names mirror the ones used by the definitions in this file.
 */
struct packet_sock;

static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
				   struct packet_ring_buffer *rb,
				   int status);
static void packet_increment_head(struct packet_ring_buffer *buff);

static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
			       struct tpacket_block_desc *pbd);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
				     struct packet_sock *po);
static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
				     struct packet_sock *po,
				     unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *pkc);
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
			   struct tpacket_block_desc *pbd1);
static void prb_retire_rx_blk_timer_expired(unsigned long data);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc);
static void prb_init_blk_timer(struct packet_sock *po,
			       struct tpacket_kbdq_core *pkc,
			       void (*func) (unsigned long));
static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
			    struct tpacket3_hdr *ppd);
static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
			     struct tpacket3_hdr *ppd);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
			       struct tpacket3_hdr *ppd);
static void packet_flush_mclist(struct sock *sk);
217
218struct packet_skb_cb {
219 unsigned int origlen;
220 union {
221 struct sockaddr_pkt pkt;
222 struct sockaddr_ll ll;
223 } sa;
224};
225
/* View the control buffer of @__skb as a struct packet_skb_cb. */
#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

/* Block-descriptor queue core embedded in ring buffer @x. */
#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))

/* Descriptor of block number @bid in queue core @x. */
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))

/* Descriptor of the block that queue core @x currently marks active. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))

/* Index following the active block, wrapping to 0 after the last block. */
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)
236
237static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
238static void __fanout_link(struct sock *sk, struct packet_sock *po);
239
240
241
242
243
244static void register_prot_hook(struct sock *sk)
245{
246 struct packet_sock *po = pkt_sk(sk);
247 if (!po->running) {
248 if (po->fanout)
249 __fanout_link(sk, po);
250 else
251 dev_add_pack(&po->prot_hook);
252 sock_hold(sk);
253 po->running = 1;
254 }
255}
256
257
258
259
260
261
262
263
264static void __unregister_prot_hook(struct sock *sk, bool sync)
265{
266 struct packet_sock *po = pkt_sk(sk);
267
268 po->running = 0;
269 if (po->fanout)
270 __fanout_unlink(sk, po);
271 else
272 __dev_remove_pack(&po->prot_hook);
273 __sock_put(sk);
274
275 if (sync) {
276 spin_unlock(&po->bind_lock);
277 synchronize_net();
278 spin_lock(&po->bind_lock);
279 }
280}
281
282static void unregister_prot_hook(struct sock *sk, bool sync)
283{
284 struct packet_sock *po = pkt_sk(sk);
285
286 if (po->running)
287 __unregister_prot_hook(sk, sync);
288}
289
290static inline __pure struct page *pgv_to_page(void *addr)
291{
292 if (is_vmalloc_addr(addr))
293 return vmalloc_to_page(addr);
294 return virt_to_page(addr);
295}
296
297static void __packet_set_status(struct packet_sock *po, void *frame, int status)
298{
299 union tpacket_uhdr h;
300
301 h.raw = frame;
302 switch (po->tp_version) {
303 case TPACKET_V1:
304 h.h1->tp_status = status;
305 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
306 break;
307 case TPACKET_V2:
308 h.h2->tp_status = status;
309 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
310 break;
311 case TPACKET_V3:
312 default:
313 WARN(1, "TPACKET version not supported.\n");
314 BUG();
315 }
316
317 smp_wmb();
318}
319
320static int __packet_get_status(struct packet_sock *po, void *frame)
321{
322 union tpacket_uhdr h;
323
324 smp_rmb();
325
326 h.raw = frame;
327 switch (po->tp_version) {
328 case TPACKET_V1:
329 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
330 return h.h1->tp_status;
331 case TPACKET_V2:
332 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
333 return h.h2->tp_status;
334 case TPACKET_V3:
335 default:
336 WARN(1, "TPACKET version not supported.\n");
337 BUG();
338 return 0;
339 }
340}
341
342static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
343 unsigned int flags)
344{
345 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
346
347 if (shhwtstamps) {
348 if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
349 ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
350 return TP_STATUS_TS_SYS_HARDWARE;
351 if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
352 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
353 return TP_STATUS_TS_RAW_HARDWARE;
354 }
355
356 if (ktime_to_timespec_cond(skb->tstamp, ts))
357 return TP_STATUS_TS_SOFTWARE;
358
359 return 0;
360}
361
362static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
363 struct sk_buff *skb)
364{
365 union tpacket_uhdr h;
366 struct timespec ts;
367 __u32 ts_status;
368
369 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
370 return 0;
371
372 h.raw = frame;
373 switch (po->tp_version) {
374 case TPACKET_V1:
375 h.h1->tp_sec = ts.tv_sec;
376 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
377 break;
378 case TPACKET_V2:
379 h.h2->tp_sec = ts.tv_sec;
380 h.h2->tp_nsec = ts.tv_nsec;
381 break;
382 case TPACKET_V3:
383 default:
384 WARN(1, "TPACKET version not supported.\n");
385 BUG();
386 }
387
388
389 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
390 smp_wmb();
391
392 return ts_status;
393}
394
395static void *packet_lookup_frame(struct packet_sock *po,
396 struct packet_ring_buffer *rb,
397 unsigned int position,
398 int status)
399{
400 unsigned int pg_vec_pos, frame_offset;
401 union tpacket_uhdr h;
402
403 pg_vec_pos = position / rb->frames_per_block;
404 frame_offset = position % rb->frames_per_block;
405
406 h.raw = rb->pg_vec[pg_vec_pos].buffer +
407 (frame_offset * rb->frame_size);
408
409 if (status != __packet_get_status(po, h.raw))
410 return NULL;
411
412 return h.raw;
413}
414
415static void *packet_current_frame(struct packet_sock *po,
416 struct packet_ring_buffer *rb,
417 int status)
418{
419 return packet_lookup_frame(po, rb, rb->head, status);
420}
421
422static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
423{
424 del_timer_sync(&pkc->retire_blk_timer);
425}
426
427static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
428 int tx_ring,
429 struct sk_buff_head *rb_queue)
430{
431 struct tpacket_kbdq_core *pkc;
432
433 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
434
435 spin_lock(&rb_queue->lock);
436 pkc->delete_blk_timer = 1;
437 spin_unlock(&rb_queue->lock);
438
439 prb_del_retire_blk_timer(pkc);
440}
441
442static void prb_init_blk_timer(struct packet_sock *po,
443 struct tpacket_kbdq_core *pkc,
444 void (*func) (unsigned long))
445{
446 init_timer(&pkc->retire_blk_timer);
447 pkc->retire_blk_timer.data = (long)po;
448 pkc->retire_blk_timer.function = func;
449 pkc->retire_blk_timer.expires = jiffies;
450}
451
452static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
453{
454 struct tpacket_kbdq_core *pkc;
455
456 if (tx_ring)
457 BUG();
458
459 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
460 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
461}
462
463static int prb_calc_retire_blk_tmo(struct packet_sock *po,
464 int blk_size_in_bytes)
465{
466 struct net_device *dev;
467 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
468 struct ethtool_cmd ecmd;
469 int err;
470 u32 speed;
471
472 rtnl_lock();
473 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
474 if (unlikely(!dev)) {
475 rtnl_unlock();
476 return DEFAULT_PRB_RETIRE_TOV;
477 }
478 err = __ethtool_get_settings(dev, &ecmd);
479 speed = ethtool_cmd_speed(&ecmd);
480 rtnl_unlock();
481 if (!err) {
482
483
484
485
486 if (speed < SPEED_1000 || speed == SPEED_UNKNOWN) {
487 return DEFAULT_PRB_RETIRE_TOV;
488 } else {
489 msec = 1;
490 div = speed / 1000;
491 }
492 }
493
494 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
495
496 if (div)
497 mbits /= div;
498
499 tmo = mbits * msec;
500
501 if (div)
502 return tmo+1;
503 return tmo;
504}
505
506static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
507 union tpacket_req_u *req_u)
508{
509 p1->feature_req_word = req_u->req3.tp_feature_req_word;
510}
511
512static void init_prb_bdqc(struct packet_sock *po,
513 struct packet_ring_buffer *rb,
514 struct pgv *pg_vec,
515 union tpacket_req_u *req_u, int tx_ring)
516{
517 struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
518 struct tpacket_block_desc *pbd;
519
520 memset(p1, 0x0, sizeof(*p1));
521
522 p1->knxt_seq_num = 1;
523 p1->pkbdq = pg_vec;
524 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
525 p1->pkblk_start = pg_vec[0].buffer;
526 p1->kblk_size = req_u->req3.tp_block_size;
527 p1->knum_blocks = req_u->req3.tp_block_nr;
528 p1->hdrlen = po->tp_hdrlen;
529 p1->version = po->tp_version;
530 p1->last_kactive_blk_num = 0;
531 po->stats.stats3.tp_freeze_q_cnt = 0;
532 if (req_u->req3.tp_retire_blk_tov)
533 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
534 else
535 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
536 req_u->req3.tp_block_size);
537 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
538 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
539
540 prb_init_ft_ops(p1, req_u);
541 prb_setup_retire_blk_timer(po, tx_ring);
542 prb_open_block(p1, pbd);
543}
544
545
546
547
548static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
549{
550 mod_timer(&pkc->retire_blk_timer,
551 jiffies + pkc->tov_in_jiffies);
552 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
553}
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578static void prb_retire_rx_blk_timer_expired(unsigned long data)
579{
580 struct packet_sock *po = (struct packet_sock *)data;
581 struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
582 unsigned int frozen;
583 struct tpacket_block_desc *pbd;
584
585 spin_lock(&po->sk.sk_receive_queue.lock);
586
587 frozen = prb_queue_frozen(pkc);
588 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
589
590 if (unlikely(pkc->delete_blk_timer))
591 goto out;
592
593
594
595
596
597
598
599
600
601
602 if (BLOCK_NUM_PKTS(pbd)) {
603 while (atomic_read(&pkc->blk_fill_in_prog)) {
604
605 cpu_relax();
606 }
607 }
608
609 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
610 if (!frozen) {
611 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
612 if (!prb_dispatch_next_block(pkc, po))
613 goto refresh_timer;
614 else
615 goto out;
616 } else {
617
618
619
620 if (prb_curr_blk_in_use(pkc, pbd)) {
621
622
623
624
625 goto refresh_timer;
626 } else {
627
628
629
630
631
632
633
634 prb_open_block(pkc, pbd);
635 goto out;
636 }
637 }
638 }
639
640refresh_timer:
641 _prb_refresh_rx_retire_blk_timer(pkc);
642
643out:
644 spin_unlock(&po->sk.sk_receive_queue.lock);
645}
646
647static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
648 struct tpacket_block_desc *pbd1, __u32 status)
649{
650
651
652#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
653 u8 *start, *end;
654
655 start = (u8 *)pbd1;
656
657
658 start += PAGE_SIZE;
659
660 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
661 for (; start < end; start += PAGE_SIZE)
662 flush_dcache_page(pgv_to_page(start));
663
664 smp_wmb();
665#endif
666
667
668
669 BLOCK_STATUS(pbd1) = status;
670
671
672
673#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
674 start = (u8 *)pbd1;
675 flush_dcache_page(pgv_to_page(start));
676
677 smp_wmb();
678#endif
679}
680
681
682
683
684
685
686
687
688
689
690static void prb_close_block(struct tpacket_kbdq_core *pkc1,
691 struct tpacket_block_desc *pbd1,
692 struct packet_sock *po, unsigned int stat)
693{
694 __u32 status = TP_STATUS_USER | stat;
695
696 struct tpacket3_hdr *last_pkt;
697 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
698
699 if (po->stats.stats3.tp_drops)
700 status |= TP_STATUS_LOSING;
701
702 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
703 last_pkt->tp_next_offset = 0;
704
705
706 if (BLOCK_NUM_PKTS(pbd1)) {
707 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
708 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
709 } else {
710
711 struct timespec ts;
712 getnstimeofday(&ts);
713 h1->ts_last_pkt.ts_sec = ts.tv_sec;
714 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
715 }
716
717 smp_wmb();
718
719
720 prb_flush_block(pkc1, pbd1, status);
721
722 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
723}
724
725static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
726{
727 pkc->reset_pending_on_curr_blk = 0;
728}
729
730
731
732
733
734
735
736
737static void prb_open_block(struct tpacket_kbdq_core *pkc1,
738 struct tpacket_block_desc *pbd1)
739{
740 struct timespec ts;
741 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
742
743 smp_rmb();
744
745
746
747
748
749 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
750 BLOCK_NUM_PKTS(pbd1) = 0;
751 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
752
753 getnstimeofday(&ts);
754
755 h1->ts_first_pkt.ts_sec = ts.tv_sec;
756 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
757
758 pkc1->pkblk_start = (char *)pbd1;
759 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
760
761 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
762 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
763
764 pbd1->version = pkc1->version;
765 pkc1->prev = pkc1->nxt_offset;
766 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
767
768 prb_thaw_queue(pkc1);
769 _prb_refresh_rx_retire_blk_timer(pkc1);
770
771 smp_wmb();
772}
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
798 struct packet_sock *po)
799{
800 pkc->reset_pending_on_curr_blk = 1;
801 po->stats.stats3.tp_freeze_q_cnt++;
802}
803
/* Space one packet of @length bytes occupies once padded to V3_ALIGNMENT. */
#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
805
806
807
808
809
810
811
812static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
813 struct packet_sock *po)
814{
815 struct tpacket_block_desc *pbd;
816
817 smp_rmb();
818
819
820 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
821
822
823 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
824 prb_freeze_queue(pkc, po);
825 return NULL;
826 }
827
828
829
830
831
832
833 prb_open_block(pkc, pbd);
834 return (void *)pkc->nxt_offset;
835}
836
837static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
838 struct packet_sock *po, unsigned int status)
839{
840 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
841
842
843 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
844
845
846
847
848
849
850
851
852
853 if (!(status & TP_STATUS_BLK_TMO)) {
854 while (atomic_read(&pkc->blk_fill_in_prog)) {
855
856 cpu_relax();
857 }
858 }
859 prb_close_block(pkc, pbd, po, status);
860 return;
861 }
862}
863
864static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
865 struct tpacket_block_desc *pbd)
866{
867 return TP_STATUS_USER & BLOCK_STATUS(pbd);
868}
869
870static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
871{
872 return pkc->reset_pending_on_curr_blk;
873}
874
875static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
876{
877 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
878 atomic_dec(&pkc->blk_fill_in_prog);
879}
880
881static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
882 struct tpacket3_hdr *ppd)
883{
884 ppd->hv1.tp_rxhash = skb_get_rxhash(pkc->skb);
885}
886
887static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
888 struct tpacket3_hdr *ppd)
889{
890 ppd->hv1.tp_rxhash = 0;
891}
892
893static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
894 struct tpacket3_hdr *ppd)
895{
896 if (vlan_tx_tag_present(pkc->skb)) {
897 ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb);
898 ppd->tp_status = TP_STATUS_VLAN_VALID;
899 } else {
900 ppd->hv1.tp_vlan_tci = 0;
901 ppd->tp_status = TP_STATUS_AVAILABLE;
902 }
903}
904
905static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
906 struct tpacket3_hdr *ppd)
907{
908 prb_fill_vlan_info(pkc, ppd);
909
910 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
911 prb_fill_rxhash(pkc, ppd);
912 else
913 prb_clear_rxhash(pkc, ppd);
914}
915
916static void prb_fill_curr_block(char *curr,
917 struct tpacket_kbdq_core *pkc,
918 struct tpacket_block_desc *pbd,
919 unsigned int len)
920{
921 struct tpacket3_hdr *ppd;
922
923 ppd = (struct tpacket3_hdr *)curr;
924 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
925 pkc->prev = curr;
926 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
927 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
928 BLOCK_NUM_PKTS(pbd) += 1;
929 atomic_inc(&pkc->blk_fill_in_prog);
930 prb_run_all_ft_ops(pkc, ppd);
931}
932
933
934static void *__packet_lookup_frame_in_block(struct packet_sock *po,
935 struct sk_buff *skb,
936 int status,
937 unsigned int len
938 )
939{
940 struct tpacket_kbdq_core *pkc;
941 struct tpacket_block_desc *pbd;
942 char *curr, *end;
943
944 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
945 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
946
947
948 if (prb_queue_frozen(pkc)) {
949
950
951
952
953 if (prb_curr_blk_in_use(pkc, pbd)) {
954
955 return NULL;
956 } else {
957
958
959
960
961
962
963 prb_open_block(pkc, pbd);
964 }
965 }
966
967 smp_mb();
968 curr = pkc->nxt_offset;
969 pkc->skb = skb;
970 end = (char *)pbd + pkc->kblk_size;
971
972
973 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
974 prb_fill_curr_block(curr, pkc, pbd, len);
975 return (void *)curr;
976 }
977
978
979 prb_retire_current_block(pkc, po, 0);
980
981
982 curr = (char *)prb_dispatch_next_block(pkc, po);
983 if (curr) {
984 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
985 prb_fill_curr_block(curr, pkc, pbd, len);
986 return (void *)curr;
987 }
988
989
990
991
992
993 return NULL;
994}
995
996static void *packet_current_rx_frame(struct packet_sock *po,
997 struct sk_buff *skb,
998 int status, unsigned int len)
999{
1000 char *curr = NULL;
1001 switch (po->tp_version) {
1002 case TPACKET_V1:
1003 case TPACKET_V2:
1004 curr = packet_lookup_frame(po, &po->rx_ring,
1005 po->rx_ring.head, status);
1006 return curr;
1007 case TPACKET_V3:
1008 return __packet_lookup_frame_in_block(po, skb, status, len);
1009 default:
1010 WARN(1, "TPACKET version not supported\n");
1011 BUG();
1012 return NULL;
1013 }
1014}
1015
1016static void *prb_lookup_block(struct packet_sock *po,
1017 struct packet_ring_buffer *rb,
1018 unsigned int idx,
1019 int status)
1020{
1021 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1022 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1023
1024 if (status != BLOCK_STATUS(pbd))
1025 return NULL;
1026 return pbd;
1027}
1028
1029static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1030{
1031 unsigned int prev;
1032 if (rb->prb_bdqc.kactive_blk_num)
1033 prev = rb->prb_bdqc.kactive_blk_num-1;
1034 else
1035 prev = rb->prb_bdqc.knum_blocks-1;
1036 return prev;
1037}
1038
1039
/*
 * Return the block preceding the active one if its status equals
 * @status, NULL otherwise.
 */
static void *__prb_previous_block(struct packet_sock *po,
				  struct packet_ring_buffer *rb,
				  int status)
{
	return prb_lookup_block(po, rb, prb_previous_blk_num(rb), status);
}
1047
1048static void *packet_previous_rx_frame(struct packet_sock *po,
1049 struct packet_ring_buffer *rb,
1050 int status)
1051{
1052 if (po->tp_version <= TPACKET_V2)
1053 return packet_previous_frame(po, rb, status);
1054
1055 return __prb_previous_block(po, rb, status);
1056}
1057
1058static void packet_increment_rx_head(struct packet_sock *po,
1059 struct packet_ring_buffer *rb)
1060{
1061 switch (po->tp_version) {
1062 case TPACKET_V1:
1063 case TPACKET_V2:
1064 return packet_increment_head(rb);
1065 case TPACKET_V3:
1066 default:
1067 WARN(1, "TPACKET version not supported.\n");
1068 BUG();
1069 return;
1070 }
1071}
1072
1073static void *packet_previous_frame(struct packet_sock *po,
1074 struct packet_ring_buffer *rb,
1075 int status)
1076{
1077 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1078 return packet_lookup_frame(po, rb, previous, status);
1079}
1080
1081static void packet_increment_head(struct packet_ring_buffer *buff)
1082{
1083 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1084}
1085
1086static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1087{
1088 struct sock *sk = &po->sk;
1089 bool has_room;
1090
1091 if (po->prot_hook.func != tpacket_rcv)
1092 return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
1093 <= sk->sk_rcvbuf;
1094
1095 spin_lock(&sk->sk_receive_queue.lock);
1096 if (po->tp_version == TPACKET_V3)
1097 has_room = prb_lookup_block(po, &po->rx_ring,
1098 po->rx_ring.prb_bdqc.kactive_blk_num,
1099 TP_STATUS_KERNEL);
1100 else
1101 has_room = packet_lookup_frame(po, &po->rx_ring,
1102 po->rx_ring.head,
1103 TP_STATUS_KERNEL);
1104 spin_unlock(&sk->sk_receive_queue.lock);
1105
1106 return has_room;
1107}
1108
1109static void packet_sock_destruct(struct sock *sk)
1110{
1111 skb_queue_purge(&sk->sk_error_queue);
1112
1113 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1114 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
1115
1116 if (!sock_flag(sk, SOCK_DEAD)) {
1117 pr_err("Attempt to release alive packet socket: %p\n", sk);
1118 return;
1119 }
1120
1121 sk_refcnt_debug_dec(sk);
1122}
1123
1124static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1125{
1126 int x = atomic_read(&f->rr_cur) + 1;
1127
1128 if (x >= num)
1129 x = 0;
1130
1131 return x;
1132}
1133
1134static unsigned int fanout_demux_hash(struct packet_fanout *f,
1135 struct sk_buff *skb,
1136 unsigned int num)
1137{
1138 return (((u64)skb->rxhash) * num) >> 32;
1139}
1140
1141static unsigned int fanout_demux_lb(struct packet_fanout *f,
1142 struct sk_buff *skb,
1143 unsigned int num)
1144{
1145 int cur, old;
1146
1147 cur = atomic_read(&f->rr_cur);
1148 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1149 fanout_rr_next(f, num))) != cur)
1150 cur = old;
1151 return cur;
1152}
1153
/* Select a member by the id of the executing CPU modulo @num. */
static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int cpu = smp_processor_id();

	return cpu % num;
}
1160
1161static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1162 struct sk_buff *skb,
1163 unsigned int idx, unsigned int skip,
1164 unsigned int num)
1165{
1166 unsigned int i, j;
1167
1168 i = j = min_t(int, f->next[idx], num - 1);
1169 do {
1170 if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
1171 if (i != j)
1172 f->next[idx] = i;
1173 return i;
1174 }
1175 if (++i == num)
1176 i = 0;
1177 } while (i != j);
1178
1179 return idx;
1180}
1181
1182static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1183{
1184 return f->flags & (flag >> 8);
1185}
1186
1187static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1188 struct packet_type *pt, struct net_device *orig_dev)
1189{
1190 struct packet_fanout *f = pt->af_packet_priv;
1191 unsigned int num = f->num_members;
1192 struct packet_sock *po;
1193 unsigned int idx;
1194
1195 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1196 !num) {
1197 kfree_skb(skb);
1198 return 0;
1199 }
1200
1201 switch (f->type) {
1202 case PACKET_FANOUT_HASH:
1203 default:
1204 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1205 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
1206 if (!skb)
1207 return 0;
1208 }
1209 skb_get_rxhash(skb);
1210 idx = fanout_demux_hash(f, skb, num);
1211 break;
1212 case PACKET_FANOUT_LB:
1213 idx = fanout_demux_lb(f, skb, num);
1214 break;
1215 case PACKET_FANOUT_CPU:
1216 idx = fanout_demux_cpu(f, skb, num);
1217 break;
1218 case PACKET_FANOUT_ROLLOVER:
1219 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1220 break;
1221 }
1222
1223 po = pkt_sk(f->arr[idx]);
1224 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
1225 unlikely(!packet_rcv_has_room(po, skb))) {
1226 idx = fanout_demux_rollover(f, skb, idx, idx, num);
1227 po = pkt_sk(f->arr[idx]);
1228 }
1229
1230 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1231}
1232
/* Held by fanout_add()/fanout_release() around all fanout_list accesses. */
DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);

/* List of existing fanout groups (struct packet_fanout, linked via ->list). */
static LIST_HEAD(fanout_list);
1236
1237static void __fanout_link(struct sock *sk, struct packet_sock *po)
1238{
1239 struct packet_fanout *f = po->fanout;
1240
1241 spin_lock(&f->lock);
1242 f->arr[f->num_members] = sk;
1243 smp_wmb();
1244 f->num_members++;
1245 spin_unlock(&f->lock);
1246}
1247
1248static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1249{
1250 struct packet_fanout *f = po->fanout;
1251 int i;
1252
1253 spin_lock(&f->lock);
1254 for (i = 0; i < f->num_members; i++) {
1255 if (f->arr[i] == sk)
1256 break;
1257 }
1258 BUG_ON(i >= f->num_members);
1259 f->arr[i] = f->arr[f->num_members - 1];
1260 f->num_members--;
1261 spin_unlock(&f->lock);
1262}
1263
1264static bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
1265{
1266 if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
1267 return true;
1268
1269 return false;
1270}
1271
1272static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1273{
1274 struct packet_sock *po = pkt_sk(sk);
1275 struct packet_fanout *f, *match;
1276 u8 type = type_flags & 0xff;
1277 u8 flags = type_flags >> 8;
1278 int err;
1279
1280 switch (type) {
1281 case PACKET_FANOUT_ROLLOVER:
1282 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1283 return -EINVAL;
1284 case PACKET_FANOUT_HASH:
1285 case PACKET_FANOUT_LB:
1286 case PACKET_FANOUT_CPU:
1287 break;
1288 default:
1289 return -EINVAL;
1290 }
1291
1292 if (!po->running)
1293 return -EINVAL;
1294
1295 if (po->fanout)
1296 return -EALREADY;
1297
1298 mutex_lock(&fanout_mutex);
1299 match = NULL;
1300 list_for_each_entry(f, &fanout_list, list) {
1301 if (f->id == id &&
1302 read_pnet(&f->net) == sock_net(sk)) {
1303 match = f;
1304 break;
1305 }
1306 }
1307 err = -EINVAL;
1308 if (match && match->flags != flags)
1309 goto out;
1310 if (!match) {
1311 err = -ENOMEM;
1312 match = kzalloc(sizeof(*match), GFP_KERNEL);
1313 if (!match)
1314 goto out;
1315 write_pnet(&match->net, sock_net(sk));
1316 match->id = id;
1317 match->type = type;
1318 match->flags = flags;
1319 atomic_set(&match->rr_cur, 0);
1320 INIT_LIST_HEAD(&match->list);
1321 spin_lock_init(&match->lock);
1322 atomic_set(&match->sk_ref, 0);
1323 match->prot_hook.type = po->prot_hook.type;
1324 match->prot_hook.dev = po->prot_hook.dev;
1325 match->prot_hook.func = packet_rcv_fanout;
1326 match->prot_hook.af_packet_priv = match;
1327 match->prot_hook.id_match = match_fanout_group;
1328 dev_add_pack(&match->prot_hook);
1329 list_add(&match->list, &fanout_list);
1330 }
1331 err = -EINVAL;
1332 if (match->type == type &&
1333 match->prot_hook.type == po->prot_hook.type &&
1334 match->prot_hook.dev == po->prot_hook.dev) {
1335 err = -ENOSPC;
1336 if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1337 __dev_remove_pack(&po->prot_hook);
1338 po->fanout = match;
1339 atomic_inc(&match->sk_ref);
1340 __fanout_link(sk, po);
1341 err = 0;
1342 }
1343 }
1344out:
1345 mutex_unlock(&fanout_mutex);
1346 return err;
1347}
1348
1349static void fanout_release(struct sock *sk)
1350{
1351 struct packet_sock *po = pkt_sk(sk);
1352 struct packet_fanout *f;
1353
1354 f = po->fanout;
1355 if (!f)
1356 return;
1357
1358 mutex_lock(&fanout_mutex);
1359 po->fanout = NULL;
1360
1361 if (atomic_dec_and_test(&f->sk_ref)) {
1362 list_del(&f->list);
1363 dev_remove_pack(&f->prot_hook);
1364 kfree(f);
1365 }
1366 mutex_unlock(&fanout_mutex);
1367}
1368
1369static const struct proto_ops packet_ops;
1370
1371static const struct proto_ops packet_ops_spkt;
1372
1373static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1374 struct packet_type *pt, struct net_device *orig_dev)
1375{
1376 struct sock *sk;
1377 struct sockaddr_pkt *spkt;
1378
1379
1380
1381
1382
1383
1384 sk = pt->af_packet_priv;
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397 if (skb->pkt_type == PACKET_LOOPBACK)
1398 goto out;
1399
1400 if (!net_eq(dev_net(dev), sock_net(sk)))
1401 goto out;
1402
1403 skb = skb_share_check(skb, GFP_ATOMIC);
1404 if (skb == NULL)
1405 goto oom;
1406
1407
1408 skb_dst_drop(skb);
1409
1410
1411 nf_reset(skb);
1412
1413 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1414
1415 skb_push(skb, skb->data - skb_mac_header(skb));
1416
1417
1418
1419
1420
1421 spkt->spkt_family = dev->type;
1422 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1423 spkt->spkt_protocol = skb->protocol;
1424
1425
1426
1427
1428
1429
1430 if (sock_queue_rcv_skb(sk, skb) == 0)
1431 return 0;
1432
1433out:
1434 kfree_skb(skb);
1435oom:
1436 return 0;
1437}
1438
1439
1440
1441
1442
1443
1444
/*
 * sendmsg() handler for SOCK_PACKET sockets.
 *
 * The destination is a struct sockaddr_pkt naming the output device; a
 * destination address is mandatory (-ENOTCONN without one).  The payload is
 * copied into a freshly allocated skb and handed to dev_queue_xmit().
 *
 * Returns @len on success or a negative errno: -EINVAL (address too short),
 * -ENODEV (no such device), -ENETDOWN (device not up), -EPROTONOSUPPORT
 * (SOCK_NOFCS set but unsupported by the device), -EMSGSIZE (frame too big),
 * -ENOBUFS (allocation failure) or the error from copying the user data.
 */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	__be16 proto = 0;
	int err;
	int extra_len = 0;

	/* Validate the destination address. */
	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		/* Only a full-size sockaddr_pkt carries a protocol field. */
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;

	/* Force NUL termination of the device name before looking it up. */
	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
retry:
	/*
	 * The device is looked up (and all checks redone) on every pass:
	 * the first pass only sizes and fills the skb outside the RCU
	 * section, the second pass transmits it.
	 */
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/* Sender supplies its own FCS: allow four extra bytes if supported. */
	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
		if (!netif_supports_nofcs(dev)) {
			err = -EPROTONOSUPPORT;
			goto out_unlock;
		}
		extra_len = 4;
	}

	/* Coarse size limit; room for a VLAN header is allowed for now. */
	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		int tlen = dev->needed_tailroom;
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		/* GFP_KERNEL allocation: leave the RCU read-side section first. */
		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;

		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/*
		 * The user data starts with the link-level header, so move
		 * the data pointer back by the header length while the
		 * network header stays where it was set above.
		 */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			/* Frame shorter than the header: network header == data. */
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
		if (err)
			goto out_free;
		/* Re-resolve the device now that the skb is ready. */
		goto retry;
	}

	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
		/*
		 * Larger than a plain frame: only acceptable when the frame
		 * carries an 802.1Q tag.
		 */
		struct ethhdr *ehdr;
		skb_reset_mac_header(skb);
		ehdr = eth_hdr(skb);
		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
			err = -EMSGSIZE;
			goto out_unlock;
		}
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);

	if (unlikely(extra_len == 4))
		skb->no_fcs = 1;

	skb_probe_transport_header(skb, 0);

	/* The result of the transmit attempt is not reported to the caller. */
	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	/* skb may still be NULL here when the first pass failed. */
	kfree_skb(skb);
	return err;
}
1566
1567static unsigned int run_filter(const struct sk_buff *skb,
1568 const struct sock *sk,
1569 unsigned int res)
1570{
1571 struct sk_filter *filter;
1572
1573 rcu_read_lock();
1574 filter = rcu_dereference(sk->sk_filter);
1575 if (filter != NULL)
1576 res = SK_RUN_FILTER(filter, skb);
1577 rcu_read_unlock();
1578
1579 return res;
1580}
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
1595 struct packet_type *pt, struct net_device *orig_dev)
1596{
1597 struct sock *sk;
1598 struct sockaddr_ll *sll;
1599 struct packet_sock *po;
1600 u8 *skb_head = skb->data;
1601 int skb_len = skb->len;
1602 unsigned int snaplen, res;
1603
1604 if (skb->pkt_type == PACKET_LOOPBACK)
1605 goto drop;
1606
1607 sk = pt->af_packet_priv;
1608 po = pkt_sk(sk);
1609
1610 if (!net_eq(dev_net(dev), sock_net(sk)))
1611 goto drop;
1612
1613 skb->dev = dev;
1614
1615 if (dev->header_ops) {
1616
1617
1618
1619
1620
1621
1622
1623 if (sk->sk_type != SOCK_DGRAM)
1624 skb_push(skb, skb->data - skb_mac_header(skb));
1625 else if (skb->pkt_type == PACKET_OUTGOING) {
1626
1627 skb_pull(skb, skb_network_offset(skb));
1628 }
1629 }
1630
1631 snaplen = skb->len;
1632
1633 res = run_filter(skb, sk, snaplen);
1634 if (!res)
1635 goto drop_n_restore;
1636 if (snaplen > res)
1637 snaplen = res;
1638
1639 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
1640 goto drop_n_acct;
1641
1642 if (skb_shared(skb)) {
1643 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
1644 if (nskb == NULL)
1645 goto drop_n_acct;
1646
1647 if (skb_head != skb->data) {
1648 skb->data = skb_head;
1649 skb->len = skb_len;
1650 }
1651 consume_skb(skb);
1652 skb = nskb;
1653 }
1654
1655 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
1656 sizeof(skb->cb));
1657
1658 sll = &PACKET_SKB_CB(skb)->sa.ll;
1659 sll->sll_family = AF_PACKET;
1660 sll->sll_hatype = dev->type;
1661 sll->sll_protocol = skb->protocol;
1662 sll->sll_pkttype = skb->pkt_type;
1663 if (unlikely(po->origdev))
1664 sll->sll_ifindex = orig_dev->ifindex;
1665 else
1666 sll->sll_ifindex = dev->ifindex;
1667
1668 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1669
1670 PACKET_SKB_CB(skb)->origlen = skb->len;
1671
1672 if (pskb_trim(skb, snaplen))
1673 goto drop_n_acct;
1674
1675 skb_set_owner_r(skb, sk);
1676 skb->dev = NULL;
1677 skb_dst_drop(skb);
1678
1679
1680 nf_reset(skb);
1681
1682 spin_lock(&sk->sk_receive_queue.lock);
1683 po->stats.stats1.tp_packets++;
1684 skb->dropcount = atomic_read(&sk->sk_drops);
1685 __skb_queue_tail(&sk->sk_receive_queue, skb);
1686 spin_unlock(&sk->sk_receive_queue.lock);
1687 sk->sk_data_ready(sk, skb->len);
1688 return 0;
1689
1690drop_n_acct:
1691 spin_lock(&sk->sk_receive_queue.lock);
1692 po->stats.stats1.tp_drops++;
1693 atomic_inc(&sk->sk_drops);
1694 spin_unlock(&sk->sk_receive_queue.lock);
1695
1696drop_n_restore:
1697 if (skb_head != skb->data && skb_shared(skb)) {
1698 skb->data = skb_head;
1699 skb->len = skb_len;
1700 }
1701drop:
1702 consume_skb(skb);
1703 return 0;
1704}
1705
1706static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1707 struct packet_type *pt, struct net_device *orig_dev)
1708{
1709 struct sock *sk;
1710 struct packet_sock *po;
1711 struct sockaddr_ll *sll;
1712 union tpacket_uhdr h;
1713 u8 *skb_head = skb->data;
1714 int skb_len = skb->len;
1715 unsigned int snaplen, res;
1716 unsigned long status = TP_STATUS_USER;
1717 unsigned short macoff, netoff, hdrlen;
1718 struct sk_buff *copy_skb = NULL;
1719 struct timespec ts;
1720 __u32 ts_status;
1721
1722 if (skb->pkt_type == PACKET_LOOPBACK)
1723 goto drop;
1724
1725 sk = pt->af_packet_priv;
1726 po = pkt_sk(sk);
1727
1728 if (!net_eq(dev_net(dev), sock_net(sk)))
1729 goto drop;
1730
1731 if (dev->header_ops) {
1732 if (sk->sk_type != SOCK_DGRAM)
1733 skb_push(skb, skb->data - skb_mac_header(skb));
1734 else if (skb->pkt_type == PACKET_OUTGOING) {
1735
1736 skb_pull(skb, skb_network_offset(skb));
1737 }
1738 }
1739
1740 if (skb->ip_summed == CHECKSUM_PARTIAL)
1741 status |= TP_STATUS_CSUMNOTREADY;
1742
1743 snaplen = skb->len;
1744
1745 res = run_filter(skb, sk, snaplen);
1746 if (!res)
1747 goto drop_n_restore;
1748 if (snaplen > res)
1749 snaplen = res;
1750
1751 if (sk->sk_type == SOCK_DGRAM) {
1752 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
1753 po->tp_reserve;
1754 } else {
1755 unsigned int maclen = skb_network_offset(skb);
1756 netoff = TPACKET_ALIGN(po->tp_hdrlen +
1757 (maclen < 16 ? 16 : maclen)) +
1758 po->tp_reserve;
1759 macoff = netoff - maclen;
1760 }
1761 if (po->tp_version <= TPACKET_V2) {
1762 if (macoff + snaplen > po->rx_ring.frame_size) {
1763 if (po->copy_thresh &&
1764 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1765 if (skb_shared(skb)) {
1766 copy_skb = skb_clone(skb, GFP_ATOMIC);
1767 } else {
1768 copy_skb = skb_get(skb);
1769 skb_head = skb->data;
1770 }
1771 if (copy_skb)
1772 skb_set_owner_r(copy_skb, sk);
1773 }
1774 snaplen = po->rx_ring.frame_size - macoff;
1775 if ((int)snaplen < 0)
1776 snaplen = 0;
1777 }
1778 }
1779 spin_lock(&sk->sk_receive_queue.lock);
1780 h.raw = packet_current_rx_frame(po, skb,
1781 TP_STATUS_KERNEL, (macoff+snaplen));
1782 if (!h.raw)
1783 goto ring_is_full;
1784 if (po->tp_version <= TPACKET_V2) {
1785 packet_increment_rx_head(po, &po->rx_ring);
1786
1787
1788
1789
1790
1791
1792 if (po->stats.stats1.tp_drops)
1793 status |= TP_STATUS_LOSING;
1794 }
1795 po->stats.stats1.tp_packets++;
1796 if (copy_skb) {
1797 status |= TP_STATUS_COPY;
1798 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
1799 }
1800 spin_unlock(&sk->sk_receive_queue.lock);
1801
1802 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
1803
1804 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
1805 getnstimeofday(&ts);
1806
1807 status |= ts_status;
1808
1809 switch (po->tp_version) {
1810 case TPACKET_V1:
1811 h.h1->tp_len = skb->len;
1812 h.h1->tp_snaplen = snaplen;
1813 h.h1->tp_mac = macoff;
1814 h.h1->tp_net = netoff;
1815 h.h1->tp_sec = ts.tv_sec;
1816 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
1817 hdrlen = sizeof(*h.h1);
1818 break;
1819 case TPACKET_V2:
1820 h.h2->tp_len = skb->len;
1821 h.h2->tp_snaplen = snaplen;
1822 h.h2->tp_mac = macoff;
1823 h.h2->tp_net = netoff;
1824 h.h2->tp_sec = ts.tv_sec;
1825 h.h2->tp_nsec = ts.tv_nsec;
1826 if (vlan_tx_tag_present(skb)) {
1827 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
1828 status |= TP_STATUS_VLAN_VALID;
1829 } else {
1830 h.h2->tp_vlan_tci = 0;
1831 }
1832 h.h2->tp_padding = 0;
1833 hdrlen = sizeof(*h.h2);
1834 break;
1835 case TPACKET_V3:
1836
1837
1838
1839 h.h3->tp_status |= status;
1840 h.h3->tp_len = skb->len;
1841 h.h3->tp_snaplen = snaplen;
1842 h.h3->tp_mac = macoff;
1843 h.h3->tp_net = netoff;
1844 h.h3->tp_sec = ts.tv_sec;
1845 h.h3->tp_nsec = ts.tv_nsec;
1846 hdrlen = sizeof(*h.h3);
1847 break;
1848 default:
1849 BUG();
1850 }
1851
1852 sll = h.raw + TPACKET_ALIGN(hdrlen);
1853 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1854 sll->sll_family = AF_PACKET;
1855 sll->sll_hatype = dev->type;
1856 sll->sll_protocol = skb->protocol;
1857 sll->sll_pkttype = skb->pkt_type;
1858 if (unlikely(po->origdev))
1859 sll->sll_ifindex = orig_dev->ifindex;
1860 else
1861 sll->sll_ifindex = dev->ifindex;
1862
1863 smp_mb();
1864#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
1865 {
1866 u8 *start, *end;
1867
1868 if (po->tp_version <= TPACKET_V2) {
1869 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
1870 + macoff + snaplen);
1871 for (start = h.raw; start < end; start += PAGE_SIZE)
1872 flush_dcache_page(pgv_to_page(start));
1873 }
1874 smp_wmb();
1875 }
1876#endif
1877 if (po->tp_version <= TPACKET_V2)
1878 __packet_set_status(po, h.raw, status);
1879 else
1880 prb_clear_blk_fill_status(&po->rx_ring);
1881
1882 sk->sk_data_ready(sk, 0);
1883
1884drop_n_restore:
1885 if (skb_head != skb->data && skb_shared(skb)) {
1886 skb->data = skb_head;
1887 skb->len = skb_len;
1888 }
1889drop:
1890 kfree_skb(skb);
1891 return 0;
1892
1893ring_is_full:
1894 po->stats.stats1.tp_drops++;
1895 spin_unlock(&sk->sk_receive_queue.lock);
1896
1897 sk->sk_data_ready(sk, 0);
1898 kfree_skb(copy_skb);
1899 goto drop_n_restore;
1900}
1901
1902static void tpacket_destruct_skb(struct sk_buff *skb)
1903{
1904 struct packet_sock *po = pkt_sk(skb->sk);
1905 void *ph;
1906
1907 if (likely(po->tx_ring.pg_vec)) {
1908 __u32 ts;
1909
1910 ph = skb_shinfo(skb)->destructor_arg;
1911 BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
1912 atomic_dec(&po->tx_ring.pending);
1913
1914 ts = __packet_set_timestamp(po, ph, skb);
1915 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
1916 }
1917
1918 sock_wfree(skb);
1919}
1920
1921static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
1922 void *frame, struct net_device *dev, int size_max,
1923 __be16 proto, unsigned char *addr, int hlen)
1924{
1925 union tpacket_uhdr ph;
1926 int to_write, offset, len, tp_len, nr_frags, len_max;
1927 struct socket *sock = po->sk.sk_socket;
1928 struct page *page;
1929 void *data;
1930 int err;
1931
1932 ph.raw = frame;
1933
1934 skb->protocol = proto;
1935 skb->dev = dev;
1936 skb->priority = po->sk.sk_priority;
1937 skb->mark = po->sk.sk_mark;
1938 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
1939 skb_shinfo(skb)->destructor_arg = ph.raw;
1940
1941 switch (po->tp_version) {
1942 case TPACKET_V2:
1943 tp_len = ph.h2->tp_len;
1944 break;
1945 default:
1946 tp_len = ph.h1->tp_len;
1947 break;
1948 }
1949 if (unlikely(tp_len > size_max)) {
1950 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
1951 return -EMSGSIZE;
1952 }
1953
1954 skb_reserve(skb, hlen);
1955 skb_reset_network_header(skb);
1956 skb_probe_transport_header(skb, 0);
1957
1958 if (po->tp_tx_has_off) {
1959 int off_min, off_max, off;
1960 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
1961 off_max = po->tx_ring.frame_size - tp_len;
1962 if (sock->type == SOCK_DGRAM) {
1963 switch (po->tp_version) {
1964 case TPACKET_V2:
1965 off = ph.h2->tp_net;
1966 break;
1967 default:
1968 off = ph.h1->tp_net;
1969 break;
1970 }
1971 } else {
1972 switch (po->tp_version) {
1973 case TPACKET_V2:
1974 off = ph.h2->tp_mac;
1975 break;
1976 default:
1977 off = ph.h1->tp_mac;
1978 break;
1979 }
1980 }
1981 if (unlikely((off < off_min) || (off_max < off)))
1982 return -EINVAL;
1983 data = ph.raw + off;
1984 } else {
1985 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
1986 }
1987 to_write = tp_len;
1988
1989 if (sock->type == SOCK_DGRAM) {
1990 err = dev_hard_header(skb, dev, ntohs(proto), addr,
1991 NULL, tp_len);
1992 if (unlikely(err < 0))
1993 return -EINVAL;
1994 } else if (dev->hard_header_len) {
1995
1996 if (unlikely(tp_len <= dev->hard_header_len)) {
1997 pr_err("packet size is too short (%d < %d)\n",
1998 tp_len, dev->hard_header_len);
1999 return -EINVAL;
2000 }
2001
2002 skb_push(skb, dev->hard_header_len);
2003 err = skb_store_bits(skb, 0, data,
2004 dev->hard_header_len);
2005 if (unlikely(err))
2006 return err;
2007
2008 data += dev->hard_header_len;
2009 to_write -= dev->hard_header_len;
2010 }
2011
2012 offset = offset_in_page(data);
2013 len_max = PAGE_SIZE - offset;
2014 len = ((to_write > len_max) ? len_max : to_write);
2015
2016 skb->data_len = to_write;
2017 skb->len += to_write;
2018 skb->truesize += to_write;
2019 atomic_add(to_write, &po->sk.sk_wmem_alloc);
2020
2021 while (likely(to_write)) {
2022 nr_frags = skb_shinfo(skb)->nr_frags;
2023
2024 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2025 pr_err("Packet exceed the number of skb frags(%lu)\n",
2026 MAX_SKB_FRAGS);
2027 return -EFAULT;
2028 }
2029
2030 page = pgv_to_page(data);
2031 data += len;
2032 flush_dcache_page(page);
2033 get_page(page);
2034 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2035 to_write -= len;
2036 offset = 0;
2037 len_max = PAGE_SIZE;
2038 len = ((to_write > len_max) ? len_max : to_write);
2039 }
2040
2041 return tp_len;
2042}
2043
2044static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2045{
2046 struct sk_buff *skb;
2047 struct net_device *dev;
2048 __be16 proto;
2049 bool need_rls_dev = false;
2050 int err, reserve = 0;
2051 void *ph;
2052 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2053 int tp_len, size_max;
2054 unsigned char *addr;
2055 int len_sum = 0;
2056 int status = TP_STATUS_AVAILABLE;
2057 int hlen, tlen;
2058
2059 mutex_lock(&po->pg_vec_lock);
2060
2061 if (saddr == NULL) {
2062 dev = po->prot_hook.dev;
2063 proto = po->num;
2064 addr = NULL;
2065 } else {
2066 err = -EINVAL;
2067 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2068 goto out;
2069 if (msg->msg_namelen < (saddr->sll_halen
2070 + offsetof(struct sockaddr_ll,
2071 sll_addr)))
2072 goto out;
2073 proto = saddr->sll_protocol;
2074 addr = saddr->sll_addr;
2075 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2076 need_rls_dev = true;
2077 }
2078
2079 err = -ENXIO;
2080 if (unlikely(dev == NULL))
2081 goto out;
2082
2083 reserve = dev->hard_header_len;
2084
2085 err = -ENETDOWN;
2086 if (unlikely(!(dev->flags & IFF_UP)))
2087 goto out_put;
2088
2089 size_max = po->tx_ring.frame_size
2090 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2091
2092 if (size_max > dev->mtu + reserve)
2093 size_max = dev->mtu + reserve;
2094
2095 do {
2096 ph = packet_current_frame(po, &po->tx_ring,
2097 TP_STATUS_SEND_REQUEST);
2098
2099 if (unlikely(ph == NULL)) {
2100 schedule();
2101 continue;
2102 }
2103
2104 status = TP_STATUS_SEND_REQUEST;
2105 hlen = LL_RESERVED_SPACE(dev);
2106 tlen = dev->needed_tailroom;
2107 skb = sock_alloc_send_skb(&po->sk,
2108 hlen + tlen + sizeof(struct sockaddr_ll),
2109 0, &err);
2110
2111 if (unlikely(skb == NULL))
2112 goto out_status;
2113
2114 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2115 addr, hlen);
2116
2117 if (unlikely(tp_len < 0)) {
2118 if (po->tp_loss) {
2119 __packet_set_status(po, ph,
2120 TP_STATUS_AVAILABLE);
2121 packet_increment_head(&po->tx_ring);
2122 kfree_skb(skb);
2123 continue;
2124 } else {
2125 status = TP_STATUS_WRONG_FORMAT;
2126 err = tp_len;
2127 goto out_status;
2128 }
2129 }
2130
2131 skb->destructor = tpacket_destruct_skb;
2132 __packet_set_status(po, ph, TP_STATUS_SENDING);
2133 atomic_inc(&po->tx_ring.pending);
2134
2135 status = TP_STATUS_SEND_REQUEST;
2136 err = dev_queue_xmit(skb);
2137 if (unlikely(err > 0)) {
2138 err = net_xmit_errno(err);
2139 if (err && __packet_get_status(po, ph) ==
2140 TP_STATUS_AVAILABLE) {
2141
2142 skb = NULL;
2143 goto out_status;
2144 }
2145
2146
2147
2148
2149 err = 0;
2150 }
2151 packet_increment_head(&po->tx_ring);
2152 len_sum += tp_len;
2153 } while (likely((ph != NULL) ||
2154 ((!(msg->msg_flags & MSG_DONTWAIT)) &&
2155 (atomic_read(&po->tx_ring.pending))))
2156 );
2157
2158 err = len_sum;
2159 goto out_put;
2160
2161out_status:
2162 __packet_set_status(po, ph, status);
2163 kfree_skb(skb);
2164out_put:
2165 if (need_rls_dev)
2166 dev_put(dev);
2167out:
2168 mutex_unlock(&po->pg_vec_lock);
2169 return err;
2170}
2171
2172static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2173 size_t reserve, size_t len,
2174 size_t linear, int noblock,
2175 int *err)
2176{
2177 struct sk_buff *skb;
2178
2179
2180 if (prepad + len < PAGE_SIZE || !linear)
2181 linear = len;
2182
2183 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2184 err);
2185 if (!skb)
2186 return NULL;
2187
2188 skb_reserve(skb, reserve);
2189 skb_put(skb, linear);
2190 skb->data_len = len - linear;
2191 skb->len += len - linear;
2192
2193 return skb;
2194}
2195
2196static int packet_snd(struct socket *sock,
2197 struct msghdr *msg, size_t len)
2198{
2199 struct sock *sk = sock->sk;
2200 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2201 struct sk_buff *skb;
2202 struct net_device *dev;
2203 __be16 proto;
2204 bool need_rls_dev = false;
2205 unsigned char *addr;
2206 int err, reserve = 0;
2207 struct virtio_net_hdr vnet_hdr = { 0 };
2208 int offset = 0;
2209 int vnet_hdr_len;
2210 struct packet_sock *po = pkt_sk(sk);
2211 unsigned short gso_type = 0;
2212 int hlen, tlen;
2213 int extra_len = 0;
2214
2215
2216
2217
2218
2219 if (saddr == NULL) {
2220 dev = po->prot_hook.dev;
2221 proto = po->num;
2222 addr = NULL;
2223 } else {
2224 err = -EINVAL;
2225 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2226 goto out;
2227 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2228 goto out;
2229 proto = saddr->sll_protocol;
2230 addr = saddr->sll_addr;
2231 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2232 need_rls_dev = true;
2233 }
2234
2235 err = -ENXIO;
2236 if (dev == NULL)
2237 goto out_unlock;
2238 if (sock->type == SOCK_RAW)
2239 reserve = dev->hard_header_len;
2240
2241 err = -ENETDOWN;
2242 if (!(dev->flags & IFF_UP))
2243 goto out_unlock;
2244
2245 if (po->has_vnet_hdr) {
2246 vnet_hdr_len = sizeof(vnet_hdr);
2247
2248 err = -EINVAL;
2249 if (len < vnet_hdr_len)
2250 goto out_unlock;
2251
2252 len -= vnet_hdr_len;
2253
2254 err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
2255 vnet_hdr_len);
2256 if (err < 0)
2257 goto out_unlock;
2258
2259 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2260 (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
2261 vnet_hdr.hdr_len))
2262 vnet_hdr.hdr_len = vnet_hdr.csum_start +
2263 vnet_hdr.csum_offset + 2;
2264
2265 err = -EINVAL;
2266 if (vnet_hdr.hdr_len > len)
2267 goto out_unlock;
2268
2269 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2270 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2271 case VIRTIO_NET_HDR_GSO_TCPV4:
2272 gso_type = SKB_GSO_TCPV4;
2273 break;
2274 case VIRTIO_NET_HDR_GSO_TCPV6:
2275 gso_type = SKB_GSO_TCPV6;
2276 break;
2277 case VIRTIO_NET_HDR_GSO_UDP:
2278 gso_type = SKB_GSO_UDP;
2279 break;
2280 default:
2281 goto out_unlock;
2282 }
2283
2284 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2285 gso_type |= SKB_GSO_TCP_ECN;
2286
2287 if (vnet_hdr.gso_size == 0)
2288 goto out_unlock;
2289
2290 }
2291 }
2292
2293 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2294 if (!netif_supports_nofcs(dev)) {
2295 err = -EPROTONOSUPPORT;
2296 goto out_unlock;
2297 }
2298 extra_len = 4;
2299 }
2300
2301 err = -EMSGSIZE;
2302 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2303 goto out_unlock;
2304
2305 err = -ENOBUFS;
2306 hlen = LL_RESERVED_SPACE(dev);
2307 tlen = dev->needed_tailroom;
2308 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, vnet_hdr.hdr_len,
2309 msg->msg_flags & MSG_DONTWAIT, &err);
2310 if (skb == NULL)
2311 goto out_unlock;
2312
2313 skb_set_network_header(skb, reserve);
2314
2315 err = -EINVAL;
2316 if (sock->type == SOCK_DGRAM &&
2317 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
2318 goto out_free;
2319
2320
2321 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
2322 if (err)
2323 goto out_free;
2324
2325 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2326
2327 if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
2328
2329
2330
2331
2332 struct ethhdr *ehdr;
2333 skb_reset_mac_header(skb);
2334 ehdr = eth_hdr(skb);
2335 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2336 err = -EMSGSIZE;
2337 goto out_free;
2338 }
2339 }
2340
2341 skb->protocol = proto;
2342 skb->dev = dev;
2343 skb->priority = sk->sk_priority;
2344 skb->mark = sk->sk_mark;
2345
2346 if (po->has_vnet_hdr) {
2347 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2348 if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
2349 vnet_hdr.csum_offset)) {
2350 err = -EINVAL;
2351 goto out_free;
2352 }
2353 }
2354
2355 skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
2356 skb_shinfo(skb)->gso_type = gso_type;
2357
2358
2359 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2360 skb_shinfo(skb)->gso_segs = 0;
2361
2362 len += vnet_hdr_len;
2363 }
2364
2365 skb_probe_transport_header(skb, reserve);
2366
2367 if (unlikely(extra_len == 4))
2368 skb->no_fcs = 1;
2369
2370
2371
2372
2373
2374 err = dev_queue_xmit(skb);
2375 if (err > 0 && (err = net_xmit_errno(err)) != 0)
2376 goto out_unlock;
2377
2378 if (need_rls_dev)
2379 dev_put(dev);
2380
2381 return len;
2382
2383out_free:
2384 kfree_skb(skb);
2385out_unlock:
2386 if (dev && need_rls_dev)
2387 dev_put(dev);
2388out:
2389 return err;
2390}
2391
2392static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
2393 struct msghdr *msg, size_t len)
2394{
2395 struct sock *sk = sock->sk;
2396 struct packet_sock *po = pkt_sk(sk);
2397 if (po->tx_ring.pg_vec)
2398 return tpacket_snd(po, msg);
2399 else
2400 return packet_snd(sock, msg, len);
2401}
2402
2403
2404
2405
2406
2407
/*
 * Close a packet socket.
 *
 * Removes the socket from the per-namespace list, unregisters its protocol
 * hook, drops the reference on the bound device, flushes the multicast
 * list, tears down any RX/TX rings and the fanout membership, and finally
 * orphans the sock and drops the socket's reference on it.
 *
 * Returns 0 (also when the socket has no sock attached).
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	union tpacket_req_u req_u;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Remove the socket from the namespace's list of packet sockets. */
	mutex_lock(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->packet.sklist_lock);

	preempt_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	preempt_enable();

	/* Stop receiving and give up the reference on the bound device. */
	spin_lock(&po->bind_lock);
	unregister_prot_hook(sk, false);
	if (po->prot_hook.dev) {
		dev_put(po->prot_hook.dev);
		po->prot_hook.dev = NULL;
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	/* An all-zero request with closing=1 tears the ring down. */
	if (po->rx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 0);
	}

	if (po->tx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 1);
	}

	fanout_release(sk);

	/* NOTE(review): presumably waits out in-flight receive hooks - confirm. */
	synchronize_net();

	/* Detach the sock from the struct socket. */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Discard anything still queued for reading. */
	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
2466
2467
2468
2469
2470
2471static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
2472{
2473 struct packet_sock *po = pkt_sk(sk);
2474
2475 if (po->fanout) {
2476 if (dev)
2477 dev_put(dev);
2478
2479 return -EINVAL;
2480 }
2481
2482 lock_sock(sk);
2483
2484 spin_lock(&po->bind_lock);
2485 unregister_prot_hook(sk, true);
2486 po->num = protocol;
2487 po->prot_hook.type = protocol;
2488 if (po->prot_hook.dev)
2489 dev_put(po->prot_hook.dev);
2490 po->prot_hook.dev = dev;
2491
2492 po->ifindex = dev ? dev->ifindex : 0;
2493
2494 if (protocol == 0)
2495 goto out_unlock;
2496
2497 if (!dev || (dev->flags & IFF_UP)) {
2498 register_prot_hook(sk);
2499 } else {
2500 sk->sk_err = ENETDOWN;
2501 if (!sock_flag(sk, SOCK_DEAD))
2502 sk->sk_error_report(sk);
2503 }
2504
2505out_unlock:
2506 spin_unlock(&po->bind_lock);
2507 release_sock(sk);
2508 return 0;
2509}
2510
2511
2512
2513
2514
2515static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2516 int addr_len)
2517{
2518 struct sock *sk = sock->sk;
2519 char name[15];
2520 struct net_device *dev;
2521 int err = -ENODEV;
2522
2523
2524
2525
2526
2527 if (addr_len != sizeof(struct sockaddr))
2528 return -EINVAL;
2529 strlcpy(name, uaddr->sa_data, sizeof(name));
2530
2531 dev = dev_get_by_name(sock_net(sk), name);
2532 if (dev)
2533 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
2534 return err;
2535}
2536
2537static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
2538{
2539 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
2540 struct sock *sk = sock->sk;
2541 struct net_device *dev = NULL;
2542 int err;
2543
2544
2545
2546
2547
2548
2549 if (addr_len < sizeof(struct sockaddr_ll))
2550 return -EINVAL;
2551 if (sll->sll_family != AF_PACKET)
2552 return -EINVAL;
2553
2554 if (sll->sll_ifindex) {
2555 err = -ENODEV;
2556 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
2557 if (dev == NULL)
2558 goto out;
2559 }
2560 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
2561
2562out:
2563 return err;
2564}
2565
/*
 * Protocol descriptor passed to sk_alloc() in packet_create(); obj_size
 * makes every allocated sock large enough to hold a struct packet_sock.
 */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
2571
2572
2573
2574
2575
2576static int packet_create(struct net *net, struct socket *sock, int protocol,
2577 int kern)
2578{
2579 struct sock *sk;
2580 struct packet_sock *po;
2581 __be16 proto = (__force __be16)protocol;
2582 int err;
2583
2584 if (!ns_capable(net->user_ns, CAP_NET_RAW))
2585 return -EPERM;
2586 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
2587 sock->type != SOCK_PACKET)
2588 return -ESOCKTNOSUPPORT;
2589
2590 sock->state = SS_UNCONNECTED;
2591
2592 err = -ENOBUFS;
2593 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
2594 if (sk == NULL)
2595 goto out;
2596
2597 sock->ops = &packet_ops;
2598 if (sock->type == SOCK_PACKET)
2599 sock->ops = &packet_ops_spkt;
2600
2601 sock_init_data(sock, sk);
2602
2603 po = pkt_sk(sk);
2604 sk->sk_family = PF_PACKET;
2605 po->num = proto;
2606
2607 sk->sk_destruct = packet_sock_destruct;
2608 sk_refcnt_debug_inc(sk);
2609
2610
2611
2612
2613
2614 spin_lock_init(&po->bind_lock);
2615 mutex_init(&po->pg_vec_lock);
2616 po->prot_hook.func = packet_rcv;
2617
2618 if (sock->type == SOCK_PACKET)
2619 po->prot_hook.func = packet_rcv_spkt;
2620
2621 po->prot_hook.af_packet_priv = sk;
2622
2623 if (proto) {
2624 po->prot_hook.type = proto;
2625 register_prot_hook(sk);
2626 }
2627
2628 mutex_lock(&net->packet.sklist_lock);
2629 sk_add_node_rcu(sk, &net->packet.sklist);
2630 mutex_unlock(&net->packet.sklist_lock);
2631
2632 preempt_disable();
2633 sock_prot_inuse_add(net, &packet_proto, 1);
2634 preempt_enable();
2635
2636 return 0;
2637out:
2638 return err;
2639}
2640
2641static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2642{
2643 struct sock_exterr_skb *serr;
2644 struct sk_buff *skb, *skb2;
2645 int copied, err;
2646
2647 err = -EAGAIN;
2648 skb = skb_dequeue(&sk->sk_error_queue);
2649 if (skb == NULL)
2650 goto out;
2651
2652 copied = skb->len;
2653 if (copied > len) {
2654 msg->msg_flags |= MSG_TRUNC;
2655 copied = len;
2656 }
2657 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2658 if (err)
2659 goto out_free_skb;
2660
2661 sock_recv_timestamp(msg, sk, skb);
2662
2663 serr = SKB_EXT_ERR(skb);
2664 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2665 sizeof(serr->ee), &serr->ee);
2666
2667 msg->msg_flags |= MSG_ERRQUEUE;
2668 err = copied;
2669
2670
2671 spin_lock_bh(&sk->sk_error_queue.lock);
2672 sk->sk_err = 0;
2673 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2674 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2675 spin_unlock_bh(&sk->sk_error_queue.lock);
2676 sk->sk_error_report(sk);
2677 } else
2678 spin_unlock_bh(&sk->sk_error_queue.lock);
2679
2680out_free_skb:
2681 kfree_skb(skb);
2682out:
2683 return err;
2684}
2685
2686
2687
2688
2689
2690
/*
 * recvmsg() handler shared by all packet socket types.
 *
 * Dequeues one datagram, optionally prefixes it with a
 * struct virtio_net_hdr (po->has_vnet_hdr), copies the payload and the
 * source address stored in the skb control block to the caller, and
 * optionally attaches a PACKET_AUXDATA control message.
 *
 * Returns the number of bytes delivered including the virtio header, or
 * the full packet length instead of the copied length when MSG_TRUNC was
 * passed in @flags; a negative errno on failure (-EINVAL for unsupported
 * flags or a buffer too small for the virtio header, or the error from
 * the datagram/copy helpers).
 */
static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;
	int vnet_hdr_len = 0;

	/* Reject any flag this handler does not understand. */
	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* Disabled check: refuse to read from a socket whose device is gone. */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/* Error-queue reads are handled separately. */
	if (flags & MSG_ERRQUEUE) {
		err = packet_recv_error(sk, msg, len);
		goto out;
	}

	/* Fetch the next datagram; err is set by the helper on failure. */
	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	if (skb == NULL)
		goto out;

	if (pkt_sk(sk)->has_vnet_hdr) {
		struct virtio_net_hdr vnet_hdr = { 0 };

		/* The caller's buffer must have room for the header. */
		err = -EINVAL;
		vnet_hdr_len = sizeof(vnet_hdr);
		if (len < vnet_hdr_len)
			goto out_free;

		len -= vnet_hdr_len;

		if (skb_is_gso(skb)) {
			struct skb_shared_info *sinfo = skb_shinfo(skb);

			/* Describe the segmentation state of the skb. */
			vnet_hdr.hdr_len = skb_headlen(skb);
			vnet_hdr.gso_size = sinfo->gso_size;
			if (sinfo->gso_type & SKB_GSO_TCPV4)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
			else if (sinfo->gso_type & SKB_GSO_TCPV6)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
			else if (sinfo->gso_type & SKB_GSO_UDP)
				vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
			else if (sinfo->gso_type & SKB_GSO_FCOE)
				/* Fails with -EINVAL: no virtio type is assigned for FCoE here. */
				goto out_free;
			else
				BUG();
			if (sinfo->gso_type & SKB_GSO_TCP_ECN)
				vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
		} else
			vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;

		/* Describe the checksum state of the skb. */
		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
			vnet_hdr.csum_start = skb_checksum_start_offset(skb);
			vnet_hdr.csum_offset = skb->csum_offset;
		} else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
			vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
		}

		err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
				     vnet_hdr_len);
		if (err < 0)
			goto out_free;
	}

	/*
	 * Size of the source address handed back: a full sockaddr_pkt for
	 * SOCK_PACKET, otherwise the sockaddr_ll header plus the actual
	 * hardware address length.
	 */
	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/* Copy as much as fits and flag truncation. */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_ts_and_drops(msg, sk, skb);

	/* The address was prepared by the receive hook in the control block. */
	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		/* origlen is the length before the receive hook trimmed the skb. */
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		if (vlan_tx_tag_present(skb)) {
			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
			aux.tp_status |= TP_STATUS_VLAN_VALID;
		} else {
			aux.tp_vlan_tci = 0;
		}
		aux.tp_padding = 0;
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 * With MSG_TRUNC the caller asked for the real packet length rather
	 * than the number of bytes copied.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
2843
2844static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
2845 int *uaddr_len, int peer)
2846{
2847 struct net_device *dev;
2848 struct sock *sk = sock->sk;
2849
2850 if (peer)
2851 return -EOPNOTSUPP;
2852
2853 uaddr->sa_family = AF_PACKET;
2854 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
2855 rcu_read_lock();
2856 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
2857 if (dev)
2858 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
2859 rcu_read_unlock();
2860 *uaddr_len = sizeof(*uaddr);
2861
2862 return 0;
2863}
2864
2865static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
2866 int *uaddr_len, int peer)
2867{
2868 struct net_device *dev;
2869 struct sock *sk = sock->sk;
2870 struct packet_sock *po = pkt_sk(sk);
2871 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
2872
2873 if (peer)
2874 return -EOPNOTSUPP;
2875
2876 sll->sll_family = AF_PACKET;
2877 sll->sll_ifindex = po->ifindex;
2878 sll->sll_protocol = po->num;
2879 sll->sll_pkttype = 0;
2880 rcu_read_lock();
2881 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
2882 if (dev) {
2883 sll->sll_hatype = dev->type;
2884 sll->sll_halen = dev->addr_len;
2885 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
2886 } else {
2887 sll->sll_hatype = 0;
2888 sll->sll_halen = 0;
2889 }
2890 rcu_read_unlock();
2891 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
2892
2893 return 0;
2894}
2895
2896static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
2897 int what)
2898{
2899 switch (i->type) {
2900 case PACKET_MR_MULTICAST:
2901 if (i->alen != dev->addr_len)
2902 return -EINVAL;
2903 if (what > 0)
2904 return dev_mc_add(dev, i->addr);
2905 else
2906 return dev_mc_del(dev, i->addr);
2907 break;
2908 case PACKET_MR_PROMISC:
2909 return dev_set_promiscuity(dev, what);
2910 break;
2911 case PACKET_MR_ALLMULTI:
2912 return dev_set_allmulti(dev, what);
2913 break;
2914 case PACKET_MR_UNICAST:
2915 if (i->alen != dev->addr_len)
2916 return -EINVAL;
2917 if (what > 0)
2918 return dev_uc_add(dev, i->addr);
2919 else
2920 return dev_uc_del(dev, i->addr);
2921 break;
2922 default:
2923 break;
2924 }
2925 return 0;
2926}
2927
2928static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
2929{
2930 for ( ; i; i = i->next) {
2931 if (i->ifindex == dev->ifindex)
2932 packet_dev_mc(dev, i, what);
2933 }
2934}
2935
2936static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
2937{
2938 struct packet_sock *po = pkt_sk(sk);
2939 struct packet_mclist *ml, *i;
2940 struct net_device *dev;
2941 int err;
2942
2943 rtnl_lock();
2944
2945 err = -ENODEV;
2946 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
2947 if (!dev)
2948 goto done;
2949
2950 err = -EINVAL;
2951 if (mreq->mr_alen > dev->addr_len)
2952 goto done;
2953
2954 err = -ENOBUFS;
2955 i = kmalloc(sizeof(*i), GFP_KERNEL);
2956 if (i == NULL)
2957 goto done;
2958
2959 err = 0;
2960 for (ml = po->mclist; ml; ml = ml->next) {
2961 if (ml->ifindex == mreq->mr_ifindex &&
2962 ml->type == mreq->mr_type &&
2963 ml->alen == mreq->mr_alen &&
2964 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
2965 ml->count++;
2966
2967 kfree(i);
2968 goto done;
2969 }
2970 }
2971
2972 i->type = mreq->mr_type;
2973 i->ifindex = mreq->mr_ifindex;
2974 i->alen = mreq->mr_alen;
2975 memcpy(i->addr, mreq->mr_address, i->alen);
2976 i->count = 1;
2977 i->next = po->mclist;
2978 po->mclist = i;
2979 err = packet_dev_mc(dev, i, 1);
2980 if (err) {
2981 po->mclist = i->next;
2982 kfree(i);
2983 }
2984
2985done:
2986 rtnl_unlock();
2987 return err;
2988}
2989
2990static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
2991{
2992 struct packet_mclist *ml, **mlp;
2993
2994 rtnl_lock();
2995
2996 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
2997 if (ml->ifindex == mreq->mr_ifindex &&
2998 ml->type == mreq->mr_type &&
2999 ml->alen == mreq->mr_alen &&
3000 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3001 if (--ml->count == 0) {
3002 struct net_device *dev;
3003 *mlp = ml->next;
3004 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3005 if (dev)
3006 packet_dev_mc(dev, ml, -1);
3007 kfree(ml);
3008 }
3009 rtnl_unlock();
3010 return 0;
3011 }
3012 }
3013 rtnl_unlock();
3014 return -EADDRNOTAVAIL;
3015}
3016
3017static void packet_flush_mclist(struct sock *sk)
3018{
3019 struct packet_sock *po = pkt_sk(sk);
3020 struct packet_mclist *ml;
3021
3022 if (!po->mclist)
3023 return;
3024
3025 rtnl_lock();
3026 while ((ml = po->mclist) != NULL) {
3027 struct net_device *dev;
3028
3029 po->mclist = ml->next;
3030 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3031 if (dev != NULL)
3032 packet_dev_mc(dev, ml, -1);
3033 kfree(ml);
3034 }
3035 rtnl_unlock();
3036}
3037
3038static int
3039packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3040{
3041 struct sock *sk = sock->sk;
3042 struct packet_sock *po = pkt_sk(sk);
3043 int ret;
3044
3045 if (level != SOL_PACKET)
3046 return -ENOPROTOOPT;
3047
3048 switch (optname) {
3049 case PACKET_ADD_MEMBERSHIP:
3050 case PACKET_DROP_MEMBERSHIP:
3051 {
3052 struct packet_mreq_max mreq;
3053 int len = optlen;
3054 memset(&mreq, 0, sizeof(mreq));
3055 if (len < sizeof(struct packet_mreq))
3056 return -EINVAL;
3057 if (len > sizeof(mreq))
3058 len = sizeof(mreq);
3059 if (copy_from_user(&mreq, optval, len))
3060 return -EFAULT;
3061 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3062 return -EINVAL;
3063 if (optname == PACKET_ADD_MEMBERSHIP)
3064 ret = packet_mc_add(sk, &mreq);
3065 else
3066 ret = packet_mc_drop(sk, &mreq);
3067 return ret;
3068 }
3069
3070 case PACKET_RX_RING:
3071 case PACKET_TX_RING:
3072 {
3073 union tpacket_req_u req_u;
3074 int len;
3075
3076 switch (po->tp_version) {
3077 case TPACKET_V1:
3078 case TPACKET_V2:
3079 len = sizeof(req_u.req);
3080 break;
3081 case TPACKET_V3:
3082 default:
3083 len = sizeof(req_u.req3);
3084 break;
3085 }
3086 if (optlen < len)
3087 return -EINVAL;
3088 if (pkt_sk(sk)->has_vnet_hdr)
3089 return -EINVAL;
3090 if (copy_from_user(&req_u.req, optval, len))
3091 return -EFAULT;
3092 return packet_set_ring(sk, &req_u, 0,
3093 optname == PACKET_TX_RING);
3094 }
3095 case PACKET_COPY_THRESH:
3096 {
3097 int val;
3098
3099 if (optlen != sizeof(val))
3100 return -EINVAL;
3101 if (copy_from_user(&val, optval, sizeof(val)))
3102 return -EFAULT;
3103
3104 pkt_sk(sk)->copy_thresh = val;
3105 return 0;
3106 }
3107 case PACKET_VERSION:
3108 {
3109 int val;
3110
3111 if (optlen != sizeof(val))
3112 return -EINVAL;
3113 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3114 return -EBUSY;
3115 if (copy_from_user(&val, optval, sizeof(val)))
3116 return -EFAULT;
3117 switch (val) {
3118 case TPACKET_V1:
3119 case TPACKET_V2:
3120 case TPACKET_V3:
3121 po->tp_version = val;
3122 return 0;
3123 default:
3124 return -EINVAL;
3125 }
3126 }
3127 case PACKET_RESERVE:
3128 {
3129 unsigned int val;
3130
3131 if (optlen != sizeof(val))
3132 return -EINVAL;
3133 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3134 return -EBUSY;
3135 if (copy_from_user(&val, optval, sizeof(val)))
3136 return -EFAULT;
3137 po->tp_reserve = val;
3138 return 0;
3139 }
3140 case PACKET_LOSS:
3141 {
3142 unsigned int val;
3143
3144 if (optlen != sizeof(val))
3145 return -EINVAL;
3146 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3147 return -EBUSY;
3148 if (copy_from_user(&val, optval, sizeof(val)))
3149 return -EFAULT;
3150 po->tp_loss = !!val;
3151 return 0;
3152 }
3153 case PACKET_AUXDATA:
3154 {
3155 int val;
3156
3157 if (optlen < sizeof(val))
3158 return -EINVAL;
3159 if (copy_from_user(&val, optval, sizeof(val)))
3160 return -EFAULT;
3161
3162 po->auxdata = !!val;
3163 return 0;
3164 }
3165 case PACKET_ORIGDEV:
3166 {
3167 int val;
3168
3169 if (optlen < sizeof(val))
3170 return -EINVAL;
3171 if (copy_from_user(&val, optval, sizeof(val)))
3172 return -EFAULT;
3173
3174 po->origdev = !!val;
3175 return 0;
3176 }
3177 case PACKET_VNET_HDR:
3178 {
3179 int val;
3180
3181 if (sock->type != SOCK_RAW)
3182 return -EINVAL;
3183 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3184 return -EBUSY;
3185 if (optlen < sizeof(val))
3186 return -EINVAL;
3187 if (copy_from_user(&val, optval, sizeof(val)))
3188 return -EFAULT;
3189
3190 po->has_vnet_hdr = !!val;
3191 return 0;
3192 }
3193 case PACKET_TIMESTAMP:
3194 {
3195 int val;
3196
3197 if (optlen != sizeof(val))
3198 return -EINVAL;
3199 if (copy_from_user(&val, optval, sizeof(val)))
3200 return -EFAULT;
3201
3202 po->tp_tstamp = val;
3203 return 0;
3204 }
3205 case PACKET_FANOUT:
3206 {
3207 int val;
3208
3209 if (optlen != sizeof(val))
3210 return -EINVAL;
3211 if (copy_from_user(&val, optval, sizeof(val)))
3212 return -EFAULT;
3213
3214 return fanout_add(sk, val & 0xffff, val >> 16);
3215 }
3216 case PACKET_TX_HAS_OFF:
3217 {
3218 unsigned int val;
3219
3220 if (optlen != sizeof(val))
3221 return -EINVAL;
3222 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3223 return -EBUSY;
3224 if (copy_from_user(&val, optval, sizeof(val)))
3225 return -EFAULT;
3226 po->tp_tx_has_off = !!val;
3227 return 0;
3228 }
3229 default:
3230 return -ENOPROTOOPT;
3231 }
3232}
3233
3234static int packet_getsockopt(struct socket *sock, int level, int optname,
3235 char __user *optval, int __user *optlen)
3236{
3237 int len;
3238 int val, lv = sizeof(val);
3239 struct sock *sk = sock->sk;
3240 struct packet_sock *po = pkt_sk(sk);
3241 void *data = &val;
3242 union tpacket_stats_u st;
3243
3244 if (level != SOL_PACKET)
3245 return -ENOPROTOOPT;
3246
3247 if (get_user(len, optlen))
3248 return -EFAULT;
3249
3250 if (len < 0)
3251 return -EINVAL;
3252
3253 switch (optname) {
3254 case PACKET_STATISTICS:
3255 spin_lock_bh(&sk->sk_receive_queue.lock);
3256 memcpy(&st, &po->stats, sizeof(st));
3257 memset(&po->stats, 0, sizeof(po->stats));
3258 spin_unlock_bh(&sk->sk_receive_queue.lock);
3259
3260 if (po->tp_version == TPACKET_V3) {
3261 lv = sizeof(struct tpacket_stats_v3);
3262 st.stats3.tp_packets += st.stats3.tp_drops;
3263 data = &st.stats3;
3264 } else {
3265 lv = sizeof(struct tpacket_stats);
3266 st.stats1.tp_packets += st.stats1.tp_drops;
3267 data = &st.stats1;
3268 }
3269
3270 break;
3271 case PACKET_AUXDATA:
3272 val = po->auxdata;
3273 break;
3274 case PACKET_ORIGDEV:
3275 val = po->origdev;
3276 break;
3277 case PACKET_VNET_HDR:
3278 val = po->has_vnet_hdr;
3279 break;
3280 case PACKET_VERSION:
3281 val = po->tp_version;
3282 break;
3283 case PACKET_HDRLEN:
3284 if (len > sizeof(int))
3285 len = sizeof(int);
3286 if (copy_from_user(&val, optval, len))
3287 return -EFAULT;
3288 switch (val) {
3289 case TPACKET_V1:
3290 val = sizeof(struct tpacket_hdr);
3291 break;
3292 case TPACKET_V2:
3293 val = sizeof(struct tpacket2_hdr);
3294 break;
3295 case TPACKET_V3:
3296 val = sizeof(struct tpacket3_hdr);
3297 break;
3298 default:
3299 return -EINVAL;
3300 }
3301 break;
3302 case PACKET_RESERVE:
3303 val = po->tp_reserve;
3304 break;
3305 case PACKET_LOSS:
3306 val = po->tp_loss;
3307 break;
3308 case PACKET_TIMESTAMP:
3309 val = po->tp_tstamp;
3310 break;
3311 case PACKET_FANOUT:
3312 val = (po->fanout ?
3313 ((u32)po->fanout->id |
3314 ((u32)po->fanout->type << 16) |
3315 ((u32)po->fanout->flags << 24)) :
3316 0);
3317 break;
3318 case PACKET_TX_HAS_OFF:
3319 val = po->tp_tx_has_off;
3320 break;
3321 default:
3322 return -ENOPROTOOPT;
3323 }
3324
3325 if (len > lv)
3326 len = lv;
3327 if (put_user(len, optlen))
3328 return -EFAULT;
3329 if (copy_to_user(optval, data, len))
3330 return -EFAULT;
3331 return 0;
3332}
3333
3334
3335static int packet_notifier(struct notifier_block *this,
3336 unsigned long msg, void *ptr)
3337{
3338 struct sock *sk;
3339 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3340 struct net *net = dev_net(dev);
3341
3342 rcu_read_lock();
3343 sk_for_each_rcu(sk, &net->packet.sklist) {
3344 struct packet_sock *po = pkt_sk(sk);
3345
3346 switch (msg) {
3347 case NETDEV_UNREGISTER:
3348 if (po->mclist)
3349 packet_dev_mclist(dev, po->mclist, -1);
3350
3351
3352 case NETDEV_DOWN:
3353 if (dev->ifindex == po->ifindex) {
3354 spin_lock(&po->bind_lock);
3355 if (po->running) {
3356 __unregister_prot_hook(sk, false);
3357 sk->sk_err = ENETDOWN;
3358 if (!sock_flag(sk, SOCK_DEAD))
3359 sk->sk_error_report(sk);
3360 }
3361 if (msg == NETDEV_UNREGISTER) {
3362 po->ifindex = -1;
3363 if (po->prot_hook.dev)
3364 dev_put(po->prot_hook.dev);
3365 po->prot_hook.dev = NULL;
3366 }
3367 spin_unlock(&po->bind_lock);
3368 }
3369 break;
3370 case NETDEV_UP:
3371 if (dev->ifindex == po->ifindex) {
3372 spin_lock(&po->bind_lock);
3373 if (po->num)
3374 register_prot_hook(sk);
3375 spin_unlock(&po->bind_lock);
3376 }
3377 break;
3378 }
3379 }
3380 rcu_read_unlock();
3381 return NOTIFY_DONE;
3382}
3383
3384
3385static int packet_ioctl(struct socket *sock, unsigned int cmd,
3386 unsigned long arg)
3387{
3388 struct sock *sk = sock->sk;
3389
3390 switch (cmd) {
3391 case SIOCOUTQ:
3392 {
3393 int amount = sk_wmem_alloc_get(sk);
3394
3395 return put_user(amount, (int __user *)arg);
3396 }
3397 case SIOCINQ:
3398 {
3399 struct sk_buff *skb;
3400 int amount = 0;
3401
3402 spin_lock_bh(&sk->sk_receive_queue.lock);
3403 skb = skb_peek(&sk->sk_receive_queue);
3404 if (skb)
3405 amount = skb->len;
3406 spin_unlock_bh(&sk->sk_receive_queue.lock);
3407 return put_user(amount, (int __user *)arg);
3408 }
3409 case SIOCGSTAMP:
3410 return sock_get_timestamp(sk, (struct timeval __user *)arg);
3411 case SIOCGSTAMPNS:
3412 return sock_get_timestampns(sk, (struct timespec __user *)arg);
3413
3414#ifdef CONFIG_INET
3415 case SIOCADDRT:
3416 case SIOCDELRT:
3417 case SIOCDARP:
3418 case SIOCGARP:
3419 case SIOCSARP:
3420 case SIOCGIFADDR:
3421 case SIOCSIFADDR:
3422 case SIOCGIFBRDADDR:
3423 case SIOCSIFBRDADDR:
3424 case SIOCGIFNETMASK:
3425 case SIOCSIFNETMASK:
3426 case SIOCGIFDSTADDR:
3427 case SIOCSIFDSTADDR:
3428 case SIOCSIFFLAGS:
3429 return inet_dgram_ops.ioctl(sock, cmd, arg);
3430#endif
3431
3432 default:
3433 return -ENOIOCTLCMD;
3434 }
3435 return 0;
3436}
3437
3438static unsigned int packet_poll(struct file *file, struct socket *sock,
3439 poll_table *wait)
3440{
3441 struct sock *sk = sock->sk;
3442 struct packet_sock *po = pkt_sk(sk);
3443 unsigned int mask = datagram_poll(file, sock, wait);
3444
3445 spin_lock_bh(&sk->sk_receive_queue.lock);
3446 if (po->rx_ring.pg_vec) {
3447 if (!packet_previous_rx_frame(po, &po->rx_ring,
3448 TP_STATUS_KERNEL))
3449 mask |= POLLIN | POLLRDNORM;
3450 }
3451 spin_unlock_bh(&sk->sk_receive_queue.lock);
3452 spin_lock_bh(&sk->sk_write_queue.lock);
3453 if (po->tx_ring.pg_vec) {
3454 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
3455 mask |= POLLOUT | POLLWRNORM;
3456 }
3457 spin_unlock_bh(&sk->sk_write_queue.lock);
3458 return mask;
3459}
3460
3461
3462
3463
3464
3465
3466static void packet_mm_open(struct vm_area_struct *vma)
3467{
3468 struct file *file = vma->vm_file;
3469 struct socket *sock = file->private_data;
3470 struct sock *sk = sock->sk;
3471
3472 if (sk)
3473 atomic_inc(&pkt_sk(sk)->mapped);
3474}
3475
3476static void packet_mm_close(struct vm_area_struct *vma)
3477{
3478 struct file *file = vma->vm_file;
3479 struct socket *sock = file->private_data;
3480 struct sock *sk = sock->sk;
3481
3482 if (sk)
3483 atomic_dec(&pkt_sk(sk)->mapped);
3484}
3485
/*
 * VMA callbacks installed by packet_mmap(); they keep the per-socket
 * "mapped" counter in step with the number of live mappings.
 */
static const struct vm_operations_struct packet_mmap_ops = {
	.open	=	packet_mm_open,
	.close	=	packet_mm_close,
};
3490
3491static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
3492 unsigned int len)
3493{
3494 int i;
3495
3496 for (i = 0; i < len; i++) {
3497 if (likely(pg_vec[i].buffer)) {
3498 if (is_vmalloc_addr(pg_vec[i].buffer))
3499 vfree(pg_vec[i].buffer);
3500 else
3501 free_pages((unsigned long)pg_vec[i].buffer,
3502 order);
3503 pg_vec[i].buffer = NULL;
3504 }
3505 }
3506 kfree(pg_vec);
3507}
3508
3509static char *alloc_one_pg_vec_page(unsigned long order)
3510{
3511 char *buffer = NULL;
3512 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
3513 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
3514
3515 buffer = (char *) __get_free_pages(gfp_flags, order);
3516
3517 if (buffer)
3518 return buffer;
3519
3520
3521
3522
3523 buffer = vzalloc((1 << order) * PAGE_SIZE);
3524
3525 if (buffer)
3526 return buffer;
3527
3528
3529
3530
3531 gfp_flags &= ~__GFP_NORETRY;
3532 buffer = (char *)__get_free_pages(gfp_flags, order);
3533 if (buffer)
3534 return buffer;
3535
3536
3537
3538
3539 return NULL;
3540}
3541
3542static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
3543{
3544 unsigned int block_nr = req->tp_block_nr;
3545 struct pgv *pg_vec;
3546 int i;
3547
3548 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
3549 if (unlikely(!pg_vec))
3550 goto out;
3551
3552 for (i = 0; i < block_nr; i++) {
3553 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
3554 if (unlikely(!pg_vec[i].buffer))
3555 goto out_free_pgvec;
3556 }
3557
3558out:
3559 return pg_vec;
3560
3561out_free_pgvec:
3562 free_pg_vec(pg_vec, order, block_nr);
3563 pg_vec = NULL;
3564 goto out;
3565}
3566
3567static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
3568 int closing, int tx_ring)
3569{
3570 struct pgv *pg_vec = NULL;
3571 struct packet_sock *po = pkt_sk(sk);
3572 int was_running, order = 0;
3573 struct packet_ring_buffer *rb;
3574 struct sk_buff_head *rb_queue;
3575 __be16 num;
3576 int err = -EINVAL;
3577
3578 struct tpacket_req *req = &req_u->req;
3579
3580
3581 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
3582 WARN(1, "Tx-ring is not supported.\n");
3583 goto out;
3584 }
3585
3586 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
3587 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
3588
3589 err = -EBUSY;
3590 if (!closing) {
3591 if (atomic_read(&po->mapped))
3592 goto out;
3593 if (atomic_read(&rb->pending))
3594 goto out;
3595 }
3596
3597 if (req->tp_block_nr) {
3598
3599 err = -EBUSY;
3600 if (unlikely(rb->pg_vec))
3601 goto out;
3602
3603 switch (po->tp_version) {
3604 case TPACKET_V1:
3605 po->tp_hdrlen = TPACKET_HDRLEN;
3606 break;
3607 case TPACKET_V2:
3608 po->tp_hdrlen = TPACKET2_HDRLEN;
3609 break;
3610 case TPACKET_V3:
3611 po->tp_hdrlen = TPACKET3_HDRLEN;
3612 break;
3613 }
3614
3615 err = -EINVAL;
3616 if (unlikely((int)req->tp_block_size <= 0))
3617 goto out;
3618 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
3619 goto out;
3620 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
3621 po->tp_reserve))
3622 goto out;
3623 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
3624 goto out;
3625
3626 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
3627 if (unlikely(rb->frames_per_block <= 0))
3628 goto out;
3629 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
3630 req->tp_frame_nr))
3631 goto out;
3632
3633 err = -ENOMEM;
3634 order = get_order(req->tp_block_size);
3635 pg_vec = alloc_pg_vec(req, order);
3636 if (unlikely(!pg_vec))
3637 goto out;
3638 switch (po->tp_version) {
3639 case TPACKET_V3:
3640
3641
3642
3643 if (!tx_ring)
3644 init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
3645 break;
3646 default:
3647 break;
3648 }
3649 }
3650
3651 else {
3652 err = -EINVAL;
3653 if (unlikely(req->tp_frame_nr))
3654 goto out;
3655 }
3656
3657 lock_sock(sk);
3658
3659
3660 spin_lock(&po->bind_lock);
3661 was_running = po->running;
3662 num = po->num;
3663 if (was_running) {
3664 po->num = 0;
3665 __unregister_prot_hook(sk, false);
3666 }
3667 spin_unlock(&po->bind_lock);
3668
3669 synchronize_net();
3670
3671 err = -EBUSY;
3672 mutex_lock(&po->pg_vec_lock);
3673 if (closing || atomic_read(&po->mapped) == 0) {
3674 err = 0;
3675 spin_lock_bh(&rb_queue->lock);
3676 swap(rb->pg_vec, pg_vec);
3677 rb->frame_max = (req->tp_frame_nr - 1);
3678 rb->head = 0;
3679 rb->frame_size = req->tp_frame_size;
3680 spin_unlock_bh(&rb_queue->lock);
3681
3682 swap(rb->pg_vec_order, order);
3683 swap(rb->pg_vec_len, req->tp_block_nr);
3684
3685 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
3686 po->prot_hook.func = (po->rx_ring.pg_vec) ?
3687 tpacket_rcv : packet_rcv;
3688 skb_queue_purge(rb_queue);
3689 if (atomic_read(&po->mapped))
3690 pr_err("packet_mmap: vma is busy: %d\n",
3691 atomic_read(&po->mapped));
3692 }
3693 mutex_unlock(&po->pg_vec_lock);
3694
3695 spin_lock(&po->bind_lock);
3696 if (was_running) {
3697 po->num = num;
3698 register_prot_hook(sk);
3699 }
3700 spin_unlock(&po->bind_lock);
3701 if (closing && (po->tp_version > TPACKET_V2)) {
3702
3703 if (!tx_ring)
3704 prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
3705 }
3706 release_sock(sk);
3707
3708 if (pg_vec)
3709 free_pg_vec(pg_vec, order, req->tp_block_nr);
3710out:
3711 return err;
3712}
3713
3714static int packet_mmap(struct file *file, struct socket *sock,
3715 struct vm_area_struct *vma)
3716{
3717 struct sock *sk = sock->sk;
3718 struct packet_sock *po = pkt_sk(sk);
3719 unsigned long size, expected_size;
3720 struct packet_ring_buffer *rb;
3721 unsigned long start;
3722 int err = -EINVAL;
3723 int i;
3724
3725 if (vma->vm_pgoff)
3726 return -EINVAL;
3727
3728 mutex_lock(&po->pg_vec_lock);
3729
3730 expected_size = 0;
3731 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3732 if (rb->pg_vec) {
3733 expected_size += rb->pg_vec_len
3734 * rb->pg_vec_pages
3735 * PAGE_SIZE;
3736 }
3737 }
3738
3739 if (expected_size == 0)
3740 goto out;
3741
3742 size = vma->vm_end - vma->vm_start;
3743 if (size != expected_size)
3744 goto out;
3745
3746 start = vma->vm_start;
3747 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3748 if (rb->pg_vec == NULL)
3749 continue;
3750
3751 for (i = 0; i < rb->pg_vec_len; i++) {
3752 struct page *page;
3753 void *kaddr = rb->pg_vec[i].buffer;
3754 int pg_num;
3755
3756 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
3757 page = pgv_to_page(kaddr);
3758 err = vm_insert_page(vma, start, page);
3759 if (unlikely(err))
3760 goto out;
3761 start += PAGE_SIZE;
3762 kaddr += PAGE_SIZE;
3763 }
3764 }
3765 }
3766
3767 atomic_inc(&po->mapped);
3768 vma->vm_ops = &packet_mmap_ops;
3769 err = 0;
3770
3771out:
3772 mutex_unlock(&po->pg_vec_lock);
3773 return err;
3774}
3775
/*
 * Socket operations table for the "spkt" flavour of packet sockets.
 * Compared with packet_ops below it has no socket options and no mmap
 * (sock_no_* stubs), uses plain datagram_poll, and has its own bind,
 * getname and sendmsg handlers.  Where the table is attached to a socket
 * is outside this part of the file (presumably SOCK_PACKET sockets --
 * confirm against packet_create()).
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
3796
/*
 * Full-featured socket operations table for packet sockets: supports the
 * SOL_PACKET socket options, ring-aware poll and mmap of the rings.
 * Connection-oriented operations are stubbed out with sock_no_*.
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
3817
/* Protocol family descriptor: PF_PACKET sockets are created by packet_create(). */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
3823
/* Notifier block that routes netdevice events to packet_notifier(). */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
3827
3828#ifdef CONFIG_PROC_FS
3829
3830static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
3831 __acquires(RCU)
3832{
3833 struct net *net = seq_file_net(seq);
3834
3835 rcu_read_lock();
3836 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
3837}
3838
3839static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3840{
3841 struct net *net = seq_file_net(seq);
3842 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
3843}
3844
/* seq_file stop: leave the RCU read-side section entered by packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
3850
3851static int packet_seq_show(struct seq_file *seq, void *v)
3852{
3853 if (v == SEQ_START_TOKEN)
3854 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
3855 else {
3856 struct sock *s = sk_entry(v);
3857 const struct packet_sock *po = pkt_sk(s);
3858
3859 seq_printf(seq,
3860 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
3861 s,
3862 atomic_read(&s->sk_refcnt),
3863 s->sk_type,
3864 ntohs(po->num),
3865 po->ifindex,
3866 po->running,
3867 atomic_read(&s->sk_rmem_alloc),
3868 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
3869 sock_i_ino(s));
3870 }
3871
3872 return 0;
3873}
3874
/* Iterator callbacks used to list packet sockets through the seq_file interface. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
3881
/*
 * open() for the proc entry: set up a namespace-aware seq_file that
 * iterates with packet_seq_ops.  Returns the result of seq_open_net().
 */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}
3887
/* File operations of the per-namespace "packet" proc entry. */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3895
3896#endif
3897
3898static int __net_init packet_net_init(struct net *net)
3899{
3900 mutex_init(&net->packet.sklist_lock);
3901 INIT_HLIST_HEAD(&net->packet.sklist);
3902
3903 if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
3904 return -ENOMEM;
3905
3906 return 0;
3907}
3908
/* Per-network-namespace teardown: remove the "packet" proc entry again. */
static void __net_exit packet_net_exit(struct net *net)
{
	remove_proc_entry("packet", net->proc_net);
}
3913
/* Hooks run for each network namespace as it is created and destroyed. */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
3918
3919
/*
 * Module unload: undo packet_init() in the reverse order of registration --
 * netdevice notifier, per-namespace operations, socket family, protocol.
 */
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
3927
3928static int __init packet_init(void)
3929{
3930 int rc = proto_register(&packet_proto, 0);
3931
3932 if (rc != 0)
3933 goto out;
3934
3935 sock_register(&packet_family_ops);
3936 register_pernet_subsys(&packet_net_ops);
3937 register_netdevice_notifier(&packet_netdev_notifier);
3938out:
3939 return rc;
3940}
3941
/* Module entry/exit points, licence, and an alias keyed on the PF_PACKET family. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
3946