1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55#include <linux/types.h>
56#include <linux/mm.h>
57#include <linux/capability.h>
58#include <linux/fcntl.h>
59#include <linux/socket.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_packet.h>
64#include <linux/wireless.h>
65#include <linux/kernel.h>
66#include <linux/kmod.h>
67#include <linux/slab.h>
68#include <linux/vmalloc.h>
69#include <net/net_namespace.h>
70#include <net/ip.h>
71#include <net/protocol.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <linux/errno.h>
75#include <linux/timer.h>
76#include <linux/uaccess.h>
77#include <asm/ioctls.h>
78#include <asm/page.h>
79#include <asm/cacheflush.h>
80#include <asm/io.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
83#include <linux/poll.h>
84#include <linux/module.h>
85#include <linux/init.h>
86#include <linux/mutex.h>
87#include <linux/if_vlan.h>
88#include <linux/virtio_net.h>
89#include <linux/errqueue.h>
90#include <linux/net_tstamp.h>
91#include <linux/percpu.h>
92#ifdef CONFIG_INET
93#include <net/inet_common.h>
94#endif
95#include <linux/bpf.h>
96#include <net/compat.h>
97
98#include "internal.h"
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156struct packet_mreq_max {
157 int mr_ifindex;
158 unsigned short mr_type;
159 unsigned short mr_alen;
160 unsigned char mr_address[MAX_ADDR_LEN];
161};
162
163union tpacket_uhdr {
164 struct tpacket_hdr *h1;
165 struct tpacket2_hdr *h2;
166 struct tpacket3_hdr *h3;
167 void *raw;
168};
169
170static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
171 int closing, int tx_ring);
172
173#define V3_ALIGNMENT (8)
174
175#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
176
177#define BLK_PLUS_PRIV(sz_of_priv) \
178 (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
179
180#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
181#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
182#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
183#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
184#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
185#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
186#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
187
188struct packet_sock;
189static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
190 struct packet_type *pt, struct net_device *orig_dev);
191
192static void *packet_previous_frame(struct packet_sock *po,
193 struct packet_ring_buffer *rb,
194 int status);
195static void packet_increment_head(struct packet_ring_buffer *buff);
196static int prb_curr_blk_in_use(struct tpacket_block_desc *);
197static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
198 struct packet_sock *);
199static void prb_retire_current_block(struct tpacket_kbdq_core *,
200 struct packet_sock *, unsigned int status);
201static int prb_queue_frozen(struct tpacket_kbdq_core *);
202static void prb_open_block(struct tpacket_kbdq_core *,
203 struct tpacket_block_desc *);
204static void prb_retire_rx_blk_timer_expired(struct timer_list *);
205static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
206static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
207static void prb_clear_rxhash(struct tpacket_kbdq_core *,
208 struct tpacket3_hdr *);
209static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
210 struct tpacket3_hdr *);
211static void packet_flush_mclist(struct sock *sk);
212static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb);
213
214struct packet_skb_cb {
215 union {
216 struct sockaddr_pkt pkt;
217 union {
218
219
220
221
222 unsigned int origlen;
223 struct sockaddr_ll ll;
224 };
225 } sa;
226};
227
228#define vio_le() virtio_legacy_is_little_endian()
229
230#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
231
232#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
233#define GET_PBLOCK_DESC(x, bid) \
234 ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
235#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
236 ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
237#define GET_NEXT_PRB_BLK_NUM(x) \
238 (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
239 ((x)->kactive_blk_num+1) : 0)
240
241static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
242static void __fanout_link(struct sock *sk, struct packet_sock *po);
243
244static int packet_direct_xmit(struct sk_buff *skb)
245{
246 struct net_device *dev = skb->dev;
247 struct sk_buff *orig_skb = skb;
248 struct netdev_queue *txq;
249 int ret = NETDEV_TX_BUSY;
250 bool again = false;
251
252 if (unlikely(!netif_running(dev) ||
253 !netif_carrier_ok(dev)))
254 goto drop;
255
256 skb = validate_xmit_skb_list(skb, dev, &again);
257 if (skb != orig_skb)
258 goto drop;
259
260 packet_pick_tx_queue(dev, skb);
261 txq = skb_get_tx_queue(dev, skb);
262
263 local_bh_disable();
264
265 HARD_TX_LOCK(dev, txq, smp_processor_id());
266 if (!netif_xmit_frozen_or_drv_stopped(txq))
267 ret = netdev_start_xmit(skb, dev, txq, false);
268 HARD_TX_UNLOCK(dev, txq);
269
270 local_bh_enable();
271
272 if (!dev_xmit_complete(ret))
273 kfree_skb(skb);
274
275 return ret;
276drop:
277 atomic_long_inc(&dev->tx_dropped);
278 kfree_skb_list(skb);
279 return NET_XMIT_DROP;
280}
281
282static struct net_device *packet_cached_dev_get(struct packet_sock *po)
283{
284 struct net_device *dev;
285
286 rcu_read_lock();
287 dev = rcu_dereference(po->cached_dev);
288 if (likely(dev))
289 dev_hold(dev);
290 rcu_read_unlock();
291
292 return dev;
293}
294
295static void packet_cached_dev_assign(struct packet_sock *po,
296 struct net_device *dev)
297{
298 rcu_assign_pointer(po->cached_dev, dev);
299}
300
301static void packet_cached_dev_reset(struct packet_sock *po)
302{
303 RCU_INIT_POINTER(po->cached_dev, NULL);
304}
305
306static bool packet_use_direct_xmit(const struct packet_sock *po)
307{
308 return po->xmit == packet_direct_xmit;
309}
310
311static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
312{
313 return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
314}
315
316static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
317{
318 const struct net_device_ops *ops = dev->netdev_ops;
319 u16 queue_index;
320
321 if (ops->ndo_select_queue) {
322 queue_index = ops->ndo_select_queue(dev, skb, NULL,
323 __packet_pick_tx_queue);
324 queue_index = netdev_cap_txqueue(dev, queue_index);
325 } else {
326 queue_index = __packet_pick_tx_queue(dev, skb);
327 }
328
329 skb_set_queue_mapping(skb, queue_index);
330}
331
332
333
334
335
336static void __register_prot_hook(struct sock *sk)
337{
338 struct packet_sock *po = pkt_sk(sk);
339
340 if (!po->running) {
341 if (po->fanout)
342 __fanout_link(sk, po);
343 else
344 dev_add_pack(&po->prot_hook);
345
346 sock_hold(sk);
347 po->running = 1;
348 }
349}
350
351static void register_prot_hook(struct sock *sk)
352{
353 lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
354 __register_prot_hook(sk);
355}
356
357
358
359
360
361
362
363static void __unregister_prot_hook(struct sock *sk, bool sync)
364{
365 struct packet_sock *po = pkt_sk(sk);
366
367 lockdep_assert_held_once(&po->bind_lock);
368
369 po->running = 0;
370
371 if (po->fanout)
372 __fanout_unlink(sk, po);
373 else
374 __dev_remove_pack(&po->prot_hook);
375
376 __sock_put(sk);
377
378 if (sync) {
379 spin_unlock(&po->bind_lock);
380 synchronize_net();
381 spin_lock(&po->bind_lock);
382 }
383}
384
385static void unregister_prot_hook(struct sock *sk, bool sync)
386{
387 struct packet_sock *po = pkt_sk(sk);
388
389 if (po->running)
390 __unregister_prot_hook(sk, sync);
391}
392
393static inline struct page * __pure pgv_to_page(void *addr)
394{
395 if (is_vmalloc_addr(addr))
396 return vmalloc_to_page(addr);
397 return virt_to_page(addr);
398}
399
400static void __packet_set_status(struct packet_sock *po, void *frame, int status)
401{
402 union tpacket_uhdr h;
403
404 h.raw = frame;
405 switch (po->tp_version) {
406 case TPACKET_V1:
407 h.h1->tp_status = status;
408 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
409 break;
410 case TPACKET_V2:
411 h.h2->tp_status = status;
412 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
413 break;
414 case TPACKET_V3:
415 h.h3->tp_status = status;
416 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
417 break;
418 default:
419 WARN(1, "TPACKET version not supported.\n");
420 BUG();
421 }
422
423 smp_wmb();
424}
425
426static int __packet_get_status(struct packet_sock *po, void *frame)
427{
428 union tpacket_uhdr h;
429
430 smp_rmb();
431
432 h.raw = frame;
433 switch (po->tp_version) {
434 case TPACKET_V1:
435 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
436 return h.h1->tp_status;
437 case TPACKET_V2:
438 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
439 return h.h2->tp_status;
440 case TPACKET_V3:
441 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
442 return h.h3->tp_status;
443 default:
444 WARN(1, "TPACKET version not supported.\n");
445 BUG();
446 return 0;
447 }
448}
449
450static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
451 unsigned int flags)
452{
453 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
454
455 if (shhwtstamps &&
456 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
457 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
458 return TP_STATUS_TS_RAW_HARDWARE;
459
460 if (ktime_to_timespec_cond(skb->tstamp, ts))
461 return TP_STATUS_TS_SOFTWARE;
462
463 return 0;
464}
465
466static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
467 struct sk_buff *skb)
468{
469 union tpacket_uhdr h;
470 struct timespec ts;
471 __u32 ts_status;
472
473 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
474 return 0;
475
476 h.raw = frame;
477 switch (po->tp_version) {
478 case TPACKET_V1:
479 h.h1->tp_sec = ts.tv_sec;
480 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
481 break;
482 case TPACKET_V2:
483 h.h2->tp_sec = ts.tv_sec;
484 h.h2->tp_nsec = ts.tv_nsec;
485 break;
486 case TPACKET_V3:
487 h.h3->tp_sec = ts.tv_sec;
488 h.h3->tp_nsec = ts.tv_nsec;
489 break;
490 default:
491 WARN(1, "TPACKET version not supported.\n");
492 BUG();
493 }
494
495
496 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
497 smp_wmb();
498
499 return ts_status;
500}
501
502static void *packet_lookup_frame(struct packet_sock *po,
503 struct packet_ring_buffer *rb,
504 unsigned int position,
505 int status)
506{
507 unsigned int pg_vec_pos, frame_offset;
508 union tpacket_uhdr h;
509
510 pg_vec_pos = position / rb->frames_per_block;
511 frame_offset = position % rb->frames_per_block;
512
513 h.raw = rb->pg_vec[pg_vec_pos].buffer +
514 (frame_offset * rb->frame_size);
515
516 if (status != __packet_get_status(po, h.raw))
517 return NULL;
518
519 return h.raw;
520}
521
522static void *packet_current_frame(struct packet_sock *po,
523 struct packet_ring_buffer *rb,
524 int status)
525{
526 return packet_lookup_frame(po, rb, rb->head, status);
527}
528
529static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
530{
531 del_timer_sync(&pkc->retire_blk_timer);
532}
533
534static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
535 struct sk_buff_head *rb_queue)
536{
537 struct tpacket_kbdq_core *pkc;
538
539 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
540
541 spin_lock_bh(&rb_queue->lock);
542 pkc->delete_blk_timer = 1;
543 spin_unlock_bh(&rb_queue->lock);
544
545 prb_del_retire_blk_timer(pkc);
546}
547
548static void prb_setup_retire_blk_timer(struct packet_sock *po)
549{
550 struct tpacket_kbdq_core *pkc;
551
552 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
553 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
554 0);
555 pkc->retire_blk_timer.expires = jiffies;
556}
557
558static int prb_calc_retire_blk_tmo(struct packet_sock *po,
559 int blk_size_in_bytes)
560{
561 struct net_device *dev;
562 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
563 struct ethtool_link_ksettings ecmd;
564 int err;
565
566 rtnl_lock();
567 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
568 if (unlikely(!dev)) {
569 rtnl_unlock();
570 return DEFAULT_PRB_RETIRE_TOV;
571 }
572 err = __ethtool_get_link_ksettings(dev, &ecmd);
573 rtnl_unlock();
574 if (!err) {
575
576
577
578
579 if (ecmd.base.speed < SPEED_1000 ||
580 ecmd.base.speed == SPEED_UNKNOWN) {
581 return DEFAULT_PRB_RETIRE_TOV;
582 } else {
583 msec = 1;
584 div = ecmd.base.speed / 1000;
585 }
586 }
587
588 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
589
590 if (div)
591 mbits /= div;
592
593 tmo = mbits * msec;
594
595 if (div)
596 return tmo+1;
597 return tmo;
598}
599
600static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
601 union tpacket_req_u *req_u)
602{
603 p1->feature_req_word = req_u->req3.tp_feature_req_word;
604}
605
606static void init_prb_bdqc(struct packet_sock *po,
607 struct packet_ring_buffer *rb,
608 struct pgv *pg_vec,
609 union tpacket_req_u *req_u)
610{
611 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
612 struct tpacket_block_desc *pbd;
613
614 memset(p1, 0x0, sizeof(*p1));
615
616 p1->knxt_seq_num = 1;
617 p1->pkbdq = pg_vec;
618 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
619 p1->pkblk_start = pg_vec[0].buffer;
620 p1->kblk_size = req_u->req3.tp_block_size;
621 p1->knum_blocks = req_u->req3.tp_block_nr;
622 p1->hdrlen = po->tp_hdrlen;
623 p1->version = po->tp_version;
624 p1->last_kactive_blk_num = 0;
625 po->stats.stats3.tp_freeze_q_cnt = 0;
626 if (req_u->req3.tp_retire_blk_tov)
627 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
628 else
629 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
630 req_u->req3.tp_block_size);
631 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
632 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
633
634 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
635 prb_init_ft_ops(p1, req_u);
636 prb_setup_retire_blk_timer(po);
637 prb_open_block(p1, pbd);
638}
639
640
641
642
643static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
644{
645 mod_timer(&pkc->retire_blk_timer,
646 jiffies + pkc->tov_in_jiffies);
647 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
648}
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
674{
675 struct packet_sock *po =
676 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
677 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
678 unsigned int frozen;
679 struct tpacket_block_desc *pbd;
680
681 spin_lock(&po->sk.sk_receive_queue.lock);
682
683 frozen = prb_queue_frozen(pkc);
684 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
685
686 if (unlikely(pkc->delete_blk_timer))
687 goto out;
688
689
690
691
692
693
694
695
696
697
698 if (BLOCK_NUM_PKTS(pbd)) {
699 while (atomic_read(&pkc->blk_fill_in_prog)) {
700
701 cpu_relax();
702 }
703 }
704
705 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
706 if (!frozen) {
707 if (!BLOCK_NUM_PKTS(pbd)) {
708
709 goto refresh_timer;
710 }
711 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
712 if (!prb_dispatch_next_block(pkc, po))
713 goto refresh_timer;
714 else
715 goto out;
716 } else {
717
718
719
720 if (prb_curr_blk_in_use(pbd)) {
721
722
723
724
725 goto refresh_timer;
726 } else {
727
728
729
730
731
732
733
734 prb_open_block(pkc, pbd);
735 goto out;
736 }
737 }
738 }
739
740refresh_timer:
741 _prb_refresh_rx_retire_blk_timer(pkc);
742
743out:
744 spin_unlock(&po->sk.sk_receive_queue.lock);
745}
746
747static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
748 struct tpacket_block_desc *pbd1, __u32 status)
749{
750
751
752#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
753 u8 *start, *end;
754
755 start = (u8 *)pbd1;
756
757
758 start += PAGE_SIZE;
759
760 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
761 for (; start < end; start += PAGE_SIZE)
762 flush_dcache_page(pgv_to_page(start));
763
764 smp_wmb();
765#endif
766
767
768
769 BLOCK_STATUS(pbd1) = status;
770
771
772
773#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
774 start = (u8 *)pbd1;
775 flush_dcache_page(pgv_to_page(start));
776
777 smp_wmb();
778#endif
779}
780
781
782
783
784
785
786
787
788
789
790static void prb_close_block(struct tpacket_kbdq_core *pkc1,
791 struct tpacket_block_desc *pbd1,
792 struct packet_sock *po, unsigned int stat)
793{
794 __u32 status = TP_STATUS_USER | stat;
795
796 struct tpacket3_hdr *last_pkt;
797 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
798 struct sock *sk = &po->sk;
799
800 if (po->stats.stats3.tp_drops)
801 status |= TP_STATUS_LOSING;
802
803 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
804 last_pkt->tp_next_offset = 0;
805
806
807 if (BLOCK_NUM_PKTS(pbd1)) {
808 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
809 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
810 } else {
811
812
813
814
815
816 struct timespec ts;
817 getnstimeofday(&ts);
818 h1->ts_last_pkt.ts_sec = ts.tv_sec;
819 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
820 }
821
822 smp_wmb();
823
824
825 prb_flush_block(pkc1, pbd1, status);
826
827 sk->sk_data_ready(sk);
828
829 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
830}
831
832static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
833{
834 pkc->reset_pending_on_curr_blk = 0;
835}
836
837
838
839
840
841
842
843
844static void prb_open_block(struct tpacket_kbdq_core *pkc1,
845 struct tpacket_block_desc *pbd1)
846{
847 struct timespec ts;
848 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
849
850 smp_rmb();
851
852
853
854
855
856 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
857 BLOCK_NUM_PKTS(pbd1) = 0;
858 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
859
860 getnstimeofday(&ts);
861
862 h1->ts_first_pkt.ts_sec = ts.tv_sec;
863 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
864
865 pkc1->pkblk_start = (char *)pbd1;
866 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
867
868 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
869 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
870
871 pbd1->version = pkc1->version;
872 pkc1->prev = pkc1->nxt_offset;
873 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
874
875 prb_thaw_queue(pkc1);
876 _prb_refresh_rx_retire_blk_timer(pkc1);
877
878 smp_wmb();
879}
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
905 struct packet_sock *po)
906{
907 pkc->reset_pending_on_curr_blk = 1;
908 po->stats.stats3.tp_freeze_q_cnt++;
909}
910
911#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
912
913
914
915
916
917
918
919static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
920 struct packet_sock *po)
921{
922 struct tpacket_block_desc *pbd;
923
924 smp_rmb();
925
926
927 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
928
929
930 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
931 prb_freeze_queue(pkc, po);
932 return NULL;
933 }
934
935
936
937
938
939
940 prb_open_block(pkc, pbd);
941 return (void *)pkc->nxt_offset;
942}
943
944static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
945 struct packet_sock *po, unsigned int status)
946{
947 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
948
949
950 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
951
952
953
954
955
956
957
958
959
960 if (!(status & TP_STATUS_BLK_TMO)) {
961 while (atomic_read(&pkc->blk_fill_in_prog)) {
962
963 cpu_relax();
964 }
965 }
966 prb_close_block(pkc, pbd, po, status);
967 return;
968 }
969}
970
971static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
972{
973 return TP_STATUS_USER & BLOCK_STATUS(pbd);
974}
975
976static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
977{
978 return pkc->reset_pending_on_curr_blk;
979}
980
981static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
982{
983 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
984 atomic_dec(&pkc->blk_fill_in_prog);
985}
986
987static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
988 struct tpacket3_hdr *ppd)
989{
990 ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
991}
992
993static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
994 struct tpacket3_hdr *ppd)
995{
996 ppd->hv1.tp_rxhash = 0;
997}
998
999static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
1000 struct tpacket3_hdr *ppd)
1001{
1002 if (skb_vlan_tag_present(pkc->skb)) {
1003 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
1004 ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
1005 ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
1006 } else {
1007 ppd->hv1.tp_vlan_tci = 0;
1008 ppd->hv1.tp_vlan_tpid = 0;
1009 ppd->tp_status = TP_STATUS_AVAILABLE;
1010 }
1011}
1012
1013static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
1014 struct tpacket3_hdr *ppd)
1015{
1016 ppd->hv1.tp_padding = 0;
1017 prb_fill_vlan_info(pkc, ppd);
1018
1019 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
1020 prb_fill_rxhash(pkc, ppd);
1021 else
1022 prb_clear_rxhash(pkc, ppd);
1023}
1024
1025static void prb_fill_curr_block(char *curr,
1026 struct tpacket_kbdq_core *pkc,
1027 struct tpacket_block_desc *pbd,
1028 unsigned int len)
1029{
1030 struct tpacket3_hdr *ppd;
1031
1032 ppd = (struct tpacket3_hdr *)curr;
1033 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
1034 pkc->prev = curr;
1035 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
1036 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
1037 BLOCK_NUM_PKTS(pbd) += 1;
1038 atomic_inc(&pkc->blk_fill_in_prog);
1039 prb_run_all_ft_ops(pkc, ppd);
1040}
1041
1042
1043static void *__packet_lookup_frame_in_block(struct packet_sock *po,
1044 struct sk_buff *skb,
1045 int status,
1046 unsigned int len
1047 )
1048{
1049 struct tpacket_kbdq_core *pkc;
1050 struct tpacket_block_desc *pbd;
1051 char *curr, *end;
1052
1053 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
1054 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1055
1056
1057 if (prb_queue_frozen(pkc)) {
1058
1059
1060
1061
1062 if (prb_curr_blk_in_use(pbd)) {
1063
1064 return NULL;
1065 } else {
1066
1067
1068
1069
1070
1071
1072 prb_open_block(pkc, pbd);
1073 }
1074 }
1075
1076 smp_mb();
1077 curr = pkc->nxt_offset;
1078 pkc->skb = skb;
1079 end = (char *)pbd + pkc->kblk_size;
1080
1081
1082 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1083 prb_fill_curr_block(curr, pkc, pbd, len);
1084 return (void *)curr;
1085 }
1086
1087
1088 prb_retire_current_block(pkc, po, 0);
1089
1090
1091 curr = (char *)prb_dispatch_next_block(pkc, po);
1092 if (curr) {
1093 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1094 prb_fill_curr_block(curr, pkc, pbd, len);
1095 return (void *)curr;
1096 }
1097
1098
1099
1100
1101
1102 return NULL;
1103}
1104
1105static void *packet_current_rx_frame(struct packet_sock *po,
1106 struct sk_buff *skb,
1107 int status, unsigned int len)
1108{
1109 char *curr = NULL;
1110 switch (po->tp_version) {
1111 case TPACKET_V1:
1112 case TPACKET_V2:
1113 curr = packet_lookup_frame(po, &po->rx_ring,
1114 po->rx_ring.head, status);
1115 return curr;
1116 case TPACKET_V3:
1117 return __packet_lookup_frame_in_block(po, skb, status, len);
1118 default:
1119 WARN(1, "TPACKET version not supported\n");
1120 BUG();
1121 return NULL;
1122 }
1123}
1124
1125static void *prb_lookup_block(struct packet_sock *po,
1126 struct packet_ring_buffer *rb,
1127 unsigned int idx,
1128 int status)
1129{
1130 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1131 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1132
1133 if (status != BLOCK_STATUS(pbd))
1134 return NULL;
1135 return pbd;
1136}
1137
1138static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1139{
1140 unsigned int prev;
1141 if (rb->prb_bdqc.kactive_blk_num)
1142 prev = rb->prb_bdqc.kactive_blk_num-1;
1143 else
1144 prev = rb->prb_bdqc.knum_blocks-1;
1145 return prev;
1146}
1147
1148
1149static void *__prb_previous_block(struct packet_sock *po,
1150 struct packet_ring_buffer *rb,
1151 int status)
1152{
1153 unsigned int previous = prb_previous_blk_num(rb);
1154 return prb_lookup_block(po, rb, previous, status);
1155}
1156
1157static void *packet_previous_rx_frame(struct packet_sock *po,
1158 struct packet_ring_buffer *rb,
1159 int status)
1160{
1161 if (po->tp_version <= TPACKET_V2)
1162 return packet_previous_frame(po, rb, status);
1163
1164 return __prb_previous_block(po, rb, status);
1165}
1166
1167static void packet_increment_rx_head(struct packet_sock *po,
1168 struct packet_ring_buffer *rb)
1169{
1170 switch (po->tp_version) {
1171 case TPACKET_V1:
1172 case TPACKET_V2:
1173 return packet_increment_head(rb);
1174 case TPACKET_V3:
1175 default:
1176 WARN(1, "TPACKET version not supported.\n");
1177 BUG();
1178 return;
1179 }
1180}
1181
1182static void *packet_previous_frame(struct packet_sock *po,
1183 struct packet_ring_buffer *rb,
1184 int status)
1185{
1186 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1187 return packet_lookup_frame(po, rb, previous, status);
1188}
1189
1190static void packet_increment_head(struct packet_ring_buffer *buff)
1191{
1192 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1193}
1194
1195static void packet_inc_pending(struct packet_ring_buffer *rb)
1196{
1197 this_cpu_inc(*rb->pending_refcnt);
1198}
1199
1200static void packet_dec_pending(struct packet_ring_buffer *rb)
1201{
1202 this_cpu_dec(*rb->pending_refcnt);
1203}
1204
1205static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1206{
1207 unsigned int refcnt = 0;
1208 int cpu;
1209
1210
1211 if (rb->pending_refcnt == NULL)
1212 return 0;
1213
1214 for_each_possible_cpu(cpu)
1215 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1216
1217 return refcnt;
1218}
1219
1220static int packet_alloc_pending(struct packet_sock *po)
1221{
1222 po->rx_ring.pending_refcnt = NULL;
1223
1224 po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1225 if (unlikely(po->tx_ring.pending_refcnt == NULL))
1226 return -ENOBUFS;
1227
1228 return 0;
1229}
1230
1231static void packet_free_pending(struct packet_sock *po)
1232{
1233 free_percpu(po->tx_ring.pending_refcnt);
1234}
1235
1236#define ROOM_POW_OFF 2
1237#define ROOM_NONE 0x0
1238#define ROOM_LOW 0x1
1239#define ROOM_NORMAL 0x2
1240
1241static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
1242{
1243 int idx, len;
1244
1245 len = po->rx_ring.frame_max + 1;
1246 idx = po->rx_ring.head;
1247 if (pow_off)
1248 idx += len >> pow_off;
1249 if (idx >= len)
1250 idx -= len;
1251 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1252}
1253
1254static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
1255{
1256 int idx, len;
1257
1258 len = po->rx_ring.prb_bdqc.knum_blocks;
1259 idx = po->rx_ring.prb_bdqc.kactive_blk_num;
1260 if (pow_off)
1261 idx += len >> pow_off;
1262 if (idx >= len)
1263 idx -= len;
1264 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1265}
1266
1267static int __packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1268{
1269 struct sock *sk = &po->sk;
1270 int ret = ROOM_NONE;
1271
1272 if (po->prot_hook.func != tpacket_rcv) {
1273 int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1274 - (skb ? skb->truesize : 0);
1275 if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
1276 return ROOM_NORMAL;
1277 else if (avail > 0)
1278 return ROOM_LOW;
1279 else
1280 return ROOM_NONE;
1281 }
1282
1283 if (po->tp_version == TPACKET_V3) {
1284 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1285 ret = ROOM_NORMAL;
1286 else if (__tpacket_v3_has_room(po, 0))
1287 ret = ROOM_LOW;
1288 } else {
1289 if (__tpacket_has_room(po, ROOM_POW_OFF))
1290 ret = ROOM_NORMAL;
1291 else if (__tpacket_has_room(po, 0))
1292 ret = ROOM_LOW;
1293 }
1294
1295 return ret;
1296}
1297
1298static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1299{
1300 int ret;
1301 bool has_room;
1302
1303 spin_lock_bh(&po->sk.sk_receive_queue.lock);
1304 ret = __packet_rcv_has_room(po, skb);
1305 has_room = ret == ROOM_NORMAL;
1306 if (po->pressure == has_room)
1307 po->pressure = !has_room;
1308 spin_unlock_bh(&po->sk.sk_receive_queue.lock);
1309
1310 return ret;
1311}
1312
1313static void packet_sock_destruct(struct sock *sk)
1314{
1315 skb_queue_purge(&sk->sk_error_queue);
1316
1317 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1318 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
1319
1320 if (!sock_flag(sk, SOCK_DEAD)) {
1321 pr_err("Attempt to release alive packet socket: %p\n", sk);
1322 return;
1323 }
1324
1325 sk_refcnt_debug_dec(sk);
1326}
1327
1328static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1329{
1330 u32 rxhash;
1331 int i, count = 0;
1332
1333 rxhash = skb_get_hash(skb);
1334 for (i = 0; i < ROLLOVER_HLEN; i++)
1335 if (po->rollover->history[i] == rxhash)
1336 count++;
1337
1338 po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash;
1339 return count > (ROLLOVER_HLEN >> 1);
1340}
1341
1342static unsigned int fanout_demux_hash(struct packet_fanout *f,
1343 struct sk_buff *skb,
1344 unsigned int num)
1345{
1346 return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1347}
1348
1349static unsigned int fanout_demux_lb(struct packet_fanout *f,
1350 struct sk_buff *skb,
1351 unsigned int num)
1352{
1353 unsigned int val = atomic_inc_return(&f->rr_cur);
1354
1355 return val % num;
1356}
1357
1358static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1359 struct sk_buff *skb,
1360 unsigned int num)
1361{
1362 return smp_processor_id() % num;
1363}
1364
1365static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1366 struct sk_buff *skb,
1367 unsigned int num)
1368{
1369 return prandom_u32_max(num);
1370}
1371
1372static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1373 struct sk_buff *skb,
1374 unsigned int idx, bool try_self,
1375 unsigned int num)
1376{
1377 struct packet_sock *po, *po_next, *po_skip = NULL;
1378 unsigned int i, j, room = ROOM_NONE;
1379
1380 po = pkt_sk(f->arr[idx]);
1381
1382 if (try_self) {
1383 room = packet_rcv_has_room(po, skb);
1384 if (room == ROOM_NORMAL ||
1385 (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1386 return idx;
1387 po_skip = po;
1388 }
1389
1390 i = j = min_t(int, po->rollover->sock, num - 1);
1391 do {
1392 po_next = pkt_sk(f->arr[i]);
1393 if (po_next != po_skip && !po_next->pressure &&
1394 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1395 if (i != j)
1396 po->rollover->sock = i;
1397 atomic_long_inc(&po->rollover->num);
1398 if (room == ROOM_LOW)
1399 atomic_long_inc(&po->rollover->num_huge);
1400 return i;
1401 }
1402
1403 if (++i == num)
1404 i = 0;
1405 } while (i != j);
1406
1407 atomic_long_inc(&po->rollover->num_failed);
1408 return idx;
1409}
1410
1411static unsigned int fanout_demux_qm(struct packet_fanout *f,
1412 struct sk_buff *skb,
1413 unsigned int num)
1414{
1415 return skb_get_queue_mapping(skb) % num;
1416}
1417
1418static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1419 struct sk_buff *skb,
1420 unsigned int num)
1421{
1422 struct bpf_prog *prog;
1423 unsigned int ret = 0;
1424
1425 rcu_read_lock();
1426 prog = rcu_dereference(f->bpf_prog);
1427 if (prog)
1428 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1429 rcu_read_unlock();
1430
1431 return ret;
1432}
1433
1434static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1435{
1436 return f->flags & (flag >> 8);
1437}
1438
1439static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1440 struct packet_type *pt, struct net_device *orig_dev)
1441{
1442 struct packet_fanout *f = pt->af_packet_priv;
1443 unsigned int num = READ_ONCE(f->num_members);
1444 struct net *net = read_pnet(&f->net);
1445 struct packet_sock *po;
1446 unsigned int idx;
1447
1448 if (!net_eq(dev_net(dev), net) || !num) {
1449 kfree_skb(skb);
1450 return 0;
1451 }
1452
1453 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1454 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1455 if (!skb)
1456 return 0;
1457 }
1458 switch (f->type) {
1459 case PACKET_FANOUT_HASH:
1460 default:
1461 idx = fanout_demux_hash(f, skb, num);
1462 break;
1463 case PACKET_FANOUT_LB:
1464 idx = fanout_demux_lb(f, skb, num);
1465 break;
1466 case PACKET_FANOUT_CPU:
1467 idx = fanout_demux_cpu(f, skb, num);
1468 break;
1469 case PACKET_FANOUT_RND:
1470 idx = fanout_demux_rnd(f, skb, num);
1471 break;
1472 case PACKET_FANOUT_QM:
1473 idx = fanout_demux_qm(f, skb, num);
1474 break;
1475 case PACKET_FANOUT_ROLLOVER:
1476 idx = fanout_demux_rollover(f, skb, 0, false, num);
1477 break;
1478 case PACKET_FANOUT_CBPF:
1479 case PACKET_FANOUT_EBPF:
1480 idx = fanout_demux_bpf(f, skb, num);
1481 break;
1482 }
1483
1484 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1485 idx = fanout_demux_rollover(f, skb, idx, true, num);
1486
1487 po = pkt_sk(f->arr[idx]);
1488 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1489}
1490
1491DEFINE_MUTEX(fanout_mutex);
1492EXPORT_SYMBOL_GPL(fanout_mutex);
1493static LIST_HEAD(fanout_list);
1494static u16 fanout_next_id;
1495
1496static void __fanout_link(struct sock *sk, struct packet_sock *po)
1497{
1498 struct packet_fanout *f = po->fanout;
1499
1500 spin_lock(&f->lock);
1501 f->arr[f->num_members] = sk;
1502 smp_wmb();
1503 f->num_members++;
1504 if (f->num_members == 1)
1505 dev_add_pack(&f->prot_hook);
1506 spin_unlock(&f->lock);
1507}
1508
1509static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1510{
1511 struct packet_fanout *f = po->fanout;
1512 int i;
1513
1514 spin_lock(&f->lock);
1515 for (i = 0; i < f->num_members; i++) {
1516 if (f->arr[i] == sk)
1517 break;
1518 }
1519 BUG_ON(i >= f->num_members);
1520 f->arr[i] = f->arr[f->num_members - 1];
1521 f->num_members--;
1522 if (f->num_members == 0)
1523 __dev_remove_pack(&f->prot_hook);
1524 spin_unlock(&f->lock);
1525}
1526
1527static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1528{
1529 if (sk->sk_family != PF_PACKET)
1530 return false;
1531
1532 return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1533}
1534
1535static void fanout_init_data(struct packet_fanout *f)
1536{
1537 switch (f->type) {
1538 case PACKET_FANOUT_LB:
1539 atomic_set(&f->rr_cur, 0);
1540 break;
1541 case PACKET_FANOUT_CBPF:
1542 case PACKET_FANOUT_EBPF:
1543 RCU_INIT_POINTER(f->bpf_prog, NULL);
1544 break;
1545 }
1546}
1547
1548static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1549{
1550 struct bpf_prog *old;
1551
1552 spin_lock(&f->lock);
1553 old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1554 rcu_assign_pointer(f->bpf_prog, new);
1555 spin_unlock(&f->lock);
1556
1557 if (old) {
1558 synchronize_net();
1559 bpf_prog_destroy(old);
1560 }
1561}
1562
1563static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
1564 unsigned int len)
1565{
1566 struct bpf_prog *new;
1567 struct sock_fprog fprog;
1568 int ret;
1569
1570 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1571 return -EPERM;
1572 if (len != sizeof(fprog))
1573 return -EINVAL;
1574 if (copy_from_user(&fprog, data, len))
1575 return -EFAULT;
1576
1577 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1578 if (ret)
1579 return ret;
1580
1581 __fanout_set_data_bpf(po->fanout, new);
1582 return 0;
1583}
1584
1585static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
1586 unsigned int len)
1587{
1588 struct bpf_prog *new;
1589 u32 fd;
1590
1591 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1592 return -EPERM;
1593 if (len != sizeof(fd))
1594 return -EINVAL;
1595 if (copy_from_user(&fd, data, len))
1596 return -EFAULT;
1597
1598 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
1599 if (IS_ERR(new))
1600 return PTR_ERR(new);
1601
1602 __fanout_set_data_bpf(po->fanout, new);
1603 return 0;
1604}
1605
1606static int fanout_set_data(struct packet_sock *po, char __user *data,
1607 unsigned int len)
1608{
1609 switch (po->fanout->type) {
1610 case PACKET_FANOUT_CBPF:
1611 return fanout_set_data_cbpf(po, data, len);
1612 case PACKET_FANOUT_EBPF:
1613 return fanout_set_data_ebpf(po, data, len);
1614 default:
1615 return -EINVAL;
1616 };
1617}
1618
1619static void fanout_release_data(struct packet_fanout *f)
1620{
1621 switch (f->type) {
1622 case PACKET_FANOUT_CBPF:
1623 case PACKET_FANOUT_EBPF:
1624 __fanout_set_data_bpf(f, NULL);
1625 };
1626}
1627
1628static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
1629{
1630 struct packet_fanout *f;
1631
1632 list_for_each_entry(f, &fanout_list, list) {
1633 if (f->id == candidate_id &&
1634 read_pnet(&f->net) == sock_net(sk)) {
1635 return false;
1636 }
1637 }
1638 return true;
1639}
1640
1641static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
1642{
1643 u16 id = fanout_next_id;
1644
1645 do {
1646 if (__fanout_id_is_free(sk, id)) {
1647 *new_id = id;
1648 fanout_next_id = id + 1;
1649 return true;
1650 }
1651
1652 id++;
1653 } while (id != fanout_next_id);
1654
1655 return false;
1656}
1657
1658static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1659{
1660 struct packet_rollover *rollover = NULL;
1661 struct packet_sock *po = pkt_sk(sk);
1662 struct packet_fanout *f, *match;
1663 u8 type = type_flags & 0xff;
1664 u8 flags = type_flags >> 8;
1665 int err;
1666
1667 switch (type) {
1668 case PACKET_FANOUT_ROLLOVER:
1669 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1670 return -EINVAL;
1671 case PACKET_FANOUT_HASH:
1672 case PACKET_FANOUT_LB:
1673 case PACKET_FANOUT_CPU:
1674 case PACKET_FANOUT_RND:
1675 case PACKET_FANOUT_QM:
1676 case PACKET_FANOUT_CBPF:
1677 case PACKET_FANOUT_EBPF:
1678 break;
1679 default:
1680 return -EINVAL;
1681 }
1682
1683 mutex_lock(&fanout_mutex);
1684
1685 err = -EALREADY;
1686 if (po->fanout)
1687 goto out;
1688
1689 if (type == PACKET_FANOUT_ROLLOVER ||
1690 (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
1691 err = -ENOMEM;
1692 rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
1693 if (!rollover)
1694 goto out;
1695 atomic_long_set(&rollover->num, 0);
1696 atomic_long_set(&rollover->num_huge, 0);
1697 atomic_long_set(&rollover->num_failed, 0);
1698 }
1699
1700 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
1701 if (id != 0) {
1702 err = -EINVAL;
1703 goto out;
1704 }
1705 if (!fanout_find_new_id(sk, &id)) {
1706 err = -ENOMEM;
1707 goto out;
1708 }
1709
1710 flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
1711 }
1712
1713 match = NULL;
1714 list_for_each_entry(f, &fanout_list, list) {
1715 if (f->id == id &&
1716 read_pnet(&f->net) == sock_net(sk)) {
1717 match = f;
1718 break;
1719 }
1720 }
1721 err = -EINVAL;
1722 if (match && match->flags != flags)
1723 goto out;
1724 if (!match) {
1725 err = -ENOMEM;
1726 match = kzalloc(sizeof(*match), GFP_KERNEL);
1727 if (!match)
1728 goto out;
1729 write_pnet(&match->net, sock_net(sk));
1730 match->id = id;
1731 match->type = type;
1732 match->flags = flags;
1733 INIT_LIST_HEAD(&match->list);
1734 spin_lock_init(&match->lock);
1735 refcount_set(&match->sk_ref, 0);
1736 fanout_init_data(match);
1737 match->prot_hook.type = po->prot_hook.type;
1738 match->prot_hook.dev = po->prot_hook.dev;
1739 match->prot_hook.func = packet_rcv_fanout;
1740 match->prot_hook.af_packet_priv = match;
1741 match->prot_hook.id_match = match_fanout_group;
1742 list_add(&match->list, &fanout_list);
1743 }
1744 err = -EINVAL;
1745
1746 spin_lock(&po->bind_lock);
1747 if (po->running &&
1748 match->type == type &&
1749 match->prot_hook.type == po->prot_hook.type &&
1750 match->prot_hook.dev == po->prot_hook.dev) {
1751 err = -ENOSPC;
1752 if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1753 __dev_remove_pack(&po->prot_hook);
1754 po->fanout = match;
1755 po->rollover = rollover;
1756 rollover = NULL;
1757 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1758 __fanout_link(sk, po);
1759 err = 0;
1760 }
1761 }
1762 spin_unlock(&po->bind_lock);
1763
1764 if (err && !refcount_read(&match->sk_ref)) {
1765 list_del(&match->list);
1766 kfree(match);
1767 }
1768
1769out:
1770 kfree(rollover);
1771 mutex_unlock(&fanout_mutex);
1772 return err;
1773}
1774
1775
1776
1777
1778
1779
1780static struct packet_fanout *fanout_release(struct sock *sk)
1781{
1782 struct packet_sock *po = pkt_sk(sk);
1783 struct packet_fanout *f;
1784
1785 mutex_lock(&fanout_mutex);
1786 f = po->fanout;
1787 if (f) {
1788 po->fanout = NULL;
1789
1790 if (refcount_dec_and_test(&f->sk_ref))
1791 list_del(&f->list);
1792 else
1793 f = NULL;
1794 }
1795 mutex_unlock(&fanout_mutex);
1796
1797 return f;
1798}
1799
1800static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1801 struct sk_buff *skb)
1802{
1803
1804
1805
1806
1807 if (unlikely(dev->type != ARPHRD_ETHER))
1808 return false;
1809
1810 skb_reset_mac_header(skb);
1811 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1812}
1813
1814static const struct proto_ops packet_ops;
1815
1816static const struct proto_ops packet_ops_spkt;
1817
1818static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1819 struct packet_type *pt, struct net_device *orig_dev)
1820{
1821 struct sock *sk;
1822 struct sockaddr_pkt *spkt;
1823
1824
1825
1826
1827
1828
1829 sk = pt->af_packet_priv;
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842 if (skb->pkt_type == PACKET_LOOPBACK)
1843 goto out;
1844
1845 if (!net_eq(dev_net(dev), sock_net(sk)))
1846 goto out;
1847
1848 skb = skb_share_check(skb, GFP_ATOMIC);
1849 if (skb == NULL)
1850 goto oom;
1851
1852
1853 skb_dst_drop(skb);
1854
1855
1856 nf_reset(skb);
1857
1858 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1859
1860 skb_push(skb, skb->data - skb_mac_header(skb));
1861
1862
1863
1864
1865
1866 spkt->spkt_family = dev->type;
1867 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1868 spkt->spkt_protocol = skb->protocol;
1869
1870
1871
1872
1873
1874
1875 if (sock_queue_rcv_skb(sk, skb) == 0)
1876 return 0;
1877
1878out:
1879 kfree_skb(skb);
1880oom:
1881 return 0;
1882}
1883
1884
1885
1886
1887
1888
1889
1890static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
1891 size_t len)
1892{
1893 struct sock *sk = sock->sk;
1894 DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
1895 struct sk_buff *skb = NULL;
1896 struct net_device *dev;
1897 struct sockcm_cookie sockc;
1898 __be16 proto = 0;
1899 int err;
1900 int extra_len = 0;
1901
1902
1903
1904
1905
1906 if (saddr) {
1907 if (msg->msg_namelen < sizeof(struct sockaddr))
1908 return -EINVAL;
1909 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1910 proto = saddr->spkt_protocol;
1911 } else
1912 return -ENOTCONN;
1913
1914
1915
1916
1917
1918 saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
1919retry:
1920 rcu_read_lock();
1921 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1922 err = -ENODEV;
1923 if (dev == NULL)
1924 goto out_unlock;
1925
1926 err = -ENETDOWN;
1927 if (!(dev->flags & IFF_UP))
1928 goto out_unlock;
1929
1930
1931
1932
1933
1934
1935 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
1936 if (!netif_supports_nofcs(dev)) {
1937 err = -EPROTONOSUPPORT;
1938 goto out_unlock;
1939 }
1940 extra_len = 4;
1941 }
1942
1943 err = -EMSGSIZE;
1944 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
1945 goto out_unlock;
1946
1947 if (!skb) {
1948 size_t reserved = LL_RESERVED_SPACE(dev);
1949 int tlen = dev->needed_tailroom;
1950 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1951
1952 rcu_read_unlock();
1953 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1954 if (skb == NULL)
1955 return -ENOBUFS;
1956
1957
1958
1959
1960 skb_reserve(skb, reserved);
1961 skb_reset_network_header(skb);
1962
1963
1964 if (hhlen) {
1965 skb->data -= hhlen;
1966 skb->tail -= hhlen;
1967 if (len < hhlen)
1968 skb_reset_network_header(skb);
1969 }
1970 err = memcpy_from_msg(skb_put(skb, len), msg, len);
1971 if (err)
1972 goto out_free;
1973 goto retry;
1974 }
1975
1976 if (!dev_validate_header(dev, skb->data, len)) {
1977 err = -EINVAL;
1978 goto out_unlock;
1979 }
1980 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1981 !packet_extra_vlan_len_allowed(dev, skb)) {
1982 err = -EMSGSIZE;
1983 goto out_unlock;
1984 }
1985
1986 sockc.tsflags = sk->sk_tsflags;
1987 if (msg->msg_controllen) {
1988 err = sock_cmsg_send(sk, msg, &sockc);
1989 if (unlikely(err))
1990 goto out_unlock;
1991 }
1992
1993 skb->protocol = proto;
1994 skb->dev = dev;
1995 skb->priority = sk->sk_priority;
1996 skb->mark = sk->sk_mark;
1997
1998 sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
1999
2000 if (unlikely(extra_len == 4))
2001 skb->no_fcs = 1;
2002
2003 skb_probe_transport_header(skb, 0);
2004
2005 dev_queue_xmit(skb);
2006 rcu_read_unlock();
2007 return len;
2008
2009out_unlock:
2010 rcu_read_unlock();
2011out_free:
2012 kfree_skb(skb);
2013 return err;
2014}
2015
2016static unsigned int run_filter(struct sk_buff *skb,
2017 const struct sock *sk,
2018 unsigned int res)
2019{
2020 struct sk_filter *filter;
2021
2022 rcu_read_lock();
2023 filter = rcu_dereference(sk->sk_filter);
2024 if (filter != NULL)
2025 res = bpf_prog_run_clear_cb(filter->prog, skb);
2026 rcu_read_unlock();
2027
2028 return res;
2029}
2030
2031static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2032 size_t *len)
2033{
2034 struct virtio_net_hdr vnet_hdr;
2035
2036 if (*len < sizeof(vnet_hdr))
2037 return -EINVAL;
2038 *len -= sizeof(vnet_hdr);
2039
2040 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true))
2041 return -EINVAL;
2042
2043 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2044}
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
2059 struct packet_type *pt, struct net_device *orig_dev)
2060{
2061 struct sock *sk;
2062 struct sockaddr_ll *sll;
2063 struct packet_sock *po;
2064 u8 *skb_head = skb->data;
2065 int skb_len = skb->len;
2066 unsigned int snaplen, res;
2067 bool is_drop_n_account = false;
2068
2069 if (skb->pkt_type == PACKET_LOOPBACK)
2070 goto drop;
2071
2072 sk = pt->af_packet_priv;
2073 po = pkt_sk(sk);
2074
2075 if (!net_eq(dev_net(dev), sock_net(sk)))
2076 goto drop;
2077
2078 skb->dev = dev;
2079
2080 if (dev->header_ops) {
2081
2082
2083
2084
2085
2086
2087
2088 if (sk->sk_type != SOCK_DGRAM)
2089 skb_push(skb, skb->data - skb_mac_header(skb));
2090 else if (skb->pkt_type == PACKET_OUTGOING) {
2091
2092 skb_pull(skb, skb_network_offset(skb));
2093 }
2094 }
2095
2096 snaplen = skb->len;
2097
2098 res = run_filter(skb, sk, snaplen);
2099 if (!res)
2100 goto drop_n_restore;
2101 if (snaplen > res)
2102 snaplen = res;
2103
2104 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2105 goto drop_n_acct;
2106
2107 if (skb_shared(skb)) {
2108 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2109 if (nskb == NULL)
2110 goto drop_n_acct;
2111
2112 if (skb_head != skb->data) {
2113 skb->data = skb_head;
2114 skb->len = skb_len;
2115 }
2116 consume_skb(skb);
2117 skb = nskb;
2118 }
2119
2120 sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
2121
2122 sll = &PACKET_SKB_CB(skb)->sa.ll;
2123 sll->sll_hatype = dev->type;
2124 sll->sll_pkttype = skb->pkt_type;
2125 if (unlikely(po->origdev))
2126 sll->sll_ifindex = orig_dev->ifindex;
2127 else
2128 sll->sll_ifindex = dev->ifindex;
2129
2130 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2131
2132
2133
2134
2135 PACKET_SKB_CB(skb)->sa.origlen = skb->len;
2136
2137 if (pskb_trim(skb, snaplen))
2138 goto drop_n_acct;
2139
2140 skb_set_owner_r(skb, sk);
2141 skb->dev = NULL;
2142 skb_dst_drop(skb);
2143
2144
2145 nf_reset(skb);
2146
2147 spin_lock(&sk->sk_receive_queue.lock);
2148 po->stats.stats1.tp_packets++;
2149 sock_skb_set_dropcount(sk, skb);
2150 __skb_queue_tail(&sk->sk_receive_queue, skb);
2151 spin_unlock(&sk->sk_receive_queue.lock);
2152 sk->sk_data_ready(sk);
2153 return 0;
2154
2155drop_n_acct:
2156 is_drop_n_account = true;
2157 spin_lock(&sk->sk_receive_queue.lock);
2158 po->stats.stats1.tp_drops++;
2159 atomic_inc(&sk->sk_drops);
2160 spin_unlock(&sk->sk_receive_queue.lock);
2161
2162drop_n_restore:
2163 if (skb_head != skb->data && skb_shared(skb)) {
2164 skb->data = skb_head;
2165 skb->len = skb_len;
2166 }
2167drop:
2168 if (!is_drop_n_account)
2169 consume_skb(skb);
2170 else
2171 kfree_skb(skb);
2172 return 0;
2173}
2174
2175static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2176 struct packet_type *pt, struct net_device *orig_dev)
2177{
2178 struct sock *sk;
2179 struct packet_sock *po;
2180 struct sockaddr_ll *sll;
2181 union tpacket_uhdr h;
2182 u8 *skb_head = skb->data;
2183 int skb_len = skb->len;
2184 unsigned int snaplen, res;
2185 unsigned long status = TP_STATUS_USER;
2186 unsigned short macoff, netoff, hdrlen;
2187 struct sk_buff *copy_skb = NULL;
2188 struct timespec ts;
2189 __u32 ts_status;
2190 bool is_drop_n_account = false;
2191 bool do_vnet = false;
2192
2193
2194
2195
2196
2197 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
2198 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
2199
2200 if (skb->pkt_type == PACKET_LOOPBACK)
2201 goto drop;
2202
2203 sk = pt->af_packet_priv;
2204 po = pkt_sk(sk);
2205
2206 if (!net_eq(dev_net(dev), sock_net(sk)))
2207 goto drop;
2208
2209 if (dev->header_ops) {
2210 if (sk->sk_type != SOCK_DGRAM)
2211 skb_push(skb, skb->data - skb_mac_header(skb));
2212 else if (skb->pkt_type == PACKET_OUTGOING) {
2213
2214 skb_pull(skb, skb_network_offset(skb));
2215 }
2216 }
2217
2218 snaplen = skb->len;
2219
2220 res = run_filter(skb, sk, snaplen);
2221 if (!res)
2222 goto drop_n_restore;
2223
2224 if (skb->ip_summed == CHECKSUM_PARTIAL)
2225 status |= TP_STATUS_CSUMNOTREADY;
2226 else if (skb->pkt_type != PACKET_OUTGOING &&
2227 (skb->ip_summed == CHECKSUM_COMPLETE ||
2228 skb_csum_unnecessary(skb)))
2229 status |= TP_STATUS_CSUM_VALID;
2230
2231 if (snaplen > res)
2232 snaplen = res;
2233
2234 if (sk->sk_type == SOCK_DGRAM) {
2235 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
2236 po->tp_reserve;
2237 } else {
2238 unsigned int maclen = skb_network_offset(skb);
2239 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2240 (maclen < 16 ? 16 : maclen)) +
2241 po->tp_reserve;
2242 if (po->has_vnet_hdr) {
2243 netoff += sizeof(struct virtio_net_hdr);
2244 do_vnet = true;
2245 }
2246 macoff = netoff - maclen;
2247 }
2248 if (po->tp_version <= TPACKET_V2) {
2249 if (macoff + snaplen > po->rx_ring.frame_size) {
2250 if (po->copy_thresh &&
2251 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
2252 if (skb_shared(skb)) {
2253 copy_skb = skb_clone(skb, GFP_ATOMIC);
2254 } else {
2255 copy_skb = skb_get(skb);
2256 skb_head = skb->data;
2257 }
2258 if (copy_skb)
2259 skb_set_owner_r(copy_skb, sk);
2260 }
2261 snaplen = po->rx_ring.frame_size - macoff;
2262 if ((int)snaplen < 0) {
2263 snaplen = 0;
2264 do_vnet = false;
2265 }
2266 }
2267 } else if (unlikely(macoff + snaplen >
2268 GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
2269 u32 nval;
2270
2271 nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
2272 pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
2273 snaplen, nval, macoff);
2274 snaplen = nval;
2275 if (unlikely((int)snaplen < 0)) {
2276 snaplen = 0;
2277 macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
2278 do_vnet = false;
2279 }
2280 }
2281 spin_lock(&sk->sk_receive_queue.lock);
2282 h.raw = packet_current_rx_frame(po, skb,
2283 TP_STATUS_KERNEL, (macoff+snaplen));
2284 if (!h.raw)
2285 goto drop_n_account;
2286 if (po->tp_version <= TPACKET_V2) {
2287 packet_increment_rx_head(po, &po->rx_ring);
2288
2289
2290
2291
2292
2293
2294 if (po->stats.stats1.tp_drops)
2295 status |= TP_STATUS_LOSING;
2296 }
2297 po->stats.stats1.tp_packets++;
2298 if (copy_skb) {
2299 status |= TP_STATUS_COPY;
2300 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
2301 }
2302 spin_unlock(&sk->sk_receive_queue.lock);
2303
2304 if (do_vnet) {
2305 if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
2306 sizeof(struct virtio_net_hdr),
2307 vio_le(), true)) {
2308 spin_lock(&sk->sk_receive_queue.lock);
2309 goto drop_n_account;
2310 }
2311 }
2312
2313 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2314
2315 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2316 getnstimeofday(&ts);
2317
2318 status |= ts_status;
2319
2320 switch (po->tp_version) {
2321 case TPACKET_V1:
2322 h.h1->tp_len = skb->len;
2323 h.h1->tp_snaplen = snaplen;
2324 h.h1->tp_mac = macoff;
2325 h.h1->tp_net = netoff;
2326 h.h1->tp_sec = ts.tv_sec;
2327 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
2328 hdrlen = sizeof(*h.h1);
2329 break;
2330 case TPACKET_V2:
2331 h.h2->tp_len = skb->len;
2332 h.h2->tp_snaplen = snaplen;
2333 h.h2->tp_mac = macoff;
2334 h.h2->tp_net = netoff;
2335 h.h2->tp_sec = ts.tv_sec;
2336 h.h2->tp_nsec = ts.tv_nsec;
2337 if (skb_vlan_tag_present(skb)) {
2338 h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
2339 h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
2340 status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
2341 } else {
2342 h.h2->tp_vlan_tci = 0;
2343 h.h2->tp_vlan_tpid = 0;
2344 }
2345 memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
2346 hdrlen = sizeof(*h.h2);
2347 break;
2348 case TPACKET_V3:
2349
2350
2351
2352 h.h3->tp_status |= status;
2353 h.h3->tp_len = skb->len;
2354 h.h3->tp_snaplen = snaplen;
2355 h.h3->tp_mac = macoff;
2356 h.h3->tp_net = netoff;
2357 h.h3->tp_sec = ts.tv_sec;
2358 h.h3->tp_nsec = ts.tv_nsec;
2359 memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
2360 hdrlen = sizeof(*h.h3);
2361 break;
2362 default:
2363 BUG();
2364 }
2365
2366 sll = h.raw + TPACKET_ALIGN(hdrlen);
2367 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2368 sll->sll_family = AF_PACKET;
2369 sll->sll_hatype = dev->type;
2370 sll->sll_protocol = skb->protocol;
2371 sll->sll_pkttype = skb->pkt_type;
2372 if (unlikely(po->origdev))
2373 sll->sll_ifindex = orig_dev->ifindex;
2374 else
2375 sll->sll_ifindex = dev->ifindex;
2376
2377 smp_mb();
2378
2379#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
2380 if (po->tp_version <= TPACKET_V2) {
2381 u8 *start, *end;
2382
2383 end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
2384 macoff + snaplen);
2385
2386 for (start = h.raw; start < end; start += PAGE_SIZE)
2387 flush_dcache_page(pgv_to_page(start));
2388 }
2389 smp_wmb();
2390#endif
2391
2392 if (po->tp_version <= TPACKET_V2) {
2393 __packet_set_status(po, h.raw, status);
2394 sk->sk_data_ready(sk);
2395 } else {
2396 prb_clear_blk_fill_status(&po->rx_ring);
2397 }
2398
2399drop_n_restore:
2400 if (skb_head != skb->data && skb_shared(skb)) {
2401 skb->data = skb_head;
2402 skb->len = skb_len;
2403 }
2404drop:
2405 if (!is_drop_n_account)
2406 consume_skb(skb);
2407 else
2408 kfree_skb(skb);
2409 return 0;
2410
2411drop_n_account:
2412 is_drop_n_account = true;
2413 po->stats.stats1.tp_drops++;
2414 spin_unlock(&sk->sk_receive_queue.lock);
2415
2416 sk->sk_data_ready(sk);
2417 kfree_skb(copy_skb);
2418 goto drop_n_restore;
2419}
2420
2421static void tpacket_destruct_skb(struct sk_buff *skb)
2422{
2423 struct packet_sock *po = pkt_sk(skb->sk);
2424
2425 if (likely(po->tx_ring.pg_vec)) {
2426 void *ph;
2427 __u32 ts;
2428
2429 ph = skb_shinfo(skb)->destructor_arg;
2430 packet_dec_pending(&po->tx_ring);
2431
2432 ts = __packet_set_timestamp(po, ph, skb);
2433 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2434 }
2435
2436 sock_wfree(skb);
2437}
2438
2439static void tpacket_set_protocol(const struct net_device *dev,
2440 struct sk_buff *skb)
2441{
2442 if (dev->type == ARPHRD_ETHER) {
2443 skb_reset_mac_header(skb);
2444 skb->protocol = eth_hdr(skb)->h_proto;
2445 }
2446}
2447
2448static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2449{
2450 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2451 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2452 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2453 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2454 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2455 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2456 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2457
2458 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2459 return -EINVAL;
2460
2461 return 0;
2462}
2463
2464static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2465 struct virtio_net_hdr *vnet_hdr)
2466{
2467 if (*len < sizeof(*vnet_hdr))
2468 return -EINVAL;
2469 *len -= sizeof(*vnet_hdr);
2470
2471 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2472 return -EFAULT;
2473
2474 return __packet_snd_vnet_parse(vnet_hdr, *len);
2475}
2476
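/* Build an skb for one TX-ring frame. For SOCK_DGRAM the device header is
 * generated; otherwise up to 'copylen' bytes are copied into the linear
 * area and validated. The remaining payload is attached as page fragments
 * pointing directly into the mmapped ring, so it is transmitted without a
 * further copy. Returns the frame length or a negative errno.
 */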
2477static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2478 void *frame, struct net_device *dev, void *data, int tp_len,
2479 __be16 proto, unsigned char *addr, int hlen, int copylen,
2480 const struct sockcm_cookie *sockc)
2481{
2482 union tpacket_uhdr ph;
2483 int to_write, offset, len, nr_frags, len_max;
2484 struct socket *sock = po->sk.sk_socket;
2485 struct page *page;
2486 int err;
2487
2488 ph.raw = frame;
2489
2490 skb->protocol = proto;
2491 skb->dev = dev;
2492 skb->priority = po->sk.sk_priority;
2493 skb->mark = po->sk.sk_mark;
2494 sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
2495 skb_shinfo(skb)->destructor_arg = ph.raw;
2496
2497 skb_reserve(skb, hlen);
2498 skb_reset_network_header(skb);
2499
2500 to_write = tp_len;
2501
2502 if (sock->type == SOCK_DGRAM) {
2503 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2504 NULL, tp_len);
2505 if (unlikely(err < 0))
2506 return -EINVAL;
2507 } else if (copylen) {
2508 int hdrlen = min_t(int, copylen, tp_len);
2509
2510 skb_push(skb, dev->hard_header_len);
2511 skb_put(skb, copylen - dev->hard_header_len);
2512 err = skb_store_bits(skb, 0, data, hdrlen);
2513 if (unlikely(err))
2514 return err;
2515 if (!dev_validate_header(dev, skb->data, hdrlen))
2516 return -EINVAL;
2517 if (!skb->protocol)
2518 tpacket_set_protocol(dev, skb);
2519
2520 data += hdrlen;
2521 to_write -= hdrlen;
2522 }
2523
2524 offset = offset_in_page(data);
2525 len_max = PAGE_SIZE - offset;
2526 len = ((to_write > len_max) ? len_max : to_write);
2527
2528 skb->data_len = to_write;
2529 skb->len += to_write;
2530 skb->truesize += to_write;
2531 refcount_add(to_write, &po->sk.sk_wmem_alloc);
2532
2533 while (likely(to_write)) {
2534 nr_frags = skb_shinfo(skb)->nr_frags;
2535
2536 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2537 pr_err("Packet exceed the number of skb frags(%lu)\n",
2538 MAX_SKB_FRAGS);
2539 return -EFAULT;
2540 }
2541
2542 page = pgv_to_page(data);
2543 data += len;
2544 flush_dcache_page(page);
2545 get_page(page);
2546 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2547 to_write -= len;
2548 offset = 0;
2549 len_max = PAGE_SIZE;
2550 len = ((to_write > len_max) ? len_max : to_write);
2551 }
2552
2553 skb_probe_transport_header(skb, 0);
2554
2555 return tp_len;
2556}
2557
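/* Decode the user-filled frame header for any TPACKET version: extract the
 * frame length, enforce size_max, and (with PACKET_TX_HAS_OFF) validate the
 * user supplied data offset. On success *data points at the payload inside
 * the ring frame and the payload length is returned.
 */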
2558static int tpacket_parse_header(struct packet_sock *po, void *frame,
2559 int size_max, void **data)
2560{
2561 union tpacket_uhdr ph;
2562 int tp_len, off;
2563
2564 ph.raw = frame;
2565
2566 switch (po->tp_version) {
2567 case TPACKET_V3:
2568 if (ph.h3->tp_next_offset != 0) {
2569 pr_warn_once("variable sized slot not supported");
2570 return -EINVAL;
2571 }
2572 tp_len = ph.h3->tp_len;
2573 break;
2574 case TPACKET_V2:
2575 tp_len = ph.h2->tp_len;
2576 break;
2577 default:
2578 tp_len = ph.h1->tp_len;
2579 break;
2580 }
2581 if (unlikely(tp_len > size_max)) {
2582 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2583 return -EMSGSIZE;
2584 }
2585
2586 if (unlikely(po->tp_tx_has_off)) {
2587 int off_min, off_max;
2588
2589 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2590 off_max = po->tx_ring.frame_size - tp_len;
2591 if (po->sk.sk_type == SOCK_DGRAM) {
2592 switch (po->tp_version) {
2593 case TPACKET_V3:
2594 off = ph.h3->tp_net;
2595 break;
2596 case TPACKET_V2:
2597 off = ph.h2->tp_net;
2598 break;
2599 default:
2600 off = ph.h1->tp_net;
2601 break;
2602 }
2603 } else {
2604 switch (po->tp_version) {
2605 case TPACKET_V3:
2606 off = ph.h3->tp_mac;
2607 break;
2608 case TPACKET_V2:
2609 off = ph.h2->tp_mac;
2610 break;
2611 default:
2612 off = ph.h1->tp_mac;
2613 break;
2614 }
2615 }
2616 if (unlikely((off < off_min) || (off_max < off)))
2617 return -EINVAL;
2618 } else {
2619 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2620 }
2621
2622 *data = frame + off;
2623 return tp_len;
2624}
2625
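/* TX-ring transmit loop: walk the ring under pg_vec_lock, turn every frame
 * in TP_STATUS_SEND_REQUEST state into an skb and hand it to po->xmit(),
 * marking the frame TP_STATUS_SENDING while it is in flight. For blocking
 * sends the loop keeps running until no pending frames remain; the total
 * number of bytes queued is returned.
 */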
2626static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2627{
2628 struct sk_buff *skb;
2629 struct net_device *dev;
2630 struct virtio_net_hdr *vnet_hdr = NULL;
2631 struct sockcm_cookie sockc;
2632 __be16 proto;
2633 int err, reserve = 0;
2634 void *ph;
2635 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2636 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2637 int tp_len, size_max;
2638 unsigned char *addr;
2639 void *data;
2640 int len_sum = 0;
2641 int status = TP_STATUS_AVAILABLE;
2642 int hlen, tlen, copylen = 0;
2643
2644 mutex_lock(&po->pg_vec_lock);
2645
2646 if (likely(saddr == NULL)) {
2647 dev = packet_cached_dev_get(po);
2648 proto = po->num;
2649 addr = NULL;
2650 } else {
2651 err = -EINVAL;
2652 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2653 goto out;
2654 if (msg->msg_namelen < (saddr->sll_halen
2655 + offsetof(struct sockaddr_ll,
2656 sll_addr)))
2657 goto out;
2658 proto = saddr->sll_protocol;
2659 addr = saddr->sll_addr;
2660 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2661 }
2662
2663 err = -ENXIO;
2664 if (unlikely(dev == NULL))
2665 goto out;
2666 err = -ENETDOWN;
2667 if (unlikely(!(dev->flags & IFF_UP)))
2668 goto out_put;
2669
2670 sockc.tsflags = po->sk.sk_tsflags;
2671 if (msg->msg_controllen) {
2672 err = sock_cmsg_send(&po->sk, msg, &sockc);
2673 if (unlikely(err))
2674 goto out_put;
2675 }
2676
2677 if (po->sk.sk_socket->type == SOCK_RAW)
2678 reserve = dev->hard_header_len;
2679 size_max = po->tx_ring.frame_size
2680 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2681
2682 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2683 size_max = dev->mtu + reserve + VLAN_HLEN;
2684
2685 do {
2686 ph = packet_current_frame(po, &po->tx_ring,
2687 TP_STATUS_SEND_REQUEST);
2688 if (unlikely(ph == NULL)) {
2689 if (need_wait && need_resched())
2690 schedule();
2691 continue;
2692 }
2693
2694 skb = NULL;
2695 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2696 if (tp_len < 0)
2697 goto tpacket_error;
2698
2699 status = TP_STATUS_SEND_REQUEST;
2700 hlen = LL_RESERVED_SPACE(dev);
2701 tlen = dev->needed_tailroom;
2702 if (po->has_vnet_hdr) {
2703 vnet_hdr = data;
2704 data += sizeof(*vnet_hdr);
2705 tp_len -= sizeof(*vnet_hdr);
2706 if (tp_len < 0 ||
2707 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2708 tp_len = -EINVAL;
2709 goto tpacket_error;
2710 }
2711 copylen = __virtio16_to_cpu(vio_le(),
2712 vnet_hdr->hdr_len);
2713 }
2714 copylen = max_t(int, copylen, dev->hard_header_len);
2715 skb = sock_alloc_send_skb(&po->sk,
2716 hlen + tlen + sizeof(struct sockaddr_ll) +
2717 (copylen - dev->hard_header_len),
2718 !need_wait, &err);
2719
2720 if (unlikely(skb == NULL)) {
2721 /* Out of skbs: report the frames already sent, if any, instead of the error. */
2722 if (likely(len_sum > 0))
2723 err = len_sum;
2724 goto out_status;
2725 }
2726 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2727 addr, hlen, copylen, &sockc);
2728 if (likely(tp_len >= 0) &&
2729 tp_len > dev->mtu + reserve &&
2730 !po->has_vnet_hdr &&
2731 !packet_extra_vlan_len_allowed(dev, skb))
2732 tp_len = -EMSGSIZE;
2733
2734 if (unlikely(tp_len < 0)) {
2735tpacket_error:
2736 if (po->tp_loss) {
2737 __packet_set_status(po, ph,
2738 TP_STATUS_AVAILABLE);
2739 packet_increment_head(&po->tx_ring);
2740 kfree_skb(skb);
2741 continue;
2742 } else {
2743 status = TP_STATUS_WRONG_FORMAT;
2744 err = tp_len;
2745 goto out_status;
2746 }
2747 }
2748
2749 if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
2750 vio_le())) {
2751 tp_len = -EINVAL;
2752 goto tpacket_error;
2753 }
2754
2755 skb->destructor = tpacket_destruct_skb;
2756 __packet_set_status(po, ph, TP_STATUS_SENDING);
2757 packet_inc_pending(&po->tx_ring);
2758
2759 status = TP_STATUS_SEND_REQUEST;
2760 err = po->xmit(skb);
2761 if (unlikely(err > 0)) {
2762 err = net_xmit_errno(err);
2763 if (err && __packet_get_status(po, ph) ==
2764 TP_STATUS_AVAILABLE) {
2765 /* The skb was already freed by tpacket_destruct_skb(). */
2766 skb = NULL;
2767 goto out_status;
2768 }
2769 /* The packet was dropped below us but the skb has not
2770 * been destructed yet (or the drop was only congestion);
2771 * clear the error and move on to the next frame.
2772 */
2773 err = 0;
2774 }
2775 packet_increment_head(&po->tx_ring);
2776 len_sum += tp_len;
2777 } while (likely((ph != NULL) ||
2778 /* Note: packet_read_pending() can be relatively
2779 * expensive (it sums a per-cpu counter), but in the
2780 * common case the first condition above short-circuits
2781 * the test so it is rarely evaluated.
2782 */
2783
2784 (need_wait && packet_read_pending(&po->tx_ring))));
2785
2786 err = len_sum;
2787 goto out_put;
2788
2789out_status:
2790 __packet_set_status(po, ph, status);
2791 kfree_skb(skb);
2792out_put:
2793 dev_put(dev);
2794out:
2795 mutex_unlock(&po->pg_vec_lock);
2796 return err;
2797}
2798
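/* Allocate an skb for the non-ring send path: 'linear' bytes go into the
 * linear area and the rest into paged data, with 'reserve' bytes of
 * headroom left for the link-layer header.
 */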
2799static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2800 size_t reserve, size_t len,
2801 size_t linear, int noblock,
2802 int *err)
2803{
2804 struct sk_buff *skb;
2805
2806 /* If it fits in a page (or no linear size was given), keep it all linear. */
2807 if (prepad + len < PAGE_SIZE || !linear)
2808 linear = len;
2809
2810 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2811 err, 0);
2812 if (!skb)
2813 return NULL;
2814
2815 skb_reserve(skb, reserve);
2816 skb_put(skb, linear);
2817 skb->data_len = len - linear;
2818 skb->len += len - linear;
2819
2820 return skb;
2821}
2822
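/* Transmit path for sockets without a TX ring: resolve the target device,
 * copy the packet from the msghdr into a freshly allocated skb, apply the
 * optional virtio_net header and per-socket options (priority, mark,
 * timestamping, SOCK_NOFCS), then pass it to po->xmit().
 */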
2823static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2824{
2825 struct sock *sk = sock->sk;
2826 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2827 struct sk_buff *skb;
2828 struct net_device *dev;
2829 __be16 proto;
2830 unsigned char *addr;
2831 int err, reserve = 0;
2832 struct sockcm_cookie sockc;
2833 struct virtio_net_hdr vnet_hdr = { 0 };
2834 int offset = 0;
2835 struct packet_sock *po = pkt_sk(sk);
2836 bool has_vnet_hdr = false;
2837 int hlen, tlen, linear;
2838 int extra_len = 0;
2839
2840 /*
2841 * Get and verify the destination address.
2842 */
2843
2844 if (likely(saddr == NULL)) {
2845 dev = packet_cached_dev_get(po);
2846 proto = po->num;
2847 addr = NULL;
2848 } else {
2849 err = -EINVAL;
2850 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2851 goto out;
2852 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2853 goto out;
2854 proto = saddr->sll_protocol;
2855 addr = saddr->sll_addr;
2856 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2857 }
2858
2859 err = -ENXIO;
2860 if (unlikely(dev == NULL))
2861 goto out_unlock;
2862 err = -ENETDOWN;
2863 if (unlikely(!(dev->flags & IFF_UP)))
2864 goto out_unlock;
2865
2866 sockc.tsflags = sk->sk_tsflags;
2867 sockc.mark = sk->sk_mark;
2868 if (msg->msg_controllen) {
2869 err = sock_cmsg_send(sk, msg, &sockc);
2870 if (unlikely(err))
2871 goto out_unlock;
2872 }
2873
2874 if (sock->type == SOCK_RAW)
2875 reserve = dev->hard_header_len;
2876 if (po->has_vnet_hdr) {
2877 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2878 if (err)
2879 goto out_unlock;
2880 has_vnet_hdr = true;
2881 }
2882
2883 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2884 if (!netif_supports_nofcs(dev)) {
2885 err = -EPROTONOSUPPORT;
2886 goto out_unlock;
2887 }
2888 extra_len = 4;
2889 }
2890
2891 err = -EMSGSIZE;
2892 if (!vnet_hdr.gso_type &&
2893 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2894 goto out_unlock;
2895
2896 err = -ENOBUFS;
2897 hlen = LL_RESERVED_SPACE(dev);
2898 tlen = dev->needed_tailroom;
2899 linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
2900 linear = max(linear, min_t(int, len, dev->hard_header_len));
2901 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
2902 msg->msg_flags & MSG_DONTWAIT, &err);
2903 if (skb == NULL)
2904 goto out_unlock;
2905
2906 skb_reset_network_header(skb);
2907
2908 err = -EINVAL;
2909 if (sock->type == SOCK_DGRAM) {
2910 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2911 if (unlikely(offset < 0))
2912 goto out_free;
2913 } else if (reserve) {
2914 skb_reserve(skb, -reserve);
2915 }
2916
2917 /* skb_copy_datagram_from_iter() returns -EFAULT on a bad user buffer. */
2918 err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
2919 if (err)
2920 goto out_free;
2921
2922 if (sock->type == SOCK_RAW &&
2923 !dev_validate_header(dev, skb->data, len)) {
2924 err = -EINVAL;
2925 goto out_free;
2926 }
2927
2928 sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
2929
2930 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2931 !packet_extra_vlan_len_allowed(dev, skb)) {
2932 err = -EMSGSIZE;
2933 goto out_free;
2934 }
2935
2936 skb->protocol = proto;
2937 skb->dev = dev;
2938 skb->priority = sk->sk_priority;
2939 skb->mark = sockc.mark;
2940
2941 if (has_vnet_hdr) {
2942 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2943 if (err)
2944 goto out_free;
2945 len += sizeof(vnet_hdr);
2946 }
2947
2948 skb_probe_transport_header(skb, reserve);
2949
2950 if (unlikely(extra_len == 4))
2951 skb->no_fcs = 1;
2952
2953 err = po->xmit(skb);
2954 if (err > 0 && (err = net_xmit_errno(err)) != 0)
2955 goto out_unlock;
2956
2957 dev_put(dev);
2958
2959 return len;
2960
2961out_free:
2962 kfree_skb(skb);
2963out_unlock:
2964 if (dev)
2965 dev_put(dev);
2966out:
2967 return err;
2968}
2969
2970static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
2971{
2972 struct sock *sk = sock->sk;
2973 struct packet_sock *po = pkt_sk(sk);
2974
2975 if (po->tx_ring.pg_vec)
2976 return tpacket_snd(po, msg);
2977 else
2978 return packet_snd(sock, msg, len);
2979}
2980
2981/*
2982 * Close a PACKET socket: drop it from the socket list, unhook it from the
2983 * device, free any rings and fanout state, and release the socket.
2984 */
2985
2986static int packet_release(struct socket *sock)
2987{
2988 struct sock *sk = sock->sk;
2989 struct packet_sock *po;
2990 struct packet_fanout *f;
2991 struct net *net;
2992 union tpacket_req_u req_u;
2993
2994 if (!sk)
2995 return 0;
2996
2997 net = sock_net(sk);
2998 po = pkt_sk(sk);
2999
3000 mutex_lock(&net->packet.sklist_lock);
3001 sk_del_node_init_rcu(sk);
3002 mutex_unlock(&net->packet.sklist_lock);
3003
3004 preempt_disable();
3005 sock_prot_inuse_add(net, sk->sk_prot, -1);
3006 preempt_enable();
3007
3008 spin_lock(&po->bind_lock);
3009 unregister_prot_hook(sk, false);
3010 packet_cached_dev_reset(po);
3011
3012 if (po->prot_hook.dev) {
3013 dev_put(po->prot_hook.dev);
3014 po->prot_hook.dev = NULL;
3015 }
3016 spin_unlock(&po->bind_lock);
3017
3018 packet_flush_mclist(sk);
3019
3020 lock_sock(sk);
3021 if (po->rx_ring.pg_vec) {
3022 memset(&req_u, 0, sizeof(req_u));
3023 packet_set_ring(sk, &req_u, 1, 0);
3024 }
3025
3026 if (po->tx_ring.pg_vec) {
3027 memset(&req_u, 0, sizeof(req_u));
3028 packet_set_ring(sk, &req_u, 1, 1);
3029 }
3030 release_sock(sk);
3031
3032 f = fanout_release(sk);
3033
3034 synchronize_net();
3035
3036 if (f) {
3037 kfree(po->rollover);
3038 fanout_release_data(f);
3039 kfree(f);
3040 }
3041
3042
3043 /* Now the socket is dead - no more input will appear. */
3044 sock_orphan(sk);
3045 sock->sk = NULL;
3046
3047
3048 /* Purge any packets still sitting in the receive queue. */
3049 skb_queue_purge(&sk->sk_receive_queue);
3050 packet_free_pending(po);
3051 sk_refcnt_debug_release(sk);
3052
3053 sock_put(sk);
3054 return 0;
3055}
3056
3057/*
3058 * Attach a packet hook: (re)bind the socket to a device and protocol.
3059 */
3060
3061static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3062 __be16 proto)
3063{
3064 struct packet_sock *po = pkt_sk(sk);
3065 struct net_device *dev_curr;
3066 __be16 proto_curr;
3067 bool need_rehook;
3068 struct net_device *dev = NULL;
3069 int ret = 0;
3070 bool unlisted = false;
3071
3072 lock_sock(sk);
3073 spin_lock(&po->bind_lock);
3074 rcu_read_lock();
3075
3076 if (po->fanout) {
3077 ret = -EINVAL;
3078 goto out_unlock;
3079 }
3080
3081 if (name) {
3082 dev = dev_get_by_name_rcu(sock_net(sk), name);
3083 if (!dev) {
3084 ret = -ENODEV;
3085 goto out_unlock;
3086 }
3087 } else if (ifindex) {
3088 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3089 if (!dev) {
3090 ret = -ENODEV;
3091 goto out_unlock;
3092 }
3093 }
3094
3095 if (dev)
3096 dev_hold(dev);
3097
3098 proto_curr = po->prot_hook.type;
3099 dev_curr = po->prot_hook.dev;
3100
3101 need_rehook = proto_curr != proto || dev_curr != dev;
3102
3103 if (need_rehook) {
3104 if (po->running) {
3105 rcu_read_unlock();
3106 /* Clearing po->num first prevents packet_notifier()
3107 * from re-registering the hook while we rebind.
3108 */
3109 po->num = 0;
3110 __unregister_prot_hook(sk, true);
3111 rcu_read_lock();
3112 dev_curr = po->prot_hook.dev;
3113 if (dev)
3114 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3115 dev->ifindex);
3116 }
3117
3118 BUG_ON(po->running);
3119 po->num = proto;
3120 po->prot_hook.type = proto;
3121
3122 if (unlikely(unlisted)) {
3123 dev_put(dev);
3124 po->prot_hook.dev = NULL;
3125 po->ifindex = -1;
3126 packet_cached_dev_reset(po);
3127 } else {
3128 po->prot_hook.dev = dev;
3129 po->ifindex = dev ? dev->ifindex : 0;
3130 packet_cached_dev_assign(po, dev);
3131 }
3132 }
3133 if (dev_curr)
3134 dev_put(dev_curr);
3135
3136 if (proto == 0 || !need_rehook)
3137 goto out_unlock;
3138
3139 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3140 register_prot_hook(sk);
3141 } else {
3142 sk->sk_err = ENETDOWN;
3143 if (!sock_flag(sk, SOCK_DEAD))
3144 sk->sk_error_report(sk);
3145 }
3146
3147out_unlock:
3148 rcu_read_unlock();
3149 spin_unlock(&po->bind_lock);
3150 release_sock(sk);
3151 return ret;
3152}
3153
3154/*
3155 * Bind a SOCK_PACKET socket to a device given by name.
3156 */
3157
3158static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3159 int addr_len)
3160{
3161 struct sock *sk = sock->sk;
3162 char name[sizeof(uaddr->sa_data) + 1];
3163
3164 /*
3165 * Check legality of the (struct sockaddr) address length.
3166 */
3167
3168 if (addr_len != sizeof(struct sockaddr))
3169 return -EINVAL;
3170
3171 /* uaddr->sa_data comes from user space and is not guaranteed to be
3172 * NUL-terminated, so copy it and terminate it ourselves. */
3173 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3174 name[sizeof(uaddr->sa_data)] = 0;
3175
3176 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3177}
3178
3179static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3180{
3181 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3182 struct sock *sk = sock->sk;
3183
3184 /*
3185 * Check the address length and family before binding.
3186 */
3187
3188 if (addr_len < sizeof(struct sockaddr_ll))
3189 return -EINVAL;
3190 if (sll->sll_family != AF_PACKET)
3191 return -EINVAL;
3192
3193 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3194 sll->sll_protocol ? : pkt_sk(sk)->num);
3195}
3196
3197static struct proto packet_proto = {
3198 .name = "PACKET",
3199 .owner = THIS_MODULE,
3200 .obj_size = sizeof(struct packet_sock),
3201};
3202
3203/*
3204 * Create a packet socket (SOCK_DGRAM, SOCK_RAW or SOCK_PACKET).
3205 */
3206
3207static int packet_create(struct net *net, struct socket *sock, int protocol,
3208 int kern)
3209{
3210 struct sock *sk;
3211 struct packet_sock *po;
3212 __be16 proto = (__force __be16)protocol;
3213 int err;
3214
3215 if (!ns_capable(net->user_ns, CAP_NET_RAW))
3216 return -EPERM;
3217 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3218 sock->type != SOCK_PACKET)
3219 return -ESOCKTNOSUPPORT;
3220
3221 sock->state = SS_UNCONNECTED;
3222
3223 err = -ENOBUFS;
3224 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3225 if (sk == NULL)
3226 goto out;
3227
3228 sock->ops = &packet_ops;
3229 if (sock->type == SOCK_PACKET)
3230 sock->ops = &packet_ops_spkt;
3231
3232 sock_init_data(sock, sk);
3233
3234 po = pkt_sk(sk);
3235 sk->sk_family = PF_PACKET;
3236 po->num = proto;
3237 po->xmit = dev_queue_xmit;
3238
3239 err = packet_alloc_pending(po);
3240 if (err)
3241 goto out2;
3242
3243 packet_cached_dev_reset(po);
3244
3245 sk->sk_destruct = packet_sock_destruct;
3246 sk_refcnt_debug_inc(sk);
3247
3248 /*
3249 * Attach the protocol hook (packet_rcv or packet_rcv_spkt).
3250 */
3251
3252 spin_lock_init(&po->bind_lock);
3253 mutex_init(&po->pg_vec_lock);
3254 po->rollover = NULL;
3255 po->prot_hook.func = packet_rcv;
3256
3257 if (sock->type == SOCK_PACKET)
3258 po->prot_hook.func = packet_rcv_spkt;
3259
3260 po->prot_hook.af_packet_priv = sk;
3261
3262 if (proto) {
3263 po->prot_hook.type = proto;
3264 __register_prot_hook(sk);
3265 }
3266
3267 mutex_lock(&net->packet.sklist_lock);
3268 sk_add_node_rcu(sk, &net->packet.sklist);
3269 mutex_unlock(&net->packet.sklist_lock);
3270
3271 preempt_disable();
3272 sock_prot_inuse_add(net, &packet_proto, 1);
3273 preempt_enable();
3274
3275 return 0;
3276out2:
3277 sk_free(sk);
3278out:
3279 return err;
3280}
3281
3282/*
3283 * Pull a packet from our receive queue and hand it to the user.
3284 * If necessary we block.
3285 */
3286
3287static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3288 int flags)
3289{
3290 struct sock *sk = sock->sk;
3291 struct sk_buff *skb;
3292 int copied, err;
3293 int vnet_hdr_len = 0;
3294 unsigned int origlen = 0;
3295
3296 err = -EINVAL;
3297 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3298 goto out;
3299
3300#if 0
3301 /* Disabled: unclear which error to return for an unbound socket. */
3302 if (pkt_sk(sk)->ifindex < 0)
3303 return -ENODEV;
3304#endif
3305
3306 if (flags & MSG_ERRQUEUE) {
3307 err = sock_recv_errqueue(sk, msg, len,
3308 SOL_PACKET, PACKET_TX_TIMESTAMP);
3309 goto out;
3310 }
3311
3312 /*
3313 * Call the generic datagram receiver. This handles all sorts
3314 * of horrible races and re-entrancy so we can forget about it
3315 * in the protocol layers.
3316 *
3317 * It will return -ENETDOWN if the device has just gone down,
3318 * but otherwise it may block.
3319 */
3320
3321 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
3322 /*
3323 * If an error occurred we just return it; skb_recv_datagram()
3324 * has already dealt with blocking and retries for us.
3325 */
3326
3327
3328
3329 if (skb == NULL)
3330 goto out;
3331
3332 if (pkt_sk(sk)->pressure)
3333 packet_rcv_has_room(pkt_sk(sk), NULL);
3334
3335 if (pkt_sk(sk)->has_vnet_hdr) {
3336 err = packet_rcv_vnet(msg, skb, &len);
3337 if (err)
3338 goto out_free;
3339 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3340 }
3341
3342 /* Any data beyond the buffer the caller gave us is silently
3343 * truncated (MSG_TRUNC is set); a user program that cares can
3344 * query the device MTU instead.
3345 */
3346 copied = skb->len;
3347 if (copied > len) {
3348 copied = len;
3349 msg->msg_flags |= MSG_TRUNC;
3350 }
3351
3352 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3353 if (err)
3354 goto out_free;
3355
3356 if (sock->type != SOCK_PACKET) {
3357 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3358
3359 /* The original packet length was stashed in the sockaddr_ll area. */
3360 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3361 sll->sll_family = AF_PACKET;
3362 sll->sll_protocol = skb->protocol;
3363 }
3364
3365 sock_recv_ts_and_drops(msg, sk, skb);
3366
3367 if (msg->msg_name) {
3368 /* If the caller supplied a msg_name buffer, fill in the
3369 * address and its length now.
3370 */
3371 if (sock->type == SOCK_PACKET) {
3372 __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3373 msg->msg_namelen = sizeof(struct sockaddr_pkt);
3374 } else {
3375 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3376
3377 msg->msg_namelen = sll->sll_halen +
3378 offsetof(struct sockaddr_ll, sll_addr);
3379 }
3380 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
3381 msg->msg_namelen);
3382 }
3383
3384 if (pkt_sk(sk)->auxdata) {
3385 struct tpacket_auxdata aux;
3386
3387 aux.tp_status = TP_STATUS_USER;
3388 if (skb->ip_summed == CHECKSUM_PARTIAL)
3389 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3390 else if (skb->pkt_type != PACKET_OUTGOING &&
3391 (skb->ip_summed == CHECKSUM_COMPLETE ||
3392 skb_csum_unnecessary(skb)))
3393 aux.tp_status |= TP_STATUS_CSUM_VALID;
3394
3395 aux.tp_len = origlen;
3396 aux.tp_snaplen = skb->len;
3397 aux.tp_mac = 0;
3398 aux.tp_net = skb_network_offset(skb);
3399 if (skb_vlan_tag_present(skb)) {
3400 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3401 aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3402 aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3403 } else {
3404 aux.tp_vlan_tci = 0;
3405 aux.tp_vlan_tpid = 0;
3406 }
3407 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3408 }
3409
3410 /*
3411 * Report the full packet length when MSG_TRUNC was requested,
3412 * otherwise the number of bytes actually copied.
3413 */
3414 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3415
3416out_free:
3417 skb_free_datagram(sk, skb);
3418out:
3419 return err;
3420}
3421
3422static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3423 int peer)
3424{
3425 struct net_device *dev;
3426 struct sock *sk = sock->sk;
3427
3428 if (peer)
3429 return -EOPNOTSUPP;
3430
3431 uaddr->sa_family = AF_PACKET;
3432 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3433 rcu_read_lock();
3434 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
3435 if (dev)
3436 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3437 rcu_read_unlock();
3438
3439 return sizeof(*uaddr);
3440}
3441
3442static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3443 int peer)
3444{
3445 struct net_device *dev;
3446 struct sock *sk = sock->sk;
3447 struct packet_sock *po = pkt_sk(sk);
3448 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3449
3450 if (peer)
3451 return -EOPNOTSUPP;
3452
3453 sll->sll_family = AF_PACKET;
3454 sll->sll_ifindex = po->ifindex;
3455 sll->sll_protocol = po->num;
3456 sll->sll_pkttype = 0;
3457 rcu_read_lock();
3458 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
3459 if (dev) {
3460 sll->sll_hatype = dev->type;
3461 sll->sll_halen = dev->addr_len;
3462 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3463 } else {
3464 sll->sll_hatype = 0;
3465 sll->sll_halen = 0;
3466 }
3467 rcu_read_unlock();
3468
3469 return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3470}
3471
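/* Program one multicast-list entry on the device: 'what' > 0 adds the
 * address / reference, 'what' < 0 removes it, according to the entry type
 * (multicast, unicast, promiscuous or allmulti).
 */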
3472static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3473 int what)
3474{
3475 switch (i->type) {
3476 case PACKET_MR_MULTICAST:
3477 if (i->alen != dev->addr_len)
3478 return -EINVAL;
3479 if (what > 0)
3480 return dev_mc_add(dev, i->addr);
3481 else
3482 return dev_mc_del(dev, i->addr);
3483 break;
3484 case PACKET_MR_PROMISC:
3485 return dev_set_promiscuity(dev, what);
3486 case PACKET_MR_ALLMULTI:
3487 return dev_set_allmulti(dev, what);
3488 case PACKET_MR_UNICAST:
3489 if (i->alen != dev->addr_len)
3490 return -EINVAL;
3491 if (what > 0)
3492 return dev_uc_add(dev, i->addr);
3493 else
3494 return dev_uc_del(dev, i->addr);
3495 break;
3496 default:
3497 break;
3498 }
3499 return 0;
3500}
3501
3502static void packet_dev_mclist_delete(struct net_device *dev,
3503 struct packet_mclist **mlp)
3504{
3505 struct packet_mclist *ml;
3506
3507 while ((ml = *mlp) != NULL) {
3508 if (ml->ifindex == dev->ifindex) {
3509 packet_dev_mc(dev, ml, -1);
3510 *mlp = ml->next;
3511 kfree(ml);
3512 } else
3513 mlp = &ml->next;
3514 }
3515}
3516
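/* PACKET_ADD_MEMBERSHIP: either bump the refcount of an existing identical
 * entry or allocate a new one, link it into po->mclist and program it on
 * the device.
 */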
3517static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3518{
3519 struct packet_sock *po = pkt_sk(sk);
3520 struct packet_mclist *ml, *i;
3521 struct net_device *dev;
3522 int err;
3523
3524 rtnl_lock();
3525
3526 err = -ENODEV;
3527 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3528 if (!dev)
3529 goto done;
3530
3531 err = -EINVAL;
3532 if (mreq->mr_alen > dev->addr_len)
3533 goto done;
3534
3535 err = -ENOBUFS;
3536 i = kmalloc(sizeof(*i), GFP_KERNEL);
3537 if (i == NULL)
3538 goto done;
3539
3540 err = 0;
3541 for (ml = po->mclist; ml; ml = ml->next) {
3542 if (ml->ifindex == mreq->mr_ifindex &&
3543 ml->type == mreq->mr_type &&
3544 ml->alen == mreq->mr_alen &&
3545 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3546 ml->count++;
3547 /* Entry already exists: just drop the newly allocated one. */
3548 kfree(i);
3549 goto done;
3550 }
3551 }
3552
3553 i->type = mreq->mr_type;
3554 i->ifindex = mreq->mr_ifindex;
3555 i->alen = mreq->mr_alen;
3556 memcpy(i->addr, mreq->mr_address, i->alen);
3557 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3558 i->count = 1;
3559 i->next = po->mclist;
3560 po->mclist = i;
3561 err = packet_dev_mc(dev, i, 1);
3562 if (err) {
3563 po->mclist = i->next;
3564 kfree(i);
3565 }
3566
3567done:
3568 rtnl_unlock();
3569 return err;
3570}
3571
3572static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3573{
3574 struct packet_mclist *ml, **mlp;
3575
3576 rtnl_lock();
3577
3578 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3579 if (ml->ifindex == mreq->mr_ifindex &&
3580 ml->type == mreq->mr_type &&
3581 ml->alen == mreq->mr_alen &&
3582 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3583 if (--ml->count == 0) {
3584 struct net_device *dev;
3585 *mlp = ml->next;
3586 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3587 if (dev)
3588 packet_dev_mc(dev, ml, -1);
3589 kfree(ml);
3590 }
3591 break;
3592 }
3593 }
3594 rtnl_unlock();
3595 return 0;
3596}
3597
3598static void packet_flush_mclist(struct sock *sk)
3599{
3600 struct packet_sock *po = pkt_sk(sk);
3601 struct packet_mclist *ml;
3602
3603 if (!po->mclist)
3604 return;
3605
3606 rtnl_lock();
3607 while ((ml = po->mclist) != NULL) {
3608 struct net_device *dev;
3609
3610 po->mclist = ml->next;
3611 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3612 if (dev != NULL)
3613 packet_dev_mc(dev, ml, -1);
3614 kfree(ml);
3615 }
3616 rtnl_unlock();
3617}
3618
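/* setsockopt() handler for SOL_PACKET. Note that the ring-related options
 * (PACKET_VERSION, PACKET_RESERVE, PACKET_LOSS, PACKET_VNET_HDR,
 * PACKET_TX_HAS_OFF) may only be changed while no RX/TX ring is attached.
 */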
3619static int
3620packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3621{
3622 struct sock *sk = sock->sk;
3623 struct packet_sock *po = pkt_sk(sk);
3624 int ret;
3625
3626 if (level != SOL_PACKET)
3627 return -ENOPROTOOPT;
3628
3629 switch (optname) {
3630 case PACKET_ADD_MEMBERSHIP:
3631 case PACKET_DROP_MEMBERSHIP:
3632 {
3633 struct packet_mreq_max mreq;
3634 int len = optlen;
3635 memset(&mreq, 0, sizeof(mreq));
3636 if (len < sizeof(struct packet_mreq))
3637 return -EINVAL;
3638 if (len > sizeof(mreq))
3639 len = sizeof(mreq);
3640 if (copy_from_user(&mreq, optval, len))
3641 return -EFAULT;
3642 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3643 return -EINVAL;
3644 if (optname == PACKET_ADD_MEMBERSHIP)
3645 ret = packet_mc_add(sk, &mreq);
3646 else
3647 ret = packet_mc_drop(sk, &mreq);
3648 return ret;
3649 }
3650
3651 case PACKET_RX_RING:
3652 case PACKET_TX_RING:
3653 {
3654 union tpacket_req_u req_u;
3655 int len;
3656
3657 lock_sock(sk);
3658 switch (po->tp_version) {
3659 case TPACKET_V1:
3660 case TPACKET_V2:
3661 len = sizeof(req_u.req);
3662 break;
3663 case TPACKET_V3:
3664 default:
3665 len = sizeof(req_u.req3);
3666 break;
3667 }
3668 if (optlen < len) {
3669 ret = -EINVAL;
3670 } else {
3671 if (copy_from_user(&req_u.req, optval, len))
3672 ret = -EFAULT;
3673 else
3674 ret = packet_set_ring(sk, &req_u, 0,
3675 optname == PACKET_TX_RING);
3676 }
3677 release_sock(sk);
3678 return ret;
3679 }
3680 case PACKET_COPY_THRESH:
3681 {
3682 int val;
3683
3684 if (optlen != sizeof(val))
3685 return -EINVAL;
3686 if (copy_from_user(&val, optval, sizeof(val)))
3687 return -EFAULT;
3688
3689 pkt_sk(sk)->copy_thresh = val;
3690 return 0;
3691 }
3692 case PACKET_VERSION:
3693 {
3694 int val;
3695
3696 if (optlen != sizeof(val))
3697 return -EINVAL;
3698 if (copy_from_user(&val, optval, sizeof(val)))
3699 return -EFAULT;
3700 switch (val) {
3701 case TPACKET_V1:
3702 case TPACKET_V2:
3703 case TPACKET_V3:
3704 break;
3705 default:
3706 return -EINVAL;
3707 }
3708 lock_sock(sk);
3709 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3710 ret = -EBUSY;
3711 } else {
3712 po->tp_version = val;
3713 ret = 0;
3714 }
3715 release_sock(sk);
3716 return ret;
3717 }
3718 case PACKET_RESERVE:
3719 {
3720 unsigned int val;
3721
3722 if (optlen != sizeof(val))
3723 return -EINVAL;
3724 if (copy_from_user(&val, optval, sizeof(val)))
3725 return -EFAULT;
3726 if (val > INT_MAX)
3727 return -EINVAL;
3728 lock_sock(sk);
3729 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3730 ret = -EBUSY;
3731 } else {
3732 po->tp_reserve = val;
3733 ret = 0;
3734 }
3735 release_sock(sk);
3736 return ret;
3737 }
3738 case PACKET_LOSS:
3739 {
3740 unsigned int val;
3741
3742 if (optlen != sizeof(val))
3743 return -EINVAL;
3744 if (copy_from_user(&val, optval, sizeof(val)))
3745 return -EFAULT;
3746
3747 lock_sock(sk);
3748 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3749 ret = -EBUSY;
3750 } else {
3751 po->tp_loss = !!val;
3752 ret = 0;
3753 }
3754 release_sock(sk);
3755 return ret;
3756 }
3757 case PACKET_AUXDATA:
3758 {
3759 int val;
3760
3761 if (optlen < sizeof(val))
3762 return -EINVAL;
3763 if (copy_from_user(&val, optval, sizeof(val)))
3764 return -EFAULT;
3765
3766 lock_sock(sk);
3767 po->auxdata = !!val;
3768 release_sock(sk);
3769 return 0;
3770 }
3771 case PACKET_ORIGDEV:
3772 {
3773 int val;
3774
3775 if (optlen < sizeof(val))
3776 return -EINVAL;
3777 if (copy_from_user(&val, optval, sizeof(val)))
3778 return -EFAULT;
3779
3780 lock_sock(sk);
3781 po->origdev = !!val;
3782 release_sock(sk);
3783 return 0;
3784 }
3785 case PACKET_VNET_HDR:
3786 {
3787 int val;
3788
3789 if (sock->type != SOCK_RAW)
3790 return -EINVAL;
3791 if (optlen < sizeof(val))
3792 return -EINVAL;
3793 if (copy_from_user(&val, optval, sizeof(val)))
3794 return -EFAULT;
3795
3796 lock_sock(sk);
3797 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3798 ret = -EBUSY;
3799 } else {
3800 po->has_vnet_hdr = !!val;
3801 ret = 0;
3802 }
3803 release_sock(sk);
3804 return ret;
3805 }
3806 case PACKET_TIMESTAMP:
3807 {
3808 int val;
3809
3810 if (optlen != sizeof(val))
3811 return -EINVAL;
3812 if (copy_from_user(&val, optval, sizeof(val)))
3813 return -EFAULT;
3814
3815 po->tp_tstamp = val;
3816 return 0;
3817 }
3818 case PACKET_FANOUT:
3819 {
3820 int val;
3821
3822 if (optlen != sizeof(val))
3823 return -EINVAL;
3824 if (copy_from_user(&val, optval, sizeof(val)))
3825 return -EFAULT;
3826
3827 return fanout_add(sk, val & 0xffff, val >> 16);
3828 }
3829 case PACKET_FANOUT_DATA:
3830 {
3831 if (!po->fanout)
3832 return -EINVAL;
3833
3834 return fanout_set_data(po, optval, optlen);
3835 }
3836 case PACKET_TX_HAS_OFF:
3837 {
3838 unsigned int val;
3839
3840 if (optlen != sizeof(val))
3841 return -EINVAL;
3842 if (copy_from_user(&val, optval, sizeof(val)))
3843 return -EFAULT;
3844
3845 lock_sock(sk);
3846 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3847 ret = -EBUSY;
3848 } else {
3849 po->tp_tx_has_off = !!val;
3850 ret = 0;
3851 }
3852 release_sock(sk);
3853 return ret;
3854 }
3855 case PACKET_QDISC_BYPASS:
3856 {
3857 int val;
3858
3859 if (optlen != sizeof(val))
3860 return -EINVAL;
3861 if (copy_from_user(&val, optval, sizeof(val)))
3862 return -EFAULT;
3863
3864 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
3865 return 0;
3866 }
3867 default:
3868 return -ENOPROTOOPT;
3869 }
3870}
3871
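/* getsockopt() handler for SOL_PACKET. PACKET_STATISTICS is destructive:
 * the counters are zeroed after they have been copied out.
 */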
3872static int packet_getsockopt(struct socket *sock, int level, int optname,
3873 char __user *optval, int __user *optlen)
3874{
3875 int len;
3876 int val, lv = sizeof(val);
3877 struct sock *sk = sock->sk;
3878 struct packet_sock *po = pkt_sk(sk);
3879 void *data = &val;
3880 union tpacket_stats_u st;
3881 struct tpacket_rollover_stats rstats;
3882
3883 if (level != SOL_PACKET)
3884 return -ENOPROTOOPT;
3885
3886 if (get_user(len, optlen))
3887 return -EFAULT;
3888
3889 if (len < 0)
3890 return -EINVAL;
3891
3892 switch (optname) {
3893 case PACKET_STATISTICS:
3894 spin_lock_bh(&sk->sk_receive_queue.lock);
3895 memcpy(&st, &po->stats, sizeof(st));
3896 memset(&po->stats, 0, sizeof(po->stats));
3897 spin_unlock_bh(&sk->sk_receive_queue.lock);
3898
3899 if (po->tp_version == TPACKET_V3) {
3900 lv = sizeof(struct tpacket_stats_v3);
3901 st.stats3.tp_packets += st.stats3.tp_drops;
3902 data = &st.stats3;
3903 } else {
3904 lv = sizeof(struct tpacket_stats);
3905 st.stats1.tp_packets += st.stats1.tp_drops;
3906 data = &st.stats1;
3907 }
3908
3909 break;
3910 case PACKET_AUXDATA:
3911 val = po->auxdata;
3912 break;
3913 case PACKET_ORIGDEV:
3914 val = po->origdev;
3915 break;
3916 case PACKET_VNET_HDR:
3917 val = po->has_vnet_hdr;
3918 break;
3919 case PACKET_VERSION:
3920 val = po->tp_version;
3921 break;
3922 case PACKET_HDRLEN:
3923 if (len > sizeof(int))
3924 len = sizeof(int);
3925 if (len < sizeof(int))
3926 return -EINVAL;
3927 if (copy_from_user(&val, optval, len))
3928 return -EFAULT;
3929 switch (val) {
3930 case TPACKET_V1:
3931 val = sizeof(struct tpacket_hdr);
3932 break;
3933 case TPACKET_V2:
3934 val = sizeof(struct tpacket2_hdr);
3935 break;
3936 case TPACKET_V3:
3937 val = sizeof(struct tpacket3_hdr);
3938 break;
3939 default:
3940 return -EINVAL;
3941 }
3942 break;
3943 case PACKET_RESERVE:
3944 val = po->tp_reserve;
3945 break;
3946 case PACKET_LOSS:
3947 val = po->tp_loss;
3948 break;
3949 case PACKET_TIMESTAMP:
3950 val = po->tp_tstamp;
3951 break;
3952 case PACKET_FANOUT:
3953 val = (po->fanout ?
3954 ((u32)po->fanout->id |
3955 ((u32)po->fanout->type << 16) |
3956 ((u32)po->fanout->flags << 24)) :
3957 0);
3958 break;
3959 case PACKET_ROLLOVER_STATS:
3960 if (!po->rollover)
3961 return -EINVAL;
3962 rstats.tp_all = atomic_long_read(&po->rollover->num);
3963 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
3964 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
3965 data = &rstats;
3966 lv = sizeof(rstats);
3967 break;
3968 case PACKET_TX_HAS_OFF:
3969 val = po->tp_tx_has_off;
3970 break;
3971 case PACKET_QDISC_BYPASS:
3972 val = packet_use_direct_xmit(po);
3973 break;
3974 default:
3975 return -ENOPROTOOPT;
3976 }
3977
3978 if (len > lv)
3979 len = lv;
3980 if (put_user(len, optlen))
3981 return -EFAULT;
3982 if (copy_to_user(optval, data, len))
3983 return -EFAULT;
3984 return 0;
3985}
3986
3987
3988#ifdef CONFIG_COMPAT
3989static int compat_packet_setsockopt(struct socket *sock, int level, int optname,
3990 char __user *optval, unsigned int optlen)
3991{
3992 struct packet_sock *po = pkt_sk(sock->sk);
3993
3994 if (level != SOL_PACKET)
3995 return -ENOPROTOOPT;
3996
3997 if (optname == PACKET_FANOUT_DATA &&
3998 po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) {
3999 optval = (char __user *)get_compat_bpf_fprog(optval);
4000 if (!optval)
4001 return -EFAULT;
4002 optlen = sizeof(struct sock_fprog);
4003 }
4004
4005 return packet_setsockopt(sock, level, optname, optval, optlen);
4006}
4007#endif
4008
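/* Netdevice notifier: keep bound packet sockets in sync with device state.
 * NETDEV_UNREGISTER drops multicast entries and the device reference,
 * NETDEV_DOWN unhooks the socket and reports ENETDOWN, and NETDEV_UP
 * re-registers the protocol hook for sockets still bound to the device.
 */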
4009static int packet_notifier(struct notifier_block *this,
4010 unsigned long msg, void *ptr)
4011{
4012 struct sock *sk;
4013 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4014 struct net *net = dev_net(dev);
4015
4016 rcu_read_lock();
4017 sk_for_each_rcu(sk, &net->packet.sklist) {
4018 struct packet_sock *po = pkt_sk(sk);
4019
4020 switch (msg) {
4021 case NETDEV_UNREGISTER:
4022 if (po->mclist)
4023 packet_dev_mclist_delete(dev, &po->mclist);
4024 /* fall through */
4025
4026 case NETDEV_DOWN:
4027 if (dev->ifindex == po->ifindex) {
4028 spin_lock(&po->bind_lock);
4029 if (po->running) {
4030 __unregister_prot_hook(sk, false);
4031 sk->sk_err = ENETDOWN;
4032 if (!sock_flag(sk, SOCK_DEAD))
4033 sk->sk_error_report(sk);
4034 }
4035 if (msg == NETDEV_UNREGISTER) {
4036 packet_cached_dev_reset(po);
4037 po->ifindex = -1;
4038 if (po->prot_hook.dev)
4039 dev_put(po->prot_hook.dev);
4040 po->prot_hook.dev = NULL;
4041 }
4042 spin_unlock(&po->bind_lock);
4043 }
4044 break;
4045 case NETDEV_UP:
4046 if (dev->ifindex == po->ifindex) {
4047 spin_lock(&po->bind_lock);
4048 if (po->num)
4049 register_prot_hook(sk);
4050 spin_unlock(&po->bind_lock);
4051 }
4052 break;
4053 }
4054 }
4055 rcu_read_unlock();
4056 return NOTIFY_DONE;
4057}
4058
4059
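/* ioctl() handler: SIOCOUTQ reports outstanding transmit memory, SIOCINQ
 * the length of the next queued packet, the SIOCGSTAMP* calls return packet
 * timestamps, and a set of interface ioctls is forwarded to the inet layer
 * when CONFIG_INET is enabled.
 */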
4060static int packet_ioctl(struct socket *sock, unsigned int cmd,
4061 unsigned long arg)
4062{
4063 struct sock *sk = sock->sk;
4064
4065 switch (cmd) {
4066 case SIOCOUTQ:
4067 {
4068 int amount = sk_wmem_alloc_get(sk);
4069
4070 return put_user(amount, (int __user *)arg);
4071 }
4072 case SIOCINQ:
4073 {
4074 struct sk_buff *skb;
4075 int amount = 0;
4076
4077 spin_lock_bh(&sk->sk_receive_queue.lock);
4078 skb = skb_peek(&sk->sk_receive_queue);
4079 if (skb)
4080 amount = skb->len;
4081 spin_unlock_bh(&sk->sk_receive_queue.lock);
4082 return put_user(amount, (int __user *)arg);
4083 }
4084 case SIOCGSTAMP:
4085 return sock_get_timestamp(sk, (struct timeval __user *)arg);
4086 case SIOCGSTAMPNS:
4087 return sock_get_timestampns(sk, (struct timespec __user *)arg);
4088
4089#ifdef CONFIG_INET
4090 case SIOCADDRT:
4091 case SIOCDELRT:
4092 case SIOCDARP:
4093 case SIOCGARP:
4094 case SIOCSARP:
4095 case SIOCGIFADDR:
4096 case SIOCSIFADDR:
4097 case SIOCGIFBRDADDR:
4098 case SIOCSIFBRDADDR:
4099 case SIOCGIFNETMASK:
4100 case SIOCSIFNETMASK:
4101 case SIOCGIFDSTADDR:
4102 case SIOCSIFDSTADDR:
4103 case SIOCSIFFLAGS:
4104 return inet_dgram_ops.ioctl(sock, cmd, arg);
4105#endif
4106
4107 default:
4108 return -ENOIOCTLCMD;
4109 }
4110 return 0;
4111}
4112
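/* poll() for packet sockets: on top of the generic datagram_poll() state,
 * report EPOLLIN when the most recently filled RX-ring frame is no longer
 * owned by the kernel and EPOLLOUT when the current TX-ring frame is
 * available again. Receive-queue pressure is cleared once there is room.
 */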
4113static __poll_t packet_poll(struct file *file, struct socket *sock,
4114 poll_table *wait)
4115{
4116 struct sock *sk = sock->sk;
4117 struct packet_sock *po = pkt_sk(sk);
4118 __poll_t mask = datagram_poll(file, sock, wait);
4119
4120 spin_lock_bh(&sk->sk_receive_queue.lock);
4121 if (po->rx_ring.pg_vec) {
4122 if (!packet_previous_rx_frame(po, &po->rx_ring,
4123 TP_STATUS_KERNEL))
4124 mask |= EPOLLIN | EPOLLRDNORM;
4125 }
4126 if (po->pressure && __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
4127 po->pressure = 0;
4128 spin_unlock_bh(&sk->sk_receive_queue.lock);
4129 spin_lock_bh(&sk->sk_write_queue.lock);
4130 if (po->tx_ring.pg_vec) {
4131 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4132 mask |= EPOLLOUT | EPOLLWRNORM;
4133 }
4134 spin_unlock_bh(&sk->sk_write_queue.lock);
4135 return mask;
4136}
4137
4138
4139/* Keep track of how many times the ring pages are mapped into user space;
4140 * packet_set_ring() refuses to resize or free rings while this is non-zero.
4141 */
4142
4143static void packet_mm_open(struct vm_area_struct *vma)
4144{
4145 struct file *file = vma->vm_file;
4146 struct socket *sock = file->private_data;
4147 struct sock *sk = sock->sk;
4148
4149 if (sk)
4150 atomic_inc(&pkt_sk(sk)->mapped);
4151}
4152
4153static void packet_mm_close(struct vm_area_struct *vma)
4154{
4155 struct file *file = vma->vm_file;
4156 struct socket *sock = file->private_data;
4157 struct sock *sk = sock->sk;
4158
4159 if (sk)
4160 atomic_dec(&pkt_sk(sk)->mapped);
4161}
4162
4163static const struct vm_operations_struct packet_mmap_ops = {
4164 .open = packet_mm_open,
4165 .close = packet_mm_close,
4166};
4167
4168static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4169 unsigned int len)
4170{
4171 int i;
4172
4173 for (i = 0; i < len; i++) {
4174 if (likely(pg_vec[i].buffer)) {
4175 if (is_vmalloc_addr(pg_vec[i].buffer))
4176 vfree(pg_vec[i].buffer);
4177 else
4178 free_pages((unsigned long)pg_vec[i].buffer,
4179 order);
4180 pg_vec[i].buffer = NULL;
4181 }
4182 }
4183 kfree(pg_vec);
4184}
4185
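/* Allocate one ring block: try a cheap high-order page allocation first,
 * fall back to vzalloc(), and finally retry the page allocator without
 * __GFP_NORETRY before giving up.
 */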
4186static char *alloc_one_pg_vec_page(unsigned long order)
4187{
4188 char *buffer;
4189 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4190 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4191
4192 buffer = (char *) __get_free_pages(gfp_flags, order);
4193 if (buffer)
4194 return buffer;
4195
4196 /* __get_free_pages() failed: fall back to vmalloc. */
4197 buffer = vzalloc((1 << order) * PAGE_SIZE);
4198 if (buffer)
4199 return buffer;
4200
4201 /* vmalloc failed too: retry the page allocator, now allowing it to retry and reclaim. */
4202 gfp_flags &= ~__GFP_NORETRY;
4203 buffer = (char *) __get_free_pages(gfp_flags, order);
4204 if (buffer)
4205 return buffer;
4206
4207 /* Complete failure. */
4208 return NULL;
4209}
4210
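/* Allocate the per-block buffer array described by a tpacket_req. On any
 * failure everything allocated so far is freed and NULL is returned.
 */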
4211static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4212{
4213 unsigned int block_nr = req->tp_block_nr;
4214 struct pgv *pg_vec;
4215 int i;
4216
4217 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
4218 if (unlikely(!pg_vec))
4219 goto out;
4220
4221 for (i = 0; i < block_nr; i++) {
4222 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4223 if (unlikely(!pg_vec[i].buffer))
4224 goto out_free_pgvec;
4225 }
4226
4227out:
4228 return pg_vec;
4229
4230out_free_pgvec:
4231 free_pg_vec(pg_vec, order, block_nr);
4232 pg_vec = NULL;
4233 goto out;
4234}
4235
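/* Create or tear down an RX/TX ring (PACKET_RX_RING / PACKET_TX_RING, and
 * from packet_release() with closing != 0). After validating the request
 * the protocol hook is temporarily unregistered, the new block vector is
 * swapped in under pg_vec_lock, and the hook is re-registered. A ring that
 * is currently mmapped (po->mapped != 0) is never replaced.
 */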
4236static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4237 int closing, int tx_ring)
4238{
4239 struct pgv *pg_vec = NULL;
4240 struct packet_sock *po = pkt_sk(sk);
4241 int was_running, order = 0;
4242 struct packet_ring_buffer *rb;
4243 struct sk_buff_head *rb_queue;
4244 __be16 num;
4245 int err = -EINVAL;
4246
4247 struct tpacket_req *req = &req_u->req;
4248
4249 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4250 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4251
4252 err = -EBUSY;
4253 if (!closing) {
4254 if (atomic_read(&po->mapped))
4255 goto out;
4256 if (packet_read_pending(rb))
4257 goto out;
4258 }
4259
4260 if (req->tp_block_nr) {
4261 /* Sanity tests and derived sizes for the new ring. */
4262 err = -EBUSY;
4263 if (unlikely(rb->pg_vec))
4264 goto out;
4265
4266 switch (po->tp_version) {
4267 case TPACKET_V1:
4268 po->tp_hdrlen = TPACKET_HDRLEN;
4269 break;
4270 case TPACKET_V2:
4271 po->tp_hdrlen = TPACKET2_HDRLEN;
4272 break;
4273 case TPACKET_V3:
4274 po->tp_hdrlen = TPACKET3_HDRLEN;
4275 break;
4276 }
4277
4278 err = -EINVAL;
4279 if (unlikely((int)req->tp_block_size <= 0))
4280 goto out;
4281 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4282 goto out;
4283 if (po->tp_version >= TPACKET_V3 &&
4284 req->tp_block_size <=
4285 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
4286 goto out;
4287 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
4288 po->tp_reserve))
4289 goto out;
4290 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4291 goto out;
4292
4293 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4294 if (unlikely(rb->frames_per_block == 0))
4295 goto out;
4296 if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
4297 goto out;
4298 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4299 req->tp_frame_nr))
4300 goto out;
4301
4302 err = -ENOMEM;
4303 order = get_order(req->tp_block_size);
4304 pg_vec = alloc_pg_vec(req, order);
4305 if (unlikely(!pg_vec))
4306 goto out;
4307 switch (po->tp_version) {
4308 case TPACKET_V3:
4309 /* Only the RX ring uses the block descriptor queue; a v3 TX ring must not ask for block features. */
4310 if (!tx_ring) {
4311 init_prb_bdqc(po, rb, pg_vec, req_u);
4312 } else {
4313 struct tpacket_req3 *req3 = &req_u->req3;
4314
4315 if (req3->tp_retire_blk_tov ||
4316 req3->tp_sizeof_priv ||
4317 req3->tp_feature_req_word) {
4318 err = -EINVAL;
4319 goto out;
4320 }
4321 }
4322 break;
4323 default:
4324 break;
4325 }
4326 }
4327
4328 else {
4329 err = -EINVAL;
4330 if (unlikely(req->tp_frame_nr))
4331 goto out;
4332 }
4333
4334
4335 /* Detach the socket from the network while the rings are swapped. */
4336 spin_lock(&po->bind_lock);
4337 was_running = po->running;
4338 num = po->num;
4339 if (was_running) {
4340 po->num = 0;
4341 __unregister_prot_hook(sk, false);
4342 }
4343 spin_unlock(&po->bind_lock);
4344
4345 synchronize_net();
4346
4347 err = -EBUSY;
4348 mutex_lock(&po->pg_vec_lock);
4349 if (closing || atomic_read(&po->mapped) == 0) {
4350 err = 0;
4351 spin_lock_bh(&rb_queue->lock);
4352 swap(rb->pg_vec, pg_vec);
4353 rb->frame_max = (req->tp_frame_nr - 1);
4354 rb->head = 0;
4355 rb->frame_size = req->tp_frame_size;
4356 spin_unlock_bh(&rb_queue->lock);
4357
4358 swap(rb->pg_vec_order, order);
4359 swap(rb->pg_vec_len, req->tp_block_nr);
4360
4361 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4362 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4363 tpacket_rcv : packet_rcv;
4364 skb_queue_purge(rb_queue);
4365 if (atomic_read(&po->mapped))
4366 pr_err("packet_mmap: vma is busy: %d\n",
4367 atomic_read(&po->mapped));
4368 }
4369 mutex_unlock(&po->pg_vec_lock);
4370
4371 spin_lock(&po->bind_lock);
4372 if (was_running) {
4373 po->num = num;
4374 register_prot_hook(sk);
4375 }
4376 spin_unlock(&po->bind_lock);
4377 if (pg_vec && (po->tp_version > TPACKET_V2)) {
4378 /* Only RX rings have a retire-block timer to shut down. */
4379 if (!tx_ring)
4380 prb_shutdown_retire_blk_timer(po, rb_queue);
4381 }
4382
4383 if (pg_vec)
4384 free_pg_vec(pg_vec, order, req->tp_block_nr);
4385out:
4386 return err;
4387}
4388
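/* mmap() the rings into user space: the RX ring (if configured) followed by
 * the TX ring must be mapped in a single VMA whose size exactly matches the
 * rings, and every block page is inserted into that VMA.
 */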
4389static int packet_mmap(struct file *file, struct socket *sock,
4390 struct vm_area_struct *vma)
4391{
4392 struct sock *sk = sock->sk;
4393 struct packet_sock *po = pkt_sk(sk);
4394 unsigned long size, expected_size;
4395 struct packet_ring_buffer *rb;
4396 unsigned long start;
4397 int err = -EINVAL;
4398 int i;
4399
4400 if (vma->vm_pgoff)
4401 return -EINVAL;
4402
4403 mutex_lock(&po->pg_vec_lock);
4404
4405 expected_size = 0;
4406 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4407 if (rb->pg_vec) {
4408 expected_size += rb->pg_vec_len
4409 * rb->pg_vec_pages
4410 * PAGE_SIZE;
4411 }
4412 }
4413
4414 if (expected_size == 0)
4415 goto out;
4416
4417 size = vma->vm_end - vma->vm_start;
4418 if (size != expected_size)
4419 goto out;
4420
4421 start = vma->vm_start;
4422 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4423 if (rb->pg_vec == NULL)
4424 continue;
4425
4426 for (i = 0; i < rb->pg_vec_len; i++) {
4427 struct page *page;
4428 void *kaddr = rb->pg_vec[i].buffer;
4429 int pg_num;
4430
4431 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4432 page = pgv_to_page(kaddr);
4433 err = vm_insert_page(vma, start, page);
4434 if (unlikely(err))
4435 goto out;
4436 start += PAGE_SIZE;
4437 kaddr += PAGE_SIZE;
4438 }
4439 }
4440 }
4441
4442 atomic_inc(&po->mapped);
4443 vma->vm_ops = &packet_mmap_ops;
4444 err = 0;
4445
4446out:
4447 mutex_unlock(&po->pg_vec_lock);
4448 return err;
4449}
4450
4451static const struct proto_ops packet_ops_spkt = {
4452 .family = PF_PACKET,
4453 .owner = THIS_MODULE,
4454 .release = packet_release,
4455 .bind = packet_bind_spkt,
4456 .connect = sock_no_connect,
4457 .socketpair = sock_no_socketpair,
4458 .accept = sock_no_accept,
4459 .getname = packet_getname_spkt,
4460 .poll = datagram_poll,
4461 .ioctl = packet_ioctl,
4462 .listen = sock_no_listen,
4463 .shutdown = sock_no_shutdown,
4464 .setsockopt = sock_no_setsockopt,
4465 .getsockopt = sock_no_getsockopt,
4466 .sendmsg = packet_sendmsg_spkt,
4467 .recvmsg = packet_recvmsg,
4468 .mmap = sock_no_mmap,
4469 .sendpage = sock_no_sendpage,
4470};
4471
4472static const struct proto_ops packet_ops = {
4473 .family = PF_PACKET,
4474 .owner = THIS_MODULE,
4475 .release = packet_release,
4476 .bind = packet_bind,
4477 .connect = sock_no_connect,
4478 .socketpair = sock_no_socketpair,
4479 .accept = sock_no_accept,
4480 .getname = packet_getname,
4481 .poll = packet_poll,
4482 .ioctl = packet_ioctl,
4483 .listen = sock_no_listen,
4484 .shutdown = sock_no_shutdown,
4485 .setsockopt = packet_setsockopt,
4486 .getsockopt = packet_getsockopt,
4487#ifdef CONFIG_COMPAT
4488 .compat_setsockopt = compat_packet_setsockopt,
4489#endif
4490 .sendmsg = packet_sendmsg,
4491 .recvmsg = packet_recvmsg,
4492 .mmap = packet_mmap,
4493 .sendpage = sock_no_sendpage,
4494};
4495
4496static const struct net_proto_family packet_family_ops = {
4497 .family = PF_PACKET,
4498 .create = packet_create,
4499 .owner = THIS_MODULE,
4500};
4501
4502static struct notifier_block packet_netdev_notifier = {
4503 .notifier_call = packet_notifier,
4504};
4505
4506#ifdef CONFIG_PROC_FS
4507
4508static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4509 __acquires(RCU)
4510{
4511 struct net *net = seq_file_net(seq);
4512
4513 rcu_read_lock();
4514 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4515}
4516
4517static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4518{
4519 struct net *net = seq_file_net(seq);
4520 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4521}
4522
4523static void packet_seq_stop(struct seq_file *seq, void *v)
4524 __releases(RCU)
4525{
4526 rcu_read_unlock();
4527}
4528
4529static int packet_seq_show(struct seq_file *seq, void *v)
4530{
4531 if (v == SEQ_START_TOKEN)
4532 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
4533 else {
4534 struct sock *s = sk_entry(v);
4535 const struct packet_sock *po = pkt_sk(s);
4536
4537 seq_printf(seq,
4538 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4539 s,
4540 refcount_read(&s->sk_refcnt),
4541 s->sk_type,
4542 ntohs(po->num),
4543 po->ifindex,
4544 po->running,
4545 atomic_read(&s->sk_rmem_alloc),
4546 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4547 sock_i_ino(s));
4548 }
4549
4550 return 0;
4551}
4552
4553static const struct seq_operations packet_seq_ops = {
4554 .start = packet_seq_start,
4555 .next = packet_seq_next,
4556 .stop = packet_seq_stop,
4557 .show = packet_seq_show,
4558};
4559
4560static int packet_seq_open(struct inode *inode, struct file *file)
4561{
4562 return seq_open_net(inode, file, &packet_seq_ops,
4563 sizeof(struct seq_net_private));
4564}
4565
4566static const struct file_operations packet_seq_fops = {
4567 .open = packet_seq_open,
4568 .read = seq_read,
4569 .llseek = seq_lseek,
4570 .release = seq_release_net,
4571};
4572
4573#endif
4574
4575static int __net_init packet_net_init(struct net *net)
4576{
4577 mutex_init(&net->packet.sklist_lock);
4578 INIT_HLIST_HEAD(&net->packet.sklist);
4579
4580 if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
4581 return -ENOMEM;
4582
4583 return 0;
4584}
4585
4586static void __net_exit packet_net_exit(struct net *net)
4587{
4588 remove_proc_entry("packet", net->proc_net);
4589 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4590}
4591
4592static struct pernet_operations packet_net_ops = {
4593 .init = packet_net_init,
4594 .exit = packet_net_exit,
4595};
4596
4597
4598static void __exit packet_exit(void)
4599{
4600 unregister_netdevice_notifier(&packet_netdev_notifier);
4601 unregister_pernet_subsys(&packet_net_ops);
4602 sock_unregister(PF_PACKET);
4603 proto_unregister(&packet_proto);
4604}
4605
4606static int __init packet_init(void)
4607{
4608 int rc = proto_register(&packet_proto, 0);
4609
4610 if (rc != 0)
4611 goto out;
4612
4613 sock_register(&packet_family_ops);
4614 register_pernet_subsys(&packet_net_ops);
4615 register_netdevice_notifier(&packet_netdev_notifier);
4616out:
4617 return rc;
4618}
4619
4620module_init(packet_init);
4621module_exit(packet_exit);
4622MODULE_LICENSE("GPL");
4623MODULE_ALIAS_NETPROTO(PF_PACKET);
4624