/*
 * PACKET - implements raw packet sockets (AF_PACKET), including the
 * memory-mapped TPACKET_V1/V2/V3 receive/transmit rings and fanout groups.
 */
#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>

#include "internal.h"

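/*
 * Notes on link-layer (LL) header handling for packet sockets:
 *
 * On receive, skb->mac_header points at the LL header and skb->data at the
 * payload.  SOCK_RAW sockets push the LL header back before the frame is
 * queued or copied into the ring; SOCK_DGRAM sockets deliver the payload
 * only and report addressing information through sockaddr_ll.
 *
 * On transmit, SOCK_RAW expects the caller to provide a complete frame
 * including the LL header, while SOCK_DGRAM builds the LL header from the
 * destination address via dev_hard_header().
 */
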
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

union tpacket_uhdr {
	struct tpacket_hdr  *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)

struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
static u16 packet_pick_tx_queue(struct sk_buff *skb);
struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
			 * ll.sll_family and ll.protocol in order
			 * to save room.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};

#define vio_le() virtio_legacy_is_little_endian()

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

static int packet_direct_xmit(struct sk_buff *skb)
{
	return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
}

static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = rcu_dereference(po->cached_dev);
	if (likely(dev))
		dev_hold(dev);
	rcu_read_unlock();

	return dev;
}

static void packet_cached_dev_assign(struct packet_sock *po,
				     struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}

static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}

static bool packet_use_direct_xmit(const struct packet_sock *po)
{
	return po->xmit == packet_direct_xmit;
}

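/* Choose the TX queue for a frame sent through an AF_PACKET socket: honour
 * the driver's ndo_select_queue() if it has one (capping the result to the
 * device's real queue count), otherwise fall back to the generic
 * netdev_pick_tx().
 */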
static u16 packet_pick_tx_queue(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	const struct net_device_ops *ops = dev->netdev_ops;
	int cpu = raw_smp_processor_id();
	u16 queue_index;

#ifdef CONFIG_XPS
	skb->sender_cpu = cpu + 1;
#endif
	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
	if (ops->ndo_select_queue) {
		queue_index = ops->ndo_select_queue(dev, skb, NULL);
		queue_index = netdev_cap_txqueue(dev, queue_index);
	} else {
		queue_index = netdev_pick_tx(dev, skb, NULL);
	}

	return queue_index;
}

/* __register_prot_hook must be invoked through register_prot_hook
 * or from a context in which asynchronous accesses to the packet
 * socket is not possible (packet_create()).
 */
static void __register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);

	if (!po->running) {
		if (po->fanout)
			__fanout_link(sk, po);
		else
			dev_add_pack(&po->prot_hook);

		sock_hold(sk);
		po->running = 1;
	}
}

static void register_prot_hook(struct sock *sk)
{
	lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
	__register_prot_hook(sk);
}

/* If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * caller's responsibility to take care of this.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	lockdep_assert_held_once(&po->bind_lock);

	po->running = 0;

	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);

	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}

static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		__unregister_prot_hook(sk, sync);
}

static inline struct page * __pure pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}
364
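/* Each ring frame starts with a tp_status word that acts as the ownership
 * flag between kernel and user space (TP_STATUS_KERNEL/TP_STATUS_USER on
 * the RX ring, TP_STATUS_AVAILABLE/TP_STATUS_SEND_REQUEST/TP_STATUS_SENDING
 * on the TX ring).  The helpers below write and read that word with the
 * required memory barriers and, where needed, dcache flushes.
 */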
365static void __packet_set_status(struct packet_sock *po, void *frame, int status)
366{
367 union tpacket_uhdr h;
368
369 h.raw = frame;
370 switch (po->tp_version) {
371 case TPACKET_V1:
372 h.h1->tp_status = status;
373 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
374 break;
375 case TPACKET_V2:
376 h.h2->tp_status = status;
377 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
378 break;
379 case TPACKET_V3:
380 h.h3->tp_status = status;
381 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
382 break;
383 default:
384 WARN(1, "TPACKET version not supported.\n");
385 BUG();
386 }
387
388 smp_wmb();
389}
390
391static int __packet_get_status(const struct packet_sock *po, void *frame)
392{
393 union tpacket_uhdr h;
394
395 smp_rmb();
396
397 h.raw = frame;
398 switch (po->tp_version) {
399 case TPACKET_V1:
400 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
401 return h.h1->tp_status;
402 case TPACKET_V2:
403 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
404 return h.h2->tp_status;
405 case TPACKET_V3:
406 flush_dcache_page(pgv_to_page(&h.h3->tp_status));
407 return h.h3->tp_status;
408 default:
409 WARN(1, "TPACKET version not supported.\n");
410 BUG();
411 return 0;
412 }
413}
414
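/* Pick the timestamp to report for a ring frame: a raw hardware timestamp
 * if the socket asked for one and the driver provided it, otherwise the
 * software timestamp taken when the skb entered the stack.  Returns the
 * matching TP_STATUS_TS_* flag, or 0 if no timestamp is available.
 */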
415static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
416 unsigned int flags)
417{
418 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
419
420 if (shhwtstamps &&
421 (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
422 ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
423 return TP_STATUS_TS_RAW_HARDWARE;
424
425 if (ktime_to_timespec64_cond(skb->tstamp, ts))
426 return TP_STATUS_TS_SOFTWARE;
427
428 return 0;
429}
430
431static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
432 struct sk_buff *skb)
433{
434 union tpacket_uhdr h;
435 struct timespec64 ts;
436 __u32 ts_status;
437
438 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
439 return 0;
440
441 h.raw = frame;
442
	/*
	 * versions 1 through 3 overflow the timestamps in y2106, since
	 * they all store the seconds in a __u32.  The sub-second part is
	 * stored per ring version below.
	 */
449 switch (po->tp_version) {
450 case TPACKET_V1:
451 h.h1->tp_sec = ts.tv_sec;
452 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
453 break;
454 case TPACKET_V2:
455 h.h2->tp_sec = ts.tv_sec;
456 h.h2->tp_nsec = ts.tv_nsec;
457 break;
458 case TPACKET_V3:
459 h.h3->tp_sec = ts.tv_sec;
460 h.h3->tp_nsec = ts.tv_nsec;
461 break;
462 default:
463 WARN(1, "TPACKET version not supported.\n");
464 BUG();
465 }

	/* one flush is safe, as both fields always lie on the same cacheline */
	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
469 smp_wmb();
470
471 return ts_status;
472}
473
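/* Translate a frame index into its address inside the ring's pg_vec and
 * return it only if the frame's status matches the expected value (e.g.
 * TP_STATUS_KERNEL when looking for a free RX slot), otherwise NULL.
 */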
474static void *packet_lookup_frame(const struct packet_sock *po,
475 const struct packet_ring_buffer *rb,
476 unsigned int position,
477 int status)
478{
479 unsigned int pg_vec_pos, frame_offset;
480 union tpacket_uhdr h;
481
482 pg_vec_pos = position / rb->frames_per_block;
483 frame_offset = position % rb->frames_per_block;
484
485 h.raw = rb->pg_vec[pg_vec_pos].buffer +
486 (frame_offset * rb->frame_size);
487
488 if (status != __packet_get_status(po, h.raw))
489 return NULL;
490
491 return h.raw;
492}
493
494static void *packet_current_frame(struct packet_sock *po,
495 struct packet_ring_buffer *rb,
496 int status)
497{
498 return packet_lookup_frame(po, rb, rb->head, status);
499}
500
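/* TPACKET_V3 block-retire timer helpers: the timer makes sure a partially
 * filled block is still handed to user space after tp_retire_blk_tov msecs
 * even if no further packets arrive to fill it up.
 */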
501static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
502{
503 del_timer_sync(&pkc->retire_blk_timer);
504}
505
506static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
507 struct sk_buff_head *rb_queue)
508{
509 struct tpacket_kbdq_core *pkc;
510
511 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
512
513 spin_lock_bh(&rb_queue->lock);
514 pkc->delete_blk_timer = 1;
515 spin_unlock_bh(&rb_queue->lock);
516
517 prb_del_retire_blk_timer(pkc);
518}
519
520static void prb_setup_retire_blk_timer(struct packet_sock *po)
521{
522 struct tpacket_kbdq_core *pkc;
523
524 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
525 timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
526 0);
527 pkc->retire_blk_timer.expires = jiffies;
528}
529
530static int prb_calc_retire_blk_tmo(struct packet_sock *po,
531 int blk_size_in_bytes)
532{
533 struct net_device *dev;
534 unsigned int mbits, div;
535 struct ethtool_link_ksettings ecmd;
536 int err;
537
538 rtnl_lock();
539 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
540 if (unlikely(!dev)) {
541 rtnl_unlock();
542 return DEFAULT_PRB_RETIRE_TOV;
543 }
544 err = __ethtool_get_link_ksettings(dev, &ecmd);
545 rtnl_unlock();
546 if (err)
547 return DEFAULT_PRB_RETIRE_TOV;

	/* If the link speed is so slow that the frame rate does not really
	 * matter, or the speed is unknown, just use the default timeout.
	 */
	if (ecmd.base.speed < SPEED_1000 ||
	    ecmd.base.speed == SPEED_UNKNOWN)
		return DEFAULT_PRB_RETIRE_TOV;

	div = ecmd.base.speed / 1000;
	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);

	if (div)
		mbits /= div;

	/* The result (in msecs) approximates how long it takes to fill one
	 * block at line rate; add 1 to round up.
	 */
	if (div)
		return mbits + 1;
	return mbits;
}
566
567static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
568 union tpacket_req_u *req_u)
569{
570 p1->feature_req_word = req_u->req3.tp_feature_req_word;
571}
572
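/* Initialise the TPACKET_V3 kernel block-descriptor queue: block geometry,
 * retire timeout (user supplied or derived from the link speed), feature
 * requests, and finally open the first block so it can receive packets.
 */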
573static void init_prb_bdqc(struct packet_sock *po,
574 struct packet_ring_buffer *rb,
575 struct pgv *pg_vec,
576 union tpacket_req_u *req_u)
577{
578 struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
579 struct tpacket_block_desc *pbd;
580
581 memset(p1, 0x0, sizeof(*p1));
582
583 p1->knxt_seq_num = 1;
584 p1->pkbdq = pg_vec;
585 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
586 p1->pkblk_start = pg_vec[0].buffer;
587 p1->kblk_size = req_u->req3.tp_block_size;
588 p1->knum_blocks = req_u->req3.tp_block_nr;
589 p1->hdrlen = po->tp_hdrlen;
590 p1->version = po->tp_version;
591 p1->last_kactive_blk_num = 0;
592 po->stats.stats3.tp_freeze_q_cnt = 0;
593 if (req_u->req3.tp_retire_blk_tov)
594 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
595 else
596 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
597 req_u->req3.tp_block_size);
598 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
599 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
600 rwlock_init(&p1->blk_fill_in_prog_lock);
601
602 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
603 prb_init_ft_ops(p1, req_u);
604 prb_setup_retire_blk_timer(po);
605 prb_open_block(p1, pbd);
606}

/* Do NOT update the last_blk_num first.
 * Assumes sk_buff_head lock is held.
 */
611static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
612{
613 mod_timer(&pkc->retire_blk_timer,
614 jiffies + pkc->tov_in_jiffies);
615 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
616}

/* Timer logic:
 *
 * The retire timer is only re-armed when a block is opened, not on a
 * per-packet basis.  With the timeout sized from the link speed (see
 * prb_calc_retire_blk_tmo()), the timer normally never fires while a
 * block is still being filled.  When it does fire, the current block has
 * gone idle: it is retired (closed and handed to user space) and, if user
 * space still owns the next block, the queue is frozen until a block is
 * returned to the kernel.
 */
641static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
642{
643 struct packet_sock *po =
644 from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
645 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
646 unsigned int frozen;
647 struct tpacket_block_desc *pbd;
648
649 spin_lock(&po->sk.sk_receive_queue.lock);
650
651 frozen = prb_queue_frozen(pkc);
652 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
653
654 if (unlikely(pkc->delete_blk_timer))
655 goto out;
	/* We only need to plug the race when the block is partially filled:
	 * tpacket_rcv() may still be copying bits into it on another CPU.
	 * Taking the fill-in-progress lock for writing waits for any such
	 * copy to finish before we decide what to do with the block.
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		/* Waiting for skb_copy_bits to finish... */
		write_lock(&pkc->blk_fill_in_prog_lock);
		write_unlock(&pkc->blk_fill_in_prog_lock);
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			if (!BLOCK_NUM_PKTS(pbd)) {
				/* An empty block. Just refresh the timer. */
				goto refresh_timer;
			}
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/* Case 1. Queue was frozen because user-space was
			 *	   lagging behind.
			 */
			if (prb_curr_blk_in_use(pbd)) {
				/* Ok, user-space is still behind.
				 * So just refresh the timer.
				 */
				goto refresh_timer;
			} else {
				/* Case 2. Queue was frozen, user-space caught
				 * up, the link went idle and the timer fired.
				 * There is no block to close, so open this one
				 * and restart the timer (opening a block thaws
				 * the queue and refreshes the timer as a side
				 * effect).
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}
706
707refresh_timer:
708 _prb_refresh_rx_retire_blk_timer(pkc);
709
710out:
711 spin_unlock(&po->sk.sk_receive_queue.lock);
712}
713
static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* Skip the block header (we know the header fits in one page) */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}

/* Close the currently open block: record the timestamp of the last packet
 * (or the current time if the block is somehow empty), set TP_STATUS_USER
 * plus any extra status bits, flush the block and wake up the socket so
 * user space can consume it.
 */
757static void prb_close_block(struct tpacket_kbdq_core *pkc1,
758 struct tpacket_block_desc *pbd1,
759 struct packet_sock *po, unsigned int stat)
760{
761 __u32 status = TP_STATUS_USER | stat;
762
763 struct tpacket3_hdr *last_pkt;
764 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
765 struct sock *sk = &po->sk;
766
767 if (atomic_read(&po->tp_drops))
768 status |= TP_STATUS_LOSING;
769
770 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
771 last_pkt->tp_next_offset = 0;
772
773
774 if (BLOCK_NUM_PKTS(pbd1)) {
775 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
776 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
777 } else {
778
779
780
781
782
783 struct timespec64 ts;
784 ktime_get_real_ts64(&ts);
785 h1->ts_last_pkt.ts_sec = ts.tv_sec;
786 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
787 }
788
789 smp_wmb();
790
791
792 prb_flush_block(pkc1, pbd1, status);
793
794 sk->sk_data_ready(sk);
795
796 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
797}
798
799static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
800{
801 pkc->reset_pending_on_curr_blk = 0;
802}

/* Side effect of opening a block:
 * 1) prb_queue is thawed.
 * 2) retire_blk_timer is refreshed.
 */
811static void prb_open_block(struct tpacket_kbdq_core *pkc1,
812 struct tpacket_block_desc *pbd1)
813{
814 struct timespec64 ts;
815 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
816
817 smp_rmb();
	/* We could have just memset this but we will lose the
	 * flexibility of making the priv area sticky.
	 */
823 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
824 BLOCK_NUM_PKTS(pbd1) = 0;
825 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
826
827 ktime_get_real_ts64(&ts);
828
829 h1->ts_first_pkt.ts_sec = ts.tv_sec;
830 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
831
832 pkc1->pkblk_start = (char *)pbd1;
833 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
834
835 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
836 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
837
838 pbd1->version = pkc1->version;
839 pkc1->prev = pkc1->nxt_offset;
840 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
841
842 prb_thaw_queue(pkc1);
843 _prb_refresh_rx_retire_blk_timer(pkc1);
844
845 smp_wmb();
846}

/* Queue freeze logic:
 *
 * When the kernel wraps around the ring and finds that the next block is
 * still owned by user space (BLOCK_STATUS == TP_STATUS_USER), it cannot
 * overwrite it.  Instead the queue is "frozen": reset_pending_on_curr_blk
 * is set, tp_freeze_q_cnt is bumped and incoming packets are dropped until
 * user space releases a block.  The queue is thawed again the next time a
 * block is (re)opened, either from the rx path or from the retire timer.
 */
871static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
872 struct packet_sock *po)
873{
874 pkc->reset_pending_on_curr_blk = 1;
875 po->stats.stats3.tp_freeze_q_cnt++;
876}
877
878#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))

/* If the next block is free then we will dispatch it
 * and return a good offset.
 * Else, we will freeze the queue.
 * So, caller must check the return value.
 */
886static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
887 struct packet_sock *po)
888{
889 struct tpacket_block_desc *pbd;
890
891 smp_rmb();
892
893
894 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
895
896
897 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
898 prb_freeze_queue(pkc, po);
899 return NULL;
900 }
901
902
903
904
905
906
907 prb_open_block(pkc, pbd);
908 return (void *)pkc->nxt_offset;
909}
910
static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* retire/close the current block */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/* Plug the case where copy_bits() is in progress on
		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
		 * have space to copy the pkt in the current block and
		 * called prb_retire_current_block().
		 *
		 * We don't need to worry about the TMO case because
		 * the timer-handler already handled this case.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			/* Waiting for skb_copy_bits to finish... */
			write_lock(&pkc->blk_fill_in_prog_lock);
			write_unlock(&pkc->blk_fill_in_prog_lock);
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}
}
936
937static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
938{
939 return TP_STATUS_USER & BLOCK_STATUS(pbd);
940}
941
942static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
943{
944 return pkc->reset_pending_on_curr_blk;
945}
946
947static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
948 __releases(&pkc->blk_fill_in_prog_lock)
949{
950 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
951
952 read_unlock(&pkc->blk_fill_in_prog_lock);
953}
954
955static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
956 struct tpacket3_hdr *ppd)
957{
958 ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
959}
960
961static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
962 struct tpacket3_hdr *ppd)
963{
964 ppd->hv1.tp_rxhash = 0;
965}
966
967static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
968 struct tpacket3_hdr *ppd)
969{
970 if (skb_vlan_tag_present(pkc->skb)) {
971 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
972 ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
973 ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
974 } else {
975 ppd->hv1.tp_vlan_tci = 0;
976 ppd->hv1.tp_vlan_tpid = 0;
977 ppd->tp_status = TP_STATUS_AVAILABLE;
978 }
979}
980
981static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
982 struct tpacket3_hdr *ppd)
983{
984 ppd->hv1.tp_padding = 0;
985 prb_fill_vlan_info(pkc, ppd);
986
987 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
988 prb_fill_rxhash(pkc, ppd);
989 else
990 prb_clear_rxhash(pkc, ppd);
991}
992
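/* Reserve space for one packet in the currently open block and fill in the
 * per-packet TPACKET_V3 header.  The fill-in-progress lock is taken here
 * for reading and released in prb_clear_blk_fill_status() once tpacket_rcv()
 * has finished copying the packet data, so the block cannot be retired
 * while a copy is still in flight.
 */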
993static void prb_fill_curr_block(char *curr,
994 struct tpacket_kbdq_core *pkc,
995 struct tpacket_block_desc *pbd,
996 unsigned int len)
997 __acquires(&pkc->blk_fill_in_prog_lock)
998{
999 struct tpacket3_hdr *ppd;
1000
1001 ppd = (struct tpacket3_hdr *)curr;
1002 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
1003 pkc->prev = curr;
1004 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
1005 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
1006 BLOCK_NUM_PKTS(pbd) += 1;
1007 read_lock(&pkc->blk_fill_in_prog_lock);
1008 prb_run_all_ft_ops(pkc, ppd);
1009}
1010

static void *__packet_lookup_frame_in_block(struct packet_sock *po,
					    struct sk_buff *skb,
					    unsigned int len)
{
	struct tpacket_kbdq_core *pkc;
	struct tpacket_block_desc *pbd;
	char *curr, *end;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Queue is frozen when user space is lagging behind */
	if (prb_queue_frozen(pkc)) {
		/* Check if that last block which caused the queue to freeze
		 * is still in_use by user-space.
		 */
		if (prb_curr_blk_in_use(pbd)) {
			/* Can't record this packet */
			return NULL;
		} else {
			/* Ok, the block was released by user-space.
			 * Now let's open that block.
			 * Opening a block also thaws the queue.
			 * Thawing is a side effect.
			 */
			prb_open_block(pkc, pbd);
		}
	}

	smp_mb();
	curr = pkc->nxt_offset;
	pkc->skb = skb;
	end = (char *)pbd + pkc->kblk_size;

	/* first try the current block */
	if (curr + TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* Ok, close the current block */
	prb_retire_current_block(pkc, po, 0);

	/* Now, try to dispatch the next block */
	curr = (char *)prb_dispatch_next_block(pkc, po);
	if (curr) {
		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* No free blocks are available: user space has not caught up yet.
	 * The queue was just frozen and this packet will be dropped.
	 */
	return NULL;
}
1072
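/* Return the ring slot that should receive the next packet: the frame at
 * the current head for TPACKET_V1/V2, or space inside the currently open
 * block for TPACKET_V3.
 */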
1073static void *packet_current_rx_frame(struct packet_sock *po,
1074 struct sk_buff *skb,
1075 int status, unsigned int len)
1076{
1077 char *curr = NULL;
1078 switch (po->tp_version) {
1079 case TPACKET_V1:
1080 case TPACKET_V2:
1081 curr = packet_lookup_frame(po, &po->rx_ring,
1082 po->rx_ring.head, status);
1083 return curr;
1084 case TPACKET_V3:
1085 return __packet_lookup_frame_in_block(po, skb, len);
1086 default:
1087 WARN(1, "TPACKET version not supported\n");
1088 BUG();
1089 return NULL;
1090 }
1091}
1092
1093static void *prb_lookup_block(const struct packet_sock *po,
1094 const struct packet_ring_buffer *rb,
1095 unsigned int idx,
1096 int status)
1097{
1098 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1099 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1100
1101 if (status != BLOCK_STATUS(pbd))
1102 return NULL;
1103 return pbd;
1104}
1105
1106static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1107{
1108 unsigned int prev;
1109 if (rb->prb_bdqc.kactive_blk_num)
1110 prev = rb->prb_bdqc.kactive_blk_num-1;
1111 else
1112 prev = rb->prb_bdqc.knum_blocks-1;
1113 return prev;
1114}
1115
1116
1117static void *__prb_previous_block(struct packet_sock *po,
1118 struct packet_ring_buffer *rb,
1119 int status)
1120{
1121 unsigned int previous = prb_previous_blk_num(rb);
1122 return prb_lookup_block(po, rb, previous, status);
1123}
1124
1125static void *packet_previous_rx_frame(struct packet_sock *po,
1126 struct packet_ring_buffer *rb,
1127 int status)
1128{
1129 if (po->tp_version <= TPACKET_V2)
1130 return packet_previous_frame(po, rb, status);
1131
1132 return __prb_previous_block(po, rb, status);
1133}
1134
1135static void packet_increment_rx_head(struct packet_sock *po,
1136 struct packet_ring_buffer *rb)
1137{
1138 switch (po->tp_version) {
1139 case TPACKET_V1:
1140 case TPACKET_V2:
1141 return packet_increment_head(rb);
1142 case TPACKET_V3:
1143 default:
1144 WARN(1, "TPACKET version not supported.\n");
1145 BUG();
1146 return;
1147 }
1148}
1149
1150static void *packet_previous_frame(struct packet_sock *po,
1151 struct packet_ring_buffer *rb,
1152 int status)
1153{
1154 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1155 return packet_lookup_frame(po, rb, previous, status);
1156}
1157
1158static void packet_increment_head(struct packet_ring_buffer *buff)
1159{
1160 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1161}
1162
1163static void packet_inc_pending(struct packet_ring_buffer *rb)
1164{
1165 this_cpu_inc(*rb->pending_refcnt);
1166}
1167
1168static void packet_dec_pending(struct packet_ring_buffer *rb)
1169{
1170 this_cpu_dec(*rb->pending_refcnt);
1171}
1172
1173static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1174{
1175 unsigned int refcnt = 0;
1176 int cpu;
1177
	/* We don't use pending refcount in rx_ring. */
1179 if (rb->pending_refcnt == NULL)
1180 return 0;
1181
1182 for_each_possible_cpu(cpu)
1183 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1184
1185 return refcnt;
1186}
1187
1188static int packet_alloc_pending(struct packet_sock *po)
1189{
1190 po->rx_ring.pending_refcnt = NULL;
1191
1192 po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1193 if (unlikely(po->tx_ring.pending_refcnt == NULL))
1194 return -ENOBUFS;
1195
1196 return 0;
1197}
1198
1199static void packet_free_pending(struct packet_sock *po)
1200{
1201 free_percpu(po->tx_ring.pending_refcnt);
1202}
1203
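/* Receive "room" levels used by fanout rollover: ROOM_NORMAL means plenty
 * of space (more than 1/2^ROOM_POW_OFF of the ring or rcvbuf is free),
 * ROOM_LOW means only a little space is left, ROOM_NONE means the packet
 * would have to be dropped.
 */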
1204#define ROOM_POW_OFF 2
1205#define ROOM_NONE 0x0
1206#define ROOM_LOW 0x1
1207#define ROOM_NORMAL 0x2
1208
1209static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
1210{
1211 int idx, len;
1212
1213 len = READ_ONCE(po->rx_ring.frame_max) + 1;
1214 idx = READ_ONCE(po->rx_ring.head);
1215 if (pow_off)
1216 idx += len >> pow_off;
1217 if (idx >= len)
1218 idx -= len;
1219 return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1220}
1221
1222static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
1223{
1224 int idx, len;
1225
1226 len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
1227 idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
1228 if (pow_off)
1229 idx += len >> pow_off;
1230 if (idx >= len)
1231 idx -= len;
1232 return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
1233}
1234
1235static int __packet_rcv_has_room(const struct packet_sock *po,
1236 const struct sk_buff *skb)
1237{
1238 const struct sock *sk = &po->sk;
1239 int ret = ROOM_NONE;
1240
1241 if (po->prot_hook.func != tpacket_rcv) {
1242 int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
1243 int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
1244 - (skb ? skb->truesize : 0);
1245
1246 if (avail > (rcvbuf >> ROOM_POW_OFF))
1247 return ROOM_NORMAL;
1248 else if (avail > 0)
1249 return ROOM_LOW;
1250 else
1251 return ROOM_NONE;
1252 }
1253
1254 if (po->tp_version == TPACKET_V3) {
1255 if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
1256 ret = ROOM_NORMAL;
1257 else if (__tpacket_v3_has_room(po, 0))
1258 ret = ROOM_LOW;
1259 } else {
1260 if (__tpacket_has_room(po, ROOM_POW_OFF))
1261 ret = ROOM_NORMAL;
1262 else if (__tpacket_has_room(po, 0))
1263 ret = ROOM_LOW;
1264 }
1265
1266 return ret;
1267}
1268
1269static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1270{
1271 int pressure, ret;
1272
1273 ret = __packet_rcv_has_room(po, skb);
1274 pressure = ret != ROOM_NORMAL;
1275
1276 if (READ_ONCE(po->pressure) != pressure)
1277 WRITE_ONCE(po->pressure, pressure);
1278
1279 return ret;
1280}
1281
1282static void packet_rcv_try_clear_pressure(struct packet_sock *po)
1283{
1284 if (READ_ONCE(po->pressure) &&
1285 __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
1286 WRITE_ONCE(po->pressure, 0);
1287}
1288
1289static void packet_sock_destruct(struct sock *sk)
1290{
1291 skb_queue_purge(&sk->sk_error_queue);
1292
1293 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1294 WARN_ON(refcount_read(&sk->sk_wmem_alloc));
1295
1296 if (!sock_flag(sk, SOCK_DEAD)) {
1297 pr_err("Attempt to release alive packet socket: %p\n", sk);
1298 return;
1299 }
1300
1301 sk_refcnt_debug_dec(sk);
1302}
1303
1304static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
1305{
1306 u32 *history = po->rollover->history;
1307 u32 victim, rxhash;
1308 int i, count = 0;
1309
1310 rxhash = skb_get_hash(skb);
1311 for (i = 0; i < ROLLOVER_HLEN; i++)
1312 if (READ_ONCE(history[i]) == rxhash)
1313 count++;
1314
1315 victim = prandom_u32() % ROLLOVER_HLEN;

	/* Avoid dirtying the cache line if possible */
1318 if (READ_ONCE(history[victim]) != rxhash)
1319 WRITE_ONCE(history[victim], rxhash);
1320
1321 return count > (ROLLOVER_HLEN >> 1);
1322}
1323
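/* Fanout demux functions: each returns the index of the group member that
 * should receive the packet, keyed on flow hash, round-robin counter,
 * receiving CPU, a random draw, the recorded queue mapping, or a BPF
 * program, depending on the fanout type.
 */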
1324static unsigned int fanout_demux_hash(struct packet_fanout *f,
1325 struct sk_buff *skb,
1326 unsigned int num)
1327{
1328 return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1329}
1330
1331static unsigned int fanout_demux_lb(struct packet_fanout *f,
1332 struct sk_buff *skb,
1333 unsigned int num)
1334{
1335 unsigned int val = atomic_inc_return(&f->rr_cur);
1336
1337 return val % num;
1338}
1339
1340static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1341 struct sk_buff *skb,
1342 unsigned int num)
1343{
1344 return smp_processor_id() % num;
1345}
1346
1347static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1348 struct sk_buff *skb,
1349 unsigned int num)
1350{
1351 return prandom_u32_max(num);
1352}
1353
1354static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1355 struct sk_buff *skb,
1356 unsigned int idx, bool try_self,
1357 unsigned int num)
1358{
1359 struct packet_sock *po, *po_next, *po_skip = NULL;
1360 unsigned int i, j, room = ROOM_NONE;
1361
1362 po = pkt_sk(f->arr[idx]);
1363
1364 if (try_self) {
1365 room = packet_rcv_has_room(po, skb);
1366 if (room == ROOM_NORMAL ||
1367 (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
1368 return idx;
1369 po_skip = po;
1370 }
1371
1372 i = j = min_t(int, po->rollover->sock, num - 1);
1373 do {
1374 po_next = pkt_sk(f->arr[i]);
1375 if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
1376 packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
1377 if (i != j)
1378 po->rollover->sock = i;
1379 atomic_long_inc(&po->rollover->num);
1380 if (room == ROOM_LOW)
1381 atomic_long_inc(&po->rollover->num_huge);
1382 return i;
1383 }
1384
1385 if (++i == num)
1386 i = 0;
1387 } while (i != j);
1388
1389 atomic_long_inc(&po->rollover->num_failed);
1390 return idx;
1391}
1392
1393static unsigned int fanout_demux_qm(struct packet_fanout *f,
1394 struct sk_buff *skb,
1395 unsigned int num)
1396{
1397 return skb_get_queue_mapping(skb) % num;
1398}
1399
1400static unsigned int fanout_demux_bpf(struct packet_fanout *f,
1401 struct sk_buff *skb,
1402 unsigned int num)
1403{
1404 struct bpf_prog *prog;
1405 unsigned int ret = 0;
1406
1407 rcu_read_lock();
1408 prog = rcu_dereference(f->bpf_prog);
1409 if (prog)
1410 ret = bpf_prog_run_clear_cb(prog, skb) % num;
1411 rcu_read_unlock();
1412
1413 return ret;
1414}
1415
1416static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1417{
1418 return f->flags & (flag >> 8);
1419}
1420
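/* Shared receive handler for a fanout group: pick a member socket according
 * to the group's fanout type (optionally rolling over to a less loaded
 * member), then hand the skb to that socket's own prot_hook handler.
 */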
1421static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1422 struct packet_type *pt, struct net_device *orig_dev)
1423{
1424 struct packet_fanout *f = pt->af_packet_priv;
1425 unsigned int num = READ_ONCE(f->num_members);
1426 struct net *net = read_pnet(&f->net);
1427 struct packet_sock *po;
1428 unsigned int idx;
1429
1430 if (!net_eq(dev_net(dev), net) || !num) {
1431 kfree_skb(skb);
1432 return 0;
1433 }
1434
1435 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1436 skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
1437 if (!skb)
1438 return 0;
1439 }
1440 switch (f->type) {
1441 case PACKET_FANOUT_HASH:
1442 default:
1443 idx = fanout_demux_hash(f, skb, num);
1444 break;
1445 case PACKET_FANOUT_LB:
1446 idx = fanout_demux_lb(f, skb, num);
1447 break;
1448 case PACKET_FANOUT_CPU:
1449 idx = fanout_demux_cpu(f, skb, num);
1450 break;
1451 case PACKET_FANOUT_RND:
1452 idx = fanout_demux_rnd(f, skb, num);
1453 break;
1454 case PACKET_FANOUT_QM:
1455 idx = fanout_demux_qm(f, skb, num);
1456 break;
1457 case PACKET_FANOUT_ROLLOVER:
1458 idx = fanout_demux_rollover(f, skb, 0, false, num);
1459 break;
1460 case PACKET_FANOUT_CBPF:
1461 case PACKET_FANOUT_EBPF:
1462 idx = fanout_demux_bpf(f, skb, num);
1463 break;
1464 }
1465
1466 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
1467 idx = fanout_demux_rollover(f, skb, idx, true, num);
1468
1469 po = pkt_sk(f->arr[idx]);
1470 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1471}
1472
1473DEFINE_MUTEX(fanout_mutex);
1474EXPORT_SYMBOL_GPL(fanout_mutex);
1475static LIST_HEAD(fanout_list);
1476static u16 fanout_next_id;
1477
1478static void __fanout_link(struct sock *sk, struct packet_sock *po)
1479{
1480 struct packet_fanout *f = po->fanout;
1481
1482 spin_lock(&f->lock);
1483 f->arr[f->num_members] = sk;
1484 smp_wmb();
1485 f->num_members++;
1486 if (f->num_members == 1)
1487 dev_add_pack(&f->prot_hook);
1488 spin_unlock(&f->lock);
1489}
1490
1491static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1492{
1493 struct packet_fanout *f = po->fanout;
1494 int i;
1495
1496 spin_lock(&f->lock);
1497 for (i = 0; i < f->num_members; i++) {
1498 if (f->arr[i] == sk)
1499 break;
1500 }
1501 BUG_ON(i >= f->num_members);
1502 f->arr[i] = f->arr[f->num_members - 1];
1503 f->num_members--;
1504 if (f->num_members == 0)
1505 __dev_remove_pack(&f->prot_hook);
1506 spin_unlock(&f->lock);
1507}
1508
1509static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
1510{
1511 if (sk->sk_family != PF_PACKET)
1512 return false;
1513
1514 return ptype->af_packet_priv == pkt_sk(sk)->fanout;
1515}
1516
1517static void fanout_init_data(struct packet_fanout *f)
1518{
1519 switch (f->type) {
1520 case PACKET_FANOUT_LB:
1521 atomic_set(&f->rr_cur, 0);
1522 break;
1523 case PACKET_FANOUT_CBPF:
1524 case PACKET_FANOUT_EBPF:
1525 RCU_INIT_POINTER(f->bpf_prog, NULL);
1526 break;
1527 }
1528}
1529
1530static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
1531{
1532 struct bpf_prog *old;
1533
1534 spin_lock(&f->lock);
1535 old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
1536 rcu_assign_pointer(f->bpf_prog, new);
1537 spin_unlock(&f->lock);
1538
1539 if (old) {
1540 synchronize_net();
1541 bpf_prog_destroy(old);
1542 }
1543}
1544
1545static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
1546 unsigned int len)
1547{
1548 struct bpf_prog *new;
1549 struct sock_fprog fprog;
1550 int ret;
1551
1552 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1553 return -EPERM;
1554
1555 ret = copy_bpf_fprog_from_user(&fprog, data, len);
1556 if (ret)
1557 return ret;
1558
1559 ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
1560 if (ret)
1561 return ret;
1562
1563 __fanout_set_data_bpf(po->fanout, new);
1564 return 0;
1565}
1566
1567static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
1568 unsigned int len)
1569{
1570 struct bpf_prog *new;
1571 u32 fd;
1572
1573 if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
1574 return -EPERM;
1575 if (len != sizeof(fd))
1576 return -EINVAL;
1577 if (copy_from_sockptr(&fd, data, len))
1578 return -EFAULT;
1579
1580 new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
1581 if (IS_ERR(new))
1582 return PTR_ERR(new);
1583
1584 __fanout_set_data_bpf(po->fanout, new);
1585 return 0;
1586}
1587
1588static int fanout_set_data(struct packet_sock *po, sockptr_t data,
1589 unsigned int len)
1590{
1591 switch (po->fanout->type) {
1592 case PACKET_FANOUT_CBPF:
1593 return fanout_set_data_cbpf(po, data, len);
1594 case PACKET_FANOUT_EBPF:
1595 return fanout_set_data_ebpf(po, data, len);
1596 default:
1597 return -EINVAL;
1598 }
1599}
1600
1601static void fanout_release_data(struct packet_fanout *f)
1602{
1603 switch (f->type) {
1604 case PACKET_FANOUT_CBPF:
1605 case PACKET_FANOUT_EBPF:
1606 __fanout_set_data_bpf(f, NULL);
1607 }
1608}
1609
1610static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
1611{
1612 struct packet_fanout *f;
1613
1614 list_for_each_entry(f, &fanout_list, list) {
1615 if (f->id == candidate_id &&
1616 read_pnet(&f->net) == sock_net(sk)) {
1617 return false;
1618 }
1619 }
1620 return true;
1621}
1622
1623static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
1624{
1625 u16 id = fanout_next_id;
1626
1627 do {
1628 if (__fanout_id_is_free(sk, id)) {
1629 *new_id = id;
1630 fanout_next_id = id + 1;
1631 return true;
1632 }
1633
1634 id++;
1635 } while (id != fanout_next_id);
1636
1637 return false;
1638}
1639
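/* Join a socket to a fanout group (setsockopt(PACKET_FANOUT)), creating the
 * group if it does not exist yet.  The socket must be running (bound) and
 * its prot_hook type and device must match the group's.
 */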
1640static int fanout_add(struct sock *sk, struct fanout_args *args)
1641{
1642 struct packet_rollover *rollover = NULL;
1643 struct packet_sock *po = pkt_sk(sk);
1644 u16 type_flags = args->type_flags;
1645 struct packet_fanout *f, *match;
1646 u8 type = type_flags & 0xff;
1647 u8 flags = type_flags >> 8;
1648 u16 id = args->id;
1649 int err;
1650
1651 switch (type) {
1652 case PACKET_FANOUT_ROLLOVER:
		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
			return -EINVAL;
		fallthrough;
1655 case PACKET_FANOUT_HASH:
1656 case PACKET_FANOUT_LB:
1657 case PACKET_FANOUT_CPU:
1658 case PACKET_FANOUT_RND:
1659 case PACKET_FANOUT_QM:
1660 case PACKET_FANOUT_CBPF:
1661 case PACKET_FANOUT_EBPF:
1662 break;
1663 default:
1664 return -EINVAL;
1665 }
1666
1667 mutex_lock(&fanout_mutex);
1668
1669 err = -EALREADY;
1670 if (po->fanout)
1671 goto out;
1672
1673 if (type == PACKET_FANOUT_ROLLOVER ||
1674 (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
1675 err = -ENOMEM;
1676 rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
1677 if (!rollover)
1678 goto out;
1679 atomic_long_set(&rollover->num, 0);
1680 atomic_long_set(&rollover->num_huge, 0);
1681 atomic_long_set(&rollover->num_failed, 0);
1682 }
1683
1684 if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
1685 if (id != 0) {
1686 err = -EINVAL;
1687 goto out;
1688 }
1689 if (!fanout_find_new_id(sk, &id)) {
1690 err = -ENOMEM;
1691 goto out;
1692 }
1693
1694 flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
1695 }
1696
1697 match = NULL;
1698 list_for_each_entry(f, &fanout_list, list) {
1699 if (f->id == id &&
1700 read_pnet(&f->net) == sock_net(sk)) {
1701 match = f;
1702 break;
1703 }
1704 }
1705 err = -EINVAL;
1706 if (match) {
1707 if (match->flags != flags)
1708 goto out;
1709 if (args->max_num_members &&
1710 args->max_num_members != match->max_num_members)
1711 goto out;
1712 } else {
1713 if (args->max_num_members > PACKET_FANOUT_MAX)
1714 goto out;
1715 if (!args->max_num_members)
1716
1717 args->max_num_members = 256;
1718 err = -ENOMEM;
1719 match = kvzalloc(struct_size(match, arr, args->max_num_members),
1720 GFP_KERNEL);
1721 if (!match)
1722 goto out;
1723 write_pnet(&match->net, sock_net(sk));
1724 match->id = id;
1725 match->type = type;
1726 match->flags = flags;
1727 INIT_LIST_HEAD(&match->list);
1728 spin_lock_init(&match->lock);
1729 refcount_set(&match->sk_ref, 0);
1730 fanout_init_data(match);
1731 match->prot_hook.type = po->prot_hook.type;
1732 match->prot_hook.dev = po->prot_hook.dev;
1733 match->prot_hook.func = packet_rcv_fanout;
1734 match->prot_hook.af_packet_priv = match;
1735 match->prot_hook.id_match = match_fanout_group;
1736 match->max_num_members = args->max_num_members;
1737 list_add(&match->list, &fanout_list);
1738 }
1739 err = -EINVAL;
1740
1741 spin_lock(&po->bind_lock);
1742 if (po->running &&
1743 match->type == type &&
1744 match->prot_hook.type == po->prot_hook.type &&
1745 match->prot_hook.dev == po->prot_hook.dev) {
1746 err = -ENOSPC;
1747 if (refcount_read(&match->sk_ref) < match->max_num_members) {
1748 __dev_remove_pack(&po->prot_hook);
1749 po->fanout = match;
1750 po->rollover = rollover;
1751 rollover = NULL;
1752 refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
1753 __fanout_link(sk, po);
1754 err = 0;
1755 }
1756 }
1757 spin_unlock(&po->bind_lock);
1758
1759 if (err && !refcount_read(&match->sk_ref)) {
1760 list_del(&match->list);
1761 kvfree(match);
1762 }
1763
1764out:
1765 kfree(rollover);
1766 mutex_unlock(&fanout_mutex);
1767 return err;
1768}

/* If pkt_sk(sk)->fanout->sk_ref drops to zero, fanout_release() removes the
 * packet_fanout from fanout_list and returns it; otherwise it returns NULL.
 * The caller is responsible for calling fanout_release_data() and freeing
 * the returned packet_fanout after a synchronize_net().
 */
1775static struct packet_fanout *fanout_release(struct sock *sk)
1776{
1777 struct packet_sock *po = pkt_sk(sk);
1778 struct packet_fanout *f;
1779
1780 mutex_lock(&fanout_mutex);
1781 f = po->fanout;
1782 if (f) {
1783 po->fanout = NULL;
1784
1785 if (refcount_dec_and_test(&f->sk_ref))
1786 list_del(&f->list);
1787 else
1788 f = NULL;
1789 }
1790 mutex_unlock(&fanout_mutex);
1791
1792 return f;
1793}
1794
1795static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
1796 struct sk_buff *skb)
1797{
	/* Earlier code assumed this would be a VLAN pkt, double-check
	 * this now that we have the actual packet in hand. We can only
	 * do this check on Ethernet devices.
	 */
1802 if (unlikely(dev->type != ARPHRD_ETHER))
1803 return false;
1804
1805 skb_reset_mac_header(skb);
1806 return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
1807}
1808
1809static const struct proto_ops packet_ops;
1810
1811static const struct proto_ops packet_ops_spkt;
1812
1813static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1814 struct packet_type *pt, struct net_device *orig_dev)
1815{
1816 struct sock *sk;
1817 struct sockaddr_pkt *spkt;
1818
	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */
1824 sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled, push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

1837 if (skb->pkt_type == PACKET_LOOPBACK)
1838 goto out;
1839
1840 if (!net_eq(dev_net(dev), sock_net(sk)))
1841 goto out;
1842
1843 skb = skb_share_check(skb, GFP_ATOMIC);
1844 if (skb == NULL)
1845 goto oom;
1846
1847
1848 skb_dst_drop(skb);
1849
1850
1851 nf_reset_ct(skb);
1852
1853 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1854
1855 skb_push(skb, skb->data - skb_mac_header(skb));
1856
1857
1858
1859
1860
1861 spkt->spkt_family = dev->type;
1862 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1863 spkt->spkt_protocol = skb->protocol;
1864
1865
1866
1867
1868
1869
1870 if (sock_queue_rcv_skb(sk, skb) == 0)
1871 return 0;
1872
1873out:
1874 kfree_skb(skb);
1875oom:
1876 return 0;
1877}
1878
1879static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
1880{
1881 if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
1882 sock->type == SOCK_RAW) {
1883 skb_reset_mac_header(skb);
1884 skb->protocol = dev_parse_header_protocol(skb);
1885 }
1886
1887 skb_probe_transport_header(skb);
1888}

/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame.
 */
1895static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
1896 size_t len)
1897{
1898 struct sock *sk = sock->sk;
1899 DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
1900 struct sk_buff *skb = NULL;
1901 struct net_device *dev;
1902 struct sockcm_cookie sockc;
1903 __be16 proto = 0;
1904 int err;
1905 int extra_len = 0;
1906
1907
1908
1909
1910
1911 if (saddr) {
1912 if (msg->msg_namelen < sizeof(struct sockaddr))
1913 return -EINVAL;
1914 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1915 proto = saddr->spkt_protocol;
1916 } else
1917 return -ENOTCONN;
1918
1919
1920
1921
1922
1923 saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
1924retry:
1925 rcu_read_lock();
1926 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1927 err = -ENODEV;
1928 if (dev == NULL)
1929 goto out_unlock;
1930
1931 err = -ENETDOWN;
1932 if (!(dev->flags & IFF_UP))
1933 goto out_unlock;

	/* You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

1940 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
1941 if (!netif_supports_nofcs(dev)) {
1942 err = -EPROTONOSUPPORT;
1943 goto out_unlock;
1944 }
1945 extra_len = 4;
1946 }
1947
1948 err = -EMSGSIZE;
1949 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
1950 goto out_unlock;
1951
1952 if (!skb) {
1953 size_t reserved = LL_RESERVED_SPACE(dev);
1954 int tlen = dev->needed_tailroom;
1955 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1956
1957 rcu_read_unlock();
1958 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1959 if (skb == NULL)
1960 return -ENOBUFS;
1961
1962
1963
1964
1965 skb_reserve(skb, reserved);
1966 skb_reset_network_header(skb);
1967
1968
1969 if (hhlen) {
1970 skb->data -= hhlen;
1971 skb->tail -= hhlen;
1972 if (len < hhlen)
1973 skb_reset_network_header(skb);
1974 }
1975 err = memcpy_from_msg(skb_put(skb, len), msg, len);
1976 if (err)
1977 goto out_free;
1978 goto retry;
1979 }
1980
1981 if (!dev_validate_header(dev, skb->data, len)) {
1982 err = -EINVAL;
1983 goto out_unlock;
1984 }
1985 if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
1986 !packet_extra_vlan_len_allowed(dev, skb)) {
1987 err = -EMSGSIZE;
1988 goto out_unlock;
1989 }
1990
1991 sockcm_init(&sockc, sk);
1992 if (msg->msg_controllen) {
1993 err = sock_cmsg_send(sk, msg, &sockc);
1994 if (unlikely(err))
1995 goto out_unlock;
1996 }
1997
1998 skb->protocol = proto;
1999 skb->dev = dev;
2000 skb->priority = sk->sk_priority;
2001 skb->mark = sk->sk_mark;
2002 skb->tstamp = sockc.transmit_time;
2003
2004 skb_setup_tx_timestamp(skb, sockc.tsflags);
2005
2006 if (unlikely(extra_len == 4))
2007 skb->no_fcs = 1;
2008
2009 packet_parse_headers(skb, sock);
2010
2011 dev_queue_xmit(skb);
2012 rcu_read_unlock();
2013 return len;
2014
2015out_unlock:
2016 rcu_read_unlock();
2017out_free:
2018 kfree_skb(skb);
2019 return err;
2020}
2021
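/* Run the socket's attached (c)BPF filter, if any.  The return value is the
 * number of bytes to keep (the snap length); 0 means drop the packet.
 */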
2022static unsigned int run_filter(struct sk_buff *skb,
2023 const struct sock *sk,
2024 unsigned int res)
2025{
2026 struct sk_filter *filter;
2027
2028 rcu_read_lock();
2029 filter = rcu_dereference(sk->sk_filter);
2030 if (filter != NULL)
2031 res = bpf_prog_run_clear_cb(filter->prog, skb);
2032 rcu_read_unlock();
2033
2034 return res;
2035}
2036
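/* For PACKET_VNET_HDR sockets, prepend a virtio_net_hdr describing the
 * skb's checksum/GSO state to the data returned by recvmsg().
 */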
2037static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
2038 size_t *len)
2039{
2040 struct virtio_net_hdr vnet_hdr;
2041
2042 if (*len < sizeof(vnet_hdr))
2043 return -EINVAL;
2044 *len -= sizeof(vnet_hdr);
2045
2046 if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
2047 return -EINVAL;
2048
2049 return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
2050}
2051
/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequence, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */
2064static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
2065 struct packet_type *pt, struct net_device *orig_dev)
2066{
2067 struct sock *sk;
2068 struct sockaddr_ll *sll;
2069 struct packet_sock *po;
2070 u8 *skb_head = skb->data;
2071 int skb_len = skb->len;
2072 unsigned int snaplen, res;
2073 bool is_drop_n_account = false;
2074
2075 if (skb->pkt_type == PACKET_LOOPBACK)
2076 goto drop;
2077
2078 sk = pt->af_packet_priv;
2079 po = pkt_sk(sk);
2080
2081 if (!net_eq(dev_net(dev), sock_net(sk)))
2082 goto drop;
2083
2084 skb->dev = dev;
2085
2086 if (dev_has_header(dev)) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
2094 if (sk->sk_type != SOCK_DGRAM)
2095 skb_push(skb, skb->data - skb_mac_header(skb));
2096 else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
2098 skb_pull(skb, skb_network_offset(skb));
2099 }
2100 }
2101
2102 snaplen = skb->len;
2103
2104 res = run_filter(skb, sk, snaplen);
2105 if (!res)
2106 goto drop_n_restore;
2107 if (snaplen > res)
2108 snaplen = res;
2109
2110 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
2111 goto drop_n_acct;
2112
2113 if (skb_shared(skb)) {
2114 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
2115 if (nskb == NULL)
2116 goto drop_n_acct;
2117
2118 if (skb_head != skb->data) {
2119 skb->data = skb_head;
2120 skb->len = skb_len;
2121 }
2122 consume_skb(skb);
2123 skb = nskb;
2124 }
2125
2126 sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
2127
2128 sll = &PACKET_SKB_CB(skb)->sa.ll;
2129 sll->sll_hatype = dev->type;
2130 sll->sll_pkttype = skb->pkt_type;
2131 if (unlikely(po->origdev))
2132 sll->sll_ifindex = orig_dev->ifindex;
2133 else
2134 sll->sll_ifindex = dev->ifindex;
2135
2136 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
	 * Use their space for storing the original skb length.
	 */
2141 PACKET_SKB_CB(skb)->sa.origlen = skb->len;
2142
2143 if (pskb_trim(skb, snaplen))
2144 goto drop_n_acct;
2145
2146 skb_set_owner_r(skb, sk);
2147 skb->dev = NULL;
2148 skb_dst_drop(skb);

	/* drop conntrack reference */
2151 nf_reset_ct(skb);
2152
2153 spin_lock(&sk->sk_receive_queue.lock);
2154 po->stats.stats1.tp_packets++;
2155 sock_skb_set_dropcount(sk, skb);
2156 __skb_queue_tail(&sk->sk_receive_queue, skb);
2157 spin_unlock(&sk->sk_receive_queue.lock);
2158 sk->sk_data_ready(sk);
2159 return 0;
2160
2161drop_n_acct:
2162 is_drop_n_account = true;
2163 atomic_inc(&po->tp_drops);
2164 atomic_inc(&sk->sk_drops);
2165
2166drop_n_restore:
2167 if (skb_head != skb->data && skb_shared(skb)) {
2168 skb->data = skb_head;
2169 skb->len = skb_len;
2170 }
2171drop:
2172 if (!is_drop_n_account)
2173 consume_skb(skb);
2174 else
2175 kfree_skb(skb);
2176 return 0;
2177}
2178
2179static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
2180 struct packet_type *pt, struct net_device *orig_dev)
2181{
2182 struct sock *sk;
2183 struct packet_sock *po;
2184 struct sockaddr_ll *sll;
2185 union tpacket_uhdr h;
2186 u8 *skb_head = skb->data;
2187 int skb_len = skb->len;
2188 unsigned int snaplen, res;
2189 unsigned long status = TP_STATUS_USER;
2190 unsigned short macoff, hdrlen;
2191 unsigned int netoff;
2192 struct sk_buff *copy_skb = NULL;
2193 struct timespec64 ts;
2194 __u32 ts_status;
2195 bool is_drop_n_account = false;
2196 unsigned int slot_id = 0;
2197 bool do_vnet = false;
2198
	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
	 * We may add members to them until current aligned size without forcing
	 * userspace to call getsockopt(..., PACKET_HDRLEN, ...).
	 */
2203 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
2204 BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
2205
2206 if (skb->pkt_type == PACKET_LOOPBACK)
2207 goto drop;
2208
2209 sk = pt->af_packet_priv;
2210 po = pkt_sk(sk);
2211
2212 if (!net_eq(dev_net(dev), sock_net(sk)))
2213 goto drop;
2214
2215 if (dev_has_header(dev)) {
2216 if (sk->sk_type != SOCK_DGRAM)
2217 skb_push(skb, skb->data - skb_mac_header(skb));
2218 else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
2220 skb_pull(skb, skb_network_offset(skb));
2221 }
2222 }
2223
2224 snaplen = skb->len;
2225
2226 res = run_filter(skb, sk, snaplen);
2227 if (!res)
2228 goto drop_n_restore;

	/* If we are flooded, just give up */
2231 if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
2232 atomic_inc(&po->tp_drops);
2233 goto drop_n_restore;
2234 }
2235
2236 if (skb->ip_summed == CHECKSUM_PARTIAL)
2237 status |= TP_STATUS_CSUMNOTREADY;
2238 else if (skb->pkt_type != PACKET_OUTGOING &&
2239 (skb->ip_summed == CHECKSUM_COMPLETE ||
2240 skb_csum_unnecessary(skb)))
2241 status |= TP_STATUS_CSUM_VALID;
2242
2243 if (snaplen > res)
2244 snaplen = res;
2245
2246 if (sk->sk_type == SOCK_DGRAM) {
2247 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
2248 po->tp_reserve;
2249 } else {
2250 unsigned int maclen = skb_network_offset(skb);
2251 netoff = TPACKET_ALIGN(po->tp_hdrlen +
2252 (maclen < 16 ? 16 : maclen)) +
2253 po->tp_reserve;
2254 if (po->has_vnet_hdr) {
2255 netoff += sizeof(struct virtio_net_hdr);
2256 do_vnet = true;
2257 }
2258 macoff = netoff - maclen;
2259 }
2260 if (netoff > USHRT_MAX) {
2261 atomic_inc(&po->tp_drops);
2262 goto drop_n_restore;
2263 }
2264 if (po->tp_version <= TPACKET_V2) {
2265 if (macoff + snaplen > po->rx_ring.frame_size) {
2266 if (po->copy_thresh &&
2267 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
2268 if (skb_shared(skb)) {
2269 copy_skb = skb_clone(skb, GFP_ATOMIC);
2270 } else {
2271 copy_skb = skb_get(skb);
2272 skb_head = skb->data;
2273 }
2274 if (copy_skb)
2275 skb_set_owner_r(copy_skb, sk);
2276 }
2277 snaplen = po->rx_ring.frame_size - macoff;
2278 if ((int)snaplen < 0) {
2279 snaplen = 0;
2280 do_vnet = false;
2281 }
2282 }
2283 } else if (unlikely(macoff + snaplen >
2284 GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
2285 u32 nval;
2286
2287 nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
2288 pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
2289 snaplen, nval, macoff);
2290 snaplen = nval;
2291 if (unlikely((int)snaplen < 0)) {
2292 snaplen = 0;
2293 macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
2294 do_vnet = false;
2295 }
2296 }
2297 spin_lock(&sk->sk_receive_queue.lock);
2298 h.raw = packet_current_rx_frame(po, skb,
2299 TP_STATUS_KERNEL, (macoff+snaplen));
2300 if (!h.raw)
2301 goto drop_n_account;
2302
2303 if (po->tp_version <= TPACKET_V2) {
2304 slot_id = po->rx_ring.head;
2305 if (test_bit(slot_id, po->rx_ring.rx_owner_map))
2306 goto drop_n_account;
2307 __set_bit(slot_id, po->rx_ring.rx_owner_map);
2308 }
2309
2310 if (do_vnet &&
2311 virtio_net_hdr_from_skb(skb, h.raw + macoff -
2312 sizeof(struct virtio_net_hdr),
2313 vio_le(), true, 0)) {
2314 if (po->tp_version == TPACKET_V3)
2315 prb_clear_blk_fill_status(&po->rx_ring);
2316 goto drop_n_account;
2317 }
2318
2319 if (po->tp_version <= TPACKET_V2) {
2320 packet_increment_rx_head(po, &po->rx_ring);
		/*
		 * LOSING will be reported till you read the stats,
		 * because it's COR - Clear On Read.
		 * Anyways, moving it for V1/V2 only as V3 doesn't need this
		 * at packet level.
		 */
2327 if (atomic_read(&po->tp_drops))
2328 status |= TP_STATUS_LOSING;
2329 }
2330
2331 po->stats.stats1.tp_packets++;
2332 if (copy_skb) {
2333 status |= TP_STATUS_COPY;
2334 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
2335 }
2336 spin_unlock(&sk->sk_receive_queue.lock);
2337
2338 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
2339
2340 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
2341 ktime_get_real_ts64(&ts);
2342
2343 status |= ts_status;
2344
2345 switch (po->tp_version) {
2346 case TPACKET_V1:
2347 h.h1->tp_len = skb->len;
2348 h.h1->tp_snaplen = snaplen;
2349 h.h1->tp_mac = macoff;
2350 h.h1->tp_net = netoff;
2351 h.h1->tp_sec = ts.tv_sec;
2352 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
2353 hdrlen = sizeof(*h.h1);
2354 break;
2355 case TPACKET_V2:
2356 h.h2->tp_len = skb->len;
2357 h.h2->tp_snaplen = snaplen;
2358 h.h2->tp_mac = macoff;
2359 h.h2->tp_net = netoff;
2360 h.h2->tp_sec = ts.tv_sec;
2361 h.h2->tp_nsec = ts.tv_nsec;
2362 if (skb_vlan_tag_present(skb)) {
2363 h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
2364 h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
2365 status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
2366 } else {
2367 h.h2->tp_vlan_tci = 0;
2368 h.h2->tp_vlan_tpid = 0;
2369 }
2370 memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
2371 hdrlen = sizeof(*h.h2);
2372 break;
2373 case TPACKET_V3:
		/* tp_next_offset and vlan info are already populated above,
		 * so don't clear those fields here.
		 */
2377 h.h3->tp_status |= status;
2378 h.h3->tp_len = skb->len;
2379 h.h3->tp_snaplen = snaplen;
2380 h.h3->tp_mac = macoff;
2381 h.h3->tp_net = netoff;
2382 h.h3->tp_sec = ts.tv_sec;
2383 h.h3->tp_nsec = ts.tv_nsec;
2384 memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
2385 hdrlen = sizeof(*h.h3);
2386 break;
2387 default:
2388 BUG();
2389 }
2390
2391 sll = h.raw + TPACKET_ALIGN(hdrlen);
2392 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
2393 sll->sll_family = AF_PACKET;
2394 sll->sll_hatype = dev->type;
2395 sll->sll_protocol = skb->protocol;
2396 sll->sll_pkttype = skb->pkt_type;
2397 if (unlikely(po->origdev))
2398 sll->sll_ifindex = orig_dev->ifindex;
2399 else
2400 sll->sll_ifindex = dev->ifindex;
2401
2402 smp_mb();
2403
2404#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
2405 if (po->tp_version <= TPACKET_V2) {
2406 u8 *start, *end;
2407
2408 end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
2409 macoff + snaplen);
2410
2411 for (start = h.raw; start < end; start += PAGE_SIZE)
2412 flush_dcache_page(pgv_to_page(start));
2413 }
2414 smp_wmb();
2415#endif
2416
2417 if (po->tp_version <= TPACKET_V2) {
2418 spin_lock(&sk->sk_receive_queue.lock);
2419 __packet_set_status(po, h.raw, status);
2420 __clear_bit(slot_id, po->rx_ring.rx_owner_map);
2421 spin_unlock(&sk->sk_receive_queue.lock);
2422 sk->sk_data_ready(sk);
2423 } else if (po->tp_version == TPACKET_V3) {
2424 prb_clear_blk_fill_status(&po->rx_ring);
2425 }
2426
2427drop_n_restore:
2428 if (skb_head != skb->data && skb_shared(skb)) {
2429 skb->data = skb_head;
2430 skb->len = skb_len;
2431 }
2432drop:
2433 if (!is_drop_n_account)
2434 consume_skb(skb);
2435 else
2436 kfree_skb(skb);
2437 return 0;
2438
2439drop_n_account:
2440 spin_unlock(&sk->sk_receive_queue.lock);
2441 atomic_inc(&po->tp_drops);
2442 is_drop_n_account = true;
2443
2444 sk->sk_data_ready(sk);
2445 kfree_skb(copy_skb);
2446 goto drop_n_restore;
2447}
2448
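/* Destructor for skbs built from the TX ring: hand the frame slot back to
 * user space (TP_STATUS_AVAILABLE plus timestamp status bits), and complete
 * skb_completion once no more transmissions are pending so a blocking
 * sendmsg() can return.
 */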
2449static void tpacket_destruct_skb(struct sk_buff *skb)
2450{
2451 struct packet_sock *po = pkt_sk(skb->sk);
2452
2453 if (likely(po->tx_ring.pg_vec)) {
2454 void *ph;
2455 __u32 ts;
2456
2457 ph = skb_zcopy_get_nouarg(skb);
2458 packet_dec_pending(&po->tx_ring);
2459
2460 ts = __packet_set_timestamp(po, ph, skb);
2461 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2462
2463 if (!packet_read_pending(&po->tx_ring))
2464 complete(&po->skb_completion);
2465 }
2466
2467 sock_wfree(skb);
2468}
2469
2470static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2471{
2472 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2473 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2474 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2475 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2476 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2477 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2478 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2479
2480 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2481 return -EINVAL;
2482
2483 return 0;
2484}
2485
2486static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2487 struct virtio_net_hdr *vnet_hdr)
2488{
2489 if (*len < sizeof(*vnet_hdr))
2490 return -EINVAL;
2491 *len -= sizeof(*vnet_hdr);
2492
2493 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2494 return -EFAULT;
2495
2496 return __packet_snd_vnet_parse(vnet_hdr, *len);
2497}
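
/*
 * Editor's note: illustrative userspace sketch (not part of this file).
 * With PACKET_VNET_HDR enabled, every buffer handed to sendmsg() starts
 * with a struct virtio_net_hdr, which packet_snd_vnet_parse() above peels
 * off and validates before the frame itself. The offsets below are example
 * values and error handling is omitted.
 *
 *	struct virtio_net_hdr vh = {
 *		.flags       = VIRTIO_NET_HDR_F_NEEDS_CSUM,
 *		.csum_start  = ETH_HLEN + ip_hdr_len,
 *		.csum_offset = offsetof(struct tcphdr, check),
 *	};
 *	struct iovec iov[2] = {
 *		{ .iov_base = &vh,   .iov_len = sizeof(vh) },
 *		{ .iov_base = frame, .iov_len = frame_len },
 *	};
 *	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2 };
 *
 *	sendmsg(fd, &msg, 0);
 */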
2498
2499static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2500 void *frame, struct net_device *dev, void *data, int tp_len,
2501 __be16 proto, unsigned char *addr, int hlen, int copylen,
2502 const struct sockcm_cookie *sockc)
2503{
2504 union tpacket_uhdr ph;
2505 int to_write, offset, len, nr_frags, len_max;
2506 struct socket *sock = po->sk.sk_socket;
2507 struct page *page;
2508 int err;
2509
2510 ph.raw = frame;
2511
2512 skb->protocol = proto;
2513 skb->dev = dev;
2514 skb->priority = po->sk.sk_priority;
2515 skb->mark = po->sk.sk_mark;
2516 skb->tstamp = sockc->transmit_time;
2517 skb_setup_tx_timestamp(skb, sockc->tsflags);
2518 skb_zcopy_set_nouarg(skb, ph.raw);
2519
2520 skb_reserve(skb, hlen);
2521 skb_reset_network_header(skb);
2522
2523 to_write = tp_len;
2524
2525 if (sock->type == SOCK_DGRAM) {
2526 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2527 NULL, tp_len);
2528 if (unlikely(err < 0))
2529 return -EINVAL;
2530 } else if (copylen) {
2531 int hdrlen = min_t(int, copylen, tp_len);
2532
2533 skb_push(skb, dev->hard_header_len);
2534 skb_put(skb, copylen - dev->hard_header_len);
2535 err = skb_store_bits(skb, 0, data, hdrlen);
2536 if (unlikely(err))
2537 return err;
2538 if (!dev_validate_header(dev, skb->data, hdrlen))
2539 return -EINVAL;
2540
2541 data += hdrlen;
2542 to_write -= hdrlen;
2543 }
2544
2545 offset = offset_in_page(data);
2546 len_max = PAGE_SIZE - offset;
2547 len = ((to_write > len_max) ? len_max : to_write);
2548
2549 skb->data_len = to_write;
2550 skb->len += to_write;
2551 skb->truesize += to_write;
2552 refcount_add(to_write, &po->sk.sk_wmem_alloc);
2553
2554 while (likely(to_write)) {
2555 nr_frags = skb_shinfo(skb)->nr_frags;
2556
2557 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2558			pr_err("packet exceeds the maximum number of skb frags (%lu)\n",
2559			       MAX_SKB_FRAGS);
2560 return -EFAULT;
2561 }
2562
2563 page = pgv_to_page(data);
2564 data += len;
2565 flush_dcache_page(page);
2566 get_page(page);
2567 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2568 to_write -= len;
2569 offset = 0;
2570 len_max = PAGE_SIZE;
2571 len = ((to_write > len_max) ? len_max : to_write);
2572 }
2573
2574 packet_parse_headers(skb, sock);
2575
2576 return tp_len;
2577}
2578
2579static int tpacket_parse_header(struct packet_sock *po, void *frame,
2580 int size_max, void **data)
2581{
2582 union tpacket_uhdr ph;
2583 int tp_len, off;
2584
2585 ph.raw = frame;
2586
2587 switch (po->tp_version) {
2588 case TPACKET_V3:
2589 if (ph.h3->tp_next_offset != 0) {
2590			pr_warn_once("variable sized slot not supported\n");
2591 return -EINVAL;
2592 }
2593 tp_len = ph.h3->tp_len;
2594 break;
2595 case TPACKET_V2:
2596 tp_len = ph.h2->tp_len;
2597 break;
2598 default:
2599 tp_len = ph.h1->tp_len;
2600 break;
2601 }
2602 if (unlikely(tp_len > size_max)) {
2603		pr_err("packet is too long (%d > %d)\n", tp_len, size_max);
2604 return -EMSGSIZE;
2605 }
2606
2607 if (unlikely(po->tp_tx_has_off)) {
2608 int off_min, off_max;
2609
2610 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2611 off_max = po->tx_ring.frame_size - tp_len;
2612 if (po->sk.sk_type == SOCK_DGRAM) {
2613 switch (po->tp_version) {
2614 case TPACKET_V3:
2615 off = ph.h3->tp_net;
2616 break;
2617 case TPACKET_V2:
2618 off = ph.h2->tp_net;
2619 break;
2620 default:
2621 off = ph.h1->tp_net;
2622 break;
2623 }
2624 } else {
2625 switch (po->tp_version) {
2626 case TPACKET_V3:
2627 off = ph.h3->tp_mac;
2628 break;
2629 case TPACKET_V2:
2630 off = ph.h2->tp_mac;
2631 break;
2632 default:
2633 off = ph.h1->tp_mac;
2634 break;
2635 }
2636 }
2637 if (unlikely((off < off_min) || (off_max < off)))
2638 return -EINVAL;
2639 } else {
2640 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2641 }
2642
2643 *data = frame + off;
2644 return tp_len;
2645}
2646
2647static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2648{
2649 struct sk_buff *skb = NULL;
2650 struct net_device *dev;
2651 struct virtio_net_hdr *vnet_hdr = NULL;
2652 struct sockcm_cookie sockc;
2653 __be16 proto;
2654 int err, reserve = 0;
2655 void *ph;
2656 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2657 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2658 unsigned char *addr = NULL;
2659 int tp_len, size_max;
2660 void *data;
2661 int len_sum = 0;
2662 int status = TP_STATUS_AVAILABLE;
2663 int hlen, tlen, copylen = 0;
2664 long timeo = 0;
2665
2666 mutex_lock(&po->pg_vec_lock);
2667
	/* packet_sendmsg() checked tx_ring.pg_vec locklessly; re-check it
	 * here under pg_vec_lock before trusting the transmit ring.
	 */
2671 if (unlikely(!po->tx_ring.pg_vec)) {
2672 err = -EBUSY;
2673 goto out;
2674 }
2675 if (likely(saddr == NULL)) {
2676 dev = packet_cached_dev_get(po);
2677 proto = po->num;
2678 } else {
2679 err = -EINVAL;
2680 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2681 goto out;
2682 if (msg->msg_namelen < (saddr->sll_halen
2683 + offsetof(struct sockaddr_ll,
2684 sll_addr)))
2685 goto out;
2686 proto = saddr->sll_protocol;
2687 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2688 if (po->sk.sk_socket->type == SOCK_DGRAM) {
2689 if (dev && msg->msg_namelen < dev->addr_len +
2690 offsetof(struct sockaddr_ll, sll_addr))
2691 goto out_put;
2692 addr = saddr->sll_addr;
2693 }
2694 }
2695
2696 err = -ENXIO;
2697 if (unlikely(dev == NULL))
2698 goto out;
2699 err = -ENETDOWN;
2700 if (unlikely(!(dev->flags & IFF_UP)))
2701 goto out_put;
2702
2703 sockcm_init(&sockc, &po->sk);
2704 if (msg->msg_controllen) {
2705 err = sock_cmsg_send(&po->sk, msg, &sockc);
2706 if (unlikely(err))
2707 goto out_put;
2708 }
2709
2710 if (po->sk.sk_socket->type == SOCK_RAW)
2711 reserve = dev->hard_header_len;
2712 size_max = po->tx_ring.frame_size
2713 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2714
2715 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2716 size_max = dev->mtu + reserve + VLAN_HLEN;
2717
2718 reinit_completion(&po->skb_completion);
2719
2720 do {
2721 ph = packet_current_frame(po, &po->tx_ring,
2722 TP_STATUS_SEND_REQUEST);
2723 if (unlikely(ph == NULL)) {
2724 if (need_wait && skb) {
2725 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2726 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2727 if (timeo <= 0) {
2728 err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2729 goto out_put;
2730 }
2731 }
2732
2733 continue;
2734 }
2735
2736 skb = NULL;
2737 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2738 if (tp_len < 0)
2739 goto tpacket_error;
2740
2741 status = TP_STATUS_SEND_REQUEST;
2742 hlen = LL_RESERVED_SPACE(dev);
2743 tlen = dev->needed_tailroom;
2744 if (po->has_vnet_hdr) {
2745 vnet_hdr = data;
2746 data += sizeof(*vnet_hdr);
2747 tp_len -= sizeof(*vnet_hdr);
2748 if (tp_len < 0 ||
2749 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2750 tp_len = -EINVAL;
2751 goto tpacket_error;
2752 }
2753 copylen = __virtio16_to_cpu(vio_le(),
2754 vnet_hdr->hdr_len);
2755 }
2756 copylen = max_t(int, copylen, dev->hard_header_len);
2757 skb = sock_alloc_send_skb(&po->sk,
2758 hlen + tlen + sizeof(struct sockaddr_ll) +
2759 (copylen - dev->hard_header_len),
2760 !need_wait, &err);
2761
2762 if (unlikely(skb == NULL)) {
			/* Allocation failed: report frames already queued, if any. */
2764 if (likely(len_sum > 0))
2765 err = len_sum;
2766 goto out_status;
2767 }
2768 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2769 addr, hlen, copylen, &sockc);
2770 if (likely(tp_len >= 0) &&
2771 tp_len > dev->mtu + reserve &&
2772 !po->has_vnet_hdr &&
2773 !packet_extra_vlan_len_allowed(dev, skb))
2774 tp_len = -EMSGSIZE;
2775
2776 if (unlikely(tp_len < 0)) {
2777tpacket_error:
2778 if (po->tp_loss) {
2779 __packet_set_status(po, ph,
2780 TP_STATUS_AVAILABLE);
2781 packet_increment_head(&po->tx_ring);
2782 kfree_skb(skb);
2783 continue;
2784 } else {
2785 status = TP_STATUS_WRONG_FORMAT;
2786 err = tp_len;
2787 goto out_status;
2788 }
2789 }
2790
2791 if (po->has_vnet_hdr) {
2792 if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
2793 tp_len = -EINVAL;
2794 goto tpacket_error;
2795 }
2796 virtio_net_hdr_set_proto(skb, vnet_hdr);
2797 }
2798
2799 skb->destructor = tpacket_destruct_skb;
2800 __packet_set_status(po, ph, TP_STATUS_SENDING);
2801 packet_inc_pending(&po->tx_ring);
2802
2803 status = TP_STATUS_SEND_REQUEST;
2804 err = po->xmit(skb);
2805 if (unlikely(err > 0)) {
2806 err = net_xmit_errno(err);
2807 if (err && __packet_get_status(po, ph) ==
2808 TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
2810 skb = NULL;
2811 goto out_status;
2812 }

			/* skb was dropped but not destructed yet;
			 * treat it like congestion or err < 0.
			 */
2817 err = 0;
2818 }
2819 packet_increment_head(&po->tx_ring);
2820 len_sum += tp_len;
2821 } while (likely((ph != NULL) ||
		/* Note: packet_read_pending() might be slow, since it is a
		 * per-cpu counter that has to be summed up. In the common
		 * case the first condition (ph != NULL) short-circuits the
		 * test, so the expensive call is only made while draining
		 * the ring at the end of a blocking send.
		 */
2828 (need_wait && packet_read_pending(&po->tx_ring))));
2829
2830 err = len_sum;
2831 goto out_put;
2832
2833out_status:
2834 __packet_set_status(po, ph, status);
2835 kfree_skb(skb);
2836out_put:
2837 dev_put(dev);
2838out:
2839 mutex_unlock(&po->pg_vec_lock);
2840 return err;
2841}
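
/*
 * Editor's note: illustrative userspace sketch (not part of this file) of
 * the TX ring protocol served by tpacket_snd() above, shown for TPACKET_V2.
 * The application writes a frame into the mmap()ed ring, marks the slot
 * TP_STATUS_SEND_REQUEST and kicks the kernel with a zero-length send().
 * frame_nr, pkt and pkt_len are placeholders; error handling is omitted.
 *
 *	struct tpacket2_hdr *hdr = ring + frame_nr * req.tp_frame_size;
 *
 *	if (hdr->tp_status == TP_STATUS_AVAILABLE) {
 *		void *data = (char *)hdr + TPACKET2_HDRLEN -
 *			     sizeof(struct sockaddr_ll);
 *
 *		memcpy(data, pkt, pkt_len);		// complete L2 frame
 *		hdr->tp_len = pkt_len;
 *		__sync_synchronize();			// publish data before status
 *		hdr->tp_status = TP_STATUS_SEND_REQUEST;
 *		send(fd, NULL, 0, 0);			// flush all queued frames
 *	}
 */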
2842
2843static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2844 size_t reserve, size_t len,
2845 size_t linear, int noblock,
2846 int *err)
2847{
2848 struct sk_buff *skb;
2849
	/* Under a page? Don't bother with a paged skb. */
2851 if (prepad + len < PAGE_SIZE || !linear)
2852 linear = len;
2853
2854 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2855 err, 0);
2856 if (!skb)
2857 return NULL;
2858
2859 skb_reserve(skb, reserve);
2860 skb_put(skb, linear);
2861 skb->data_len = len - linear;
2862 skb->len += len - linear;
2863
2864 return skb;
2865}
2866
2867static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2868{
2869 struct sock *sk = sock->sk;
2870 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2871 struct sk_buff *skb;
2872 struct net_device *dev;
2873 __be16 proto;
2874 unsigned char *addr = NULL;
2875 int err, reserve = 0;
2876 struct sockcm_cookie sockc;
2877 struct virtio_net_hdr vnet_hdr = { 0 };
2878 int offset = 0;
2879 struct packet_sock *po = pkt_sk(sk);
2880 bool has_vnet_hdr = false;
2881 int hlen, tlen, linear;
2882 int extra_len = 0;
2883
	/*
	 *	Get and verify the address.
	 */

2888 if (likely(saddr == NULL)) {
2889 dev = packet_cached_dev_get(po);
2890 proto = po->num;
2891 } else {
2892 err = -EINVAL;
2893 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2894 goto out;
2895 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2896 goto out;
2897 proto = saddr->sll_protocol;
2898 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2899 if (sock->type == SOCK_DGRAM) {
2900 if (dev && msg->msg_namelen < dev->addr_len +
2901 offsetof(struct sockaddr_ll, sll_addr))
2902 goto out_unlock;
2903 addr = saddr->sll_addr;
2904 }
2905 }
2906
2907 err = -ENXIO;
2908 if (unlikely(dev == NULL))
2909 goto out_unlock;
2910 err = -ENETDOWN;
2911 if (unlikely(!(dev->flags & IFF_UP)))
2912 goto out_unlock;
2913
2914 sockcm_init(&sockc, sk);
2915 sockc.mark = sk->sk_mark;
2916 if (msg->msg_controllen) {
2917 err = sock_cmsg_send(sk, msg, &sockc);
2918 if (unlikely(err))
2919 goto out_unlock;
2920 }
2921
2922 if (sock->type == SOCK_RAW)
2923 reserve = dev->hard_header_len;
2924 if (po->has_vnet_hdr) {
2925 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2926 if (err)
2927 goto out_unlock;
2928 has_vnet_hdr = true;
2929 }
2930
2931 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2932 if (!netif_supports_nofcs(dev)) {
2933 err = -EPROTONOSUPPORT;
2934 goto out_unlock;
2935 }
2936 extra_len = 4;
2937 }
2938
2939 err = -EMSGSIZE;
2940 if (!vnet_hdr.gso_type &&
2941 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2942 goto out_unlock;
2943
2944 err = -ENOBUFS;
2945 hlen = LL_RESERVED_SPACE(dev);
2946 tlen = dev->needed_tailroom;
2947 linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
2948 linear = max(linear, min_t(int, len, dev->hard_header_len));
2949 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
2950 msg->msg_flags & MSG_DONTWAIT, &err);
2951 if (skb == NULL)
2952 goto out_unlock;
2953
2954 skb_reset_network_header(skb);
2955
2956 err = -EINVAL;
2957 if (sock->type == SOCK_DGRAM) {
2958 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2959 if (unlikely(offset < 0))
2960 goto out_free;
2961 } else if (reserve) {
2962 skb_reserve(skb, -reserve);
2963 if (len < reserve + sizeof(struct ipv6hdr) &&
2964 dev->min_header_len != dev->hard_header_len)
2965 skb_reset_network_header(skb);
2966 }
2967
	/* Returns -EFAULT on error */
2969 err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
2970 if (err)
2971 goto out_free;
2972
2973 if (sock->type == SOCK_RAW &&
2974 !dev_validate_header(dev, skb->data, len)) {
2975 err = -EINVAL;
2976 goto out_free;
2977 }
2978
2979 skb_setup_tx_timestamp(skb, sockc.tsflags);
2980
2981 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2982 !packet_extra_vlan_len_allowed(dev, skb)) {
2983 err = -EMSGSIZE;
2984 goto out_free;
2985 }
2986
2987 skb->protocol = proto;
2988 skb->dev = dev;
2989 skb->priority = sk->sk_priority;
2990 skb->mark = sockc.mark;
2991 skb->tstamp = sockc.transmit_time;
2992
2993 if (has_vnet_hdr) {
2994 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
2995 if (err)
2996 goto out_free;
2997 len += sizeof(vnet_hdr);
2998 virtio_net_hdr_set_proto(skb, &vnet_hdr);
2999 }
3000
3001 packet_parse_headers(skb, sock);
3002
3003 if (unlikely(extra_len == 4))
3004 skb->no_fcs = 1;
3005
3006 err = po->xmit(skb);
3007 if (err > 0 && (err = net_xmit_errno(err)) != 0)
3008 goto out_unlock;
3009
3010 dev_put(dev);
3011
3012 return len;
3013
3014out_free:
3015 kfree_skb(skb);
3016out_unlock:
3017 if (dev)
3018 dev_put(dev);
3019out:
3020 return err;
3021}
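
/*
 * Editor's note: illustrative userspace sketch (not part of this file) of
 * the non-ring transmit path handled by packet_snd() above, using a
 * SOCK_DGRAM packet socket so the kernel builds the link-layer header from
 * the sockaddr_ll. "eth0", payload and payload_len are placeholders.
 *
 *	int fd = socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));
 *	struct sockaddr_ll dst = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_IP),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *		.sll_halen    = ETH_ALEN,
 *		.sll_addr     = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
 *	};
 *
 *	sendto(fd, payload, payload_len, 0,
 *	       (struct sockaddr *)&dst, sizeof(dst));
 */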
3022
3023static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
3024{
3025 struct sock *sk = sock->sk;
3026 struct packet_sock *po = pkt_sk(sk);
3027
3028 if (po->tx_ring.pg_vec)
3029 return tpacket_snd(po, msg);
3030 else
3031 return packet_snd(sock, msg, len);
3032}
3033
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */
3039static int packet_release(struct socket *sock)
3040{
3041 struct sock *sk = sock->sk;
3042 struct packet_sock *po;
3043 struct packet_fanout *f;
3044 struct net *net;
3045 union tpacket_req_u req_u;
3046
3047 if (!sk)
3048 return 0;
3049
3050 net = sock_net(sk);
3051 po = pkt_sk(sk);
3052
3053 mutex_lock(&net->packet.sklist_lock);
3054 sk_del_node_init_rcu(sk);
3055 mutex_unlock(&net->packet.sklist_lock);
3056
3057 preempt_disable();
3058 sock_prot_inuse_add(net, sk->sk_prot, -1);
3059 preempt_enable();
3060
3061 spin_lock(&po->bind_lock);
3062 unregister_prot_hook(sk, false);
3063 packet_cached_dev_reset(po);
3064
3065 if (po->prot_hook.dev) {
3066 dev_put(po->prot_hook.dev);
3067 po->prot_hook.dev = NULL;
3068 }
3069 spin_unlock(&po->bind_lock);
3070
3071 packet_flush_mclist(sk);
3072
3073 lock_sock(sk);
3074 if (po->rx_ring.pg_vec) {
3075 memset(&req_u, 0, sizeof(req_u));
3076 packet_set_ring(sk, &req_u, 1, 0);
3077 }
3078
3079 if (po->tx_ring.pg_vec) {
3080 memset(&req_u, 0, sizeof(req_u));
3081 packet_set_ring(sk, &req_u, 1, 1);
3082 }
3083 release_sock(sk);
3084
3085 f = fanout_release(sk);
3086
3087 synchronize_net();
3088
3089 kfree(po->rollover);
3090 if (f) {
3091 fanout_release_data(f);
3092 kvfree(f);
3093 }
3094
	/* Now the socket is dead. No more input will appear. */
3097 sock_orphan(sk);
3098 sock->sk = NULL;
3099
	/* Purge queues */
3102 skb_queue_purge(&sk->sk_receive_queue);
3103 packet_free_pending(po);
3104 sk_refcnt_debug_release(sk);
3105
3106 sock_put(sk);
3107 return 0;
3108}
3109
/*
 *	Attach a packet hook.
 */
3114static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3115 __be16 proto)
3116{
3117 struct packet_sock *po = pkt_sk(sk);
3118 struct net_device *dev_curr;
3119 __be16 proto_curr;
3120 bool need_rehook;
3121 struct net_device *dev = NULL;
3122 int ret = 0;
3123 bool unlisted = false;
3124
3125 lock_sock(sk);
3126 spin_lock(&po->bind_lock);
3127 rcu_read_lock();
3128
3129 if (po->fanout) {
3130 ret = -EINVAL;
3131 goto out_unlock;
3132 }
3133
3134 if (name) {
3135 dev = dev_get_by_name_rcu(sock_net(sk), name);
3136 if (!dev) {
3137 ret = -ENODEV;
3138 goto out_unlock;
3139 }
3140 } else if (ifindex) {
3141 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3142 if (!dev) {
3143 ret = -ENODEV;
3144 goto out_unlock;
3145 }
3146 }
3147
3148 if (dev)
3149 dev_hold(dev);
3150
3151 proto_curr = po->prot_hook.type;
3152 dev_curr = po->prot_hook.dev;
3153
3154 need_rehook = proto_curr != proto || dev_curr != dev;
3155
3156 if (need_rehook) {
3157 if (po->running) {
3158 rcu_read_unlock();
			/* prevents packet_notifier() from calling
			 * register_prot_hook() while we are unbinding
			 */
3162 po->num = 0;
3163 __unregister_prot_hook(sk, true);
3164 rcu_read_lock();
3165 dev_curr = po->prot_hook.dev;
3166 if (dev)
3167 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3168 dev->ifindex);
3169 }
3170
3171 BUG_ON(po->running);
3172 po->num = proto;
3173 po->prot_hook.type = proto;
3174
3175 if (unlikely(unlisted)) {
3176 dev_put(dev);
3177 po->prot_hook.dev = NULL;
3178 po->ifindex = -1;
3179 packet_cached_dev_reset(po);
3180 } else {
3181 po->prot_hook.dev = dev;
3182 po->ifindex = dev ? dev->ifindex : 0;
3183 packet_cached_dev_assign(po, dev);
3184 }
3185 }
3186 if (dev_curr)
3187 dev_put(dev_curr);
3188
3189 if (proto == 0 || !need_rehook)
3190 goto out_unlock;
3191
3192 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3193 register_prot_hook(sk);
3194 } else {
3195 sk->sk_err = ENETDOWN;
3196 if (!sock_flag(sk, SOCK_DEAD))
3197 sk->sk_error_report(sk);
3198 }
3199
3200out_unlock:
3201 rcu_read_unlock();
3202 spin_unlock(&po->bind_lock);
3203 release_sock(sk);
3204 return ret;
3205}
3206
/*
 *	Bind a packet socket to a device.
 */
3211static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3212 int addr_len)
3213{
3214 struct sock *sk = sock->sk;
3215 char name[sizeof(uaddr->sa_data) + 1];
3216
	/*
	 *	Check legality
	 */

3221 if (addr_len != sizeof(struct sockaddr))
3222 return -EINVAL;
3223
	/* uaddr->sa_data comes from user space and is not guaranteed to be
	 * NUL-terminated, so copy it into a local buffer and terminate it. */
3226 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3227 name[sizeof(uaddr->sa_data)] = 0;
3228
3229 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3230}
3231
3232static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3233{
3234 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3235 struct sock *sk = sock->sk;
3236
	/*
	 *	Check legality
	 */

3241 if (addr_len < sizeof(struct sockaddr_ll))
3242 return -EINVAL;
3243 if (sll->sll_family != AF_PACKET)
3244 return -EINVAL;
3245
3246 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3247 sll->sll_protocol ? : pkt_sk(sk)->num);
3248}
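
/*
 * Editor's note: illustrative userspace sketch (not part of this file):
 * binding a PF_PACKET socket to one interface, which ends up in
 * packet_do_bind() above. "eth0" is a placeholder interface name.
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */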
3249
3250static struct proto packet_proto = {
3251 .name = "PACKET",
3252 .owner = THIS_MODULE,
3253 .obj_size = sizeof(struct packet_sock),
3254};
3255
/*
 *	Create a PF_PACKET socket.
 */
3260static int packet_create(struct net *net, struct socket *sock, int protocol,
3261 int kern)
3262{
3263 struct sock *sk;
3264 struct packet_sock *po;
3265 __be16 proto = (__force __be16)protocol;
3266 int err;
3267
3268 if (!ns_capable(net->user_ns, CAP_NET_RAW))
3269 return -EPERM;
3270 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3271 sock->type != SOCK_PACKET)
3272 return -ESOCKTNOSUPPORT;
3273
3274 sock->state = SS_UNCONNECTED;
3275
3276 err = -ENOBUFS;
3277 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3278 if (sk == NULL)
3279 goto out;
3280
3281 sock->ops = &packet_ops;
3282 if (sock->type == SOCK_PACKET)
3283 sock->ops = &packet_ops_spkt;
3284
3285 sock_init_data(sock, sk);
3286
3287 po = pkt_sk(sk);
3288 init_completion(&po->skb_completion);
3289 sk->sk_family = PF_PACKET;
3290 po->num = proto;
3291 po->xmit = dev_queue_xmit;
3292
3293 err = packet_alloc_pending(po);
3294 if (err)
3295 goto out2;
3296
3297 packet_cached_dev_reset(po);
3298
3299 sk->sk_destruct = packet_sock_destruct;
3300 sk_refcnt_debug_inc(sk);
3301
	/*
	 *	Attach a protocol block
	 */

3306 spin_lock_init(&po->bind_lock);
3307 mutex_init(&po->pg_vec_lock);
3308 po->rollover = NULL;
3309 po->prot_hook.func = packet_rcv;
3310
3311 if (sock->type == SOCK_PACKET)
3312 po->prot_hook.func = packet_rcv_spkt;
3313
3314 po->prot_hook.af_packet_priv = sk;
3315
3316 if (proto) {
3317 po->prot_hook.type = proto;
3318 __register_prot_hook(sk);
3319 }
3320
3321 mutex_lock(&net->packet.sklist_lock);
3322 sk_add_node_tail_rcu(sk, &net->packet.sklist);
3323 mutex_unlock(&net->packet.sklist_lock);
3324
3325 preempt_disable();
3326 sock_prot_inuse_add(net, &packet_proto, 1);
3327 preempt_enable();
3328
3329 return 0;
3330out2:
3331 sk_free(sk);
3332out:
3333 return err;
3334}
3335
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */
3341static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3342 int flags)
3343{
3344 struct sock *sk = sock->sk;
3345 struct sk_buff *skb;
3346 int copied, err;
3347 int vnet_hdr_len = 0;
3348 unsigned int origlen = 0;
3349
3350 err = -EINVAL;
3351 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3352 goto out;
3353
3354#if 0
3355
3356 if (pkt_sk(sk)->ifindex < 0)
3357 return -ENODEV;
3358#endif
3359
3360 if (flags & MSG_ERRQUEUE) {
3361 err = sock_recv_errqueue(sk, msg, len,
3362 SOL_PACKET, PACKET_TX_TIMESTAMP);
3363 goto out;
3364 }
3365
	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	It will return ENETDOWN if the device has just gone down,
	 *	but then it will block.
	 */

3375 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
3376
	/*
	 *	An error occurred, so return it. skb_recv_datagram()
	 *	handles the blocking, so we do not need to worry about
	 *	blocking retries here.
	 */

3383 if (skb == NULL)
3384 goto out;
3385
3386 packet_rcv_try_clear_pressure(pkt_sk(sk));
3387
3388 if (pkt_sk(sk)->has_vnet_hdr) {
3389 err = packet_rcv_vnet(msg, skb, &len);
3390 if (err)
3391 goto out_free;
3392 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3393 }
3394
	/* You lose any data beyond the buffer you gave. If it worries
	 * a user program they can ask the device for its MTU
	 * anyway.
	 */
3399 copied = skb->len;
3400 if (copied > len) {
3401 copied = len;
3402 msg->msg_flags |= MSG_TRUNC;
3403 }
3404
3405 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3406 if (err)
3407 goto out_free;
3408
3409 if (sock->type != SOCK_PACKET) {
3410 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3411
		/* Original length was stored in sockaddr_ll fields */
3413 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3414 sll->sll_family = AF_PACKET;
3415 sll->sll_protocol = skb->protocol;
3416 }
3417
3418 sock_recv_ts_and_drops(msg, sk, skb);
3419
3420 if (msg->msg_name) {
3421 int copy_len;

		/* If the address length field is there to be filled
		 * in, we fill it in now.
		 */
3426 if (sock->type == SOCK_PACKET) {
3427 __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3428 msg->msg_namelen = sizeof(struct sockaddr_pkt);
3429 copy_len = msg->msg_namelen;
3430 } else {
3431 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3432
3433 msg->msg_namelen = sll->sll_halen +
3434 offsetof(struct sockaddr_ll, sll_addr);
3435 copy_len = msg->msg_namelen;
3436 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
3437 memset(msg->msg_name +
3438 offsetof(struct sockaddr_ll, sll_addr),
3439 0, sizeof(sll->sll_addr));
3440 msg->msg_namelen = sizeof(struct sockaddr_ll);
3441 }
3442 }
3443 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
3444 }
3445
3446 if (pkt_sk(sk)->auxdata) {
3447 struct tpacket_auxdata aux;
3448
3449 aux.tp_status = TP_STATUS_USER;
3450 if (skb->ip_summed == CHECKSUM_PARTIAL)
3451 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3452 else if (skb->pkt_type != PACKET_OUTGOING &&
3453 (skb->ip_summed == CHECKSUM_COMPLETE ||
3454 skb_csum_unnecessary(skb)))
3455 aux.tp_status |= TP_STATUS_CSUM_VALID;
3456
3457 aux.tp_len = origlen;
3458 aux.tp_snaplen = skb->len;
3459 aux.tp_mac = 0;
3460 aux.tp_net = skb_network_offset(skb);
3461 if (skb_vlan_tag_present(skb)) {
3462 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3463 aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3464 aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3465 } else {
3466 aux.tp_vlan_tci = 0;
3467 aux.tp_vlan_tpid = 0;
3468 }
3469 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3470 }
3471

	/* Free or return the buffer as appropriate. Again this hides all
	 * the races and re-entrancy issues from us.
	 */
3476 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3477
3478out_free:
3479 skb_free_datagram(sk, skb);
3480out:
3481 return err;
3482}
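
/*
 * Editor's note: illustrative userspace sketch (not part of this file):
 * receiving one frame with PACKET_AUXDATA enabled, as serviced by
 * packet_recvmsg() above. Error handling is omitted.
 *
 *	int one = 1;
 *	setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one));
 *
 *	char buf[2048], cbuf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = {
 *		.msg_iov = &iov, .msg_iovlen = 1,
 *		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
 *	};
 *	ssize_t n = recvmsg(fd, &msg, 0);
 *	struct cmsghdr *c;
 *
 *	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
 *		if (c->cmsg_level == SOL_PACKET &&
 *		    c->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(c);
 *			// aux->tp_len:     original frame length
 *			// aux->tp_snaplen: bytes actually delivered
 *			// aux->tp_vlan_tci / tp_vlan_tpid: VLAN tag, if valid
 *		}
 *	}
 */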
3483
3484static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3485 int peer)
3486{
3487 struct net_device *dev;
3488 struct sock *sk = sock->sk;
3489
3490 if (peer)
3491 return -EOPNOTSUPP;
3492
3493 uaddr->sa_family = AF_PACKET;
3494 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3495 rcu_read_lock();
3496 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
3497 if (dev)
3498 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3499 rcu_read_unlock();
3500
3501 return sizeof(*uaddr);
3502}
3503
3504static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3505 int peer)
3506{
3507 struct net_device *dev;
3508 struct sock *sk = sock->sk;
3509 struct packet_sock *po = pkt_sk(sk);
3510 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3511
3512 if (peer)
3513 return -EOPNOTSUPP;
3514
3515 sll->sll_family = AF_PACKET;
3516 sll->sll_ifindex = po->ifindex;
3517 sll->sll_protocol = po->num;
3518 sll->sll_pkttype = 0;
3519 rcu_read_lock();
3520 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
3521 if (dev) {
3522 sll->sll_hatype = dev->type;
3523 sll->sll_halen = dev->addr_len;
3524 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3525 } else {
3526 sll->sll_hatype = 0;
3527 sll->sll_halen = 0;
3528 }
3529 rcu_read_unlock();
3530
3531 return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3532}
3533
3534static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3535 int what)
3536{
3537 switch (i->type) {
3538 case PACKET_MR_MULTICAST:
3539 if (i->alen != dev->addr_len)
3540 return -EINVAL;
3541 if (what > 0)
3542 return dev_mc_add(dev, i->addr);
3543 else
3544 return dev_mc_del(dev, i->addr);
3545 break;
3546 case PACKET_MR_PROMISC:
3547 return dev_set_promiscuity(dev, what);
3548 case PACKET_MR_ALLMULTI:
3549 return dev_set_allmulti(dev, what);
3550 case PACKET_MR_UNICAST:
3551 if (i->alen != dev->addr_len)
3552 return -EINVAL;
3553 if (what > 0)
3554 return dev_uc_add(dev, i->addr);
3555 else
3556 return dev_uc_del(dev, i->addr);
3557 break;
3558 default:
3559 break;
3560 }
3561 return 0;
3562}
3563
3564static void packet_dev_mclist_delete(struct net_device *dev,
3565 struct packet_mclist **mlp)
3566{
3567 struct packet_mclist *ml;
3568
3569 while ((ml = *mlp) != NULL) {
3570 if (ml->ifindex == dev->ifindex) {
3571 packet_dev_mc(dev, ml, -1);
3572 *mlp = ml->next;
3573 kfree(ml);
3574 } else
3575 mlp = &ml->next;
3576 }
3577}
3578
3579static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3580{
3581 struct packet_sock *po = pkt_sk(sk);
3582 struct packet_mclist *ml, *i;
3583 struct net_device *dev;
3584 int err;
3585
3586 rtnl_lock();
3587
3588 err = -ENODEV;
3589 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3590 if (!dev)
3591 goto done;
3592
3593 err = -EINVAL;
3594 if (mreq->mr_alen > dev->addr_len)
3595 goto done;
3596
3597 err = -ENOBUFS;
3598 i = kmalloc(sizeof(*i), GFP_KERNEL);
3599 if (i == NULL)
3600 goto done;
3601
3602 err = 0;
3603 for (ml = po->mclist; ml; ml = ml->next) {
3604 if (ml->ifindex == mreq->mr_ifindex &&
3605 ml->type == mreq->mr_type &&
3606 ml->alen == mreq->mr_alen &&
3607 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3608 ml->count++;
3609
3610 kfree(i);
3611 goto done;
3612 }
3613 }
3614
3615 i->type = mreq->mr_type;
3616 i->ifindex = mreq->mr_ifindex;
3617 i->alen = mreq->mr_alen;
3618 memcpy(i->addr, mreq->mr_address, i->alen);
3619 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3620 i->count = 1;
3621 i->next = po->mclist;
3622 po->mclist = i;
3623 err = packet_dev_mc(dev, i, 1);
3624 if (err) {
3625 po->mclist = i->next;
3626 kfree(i);
3627 }
3628
3629done:
3630 rtnl_unlock();
3631 return err;
3632}
3633
3634static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3635{
3636 struct packet_mclist *ml, **mlp;
3637
3638 rtnl_lock();
3639
3640 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3641 if (ml->ifindex == mreq->mr_ifindex &&
3642 ml->type == mreq->mr_type &&
3643 ml->alen == mreq->mr_alen &&
3644 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3645 if (--ml->count == 0) {
3646 struct net_device *dev;
3647 *mlp = ml->next;
3648 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3649 if (dev)
3650 packet_dev_mc(dev, ml, -1);
3651 kfree(ml);
3652 }
3653 break;
3654 }
3655 }
3656 rtnl_unlock();
3657 return 0;
3658}
3659
3660static void packet_flush_mclist(struct sock *sk)
3661{
3662 struct packet_sock *po = pkt_sk(sk);
3663 struct packet_mclist *ml;
3664
3665 if (!po->mclist)
3666 return;
3667
3668 rtnl_lock();
3669 while ((ml = po->mclist) != NULL) {
3670 struct net_device *dev;
3671
3672 po->mclist = ml->next;
3673 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3674 if (dev != NULL)
3675 packet_dev_mc(dev, ml, -1);
3676 kfree(ml);
3677 }
3678 rtnl_unlock();
3679}
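
/*
 * Editor's note: illustrative userspace sketch (not part of this file):
 * enabling promiscuous mode through PACKET_ADD_MEMBERSHIP, which is
 * serviced by packet_mc_add()/packet_dev_mc() above. "eth0" is a
 * placeholder interface name.
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP,
 *		   &mreq, sizeof(mreq));
 */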
3680
3681static int
3682packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
3683 unsigned int optlen)
3684{
3685 struct sock *sk = sock->sk;
3686 struct packet_sock *po = pkt_sk(sk);
3687 int ret;
3688
3689 if (level != SOL_PACKET)
3690 return -ENOPROTOOPT;
3691
3692 switch (optname) {
3693 case PACKET_ADD_MEMBERSHIP:
3694 case PACKET_DROP_MEMBERSHIP:
3695 {
3696 struct packet_mreq_max mreq;
3697 int len = optlen;
3698 memset(&mreq, 0, sizeof(mreq));
3699 if (len < sizeof(struct packet_mreq))
3700 return -EINVAL;
3701 if (len > sizeof(mreq))
3702 len = sizeof(mreq);
3703 if (copy_from_sockptr(&mreq, optval, len))
3704 return -EFAULT;
3705 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3706 return -EINVAL;
3707 if (optname == PACKET_ADD_MEMBERSHIP)
3708 ret = packet_mc_add(sk, &mreq);
3709 else
3710 ret = packet_mc_drop(sk, &mreq);
3711 return ret;
3712 }
3713
3714 case PACKET_RX_RING:
3715 case PACKET_TX_RING:
3716 {
3717 union tpacket_req_u req_u;
3718 int len;
3719
3720 lock_sock(sk);
3721 switch (po->tp_version) {
3722 case TPACKET_V1:
3723 case TPACKET_V2:
3724 len = sizeof(req_u.req);
3725 break;
3726 case TPACKET_V3:
3727 default:
3728 len = sizeof(req_u.req3);
3729 break;
3730 }
3731 if (optlen < len) {
3732 ret = -EINVAL;
3733 } else {
3734 if (copy_from_sockptr(&req_u.req, optval, len))
3735 ret = -EFAULT;
3736 else
3737 ret = packet_set_ring(sk, &req_u, 0,
3738 optname == PACKET_TX_RING);
3739 }
3740 release_sock(sk);
3741 return ret;
3742 }
3743 case PACKET_COPY_THRESH:
3744 {
3745 int val;
3746
3747 if (optlen != sizeof(val))
3748 return -EINVAL;
3749 if (copy_from_sockptr(&val, optval, sizeof(val)))
3750 return -EFAULT;
3751
3752 pkt_sk(sk)->copy_thresh = val;
3753 return 0;
3754 }
3755 case PACKET_VERSION:
3756 {
3757 int val;
3758
3759 if (optlen != sizeof(val))
3760 return -EINVAL;
3761 if (copy_from_sockptr(&val, optval, sizeof(val)))
3762 return -EFAULT;
3763 switch (val) {
3764 case TPACKET_V1:
3765 case TPACKET_V2:
3766 case TPACKET_V3:
3767 break;
3768 default:
3769 return -EINVAL;
3770 }
3771 lock_sock(sk);
3772 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3773 ret = -EBUSY;
3774 } else {
3775 po->tp_version = val;
3776 ret = 0;
3777 }
3778 release_sock(sk);
3779 return ret;
3780 }
3781 case PACKET_RESERVE:
3782 {
3783 unsigned int val;
3784
3785 if (optlen != sizeof(val))
3786 return -EINVAL;
3787 if (copy_from_sockptr(&val, optval, sizeof(val)))
3788 return -EFAULT;
3789 if (val > INT_MAX)
3790 return -EINVAL;
3791 lock_sock(sk);
3792 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3793 ret = -EBUSY;
3794 } else {
3795 po->tp_reserve = val;
3796 ret = 0;
3797 }
3798 release_sock(sk);
3799 return ret;
3800 }
3801 case PACKET_LOSS:
3802 {
3803 unsigned int val;
3804
3805 if (optlen != sizeof(val))
3806 return -EINVAL;
3807 if (copy_from_sockptr(&val, optval, sizeof(val)))
3808 return -EFAULT;
3809
3810 lock_sock(sk);
3811 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3812 ret = -EBUSY;
3813 } else {
3814 po->tp_loss = !!val;
3815 ret = 0;
3816 }
3817 release_sock(sk);
3818 return ret;
3819 }
3820 case PACKET_AUXDATA:
3821 {
3822 int val;
3823
3824 if (optlen < sizeof(val))
3825 return -EINVAL;
3826 if (copy_from_sockptr(&val, optval, sizeof(val)))
3827 return -EFAULT;
3828
3829 lock_sock(sk);
3830 po->auxdata = !!val;
3831 release_sock(sk);
3832 return 0;
3833 }
3834 case PACKET_ORIGDEV:
3835 {
3836 int val;
3837
3838 if (optlen < sizeof(val))
3839 return -EINVAL;
3840 if (copy_from_sockptr(&val, optval, sizeof(val)))
3841 return -EFAULT;
3842
3843 lock_sock(sk);
3844 po->origdev = !!val;
3845 release_sock(sk);
3846 return 0;
3847 }
3848 case PACKET_VNET_HDR:
3849 {
3850 int val;
3851
3852 if (sock->type != SOCK_RAW)
3853 return -EINVAL;
3854 if (optlen < sizeof(val))
3855 return -EINVAL;
3856 if (copy_from_sockptr(&val, optval, sizeof(val)))
3857 return -EFAULT;
3858
3859 lock_sock(sk);
3860 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3861 ret = -EBUSY;
3862 } else {
3863 po->has_vnet_hdr = !!val;
3864 ret = 0;
3865 }
3866 release_sock(sk);
3867 return ret;
3868 }
3869 case PACKET_TIMESTAMP:
3870 {
3871 int val;
3872
3873 if (optlen != sizeof(val))
3874 return -EINVAL;
3875 if (copy_from_sockptr(&val, optval, sizeof(val)))
3876 return -EFAULT;
3877
3878 po->tp_tstamp = val;
3879 return 0;
3880 }
3881 case PACKET_FANOUT:
3882 {
3883 struct fanout_args args = { 0 };
3884
3885 if (optlen != sizeof(int) && optlen != sizeof(args))
3886 return -EINVAL;
3887 if (copy_from_sockptr(&args, optval, optlen))
3888 return -EFAULT;
3889
3890 return fanout_add(sk, &args);
3891 }
3892 case PACKET_FANOUT_DATA:
3893 {
3894 if (!po->fanout)
3895 return -EINVAL;
3896
3897 return fanout_set_data(po, optval, optlen);
3898 }
3899 case PACKET_IGNORE_OUTGOING:
3900 {
3901 int val;
3902
3903 if (optlen != sizeof(val))
3904 return -EINVAL;
3905 if (copy_from_sockptr(&val, optval, sizeof(val)))
3906 return -EFAULT;
3907 if (val < 0 || val > 1)
3908 return -EINVAL;
3909
3910 po->prot_hook.ignore_outgoing = !!val;
3911 return 0;
3912 }
3913 case PACKET_TX_HAS_OFF:
3914 {
3915 unsigned int val;
3916
3917 if (optlen != sizeof(val))
3918 return -EINVAL;
3919 if (copy_from_sockptr(&val, optval, sizeof(val)))
3920 return -EFAULT;
3921
3922 lock_sock(sk);
3923 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3924 ret = -EBUSY;
3925 } else {
3926 po->tp_tx_has_off = !!val;
3927 ret = 0;
3928 }
3929 release_sock(sk);
3930		return ret;
3931 }
3932 case PACKET_QDISC_BYPASS:
3933 {
3934 int val;
3935
3936 if (optlen != sizeof(val))
3937 return -EINVAL;
3938 if (copy_from_sockptr(&val, optval, sizeof(val)))
3939 return -EFAULT;
3940
3941 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
3942 return 0;
3943 }
3944 default:
3945 return -ENOPROTOOPT;
3946 }
3947}
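
/*
 * Editor's note: illustrative userspace sketch (not part of this file):
 * selecting TPACKET_V3 and configuring an RX ring, i.e. the PACKET_VERSION
 * and PACKET_RX_RING paths of packet_setsockopt() above. The sizes are
 * example values; they only need to satisfy the checks in packet_set_ring()
 * below.
 *
 *	int ver = TPACKET_V3;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
 *
 *	struct tpacket_req3 req = {
 *		.tp_block_size	   = 1 << 22,
 *		.tp_block_nr	   = 64,
 *		.tp_frame_size	   = 1 << 11,
 *		.tp_frame_nr	   = ((1 << 22) / (1 << 11)) * 64,
 *		.tp_retire_blk_tov = 60,	// block timeout in ms
 *	};
 *
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */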
3948
3949static int packet_getsockopt(struct socket *sock, int level, int optname,
3950 char __user *optval, int __user *optlen)
3951{
3952 int len;
3953 int val, lv = sizeof(val);
3954 struct sock *sk = sock->sk;
3955 struct packet_sock *po = pkt_sk(sk);
3956 void *data = &val;
3957 union tpacket_stats_u st;
3958 struct tpacket_rollover_stats rstats;
3959 int drops;
3960
3961 if (level != SOL_PACKET)
3962 return -ENOPROTOOPT;
3963
3964 if (get_user(len, optlen))
3965 return -EFAULT;
3966
3967 if (len < 0)
3968 return -EINVAL;
3969
3970 switch (optname) {
3971 case PACKET_STATISTICS:
3972 spin_lock_bh(&sk->sk_receive_queue.lock);
3973 memcpy(&st, &po->stats, sizeof(st));
3974 memset(&po->stats, 0, sizeof(po->stats));
3975 spin_unlock_bh(&sk->sk_receive_queue.lock);
3976 drops = atomic_xchg(&po->tp_drops, 0);
3977
3978 if (po->tp_version == TPACKET_V3) {
3979 lv = sizeof(struct tpacket_stats_v3);
3980 st.stats3.tp_drops = drops;
3981 st.stats3.tp_packets += drops;
3982 data = &st.stats3;
3983 } else {
3984 lv = sizeof(struct tpacket_stats);
3985 st.stats1.tp_drops = drops;
3986 st.stats1.tp_packets += drops;
3987 data = &st.stats1;
3988 }
3989
3990 break;
3991 case PACKET_AUXDATA:
3992 val = po->auxdata;
3993 break;
3994 case PACKET_ORIGDEV:
3995 val = po->origdev;
3996 break;
3997 case PACKET_VNET_HDR:
3998 val = po->has_vnet_hdr;
3999 break;
4000 case PACKET_VERSION:
4001 val = po->tp_version;
4002 break;
4003 case PACKET_HDRLEN:
4004 if (len > sizeof(int))
4005 len = sizeof(int);
4006 if (len < sizeof(int))
4007 return -EINVAL;
4008 if (copy_from_user(&val, optval, len))
4009 return -EFAULT;
4010 switch (val) {
4011 case TPACKET_V1:
4012 val = sizeof(struct tpacket_hdr);
4013 break;
4014 case TPACKET_V2:
4015 val = sizeof(struct tpacket2_hdr);
4016 break;
4017 case TPACKET_V3:
4018 val = sizeof(struct tpacket3_hdr);
4019 break;
4020 default:
4021 return -EINVAL;
4022 }
4023 break;
4024 case PACKET_RESERVE:
4025 val = po->tp_reserve;
4026 break;
4027 case PACKET_LOSS:
4028 val = po->tp_loss;
4029 break;
4030 case PACKET_TIMESTAMP:
4031 val = po->tp_tstamp;
4032 break;
4033 case PACKET_FANOUT:
4034 val = (po->fanout ?
4035 ((u32)po->fanout->id |
4036 ((u32)po->fanout->type << 16) |
4037 ((u32)po->fanout->flags << 24)) :
4038 0);
4039 break;
4040 case PACKET_IGNORE_OUTGOING:
4041 val = po->prot_hook.ignore_outgoing;
4042 break;
4043 case PACKET_ROLLOVER_STATS:
4044 if (!po->rollover)
4045 return -EINVAL;
4046 rstats.tp_all = atomic_long_read(&po->rollover->num);
4047 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
4048 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
4049 data = &rstats;
4050 lv = sizeof(rstats);
4051 break;
4052 case PACKET_TX_HAS_OFF:
4053 val = po->tp_tx_has_off;
4054 break;
4055 case PACKET_QDISC_BYPASS:
4056 val = packet_use_direct_xmit(po);
4057 break;
4058 default:
4059 return -ENOPROTOOPT;
4060 }
4061
4062 if (len > lv)
4063 len = lv;
4064 if (put_user(len, optlen))
4065 return -EFAULT;
4066 if (copy_to_user(optval, data, len))
4067 return -EFAULT;
4068 return 0;
4069}
4070
4071static int packet_notifier(struct notifier_block *this,
4072 unsigned long msg, void *ptr)
4073{
4074 struct sock *sk;
4075 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4076 struct net *net = dev_net(dev);
4077
4078 rcu_read_lock();
4079 sk_for_each_rcu(sk, &net->packet.sklist) {
4080 struct packet_sock *po = pkt_sk(sk);
4081
4082 switch (msg) {
4083 case NETDEV_UNREGISTER:
4084 if (po->mclist)
4085 packet_dev_mclist_delete(dev, &po->mclist);
4086 fallthrough;
4087
4088 case NETDEV_DOWN:
4089 if (dev->ifindex == po->ifindex) {
4090 spin_lock(&po->bind_lock);
4091 if (po->running) {
4092 __unregister_prot_hook(sk, false);
4093 sk->sk_err = ENETDOWN;
4094 if (!sock_flag(sk, SOCK_DEAD))
4095 sk->sk_error_report(sk);
4096 }
4097 if (msg == NETDEV_UNREGISTER) {
4098 packet_cached_dev_reset(po);
4099 po->ifindex = -1;
4100 if (po->prot_hook.dev)
4101 dev_put(po->prot_hook.dev);
4102 po->prot_hook.dev = NULL;
4103 }
4104 spin_unlock(&po->bind_lock);
4105 }
4106 break;
4107 case NETDEV_UP:
4108 if (dev->ifindex == po->ifindex) {
4109 spin_lock(&po->bind_lock);
4110 if (po->num)
4111 register_prot_hook(sk);
4112 spin_unlock(&po->bind_lock);
4113 }
4114 break;
4115 }
4116 }
4117 rcu_read_unlock();
4118 return NOTIFY_DONE;
4119}
4120
4121
4122static int packet_ioctl(struct socket *sock, unsigned int cmd,
4123 unsigned long arg)
4124{
4125 struct sock *sk = sock->sk;
4126
4127 switch (cmd) {
4128 case SIOCOUTQ:
4129 {
4130 int amount = sk_wmem_alloc_get(sk);
4131
4132 return put_user(amount, (int __user *)arg);
4133 }
4134 case SIOCINQ:
4135 {
4136 struct sk_buff *skb;
4137 int amount = 0;
4138
4139 spin_lock_bh(&sk->sk_receive_queue.lock);
4140 skb = skb_peek(&sk->sk_receive_queue);
4141 if (skb)
4142 amount = skb->len;
4143 spin_unlock_bh(&sk->sk_receive_queue.lock);
4144 return put_user(amount, (int __user *)arg);
4145 }
4146#ifdef CONFIG_INET
4147 case SIOCADDRT:
4148 case SIOCDELRT:
4149 case SIOCDARP:
4150 case SIOCGARP:
4151 case SIOCSARP:
4152 case SIOCGIFADDR:
4153 case SIOCSIFADDR:
4154 case SIOCGIFBRDADDR:
4155 case SIOCSIFBRDADDR:
4156 case SIOCGIFNETMASK:
4157 case SIOCSIFNETMASK:
4158 case SIOCGIFDSTADDR:
4159 case SIOCSIFDSTADDR:
4160 case SIOCSIFFLAGS:
4161 return inet_dgram_ops.ioctl(sock, cmd, arg);
4162#endif
4163
4164 default:
4165 return -ENOIOCTLCMD;
4166 }
4167 return 0;
4168}
4169
4170static __poll_t packet_poll(struct file *file, struct socket *sock,
4171 poll_table *wait)
4172{
4173 struct sock *sk = sock->sk;
4174 struct packet_sock *po = pkt_sk(sk);
4175 __poll_t mask = datagram_poll(file, sock, wait);
4176
4177 spin_lock_bh(&sk->sk_receive_queue.lock);
4178 if (po->rx_ring.pg_vec) {
4179 if (!packet_previous_rx_frame(po, &po->rx_ring,
4180 TP_STATUS_KERNEL))
4181 mask |= EPOLLIN | EPOLLRDNORM;
4182 }
4183 packet_rcv_try_clear_pressure(po);
4184 spin_unlock_bh(&sk->sk_receive_queue.lock);
4185 spin_lock_bh(&sk->sk_write_queue.lock);
4186 if (po->tx_ring.pg_vec) {
4187 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4188 mask |= EPOLLOUT | EPOLLWRNORM;
4189 }
4190 spin_unlock_bh(&sk->sk_write_queue.lock);
4191 return mask;
4192}
4193
/* Track user mmaps of the ring buffers so that packet_set_ring() can
 * refuse to resize or free a ring that is still mapped.
 */
4199static void packet_mm_open(struct vm_area_struct *vma)
4200{
4201 struct file *file = vma->vm_file;
4202 struct socket *sock = file->private_data;
4203 struct sock *sk = sock->sk;
4204
4205 if (sk)
4206 atomic_inc(&pkt_sk(sk)->mapped);
4207}
4208
4209static void packet_mm_close(struct vm_area_struct *vma)
4210{
4211 struct file *file = vma->vm_file;
4212 struct socket *sock = file->private_data;
4213 struct sock *sk = sock->sk;
4214
4215 if (sk)
4216 atomic_dec(&pkt_sk(sk)->mapped);
4217}
4218
4219static const struct vm_operations_struct packet_mmap_ops = {
4220 .open = packet_mm_open,
4221 .close = packet_mm_close,
4222};
4223
4224static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4225 unsigned int len)
4226{
4227 int i;
4228
4229 for (i = 0; i < len; i++) {
4230 if (likely(pg_vec[i].buffer)) {
4231 if (is_vmalloc_addr(pg_vec[i].buffer))
4232 vfree(pg_vec[i].buffer);
4233 else
4234 free_pages((unsigned long)pg_vec[i].buffer,
4235 order);
4236 pg_vec[i].buffer = NULL;
4237 }
4238 }
4239 kfree(pg_vec);
4240}
4241
4242static char *alloc_one_pg_vec_page(unsigned long order)
4243{
4244 char *buffer;
4245 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4246 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4247
4248 buffer = (char *) __get_free_pages(gfp_flags, order);
4249 if (buffer)
4250 return buffer;
4251
	/* __get_free_pages failed; fall back to vmalloc */
4253 buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4254 if (buffer)
4255 return buffer;
4256
	/* vmalloc also failed; retry the page allocator without __GFP_NORETRY */
4258 gfp_flags &= ~__GFP_NORETRY;
4259 buffer = (char *) __get_free_pages(gfp_flags, order);
4260 if (buffer)
4261 return buffer;
4262
	/* complete and utter failure */
4264 return NULL;
4265}
4266
4267static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4268{
4269 unsigned int block_nr = req->tp_block_nr;
4270 struct pgv *pg_vec;
4271 int i;
4272
4273 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
4274 if (unlikely(!pg_vec))
4275 goto out;
4276
4277 for (i = 0; i < block_nr; i++) {
4278 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4279 if (unlikely(!pg_vec[i].buffer))
4280 goto out_free_pgvec;
4281 }
4282
4283out:
4284 return pg_vec;
4285
4286out_free_pgvec:
4287 free_pg_vec(pg_vec, order, block_nr);
4288 pg_vec = NULL;
4289 goto out;
4290}
4291
4292static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4293 int closing, int tx_ring)
4294{
4295 struct pgv *pg_vec = NULL;
4296 struct packet_sock *po = pkt_sk(sk);
4297 unsigned long *rx_owner_map = NULL;
4298 int was_running, order = 0;
4299 struct packet_ring_buffer *rb;
4300 struct sk_buff_head *rb_queue;
4301 __be16 num;
4302 int err;
4303
4304 struct tpacket_req *req = &req_u->req;
4305
4306 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4307 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4308
4309 err = -EBUSY;
4310 if (!closing) {
4311 if (atomic_read(&po->mapped))
4312 goto out;
4313 if (packet_read_pending(rb))
4314 goto out;
4315 }
4316
4317 if (req->tp_block_nr) {
4318 unsigned int min_frame_size;
4319
		/* Sanity tests and some calculations */
4321 err = -EBUSY;
4322 if (unlikely(rb->pg_vec))
4323 goto out;
4324
4325 switch (po->tp_version) {
4326 case TPACKET_V1:
4327 po->tp_hdrlen = TPACKET_HDRLEN;
4328 break;
4329 case TPACKET_V2:
4330 po->tp_hdrlen = TPACKET2_HDRLEN;
4331 break;
4332 case TPACKET_V3:
4333 po->tp_hdrlen = TPACKET3_HDRLEN;
4334 break;
4335 }
4336
4337 err = -EINVAL;
4338 if (unlikely((int)req->tp_block_size <= 0))
4339 goto out;
4340 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4341 goto out;
4342 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4343 if (po->tp_version >= TPACKET_V3 &&
4344 req->tp_block_size <
4345 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4346 goto out;
4347 if (unlikely(req->tp_frame_size < min_frame_size))
4348 goto out;
4349 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4350 goto out;
4351
4352 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4353 if (unlikely(rb->frames_per_block == 0))
4354 goto out;
4355 if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
4356 goto out;
4357 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4358 req->tp_frame_nr))
4359 goto out;
4360
4361 err = -ENOMEM;
4362 order = get_order(req->tp_block_size);
4363 pg_vec = alloc_pg_vec(req, order);
4364 if (unlikely(!pg_vec))
4365 goto out;
4366 switch (po->tp_version) {
4367 case TPACKET_V3:
			/* Block transmit is not supported yet */
4369 if (!tx_ring) {
4370 init_prb_bdqc(po, rb, pg_vec, req_u);
4371 } else {
4372 struct tpacket_req3 *req3 = &req_u->req3;
4373
4374 if (req3->tp_retire_blk_tov ||
4375 req3->tp_sizeof_priv ||
4376 req3->tp_feature_req_word) {
4377 err = -EINVAL;
4378 goto out_free_pg_vec;
4379 }
4380 }
4381 break;
4382 default:
4383 if (!tx_ring) {
4384 rx_owner_map = bitmap_alloc(req->tp_frame_nr,
4385 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
4386 if (!rx_owner_map)
4387 goto out_free_pg_vec;
4388 }
4389 break;
4390 }
4391 }
4392
4393 else {
4394 err = -EINVAL;
4395 if (unlikely(req->tp_frame_nr))
4396 goto out;
4397 }
4398
4399
	/* Detach socket from network */
4401 spin_lock(&po->bind_lock);
4402 was_running = po->running;
4403 num = po->num;
4404 if (was_running) {
4405 po->num = 0;
4406 __unregister_prot_hook(sk, false);
4407 }
4408 spin_unlock(&po->bind_lock);
4409
4410 synchronize_net();
4411
4412 err = -EBUSY;
4413 mutex_lock(&po->pg_vec_lock);
4414 if (closing || atomic_read(&po->mapped) == 0) {
4415 err = 0;
4416 spin_lock_bh(&rb_queue->lock);
4417 swap(rb->pg_vec, pg_vec);
4418 if (po->tp_version <= TPACKET_V2)
4419 swap(rb->rx_owner_map, rx_owner_map);
4420 rb->frame_max = (req->tp_frame_nr - 1);
4421 rb->head = 0;
4422 rb->frame_size = req->tp_frame_size;
4423 spin_unlock_bh(&rb_queue->lock);
4424
4425 swap(rb->pg_vec_order, order);
4426 swap(rb->pg_vec_len, req->tp_block_nr);
4427
4428 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4429 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4430 tpacket_rcv : packet_rcv;
4431 skb_queue_purge(rb_queue);
4432 if (atomic_read(&po->mapped))
4433 pr_err("packet_mmap: vma is busy: %d\n",
4434 atomic_read(&po->mapped));
4435 }
4436 mutex_unlock(&po->pg_vec_lock);
4437
4438 spin_lock(&po->bind_lock);
4439 if (was_running) {
4440 po->num = num;
4441 register_prot_hook(sk);
4442 }
4443 spin_unlock(&po->bind_lock);
4444 if (pg_vec && (po->tp_version > TPACKET_V2)) {
		/* Block-based TPACKET_V3 is not supported on the tx ring */
4446 if (!tx_ring)
4447 prb_shutdown_retire_blk_timer(po, rb_queue);
4448 }
4449
4450out_free_pg_vec:
4451 bitmap_free(rx_owner_map);
4452 if (pg_vec)
4453 free_pg_vec(pg_vec, order, req->tp_block_nr);
4454out:
4455 return err;
4456}
4457
4458static int packet_mmap(struct file *file, struct socket *sock,
4459 struct vm_area_struct *vma)
4460{
4461 struct sock *sk = sock->sk;
4462 struct packet_sock *po = pkt_sk(sk);
4463 unsigned long size, expected_size;
4464 struct packet_ring_buffer *rb;
4465 unsigned long start;
4466 int err = -EINVAL;
4467 int i;
4468
4469 if (vma->vm_pgoff)
4470 return -EINVAL;
4471
4472 mutex_lock(&po->pg_vec_lock);
4473
4474 expected_size = 0;
4475 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4476 if (rb->pg_vec) {
4477 expected_size += rb->pg_vec_len
4478 * rb->pg_vec_pages
4479 * PAGE_SIZE;
4480 }
4481 }
4482
4483 if (expected_size == 0)
4484 goto out;
4485
4486 size = vma->vm_end - vma->vm_start;
4487 if (size != expected_size)
4488 goto out;
4489
4490 start = vma->vm_start;
4491 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4492 if (rb->pg_vec == NULL)
4493 continue;
4494
4495 for (i = 0; i < rb->pg_vec_len; i++) {
4496 struct page *page;
4497 void *kaddr = rb->pg_vec[i].buffer;
4498 int pg_num;
4499
4500 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4501 page = pgv_to_page(kaddr);
4502 err = vm_insert_page(vma, start, page);
4503 if (unlikely(err))
4504 goto out;
4505 start += PAGE_SIZE;
4506 kaddr += PAGE_SIZE;
4507 }
4508 }
4509 }
4510
4511 atomic_inc(&po->mapped);
4512 vma->vm_ops = &packet_mmap_ops;
4513 err = 0;
4514
4515out:
4516 mutex_unlock(&po->pg_vec_lock);
4517 return err;
4518}
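
/*
 * Editor's note: illustrative userspace sketch (not part of this file):
 * mapping the ring configured above (serviced by packet_mmap()) and walking
 * one retired TPACKET_V3 block. cur_block is a placeholder index and error
 * handling is omitted.
 *
 *	size_t map_len = (size_t)req.tp_block_size * req.tp_block_nr;
 *	void *ring = mmap(NULL, map_len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	poll(&pfd, 1, -1);
 *
 *	struct tpacket_block_desc *bd =
 *		(void *)((char *)ring + cur_block * req.tp_block_size);
 *
 *	if (bd->hdr.bh1.block_status & TP_STATUS_USER) {
 *		struct tpacket3_hdr *ppd = (void *)((char *)bd +
 *					   bd->hdr.bh1.offset_to_first_pkt);
 *		// visit bd->hdr.bh1.num_pkts packets, advancing by
 *		// ppd->tp_next_offset, then hand the block back:
 *		bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
 *	}
 */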
4519
4520static const struct proto_ops packet_ops_spkt = {
4521 .family = PF_PACKET,
4522 .owner = THIS_MODULE,
4523 .release = packet_release,
4524 .bind = packet_bind_spkt,
4525 .connect = sock_no_connect,
4526 .socketpair = sock_no_socketpair,
4527 .accept = sock_no_accept,
4528 .getname = packet_getname_spkt,
4529 .poll = datagram_poll,
4530 .ioctl = packet_ioctl,
4531 .gettstamp = sock_gettstamp,
4532 .listen = sock_no_listen,
4533 .shutdown = sock_no_shutdown,
4534 .sendmsg = packet_sendmsg_spkt,
4535 .recvmsg = packet_recvmsg,
4536 .mmap = sock_no_mmap,
4537 .sendpage = sock_no_sendpage,
4538};
4539
4540static const struct proto_ops packet_ops = {
4541 .family = PF_PACKET,
4542 .owner = THIS_MODULE,
4543 .release = packet_release,
4544 .bind = packet_bind,
4545 .connect = sock_no_connect,
4546 .socketpair = sock_no_socketpair,
4547 .accept = sock_no_accept,
4548 .getname = packet_getname,
4549 .poll = packet_poll,
4550 .ioctl = packet_ioctl,
4551 .gettstamp = sock_gettstamp,
4552 .listen = sock_no_listen,
4553 .shutdown = sock_no_shutdown,
4554 .setsockopt = packet_setsockopt,
4555 .getsockopt = packet_getsockopt,
4556 .sendmsg = packet_sendmsg,
4557 .recvmsg = packet_recvmsg,
4558 .mmap = packet_mmap,
4559 .sendpage = sock_no_sendpage,
4560};
4561
4562static const struct net_proto_family packet_family_ops = {
4563 .family = PF_PACKET,
4564 .create = packet_create,
4565 .owner = THIS_MODULE,
4566};
4567
4568static struct notifier_block packet_netdev_notifier = {
4569 .notifier_call = packet_notifier,
4570};
4571
4572#ifdef CONFIG_PROC_FS
4573
4574static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4575 __acquires(RCU)
4576{
4577 struct net *net = seq_file_net(seq);
4578
4579 rcu_read_lock();
4580 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4581}
4582
4583static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4584{
4585 struct net *net = seq_file_net(seq);
4586 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4587}
4588
4589static void packet_seq_stop(struct seq_file *seq, void *v)
4590 __releases(RCU)
4591{
4592 rcu_read_unlock();
4593}
4594
4595static int packet_seq_show(struct seq_file *seq, void *v)
4596{
4597 if (v == SEQ_START_TOKEN)
4598 seq_printf(seq,
4599 "%*sRefCnt Type Proto Iface R Rmem User Inode\n",
4600 IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
4601 else {
4602 struct sock *s = sk_entry(v);
4603 const struct packet_sock *po = pkt_sk(s);
4604
4605 seq_printf(seq,
4606 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4607 s,
4608 refcount_read(&s->sk_refcnt),
4609 s->sk_type,
4610 ntohs(po->num),
4611 po->ifindex,
4612 po->running,
4613 atomic_read(&s->sk_rmem_alloc),
4614 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4615 sock_i_ino(s));
4616 }
4617
4618 return 0;
4619}
4620
4621static const struct seq_operations packet_seq_ops = {
4622 .start = packet_seq_start,
4623 .next = packet_seq_next,
4624 .stop = packet_seq_stop,
4625 .show = packet_seq_show,
4626};
4627#endif
4628
4629static int __net_init packet_net_init(struct net *net)
4630{
4631 mutex_init(&net->packet.sklist_lock);
4632 INIT_HLIST_HEAD(&net->packet.sklist);
4633
4634#ifdef CONFIG_PROC_FS
4635 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
4636 sizeof(struct seq_net_private)))
4637 return -ENOMEM;
4638#endif
4639
4640 return 0;
4641}
4642
4643static void __net_exit packet_net_exit(struct net *net)
4644{
4645 remove_proc_entry("packet", net->proc_net);
4646 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4647}
4648
4649static struct pernet_operations packet_net_ops = {
4650 .init = packet_net_init,
4651 .exit = packet_net_exit,
4652};
4653
4654
4655static void __exit packet_exit(void)
4656{
4657 unregister_netdevice_notifier(&packet_netdev_notifier);
4658 unregister_pernet_subsys(&packet_net_ops);
4659 sock_unregister(PF_PACKET);
4660 proto_unregister(&packet_proto);
4661}
4662
4663static int __init packet_init(void)
4664{
4665 int rc;
4666
4667 rc = proto_register(&packet_proto, 0);
4668 if (rc)
4669 goto out;
4670 rc = sock_register(&packet_family_ops);
4671 if (rc)
4672 goto out_proto;
4673 rc = register_pernet_subsys(&packet_net_ops);
4674 if (rc)
4675 goto out_sock;
4676 rc = register_netdevice_notifier(&packet_netdev_notifier);
4677 if (rc)
4678 goto out_pernet;
4679
4680 return 0;
4681
4682out_pernet:
4683 unregister_pernet_subsys(&packet_net_ops);
4684out_sock:
4685 sock_unregister(PF_PACKET);
4686out_proto:
4687 proto_unregister(&packet_proto);
4688out:
4689 return rc;
4690}
4691
4692module_init(packet_init);
4693module_exit(packet_exit);
4694MODULE_LICENSE("GPL");
4695MODULE_ALIAS_NETPROTO(PF_PACKET);
4696