#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/wireless.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/net_namespace.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/ioctls.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/io.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/poll.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/if_vlan.h>
#include <linux/virtio_net.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>
#include <linux/percpu.h>
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
#include <linux/bpf.h>
#include <net/compat.h>

#include "internal.h"

struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};

union tpacket_uhdr {
	struct tpacket_hdr  *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};

static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
		int closing, int tx_ring);

#define V3_ALIGNMENT	(8)

#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)

struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
			struct packet_sock *);
static void prb_retire_current_block(struct tpacket_kbdq_core *,
		struct packet_sock *, unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *);
static void prb_open_block(struct tpacket_kbdq_core *,
		struct tpacket_block_desc *);
static void prb_retire_rx_blk_timer_expired(struct timer_list *);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
static void prb_clear_rxhash(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
		struct tpacket3_hdr *);
static void packet_flush_mclist(struct sock *sk);
static u16 packet_pick_tx_queue(struct sk_buff *skb);

struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
			 * ll.sll_family and ll.protocol in order
			 * to save room.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};

#define vio_le() virtio_legacy_is_little_endian()

#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)

static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);

static int packet_direct_xmit(struct sk_buff *skb)
{
	return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
}

static struct net_device *packet_cached_dev_get(struct packet_sock *po)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = rcu_dereference(po->cached_dev);
	dev_hold(dev);
	rcu_read_unlock();

	return dev;
}

static void packet_cached_dev_assign(struct packet_sock *po,
				     struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}

static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}

static bool packet_use_direct_xmit(const struct packet_sock *po)
{
	return po->xmit == packet_direct_xmit;
}

static u16 packet_pick_tx_queue(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	const struct net_device_ops *ops = dev->netdev_ops;
	int cpu = raw_smp_processor_id();
	u16 queue_index;

#ifdef CONFIG_XPS
	skb->sender_cpu = cpu + 1;
#endif
	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
	if (ops->ndo_select_queue) {
		queue_index = ops->ndo_select_queue(dev, skb, NULL);
		queue_index = netdev_cap_txqueue(dev, queue_index);
	} else {
		queue_index = netdev_pick_tx(dev, skb, NULL);
	}

	return queue_index;
}

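/* __register_prot_hook must be invoked through register_prot_hook
 * or from a context in which asynchronous accesses to the packet
 * socket is not possible (packet_create()).
 */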
static void __register_prot_hook(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);

	if (!po->running) {
		if (po->fanout)
			__fanout_link(sk, po);
		else
			dev_add_pack(&po->prot_hook);

		sock_hold(sk);
		po->running = 1;
	}
}

static void register_prot_hook(struct sock *sk)
{
	lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
	__register_prot_hook(sk);
}

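/* If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  Must be invoked with the po->bind_lock held.
 */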
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	lockdep_assert_held_once(&po->bind_lock);

	po->running = 0;

	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);

	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}

static void unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	if (po->running)
		__unregister_prot_hook(sk, sync);
}

static inline struct page * __pure pgv_to_page(void *addr)
{
	if (is_vmalloc_addr(addr))
		return vmalloc_to_page(addr);
	return virt_to_page(addr);
}

static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
	union tpacket_uhdr h;

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		break;
	case TPACKET_V2:
		h.h2->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		break;
	case TPACKET_V3:
		h.h3->tp_status = status;
		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
		break;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	smp_wmb();
}

static int __packet_get_status(const struct packet_sock *po, void *frame)
{
	union tpacket_uhdr h;

	smp_rmb();

	h.raw = frame;
	switch (po->tp_version) {
	case TPACKET_V1:
		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
		return h.h1->tp_status;
	case TPACKET_V2:
		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
		return h.h2->tp_status;
	case TPACKET_V3:
		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
		return h.h3->tp_status;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return 0;
	}
}

static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
				   unsigned int flags)
{
	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);

	if (shhwtstamps &&
	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
		return TP_STATUS_TS_RAW_HARDWARE;

	if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
	    ktime_to_timespec64_cond(skb->tstamp, ts))
		return TP_STATUS_TS_SOFTWARE;

	return 0;
}

static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
				    struct sk_buff *skb)
{
	union tpacket_uhdr h;
	struct timespec64 ts;
	__u32 ts_status;

	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
		return 0;

	h.raw = frame;
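
	/*
	 * versions 1 through 3 overflow the timestamps in y2106, since they
	 * all store the seconds in a 32-bit unsigned integer.
	 * If we create a version 4, that should have a 64-bit timestamp,
	 * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
	 * nanoseconds.
	 */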
	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_sec = ts.tv_sec;
		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
		break;
	case TPACKET_V2:
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		break;
	case TPACKET_V3:
		h.h3->tp_sec = ts.tv_sec;
		h.h3->tp_nsec = ts.tv_nsec;
		break;
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
	}

	/* one flush is safe, as both fields always lie on the same cacheline */
	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
	smp_wmb();

	return ts_status;
}

static void *packet_lookup_frame(const struct packet_sock *po,
				 const struct packet_ring_buffer *rb,
				 unsigned int position,
				 int status)
{
	unsigned int pg_vec_pos, frame_offset;
	union tpacket_uhdr h;

	pg_vec_pos = position / rb->frames_per_block;
	frame_offset = position % rb->frames_per_block;

	h.raw = rb->pg_vec[pg_vec_pos].buffer +
		(frame_offset * rb->frame_size);

	if (status != __packet_get_status(po, h.raw))
		return NULL;

	return h.raw;
}

static void *packet_current_frame(struct packet_sock *po,
				  struct packet_ring_buffer *rb,
				  int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}

static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}

static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
		struct sk_buff_head *rb_queue)
{
	struct tpacket_kbdq_core *pkc;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);

	spin_lock_bh(&rb_queue->lock);
	pkc->delete_blk_timer = 1;
	spin_unlock_bh(&rb_queue->lock);

	prb_del_retire_blk_timer(pkc);
}

static void prb_setup_retire_blk_timer(struct packet_sock *po)
{
	struct tpacket_kbdq_core *pkc;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
		    0);
	pkc->retire_blk_timer.expires = jiffies;
}

static int prb_calc_retire_blk_tmo(struct packet_sock *po,
				int blk_size_in_bytes)
{
	struct net_device *dev;
	unsigned int mbits, div;
	struct ethtool_link_ksettings ecmd;
	int err;

	rtnl_lock();
	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
	if (unlikely(!dev)) {
		rtnl_unlock();
		return DEFAULT_PRB_RETIRE_TOV;
	}
	err = __ethtool_get_link_ksettings(dev, &ecmd);
	rtnl_unlock();
	if (err)
		return DEFAULT_PRB_RETIRE_TOV;

	/* If the link speed is so slow you don't really
	 * need to worry about perf anyways
	 */
	if (ecmd.base.speed < SPEED_1000 ||
	    ecmd.base.speed == SPEED_UNKNOWN)
		return DEFAULT_PRB_RETIRE_TOV;

	div = ecmd.base.speed / 1000;
	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);

	if (div)
		mbits /= div;

	if (div)
		return mbits + 1;
	return mbits;
}

static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
			union tpacket_req_u *req_u)
{
	p1->feature_req_word = req_u->req3.tp_feature_req_word;
}

static void init_prb_bdqc(struct packet_sock *po,
			struct packet_ring_buffer *rb,
			struct pgv *pg_vec,
			union tpacket_req_u *req_u)
{
	struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd;

	memset(p1, 0x0, sizeof(*p1));

	p1->knxt_seq_num = 1;
	p1->pkbdq = pg_vec;
	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
	p1->pkblk_start	= pg_vec[0].buffer;
	p1->kblk_size = req_u->req3.tp_block_size;
	p1->knum_blocks	= req_u->req3.tp_block_nr;
	p1->hdrlen = po->tp_hdrlen;
	p1->version = po->tp_version;
	p1->last_kactive_blk_num = 0;
	po->stats.stats3.tp_freeze_q_cnt = 0;
	if (req_u->req3.tp_retire_blk_tov)
		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
	else
		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
						req_u->req3.tp_block_size);
	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
	rwlock_init(&p1->blk_fill_in_prog_lock);

	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
	prb_init_ft_ops(p1, req_u);
	prb_setup_retire_blk_timer(po);
	prb_open_block(p1, pbd);
}

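/*  Do NOT update the last_blk_num first.
 *  Assumes sk_buff_head lock is held.
 */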
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	mod_timer(&pkc->retire_blk_timer,
			jiffies + pkc->tov_in_jiffies);
	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
}

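/*
 * Timer logic:
 * 1) We refresh the timer only when we open a block.
 *    By doing this we don't waste cycles refreshing the timer
 *    on packet-by-packet basis.
 *
 * With a 1MB block-size, on a 1Gbps line, it will take
 * i) ~8 msec to fill a block + ii) memcpy etc.
 * In this cut we are not accounting for the memcpy time.
 *
 * So, if the user sets the 'tmo' to 10ms then the timer
 * will never fire while the block is still getting filled
 * (which is what we want). However, the user could choose
 * to close a block early and that's fine.
 *
 * But when the timer does fire, we check whether or not to refresh it.
 * Since the tmo granularity is in msecs, it is not too expensive
 * to refresh the timer, lets say every '8' msecs.
 * Either the user can set the 'tmo' or we can derive it based on
 * a) line-speed and b) block-size.
 * prb_calc_retire_blk_tmo() calculates the tmo.
 */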
static void prb_retire_rx_blk_timer_expired(struct timer_list *t)
{
	struct packet_sock *po =
		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	unsigned int frozen;
	struct tpacket_block_desc *pbd;

	spin_lock(&po->sk.sk_receive_queue.lock);

	frozen = prb_queue_frozen(pkc);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	if (unlikely(pkc->delete_blk_timer))
		goto out;

	/* We only need to plug the race when the block is partially filled.
	 * tpacket_rcv:
	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
	 *		copy_bits() is in progress ...
	 *		timer fires on other cpu:
	 *		we can't retire the current block because copy_bits
	 *		is in progress.
	 */
	if (BLOCK_NUM_PKTS(pbd)) {
		/* Waiting for skb_copy_bits to finish... */
		write_lock(&pkc->blk_fill_in_prog_lock);
		write_unlock(&pkc->blk_fill_in_prog_lock);
	}

	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
		if (!frozen) {
			if (!BLOCK_NUM_PKTS(pbd)) {
				/* An empty block. Just refresh the timer. */
				goto refresh_timer;
			}
			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
			if (!prb_dispatch_next_block(pkc, po))
				goto refresh_timer;
			else
				goto out;
		} else {
			/* Case 1. Queue was frozen because user-space was
			 *	   lagging behind.
			 */
			if (prb_curr_blk_in_use(pbd)) {
				/*
				 * Ok, user-space is still behind.
				 * So just refresh the timer.
				 */
				goto refresh_timer;
			} else {
				/* Case 2. queue was frozen,user-space caught up,
				 * now the link went idle && the timer fired.
				 * We don't have a block to close.So we open this
				 * block and restart the timer.
				 * opening a block thaws the queue,restarts timer
				 * Thawing/timer-refresh is a side effect.
				 */
				prb_open_block(pkc, pbd);
				goto out;
			}
		}
	}

refresh_timer:
	_prb_refresh_rx_retire_blk_timer(pkc);

out:
	spin_unlock(&po->sk.sk_receive_queue.lock);
}

static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
	struct tpacket_block_desc *pbd1, __u32 status)
{
	/* Flush everything minus the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	u8 *start, *end;

	start = (u8 *)pbd1;

	/* Skip the block header(we know header WILL fit in 4K) */
	start += PAGE_SIZE;

	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
	for (; start < end; start += PAGE_SIZE)
		flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif

	/* Now update the block status. */

	BLOCK_STATUS(pbd1) = status;

	/* Flush the block header */

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	start = (u8 *)pbd1;
	flush_dcache_page(pgv_to_page(start));

	smp_wmb();
#endif
}

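/*
 * Side effect:
 *
 * 1) flush the block PLUS
 * 2) INCREMENT the consecutive block counter
 *
 * Additionally, retire the current block,
 * kick-off the timer and wake up the user-space
 * via sk_data_ready. So, this has to be protected by the rx_queue lock.
 */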
static void prb_close_block(struct tpacket_kbdq_core *pkc1,
		struct tpacket_block_desc *pbd1,
		struct packet_sock *po, unsigned int stat)
{
	__u32 status = TP_STATUS_USER | stat;

	struct tpacket3_hdr *last_pkt;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
	struct sock *sk = &po->sk;

	if (atomic_read(&po->tp_drops))
		status |= TP_STATUS_LOSING;

	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
	last_pkt->tp_next_offset = 0;

	/* Get the ts of the last pkt */
	if (BLOCK_NUM_PKTS(pbd1)) {
		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
	} else {
		/* Ok, we tmo'd - so get the current time.
		 *
		 * It shouldn't really happen as we don't close empty
		 * blocks. See prb_retire_rx_blk_timer_expired().
		 */
		struct timespec64 ts;
		ktime_get_real_ts64(&ts);
		h1->ts_last_pkt.ts_sec = ts.tv_sec;
		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
	}

	smp_wmb();

	/* Flush the block */
	prb_flush_block(pkc1, pbd1, status);

	sk->sk_data_ready(sk);

	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
}

static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
{
	pkc->reset_pending_on_curr_blk = 0;
}

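/*
 * Side effect of opening a block:
 *
 * 1) prb_queue is thawed.
 * 2) retire_blk_timer is refreshed.
 *
 */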
static void prb_open_block(struct tpacket_kbdq_core *pkc1,
	struct tpacket_block_desc *pbd1)
{
	struct timespec64 ts;
	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;

	smp_rmb();

	/* We could have just memset this but we will lose the
	 * flexibility of making the priv area sticky
	 */

	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
	BLOCK_NUM_PKTS(pbd1) = 0;
	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	ktime_get_real_ts64(&ts);

	h1->ts_first_pkt.ts_sec = ts.tv_sec;
	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;

	pkc1->pkblk_start = (char *)pbd1;
	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);

	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;

	pbd1->version = pkc1->version;
	pkc1->prev = pkc1->nxt_offset;
	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;

	prb_thaw_queue(pkc1);
	_prb_refresh_rx_retire_blk_timer(pkc1);

	smp_wmb();
}

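/*
 * Queue freeze logic:
 * If user-space is lagging and every block is still owned by it
 * (TP_STATUS_USER), the kernel cannot dispatch a new block and
 * freezes the queue; incoming packets are dropped until user-space
 * releases the block that caused the freeze, at which point
 * __packet_lookup_frame_in_block() (or the retire timer) re-opens
 * that block and thaws the queue.
 */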
static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
				  struct packet_sock *po)
{
	pkc->reset_pending_on_curr_blk = 1;
	po->stats.stats3.tp_freeze_q_cnt++;
}

#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))

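/*
 * If the next block is free then we will dispatch it
 * and return a good offset.
 * Else, we will freeze the queue.
 * So, caller must check the return value.
 */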
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po)
{
	struct tpacket_block_desc *pbd;

	smp_rmb();

	/* 1. Get current block num */
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* 2. If this block is currently in_use then freeze the queue */
	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
		prb_freeze_queue(pkc, po);
		return NULL;
	}

	/*
	 * 3.
	 * open this block and return the offset where the first packet
	 * needs to get stored.
	 */
	prb_open_block(pkc, pbd);
	return (void *)pkc->nxt_offset;
}

static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
		struct packet_sock *po, unsigned int status)
{
	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* retire/close the current block */
	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
		/*
		 * Plug the case where copy_bits() is in progress on
		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
		 * have space to copy the pkt in the current block and
		 * called prb_retire_current_block()
		 *
		 * We don't need to worry about the TMO case because
		 * the timer-handler already handled this case.
		 */
		if (!(status & TP_STATUS_BLK_TMO)) {
			/* Waiting for skb_copy_bits to finish... */
			write_lock(&pkc->blk_fill_in_prog_lock);
			write_unlock(&pkc->blk_fill_in_prog_lock);
		}
		prb_close_block(pkc, pbd, po, status);
		return;
	}
}

static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
{
	return TP_STATUS_USER & BLOCK_STATUS(pbd);
}

static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
{
	return pkc->reset_pending_on_curr_blk;
}

static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
	__releases(&pkc->blk_fill_in_prog_lock)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);

	read_unlock(&pkc->blk_fill_in_prog_lock);
}

static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
}

static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_rxhash = 0;
}

static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	if (skb_vlan_tag_present(pkc->skb)) {
		ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
	} else {
		ppd->hv1.tp_vlan_tci = 0;
		ppd->hv1.tp_vlan_tpid = 0;
		ppd->tp_status = TP_STATUS_AVAILABLE;
	}
}

static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
			struct tpacket3_hdr *ppd)
{
	ppd->hv1.tp_padding = 0;
	prb_fill_vlan_info(pkc, ppd);

	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
		prb_fill_rxhash(pkc, ppd);
	else
		prb_clear_rxhash(pkc, ppd);
}

static void prb_fill_curr_block(char *curr,
				struct tpacket_kbdq_core *pkc,
				struct tpacket_block_desc *pbd,
				unsigned int len)
	__acquires(&pkc->blk_fill_in_prog_lock)
{
	struct tpacket3_hdr *ppd;

	ppd = (struct tpacket3_hdr *)curr;
	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
	pkc->prev = curr;
	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
	BLOCK_NUM_PKTS(pbd) += 1;
	read_lock(&pkc->blk_fill_in_prog_lock);
	prb_run_all_ft_ops(pkc, ppd);
}

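/* Assumes caller has the sk->rx_queue.lock */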
static void *__packet_lookup_frame_in_block(struct packet_sock *po,
					    struct sk_buff *skb,
					    unsigned int len)
{
	struct tpacket_kbdq_core *pkc;
	struct tpacket_block_desc *pbd;
	char *curr, *end;

	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);

	/* Queue is frozen when user space is lagging behind */
	if (prb_queue_frozen(pkc)) {
		/*
		 * Check if that last block which caused the queue to freeze,
		 * is still in_use by user-space.
		 */
		if (prb_curr_blk_in_use(pbd)) {
			/* Can't record this packet */
			return NULL;
		} else {
			/*
			 * Ok, the block was released by user-space.
			 * Now let's open that block.
			 * opening a block also thaws the queue.
			 * Thawing is a side effect.
			 */
			prb_open_block(pkc, pbd);
		}
	}

	smp_mb();
	curr = pkc->nxt_offset;
	pkc->skb = skb;
	end = (char *)pbd + pkc->kblk_size;

	/* first try the current block */
	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/* Ok, close the current block */
	prb_retire_current_block(pkc, po, 0);

	/* Now, try to dispatch the next block */
	curr = (char *)prb_dispatch_next_block(pkc, po);
	if (curr) {
		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
		prb_fill_curr_block(curr, pkc, pbd, len);
		return (void *)curr;
	}

	/*
	 * No free blocks are available.user_space hasn't caught up yet.
	 * Queue was just frozen and now this packet will get dropped.
	 */
	return NULL;
}

static void *packet_current_rx_frame(struct packet_sock *po,
					    struct sk_buff *skb,
					    int status, unsigned int len)
{
	char *curr = NULL;
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		curr = packet_lookup_frame(po, &po->rx_ring,
					po->rx_ring.head, status);
		return curr;
	case TPACKET_V3:
		return __packet_lookup_frame_in_block(po, skb, len);
	default:
		WARN(1, "TPACKET version not supported\n");
		BUG();
		return NULL;
	}
}

static void *prb_lookup_block(const struct packet_sock *po,
			      const struct packet_ring_buffer *rb,
			      unsigned int idx,
			      int status)
{
	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);

	if (status != BLOCK_STATUS(pbd))
		return NULL;
	return pbd;
}

static int prb_previous_blk_num(struct packet_ring_buffer *rb)
{
	unsigned int prev;
	if (rb->prb_bdqc.kactive_blk_num)
		prev = rb->prb_bdqc.kactive_blk_num-1;
	else
		prev = rb->prb_bdqc.knum_blocks-1;
	return prev;
}

/* Assumes caller has held the rx_queue.lock */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	unsigned int previous = prb_previous_blk_num(rb);
	return prb_lookup_block(po, rb, previous, status);
}

static void *packet_previous_rx_frame(struct packet_sock *po,
					     struct packet_ring_buffer *rb,
					     int status)
{
	if (po->tp_version <= TPACKET_V2)
		return packet_previous_frame(po, rb, status);

	return __prb_previous_block(po, rb, status);
}

static void packet_increment_rx_head(struct packet_sock *po,
					    struct packet_ring_buffer *rb)
{
	switch (po->tp_version) {
	case TPACKET_V1:
	case TPACKET_V2:
		return packet_increment_head(rb);
	case TPACKET_V3:
	default:
		WARN(1, "TPACKET version not supported.\n");
		BUG();
		return;
	}
}

static void *packet_previous_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
	return packet_lookup_frame(po, rb, previous, status);
}

static void packet_increment_head(struct packet_ring_buffer *buff)
{
	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
}

static void packet_inc_pending(struct packet_ring_buffer *rb)
{
	this_cpu_inc(*rb->pending_refcnt);
}

static void packet_dec_pending(struct packet_ring_buffer *rb)
{
	this_cpu_dec(*rb->pending_refcnt);
}

static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
{
	unsigned int refcnt = 0;
	int cpu;

	/* We don't use pending refcount in rx_ring. */
	if (rb->pending_refcnt == NULL)
		return 0;

	for_each_possible_cpu(cpu)
		refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);

	return refcnt;
}

static int packet_alloc_pending(struct packet_sock *po)
{
	po->rx_ring.pending_refcnt = NULL;

	po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
	if (unlikely(po->tx_ring.pending_refcnt == NULL))
		return -ENOBUFS;

	return 0;
}

static void packet_free_pending(struct packet_sock *po)
{
	free_percpu(po->tx_ring.pending_refcnt);
}

#define ROOM_POW_OFF	2
#define ROOM_NONE	0x0
#define ROOM_LOW	0x1
#define ROOM_NORMAL	0x2

static bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
{
	int idx, len;

	len = READ_ONCE(po->rx_ring.frame_max) + 1;
	idx = READ_ONCE(po->rx_ring.head);
	if (pow_off)
		idx += len >> pow_off;
	if (idx >= len)
		idx -= len;
	return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}

static bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
{
	int idx, len;

	len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
	idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
	if (pow_off)
		idx += len >> pow_off;
	if (idx >= len)
		idx -= len;
	return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
}

static int __packet_rcv_has_room(const struct packet_sock *po,
				 const struct sk_buff *skb)
{
	const struct sock *sk = &po->sk;
	int ret = ROOM_NONE;

	if (po->prot_hook.func != tpacket_rcv) {
		int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
		int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
				   - (skb ? skb->truesize : 0);

		if (avail > (rcvbuf >> ROOM_POW_OFF))
			return ROOM_NORMAL;
		else if (avail > 0)
			return ROOM_LOW;
		else
			return ROOM_NONE;
	}

	if (po->tp_version == TPACKET_V3) {
		if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
			ret = ROOM_NORMAL;
		else if (__tpacket_v3_has_room(po, 0))
			ret = ROOM_LOW;
	} else {
		if (__tpacket_has_room(po, ROOM_POW_OFF))
			ret = ROOM_NORMAL;
		else if (__tpacket_has_room(po, 0))
			ret = ROOM_LOW;
	}

	return ret;
}

static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
{
	int pressure, ret;

	ret = __packet_rcv_has_room(po, skb);
	pressure = ret != ROOM_NORMAL;

	if (READ_ONCE(po->pressure) != pressure)
		WRITE_ONCE(po->pressure, pressure);

	return ret;
}

static void packet_rcv_try_clear_pressure(struct packet_sock *po)
{
	if (READ_ONCE(po->pressure) &&
	    __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
		WRITE_ONCE(po->pressure, 0);
}

static void packet_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_error_queue);

	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));

	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_err("Attempt to release alive packet socket: %p\n", sk);
		return;
	}

	sk_refcnt_debug_dec(sk);
}

static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
{
	u32 *history = po->rollover->history;
	u32 victim, rxhash;
	int i, count = 0;

	rxhash = skb_get_hash(skb);
	for (i = 0; i < ROLLOVER_HLEN; i++)
		if (READ_ONCE(history[i]) == rxhash)
			count++;

	victim = prandom_u32() % ROLLOVER_HLEN;

	/* Avoid dirtying the cache line if possible */
	if (READ_ONCE(history[victim]) != rxhash)
		WRITE_ONCE(history[victim], rxhash);

	return count > (ROLLOVER_HLEN >> 1);
}

static unsigned int fanout_demux_hash(struct packet_fanout *f,
				      struct sk_buff *skb,
				      unsigned int num)
{
	return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
}

static unsigned int fanout_demux_lb(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	unsigned int val = atomic_inc_return(&f->rr_cur);

	return val % num;
}

static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	return smp_processor_id() % num;
}

static unsigned int fanout_demux_rnd(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	return prandom_u32_max(num);
}

static unsigned int fanout_demux_rollover(struct packet_fanout *f,
					  struct sk_buff *skb,
					  unsigned int idx, bool try_self,
					  unsigned int num)
{
	struct packet_sock *po, *po_next, *po_skip = NULL;
	unsigned int i, j, room = ROOM_NONE;

	po = pkt_sk(rcu_dereference(f->arr[idx]));

	if (try_self) {
		room = packet_rcv_has_room(po, skb);
		if (room == ROOM_NORMAL ||
		    (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
			return idx;
		po_skip = po;
	}

	i = j = min_t(int, po->rollover->sock, num - 1);
	do {
		po_next = pkt_sk(rcu_dereference(f->arr[i]));
		if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
			if (i != j)
				po->rollover->sock = i;
			atomic_long_inc(&po->rollover->num);
			if (room == ROOM_LOW)
				atomic_long_inc(&po->rollover->num_huge);
			return i;
		}

		if (++i == num)
			i = 0;
	} while (i != j);

	atomic_long_inc(&po->rollover->num_failed);
	return idx;
}

static unsigned int fanout_demux_qm(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	return skb_get_queue_mapping(skb) % num;
}

static unsigned int fanout_demux_bpf(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	struct bpf_prog *prog;
	unsigned int ret = 0;

	rcu_read_lock();
	prog = rcu_dereference(f->bpf_prog);
	if (prog)
		ret = bpf_prog_run_clear_cb(prog, skb) % num;
	rcu_read_unlock();

	return ret;
}

static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
{
	return f->flags & (flag >> 8);
}

static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
			     struct packet_type *pt, struct net_device *orig_dev)
{
	struct packet_fanout *f = pt->af_packet_priv;
	unsigned int num = READ_ONCE(f->num_members);
	struct net *net = read_pnet(&f->net);
	struct packet_sock *po;
	unsigned int idx;

	if (!net_eq(dev_net(dev), net) || !num) {
		kfree_skb(skb);
		return 0;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
		skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
		if (!skb)
			return 0;
	}
	switch (f->type) {
	case PACKET_FANOUT_HASH:
	default:
		idx = fanout_demux_hash(f, skb, num);
		break;
	case PACKET_FANOUT_LB:
		idx = fanout_demux_lb(f, skb, num);
		break;
	case PACKET_FANOUT_CPU:
		idx = fanout_demux_cpu(f, skb, num);
		break;
	case PACKET_FANOUT_RND:
		idx = fanout_demux_rnd(f, skb, num);
		break;
	case PACKET_FANOUT_QM:
		idx = fanout_demux_qm(f, skb, num);
		break;
	case PACKET_FANOUT_ROLLOVER:
		idx = fanout_demux_rollover(f, skb, 0, false, num);
		break;
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		idx = fanout_demux_bpf(f, skb, num);
		break;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
		idx = fanout_demux_rollover(f, skb, idx, true, num);

	po = pkt_sk(rcu_dereference(f->arr[idx]));
	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}

DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
static LIST_HEAD(fanout_list);
static u16 fanout_next_id;

static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;

	spin_lock(&f->lock);
	rcu_assign_pointer(f->arr[f->num_members], sk);
	smp_wmb();
	f->num_members++;
	if (f->num_members == 1)
		dev_add_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}

static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;
	int i;

	spin_lock(&f->lock);
	for (i = 0; i < f->num_members; i++) {
		if (rcu_dereference_protected(f->arr[i],
					      lockdep_is_held(&f->lock)) == sk)
			break;
	}
	BUG_ON(i >= f->num_members);
	rcu_assign_pointer(f->arr[i],
			   rcu_dereference_protected(f->arr[f->num_members - 1],
						     lockdep_is_held(&f->lock)));
	f->num_members--;
	if (f->num_members == 0)
		__dev_remove_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}

static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
{
	if (sk->sk_family != PF_PACKET)
		return false;

	return ptype->af_packet_priv == pkt_sk(sk)->fanout;
}

static void fanout_init_data(struct packet_fanout *f)
{
	switch (f->type) {
	case PACKET_FANOUT_LB:
		atomic_set(&f->rr_cur, 0);
		break;
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		RCU_INIT_POINTER(f->bpf_prog, NULL);
		break;
	}
}

static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
{
	struct bpf_prog *old;

	spin_lock(&f->lock);
	old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
	rcu_assign_pointer(f->bpf_prog, new);
	spin_unlock(&f->lock);

	if (old) {
		synchronize_net();
		bpf_prog_destroy(old);
	}
}

static int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
				unsigned int len)
{
	struct bpf_prog *new;
	struct sock_fprog fprog;
	int ret;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	ret = copy_bpf_fprog_from_user(&fprog, data, len);
	if (ret)
		return ret;

	ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
	if (ret)
		return ret;

	__fanout_set_data_bpf(po->fanout, new);
	return 0;
}

static int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
				unsigned int len)
{
	struct bpf_prog *new;
	u32 fd;

	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
		return -EPERM;
	if (len != sizeof(fd))
		return -EINVAL;
	if (copy_from_sockptr(&fd, data, len))
		return -EFAULT;

	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (IS_ERR(new))
		return PTR_ERR(new);

	__fanout_set_data_bpf(po->fanout, new);
	return 0;
}

static int fanout_set_data(struct packet_sock *po, sockptr_t data,
			   unsigned int len)
{
	switch (po->fanout->type) {
	case PACKET_FANOUT_CBPF:
		return fanout_set_data_cbpf(po, data, len);
	case PACKET_FANOUT_EBPF:
		return fanout_set_data_ebpf(po, data, len);
	default:
		return -EINVAL;
	}
}

static void fanout_release_data(struct packet_fanout *f)
{
	switch (f->type) {
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		__fanout_set_data_bpf(f, NULL);
	}
}

static bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
{
	struct packet_fanout *f;

	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == candidate_id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			return false;
		}
	}
	return true;
}

static bool fanout_find_new_id(struct sock *sk, u16 *new_id)
{
	u16 id = fanout_next_id;

	do {
		if (__fanout_id_is_free(sk, id)) {
			*new_id = id;
			fanout_next_id = id + 1;
			return true;
		}

		id++;
	} while (id != fanout_next_id);

	return false;
}

static int fanout_add(struct sock *sk, struct fanout_args *args)
{
	struct packet_rollover *rollover = NULL;
	struct packet_sock *po = pkt_sk(sk);
	u16 type_flags = args->type_flags;
	struct packet_fanout *f, *match;
	u8 type = type_flags & 0xff;
	u8 flags = type_flags >> 8;
	u16 id = args->id;
	int err;

	switch (type) {
	case PACKET_FANOUT_ROLLOVER:
		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
			return -EINVAL;
		break;
	case PACKET_FANOUT_HASH:
	case PACKET_FANOUT_LB:
	case PACKET_FANOUT_CPU:
	case PACKET_FANOUT_RND:
	case PACKET_FANOUT_QM:
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		break;
	default:
		return -EINVAL;
	}

	mutex_lock(&fanout_mutex);

	err = -EALREADY;
	if (po->fanout)
		goto out;

	if (type == PACKET_FANOUT_ROLLOVER ||
	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
		err = -ENOMEM;
		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
		if (!rollover)
			goto out;
		atomic_long_set(&rollover->num, 0);
		atomic_long_set(&rollover->num_huge, 0);
		atomic_long_set(&rollover->num_failed, 0);
	}

	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
		if (id != 0) {
			err = -EINVAL;
			goto out;
		}
		if (!fanout_find_new_id(sk, &id)) {
			err = -ENOMEM;
			goto out;
		}
		/* ephemeral flag for the first socket in the group: drop it */
		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
	}

	match = NULL;
	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			match = f;
			break;
		}
	}
	err = -EINVAL;
	if (match) {
		if (match->flags != flags)
			goto out;
		if (args->max_num_members &&
		    args->max_num_members != match->max_num_members)
			goto out;
	} else {
		if (args->max_num_members > PACKET_FANOUT_MAX)
			goto out;
		if (!args->max_num_members)
			/* legacy PACKET_FANOUT_MAX */
			args->max_num_members = 256;
		err = -ENOMEM;
		match = kvzalloc(struct_size(match, arr, args->max_num_members),
				 GFP_KERNEL);
		if (!match)
			goto out;
		write_pnet(&match->net, sock_net(sk));
		match->id = id;
		match->type = type;
		match->flags = flags;
		INIT_LIST_HEAD(&match->list);
		spin_lock_init(&match->lock);
		refcount_set(&match->sk_ref, 0);
		fanout_init_data(match);
		match->prot_hook.type = po->prot_hook.type;
		match->prot_hook.dev = po->prot_hook.dev;
		match->prot_hook.func = packet_rcv_fanout;
		match->prot_hook.af_packet_priv = match;
		match->prot_hook.id_match = match_fanout_group;
		match->max_num_members = args->max_num_members;
		list_add(&match->list, &fanout_list);
	}
	err = -EINVAL;

	spin_lock(&po->bind_lock);
	if (po->running &&
	    match->type == type &&
	    match->prot_hook.type == po->prot_hook.type &&
	    match->prot_hook.dev == po->prot_hook.dev) {
		err = -ENOSPC;
		if (refcount_read(&match->sk_ref) < match->max_num_members) {
			__dev_remove_pack(&po->prot_hook);
			po->fanout = match;
			po->rollover = rollover;
			rollover = NULL;
			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
			__fanout_link(sk, po);
			err = 0;
		}
	}
	spin_unlock(&po->bind_lock);

	if (err && !refcount_read(&match->sk_ref)) {
		list_del(&match->list);
		kvfree(match);
	}

out:
	kfree(rollover);
	mutex_unlock(&fanout_mutex);
	return err;
}

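/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
 * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
 * It is the responsibility of the caller to call fanout_release_data() and
 * free the returned packet_fanout (after synchronize_net())
 */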
static struct packet_fanout *fanout_release(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f;

	mutex_lock(&fanout_mutex);
	f = po->fanout;
	if (f) {
		po->fanout = NULL;

		if (refcount_dec_and_test(&f->sk_ref))
			list_del(&f->list);
		else
			f = NULL;
	}
	mutex_unlock(&fanout_mutex);

	return f;
}

static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
					  struct sk_buff *skb)
{
	/* Earlier code assumed this would be a VLAN pkt, double-check
	 * this now that we have the actual packet in hand. We can only
	 * do this check on Ethernet devices.
	 */
	if (unlikely(dev->type != ARPHRD_ETHER))
		return false;

	skb_reset_mac_header(skb);
	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
}

static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;

static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
			   struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb_mac_header(skb)
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto out;

	skb = skb_share_check(skb, GFP_ATOMIC);
	if (skb == NULL)
		goto oom;

	/* drop any routing info */
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset_ct(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	skb_push(skb, skb->data - skb_mac_header(skb));

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk, skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}

static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
{
	if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
	    sock->type == SOCK_RAW) {
		skb_reset_mac_header(skb);
		skb->protocol = dev_parse_header_protocol(skb);
	}

	skb_probe_transport_header(skb);
}
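
/*
 *	Output a raw packet to a device layer. This bypasses all the other
 *	protocol layers and you must therefore supply it with a complete frame
 */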
static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
	struct sk_buff *skb = NULL;
	struct net_device *dev;
	struct sockcm_cookie sockc;
	__be16 proto = 0;
	int err;
	int extra_len = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr) {
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return -EINVAL;
		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
			proto = saddr->spkt_protocol;
	} else
		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
retry:
	rcu_read_lock();
	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
		if (!netif_supports_nofcs(dev)) {
			err = -EPROTONOSUPPORT;
			goto out_unlock;
		}
		extra_len = 4; /* We're doing our own CRC */
	}

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
		goto out_unlock;

	if (!skb) {
		size_t reserved = LL_RESERVED_SPACE(dev);
		int tlen = dev->needed_tailroom;
		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;

		rcu_read_unlock();
		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
		if (skb == NULL)
			return -ENOBUFS;
		/* FIXME: Save some space for broken drivers that write a hard
		 * header at transmission time by themselves. PPP is the notable
		 * one here. This should really be fixed at the driver level.
		 */
		skb_reserve(skb, reserved);
		skb_reset_network_header(skb);

		/* Try to align data part correctly */
		if (hhlen) {
			skb->data -= hhlen;
			skb->tail -= hhlen;
			if (len < hhlen)
				skb_reset_network_header(skb);
		}
		err = memcpy_from_msg(skb_put(skb, len), msg, len);
		if (err)
			goto out_free;
		goto retry;
	}

	if (!dev_validate_header(dev, skb->data, len)) {
		err = -EINVAL;
		goto out_unlock;
	}
	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
	    !packet_extra_vlan_len_allowed(dev, skb)) {
		err = -EMSGSIZE;
		goto out_unlock;
	}

	sockcm_init(&sockc, sk);
	if (msg->msg_controllen) {
		err = sock_cmsg_send(sk, msg, &sockc);
		if (unlikely(err))
			goto out_unlock;
	}

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;
	skb->tstamp = sockc.transmit_time;

	skb_setup_tx_timestamp(skb, sockc.tsflags);

	if (unlikely(extra_len == 4))
		skb->no_fcs = 1;

	packet_parse_headers(skb, sock);

	dev_queue_xmit(skb);
	rcu_read_unlock();
	return len;

out_unlock:
	rcu_read_unlock();
out_free:
	kfree_skb(skb);
	return err;
}

static unsigned int run_filter(struct sk_buff *skb,
			       const struct sock *sk,
			       unsigned int res)
{
	struct sk_filter *filter;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		res = bpf_prog_run_clear_cb(filter->prog, skb);
	rcu_read_unlock();

	return res;
}

static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
			   size_t *len)
{
	struct virtio_net_hdr vnet_hdr;

	if (*len < sizeof(vnet_hdr))
		return -EINVAL;
	*len -= sizeof(vnet_hdr);

	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
		return -EINVAL;

	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
}
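
/*
 * This function makes lazy skb cloning in hope that most of packets
 * are discarded by BPF.
 *
 * Note tricky part: we DO mangle shared skb! skb->data, skb->len
 * and skb->cb are mangled. It works because (and until) packets
 * falling here are owned by current CPU. Output packets are cloned
 * by dev_queue_xmit_nit(), input packets are processed by net_bh
 * sequentially, so that if we return skb to original state on exit,
 * we will not harm anyone.
 */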
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
		      struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	bool is_drop_n_account = false;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	skb->dev = dev;

	if (dev_has_header(dev)) {
		/* The device has an explicit notion of ll header,
		 * exported to higher levels.
		 *
		 * Otherwise, the device hides details of its frame
		 * structure, so that corresponding packet head is
		 * never delivered to user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
		goto drop_n_acct;

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		consume_skb(skb);
		skb = nskb;
	}

	sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_hatype = dev->type;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);

	/* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
	 * Use their space for storing the original skb length.
	 */
	PACKET_SKB_CB(skb)->sa.origlen = skb->len;

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	skb_dst_drop(skb);

	/* drop conntrack reference */
	nf_reset_ct(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.stats1.tp_packets++;
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk);
	return 0;

drop_n_acct:
	is_drop_n_account = true;
	atomic_inc(&po->tp_drops);
	atomic_inc(&sk->sk_drops);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	if (!is_drop_n_account)
		consume_skb(skb);
	else
		kfree_skb(skb);
	return 0;
}

static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	union tpacket_uhdr h;
	u8 *skb_head = skb->data;
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_USER;
	unsigned short macoff, hdrlen;
	unsigned int netoff;
	struct sk_buff *copy_skb = NULL;
	struct timespec64 ts;
	__u32 ts_status;
	bool is_drop_n_account = false;
	unsigned int slot_id = 0;
	bool do_vnet = false;

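	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
	 * We may add members to them until current aligned size without forcing
	 * userspace to call getsockopt(..., PACKET_HDRLEN, ...).
	 */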
	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (!net_eq(dev_net(dev), sock_net(sk)))
		goto drop;

	if (dev_has_header(dev)) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb_mac_header(skb));
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb_network_offset(skb));
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;

	/* If we are flooded, just give up */
	if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
		atomic_inc(&po->tp_drops);
		goto drop_n_restore;
	}

	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;
	else if (skb->pkt_type != PACKET_OUTGOING &&
		 (skb->ip_summed == CHECKSUM_COMPLETE ||
		  skb_csum_unnecessary(skb)))
		status |= TP_STATUS_CSUM_VALID;

	if (snaplen > res)
		snaplen = res;

	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
				  po->tp_reserve;
	} else {
		unsigned int maclen = skb_network_offset(skb);
		netoff = TPACKET_ALIGN(po->tp_hdrlen +
				       (maclen < 16 ? 16 : maclen)) +
				       po->tp_reserve;
		if (po->has_vnet_hdr) {
			netoff += sizeof(struct virtio_net_hdr);
			do_vnet = true;
		}
		macoff = netoff - maclen;
	}
	if (netoff > USHRT_MAX) {
		atomic_inc(&po->tp_drops);
		goto drop_n_restore;
	}
	if (po->tp_version <= TPACKET_V2) {
		if (macoff + snaplen > po->rx_ring.frame_size) {
			if (po->copy_thresh &&
			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
				if (skb_shared(skb)) {
					copy_skb = skb_clone(skb, GFP_ATOMIC);
				} else {
					copy_skb = skb_get(skb);
					skb_head = skb->data;
				}
				if (copy_skb)
					skb_set_owner_r(copy_skb, sk);
			}
			snaplen = po->rx_ring.frame_size - macoff;
			if ((int)snaplen < 0) {
				snaplen = 0;
				do_vnet = false;
			}
		}
	} else if (unlikely(macoff + snaplen >
			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
		u32 nval;

		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
			    snaplen, nval, macoff);
		snaplen = nval;
		if (unlikely((int)snaplen < 0)) {
			snaplen = 0;
			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
			do_vnet = false;
		}
	}
	spin_lock(&sk->sk_receive_queue.lock);
	h.raw = packet_current_rx_frame(po, skb,
					TP_STATUS_KERNEL, (macoff+snaplen));
	if (!h.raw)
		goto drop_n_account;

	if (po->tp_version <= TPACKET_V2) {
		slot_id = po->rx_ring.head;
		if (test_bit(slot_id, po->rx_ring.rx_owner_map))
			goto drop_n_account;
		__set_bit(slot_id, po->rx_ring.rx_owner_map);
	}

	if (do_vnet &&
	    virtio_net_hdr_from_skb(skb, h.raw + macoff -
				    sizeof(struct virtio_net_hdr),
				    vio_le(), true, 0)) {
		if (po->tp_version == TPACKET_V3)
			prb_clear_blk_fill_status(&po->rx_ring);
		goto drop_n_account;
	}

	if (po->tp_version <= TPACKET_V2) {
		packet_increment_rx_head(po, &po->rx_ring);
	/*
	 * LOSING will be reported till you read the stats,
	 * because it's COR - Clear On Read.
	 * Anyways, moving it for V1/V2 only as V3 doesn't need it
	 * at packet level.
	 */
		if (atomic_read(&po->tp_drops))
			status |= TP_STATUS_LOSING;
	}

	po->stats.stats1.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);

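	/* Always timestamp; prefer an existing software timestamp taken
	 * closer to the time of capture.
	 */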
	ts_status = tpacket_get_timestamp(skb, &ts,
					  po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
	if (!ts_status)
		ktime_get_real_ts64(&ts);

	status |= ts_status;

	switch (po->tp_version) {
	case TPACKET_V1:
		h.h1->tp_len = skb->len;
		h.h1->tp_snaplen = snaplen;
		h.h1->tp_mac = macoff;
		h.h1->tp_net = netoff;
		h.h1->tp_sec = ts.tv_sec;
		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
		hdrlen = sizeof(*h.h1);
		break;
	case TPACKET_V2:
		h.h2->tp_len = skb->len;
		h.h2->tp_snaplen = snaplen;
		h.h2->tp_mac = macoff;
		h.h2->tp_net = netoff;
		h.h2->tp_sec = ts.tv_sec;
		h.h2->tp_nsec = ts.tv_nsec;
		if (skb_vlan_tag_present(skb)) {
			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
		} else {
			h.h2->tp_vlan_tci = 0;
			h.h2->tp_vlan_tpid = 0;
		}
		memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
		hdrlen = sizeof(*h.h2);
		break;
	case TPACKET_V3:
		/* tp_nxt_offset,vlan are already populated above.
		 * So DONT clear those fields here
		 */
		h.h3->tp_status |= status;
		h.h3->tp_len = skb->len;
		h.h3->tp_snaplen = snaplen;
		h.h3->tp_mac = macoff;
		h.h3->tp_net = netoff;
		h.h3->tp_sec = ts.tv_sec;
		h.h3->tp_nsec = ts.tv_nsec;
		memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
		hdrlen = sizeof(*h.h3);
		break;
	default:
		BUG();
	}

	sll = h.raw + TPACKET_ALIGN(hdrlen);
	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	if (unlikely(po->origdev))
		sll->sll_ifindex = orig_dev->ifindex;
	else
		sll->sll_ifindex = dev->ifindex;

	smp_mb();

#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
	if (po->tp_version <= TPACKET_V2) {
		u8 *start, *end;

		end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
					macoff + snaplen);

		for (start = h.raw; start < end; start += PAGE_SIZE)
			flush_dcache_page(pgv_to_page(start));
	}
	smp_wmb();
#endif

	if (po->tp_version <= TPACKET_V2) {
		spin_lock(&sk->sk_receive_queue.lock);
		__packet_set_status(po, h.raw, status);
		__clear_bit(slot_id, po->rx_ring.rx_owner_map);
		spin_unlock(&sk->sk_receive_queue.lock);
		sk->sk_data_ready(sk);
	} else if (po->tp_version == TPACKET_V3) {
		prb_clear_blk_fill_status(&po->rx_ring);
	}

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	if (!is_drop_n_account)
		consume_skb(skb);
	else
		kfree_skb(skb);
	return 0;

drop_n_account:
	spin_unlock(&sk->sk_receive_queue.lock);
	atomic_inc(&po->tp_drops);
	is_drop_n_account = true;

	sk->sk_data_ready(sk);
	kfree_skb(copy_skb);
	goto drop_n_restore;
}
2459
2460static void tpacket_destruct_skb(struct sk_buff *skb)
2461{
2462 struct packet_sock *po = pkt_sk(skb->sk);
2463
2464 if (likely(po->tx_ring.pg_vec)) {
2465 void *ph;
2466 __u32 ts;
2467
2468 ph = skb_zcopy_get_nouarg(skb);
2469 packet_dec_pending(&po->tx_ring);
2470
2471 ts = __packet_set_timestamp(po, ph, skb);
2472 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2473
2474 if (!packet_read_pending(&po->tx_ring))
2475 complete(&po->skb_completion);
2476 }
2477
2478 sock_wfree(skb);
2479}
2480
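/* Validate a user-supplied virtio_net_hdr: extend hdr_len so it covers
 * the checksum start/offset fields, then check that it still fits in
 * the packet.
 */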
2481static int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
2482{
2483 if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2484 (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2485 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
2486 __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
2487 vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
2488 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
2489 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
2490
2491 if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
2492 return -EINVAL;
2493
2494 return 0;
2495}
2496
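/* Pull a virtio_net_hdr off the front of the message and validate it. */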
2497static int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
2498 struct virtio_net_hdr *vnet_hdr)
2499{
2500 if (*len < sizeof(*vnet_hdr))
2501 return -EINVAL;
2502 *len -= sizeof(*vnet_hdr);
2503
2504 if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
2505 return -EFAULT;
2506
2507 return __packet_snd_vnet_parse(vnet_hdr, *len);
2508}
2509
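/* Build an skb around a tx-ring frame: the link-layer header is copied
 * into the linear area, and the payload is attached page by page as
 * zero-copy fragments.
 */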
2510static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2511 void *frame, struct net_device *dev, void *data, int tp_len,
2512 __be16 proto, unsigned char *addr, int hlen, int copylen,
2513 const struct sockcm_cookie *sockc)
2514{
2515 union tpacket_uhdr ph;
2516 int to_write, offset, len, nr_frags, len_max;
2517 struct socket *sock = po->sk.sk_socket;
2518 struct page *page;
2519 int err;
2520
2521 ph.raw = frame;
2522
2523 skb->protocol = proto;
2524 skb->dev = dev;
2525 skb->priority = po->sk.sk_priority;
2526 skb->mark = po->sk.sk_mark;
2527 skb->tstamp = sockc->transmit_time;
2528 skb_setup_tx_timestamp(skb, sockc->tsflags);
2529 skb_zcopy_set_nouarg(skb, ph.raw);
2530
2531 skb_reserve(skb, hlen);
2532 skb_reset_network_header(skb);
2533
2534 to_write = tp_len;
2535
2536 if (sock->type == SOCK_DGRAM) {
2537 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2538 NULL, tp_len);
2539 if (unlikely(err < 0))
2540 return -EINVAL;
2541 } else if (copylen) {
2542 int hdrlen = min_t(int, copylen, tp_len);
2543
2544 skb_push(skb, dev->hard_header_len);
2545 skb_put(skb, copylen - dev->hard_header_len);
2546 err = skb_store_bits(skb, 0, data, hdrlen);
2547 if (unlikely(err))
2548 return err;
2549 if (!dev_validate_header(dev, skb->data, hdrlen))
2550 return -EINVAL;
2551
2552 data += hdrlen;
2553 to_write -= hdrlen;
2554 }
2555
2556 offset = offset_in_page(data);
2557 len_max = PAGE_SIZE - offset;
2558 len = ((to_write > len_max) ? len_max : to_write);
2559
2560 skb->data_len = to_write;
2561 skb->len += to_write;
2562 skb->truesize += to_write;
2563 refcount_add(to_write, &po->sk.sk_wmem_alloc);
2564
2565 while (likely(to_write)) {
2566 nr_frags = skb_shinfo(skb)->nr_frags;
2567
2568 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("packet exceeds the max number of skb frags (%lu)\n",
			       MAX_SKB_FRAGS);
2571 return -EFAULT;
2572 }
2573
2574 page = pgv_to_page(data);
2575 data += len;
2576 flush_dcache_page(page);
2577 get_page(page);
2578 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2579 to_write -= len;
2580 offset = 0;
2581 len_max = PAGE_SIZE;
2582 len = ((to_write > len_max) ? len_max : to_write);
2583 }
2584
2585 packet_parse_headers(skb, sock);
2586
2587 return tp_len;
2588}
2589
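/* Read tp_len from a tx-ring frame and locate the start of the packet
 * data, honouring a user-supplied offset if PACKET_TX_HAS_OFF is set.
 */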
2590static int tpacket_parse_header(struct packet_sock *po, void *frame,
2591 int size_max, void **data)
2592{
2593 union tpacket_uhdr ph;
2594 int tp_len, off;
2595
2596 ph.raw = frame;
2597
2598 switch (po->tp_version) {
2599 case TPACKET_V3:
2600 if (ph.h3->tp_next_offset != 0) {
			pr_warn_once("variable sized slot not supported\n");
2602 return -EINVAL;
2603 }
2604 tp_len = ph.h3->tp_len;
2605 break;
2606 case TPACKET_V2:
2607 tp_len = ph.h2->tp_len;
2608 break;
2609 default:
2610 tp_len = ph.h1->tp_len;
2611 break;
2612 }
2613 if (unlikely(tp_len > size_max)) {
		pr_err("packet is too long (%d > %d)\n", tp_len, size_max);
2615 return -EMSGSIZE;
2616 }
2617
2618 if (unlikely(po->tp_tx_has_off)) {
2619 int off_min, off_max;
2620
2621 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2622 off_max = po->tx_ring.frame_size - tp_len;
2623 if (po->sk.sk_type == SOCK_DGRAM) {
2624 switch (po->tp_version) {
2625 case TPACKET_V3:
2626 off = ph.h3->tp_net;
2627 break;
2628 case TPACKET_V2:
2629 off = ph.h2->tp_net;
2630 break;
2631 default:
2632 off = ph.h1->tp_net;
2633 break;
2634 }
2635 } else {
2636 switch (po->tp_version) {
2637 case TPACKET_V3:
2638 off = ph.h3->tp_mac;
2639 break;
2640 case TPACKET_V2:
2641 off = ph.h2->tp_mac;
2642 break;
2643 default:
2644 off = ph.h1->tp_mac;
2645 break;
2646 }
2647 }
2648 if (unlikely((off < off_min) || (off_max < off)))
2649 return -EINVAL;
2650 } else {
2651 off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2652 }
2653
2654 *data = frame + off;
2655 return tp_len;
2656}
2657
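/* Transmit path for PACKET_TX_RING: send every frame marked
 * TP_STATUS_SEND_REQUEST, waiting for completions when the caller
 * allows blocking.
 */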
2658static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2659{
2660 struct sk_buff *skb = NULL;
2661 struct net_device *dev;
2662 struct virtio_net_hdr *vnet_hdr = NULL;
2663 struct sockcm_cookie sockc;
2664 __be16 proto;
2665 int err, reserve = 0;
2666 void *ph;
2667 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2668 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2669 unsigned char *addr = NULL;
2670 int tp_len, size_max;
2671 void *data;
2672 int len_sum = 0;
2673 int status = TP_STATUS_AVAILABLE;
2674 int hlen, tlen, copylen = 0;
2675 long timeo = 0;
2676
2677 mutex_lock(&po->pg_vec_lock);
2678
	/* packet_sendmsg() check on tx_ring.pg_vec was lockless,
	 * we need to confirm it under protection of pg_vec_lock.
	 */
2682 if (unlikely(!po->tx_ring.pg_vec)) {
2683 err = -EBUSY;
2684 goto out;
2685 }
2686 if (likely(saddr == NULL)) {
2687 dev = packet_cached_dev_get(po);
2688 proto = READ_ONCE(po->num);
2689 } else {
2690 err = -EINVAL;
2691 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2692 goto out;
2693 if (msg->msg_namelen < (saddr->sll_halen
2694 + offsetof(struct sockaddr_ll,
2695 sll_addr)))
2696 goto out;
2697 proto = saddr->sll_protocol;
2698 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2699 if (po->sk.sk_socket->type == SOCK_DGRAM) {
2700 if (dev && msg->msg_namelen < dev->addr_len +
2701 offsetof(struct sockaddr_ll, sll_addr))
2702 goto out_put;
2703 addr = saddr->sll_addr;
2704 }
2705 }
2706
2707 err = -ENXIO;
2708 if (unlikely(dev == NULL))
2709 goto out;
2710 err = -ENETDOWN;
2711 if (unlikely(!(dev->flags & IFF_UP)))
2712 goto out_put;
2713
2714 sockcm_init(&sockc, &po->sk);
2715 if (msg->msg_controllen) {
2716 err = sock_cmsg_send(&po->sk, msg, &sockc);
2717 if (unlikely(err))
2718 goto out_put;
2719 }
2720
2721 if (po->sk.sk_socket->type == SOCK_RAW)
2722 reserve = dev->hard_header_len;
2723 size_max = po->tx_ring.frame_size
2724 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2725
2726 if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
2727 size_max = dev->mtu + reserve + VLAN_HLEN;
2728
2729 reinit_completion(&po->skb_completion);
2730
2731 do {
2732 ph = packet_current_frame(po, &po->tx_ring,
2733 TP_STATUS_SEND_REQUEST);
2734 if (unlikely(ph == NULL)) {
2735 if (need_wait && skb) {
2736 timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
2737 timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
2738 if (timeo <= 0) {
2739 err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
2740 goto out_put;
2741 }
2742 }
			/* check for additional frames */
2744 continue;
2745 }
2746
2747 skb = NULL;
2748 tp_len = tpacket_parse_header(po, ph, size_max, &data);
2749 if (tp_len < 0)
2750 goto tpacket_error;
2751
2752 status = TP_STATUS_SEND_REQUEST;
2753 hlen = LL_RESERVED_SPACE(dev);
2754 tlen = dev->needed_tailroom;
2755 if (po->has_vnet_hdr) {
2756 vnet_hdr = data;
2757 data += sizeof(*vnet_hdr);
2758 tp_len -= sizeof(*vnet_hdr);
2759 if (tp_len < 0 ||
2760 __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
2761 tp_len = -EINVAL;
2762 goto tpacket_error;
2763 }
2764 copylen = __virtio16_to_cpu(vio_le(),
2765 vnet_hdr->hdr_len);
2766 }
2767 copylen = max_t(int, copylen, dev->hard_header_len);
2768 skb = sock_alloc_send_skb(&po->sk,
2769 hlen + tlen + sizeof(struct sockaddr_ll) +
2770 (copylen - dev->hard_header_len),
2771 !need_wait, &err);
2772
2773 if (unlikely(skb == NULL)) {
			/* we assume the socket was initially writeable ... */
2775 if (likely(len_sum > 0))
2776 err = len_sum;
2777 goto out_status;
2778 }
2779 tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
2780 addr, hlen, copylen, &sockc);
2781 if (likely(tp_len >= 0) &&
2782 tp_len > dev->mtu + reserve &&
2783 !po->has_vnet_hdr &&
2784 !packet_extra_vlan_len_allowed(dev, skb))
2785 tp_len = -EMSGSIZE;
2786
2787 if (unlikely(tp_len < 0)) {
2788tpacket_error:
2789 if (po->tp_loss) {
2790 __packet_set_status(po, ph,
2791 TP_STATUS_AVAILABLE);
2792 packet_increment_head(&po->tx_ring);
2793 kfree_skb(skb);
2794 continue;
2795 } else {
2796 status = TP_STATUS_WRONG_FORMAT;
2797 err = tp_len;
2798 goto out_status;
2799 }
2800 }
2801
2802 if (po->has_vnet_hdr) {
2803 if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
2804 tp_len = -EINVAL;
2805 goto tpacket_error;
2806 }
2807 virtio_net_hdr_set_proto(skb, vnet_hdr);
2808 }
2809
2810 skb->destructor = tpacket_destruct_skb;
2811 __packet_set_status(po, ph, TP_STATUS_SENDING);
2812 packet_inc_pending(&po->tx_ring);
2813
2814 status = TP_STATUS_SEND_REQUEST;
2815 err = po->xmit(skb);
2816 if (unlikely(err > 0)) {
2817 err = net_xmit_errno(err);
2818 if (err && __packet_get_status(po, ph) ==
2819 TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
2821 skb = NULL;
2822 goto out_status;
2823 }
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
2828 err = 0;
2829 }
2830 packet_increment_head(&po->tx_ring);
2831 len_sum += tp_len;
2832 } while (likely((ph != NULL) ||
		/* Note: packet_read_pending() might be slow if we have
		 * to call it as it's per_cpu variable, but in fast-path
		 * we already short-circuit the loop with the first
		 * condition, and luckily don't have to go that path
		 * anyway.
		 */
2839 (need_wait && packet_read_pending(&po->tx_ring))));
2840
2841 err = len_sum;
2842 goto out_put;
2843
2844out_status:
2845 __packet_set_status(po, ph, status);
2846 kfree_skb(skb);
2847out_put:
2848 dev_put(dev);
2849out:
2850 mutex_unlock(&po->pg_vec_lock);
2851 return err;
2852}
2853
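/* Allocate an skb for the non-ring transmit path, splitting large
 * packets into a linear header part plus paged data.
 */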
2854static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2855 size_t reserve, size_t len,
2856 size_t linear, int noblock,
2857 int *err)
2858{
2859 struct sk_buff *skb;
2860
	/* Under a page? Don't bother with paged skb. */
2862 if (prepad + len < PAGE_SIZE || !linear)
2863 linear = len;
2864
2865 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2866 err, 0);
2867 if (!skb)
2868 return NULL;
2869
2870 skb_reserve(skb, reserve);
2871 skb_put(skb, linear);
2872 skb->data_len = len - linear;
2873 skb->len += len - linear;
2874
2875 return skb;
2876}
2877
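/* Transmit path for a plain sendmsg() without a tx ring. */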
2878static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
2879{
2880 struct sock *sk = sock->sk;
2881 DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
2882 struct sk_buff *skb;
2883 struct net_device *dev;
2884 __be16 proto;
2885 unsigned char *addr = NULL;
2886 int err, reserve = 0;
2887 struct sockcm_cookie sockc;
2888 struct virtio_net_hdr vnet_hdr = { 0 };
2889 int offset = 0;
2890 struct packet_sock *po = pkt_sk(sk);
2891 bool has_vnet_hdr = false;
2892 int hlen, tlen, linear;
2893 int extra_len = 0;
2894
	/*
	 *	Get and verify the address.
	 */
2899 if (likely(saddr == NULL)) {
2900 dev = packet_cached_dev_get(po);
2901 proto = READ_ONCE(po->num);
2902 } else {
2903 err = -EINVAL;
2904 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2905 goto out;
2906 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2907 goto out;
2908 proto = saddr->sll_protocol;
2909 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2910 if (sock->type == SOCK_DGRAM) {
2911 if (dev && msg->msg_namelen < dev->addr_len +
2912 offsetof(struct sockaddr_ll, sll_addr))
2913 goto out_unlock;
2914 addr = saddr->sll_addr;
2915 }
2916 }
2917
2918 err = -ENXIO;
2919 if (unlikely(dev == NULL))
2920 goto out_unlock;
2921 err = -ENETDOWN;
2922 if (unlikely(!(dev->flags & IFF_UP)))
2923 goto out_unlock;
2924
2925 sockcm_init(&sockc, sk);
2926 sockc.mark = sk->sk_mark;
2927 if (msg->msg_controllen) {
2928 err = sock_cmsg_send(sk, msg, &sockc);
2929 if (unlikely(err))
2930 goto out_unlock;
2931 }
2932
2933 if (sock->type == SOCK_RAW)
2934 reserve = dev->hard_header_len;
2935 if (po->has_vnet_hdr) {
2936 err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
2937 if (err)
2938 goto out_unlock;
2939 has_vnet_hdr = true;
2940 }
2941
2942 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2943 if (!netif_supports_nofcs(dev)) {
2944 err = -EPROTONOSUPPORT;
2945 goto out_unlock;
2946 }
2947 extra_len = 4;
2948 }
2949
2950 err = -EMSGSIZE;
2951 if (!vnet_hdr.gso_type &&
2952 (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2953 goto out_unlock;
2954
2955 err = -ENOBUFS;
2956 hlen = LL_RESERVED_SPACE(dev);
2957 tlen = dev->needed_tailroom;
2958 linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
2959 linear = max(linear, min_t(int, len, dev->hard_header_len));
2960 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
2961 msg->msg_flags & MSG_DONTWAIT, &err);
2962 if (skb == NULL)
2963 goto out_unlock;
2964
2965 skb_reset_network_header(skb);
2966
2967 err = -EINVAL;
2968 if (sock->type == SOCK_DGRAM) {
2969 offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
2970 if (unlikely(offset < 0))
2971 goto out_free;
2972 } else if (reserve) {
2973 skb_reserve(skb, -reserve);
2974 if (len < reserve + sizeof(struct ipv6hdr) &&
2975 dev->min_header_len != dev->hard_header_len)
2976 skb_reset_network_header(skb);
2977 }
2978
	/* Returns -EFAULT on error */
2980 err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
2981 if (err)
2982 goto out_free;
2983
2984 if (sock->type == SOCK_RAW &&
2985 !dev_validate_header(dev, skb->data, len)) {
2986 err = -EINVAL;
2987 goto out_free;
2988 }
2989
2990 skb_setup_tx_timestamp(skb, sockc.tsflags);
2991
2992 if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
2993 !packet_extra_vlan_len_allowed(dev, skb)) {
2994 err = -EMSGSIZE;
2995 goto out_free;
2996 }
2997
2998 skb->protocol = proto;
2999 skb->dev = dev;
3000 skb->priority = sk->sk_priority;
3001 skb->mark = sockc.mark;
3002 skb->tstamp = sockc.transmit_time;
3003
3004 if (has_vnet_hdr) {
3005 err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
3006 if (err)
3007 goto out_free;
3008 len += sizeof(vnet_hdr);
3009 virtio_net_hdr_set_proto(skb, &vnet_hdr);
3010 }
3011
3012 packet_parse_headers(skb, sock);
3013
3014 if (unlikely(extra_len == 4))
3015 skb->no_fcs = 1;
3016
3017 err = po->xmit(skb);
3018 if (err > 0 && (err = net_xmit_errno(err)) != 0)
3019 goto out_unlock;
3020
3021 dev_put(dev);
3022
3023 return len;
3024
3025out_free:
3026 kfree_skb(skb);
3027out_unlock:
3028 dev_put(dev);
3029out:
3030 return err;
3031}
3032
3033static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
3034{
3035 struct sock *sk = sock->sk;
3036 struct packet_sock *po = pkt_sk(sk);
3037
	/* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
	 * tpacket_snd() will redo the check safely.
	 */
3041 if (data_race(po->tx_ring.pg_vec))
3042 return tpacket_snd(po, msg);
3043
3044 return packet_snd(sock, msg, len);
3045}
3046
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 */
3052static int packet_release(struct socket *sock)
3053{
3054 struct sock *sk = sock->sk;
3055 struct packet_sock *po;
3056 struct packet_fanout *f;
3057 struct net *net;
3058 union tpacket_req_u req_u;
3059
3060 if (!sk)
3061 return 0;
3062
3063 net = sock_net(sk);
3064 po = pkt_sk(sk);
3065
3066 mutex_lock(&net->packet.sklist_lock);
3067 sk_del_node_init_rcu(sk);
3068 mutex_unlock(&net->packet.sklist_lock);
3069
3070 preempt_disable();
3071 sock_prot_inuse_add(net, sk->sk_prot, -1);
3072 preempt_enable();
3073
3074 spin_lock(&po->bind_lock);
3075 unregister_prot_hook(sk, false);
3076 packet_cached_dev_reset(po);
3077
3078 if (po->prot_hook.dev) {
3079 dev_put(po->prot_hook.dev);
3080 po->prot_hook.dev = NULL;
3081 }
3082 spin_unlock(&po->bind_lock);
3083
3084 packet_flush_mclist(sk);
3085
3086 lock_sock(sk);
3087 if (po->rx_ring.pg_vec) {
3088 memset(&req_u, 0, sizeof(req_u));
3089 packet_set_ring(sk, &req_u, 1, 0);
3090 }
3091
3092 if (po->tx_ring.pg_vec) {
3093 memset(&req_u, 0, sizeof(req_u));
3094 packet_set_ring(sk, &req_u, 1, 1);
3095 }
3096 release_sock(sk);
3097
3098 f = fanout_release(sk);
3099
3100 synchronize_net();
3101
3102 kfree(po->rollover);
3103 if (f) {
3104 fanout_release_data(f);
3105 kvfree(f);
3106 }
3107
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
3110 sock_orphan(sk);
3111 sock->sk = NULL;
3112
	/* Purge queues */
3115 skb_queue_purge(&sk->sk_receive_queue);
3116 packet_free_pending(po);
3117 sk_refcnt_debug_release(sk);
3118
3119 sock_put(sk);
3120 return 0;
3121}
3122
/*
 *	Attach a packet hook.
 */
3127static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
3128 __be16 proto)
3129{
3130 struct packet_sock *po = pkt_sk(sk);
3131 struct net_device *dev_curr;
3132 __be16 proto_curr;
3133 bool need_rehook;
3134 struct net_device *dev = NULL;
3135 int ret = 0;
3136 bool unlisted = false;
3137
3138 lock_sock(sk);
3139 spin_lock(&po->bind_lock);
3140 rcu_read_lock();
3141
3142 if (po->fanout) {
3143 ret = -EINVAL;
3144 goto out_unlock;
3145 }
3146
3147 if (name) {
3148 dev = dev_get_by_name_rcu(sock_net(sk), name);
3149 if (!dev) {
3150 ret = -ENODEV;
3151 goto out_unlock;
3152 }
3153 } else if (ifindex) {
3154 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3155 if (!dev) {
3156 ret = -ENODEV;
3157 goto out_unlock;
3158 }
3159 }
3160
3161 dev_hold(dev);
3162
3163 proto_curr = po->prot_hook.type;
3164 dev_curr = po->prot_hook.dev;
3165
3166 need_rehook = proto_curr != proto || dev_curr != dev;
3167
3168 if (need_rehook) {
3169 if (po->running) {
3170 rcu_read_unlock();
			/* prevents packet_notifier() from calling
			 * register_prot_hook()
			 */
3174 WRITE_ONCE(po->num, 0);
3175 __unregister_prot_hook(sk, true);
3176 rcu_read_lock();
3177 dev_curr = po->prot_hook.dev;
3178 if (dev)
3179 unlisted = !dev_get_by_index_rcu(sock_net(sk),
3180 dev->ifindex);
3181 }
3182
3183 BUG_ON(po->running);
3184 WRITE_ONCE(po->num, proto);
3185 po->prot_hook.type = proto;
3186
3187 if (unlikely(unlisted)) {
3188 dev_put(dev);
3189 po->prot_hook.dev = NULL;
3190 WRITE_ONCE(po->ifindex, -1);
3191 packet_cached_dev_reset(po);
3192 } else {
3193 po->prot_hook.dev = dev;
3194 WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
3195 packet_cached_dev_assign(po, dev);
3196 }
3197 }
3198 dev_put(dev_curr);
3199
3200 if (proto == 0 || !need_rehook)
3201 goto out_unlock;
3202
3203 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
3204 register_prot_hook(sk);
3205 } else {
3206 sk->sk_err = ENETDOWN;
3207 if (!sock_flag(sk, SOCK_DEAD))
3208 sk_error_report(sk);
3209 }
3210
3211out_unlock:
3212 rcu_read_unlock();
3213 spin_unlock(&po->bind_lock);
3214 release_sock(sk);
3215 return ret;
3216}
3217
/*
 *	Bind a packet socket to a device
 */
3222static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
3223 int addr_len)
3224{
3225 struct sock *sk = sock->sk;
3226 char name[sizeof(uaddr->sa_data) + 1];
3227
	/*
	 *	Check legality
	 */
3232 if (addr_len != sizeof(struct sockaddr))
3233 return -EINVAL;
3234
	/* uaddr->sa_data comes from user space, it's not guaranteed to be
	 * NUL-terminated.
	 */
3237 memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
3238 name[sizeof(uaddr->sa_data)] = 0;
3239
3240 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
3241}
3242
3243static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3244{
3245 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
3246 struct sock *sk = sock->sk;
3247
	/*
	 *	Check legality
	 */
3252 if (addr_len < sizeof(struct sockaddr_ll))
3253 return -EINVAL;
3254 if (sll->sll_family != AF_PACKET)
3255 return -EINVAL;
3256
3257 return packet_do_bind(sk, NULL, sll->sll_ifindex,
3258 sll->sll_protocol ? : pkt_sk(sk)->num);
3259}
3260
3261static struct proto packet_proto = {
3262 .name = "PACKET",
3263 .owner = THIS_MODULE,
3264 .obj_size = sizeof(struct packet_sock),
3265};
3266
/*
 *	Create a packet socket.
 */
3271static int packet_create(struct net *net, struct socket *sock, int protocol,
3272 int kern)
3273{
3274 struct sock *sk;
3275 struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
3277 int err;
3278
3279 if (!ns_capable(net->user_ns, CAP_NET_RAW))
3280 return -EPERM;
3281 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
3282 sock->type != SOCK_PACKET)
3283 return -ESOCKTNOSUPPORT;
3284
3285 sock->state = SS_UNCONNECTED;
3286
3287 err = -ENOBUFS;
3288 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
3289 if (sk == NULL)
3290 goto out;
3291
3292 sock->ops = &packet_ops;
3293 if (sock->type == SOCK_PACKET)
3294 sock->ops = &packet_ops_spkt;
3295
3296 sock_init_data(sock, sk);
3297
3298 po = pkt_sk(sk);
3299 init_completion(&po->skb_completion);
3300 sk->sk_family = PF_PACKET;
3301 po->num = proto;
3302 po->xmit = dev_queue_xmit;
3303
3304 err = packet_alloc_pending(po);
3305 if (err)
3306 goto out2;
3307
3308 packet_cached_dev_reset(po);
3309
3310 sk->sk_destruct = packet_sock_destruct;
3311 sk_refcnt_debug_inc(sk);
3312
	/*
	 *	Attach a protocol block
	 */
3317 spin_lock_init(&po->bind_lock);
3318 mutex_init(&po->pg_vec_lock);
3319 po->rollover = NULL;
3320 po->prot_hook.func = packet_rcv;
3321
3322 if (sock->type == SOCK_PACKET)
3323 po->prot_hook.func = packet_rcv_spkt;
3324
3325 po->prot_hook.af_packet_priv = sk;
3326
3327 if (proto) {
3328 po->prot_hook.type = proto;
3329 __register_prot_hook(sk);
3330 }
3331
3332 mutex_lock(&net->packet.sklist_lock);
3333 sk_add_node_tail_rcu(sk, &net->packet.sklist);
3334 mutex_unlock(&net->packet.sklist_lock);
3335
3336 preempt_disable();
3337 sock_prot_inuse_add(net, &packet_proto, 1);
3338 preempt_enable();
3339
3340 return 0;
3341out2:
3342 sk_free(sk);
3343out:
3344 return err;
3345}
3346
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 */
3352static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
3353 int flags)
3354{
3355 struct sock *sk = sock->sk;
3356 struct sk_buff *skb;
3357 int copied, err;
3358 int vnet_hdr_len = 0;
3359 unsigned int origlen = 0;
3360
3361 err = -EINVAL;
3362 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
3363 goto out;
3364
3365#if 0
	/* What error should we return now? EUNATCH? */
3367 if (pkt_sk(sk)->ifindex < 0)
3368 return -ENODEV;
3369#endif
3370
3371 if (flags & MSG_ERRQUEUE) {
3372 err = sock_recv_errqueue(sk, msg, len,
3373 SOL_PACKET, PACKET_TX_TIMESTAMP);
3374 goto out;
3375 }
3376
	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if the device has just gone down,
	 *	but then it will block.
	 */
3386 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
3387
	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */
3394 if (skb == NULL)
3395 goto out;
3396
3397 packet_rcv_try_clear_pressure(pkt_sk(sk));
3398
3399 if (pkt_sk(sk)->has_vnet_hdr) {
3400 err = packet_rcv_vnet(msg, skb, &len);
3401 if (err)
3402 goto out_free;
3403 vnet_hdr_len = sizeof(struct virtio_net_hdr);
3404 }
3405
	/* You lose any data beyond the buffer you gave. If it worries
	 * a user program they can ask the device for its MTU
	 * anyway.
	 */
3410 copied = skb->len;
3411 if (copied > len) {
3412 copied = len;
3413 msg->msg_flags |= MSG_TRUNC;
3414 }
3415
3416 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3417 if (err)
3418 goto out_free;
3419
3420 if (sock->type != SOCK_PACKET) {
3421 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;

		/* Original length was stored in sockaddr_ll fields */
3424 origlen = PACKET_SKB_CB(skb)->sa.origlen;
3425 sll->sll_family = AF_PACKET;
3426 sll->sll_protocol = skb->protocol;
3427 }
3428
3429 sock_recv_ts_and_drops(msg, sk, skb);
3430
3431 if (msg->msg_name) {
3432 int copy_len;

		/* If the address length field is there to be filled
		 * in, we fill it in now.
		 */
3437 if (sock->type == SOCK_PACKET) {
3438 __sockaddr_check_size(sizeof(struct sockaddr_pkt));
3439 msg->msg_namelen = sizeof(struct sockaddr_pkt);
3440 copy_len = msg->msg_namelen;
3441 } else {
3442 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
3443
3444 msg->msg_namelen = sll->sll_halen +
3445 offsetof(struct sockaddr_ll, sll_addr);
3446 copy_len = msg->msg_namelen;
3447 if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
3448 memset(msg->msg_name +
3449 offsetof(struct sockaddr_ll, sll_addr),
3450 0, sizeof(sll->sll_addr));
3451 msg->msg_namelen = sizeof(struct sockaddr_ll);
3452 }
3453 }
3454 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
3455 }
3456
3457 if (pkt_sk(sk)->auxdata) {
3458 struct tpacket_auxdata aux;
3459
3460 aux.tp_status = TP_STATUS_USER;
3461 if (skb->ip_summed == CHECKSUM_PARTIAL)
3462 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
3463 else if (skb->pkt_type != PACKET_OUTGOING &&
3464 (skb->ip_summed == CHECKSUM_COMPLETE ||
3465 skb_csum_unnecessary(skb)))
3466 aux.tp_status |= TP_STATUS_CSUM_VALID;
3467
3468 aux.tp_len = origlen;
3469 aux.tp_snaplen = skb->len;
3470 aux.tp_mac = 0;
3471 aux.tp_net = skb_network_offset(skb);
3472 if (skb_vlan_tag_present(skb)) {
3473 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
3474 aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
3475 aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
3476 } else {
3477 aux.tp_vlan_tci = 0;
3478 aux.tp_vlan_tpid = 0;
3479 }
3480 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
3481 }
3482
	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
3487 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
3488
3489out_free:
3490 skb_free_datagram(sk, skb);
3491out:
3492 return err;
3493}
3494
3495static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
3496 int peer)
3497{
3498 struct net_device *dev;
3499 struct sock *sk = sock->sk;
3500
3501 if (peer)
3502 return -EOPNOTSUPP;
3503
3504 uaddr->sa_family = AF_PACKET;
3505 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3506 rcu_read_lock();
3507 dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
3508 if (dev)
3509 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3510 rcu_read_unlock();
3511
3512 return sizeof(*uaddr);
3513}
3514
3515static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3516 int peer)
3517{
3518 struct net_device *dev;
3519 struct sock *sk = sock->sk;
3520 struct packet_sock *po = pkt_sk(sk);
3521 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3522 int ifindex;
3523
3524 if (peer)
3525 return -EOPNOTSUPP;
3526
3527 ifindex = READ_ONCE(po->ifindex);
3528 sll->sll_family = AF_PACKET;
3529 sll->sll_ifindex = ifindex;
3530 sll->sll_protocol = READ_ONCE(po->num);
3531 sll->sll_pkttype = 0;
3532 rcu_read_lock();
3533 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
3534 if (dev) {
3535 sll->sll_hatype = dev->type;
3536 sll->sll_halen = dev->addr_len;
3537 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3538 } else {
3539 sll->sll_hatype = 0;
3540 sll->sll_halen = 0;
3541 }
3542 rcu_read_unlock();
3543
3544 return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3545}
3546
3547static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3548 int what)
3549{
3550 switch (i->type) {
3551 case PACKET_MR_MULTICAST:
3552 if (i->alen != dev->addr_len)
3553 return -EINVAL;
3554 if (what > 0)
3555 return dev_mc_add(dev, i->addr);
3556 else
3557 return dev_mc_del(dev, i->addr);
3558 break;
3559 case PACKET_MR_PROMISC:
3560 return dev_set_promiscuity(dev, what);
3561 case PACKET_MR_ALLMULTI:
3562 return dev_set_allmulti(dev, what);
3563 case PACKET_MR_UNICAST:
3564 if (i->alen != dev->addr_len)
3565 return -EINVAL;
3566 if (what > 0)
3567 return dev_uc_add(dev, i->addr);
3568 else
3569 return dev_uc_del(dev, i->addr);
3570 break;
3571 default:
3572 break;
3573 }
3574 return 0;
3575}
3576
3577static void packet_dev_mclist_delete(struct net_device *dev,
3578 struct packet_mclist **mlp)
3579{
3580 struct packet_mclist *ml;
3581
3582 while ((ml = *mlp) != NULL) {
3583 if (ml->ifindex == dev->ifindex) {
3584 packet_dev_mc(dev, ml, -1);
3585 *mlp = ml->next;
3586 kfree(ml);
3587 } else
3588 mlp = &ml->next;
3589 }
3590}
3591
3592static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3593{
3594 struct packet_sock *po = pkt_sk(sk);
3595 struct packet_mclist *ml, *i;
3596 struct net_device *dev;
3597 int err;
3598
3599 rtnl_lock();
3600
3601 err = -ENODEV;
3602 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3603 if (!dev)
3604 goto done;
3605
3606 err = -EINVAL;
3607 if (mreq->mr_alen > dev->addr_len)
3608 goto done;
3609
3610 err = -ENOBUFS;
3611 i = kmalloc(sizeof(*i), GFP_KERNEL);
3612 if (i == NULL)
3613 goto done;
3614
3615 err = 0;
3616 for (ml = po->mclist; ml; ml = ml->next) {
3617 if (ml->ifindex == mreq->mr_ifindex &&
3618 ml->type == mreq->mr_type &&
3619 ml->alen == mreq->mr_alen &&
3620 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3621 ml->count++;
			/* Already subscribed; free the duplicate entry. */
3623 kfree(i);
3624 goto done;
3625 }
3626 }
3627
3628 i->type = mreq->mr_type;
3629 i->ifindex = mreq->mr_ifindex;
3630 i->alen = mreq->mr_alen;
3631 memcpy(i->addr, mreq->mr_address, i->alen);
3632 memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
3633 i->count = 1;
3634 i->next = po->mclist;
3635 po->mclist = i;
3636 err = packet_dev_mc(dev, i, 1);
3637 if (err) {
3638 po->mclist = i->next;
3639 kfree(i);
3640 }
3641
3642done:
3643 rtnl_unlock();
3644 return err;
3645}
3646
3647static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3648{
3649 struct packet_mclist *ml, **mlp;
3650
3651 rtnl_lock();
3652
3653 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3654 if (ml->ifindex == mreq->mr_ifindex &&
3655 ml->type == mreq->mr_type &&
3656 ml->alen == mreq->mr_alen &&
3657 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3658 if (--ml->count == 0) {
3659 struct net_device *dev;
3660 *mlp = ml->next;
3661 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3662 if (dev)
3663 packet_dev_mc(dev, ml, -1);
3664 kfree(ml);
3665 }
3666 break;
3667 }
3668 }
3669 rtnl_unlock();
3670 return 0;
3671}
3672
3673static void packet_flush_mclist(struct sock *sk)
3674{
3675 struct packet_sock *po = pkt_sk(sk);
3676 struct packet_mclist *ml;
3677
3678 if (!po->mclist)
3679 return;
3680
3681 rtnl_lock();
3682 while ((ml = po->mclist) != NULL) {
3683 struct net_device *dev;
3684
3685 po->mclist = ml->next;
3686 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3687 if (dev != NULL)
3688 packet_dev_mc(dev, ml, -1);
3689 kfree(ml);
3690 }
3691 rtnl_unlock();
3692}
3693
3694static int
3695packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
3696 unsigned int optlen)
3697{
3698 struct sock *sk = sock->sk;
3699 struct packet_sock *po = pkt_sk(sk);
3700 int ret;
3701
3702 if (level != SOL_PACKET)
3703 return -ENOPROTOOPT;
3704
3705 switch (optname) {
3706 case PACKET_ADD_MEMBERSHIP:
3707 case PACKET_DROP_MEMBERSHIP:
3708 {
3709 struct packet_mreq_max mreq;
3710 int len = optlen;
3711 memset(&mreq, 0, sizeof(mreq));
3712 if (len < sizeof(struct packet_mreq))
3713 return -EINVAL;
3714 if (len > sizeof(mreq))
3715 len = sizeof(mreq);
3716 if (copy_from_sockptr(&mreq, optval, len))
3717 return -EFAULT;
3718 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3719 return -EINVAL;
3720 if (optname == PACKET_ADD_MEMBERSHIP)
3721 ret = packet_mc_add(sk, &mreq);
3722 else
3723 ret = packet_mc_drop(sk, &mreq);
3724 return ret;
3725 }
3726
3727 case PACKET_RX_RING:
3728 case PACKET_TX_RING:
3729 {
3730 union tpacket_req_u req_u;
3731 int len;
3732
3733 lock_sock(sk);
3734 switch (po->tp_version) {
3735 case TPACKET_V1:
3736 case TPACKET_V2:
3737 len = sizeof(req_u.req);
3738 break;
3739 case TPACKET_V3:
3740 default:
3741 len = sizeof(req_u.req3);
3742 break;
3743 }
3744 if (optlen < len) {
3745 ret = -EINVAL;
3746 } else {
3747 if (copy_from_sockptr(&req_u.req, optval, len))
3748 ret = -EFAULT;
3749 else
3750 ret = packet_set_ring(sk, &req_u, 0,
3751 optname == PACKET_TX_RING);
3752 }
3753 release_sock(sk);
3754 return ret;
3755 }
3756 case PACKET_COPY_THRESH:
3757 {
3758 int val;
3759
3760 if (optlen != sizeof(val))
3761 return -EINVAL;
3762 if (copy_from_sockptr(&val, optval, sizeof(val)))
3763 return -EFAULT;
3764
3765 pkt_sk(sk)->copy_thresh = val;
3766 return 0;
3767 }
3768 case PACKET_VERSION:
3769 {
3770 int val;
3771
3772 if (optlen != sizeof(val))
3773 return -EINVAL;
3774 if (copy_from_sockptr(&val, optval, sizeof(val)))
3775 return -EFAULT;
3776 switch (val) {
3777 case TPACKET_V1:
3778 case TPACKET_V2:
3779 case TPACKET_V3:
3780 break;
3781 default:
3782 return -EINVAL;
3783 }
3784 lock_sock(sk);
3785 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3786 ret = -EBUSY;
3787 } else {
3788 po->tp_version = val;
3789 ret = 0;
3790 }
3791 release_sock(sk);
3792 return ret;
3793 }
3794 case PACKET_RESERVE:
3795 {
3796 unsigned int val;
3797
3798 if (optlen != sizeof(val))
3799 return -EINVAL;
3800 if (copy_from_sockptr(&val, optval, sizeof(val)))
3801 return -EFAULT;
3802 if (val > INT_MAX)
3803 return -EINVAL;
3804 lock_sock(sk);
3805 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3806 ret = -EBUSY;
3807 } else {
3808 po->tp_reserve = val;
3809 ret = 0;
3810 }
3811 release_sock(sk);
3812 return ret;
3813 }
3814 case PACKET_LOSS:
3815 {
3816 unsigned int val;
3817
3818 if (optlen != sizeof(val))
3819 return -EINVAL;
3820 if (copy_from_sockptr(&val, optval, sizeof(val)))
3821 return -EFAULT;
3822
3823 lock_sock(sk);
3824 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3825 ret = -EBUSY;
3826 } else {
3827 po->tp_loss = !!val;
3828 ret = 0;
3829 }
3830 release_sock(sk);
3831 return ret;
3832 }
3833 case PACKET_AUXDATA:
3834 {
3835 int val;
3836
3837 if (optlen < sizeof(val))
3838 return -EINVAL;
3839 if (copy_from_sockptr(&val, optval, sizeof(val)))
3840 return -EFAULT;
3841
3842 lock_sock(sk);
3843 po->auxdata = !!val;
3844 release_sock(sk);
3845 return 0;
3846 }
3847 case PACKET_ORIGDEV:
3848 {
3849 int val;
3850
3851 if (optlen < sizeof(val))
3852 return -EINVAL;
3853 if (copy_from_sockptr(&val, optval, sizeof(val)))
3854 return -EFAULT;
3855
3856 lock_sock(sk);
3857 po->origdev = !!val;
3858 release_sock(sk);
3859 return 0;
3860 }
3861 case PACKET_VNET_HDR:
3862 {
3863 int val;
3864
3865 if (sock->type != SOCK_RAW)
3866 return -EINVAL;
3867 if (optlen < sizeof(val))
3868 return -EINVAL;
3869 if (copy_from_sockptr(&val, optval, sizeof(val)))
3870 return -EFAULT;
3871
3872 lock_sock(sk);
3873 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3874 ret = -EBUSY;
3875 } else {
3876 po->has_vnet_hdr = !!val;
3877 ret = 0;
3878 }
3879 release_sock(sk);
3880 return ret;
3881 }
3882 case PACKET_TIMESTAMP:
3883 {
3884 int val;
3885
3886 if (optlen != sizeof(val))
3887 return -EINVAL;
3888 if (copy_from_sockptr(&val, optval, sizeof(val)))
3889 return -EFAULT;
3890
3891 po->tp_tstamp = val;
3892 return 0;
3893 }
3894 case PACKET_FANOUT:
3895 {
3896 struct fanout_args args = { 0 };
3897
3898 if (optlen != sizeof(int) && optlen != sizeof(args))
3899 return -EINVAL;
3900 if (copy_from_sockptr(&args, optval, optlen))
3901 return -EFAULT;
3902
3903 return fanout_add(sk, &args);
3904 }
3905 case PACKET_FANOUT_DATA:
3906 {
3907 if (!po->fanout)
3908 return -EINVAL;
3909
3910 return fanout_set_data(po, optval, optlen);
3911 }
3912 case PACKET_IGNORE_OUTGOING:
3913 {
3914 int val;
3915
3916 if (optlen != sizeof(val))
3917 return -EINVAL;
3918 if (copy_from_sockptr(&val, optval, sizeof(val)))
3919 return -EFAULT;
3920 if (val < 0 || val > 1)
3921 return -EINVAL;
3922
3923 po->prot_hook.ignore_outgoing = !!val;
3924 return 0;
3925 }
3926 case PACKET_TX_HAS_OFF:
3927 {
3928 unsigned int val;
3929
3930 if (optlen != sizeof(val))
3931 return -EINVAL;
3932 if (copy_from_sockptr(&val, optval, sizeof(val)))
3933 return -EFAULT;
3934
3935 lock_sock(sk);
3936 if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
3937 po->tp_tx_has_off = !!val;
3938
3939 release_sock(sk);
3940 return 0;
3941 }
3942 case PACKET_QDISC_BYPASS:
3943 {
3944 int val;
3945
3946 if (optlen != sizeof(val))
3947 return -EINVAL;
3948 if (copy_from_sockptr(&val, optval, sizeof(val)))
3949 return -EFAULT;
3950
3951 po->xmit = val ? packet_direct_xmit : dev_queue_xmit;
3952 return 0;
3953 }
3954 default:
3955 return -ENOPROTOOPT;
3956 }
3957}
3958
3959static int packet_getsockopt(struct socket *sock, int level, int optname,
3960 char __user *optval, int __user *optlen)
3961{
3962 int len;
3963 int val, lv = sizeof(val);
3964 struct sock *sk = sock->sk;
3965 struct packet_sock *po = pkt_sk(sk);
3966 void *data = &val;
3967 union tpacket_stats_u st;
3968 struct tpacket_rollover_stats rstats;
3969 int drops;
3970
3971 if (level != SOL_PACKET)
3972 return -ENOPROTOOPT;
3973
3974 if (get_user(len, optlen))
3975 return -EFAULT;
3976
3977 if (len < 0)
3978 return -EINVAL;
3979
3980 switch (optname) {
3981 case PACKET_STATISTICS:
3982 spin_lock_bh(&sk->sk_receive_queue.lock);
3983 memcpy(&st, &po->stats, sizeof(st));
3984 memset(&po->stats, 0, sizeof(po->stats));
3985 spin_unlock_bh(&sk->sk_receive_queue.lock);
3986 drops = atomic_xchg(&po->tp_drops, 0);
3987
3988 if (po->tp_version == TPACKET_V3) {
3989 lv = sizeof(struct tpacket_stats_v3);
3990 st.stats3.tp_drops = drops;
3991 st.stats3.tp_packets += drops;
3992 data = &st.stats3;
3993 } else {
3994 lv = sizeof(struct tpacket_stats);
3995 st.stats1.tp_drops = drops;
3996 st.stats1.tp_packets += drops;
3997 data = &st.stats1;
3998 }
3999
4000 break;
4001 case PACKET_AUXDATA:
4002 val = po->auxdata;
4003 break;
4004 case PACKET_ORIGDEV:
4005 val = po->origdev;
4006 break;
4007 case PACKET_VNET_HDR:
4008 val = po->has_vnet_hdr;
4009 break;
4010 case PACKET_VERSION:
4011 val = po->tp_version;
4012 break;
4013 case PACKET_HDRLEN:
4014 if (len > sizeof(int))
4015 len = sizeof(int);
4016 if (len < sizeof(int))
4017 return -EINVAL;
4018 if (copy_from_user(&val, optval, len))
4019 return -EFAULT;
4020 switch (val) {
4021 case TPACKET_V1:
4022 val = sizeof(struct tpacket_hdr);
4023 break;
4024 case TPACKET_V2:
4025 val = sizeof(struct tpacket2_hdr);
4026 break;
4027 case TPACKET_V3:
4028 val = sizeof(struct tpacket3_hdr);
4029 break;
4030 default:
4031 return -EINVAL;
4032 }
4033 break;
4034 case PACKET_RESERVE:
4035 val = po->tp_reserve;
4036 break;
4037 case PACKET_LOSS:
4038 val = po->tp_loss;
4039 break;
4040 case PACKET_TIMESTAMP:
4041 val = po->tp_tstamp;
4042 break;
4043 case PACKET_FANOUT:
4044 val = (po->fanout ?
4045 ((u32)po->fanout->id |
4046 ((u32)po->fanout->type << 16) |
4047 ((u32)po->fanout->flags << 24)) :
4048 0);
4049 break;
4050 case PACKET_IGNORE_OUTGOING:
4051 val = po->prot_hook.ignore_outgoing;
4052 break;
4053 case PACKET_ROLLOVER_STATS:
4054 if (!po->rollover)
4055 return -EINVAL;
4056 rstats.tp_all = atomic_long_read(&po->rollover->num);
4057 rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
4058 rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
4059 data = &rstats;
4060 lv = sizeof(rstats);
4061 break;
4062 case PACKET_TX_HAS_OFF:
4063 val = po->tp_tx_has_off;
4064 break;
4065 case PACKET_QDISC_BYPASS:
4066 val = packet_use_direct_xmit(po);
4067 break;
4068 default:
4069 return -ENOPROTOOPT;
4070 }
4071
4072 if (len > lv)
4073 len = lv;
4074 if (put_user(len, optlen))
4075 return -EFAULT;
4076 if (copy_to_user(optval, data, len))
4077 return -EFAULT;
4078 return 0;
4079}
4080
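/* Netdevice notifier: unhook sockets whose device goes down or away,
 * and re-hook them when the device comes back up.
 */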
4081static int packet_notifier(struct notifier_block *this,
4082 unsigned long msg, void *ptr)
4083{
4084 struct sock *sk;
4085 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4086 struct net *net = dev_net(dev);
4087
4088 rcu_read_lock();
4089 sk_for_each_rcu(sk, &net->packet.sklist) {
4090 struct packet_sock *po = pkt_sk(sk);
4091
4092 switch (msg) {
4093 case NETDEV_UNREGISTER:
4094 if (po->mclist)
4095 packet_dev_mclist_delete(dev, &po->mclist);
4096 fallthrough;
4097
4098 case NETDEV_DOWN:
4099 if (dev->ifindex == po->ifindex) {
4100 spin_lock(&po->bind_lock);
4101 if (po->running) {
4102 __unregister_prot_hook(sk, false);
4103 sk->sk_err = ENETDOWN;
4104 if (!sock_flag(sk, SOCK_DEAD))
4105 sk_error_report(sk);
4106 }
4107 if (msg == NETDEV_UNREGISTER) {
4108 packet_cached_dev_reset(po);
4109 WRITE_ONCE(po->ifindex, -1);
4110 dev_put(po->prot_hook.dev);
4111 po->prot_hook.dev = NULL;
4112 }
4113 spin_unlock(&po->bind_lock);
4114 }
4115 break;
4116 case NETDEV_UP:
4117 if (dev->ifindex == po->ifindex) {
4118 spin_lock(&po->bind_lock);
4119 if (po->num)
4120 register_prot_hook(sk);
4121 spin_unlock(&po->bind_lock);
4122 }
4123 break;
4124 }
4125 }
4126 rcu_read_unlock();
4127 return NOTIFY_DONE;
4128}
4129
4130
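/* Socket ioctls: answer queue-size queries here and hand the historic
 * inet ioctls over to inet_dgram_ops.
 */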
4131static int packet_ioctl(struct socket *sock, unsigned int cmd,
4132 unsigned long arg)
4133{
4134 struct sock *sk = sock->sk;
4135
4136 switch (cmd) {
4137 case SIOCOUTQ:
4138 {
4139 int amount = sk_wmem_alloc_get(sk);
4140
4141 return put_user(amount, (int __user *)arg);
4142 }
4143 case SIOCINQ:
4144 {
4145 struct sk_buff *skb;
4146 int amount = 0;
4147
4148 spin_lock_bh(&sk->sk_receive_queue.lock);
4149 skb = skb_peek(&sk->sk_receive_queue);
4150 if (skb)
4151 amount = skb->len;
4152 spin_unlock_bh(&sk->sk_receive_queue.lock);
4153 return put_user(amount, (int __user *)arg);
4154 }
4155#ifdef CONFIG_INET
4156 case SIOCADDRT:
4157 case SIOCDELRT:
4158 case SIOCDARP:
4159 case SIOCGARP:
4160 case SIOCSARP:
4161 case SIOCGIFADDR:
4162 case SIOCSIFADDR:
4163 case SIOCGIFBRDADDR:
4164 case SIOCSIFBRDADDR:
4165 case SIOCGIFNETMASK:
4166 case SIOCSIFNETMASK:
4167 case SIOCGIFDSTADDR:
4168 case SIOCSIFDSTADDR:
4169 case SIOCSIFFLAGS:
4170 return inet_dgram_ops.ioctl(sock, cmd, arg);
4171#endif
4172
4173 default:
4174 return -ENOIOCTLCMD;
4175 }
4176 return 0;
4177}
4178
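/* Poll: in addition to the usual datagram semantics, report the rx ring
 * readable and the tx ring writable based on their frame status.
 */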
4179static __poll_t packet_poll(struct file *file, struct socket *sock,
4180 poll_table *wait)
4181{
4182 struct sock *sk = sock->sk;
4183 struct packet_sock *po = pkt_sk(sk);
4184 __poll_t mask = datagram_poll(file, sock, wait);
4185
4186 spin_lock_bh(&sk->sk_receive_queue.lock);
4187 if (po->rx_ring.pg_vec) {
4188 if (!packet_previous_rx_frame(po, &po->rx_ring,
4189 TP_STATUS_KERNEL))
4190 mask |= EPOLLIN | EPOLLRDNORM;
4191 }
4192 packet_rcv_try_clear_pressure(po);
4193 spin_unlock_bh(&sk->sk_receive_queue.lock);
4194 spin_lock_bh(&sk->sk_write_queue.lock);
4195 if (po->tx_ring.pg_vec) {
4196 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
4197 mask |= EPOLLOUT | EPOLLWRNORM;
4198 }
4199 spin_unlock_bh(&sk->sk_write_queue.lock);
4200 return mask;
4201}
4202
/* Keep a count of user mmaps of the rings so packet_set_ring() can tell
 * when the pages are still mapped and must not be freed.
 */
4208static void packet_mm_open(struct vm_area_struct *vma)
4209{
4210 struct file *file = vma->vm_file;
4211 struct socket *sock = file->private_data;
4212 struct sock *sk = sock->sk;
4213
4214 if (sk)
4215 atomic_inc(&pkt_sk(sk)->mapped);
4216}
4217
4218static void packet_mm_close(struct vm_area_struct *vma)
4219{
4220 struct file *file = vma->vm_file;
4221 struct socket *sock = file->private_data;
4222 struct sock *sk = sock->sk;
4223
4224 if (sk)
4225 atomic_dec(&pkt_sk(sk)->mapped);
4226}
4227
4228static const struct vm_operations_struct packet_mmap_ops = {
4229 .open = packet_mm_open,
4230 .close = packet_mm_close,
4231};
4232
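/* Free a ring's page vector; each buffer came either from the page
 * allocator or from vmalloc(), see alloc_one_pg_vec_page().
 */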
4233static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
4234 unsigned int len)
4235{
4236 int i;
4237
4238 for (i = 0; i < len; i++) {
4239 if (likely(pg_vec[i].buffer)) {
4240 if (is_vmalloc_addr(pg_vec[i].buffer))
4241 vfree(pg_vec[i].buffer);
4242 else
4243 free_pages((unsigned long)pg_vec[i].buffer,
4244 order);
4245 pg_vec[i].buffer = NULL;
4246 }
4247 }
4248 kfree(pg_vec);
4249}
4250
4251static char *alloc_one_pg_vec_page(unsigned long order)
4252{
4253 char *buffer;
4254 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
4255 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
4256
4257 buffer = (char *) __get_free_pages(gfp_flags, order);
4258 if (buffer)
4259 return buffer;
4260
	/* __get_free_pages failed, fall back to vmalloc */
4262 buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
4263 if (buffer)
4264 return buffer;
4265
	/* vmalloc failed, let's dig into swap here */
4267 gfp_flags &= ~__GFP_NORETRY;
4268 buffer = (char *) __get_free_pages(gfp_flags, order);
4269 if (buffer)
4270 return buffer;
4271
	/* complete and utter failure */
4273 return NULL;
4274}
4275
4276static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
4277{
4278 unsigned int block_nr = req->tp_block_nr;
4279 struct pgv *pg_vec;
4280 int i;
4281
4282 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
4283 if (unlikely(!pg_vec))
4284 goto out;
4285
4286 for (i = 0; i < block_nr; i++) {
4287 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
4288 if (unlikely(!pg_vec[i].buffer))
4289 goto out_free_pgvec;
4290 }
4291
4292out:
4293 return pg_vec;
4294
4295out_free_pgvec:
4296 free_pg_vec(pg_vec, order, block_nr);
4297 pg_vec = NULL;
4298 goto out;
4299}
4300
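/* Create or destroy an rx/tx mmap ring. Runs under the socket lock; the
 * protocol hook is temporarily unregistered while the ring is swapped.
 */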
4301static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
4302 int closing, int tx_ring)
4303{
4304 struct pgv *pg_vec = NULL;
4305 struct packet_sock *po = pkt_sk(sk);
4306 unsigned long *rx_owner_map = NULL;
4307 int was_running, order = 0;
4308 struct packet_ring_buffer *rb;
4309 struct sk_buff_head *rb_queue;
4310 __be16 num;
4311 int err;
4312
4313 struct tpacket_req *req = &req_u->req;
4314
4315 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
4316 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
4317
4318 err = -EBUSY;
4319 if (!closing) {
4320 if (atomic_read(&po->mapped))
4321 goto out;
4322 if (packet_read_pending(rb))
4323 goto out;
4324 }
4325
4326 if (req->tp_block_nr) {
4327 unsigned int min_frame_size;
4328
		/* Sanity tests and some calculations */
4330 err = -EBUSY;
4331 if (unlikely(rb->pg_vec))
4332 goto out;
4333
4334 switch (po->tp_version) {
4335 case TPACKET_V1:
4336 po->tp_hdrlen = TPACKET_HDRLEN;
4337 break;
4338 case TPACKET_V2:
4339 po->tp_hdrlen = TPACKET2_HDRLEN;
4340 break;
4341 case TPACKET_V3:
4342 po->tp_hdrlen = TPACKET3_HDRLEN;
4343 break;
4344 }
4345
4346 err = -EINVAL;
4347 if (unlikely((int)req->tp_block_size <= 0))
4348 goto out;
4349 if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
4350 goto out;
4351 min_frame_size = po->tp_hdrlen + po->tp_reserve;
4352 if (po->tp_version >= TPACKET_V3 &&
4353 req->tp_block_size <
4354 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
4355 goto out;
4356 if (unlikely(req->tp_frame_size < min_frame_size))
4357 goto out;
4358 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
4359 goto out;
4360
4361 rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
4362 if (unlikely(rb->frames_per_block == 0))
4363 goto out;
4364 if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
4365 goto out;
4366 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
4367 req->tp_frame_nr))
4368 goto out;
4369
4370 err = -ENOMEM;
4371 order = get_order(req->tp_block_size);
4372 pg_vec = alloc_pg_vec(req, order);
4373 if (unlikely(!pg_vec))
4374 goto out;
4375 switch (po->tp_version) {
4376 case TPACKET_V3:
			/* Block transmit is not supported yet */
4378 if (!tx_ring) {
4379 init_prb_bdqc(po, rb, pg_vec, req_u);
4380 } else {
4381 struct tpacket_req3 *req3 = &req_u->req3;
4382
4383 if (req3->tp_retire_blk_tov ||
4384 req3->tp_sizeof_priv ||
4385 req3->tp_feature_req_word) {
4386 err = -EINVAL;
4387 goto out_free_pg_vec;
4388 }
4389 }
4390 break;
4391 default:
4392 if (!tx_ring) {
4393 rx_owner_map = bitmap_alloc(req->tp_frame_nr,
4394 GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
4395 if (!rx_owner_map)
4396 goto out_free_pg_vec;
4397 }
4398 break;
4399 }
	} else {
4403 err = -EINVAL;
4404 if (unlikely(req->tp_frame_nr))
4405 goto out;
4406 }
4407
	/* Detach socket from network */
4410 spin_lock(&po->bind_lock);
4411 was_running = po->running;
4412 num = po->num;
4413 if (was_running) {
4414 WRITE_ONCE(po->num, 0);
4415 __unregister_prot_hook(sk, false);
4416 }
4417 spin_unlock(&po->bind_lock);
4418
4419 synchronize_net();
4420
4421 err = -EBUSY;
4422 mutex_lock(&po->pg_vec_lock);
4423 if (closing || atomic_read(&po->mapped) == 0) {
4424 err = 0;
4425 spin_lock_bh(&rb_queue->lock);
4426 swap(rb->pg_vec, pg_vec);
4427 if (po->tp_version <= TPACKET_V2)
4428 swap(rb->rx_owner_map, rx_owner_map);
4429 rb->frame_max = (req->tp_frame_nr - 1);
4430 rb->head = 0;
4431 rb->frame_size = req->tp_frame_size;
4432 spin_unlock_bh(&rb_queue->lock);
4433
4434 swap(rb->pg_vec_order, order);
4435 swap(rb->pg_vec_len, req->tp_block_nr);
4436
4437 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
4438 po->prot_hook.func = (po->rx_ring.pg_vec) ?
4439 tpacket_rcv : packet_rcv;
4440 skb_queue_purge(rb_queue);
4441 if (atomic_read(&po->mapped))
4442 pr_err("packet_mmap: vma is busy: %d\n",
4443 atomic_read(&po->mapped));
4444 }
4445 mutex_unlock(&po->pg_vec_lock);
4446
4447 spin_lock(&po->bind_lock);
4448 if (was_running) {
4449 WRITE_ONCE(po->num, num);
4450 register_prot_hook(sk);
4451 }
4452 spin_unlock(&po->bind_lock);
4453 if (pg_vec && (po->tp_version > TPACKET_V2)) {
		/* Because we don't support block-based V3 on tx-ring */
4455 if (!tx_ring)
4456 prb_shutdown_retire_blk_timer(po, rb_queue);
4457 }
4458
4459out_free_pg_vec:
4460 bitmap_free(rx_owner_map);
4461 if (pg_vec)
4462 free_pg_vec(pg_vec, order, req->tp_block_nr);
4463out:
4464 return err;
4465}
4466
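/* Map the rx and tx rings back-to-back into the caller's address space. */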
4467static int packet_mmap(struct file *file, struct socket *sock,
4468 struct vm_area_struct *vma)
4469{
4470 struct sock *sk = sock->sk;
4471 struct packet_sock *po = pkt_sk(sk);
4472 unsigned long size, expected_size;
4473 struct packet_ring_buffer *rb;
4474 unsigned long start;
4475 int err = -EINVAL;
4476 int i;
4477
4478 if (vma->vm_pgoff)
4479 return -EINVAL;
4480
4481 mutex_lock(&po->pg_vec_lock);
4482
4483 expected_size = 0;
4484 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4485 if (rb->pg_vec) {
4486 expected_size += rb->pg_vec_len
4487 * rb->pg_vec_pages
4488 * PAGE_SIZE;
4489 }
4490 }
4491
4492 if (expected_size == 0)
4493 goto out;
4494
4495 size = vma->vm_end - vma->vm_start;
4496 if (size != expected_size)
4497 goto out;
4498
4499 start = vma->vm_start;
4500 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
4501 if (rb->pg_vec == NULL)
4502 continue;
4503
4504 for (i = 0; i < rb->pg_vec_len; i++) {
4505 struct page *page;
4506 void *kaddr = rb->pg_vec[i].buffer;
4507 int pg_num;
4508
4509 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
4510 page = pgv_to_page(kaddr);
4511 err = vm_insert_page(vma, start, page);
4512 if (unlikely(err))
4513 goto out;
4514 start += PAGE_SIZE;
4515 kaddr += PAGE_SIZE;
4516 }
4517 }
4518 }
4519
4520 atomic_inc(&po->mapped);
4521 vma->vm_ops = &packet_mmap_ops;
4522 err = 0;
4523
4524out:
4525 mutex_unlock(&po->pg_vec_lock);
4526 return err;
4527}
4528
4529static const struct proto_ops packet_ops_spkt = {
4530 .family = PF_PACKET,
4531 .owner = THIS_MODULE,
4532 .release = packet_release,
4533 .bind = packet_bind_spkt,
4534 .connect = sock_no_connect,
4535 .socketpair = sock_no_socketpair,
4536 .accept = sock_no_accept,
4537 .getname = packet_getname_spkt,
4538 .poll = datagram_poll,
4539 .ioctl = packet_ioctl,
4540 .gettstamp = sock_gettstamp,
4541 .listen = sock_no_listen,
4542 .shutdown = sock_no_shutdown,
4543 .sendmsg = packet_sendmsg_spkt,
4544 .recvmsg = packet_recvmsg,
4545 .mmap = sock_no_mmap,
4546 .sendpage = sock_no_sendpage,
4547};
4548
4549static const struct proto_ops packet_ops = {
4550 .family = PF_PACKET,
4551 .owner = THIS_MODULE,
4552 .release = packet_release,
4553 .bind = packet_bind,
4554 .connect = sock_no_connect,
4555 .socketpair = sock_no_socketpair,
4556 .accept = sock_no_accept,
4557 .getname = packet_getname,
4558 .poll = packet_poll,
4559 .ioctl = packet_ioctl,
4560 .gettstamp = sock_gettstamp,
4561 .listen = sock_no_listen,
4562 .shutdown = sock_no_shutdown,
4563 .setsockopt = packet_setsockopt,
4564 .getsockopt = packet_getsockopt,
4565 .sendmsg = packet_sendmsg,
4566 .recvmsg = packet_recvmsg,
4567 .mmap = packet_mmap,
4568 .sendpage = sock_no_sendpage,
4569};
4570
4571static const struct net_proto_family packet_family_ops = {
4572 .family = PF_PACKET,
4573 .create = packet_create,
4574 .owner = THIS_MODULE,
4575};
4576
4577static struct notifier_block packet_netdev_notifier = {
4578 .notifier_call = packet_notifier,
4579};
4580
4581#ifdef CONFIG_PROC_FS
4582
4583static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
4584 __acquires(RCU)
4585{
4586 struct net *net = seq_file_net(seq);
4587
4588 rcu_read_lock();
4589 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
4590}
4591
4592static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4593{
4594 struct net *net = seq_file_net(seq);
4595 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4596}
4597
4598static void packet_seq_stop(struct seq_file *seq, void *v)
4599 __releases(RCU)
4600{
4601 rcu_read_unlock();
4602}
4603
4604static int packet_seq_show(struct seq_file *seq, void *v)
4605{
4606 if (v == SEQ_START_TOKEN)
4607 seq_printf(seq,
4608 "%*sRefCnt Type Proto Iface R Rmem User Inode\n",
4609 IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
4610 else {
4611 struct sock *s = sk_entry(v);
4612 const struct packet_sock *po = pkt_sk(s);
4613
4614 seq_printf(seq,
4615 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4616 s,
4617 refcount_read(&s->sk_refcnt),
4618 s->sk_type,
4619 ntohs(READ_ONCE(po->num)),
4620 READ_ONCE(po->ifindex),
4621 po->running,
4622 atomic_read(&s->sk_rmem_alloc),
4623 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4624 sock_i_ino(s));
4625 }
4626
4627 return 0;
4628}
4629
4630static const struct seq_operations packet_seq_ops = {
4631 .start = packet_seq_start,
4632 .next = packet_seq_next,
4633 .stop = packet_seq_stop,
4634 .show = packet_seq_show,
4635};
4636#endif
4637
4638static int __net_init packet_net_init(struct net *net)
4639{
4640 mutex_init(&net->packet.sklist_lock);
4641 INIT_HLIST_HEAD(&net->packet.sklist);
4642
4643#ifdef CONFIG_PROC_FS
4644 if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
4645 sizeof(struct seq_net_private)))
4646 return -ENOMEM;
4647#endif
4648
4649 return 0;
4650}
4651
4652static void __net_exit packet_net_exit(struct net *net)
4653{
4654 remove_proc_entry("packet", net->proc_net);
4655 WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
4656}
4657
4658static struct pernet_operations packet_net_ops = {
4659 .init = packet_net_init,
4660 .exit = packet_net_exit,
4661};
4662
4663
4664static void __exit packet_exit(void)
4665{
4666 unregister_netdevice_notifier(&packet_netdev_notifier);
4667 unregister_pernet_subsys(&packet_net_ops);
4668 sock_unregister(PF_PACKET);
4669 proto_unregister(&packet_proto);
4670}
4671
4672static int __init packet_init(void)
4673{
4674 int rc;
4675
4676 rc = proto_register(&packet_proto, 0);
4677 if (rc)
4678 goto out;
4679 rc = sock_register(&packet_family_ops);
4680 if (rc)
4681 goto out_proto;
4682 rc = register_pernet_subsys(&packet_net_ops);
4683 if (rc)
4684 goto out_sock;
4685 rc = register_netdevice_notifier(&packet_netdev_notifier);
4686 if (rc)
4687 goto out_pernet;
4688
4689 return 0;
4690
4691out_pernet:
4692 unregister_pernet_subsys(&packet_net_ops);
4693out_sock:
4694 sock_unregister(PF_PACKET);
4695out_proto:
4696 proto_unregister(&packet_proto);
4697out:
4698 return rc;
4699}
4700
4701module_init(packet_init);
4702module_exit(packet_exit);
4703MODULE_LICENSE("GPL");
4704MODULE_ALIAS_NETPROTO(PF_PACKET);
4705