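/*
 * PF_PACKET sockets: deliver raw link-layer frames between the device
 * layer and user space, either one packet at a time via recvmsg()/
 * sendmsg() or through the memory-mapped TPACKET_V1/V2/V3 rings.
 */
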
55#include <linux/types.h>
56#include <linux/mm.h>
57#include <linux/capability.h>
58#include <linux/fcntl.h>
59#include <linux/socket.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_packet.h>
64#include <linux/wireless.h>
65#include <linux/kernel.h>
66#include <linux/kmod.h>
67#include <linux/slab.h>
68#include <linux/vmalloc.h>
69#include <net/net_namespace.h>
70#include <net/ip.h>
71#include <net/protocol.h>
72#include <linux/skbuff.h>
73#include <net/sock.h>
74#include <linux/errno.h>
75#include <linux/timer.h>
76#include <asm/uaccess.h>
77#include <asm/ioctls.h>
78#include <asm/page.h>
79#include <asm/cacheflush.h>
80#include <asm/io.h>
81#include <linux/proc_fs.h>
82#include <linux/seq_file.h>
83#include <linux/poll.h>
84#include <linux/module.h>
85#include <linux/init.h>
86#include <linux/mutex.h>
87#include <linux/if_vlan.h>
88#include <linux/virtio_net.h>
89#include <linux/errqueue.h>
90#include <linux/net_tstamp.h>
91#include <linux/percpu.h>
92#ifdef CONFIG_INET
93#include <net/inet_common.h>
94#endif
95
96#include "internal.h"
97
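/*
 * Ring-buffer plumbing: per-frame status handling for TPACKET_V1/V2 and
 * the block-based TPACKET_V3 receive ring, whose blocks are retired
 * either when they fill up or when the retire-block timer expires.
 */
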
154struct packet_mreq_max {
155 int mr_ifindex;
156 unsigned short mr_type;
157 unsigned short mr_alen;
158 unsigned char mr_address[MAX_ADDR_LEN];
159};
160
161union tpacket_uhdr {
162 struct tpacket_hdr *h1;
163 struct tpacket2_hdr *h2;
164 struct tpacket3_hdr *h3;
165 void *raw;
166};
167
168static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
169 int closing, int tx_ring);
170
171#define V3_ALIGNMENT (8)
172
173#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))
174
175#define BLK_PLUS_PRIV(sz_of_priv) \
176 (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))
177
178#define PGV_FROM_VMALLOC 1
179
180#define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status)
181#define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts)
182#define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt)
183#define BLOCK_LEN(x) ((x)->hdr.bh1.blk_len)
184#define BLOCK_SNUM(x) ((x)->hdr.bh1.seq_num)
185#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
186#define BLOCK_PRIV(x) ((void *)((char *)(x) + BLOCK_O2PRIV(x)))
187
188struct packet_sock;
189static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
190static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
191 struct packet_type *pt, struct net_device *orig_dev);
192
193static void *packet_previous_frame(struct packet_sock *po,
194 struct packet_ring_buffer *rb,
195 int status);
196static void packet_increment_head(struct packet_ring_buffer *buff);
197static int prb_curr_blk_in_use(struct tpacket_kbdq_core *,
198 struct tpacket_block_desc *);
199static void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
200 struct packet_sock *);
201static void prb_retire_current_block(struct tpacket_kbdq_core *,
202 struct packet_sock *, unsigned int status);
203static int prb_queue_frozen(struct tpacket_kbdq_core *);
204static void prb_open_block(struct tpacket_kbdq_core *,
205 struct tpacket_block_desc *);
206static void prb_retire_rx_blk_timer_expired(unsigned long);
207static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
208static void prb_init_blk_timer(struct packet_sock *,
209 struct tpacket_kbdq_core *,
210 void (*func) (unsigned long));
211static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
212static void prb_clear_rxhash(struct tpacket_kbdq_core *,
213 struct tpacket3_hdr *);
214static void prb_fill_vlan_info(struct tpacket_kbdq_core *,
215 struct tpacket3_hdr *);
216static void packet_flush_mclist(struct sock *sk);
217
218struct packet_skb_cb {
219 unsigned int origlen;
220 union {
221 struct sockaddr_pkt pkt;
222 struct sockaddr_ll ll;
223 } sa;
224};
225
226#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
227
228#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
229#define GET_PBLOCK_DESC(x, bid) \
230 ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
231#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \
232 ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
233#define GET_NEXT_PRB_BLK_NUM(x) \
234 (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
235 ((x)->kactive_blk_num+1) : 0)
236
237static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
238static void __fanout_link(struct sock *sk, struct packet_sock *po);
239
240static struct net_device *packet_cached_dev_get(struct packet_sock *po)
241{
242 struct net_device *dev;
243
244 rcu_read_lock();
245 dev = rcu_dereference(po->cached_dev);
246 if (likely(dev))
247 dev_hold(dev);
248 rcu_read_unlock();
249
250 return dev;
251}
252
253static void packet_cached_dev_assign(struct packet_sock *po,
254 struct net_device *dev)
255{
256 rcu_assign_pointer(po->cached_dev, dev);
257}
258
259static void packet_cached_dev_reset(struct packet_sock *po)
260{
261 RCU_INIT_POINTER(po->cached_dev, NULL);
262}
263
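/*
 * register_prot_hook() must be called with po->bind_lock held, or on a
 * socket that is not yet visible to other contexts.  It either links the
 * socket into its fanout group or registers po->prot_hook with the device
 * layer, and holds a socket reference while the hook is live.  Paired
 * with __unregister_prot_hook().
 */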
268static void register_prot_hook(struct sock *sk)
269{
270 struct packet_sock *po = pkt_sk(sk);
271
272 if (!po->running) {
273 if (po->fanout)
274 __fanout_link(sk, po);
275 else
276 dev_add_pack(&po->prot_hook);
277
278 sock_hold(sk);
279 po->running = 1;
280 }
281}
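
/*
 * Undo register_prot_hook().  Callers hold po->bind_lock and the hook is
 * known to be registered (po->running).  When @sync is true, bind_lock is
 * dropped around synchronize_net() so that no handler can still be
 * executing the old protocol hook once we return.
 */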
290static void __unregister_prot_hook(struct sock *sk, bool sync)
291{
292 struct packet_sock *po = pkt_sk(sk);
293
294 po->running = 0;
295
296 if (po->fanout)
297 __fanout_unlink(sk, po);
298 else
299 __dev_remove_pack(&po->prot_hook);
300
301 __sock_put(sk);
302
303 if (sync) {
304 spin_unlock(&po->bind_lock);
305 synchronize_net();
306 spin_lock(&po->bind_lock);
307 }
308}
309
310static void unregister_prot_hook(struct sock *sk, bool sync)
311{
312 struct packet_sock *po = pkt_sk(sk);
313
314 if (po->running)
315 __unregister_prot_hook(sk, sync);
316}
317
318static inline __pure struct page *pgv_to_page(void *addr)
319{
320 if (is_vmalloc_addr(addr))
321 return vmalloc_to_page(addr);
322 return virt_to_page(addr);
323}
324
325static void __packet_set_status(struct packet_sock *po, void *frame, int status)
326{
327 union tpacket_uhdr h;
328
329 h.raw = frame;
330 switch (po->tp_version) {
331 case TPACKET_V1:
332 h.h1->tp_status = status;
333 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
334 break;
335 case TPACKET_V2:
336 h.h2->tp_status = status;
337 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
338 break;
339 case TPACKET_V3:
340 default:
341 WARN(1, "TPACKET version not supported.\n");
342 BUG();
343 }
344
345 smp_wmb();
346}
347
348static int __packet_get_status(struct packet_sock *po, void *frame)
349{
350 union tpacket_uhdr h;
351
352 smp_rmb();
353
354 h.raw = frame;
355 switch (po->tp_version) {
356 case TPACKET_V1:
357 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
358 return h.h1->tp_status;
359 case TPACKET_V2:
360 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
361 return h.h2->tp_status;
362 case TPACKET_V3:
363 default:
364 WARN(1, "TPACKET version not supported.\n");
365 BUG();
366 return 0;
367 }
368}
369
370static __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec *ts,
371 unsigned int flags)
372{
373 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
374
375 if (shhwtstamps) {
376 if ((flags & SOF_TIMESTAMPING_SYS_HARDWARE) &&
377 ktime_to_timespec_cond(shhwtstamps->syststamp, ts))
378 return TP_STATUS_TS_SYS_HARDWARE;
379 if ((flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
380 ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts))
381 return TP_STATUS_TS_RAW_HARDWARE;
382 }
383
384 if (ktime_to_timespec_cond(skb->tstamp, ts))
385 return TP_STATUS_TS_SOFTWARE;
386
387 return 0;
388}
389
390static __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
391 struct sk_buff *skb)
392{
393 union tpacket_uhdr h;
394 struct timespec ts;
395 __u32 ts_status;
396
397 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
398 return 0;
399
400 h.raw = frame;
401 switch (po->tp_version) {
402 case TPACKET_V1:
403 h.h1->tp_sec = ts.tv_sec;
404 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
405 break;
406 case TPACKET_V2:
407 h.h2->tp_sec = ts.tv_sec;
408 h.h2->tp_nsec = ts.tv_nsec;
409 break;
410 case TPACKET_V3:
411 default:
412 WARN(1, "TPACKET version not supported.\n");
413 BUG();
414 }
415
416
417 flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
418 smp_wmb();
419
420 return ts_status;
421}
422
423static void *packet_lookup_frame(struct packet_sock *po,
424 struct packet_ring_buffer *rb,
425 unsigned int position,
426 int status)
427{
428 unsigned int pg_vec_pos, frame_offset;
429 union tpacket_uhdr h;
430
431 pg_vec_pos = position / rb->frames_per_block;
432 frame_offset = position % rb->frames_per_block;
433
434 h.raw = rb->pg_vec[pg_vec_pos].buffer +
435 (frame_offset * rb->frame_size);
436
437 if (status != __packet_get_status(po, h.raw))
438 return NULL;
439
440 return h.raw;
441}
442
443static void *packet_current_frame(struct packet_sock *po,
444 struct packet_ring_buffer *rb,
445 int status)
446{
447 return packet_lookup_frame(po, rb, rb->head, status);
448}
449
450static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
451{
452 del_timer_sync(&pkc->retire_blk_timer);
453}
454
455static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
456 int tx_ring,
457 struct sk_buff_head *rb_queue)
458{
459 struct tpacket_kbdq_core *pkc;
460
461 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
462
463 spin_lock_bh(&rb_queue->lock);
464 pkc->delete_blk_timer = 1;
465 spin_unlock_bh(&rb_queue->lock);
466
467 prb_del_retire_blk_timer(pkc);
468}
469
470static void prb_init_blk_timer(struct packet_sock *po,
471 struct tpacket_kbdq_core *pkc,
472 void (*func) (unsigned long))
473{
474 init_timer(&pkc->retire_blk_timer);
475 pkc->retire_blk_timer.data = (long)po;
476 pkc->retire_blk_timer.function = func;
477 pkc->retire_blk_timer.expires = jiffies;
478}
479
480static void prb_setup_retire_blk_timer(struct packet_sock *po, int tx_ring)
481{
482 struct tpacket_kbdq_core *pkc;
483
484 if (tx_ring)
485 BUG();
486
487 pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
488 prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired);
489}
490
491static int prb_calc_retire_blk_tmo(struct packet_sock *po,
492 int blk_size_in_bytes)
493{
494 struct net_device *dev;
495 unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
496 struct ethtool_link_ksettings ecmd;
497 int err;
498
499 rtnl_lock();
500 dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
501 if (unlikely(!dev)) {
502 rtnl_unlock();
503 return DEFAULT_PRB_RETIRE_TOV;
504 }
505 err = __ethtool_get_link_ksettings(dev, &ecmd);
506 rtnl_unlock();
507 if (!err) {
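		/*
		 * Links slower than 1Gb/s (or of unknown speed) just use
		 * the default block-retire timeout; faster links scale the
		 * timeout with link speed below.
		 */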
512 if (ecmd.base.speed < SPEED_1000 ||
513 ecmd.base.speed == SPEED_UNKNOWN) {
514 return DEFAULT_PRB_RETIRE_TOV;
515 } else {
516 msec = 1;
517 div = ecmd.base.speed / 1000;
518 }
519 }
520
521 mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
522
523 if (div)
524 mbits /= div;
525
526 tmo = mbits * msec;
527
528 if (div)
529 return tmo+1;
530 return tmo;
531}
532
533static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
534 union tpacket_req_u *req_u)
535{
536 p1->feature_req_word = req_u->req3.tp_feature_req_word;
537}
538
539static void init_prb_bdqc(struct packet_sock *po,
540 struct packet_ring_buffer *rb,
541 struct pgv *pg_vec,
542 union tpacket_req_u *req_u, int tx_ring)
543{
544 struct tpacket_kbdq_core *p1 = &rb->prb_bdqc;
545 struct tpacket_block_desc *pbd;
546
547 memset(p1, 0x0, sizeof(*p1));
548
549 p1->knxt_seq_num = 1;
550 p1->pkbdq = pg_vec;
551 pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
552 p1->pkblk_start = pg_vec[0].buffer;
553 p1->kblk_size = req_u->req3.tp_block_size;
554 p1->knum_blocks = req_u->req3.tp_block_nr;
555 p1->hdrlen = po->tp_hdrlen;
556 p1->version = po->tp_version;
557 p1->last_kactive_blk_num = 0;
558 po->stats.stats3.tp_freeze_q_cnt = 0;
559 if (req_u->req3.tp_retire_blk_tov)
560 p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
561 else
562 p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
563 req_u->req3.tp_block_size);
564 p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
565 p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
566
567 p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
568 prb_init_ft_ops(p1, req_u);
569 prb_setup_retire_blk_timer(po, tx_ring);
570 prb_open_block(p1, pbd);
571}
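
/*
 * Re-arm the retire-block timer and record which block was active when it
 * was armed, so the timer handler can tell whether the ring has advanced.
 */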
576static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
577{
578 mod_timer(&pkc->retire_blk_timer,
579 jiffies + pkc->tov_in_jiffies);
580 pkc->last_kactive_blk_num = pkc->kactive_blk_num;
581}
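
/*
 * Retire-block timer callback for the TPACKET_V3 receive ring.  If the
 * active block has not advanced since the timer was last armed, the block
 * is either retired (when it holds packets and the queue is not frozen)
 * or simply left until the next tick; a frozen queue is re-opened once
 * user space has handed the block back.  Runs under the receive-queue
 * lock.
 */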
606static void prb_retire_rx_blk_timer_expired(unsigned long data)
607{
608 struct packet_sock *po = (struct packet_sock *)data;
609 struct tpacket_kbdq_core *pkc = &po->rx_ring.prb_bdqc;
610 unsigned int frozen;
611 struct tpacket_block_desc *pbd;
612
613 spin_lock(&po->sk.sk_receive_queue.lock);
614
615 frozen = prb_queue_frozen(pkc);
616 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
617
618 if (unlikely(pkc->delete_blk_timer))
619 goto out;
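
	/*
	 * If writers are still copying data into the active block, wait
	 * for them to finish before deciding whether to retire it.
	 */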
630 if (BLOCK_NUM_PKTS(pbd)) {
631 while (atomic_read(&pkc->blk_fill_in_prog)) {
632
633 cpu_relax();
634 }
635 }
636
637 if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
638 if (!frozen) {
639 if (!BLOCK_NUM_PKTS(pbd)) {
640
641 goto refresh_timer;
642 }
643 prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
644 if (!prb_dispatch_next_block(pkc, po))
645 goto refresh_timer;
646 else
647 goto out;
648 } else {
649
650
651
652 if (prb_curr_blk_in_use(pkc, pbd)) {
653
654
655
656
657 goto refresh_timer;
658 } else {
659
660
661
662
663
664
665
666 prb_open_block(pkc, pbd);
667 goto out;
668 }
669 }
670 }
671
672refresh_timer:
673 _prb_refresh_rx_retire_blk_timer(pkc);
674
675out:
676 spin_unlock(&po->sk.sk_receive_queue.lock);
677}
678
679static void prb_flush_block(struct tpacket_kbdq_core *pkc1,
680 struct tpacket_block_desc *pbd1, __u32 status)
681{
682
683
684#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
685 u8 *start, *end;
686
687 start = (u8 *)pbd1;
688
689
690 start += PAGE_SIZE;
691
692 end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
693 for (; start < end; start += PAGE_SIZE)
694 flush_dcache_page(pgv_to_page(start));
695
696 smp_wmb();
697#endif
698
699
700
701 BLOCK_STATUS(pbd1) = status;
702
703
704
705#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
706 start = (u8 *)pbd1;
707 flush_dcache_page(pgv_to_page(start));
708
709 smp_wmb();
710#endif
711}
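
/*
 * Close the currently active block: record the timestamp of the last
 * packet (or the current time if the block is empty), mark the block
 * TP_STATUS_USER together with any extra status bits, flush it out to
 * user space, wake the socket, and advance to the next block number.
 */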
722static void prb_close_block(struct tpacket_kbdq_core *pkc1,
723 struct tpacket_block_desc *pbd1,
724 struct packet_sock *po, unsigned int stat)
725{
726 __u32 status = TP_STATUS_USER | stat;
727
728 struct tpacket3_hdr *last_pkt;
729 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
730 struct sock *sk = &po->sk;
731
732 if (po->stats.stats3.tp_drops)
733 status |= TP_STATUS_LOSING;
734
735 last_pkt = (struct tpacket3_hdr *)pkc1->prev;
736 last_pkt->tp_next_offset = 0;
737
738
739 if (BLOCK_NUM_PKTS(pbd1)) {
740 h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
741 h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec;
742 } else {
743
744
745
746
747
748 struct timespec ts;
749 getnstimeofday(&ts);
750 h1->ts_last_pkt.ts_sec = ts.tv_sec;
751 h1->ts_last_pkt.ts_nsec = ts.tv_nsec;
752 }
753
754 smp_wmb();
755
756
757 prb_flush_block(pkc1, pbd1, status);
758
759 sk->sk_data_ready(sk, 0);
760
761 pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
762}
763
764static void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
765{
766 pkc->reset_pending_on_curr_blk = 0;
767}
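
/*
 * (Re)initialise a block for kernel use: assign the next sequence number,
 * reset the packet count and block length, stamp the first-packet time,
 * set the write offsets past the block header and private area, thaw the
 * queue and re-arm the retire-block timer.
 */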
776static void prb_open_block(struct tpacket_kbdq_core *pkc1,
777 struct tpacket_block_desc *pbd1)
778{
779 struct timespec ts;
780 struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
781
782 smp_rmb();
783
784
785
786
787
788 BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
789 BLOCK_NUM_PKTS(pbd1) = 0;
790 BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
791
792 getnstimeofday(&ts);
793
794 h1->ts_first_pkt.ts_sec = ts.tv_sec;
795 h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
796
797 pkc1->pkblk_start = (char *)pbd1;
798 pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
799
800 BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
801 BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
802
803 pbd1->version = pkc1->version;
804 pkc1->prev = pkc1->nxt_offset;
805 pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
806
807 prb_thaw_queue(pkc1);
808 _prb_refresh_rx_retire_blk_timer(pkc1);
809
810 smp_wmb();
811}
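
/*
 * Queue freezing: when the block we want to dispatch next is still owned
 * by user space (TP_STATUS_USER) we cannot open it, so the queue is
 * marked frozen and tp_freeze_q_cnt is bumped.  While frozen, incoming
 * packets are dropped; the queue is thawed when the block is handed back
 * and reopened.
 */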
836static void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
837 struct packet_sock *po)
838{
839 pkc->reset_pending_on_curr_blk = 1;
840 po->stats.stats3.tp_freeze_q_cnt++;
841}
842
843#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
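
/*
 * Try to hand out the next block for the kernel to fill.  If user space
 * still owns it, the queue is frozen and NULL is returned; otherwise the
 * block is opened and a pointer to its first usable byte is returned.
 */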
851static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
852 struct packet_sock *po)
853{
854 struct tpacket_block_desc *pbd;
855
856 smp_rmb();
857
858
859 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
860
861
862 if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
863 prb_freeze_queue(pkc, po);
864 return NULL;
865 }
866
867
868
869
870
871
872 prb_open_block(pkc, pbd);
873 return (void *)pkc->nxt_offset;
874}
875
876static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
877 struct packet_sock *po, unsigned int status)
878{
879 struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
880
881
882 if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
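
		/*
		 * Unless we got here from the retire-block timer (which has
		 * already waited), let any in-flight writers finish filling
		 * the block before it is closed.
		 */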
892 if (!(status & TP_STATUS_BLK_TMO)) {
893 while (atomic_read(&pkc->blk_fill_in_prog)) {
894
895 cpu_relax();
896 }
897 }
898 prb_close_block(pkc, pbd, po, status);
899 return;
900 }
901}
902
903static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
904 struct tpacket_block_desc *pbd)
905{
906 return TP_STATUS_USER & BLOCK_STATUS(pbd);
907}
908
909static int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
910{
911 return pkc->reset_pending_on_curr_blk;
912}
913
914static void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
915{
916 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
917 atomic_dec(&pkc->blk_fill_in_prog);
918}
919
920static void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
921 struct tpacket3_hdr *ppd)
922{
923 ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
924}
925
926static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
927 struct tpacket3_hdr *ppd)
928{
929 ppd->hv1.tp_rxhash = 0;
930}
931
932static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
933 struct tpacket3_hdr *ppd)
934{
935 if (skb_vlan_tag_present(pkc->skb)) {
936 ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
937 ppd->tp_status = TP_STATUS_VLAN_VALID;
938 } else {
939 ppd->hv1.tp_vlan_tci = 0;
940 ppd->tp_status = TP_STATUS_AVAILABLE;
941 }
942}
943
944static void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
945 struct tpacket3_hdr *ppd)
946{
947 prb_fill_vlan_info(pkc, ppd);
948
949 if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
950 prb_fill_rxhash(pkc, ppd);
951 else
952 prb_clear_rxhash(pkc, ppd);
953}
954
955static void prb_fill_curr_block(char *curr,
956 struct tpacket_kbdq_core *pkc,
957 struct tpacket_block_desc *pbd,
958 unsigned int len)
959{
960 struct tpacket3_hdr *ppd;
961
962 ppd = (struct tpacket3_hdr *)curr;
963 ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
964 pkc->prev = curr;
965 pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
966 BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
967 BLOCK_NUM_PKTS(pbd) += 1;
968 atomic_inc(&pkc->blk_fill_in_prog);
969 prb_run_all_ft_ops(pkc, ppd);
970}
971
972
973static void *__packet_lookup_frame_in_block(struct packet_sock *po,
974 struct sk_buff *skb,
975 int status,
976 unsigned int len
977 )
978{
979 struct tpacket_kbdq_core *pkc;
980 struct tpacket_block_desc *pbd;
981 char *curr, *end;
982
983 pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
984 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
985
986
987 if (prb_queue_frozen(pkc)) {
988
989
990
991
992 if (prb_curr_blk_in_use(pkc, pbd)) {
993
994 return NULL;
995 } else {
996
997
998
999
1000
1001
1002 prb_open_block(pkc, pbd);
1003 }
1004 }
1005
1006 smp_mb();
1007 curr = pkc->nxt_offset;
1008 pkc->skb = skb;
1009 end = (char *)pbd + pkc->kblk_size;
1010
1011
1012 if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
1013 prb_fill_curr_block(curr, pkc, pbd, len);
1014 return (void *)curr;
1015 }
1016
1017
1018 prb_retire_current_block(pkc, po, 0);
1019
1020
1021 curr = (char *)prb_dispatch_next_block(pkc, po);
1022 if (curr) {
1023 pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
1024 prb_fill_curr_block(curr, pkc, pbd, len);
1025 return (void *)curr;
1026 }
1027
1028
1029
1030
1031
1032 return NULL;
1033}
1034
1035static void *packet_current_rx_frame(struct packet_sock *po,
1036 struct sk_buff *skb,
1037 int status, unsigned int len)
1038{
1039 char *curr = NULL;
1040 switch (po->tp_version) {
1041 case TPACKET_V1:
1042 case TPACKET_V2:
1043 curr = packet_lookup_frame(po, &po->rx_ring,
1044 po->rx_ring.head, status);
1045 return curr;
1046 case TPACKET_V3:
1047 return __packet_lookup_frame_in_block(po, skb, status, len);
1048 default:
1049 WARN(1, "TPACKET version not supported\n");
1050 BUG();
1051 return NULL;
1052 }
1053}
1054
1055static void *prb_lookup_block(struct packet_sock *po,
1056 struct packet_ring_buffer *rb,
1057 unsigned int idx,
1058 int status)
1059{
1060 struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb);
1061 struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
1062
1063 if (status != BLOCK_STATUS(pbd))
1064 return NULL;
1065 return pbd;
1066}
1067
1068static int prb_previous_blk_num(struct packet_ring_buffer *rb)
1069{
1070 unsigned int prev;
1071 if (rb->prb_bdqc.kactive_blk_num)
1072 prev = rb->prb_bdqc.kactive_blk_num-1;
1073 else
1074 prev = rb->prb_bdqc.knum_blocks-1;
1075 return prev;
1076}
1077
1078
1079static void *__prb_previous_block(struct packet_sock *po,
1080 struct packet_ring_buffer *rb,
1081 int status)
1082{
1083 unsigned int previous = prb_previous_blk_num(rb);
1084 return prb_lookup_block(po, rb, previous, status);
1085}
1086
1087static void *packet_previous_rx_frame(struct packet_sock *po,
1088 struct packet_ring_buffer *rb,
1089 int status)
1090{
1091 if (po->tp_version <= TPACKET_V2)
1092 return packet_previous_frame(po, rb, status);
1093
1094 return __prb_previous_block(po, rb, status);
1095}
1096
1097static void packet_increment_rx_head(struct packet_sock *po,
1098 struct packet_ring_buffer *rb)
1099{
1100 switch (po->tp_version) {
1101 case TPACKET_V1:
1102 case TPACKET_V2:
1103 return packet_increment_head(rb);
1104 case TPACKET_V3:
1105 default:
1106 WARN(1, "TPACKET version not supported.\n");
1107 BUG();
1108 return;
1109 }
1110}
1111
1112static void *packet_previous_frame(struct packet_sock *po,
1113 struct packet_ring_buffer *rb,
1114 int status)
1115{
1116 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
1117 return packet_lookup_frame(po, rb, previous, status);
1118}
1119
1120static void packet_increment_head(struct packet_ring_buffer *buff)
1121{
1122 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
1123}
1124
1125static void packet_inc_pending(struct packet_ring_buffer *rb)
1126{
1127 this_cpu_inc(*rb->pending_refcnt);
1128}
1129
1130static void packet_dec_pending(struct packet_ring_buffer *rb)
1131{
1132 this_cpu_dec(*rb->pending_refcnt);
1133}
1134
1135static unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
1136{
1137 unsigned int refcnt = 0;
1138 int cpu;
1139
1140
1141 if (rb->pending_refcnt == NULL)
1142 return 0;
1143
1144 for_each_possible_cpu(cpu)
1145 refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
1146
1147 return refcnt;
1148}
1149
1150static int packet_alloc_pending(struct packet_sock *po)
1151{
1152 po->rx_ring.pending_refcnt = NULL;
1153
1154 po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
1155 if (unlikely(po->tx_ring.pending_refcnt == NULL))
1156 return -ENOBUFS;
1157
1158 return 0;
1159}
1160
1161static void packet_free_pending(struct packet_sock *po)
1162{
1163 free_percpu(po->tx_ring.pending_refcnt);
1164}
1165
1166static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
1167{
1168 struct sock *sk = &po->sk;
1169 bool has_room;
1170
1171 if (po->prot_hook.func != tpacket_rcv)
1172 return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
1173 <= sk->sk_rcvbuf;
1174
1175 spin_lock(&sk->sk_receive_queue.lock);
1176 if (po->tp_version == TPACKET_V3)
1177 has_room = prb_lookup_block(po, &po->rx_ring,
1178 po->rx_ring.prb_bdqc.kactive_blk_num,
1179 TP_STATUS_KERNEL);
1180 else
1181 has_room = packet_lookup_frame(po, &po->rx_ring,
1182 po->rx_ring.head,
1183 TP_STATUS_KERNEL);
1184 spin_unlock(&sk->sk_receive_queue.lock);
1185
1186 return has_room;
1187}
1188
1189static void packet_sock_destruct(struct sock *sk)
1190{
1191 skb_queue_purge(&sk->sk_error_queue);
1192
1193 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
1194 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
1195
1196 if (!sock_flag(sk, SOCK_DEAD)) {
1197 pr_err("Attempt to release alive packet socket: %p\n", sk);
1198 return;
1199 }
1200
1201 sk_refcnt_debug_dec(sk);
1202}
1203
1204static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
1205{
1206 int x = atomic_read(&f->rr_cur) + 1;
1207
1208 if (x >= num)
1209 x = 0;
1210
1211 return x;
1212}
1213
1214static unsigned int fanout_demux_hash(struct packet_fanout *f,
1215 struct sk_buff *skb,
1216 unsigned int num)
1217{
1218 return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
1219}
1220
1221static unsigned int fanout_demux_lb(struct packet_fanout *f,
1222 struct sk_buff *skb,
1223 unsigned int num)
1224{
1225 int cur, old;
1226
1227 cur = atomic_read(&f->rr_cur);
1228 while ((old = atomic_cmpxchg(&f->rr_cur, cur,
1229 fanout_rr_next(f, num))) != cur)
1230 cur = old;
1231 return cur;
1232}
1233
1234static unsigned int fanout_demux_cpu(struct packet_fanout *f,
1235 struct sk_buff *skb,
1236 unsigned int num)
1237{
1238 return smp_processor_id() % num;
1239}
1240
1241static unsigned int fanout_demux_rnd(struct packet_fanout *f,
1242 struct sk_buff *skb,
1243 unsigned int num)
1244{
1245 return prandom_u32_max(num);
1246}
1247
1248static unsigned int fanout_demux_rollover(struct packet_fanout *f,
1249 struct sk_buff *skb,
1250 unsigned int idx, unsigned int skip,
1251 unsigned int num)
1252{
1253 unsigned int i, j;
1254
1255 i = j = min_t(int, f->next[idx], num - 1);
1256 do {
1257 if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
1258 if (i != j)
1259 f->next[idx] = i;
1260 return i;
1261 }
1262 if (++i == num)
1263 i = 0;
1264 } while (i != j);
1265
1266 return idx;
1267}
1268
1269static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
1270{
1271 return f->flags & (flag >> 8);
1272}
1273
1274static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
1275 struct packet_type *pt, struct net_device *orig_dev)
1276{
1277 struct packet_fanout *f = pt->af_packet_priv;
1278 unsigned int num = f->num_members;
1279 struct packet_sock *po;
1280 unsigned int idx;
1281
1282 if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
1283 !num) {
1284 kfree_skb(skb);
1285 return 0;
1286 }
1287
1288 switch (f->type) {
1289 case PACKET_FANOUT_HASH:
1290 default:
1291 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
1292 skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
1293 if (!skb)
1294 return 0;
1295 }
1296 idx = fanout_demux_hash(f, skb, num);
1297 break;
1298 case PACKET_FANOUT_LB:
1299 idx = fanout_demux_lb(f, skb, num);
1300 break;
1301 case PACKET_FANOUT_CPU:
1302 idx = fanout_demux_cpu(f, skb, num);
1303 break;
1304 case PACKET_FANOUT_RND:
1305 idx = fanout_demux_rnd(f, skb, num);
1306 break;
1307 case PACKET_FANOUT_ROLLOVER:
1308 idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
1309 break;
1310 }
1311
1312 po = pkt_sk(f->arr[idx]);
1313 if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
1314 unlikely(!packet_rcv_has_room(po, skb))) {
1315 idx = fanout_demux_rollover(f, skb, idx, idx, num);
1316 po = pkt_sk(f->arr[idx]);
1317 }
1318
1319 return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
1320}
1321
1322DEFINE_MUTEX(fanout_mutex);
1323EXPORT_SYMBOL_GPL(fanout_mutex);
1324static LIST_HEAD(fanout_list);
1325
1326static void __fanout_link(struct sock *sk, struct packet_sock *po)
1327{
1328 struct packet_fanout *f = po->fanout;
1329
1330 spin_lock(&f->lock);
1331 f->arr[f->num_members] = sk;
1332 smp_wmb();
1333 f->num_members++;
1334 spin_unlock(&f->lock);
1335}
1336
1337static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
1338{
1339 struct packet_fanout *f = po->fanout;
1340 int i;
1341
1342 spin_lock(&f->lock);
1343 for (i = 0; i < f->num_members; i++) {
1344 if (f->arr[i] == sk)
1345 break;
1346 }
1347 BUG_ON(i >= f->num_members);
1348 f->arr[i] = f->arr[f->num_members - 1];
1349 f->num_members--;
1350 spin_unlock(&f->lock);
1351}
1352
1353static bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
1354{
1355 if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
1356 return true;
1357
1358 return false;
1359}
1360
1361static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
1362{
1363 struct packet_sock *po = pkt_sk(sk);
1364 struct packet_fanout *f, *match;
1365 u8 type = type_flags & 0xff;
1366 u8 flags = type_flags >> 8;
1367 int err;
1368
1369 switch (type) {
1370 case PACKET_FANOUT_ROLLOVER:
1371 if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
1372 return -EINVAL;
1373 case PACKET_FANOUT_HASH:
1374 case PACKET_FANOUT_LB:
1375 case PACKET_FANOUT_CPU:
1376 case PACKET_FANOUT_RND:
1377 break;
1378 default:
1379 return -EINVAL;
1380 }
1381
1382 if (!po->running)
1383 return -EINVAL;
1384
1385 if (po->fanout)
1386 return -EALREADY;
1387
1388 mutex_lock(&fanout_mutex);
1389 match = NULL;
1390 list_for_each_entry(f, &fanout_list, list) {
1391 if (f->id == id &&
1392 read_pnet(&f->net) == sock_net(sk)) {
1393 match = f;
1394 break;
1395 }
1396 }
1397 err = -EINVAL;
1398 if (match && match->flags != flags)
1399 goto out;
1400 if (!match) {
1401 err = -ENOMEM;
1402 match = kzalloc(sizeof(*match), GFP_KERNEL);
1403 if (!match)
1404 goto out;
1405 write_pnet(&match->net, sock_net(sk));
1406 match->id = id;
1407 match->type = type;
1408 match->flags = flags;
1409 atomic_set(&match->rr_cur, 0);
1410 INIT_LIST_HEAD(&match->list);
1411 spin_lock_init(&match->lock);
1412 atomic_set(&match->sk_ref, 0);
1413 match->prot_hook.type = po->prot_hook.type;
1414 match->prot_hook.dev = po->prot_hook.dev;
1415 match->prot_hook.func = packet_rcv_fanout;
1416 match->prot_hook.af_packet_priv = match;
1417 match->prot_hook.id_match = match_fanout_group;
1418 dev_add_pack(&match->prot_hook);
1419 list_add(&match->list, &fanout_list);
1420 }
1421 err = -EINVAL;
1422 if (match->type == type &&
1423 match->prot_hook.type == po->prot_hook.type &&
1424 match->prot_hook.dev == po->prot_hook.dev) {
1425 err = -ENOSPC;
1426 if (atomic_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
1427 __dev_remove_pack(&po->prot_hook);
1428 po->fanout = match;
1429 atomic_inc(&match->sk_ref);
1430 __fanout_link(sk, po);
1431 err = 0;
1432 }
1433 }
1434out:
1435 mutex_unlock(&fanout_mutex);
1436 return err;
1437}
1438
1439static void fanout_release(struct sock *sk)
1440{
1441 struct packet_sock *po = pkt_sk(sk);
1442 struct packet_fanout *f;
1443
1444 f = po->fanout;
1445 if (!f)
1446 return;
1447
1448 mutex_lock(&fanout_mutex);
1449 po->fanout = NULL;
1450
1451 if (atomic_dec_and_test(&f->sk_ref)) {
1452 list_del(&f->list);
1453 dev_remove_pack(&f->prot_hook);
1454 kfree(f);
1455 }
1456 mutex_unlock(&fanout_mutex);
1457}
1458
1459static const struct proto_ops packet_ops;
1460
1461static const struct proto_ops packet_ops_spkt;
1462
1463static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
1464 struct packet_type *pt, struct net_device *orig_dev)
1465{
1466 struct sock *sk;
1467 struct sockaddr_pkt *spkt;
1468
1474 sk = pt->af_packet_priv;
1475
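	/*
	 * Drop packets looped back by the stack and packets from foreign
	 * network namespaces, then make the skb private to us before the
	 * link-layer header is pushed back and the sockaddr_pkt filled in.
	 */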
1487 if (skb->pkt_type == PACKET_LOOPBACK)
1488 goto out;
1489
1490 if (!net_eq(dev_net(dev), sock_net(sk)))
1491 goto out;
1492
1493 skb = skb_share_check(skb, GFP_ATOMIC);
1494 if (skb == NULL)
1495 goto oom;
1496
1497
1498 skb_dst_drop(skb);
1499
1500
1501 nf_reset(skb);
1502
1503 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
1504
1505 skb_push(skb, skb->data - skb_mac_header(skb));
1506
1507
1508
1509
1510
1511 spkt->spkt_family = dev->type;
1512 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
1513 spkt->spkt_protocol = skb->protocol;
1514
1515
1516
1517
1518
1519
1520 if (sock_queue_rcv_skb(sk, skb) == 0)
1521 return 0;
1522
1523out:
1524 kfree_skb(skb);
1525oom:
1526 return 0;
1527}
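
/*
 * Transmit path for SOCK_PACKET sockets: resolve the device named in the
 * sockaddr_pkt, build an skb around the user data (which is expected to
 * already contain the link-layer header) and queue it for transmission.
 */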
1535static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
1536 struct msghdr *msg, size_t len)
1537{
1538 struct sock *sk = sock->sk;
1539 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
1540 struct sk_buff *skb = NULL;
1541 struct net_device *dev;
1542 __be16 proto = 0;
1543 int err;
1544 int extra_len = 0;
1545
1546
1547
1548
1549
1550 if (saddr) {
1551 if (msg->msg_namelen < sizeof(struct sockaddr))
1552 return -EINVAL;
1553 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
1554 proto = saddr->spkt_protocol;
1555 } else
1556 return -ENOTCONN;
1557
1558
1559
1560
1561
1562 saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
1563retry:
1564 rcu_read_lock();
1565 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
1566 err = -ENODEV;
1567 if (dev == NULL)
1568 goto out_unlock;
1569
1570 err = -ENETDOWN;
1571 if (!(dev->flags & IFF_UP))
1572 goto out_unlock;
1573
1574
1575
1576
1577
1578
1579 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
1580 if (!netif_supports_nofcs(dev)) {
1581 err = -EPROTONOSUPPORT;
1582 goto out_unlock;
1583 }
1584 extra_len = 4;
1585 }
1586
1587 err = -EMSGSIZE;
1588 if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
1589 goto out_unlock;
1590
1591 if (!skb) {
1592 size_t reserved = LL_RESERVED_SPACE(dev);
1593 int tlen = dev->needed_tailroom;
1594 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
1595
1596 rcu_read_unlock();
1597 skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
1598 if (skb == NULL)
1599 return -ENOBUFS;
1600
1601
1602
1603
1604 skb_reserve(skb, reserved);
1605 skb_reset_network_header(skb);
1606
1607
1608 if (hhlen) {
1609 skb->data -= hhlen;
1610 skb->tail -= hhlen;
1611 if (len < hhlen)
1612 skb_reset_network_header(skb);
1613 }
1614 err = memcpy_from_msg(skb_put(skb, len), msg, len);
1615 if (err)
1616 goto out_free;
1617 goto retry;
1618 }
1619
1620 if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
1621
1622
1623
1624
1625 struct ethhdr *ehdr;
1626 skb_reset_mac_header(skb);
1627 ehdr = eth_hdr(skb);
1628 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
1629 err = -EMSGSIZE;
1630 goto out_unlock;
1631 }
1632 }
1633
1634 skb->protocol = proto;
1635 skb->dev = dev;
1636 skb->priority = sk->sk_priority;
1637 skb->mark = sk->sk_mark;
1638
1639 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1640
1641 if (unlikely(extra_len == 4))
1642 skb->no_fcs = 1;
1643
1644 skb_probe_transport_header(skb, 0);
1645
1646 dev_queue_xmit(skb);
1647 rcu_read_unlock();
1648 return len;
1649
1650out_unlock:
1651 rcu_read_unlock();
1652out_free:
1653 kfree_skb(skb);
1654 return err;
1655}
1656
1657static unsigned int run_filter(const struct sk_buff *skb,
1658 const struct sock *sk,
1659 unsigned int res)
1660{
1661 struct sk_filter *filter;
1662
1663 rcu_read_lock();
1664 filter = rcu_dereference(sk->sk_filter);
1665 if (filter != NULL)
1666 res = SK_RUN_FILTER(filter, skb);
1667 rcu_read_unlock();
1668
1669 return res;
1670}
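
/*
 * Receive handler for non-mmapped AF_PACKET sockets.  The skb is run
 * through the socket filter, cloned only if it is shared, trimmed to the
 * snap length, tagged with a sockaddr_ll in skb->cb and queued on
 * sk_receive_queue; packets that do not fit are counted in tp_drops.
 */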
1684static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
1685 struct packet_type *pt, struct net_device *orig_dev)
1686{
1687 struct sock *sk;
1688 struct sockaddr_ll *sll;
1689 struct packet_sock *po;
1690 u8 *skb_head = skb->data;
1691 int skb_len = skb->len;
1692 unsigned int snaplen, res;
1693
1694 if (skb->pkt_type == PACKET_LOOPBACK)
1695 goto drop;
1696
1697 sk = pt->af_packet_priv;
1698 po = pkt_sk(sk);
1699
1700 if (!net_eq(dev_net(dev), sock_net(sk)))
1701 goto drop;
1702
1703 skb->dev = dev;
1704
1705 if (dev->header_ops) {
1706
1707
1708
1709
1710
1711
1712
1713 if (sk->sk_type != SOCK_DGRAM)
1714 skb_push(skb, skb->data - skb_mac_header(skb));
1715 else if (skb->pkt_type == PACKET_OUTGOING) {
1716
1717 skb_pull(skb, skb_network_offset(skb));
1718 }
1719 }
1720
1721 snaplen = skb->len;
1722
1723 res = run_filter(skb, sk, snaplen);
1724 if (!res)
1725 goto drop_n_restore;
1726 if (snaplen > res)
1727 snaplen = res;
1728
1729 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
1730 goto drop_n_acct;
1731
1732 if (skb_shared(skb)) {
1733 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
1734 if (nskb == NULL)
1735 goto drop_n_acct;
1736
1737 if (skb_head != skb->data) {
1738 skb->data = skb_head;
1739 skb->len = skb_len;
1740 }
1741 consume_skb(skb);
1742 skb = nskb;
1743 }
1744
1745 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
1746 sizeof(skb->cb));
1747
1748 sll = &PACKET_SKB_CB(skb)->sa.ll;
1749 sll->sll_family = AF_PACKET;
1750 sll->sll_hatype = dev->type;
1751 sll->sll_protocol = skb->protocol;
1752 sll->sll_pkttype = skb->pkt_type;
1753 if (unlikely(po->origdev))
1754 sll->sll_ifindex = orig_dev->ifindex;
1755 else
1756 sll->sll_ifindex = dev->ifindex;
1757
1758 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1759
1760 PACKET_SKB_CB(skb)->origlen = skb->len;
1761
1762 if (pskb_trim(skb, snaplen))
1763 goto drop_n_acct;
1764
1765 skb_set_owner_r(skb, sk);
1766 skb->dev = NULL;
1767 skb_dst_drop(skb);
1768
1769
1770 nf_reset(skb);
1771
1772 spin_lock(&sk->sk_receive_queue.lock);
1773 po->stats.stats1.tp_packets++;
1774 sock_skb_set_dropcount(sk, skb);
1775 __skb_queue_tail(&sk->sk_receive_queue, skb);
1776 spin_unlock(&sk->sk_receive_queue.lock);
1777 sk->sk_data_ready(sk, skb->len);
1778 return 0;
1779
1780drop_n_acct:
1781 spin_lock(&sk->sk_receive_queue.lock);
1782 po->stats.stats1.tp_drops++;
1783 atomic_inc(&sk->sk_drops);
1784 spin_unlock(&sk->sk_receive_queue.lock);
1785
1786drop_n_restore:
1787 if (skb_head != skb->data && skb_shared(skb)) {
1788 skb->data = skb_head;
1789 skb->len = skb_len;
1790 }
1791drop:
1792 consume_skb(skb);
1793 return 0;
1794}
1795
1796static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1797 struct packet_type *pt, struct net_device *orig_dev)
1798{
1799 struct sock *sk;
1800 struct packet_sock *po;
1801 struct sockaddr_ll *sll;
1802 union tpacket_uhdr h;
1803 u8 *skb_head = skb->data;
1804 int skb_len = skb->len;
1805 unsigned int snaplen, res;
1806 unsigned long status = TP_STATUS_USER;
1807 unsigned short macoff, netoff, hdrlen;
1808 struct sk_buff *copy_skb = NULL;
1809 struct timespec ts;
1810 __u32 ts_status;
1811
1812 if (skb->pkt_type == PACKET_LOOPBACK)
1813 goto drop;
1814
1815 sk = pt->af_packet_priv;
1816 po = pkt_sk(sk);
1817
1818 if (!net_eq(dev_net(dev), sock_net(sk)))
1819 goto drop;
1820
1821 if (dev->header_ops) {
1822 if (sk->sk_type != SOCK_DGRAM)
1823 skb_push(skb, skb->data - skb_mac_header(skb));
1824 else if (skb->pkt_type == PACKET_OUTGOING) {
1825
1826 skb_pull(skb, skb_network_offset(skb));
1827 }
1828 }
1829
1830 if (skb->ip_summed == CHECKSUM_PARTIAL)
1831 status |= TP_STATUS_CSUMNOTREADY;
1832
1833 snaplen = skb->len;
1834
1835 res = run_filter(skb, sk, snaplen);
1836 if (!res)
1837 goto drop_n_restore;
1838 if (snaplen > res)
1839 snaplen = res;
1840
1841 if (sk->sk_type == SOCK_DGRAM) {
1842 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
1843 po->tp_reserve;
1844 } else {
1845 unsigned int maclen = skb_network_offset(skb);
1846 netoff = TPACKET_ALIGN(po->tp_hdrlen +
1847 (maclen < 16 ? 16 : maclen)) +
1848 po->tp_reserve;
1849 macoff = netoff - maclen;
1850 }
1851 if (po->tp_version <= TPACKET_V2) {
1852 if (macoff + snaplen > po->rx_ring.frame_size) {
1853 if (po->copy_thresh &&
1854 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
1855 if (skb_shared(skb)) {
1856 copy_skb = skb_clone(skb, GFP_ATOMIC);
1857 } else {
1858 copy_skb = skb_get(skb);
1859 skb_head = skb->data;
1860 }
1861 if (copy_skb)
1862 skb_set_owner_r(copy_skb, sk);
1863 }
1864 snaplen = po->rx_ring.frame_size - macoff;
1865 if ((int)snaplen < 0)
1866 snaplen = 0;
1867 }
1868 } else if (unlikely(macoff + snaplen >
1869 GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
1870 u32 nval;
1871
1872 nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
1873 pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
1874 snaplen, nval, macoff);
1875 snaplen = nval;
1876 if (unlikely((int)snaplen < 0)) {
1877 snaplen = 0;
1878 macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
1879 }
1880 }
1881 spin_lock(&sk->sk_receive_queue.lock);
1882 h.raw = packet_current_rx_frame(po, skb,
1883 TP_STATUS_KERNEL, (macoff+snaplen));
1884 if (!h.raw)
1885 goto ring_is_full;
1886 if (po->tp_version <= TPACKET_V2) {
1887 packet_increment_rx_head(po, &po->rx_ring);
1888
1889
1890
1891
1892
1893
1894 if (po->stats.stats1.tp_drops)
1895 status |= TP_STATUS_LOSING;
1896 }
1897 po->stats.stats1.tp_packets++;
1898 if (copy_skb) {
1899 status |= TP_STATUS_COPY;
1900 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
1901 }
1902 spin_unlock(&sk->sk_receive_queue.lock);
1903
1904 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
1905
1906 if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
1907 getnstimeofday(&ts);
1908
1909 status |= ts_status;
1910
1911 switch (po->tp_version) {
1912 case TPACKET_V1:
1913 h.h1->tp_len = skb->len;
1914 h.h1->tp_snaplen = snaplen;
1915 h.h1->tp_mac = macoff;
1916 h.h1->tp_net = netoff;
1917 h.h1->tp_sec = ts.tv_sec;
1918 h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
1919 hdrlen = sizeof(*h.h1);
1920 break;
1921 case TPACKET_V2:
1922 h.h2->tp_len = skb->len;
1923 h.h2->tp_snaplen = snaplen;
1924 h.h2->tp_mac = macoff;
1925 h.h2->tp_net = netoff;
1926 h.h2->tp_sec = ts.tv_sec;
1927 h.h2->tp_nsec = ts.tv_nsec;
1928 if (skb_vlan_tag_present(skb)) {
1929 h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
1930 status |= TP_STATUS_VLAN_VALID;
1931 } else {
1932 h.h2->tp_vlan_tci = 0;
1933 }
1934 h.h2->tp_padding = 0;
1935 hdrlen = sizeof(*h.h2);
1936 break;
1937 case TPACKET_V3:
1938
1939
1940
1941 h.h3->tp_status |= status;
1942 h.h3->tp_len = skb->len;
1943 h.h3->tp_snaplen = snaplen;
1944 h.h3->tp_mac = macoff;
1945 h.h3->tp_net = netoff;
1946 h.h3->tp_sec = ts.tv_sec;
1947 h.h3->tp_nsec = ts.tv_nsec;
1948 hdrlen = sizeof(*h.h3);
1949 break;
1950 default:
1951 BUG();
1952 }
1953
1954 sll = h.raw + TPACKET_ALIGN(hdrlen);
1955 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
1956 sll->sll_family = AF_PACKET;
1957 sll->sll_hatype = dev->type;
1958 sll->sll_protocol = skb->protocol;
1959 sll->sll_pkttype = skb->pkt_type;
1960 if (unlikely(po->origdev))
1961 sll->sll_ifindex = orig_dev->ifindex;
1962 else
1963 sll->sll_ifindex = dev->ifindex;
1964
1965 smp_mb();
1966#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
1967 {
1968 u8 *start, *end;
1969
1970 if (po->tp_version <= TPACKET_V2) {
1971 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw
1972 + macoff + snaplen);
1973 for (start = h.raw; start < end; start += PAGE_SIZE)
1974 flush_dcache_page(pgv_to_page(start));
1975 }
1976 smp_wmb();
1977 }
1978#endif
1979 if (po->tp_version <= TPACKET_V2) {
1980 __packet_set_status(po, h.raw, status);
1981 sk->sk_data_ready(sk, 0);
1982 } else {
1983 prb_clear_blk_fill_status(&po->rx_ring);
1984 }
1985
1986drop_n_restore:
1987 if (skb_head != skb->data && skb_shared(skb)) {
1988 skb->data = skb_head;
1989 skb->len = skb_len;
1990 }
1991drop:
1992 kfree_skb(skb);
1993 return 0;
1994
1995ring_is_full:
1996 po->stats.stats1.tp_drops++;
1997 spin_unlock(&sk->sk_receive_queue.lock);
1998
1999 sk->sk_data_ready(sk, 0);
2000 kfree_skb(copy_skb);
2001 goto drop_n_restore;
2002}
2003
2004static void tpacket_destruct_skb(struct sk_buff *skb)
2005{
2006 struct packet_sock *po = pkt_sk(skb->sk);
2007 void *ph;
2008
2009 if (likely(po->tx_ring.pg_vec)) {
2010 __u32 ts;
2011
2012 ph = skb_shinfo(skb)->destructor_arg;
2013 packet_dec_pending(&po->tx_ring);
2014
2015 ts = __packet_set_timestamp(po, ph, skb);
2016 __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
2017 }
2018
2019 sock_wfree(skb);
2020}
2021
2022static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
2023 void *frame, struct net_device *dev, int size_max,
2024 __be16 proto, unsigned char *addr, int hlen)
2025{
2026 union tpacket_uhdr ph;
2027 int to_write, offset, len, tp_len, nr_frags, len_max;
2028 struct socket *sock = po->sk.sk_socket;
2029 struct page *page;
2030 void *data;
2031 int err;
2032
2033 ph.raw = frame;
2034
2035 skb->protocol = proto;
2036 skb->dev = dev;
2037 skb->priority = po->sk.sk_priority;
2038 skb->mark = po->sk.sk_mark;
2039 sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
2040 skb_shinfo(skb)->destructor_arg = ph.raw;
2041
2042 switch (po->tp_version) {
2043 case TPACKET_V2:
2044 tp_len = ph.h2->tp_len;
2045 break;
2046 default:
2047 tp_len = ph.h1->tp_len;
2048 break;
2049 }
2050 if (unlikely(tp_len > size_max)) {
2051 pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
2052 return -EMSGSIZE;
2053 }
2054
2055 skb_reserve(skb, hlen);
2056 skb_reset_network_header(skb);
2057 skb_probe_transport_header(skb, 0);
2058
2059 if (po->tp_tx_has_off) {
2060 int off_min, off_max, off;
2061 off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
2062 off_max = po->tx_ring.frame_size - tp_len;
2063 if (sock->type == SOCK_DGRAM) {
2064 switch (po->tp_version) {
2065 case TPACKET_V2:
2066 off = ph.h2->tp_net;
2067 break;
2068 default:
2069 off = ph.h1->tp_net;
2070 break;
2071 }
2072 } else {
2073 switch (po->tp_version) {
2074 case TPACKET_V2:
2075 off = ph.h2->tp_mac;
2076 break;
2077 default:
2078 off = ph.h1->tp_mac;
2079 break;
2080 }
2081 }
2082 if (unlikely((off < off_min) || (off_max < off)))
2083 return -EINVAL;
2084 data = ph.raw + off;
2085 } else {
2086 data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
2087 }
2088 to_write = tp_len;
2089
2090 if (sock->type == SOCK_DGRAM) {
2091 err = dev_hard_header(skb, dev, ntohs(proto), addr,
2092 NULL, tp_len);
2093 if (unlikely(err < 0))
2094 return -EINVAL;
2095 } else if (dev->hard_header_len) {
2096
2097 if (unlikely(tp_len <= dev->hard_header_len)) {
2098 pr_err("packet size is too short (%d < %d)\n",
2099 tp_len, dev->hard_header_len);
2100 return -EINVAL;
2101 }
2102
2103 skb_push(skb, dev->hard_header_len);
2104 err = skb_store_bits(skb, 0, data,
2105 dev->hard_header_len);
2106 if (unlikely(err))
2107 return err;
2108
2109 data += dev->hard_header_len;
2110 to_write -= dev->hard_header_len;
2111 }
2112
2113 offset = offset_in_page(data);
2114 len_max = PAGE_SIZE - offset;
2115 len = ((to_write > len_max) ? len_max : to_write);
2116
2117 skb->data_len = to_write;
2118 skb->len += to_write;
2119 skb->truesize += to_write;
2120 atomic_add(to_write, &po->sk.sk_wmem_alloc);
2121
2122 while (likely(to_write)) {
2123 nr_frags = skb_shinfo(skb)->nr_frags;
2124
2125 if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
2126 pr_err("Packet exceed the number of skb frags(%lu)\n",
2127 MAX_SKB_FRAGS);
2128 return -EFAULT;
2129 }
2130
2131 page = pgv_to_page(data);
2132 data += len;
2133 flush_dcache_page(page);
2134 get_page(page);
2135 skb_fill_page_desc(skb, nr_frags, page, offset, len);
2136 to_write -= len;
2137 offset = 0;
2138 len_max = PAGE_SIZE;
2139 len = ((to_write > len_max) ? len_max : to_write);
2140 }
2141
2142 return tp_len;
2143}
2144
2145static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
2146{
2147 struct sk_buff *skb;
2148 struct net_device *dev;
2149 __be16 proto;
2150 int err, reserve = 0;
2151 void *ph;
2152 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2153 bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
2154 int tp_len, size_max;
2155 unsigned char *addr;
2156 int len_sum = 0;
2157 int status = TP_STATUS_AVAILABLE;
2158 int hlen, tlen;
2159
2160 mutex_lock(&po->pg_vec_lock);
2161
2162 if (likely(saddr == NULL)) {
2163 dev = packet_cached_dev_get(po);
2164 proto = po->num;
2165 addr = NULL;
2166 } else {
2167 err = -EINVAL;
2168 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2169 goto out;
2170 if (msg->msg_namelen < (saddr->sll_halen
2171 + offsetof(struct sockaddr_ll,
2172 sll_addr)))
2173 goto out;
2174 proto = saddr->sll_protocol;
2175 addr = saddr->sll_addr;
2176 dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
2177 }
2178
2179 err = -ENXIO;
2180 if (unlikely(dev == NULL))
2181 goto out;
2182 err = -ENETDOWN;
2183 if (unlikely(!(dev->flags & IFF_UP)))
2184 goto out_put;
2185
2186 reserve = dev->hard_header_len;
2187
2188 size_max = po->tx_ring.frame_size
2189 - (po->tp_hdrlen - sizeof(struct sockaddr_ll));
2190
2191 if (size_max > dev->mtu + reserve)
2192 size_max = dev->mtu + reserve;
2193
2194 do {
2195 ph = packet_current_frame(po, &po->tx_ring,
2196 TP_STATUS_SEND_REQUEST);
2197 if (unlikely(ph == NULL)) {
2198 if (need_wait && need_resched())
2199 schedule();
2200 continue;
2201 }
2202
2203 status = TP_STATUS_SEND_REQUEST;
2204 hlen = LL_RESERVED_SPACE(dev);
2205 tlen = dev->needed_tailroom;
2206 skb = sock_alloc_send_skb(&po->sk,
2207 hlen + tlen + sizeof(struct sockaddr_ll),
2208 0, &err);
2209
2210 if (unlikely(skb == NULL))
2211 goto out_status;
2212
2213 tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
2214 addr, hlen);
2215
2216 if (unlikely(tp_len < 0)) {
2217 if (po->tp_loss) {
2218 __packet_set_status(po, ph,
2219 TP_STATUS_AVAILABLE);
2220 packet_increment_head(&po->tx_ring);
2221 kfree_skb(skb);
2222 continue;
2223 } else {
2224 status = TP_STATUS_WRONG_FORMAT;
2225 err = tp_len;
2226 goto out_status;
2227 }
2228 }
2229
2230 skb->destructor = tpacket_destruct_skb;
2231 __packet_set_status(po, ph, TP_STATUS_SENDING);
2232 packet_inc_pending(&po->tx_ring);
2233
2234 status = TP_STATUS_SEND_REQUEST;
2235 err = dev_queue_xmit(skb);
2236 if (unlikely(err > 0)) {
2237 err = net_xmit_errno(err);
2238 if (err && __packet_get_status(po, ph) ==
2239 TP_STATUS_AVAILABLE) {
2240
2241 skb = NULL;
2242 goto out_status;
2243 }
2244
2245
2246
2247
2248 err = 0;
2249 }
2250 packet_increment_head(&po->tx_ring);
2251 len_sum += tp_len;
2252 } while (likely((ph != NULL) ||
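		/*
		 * Keep looping while there are frames to send, or, when the
		 * caller wants to wait, until all in-flight tx frames have
		 * completed.  packet_read_pending() sums a per-CPU counter,
		 * so it is only consulted on this slow path.
		 */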
2259 (need_wait && packet_read_pending(&po->tx_ring))));
2260
2261 err = len_sum;
2262 goto out_put;
2263
2264out_status:
2265 __packet_set_status(po, ph, status);
2266 kfree_skb(skb);
2267out_put:
2268 dev_put(dev);
2269out:
2270 mutex_unlock(&po->pg_vec_lock);
2271 return err;
2272}
2273
2274static struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
2275 size_t reserve, size_t len,
2276 size_t linear, int noblock,
2277 int *err)
2278{
2279 struct sk_buff *skb;
2280
2281
2282 if (prepad + len < PAGE_SIZE || !linear)
2283 linear = len;
2284
2285 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
2286 err, 0);
2287 if (!skb)
2288 return NULL;
2289
2290 skb_reserve(skb, reserve);
2291 skb_put(skb, linear);
2292 skb->data_len = len - linear;
2293 skb->len += len - linear;
2294
2295 return skb;
2296}
2297
2298static int packet_snd(struct socket *sock,
2299 struct msghdr *msg, size_t len)
2300{
2301 struct sock *sk = sock->sk;
2302 struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
2303 struct sk_buff *skb;
2304 struct net_device *dev;
2305 __be16 proto;
2306 unsigned char *addr;
2307 int err, reserve = 0;
2308 struct virtio_net_hdr vnet_hdr = { 0 };
2309 int offset = 0;
2310 int vnet_hdr_len;
2311 struct packet_sock *po = pkt_sk(sk);
2312 unsigned short gso_type = 0;
2313 int hlen, tlen;
2314 int extra_len = 0;
2315
2316
2317
2318
2319
2320 if (likely(saddr == NULL)) {
2321 dev = packet_cached_dev_get(po);
2322 proto = po->num;
2323 addr = NULL;
2324 } else {
2325 err = -EINVAL;
2326 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
2327 goto out;
2328 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
2329 goto out;
2330 proto = saddr->sll_protocol;
2331 addr = saddr->sll_addr;
2332 dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
2333 }
2334
2335 err = -ENXIO;
2336 if (unlikely(dev == NULL))
2337 goto out_unlock;
2338 err = -ENETDOWN;
2339 if (unlikely(!(dev->flags & IFF_UP)))
2340 goto out_unlock;
2341
2342 if (sock->type == SOCK_RAW)
2343 reserve = dev->hard_header_len;
2344 if (po->has_vnet_hdr) {
2345 vnet_hdr_len = sizeof(vnet_hdr);
2346
2347 err = -EINVAL;
2348 if (len < vnet_hdr_len)
2349 goto out_unlock;
2350
2351 len -= vnet_hdr_len;
2352
2353 err = memcpy_from_msg((void *)&vnet_hdr, msg, vnet_hdr_len);
2354 if (err < 0)
2355 goto out_unlock;
2356
2357 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
2358 (__virtio16_to_cpu(false, vnet_hdr.csum_start) +
2359 __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2 >
2360 __virtio16_to_cpu(false, vnet_hdr.hdr_len)))
2361 vnet_hdr.hdr_len = __cpu_to_virtio16(false,
2362 __virtio16_to_cpu(false, vnet_hdr.csum_start) +
2363 __virtio16_to_cpu(false, vnet_hdr.csum_offset) + 2);
2364
2365 err = -EINVAL;
2366 if (__virtio16_to_cpu(false, vnet_hdr.hdr_len) > len)
2367 goto out_unlock;
2368
2369 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2370 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2371 case VIRTIO_NET_HDR_GSO_TCPV4:
2372 gso_type = SKB_GSO_TCPV4;
2373 break;
2374 case VIRTIO_NET_HDR_GSO_TCPV6:
2375 gso_type = SKB_GSO_TCPV6;
2376 break;
2377 case VIRTIO_NET_HDR_GSO_UDP:
2378 gso_type = SKB_GSO_UDP;
2379 break;
2380 default:
2381 goto out_unlock;
2382 }
2383
2384 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
2385 gso_type |= SKB_GSO_TCP_ECN;
2386
2387 if (vnet_hdr.gso_size == 0)
2388 goto out_unlock;
2389
2390 }
2391 }
2392
2393 if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
2394 if (!netif_supports_nofcs(dev)) {
2395 err = -EPROTONOSUPPORT;
2396 goto out_unlock;
2397 }
2398 extra_len = 4;
2399 }
2400
2401 err = -EMSGSIZE;
2402 if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
2403 goto out_unlock;
2404
2405 err = -ENOBUFS;
2406 hlen = LL_RESERVED_SPACE(dev);
2407 tlen = dev->needed_tailroom;
2408 skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
2409 __virtio16_to_cpu(false, vnet_hdr.hdr_len),
2410 msg->msg_flags & MSG_DONTWAIT, &err);
2411 if (skb == NULL)
2412 goto out_unlock;
2413
2414 skb_set_network_header(skb, reserve);
2415
2416 err = -EINVAL;
2417 if (sock->type == SOCK_DGRAM &&
2418 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
2419 goto out_free;
2420
2421
2422 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
2423 if (err)
2424 goto out_free;
2425
2426 sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
2427
2428 if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
2429
2430
2431
2432
2433 struct ethhdr *ehdr;
2434 skb_reset_mac_header(skb);
2435 ehdr = eth_hdr(skb);
2436 if (ehdr->h_proto != htons(ETH_P_8021Q)) {
2437 err = -EMSGSIZE;
2438 goto out_free;
2439 }
2440 }
2441
2442 skb->protocol = proto;
2443 skb->dev = dev;
2444 skb->priority = sk->sk_priority;
2445 skb->mark = sk->sk_mark;
2446
2447 if (po->has_vnet_hdr) {
2448 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2449 u16 s = __virtio16_to_cpu(false, vnet_hdr.csum_start);
2450 u16 o = __virtio16_to_cpu(false, vnet_hdr.csum_offset);
2451 if (!skb_partial_csum_set(skb, s, o)) {
2452 err = -EINVAL;
2453 goto out_free;
2454 }
2455 }
2456
2457 skb_shinfo(skb)->gso_size =
2458 __virtio16_to_cpu(false, vnet_hdr.gso_size);
2459 skb_shinfo(skb)->gso_type = gso_type;
2460
2461
2462 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2463 skb_shinfo(skb)->gso_segs = 0;
2464
2465 len += vnet_hdr_len;
2466 }
2467
2468 skb_probe_transport_header(skb, reserve);
2469
2470 if (unlikely(extra_len == 4))
2471 skb->no_fcs = 1;
2472
2473
2474
2475
2476
2477 err = dev_queue_xmit(skb);
2478 if (err > 0 && (err = net_xmit_errno(err)) != 0)
2479 goto out_unlock;
2480
2481 dev_put(dev);
2482
2483 return len;
2484
2485out_free:
2486 kfree_skb(skb);
2487out_unlock:
2488 if (dev)
2489 dev_put(dev);
2490out:
2491 return err;
2492}
2493
2494static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
2495 struct msghdr *msg, size_t len)
2496{
2497 struct sock *sk = sock->sk;
2498 struct packet_sock *po = pkt_sk(sk);
2499 if (po->tx_ring.pg_vec)
2500 return tpacket_snd(po, msg);
2501 else
2502 return packet_snd(sock, msg, len);
2503}
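/*
 * The transmit path is chosen by configuration rather than per call: once a
 * PACKET_TX_RING has been set up, every sendmsg() on the socket goes through
 * the memory-mapped ring path (tpacket_snd()); otherwise the ordinary
 * copy-from-user path (packet_snd()) is used.
 */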
2504
/*
 * Close a PACKET socket. This is fairly simple: we immediately go to
 * the closed state and remove our protocol entry from the device list.
 */
2510static int packet_release(struct socket *sock)
2511{
2512 struct sock *sk = sock->sk;
2513 struct packet_sock *po;
2514 struct net *net;
2515 union tpacket_req_u req_u;
2516
2517 if (!sk)
2518 return 0;
2519
2520 net = sock_net(sk);
2521 po = pkt_sk(sk);
2522
2523 mutex_lock(&net->packet.sklist_lock);
2524 sk_del_node_init_rcu(sk);
2525 mutex_unlock(&net->packet.sklist_lock);
2526
2527 preempt_disable();
2528 sock_prot_inuse_add(net, sk->sk_prot, -1);
2529 preempt_enable();
2530
2531 spin_lock(&po->bind_lock);
2532 unregister_prot_hook(sk, false);
2533 packet_cached_dev_reset(po);
2534
2535 if (po->prot_hook.dev) {
2536 dev_put(po->prot_hook.dev);
2537 po->prot_hook.dev = NULL;
2538 }
2539 spin_unlock(&po->bind_lock);
2540
2541 packet_flush_mclist(sk);
2542
2543 if (po->rx_ring.pg_vec) {
2544 memset(&req_u, 0, sizeof(req_u));
2545 packet_set_ring(sk, &req_u, 1, 0);
2546 }
2547
2548 if (po->tx_ring.pg_vec) {
2549 memset(&req_u, 0, sizeof(req_u));
2550 packet_set_ring(sk, &req_u, 1, 1);
2551 }
2552
2553 fanout_release(sk);
2554
2555 synchronize_net();
2556
/* Now the socket is dead - no more input will appear. */
2559 sock_orphan(sk);
2560 sock->sk = NULL;
2561
/* Purge the receive queue and release the pending-transmit counter. */
2564 skb_queue_purge(&sk->sk_receive_queue);
2565 packet_free_pending(po);
2566 sk_refcnt_debug_release(sk);
2567
2568 sock_put(sk);
2569 return 0;
2570}
2571
/*
 * Attach a packet hook: bind the socket to a protocol and, optionally,
 * to a single device.
 */
2576static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
2577 __be16 proto)
2578{
2579 struct packet_sock *po = pkt_sk(sk);
2580 struct net_device *dev_curr;
2581 __be16 proto_curr;
2582 bool need_rehook;
2583 struct net_device *dev = NULL;
2584 int ret = 0;
2585 bool unlisted = false;
2586
2587 if (po->fanout)
2588 return -EINVAL;
2589
2590 lock_sock(sk);
2591 spin_lock(&po->bind_lock);
2592 rcu_read_lock();
2593
2594 if (name) {
2595 dev = dev_get_by_name_rcu(sock_net(sk), name);
2596 if (!dev) {
2597 ret = -ENODEV;
2598 goto out_unlock;
2599 }
2600 } else if (ifindex) {
2601 dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
2602 if (!dev) {
2603 ret = -ENODEV;
2604 goto out_unlock;
2605 }
2606 }
2607
2608 if (dev)
2609 dev_hold(dev);
2610
2611 proto_curr = po->prot_hook.type;
2612 dev_curr = po->prot_hook.dev;
2613
2614 need_rehook = proto_curr != proto || dev_curr != dev;
2615
2616 if (need_rehook) {
2617 if (po->running) {
2618 rcu_read_unlock();
2619 __unregister_prot_hook(sk, true);
2620 rcu_read_lock();
2621 dev_curr = po->prot_hook.dev;
2622 if (dev)
2623 unlisted = !dev_get_by_index_rcu(sock_net(sk),
2624 dev->ifindex);
2625 }
2626
2627 po->num = proto;
2628 po->prot_hook.type = proto;
2629
2630 if (unlikely(unlisted)) {
2631 dev_put(dev);
2632 po->prot_hook.dev = NULL;
2633 po->ifindex = -1;
2634 packet_cached_dev_reset(po);
2635 } else {
2636 po->prot_hook.dev = dev;
2637 po->ifindex = dev ? dev->ifindex : 0;
2638 packet_cached_dev_assign(po, dev);
2639 }
2640 }
2641 if (dev_curr)
2642 dev_put(dev_curr);
2643
2644 if (proto == 0 || !need_rehook)
2645 goto out_unlock;
2646
2647 if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
2648 register_prot_hook(sk);
2649 } else {
2650 sk->sk_err = ENETDOWN;
2651 if (!sock_flag(sk, SOCK_DEAD))
2652 sk->sk_error_report(sk);
2653 }
2654
2655out_unlock:
2656 rcu_read_unlock();
2657 spin_unlock(&po->bind_lock);
2658 release_sock(sk);
2659 return ret;
2660}
2661
2662
/* Bind a SOCK_PACKET socket to a device by name. */
2666static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
2667 int addr_len)
2668{
2669 struct sock *sk = sock->sk;
2670 char name[15];
2671
2672
/*
 * Check legality: the address must be exactly a struct sockaddr whose
 * sa_data carries the interface name.
 */
2676 if (addr_len != sizeof(struct sockaddr))
2677 return -EINVAL;
2678 strlcpy(name, uaddr->sa_data, sizeof(name));
2679
2680 return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
2681}
2682
2683static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
2684{
2685 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
2686 struct sock *sk = sock->sk;
2687
2688
/* Check legality of the sockaddr_ll the caller handed in. */
2692 if (addr_len < sizeof(struct sockaddr_ll))
2693 return -EINVAL;
2694 if (sll->sll_family != AF_PACKET)
2695 return -EINVAL;
2696
2697 return packet_do_bind(sk, NULL, sll->sll_ifindex,
2698 sll->sll_protocol ? : pkt_sk(sk)->num);
2699}
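/*
 * Illustrative userspace usage (a sketch, assuming <sys/socket.h>,
 * <linux/if_packet.h>, <linux/if_ether.h> and <net/if.h>; error handling
 * omitted): bind a packet socket to one interface for all EtherTypes.
 *
 *	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
 *	struct sockaddr_ll sll = {
 *		.sll_family   = AF_PACKET,
 *		.sll_protocol = htons(ETH_P_ALL),
 *		.sll_ifindex  = if_nametoindex("eth0"),
 *	};
 *	bind(fd, (struct sockaddr *)&sll, sizeof(sll));
 */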
2700
2701static struct proto packet_proto = {
2702 .name = "PACKET",
2703 .owner = THIS_MODULE,
2704 .obj_size = sizeof(struct packet_sock),
2705};
2706
2707
/* Create a packet socket of type SOCK_RAW, SOCK_DGRAM or SOCK_PACKET. */
2711static int packet_create(struct net *net, struct socket *sock, int protocol,
2712 int kern)
2713{
2714 struct sock *sk;
2715 struct packet_sock *po;
2716 __be16 proto = (__force __be16)protocol;
2717 int err;
2718
2719 if (!ns_capable(net->user_ns, CAP_NET_RAW))
2720 return -EPERM;
2721 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
2722 sock->type != SOCK_PACKET)
2723 return -ESOCKTNOSUPPORT;
2724
2725 sock->state = SS_UNCONNECTED;
2726
2727 err = -ENOBUFS;
2728 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
2729 if (sk == NULL)
2730 goto out;
2731
2732 sock->ops = &packet_ops;
2733 if (sock->type == SOCK_PACKET)
2734 sock->ops = &packet_ops_spkt;
2735
2736 sock_init_data(sock, sk);
2737
2738 po = pkt_sk(sk);
2739 sk->sk_family = PF_PACKET;
2740 po->num = proto;
2741
2742 err = packet_alloc_pending(po);
2743 if (err)
2744 goto out2;
2745
2746 packet_cached_dev_reset(po);
2747
2748 sk->sk_destruct = packet_sock_destruct;
2749 sk_refcnt_debug_inc(sk);
2750
2751
/* Attach a protocol block. */
2755 spin_lock_init(&po->bind_lock);
2756 mutex_init(&po->pg_vec_lock);
2757 po->prot_hook.func = packet_rcv;
2758
2759 if (sock->type == SOCK_PACKET)
2760 po->prot_hook.func = packet_rcv_spkt;
2761
2762 po->prot_hook.af_packet_priv = sk;
2763
2764 if (proto) {
2765 po->prot_hook.type = proto;
2766 register_prot_hook(sk);
2767 }
2768
2769 mutex_lock(&net->packet.sklist_lock);
2770 sk_add_node_rcu(sk, &net->packet.sklist);
2771 mutex_unlock(&net->packet.sklist_lock);
2772
2773 preempt_disable();
2774 sock_prot_inuse_add(net, &packet_proto, 1);
2775 preempt_enable();
2776
2777 return 0;
2778out2:
2779 sk_free(sk);
2780out:
2781 return err;
2782}
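/*
 * The protocol argument of socket(2) selects which EtherType this socket
 * receives, for example:
 *
 *	socket(AF_PACKET, SOCK_RAW,   htons(ETH_P_ALL));  // every protocol, link-layer header included
 *	socket(AF_PACKET, SOCK_DGRAM, htons(ETH_P_IP));   // IPv4 only, header added/removed by the kernel
 *
 * A protocol of 0 creates the socket unbound: the hook is not registered
 * here, so no frames are delivered until bind() supplies a protocol.
 */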
2783
2784static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
2785{
2786 struct sock_exterr_skb *serr;
2787 struct sk_buff *skb, *skb2;
2788 int copied, err;
2789
2790 err = -EAGAIN;
2791 skb = skb_dequeue(&sk->sk_error_queue);
2792 if (skb == NULL)
2793 goto out;
2794
2795 copied = skb->len;
2796 if (copied > len) {
2797 msg->msg_flags |= MSG_TRUNC;
2798 copied = len;
2799 }
2800 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
2801 if (err)
2802 goto out_free_skb;
2803
2804 sock_recv_timestamp(msg, sk, skb);
2805
2806 serr = SKB_EXT_ERR(skb);
2807 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
2808 sizeof(serr->ee), &serr->ee);
2809
2810 msg->msg_flags |= MSG_ERRQUEUE;
2811 err = copied;
2812
/* Reset and regenerate the socket error state. */
2814 spin_lock_bh(&sk->sk_error_queue.lock);
2815 sk->sk_err = 0;
2816 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
2817 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
2818 spin_unlock_bh(&sk->sk_error_queue.lock);
2819 sk->sk_error_report(sk);
2820 } else
2821 spin_unlock_bh(&sk->sk_error_queue.lock);
2822
2823out_free_skb:
2824 kfree_skb(skb);
2825out:
2826 return err;
2827}
2828
2829
/*
 * Pull a packet from our receive queue and hand it to the user.
 * If necessary we block.
 */
2834static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
2835 struct msghdr *msg, size_t len, int flags)
2836{
2837 struct sock *sk = sock->sk;
2838 struct sk_buff *skb;
2839 int copied, err;
2840 int vnet_hdr_len = 0;
2841
2842 err = -EINVAL;
2843 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
2844 goto out;
2845
2846#if 0
2847
2848 if (pkt_sk(sk)->ifindex < 0)
2849 return -ENODEV;
2850#endif
2851
2852 if (flags & MSG_ERRQUEUE) {
2853 err = packet_recv_error(sk, msg, len);
2854 goto out;
2855 }
2856
/*
 * Call the generic datagram receiver. This handles all sorts of
 * horrible races and re-entrancy, so we can forget about it in the
 * protocol layers.
 *
 * It returns any pending socket error (for example ENETDOWN if the
 * device has just gone down); otherwise it blocks until a packet
 * arrives unless MSG_DONTWAIT was passed.
 */
2866 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
2867
/*
 * An error occurred, so return it. skb_recv_datagram() has already
 * handled the blocking and retries for us.
 */
2874 if (skb == NULL)
2875 goto out;
2876
2877 if (pkt_sk(sk)->has_vnet_hdr) {
2878 struct virtio_net_hdr vnet_hdr = { 0 };
2879
2880 err = -EINVAL;
2881 vnet_hdr_len = sizeof(vnet_hdr);
2882 if (len < vnet_hdr_len)
2883 goto out_free;
2884
2885 len -= vnet_hdr_len;
2886
2887 if (skb_is_gso(skb)) {
2888 struct skb_shared_info *sinfo = skb_shinfo(skb);
2889
/* This is a hint as to how much of the packet should be linear. */
2891 vnet_hdr.hdr_len =
2892 __cpu_to_virtio16(false, skb_headlen(skb));
2893 vnet_hdr.gso_size =
2894 __cpu_to_virtio16(false, sinfo->gso_size);
2895 if (sinfo->gso_type & SKB_GSO_TCPV4)
2896 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
2897 else if (sinfo->gso_type & SKB_GSO_TCPV6)
2898 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
2899 else if (sinfo->gso_type & SKB_GSO_UDP)
2900 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
2901 else if (sinfo->gso_type & SKB_GSO_FCOE)
2902 goto out_free;
2903 else
2904 BUG();
2905 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
2906 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2907 } else
2908 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
2909
2910 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2911 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
2912 vnet_hdr.csum_start = __cpu_to_virtio16(false,
2913 skb_checksum_start_offset(skb));
2914 vnet_hdr.csum_offset = __cpu_to_virtio16(false,
2915 skb->csum_offset);
2916 } else if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
2917 vnet_hdr.flags = VIRTIO_NET_HDR_F_DATA_VALID;
2918 }
2919
2920 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
2921 vnet_hdr_len);
2922 if (err < 0)
2923 goto out_free;
2924 }
2925
2926
/*
 * You lose any data beyond the buffer you gave. If it worries a user
 * program, it can always ask the device for its MTU.
 */
2930 copied = skb->len;
2931 if (copied > len) {
2932 copied = len;
2933 msg->msg_flags |= MSG_TRUNC;
2934 }
2935
2936 err = skb_copy_datagram_msg(skb, 0, msg, copied);
2937 if (err)
2938 goto out_free;
2939
2940 sock_recv_ts_and_drops(msg, sk, skb);
2941
2942 if (msg->msg_name) {
2943
/* Fill in the caller-supplied address and report its true length. */
2946 if (sock->type == SOCK_PACKET) {
2947 msg->msg_namelen = sizeof(struct sockaddr_pkt);
2948 } else {
2949 struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
2950 msg->msg_namelen = sll->sll_halen +
2951 offsetof(struct sockaddr_ll, sll_addr);
2952 }
2953 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
2954 msg->msg_namelen);
2955 }
2956
2957 if (pkt_sk(sk)->auxdata) {
2958 struct tpacket_auxdata aux;
2959
2960 aux.tp_status = TP_STATUS_USER;
2961 if (skb->ip_summed == CHECKSUM_PARTIAL)
2962 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
2963 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
2964 aux.tp_snaplen = skb->len;
2965 aux.tp_mac = 0;
2966 aux.tp_net = skb_network_offset(skb);
2967 if (skb_vlan_tag_present(skb)) {
2968 aux.tp_vlan_tci = skb_vlan_tag_get(skb);
2969 aux.tp_status |= TP_STATUS_VLAN_VALID;
2970 } else {
2971 aux.tp_vlan_tci = 0;
2972 }
2973 aux.tp_padding = 0;
2974 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
2975 }
2976
2977
/*
 * If the caller passed MSG_TRUNC, report the full packet length,
 * otherwise report what was copied; either way include the
 * virtio_net header length so truncation is detectable.
 */
2981 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
2982
2983out_free:
2984 skb_free_datagram(sk, skb);
2985out:
2986 return err;
2987}
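/*
 * Illustrative userspace usage (a sketch; error handling omitted): reading
 * the control message emitted by the PACKET_AUXDATA branch above.
 *
 *	char frame[2048];
 *	char ctl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
 *	struct iovec iov = { .iov_base = frame, .iov_len = sizeof(frame) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1,
 *			      .msg_control = ctl, .msg_controllen = sizeof(ctl) };
 *	ssize_t n = recvmsg(fd, &msg, 0);
 *	struct cmsghdr *c;
 *	for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c)) {
 *		if (c->cmsg_level == SOL_PACKET && c->cmsg_type == PACKET_AUXDATA) {
 *			struct tpacket_auxdata *aux = (void *)CMSG_DATA(c);
 *			// aux->tp_len, aux->tp_snaplen, aux->tp_vlan_tci, ...
 *		}
 *	}
 */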
2988
2989static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
2990 int *uaddr_len, int peer)
2991{
2992 struct net_device *dev;
2993 struct sock *sk = sock->sk;
2994
2995 if (peer)
2996 return -EOPNOTSUPP;
2997
2998 uaddr->sa_family = AF_PACKET;
2999 memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
3000 rcu_read_lock();
3001 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
3002 if (dev)
3003 strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
3004 rcu_read_unlock();
3005 *uaddr_len = sizeof(*uaddr);
3006
3007 return 0;
3008}
3009
3010static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
3011 int *uaddr_len, int peer)
3012{
3013 struct net_device *dev;
3014 struct sock *sk = sock->sk;
3015 struct packet_sock *po = pkt_sk(sk);
3016 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
3017
3018 if (peer)
3019 return -EOPNOTSUPP;
3020
3021 sll->sll_family = AF_PACKET;
3022 sll->sll_ifindex = po->ifindex;
3023 sll->sll_protocol = po->num;
3024 sll->sll_pkttype = 0;
3025 rcu_read_lock();
3026 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
3027 if (dev) {
3028 sll->sll_hatype = dev->type;
3029 sll->sll_halen = dev->addr_len;
3030 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
3031 } else {
3032 sll->sll_hatype = 0;
3033 sll->sll_halen = 0;
3034 }
3035 rcu_read_unlock();
3036 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
3037
3038 return 0;
3039}
3040
3041static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
3042 int what)
3043{
3044 switch (i->type) {
3045 case PACKET_MR_MULTICAST:
3046 if (i->alen != dev->addr_len)
3047 return -EINVAL;
3048 if (what > 0)
3049 return dev_mc_add(dev, i->addr);
3050 else
3051 return dev_mc_del(dev, i->addr);
3052 break;
3053 case PACKET_MR_PROMISC:
3054 return dev_set_promiscuity(dev, what);
3055 break;
3056 case PACKET_MR_ALLMULTI:
3057 return dev_set_allmulti(dev, what);
3058 break;
3059 case PACKET_MR_UNICAST:
3060 if (i->alen != dev->addr_len)
3061 return -EINVAL;
3062 if (what > 0)
3063 return dev_uc_add(dev, i->addr);
3064 else
3065 return dev_uc_del(dev, i->addr);
3066 break;
3067 default:
3068 break;
3069 }
3070 return 0;
3071}
3072
3073static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
3074{
3075 for ( ; i; i = i->next) {
3076 if (i->ifindex == dev->ifindex)
3077 packet_dev_mc(dev, i, what);
3078 }
3079}
3080
3081static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
3082{
3083 struct packet_sock *po = pkt_sk(sk);
3084 struct packet_mclist *ml, *i;
3085 struct net_device *dev;
3086 int err;
3087
3088 rtnl_lock();
3089
3090 err = -ENODEV;
3091 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
3092 if (!dev)
3093 goto done;
3094
3095 err = -EINVAL;
3096 if (mreq->mr_alen > dev->addr_len)
3097 goto done;
3098
3099 err = -ENOBUFS;
3100 i = kmalloc(sizeof(*i), GFP_KERNEL);
3101 if (i == NULL)
3102 goto done;
3103
3104 err = 0;
3105 for (ml = po->mclist; ml; ml = ml->next) {
3106 if (ml->ifindex == mreq->mr_ifindex &&
3107 ml->type == mreq->mr_type &&
3108 ml->alen == mreq->mr_alen &&
3109 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3110 ml->count++;
/* Already a member; drop the duplicate entry we allocated. */
3112 kfree(i);
3113 goto done;
3114 }
3115 }
3116
3117 i->type = mreq->mr_type;
3118 i->ifindex = mreq->mr_ifindex;
3119 i->alen = mreq->mr_alen;
3120 memcpy(i->addr, mreq->mr_address, i->alen);
3121 i->count = 1;
3122 i->next = po->mclist;
3123 po->mclist = i;
3124 err = packet_dev_mc(dev, i, 1);
3125 if (err) {
3126 po->mclist = i->next;
3127 kfree(i);
3128 }
3129
3130done:
3131 rtnl_unlock();
3132 return err;
3133}
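/*
 * Illustrative userspace usage (a sketch; error handling omitted): put the
 * interface into promiscuous mode through the membership API, which is
 * reference counted per socket and undone on close via packet_flush_mclist().
 *
 *	struct packet_mreq mreq = {
 *		.mr_ifindex = if_nametoindex("eth0"),
 *		.mr_type    = PACKET_MR_PROMISC,
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_ADD_MEMBERSHIP, &mreq, sizeof(mreq));
 */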
3134
3135static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
3136{
3137 struct packet_mclist *ml, **mlp;
3138
3139 rtnl_lock();
3140
3141 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
3142 if (ml->ifindex == mreq->mr_ifindex &&
3143 ml->type == mreq->mr_type &&
3144 ml->alen == mreq->mr_alen &&
3145 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
3146 if (--ml->count == 0) {
3147 struct net_device *dev;
3148 *mlp = ml->next;
3149 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3150 if (dev)
3151 packet_dev_mc(dev, ml, -1);
3152 kfree(ml);
3153 }
3154 rtnl_unlock();
3155 return 0;
3156 }
3157 }
3158 rtnl_unlock();
3159 return -EADDRNOTAVAIL;
3160}
3161
3162static void packet_flush_mclist(struct sock *sk)
3163{
3164 struct packet_sock *po = pkt_sk(sk);
3165 struct packet_mclist *ml;
3166
3167 if (!po->mclist)
3168 return;
3169
3170 rtnl_lock();
3171 while ((ml = po->mclist) != NULL) {
3172 struct net_device *dev;
3173
3174 po->mclist = ml->next;
3175 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
3176 if (dev != NULL)
3177 packet_dev_mc(dev, ml, -1);
3178 kfree(ml);
3179 }
3180 rtnl_unlock();
3181}
3182
3183static int
3184packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
3185{
3186 struct sock *sk = sock->sk;
3187 struct packet_sock *po = pkt_sk(sk);
3188 int ret;
3189
3190 if (level != SOL_PACKET)
3191 return -ENOPROTOOPT;
3192
3193 switch (optname) {
3194 case PACKET_ADD_MEMBERSHIP:
3195 case PACKET_DROP_MEMBERSHIP:
3196 {
3197 struct packet_mreq_max mreq;
3198 int len = optlen;
3199 memset(&mreq, 0, sizeof(mreq));
3200 if (len < sizeof(struct packet_mreq))
3201 return -EINVAL;
3202 if (len > sizeof(mreq))
3203 len = sizeof(mreq);
3204 if (copy_from_user(&mreq, optval, len))
3205 return -EFAULT;
3206 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
3207 return -EINVAL;
3208 if (optname == PACKET_ADD_MEMBERSHIP)
3209 ret = packet_mc_add(sk, &mreq);
3210 else
3211 ret = packet_mc_drop(sk, &mreq);
3212 return ret;
3213 }
3214
3215 case PACKET_RX_RING:
3216 case PACKET_TX_RING:
3217 {
3218 union tpacket_req_u req_u;
3219 int len;
3220
3221 switch (po->tp_version) {
3222 case TPACKET_V1:
3223 case TPACKET_V2:
3224 len = sizeof(req_u.req);
3225 break;
3226 case TPACKET_V3:
3227 default:
3228 len = sizeof(req_u.req3);
3229 break;
3230 }
3231 if (optlen < len)
3232 return -EINVAL;
3233 if (pkt_sk(sk)->has_vnet_hdr)
3234 return -EINVAL;
3235 if (copy_from_user(&req_u.req, optval, len))
3236 return -EFAULT;
3237 return packet_set_ring(sk, &req_u, 0,
3238 optname == PACKET_TX_RING);
3239 }
3240 case PACKET_COPY_THRESH:
3241 {
3242 int val;
3243
3244 if (optlen != sizeof(val))
3245 return -EINVAL;
3246 if (copy_from_user(&val, optval, sizeof(val)))
3247 return -EFAULT;
3248
3249 pkt_sk(sk)->copy_thresh = val;
3250 return 0;
3251 }
3252 case PACKET_VERSION:
3253 {
3254 int val;
3255
3256 if (optlen != sizeof(val))
3257 return -EINVAL;
3258 if (copy_from_user(&val, optval, sizeof(val)))
3259 return -EFAULT;
3260 switch (val) {
3261 case TPACKET_V1:
3262 case TPACKET_V2:
3263 case TPACKET_V3:
3264 break;
3265 default:
3266 return -EINVAL;
3267 }
3268 lock_sock(sk);
3269 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
3270 ret = -EBUSY;
3271 } else {
3272 po->tp_version = val;
3273 ret = 0;
3274 }
3275 release_sock(sk);
3276 return ret;
3277 }
3278 case PACKET_RESERVE:
3279 {
3280 unsigned int val;
3281
3282 if (optlen != sizeof(val))
3283 return -EINVAL;
3284 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3285 return -EBUSY;
3286 if (copy_from_user(&val, optval, sizeof(val)))
3287 return -EFAULT;
3288 if (val > INT_MAX)
3289 return -EINVAL;
3290 po->tp_reserve = val;
3291 return 0;
3292 }
3293 case PACKET_LOSS:
3294 {
3295 unsigned int val;
3296
3297 if (optlen != sizeof(val))
3298 return -EINVAL;
3299 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3300 return -EBUSY;
3301 if (copy_from_user(&val, optval, sizeof(val)))
3302 return -EFAULT;
3303 po->tp_loss = !!val;
3304 return 0;
3305 }
3306 case PACKET_AUXDATA:
3307 {
3308 int val;
3309
3310 if (optlen < sizeof(val))
3311 return -EINVAL;
3312 if (copy_from_user(&val, optval, sizeof(val)))
3313 return -EFAULT;
3314
3315 po->auxdata = !!val;
3316 return 0;
3317 }
3318 case PACKET_ORIGDEV:
3319 {
3320 int val;
3321
3322 if (optlen < sizeof(val))
3323 return -EINVAL;
3324 if (copy_from_user(&val, optval, sizeof(val)))
3325 return -EFAULT;
3326
3327 po->origdev = !!val;
3328 return 0;
3329 }
3330 case PACKET_VNET_HDR:
3331 {
3332 int val;
3333
3334 if (sock->type != SOCK_RAW)
3335 return -EINVAL;
3336 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3337 return -EBUSY;
3338 if (optlen < sizeof(val))
3339 return -EINVAL;
3340 if (copy_from_user(&val, optval, sizeof(val)))
3341 return -EFAULT;
3342
3343 po->has_vnet_hdr = !!val;
3344 return 0;
3345 }
3346 case PACKET_TIMESTAMP:
3347 {
3348 int val;
3349
3350 if (optlen != sizeof(val))
3351 return -EINVAL;
3352 if (copy_from_user(&val, optval, sizeof(val)))
3353 return -EFAULT;
3354
3355 po->tp_tstamp = val;
3356 return 0;
3357 }
3358 case PACKET_FANOUT:
3359 {
3360 int val;
3361
3362 if (optlen != sizeof(val))
3363 return -EINVAL;
3364 if (copy_from_user(&val, optval, sizeof(val)))
3365 return -EFAULT;
3366
3367 return fanout_add(sk, val & 0xffff, val >> 16);
3368 }
3369 case PACKET_TX_HAS_OFF:
3370 {
3371 unsigned int val;
3372
3373 if (optlen != sizeof(val))
3374 return -EINVAL;
3375 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
3376 return -EBUSY;
3377 if (copy_from_user(&val, optval, sizeof(val)))
3378 return -EFAULT;
3379 po->tp_tx_has_off = !!val;
3380 return 0;
3381 }
3382 default:
3383 return -ENOPROTOOPT;
3384 }
3385}
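/*
 * Illustrative userspace usage (a sketch assuming 4 KiB pages; error
 * handling omitted): configure a TPACKET_V2 receive ring before mapping it.
 * The geometry must satisfy packet_set_ring() below: tp_block_size a
 * multiple of PAGE_SIZE, tp_frame_size a multiple of TPACKET_ALIGNMENT, and
 * tp_frame_nr == (tp_block_size / tp_frame_size) * tp_block_nr.
 *
 *	int version = TPACKET_V2;
 *	setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version));
 *
 *	struct tpacket_req req = {
 *		.tp_block_size = 4096,
 *		.tp_block_nr   = 64,
 *		.tp_frame_size = 2048,
 *		.tp_frame_nr   = 64 * (4096 / 2048),
 *	};
 *	setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req, sizeof(req));
 */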
3386
3387static int packet_getsockopt(struct socket *sock, int level, int optname,
3388 char __user *optval, int __user *optlen)
3389{
3390 int len;
3391 int val, lv = sizeof(val);
3392 struct sock *sk = sock->sk;
3393 struct packet_sock *po = pkt_sk(sk);
3394 void *data = &val;
3395 union tpacket_stats_u st;
3396
3397 if (level != SOL_PACKET)
3398 return -ENOPROTOOPT;
3399
3400 if (get_user(len, optlen))
3401 return -EFAULT;
3402
3403 if (len < 0)
3404 return -EINVAL;
3405
3406 switch (optname) {
3407 case PACKET_STATISTICS:
3408 spin_lock_bh(&sk->sk_receive_queue.lock);
3409 memcpy(&st, &po->stats, sizeof(st));
3410 memset(&po->stats, 0, sizeof(po->stats));
3411 spin_unlock_bh(&sk->sk_receive_queue.lock);
3412
3413 if (po->tp_version == TPACKET_V3) {
3414 lv = sizeof(struct tpacket_stats_v3);
3415 st.stats3.tp_packets += st.stats3.tp_drops;
3416 data = &st.stats3;
3417 } else {
3418 lv = sizeof(struct tpacket_stats);
3419 st.stats1.tp_packets += st.stats1.tp_drops;
3420 data = &st.stats1;
3421 }
3422
3423 break;
3424 case PACKET_AUXDATA:
3425 val = po->auxdata;
3426 break;
3427 case PACKET_ORIGDEV:
3428 val = po->origdev;
3429 break;
3430 case PACKET_VNET_HDR:
3431 val = po->has_vnet_hdr;
3432 break;
3433 case PACKET_VERSION:
3434 val = po->tp_version;
3435 break;
3436 case PACKET_HDRLEN:
3437 if (len > sizeof(int))
3438 len = sizeof(int);
3439 if (copy_from_user(&val, optval, len))
3440 return -EFAULT;
3441 switch (val) {
3442 case TPACKET_V1:
3443 val = sizeof(struct tpacket_hdr);
3444 break;
3445 case TPACKET_V2:
3446 val = sizeof(struct tpacket2_hdr);
3447 break;
3448 case TPACKET_V3:
3449 val = sizeof(struct tpacket3_hdr);
3450 break;
3451 default:
3452 return -EINVAL;
3453 }
3454 break;
3455 case PACKET_RESERVE:
3456 val = po->tp_reserve;
3457 break;
3458 case PACKET_LOSS:
3459 val = po->tp_loss;
3460 break;
3461 case PACKET_TIMESTAMP:
3462 val = po->tp_tstamp;
3463 break;
3464 case PACKET_FANOUT:
3465 val = (po->fanout ?
3466 ((u32)po->fanout->id |
3467 ((u32)po->fanout->type << 16) |
3468 ((u32)po->fanout->flags << 24)) :
3469 0);
3470 break;
3471 case PACKET_TX_HAS_OFF:
3472 val = po->tp_tx_has_off;
3473 break;
3474 default:
3475 return -ENOPROTOOPT;
3476 }
3477
3478 if (len > lv)
3479 len = lv;
3480 if (put_user(len, optlen))
3481 return -EFAULT;
3482 if (copy_to_user(optval, data, len))
3483 return -EFAULT;
3484 return 0;
3485}
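/*
 * Illustrative userspace usage (a sketch; error handling omitted): read and
 * clear the receive counters. The counters are zeroed by the read, and
 * tp_packets includes tp_drops, as arranged above. TPACKET_V3 sockets get a
 * struct tpacket_stats_v3 instead.
 *
 *	struct tpacket_stats st;
 *	socklen_t optlen = sizeof(st);
 *	getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &st, &optlen);
 */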
3486
3487
3488static int packet_notifier(struct notifier_block *this,
3489 unsigned long msg, void *ptr)
3490{
3491 struct sock *sk;
3492 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3493 struct net *net = dev_net(dev);
3494
3495 rcu_read_lock();
3496 sk_for_each_rcu(sk, &net->packet.sklist) {
3497 struct packet_sock *po = pkt_sk(sk);
3498
3499 switch (msg) {
3500 case NETDEV_UNREGISTER:
3501 if (po->mclist)
3502 packet_dev_mclist(dev, po->mclist, -1);
/* fallthrough: an unregistering device is also going down for our purposes */
3505 case NETDEV_DOWN:
3506 if (dev->ifindex == po->ifindex) {
3507 spin_lock(&po->bind_lock);
3508 if (po->running) {
3509 __unregister_prot_hook(sk, false);
3510 sk->sk_err = ENETDOWN;
3511 if (!sock_flag(sk, SOCK_DEAD))
3512 sk->sk_error_report(sk);
3513 }
3514 if (msg == NETDEV_UNREGISTER) {
3515 packet_cached_dev_reset(po);
3516 po->ifindex = -1;
3517 if (po->prot_hook.dev)
3518 dev_put(po->prot_hook.dev);
3519 po->prot_hook.dev = NULL;
3520 }
3521 spin_unlock(&po->bind_lock);
3522 }
3523 break;
3524 case NETDEV_UP:
3525 if (dev->ifindex == po->ifindex) {
3526 spin_lock(&po->bind_lock);
3527 if (po->num)
3528 register_prot_hook(sk);
3529 spin_unlock(&po->bind_lock);
3530 }
3531 break;
3532 }
3533 }
3534 rcu_read_unlock();
3535 return NOTIFY_DONE;
3536}
3537
3538
3539static int packet_ioctl(struct socket *sock, unsigned int cmd,
3540 unsigned long arg)
3541{
3542 struct sock *sk = sock->sk;
3543
3544 switch (cmd) {
3545 case SIOCOUTQ:
3546 {
3547 int amount = sk_wmem_alloc_get(sk);
3548
3549 return put_user(amount, (int __user *)arg);
3550 }
3551 case SIOCINQ:
3552 {
3553 struct sk_buff *skb;
3554 int amount = 0;
3555
3556 spin_lock_bh(&sk->sk_receive_queue.lock);
3557 skb = skb_peek(&sk->sk_receive_queue);
3558 if (skb)
3559 amount = skb->len;
3560 spin_unlock_bh(&sk->sk_receive_queue.lock);
3561 return put_user(amount, (int __user *)arg);
3562 }
3563 case SIOCGSTAMP:
3564 return sock_get_timestamp(sk, (struct timeval __user *)arg);
3565 case SIOCGSTAMPNS:
3566 return sock_get_timestampns(sk, (struct timespec __user *)arg);
3567
3568#ifdef CONFIG_INET
3569 case SIOCADDRT:
3570 case SIOCDELRT:
3571 case SIOCDARP:
3572 case SIOCGARP:
3573 case SIOCSARP:
3574 case SIOCGIFADDR:
3575 case SIOCSIFADDR:
3576 case SIOCGIFBRDADDR:
3577 case SIOCSIFBRDADDR:
3578 case SIOCGIFNETMASK:
3579 case SIOCSIFNETMASK:
3580 case SIOCGIFDSTADDR:
3581 case SIOCSIFDSTADDR:
3582 case SIOCSIFFLAGS:
3583 return inet_dgram_ops.ioctl(sock, cmd, arg);
3584#endif
3585
3586 default:
3587 return -ENOIOCTLCMD;
3588 }
3589 return 0;
3590}
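/*
 * Note that SIOCINQ reports the length of the next queued packet rather
 * than the total number of bytes queued, and SIOCOUTQ reports the bytes
 * still held in the transmit path for this socket.
 */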
3591
3592static unsigned int packet_poll(struct file *file, struct socket *sock,
3593 poll_table *wait)
3594{
3595 struct sock *sk = sock->sk;
3596 struct packet_sock *po = pkt_sk(sk);
3597 unsigned int mask = datagram_poll(file, sock, wait);
3598
3599 spin_lock_bh(&sk->sk_receive_queue.lock);
3600 if (po->rx_ring.pg_vec) {
3601 if (!packet_previous_rx_frame(po, &po->rx_ring,
3602 TP_STATUS_KERNEL))
3603 mask |= POLLIN | POLLRDNORM;
3604 }
3605 spin_unlock_bh(&sk->sk_receive_queue.lock);
3606 spin_lock_bh(&sk->sk_write_queue.lock);
3607 if (po->tx_ring.pg_vec) {
3608 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
3609 mask |= POLLOUT | POLLWRNORM;
3610 }
3611 spin_unlock_bh(&sk->sk_write_queue.lock);
3612 return mask;
3613}
3614
3615
/*
 * Track how many user mappings reference the rings; packet_set_ring()
 * refuses to change or free a ring while po->mapped is non-zero.
 */
3620static void packet_mm_open(struct vm_area_struct *vma)
3621{
3622 struct file *file = vma->vm_file;
3623 struct socket *sock = file->private_data;
3624 struct sock *sk = sock->sk;
3625
3626 if (sk)
3627 atomic_inc(&pkt_sk(sk)->mapped);
3628}
3629
3630static void packet_mm_close(struct vm_area_struct *vma)
3631{
3632 struct file *file = vma->vm_file;
3633 struct socket *sock = file->private_data;
3634 struct sock *sk = sock->sk;
3635
3636 if (sk)
3637 atomic_dec(&pkt_sk(sk)->mapped);
3638}
3639
3640static const struct vm_operations_struct packet_mmap_ops = {
3641 .open = packet_mm_open,
3642 .close = packet_mm_close,
3643};
3644
3645static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
3646 unsigned int len)
3647{
3648 int i;
3649
3650 for (i = 0; i < len; i++) {
3651 if (likely(pg_vec[i].buffer)) {
3652 if (is_vmalloc_addr(pg_vec[i].buffer))
3653 vfree(pg_vec[i].buffer);
3654 else
3655 free_pages((unsigned long)pg_vec[i].buffer,
3656 order);
3657 pg_vec[i].buffer = NULL;
3658 }
3659 }
3660 kfree(pg_vec);
3661}
3662
3663static char *alloc_one_pg_vec_page(unsigned long order)
3664{
3665 char *buffer = NULL;
3666 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
3667 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
3668
3669 buffer = (char *) __get_free_pages(gfp_flags, order);
3670
3671 if (buffer)
3672 return buffer;
3673
/* __get_free_pages() failed: fall back to vmalloc */
3677 buffer = vzalloc((1 << order) * PAGE_SIZE);
3678
3679 if (buffer)
3680 return buffer;
3681
3682
/* vmalloc failed too: retry the page allocator, this time letting it reclaim and retry */
3685 gfp_flags &= ~__GFP_NORETRY;
3686 buffer = (char *)__get_free_pages(gfp_flags, order);
3687 if (buffer)
3688 return buffer;
3689
3690
/* complete and utter failure */
3693 return NULL;
3694}
3695
3696static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
3697{
3698 unsigned int block_nr = req->tp_block_nr;
3699 struct pgv *pg_vec;
3700 int i;
3701
3702 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
3703 if (unlikely(!pg_vec))
3704 goto out;
3705
3706 for (i = 0; i < block_nr; i++) {
3707 pg_vec[i].buffer = alloc_one_pg_vec_page(order);
3708 if (unlikely(!pg_vec[i].buffer))
3709 goto out_free_pgvec;
3710 }
3711
3712out:
3713 return pg_vec;
3714
3715out_free_pgvec:
3716 free_pg_vec(pg_vec, order, block_nr);
3717 pg_vec = NULL;
3718 goto out;
3719}
3720
3721static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
3722 int closing, int tx_ring)
3723{
3724 struct pgv *pg_vec = NULL;
3725 struct packet_sock *po = pkt_sk(sk);
3726 int was_running, order = 0;
3727 struct packet_ring_buffer *rb;
3728 struct sk_buff_head *rb_queue;
3729 __be16 num;
3730 int err = -EINVAL;
3731
3732 struct tpacket_req *req = &req_u->req;
3733
3734 lock_sock(sk);
3735
3736 if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
3737 WARN(1, "Tx-ring is not supported.\n");
3738 goto out;
3739 }
3740
3741 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
3742 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
3743
3744 err = -EBUSY;
3745 if (!closing) {
3746 if (atomic_read(&po->mapped))
3747 goto out;
3748 if (packet_read_pending(rb))
3749 goto out;
3750 }
3751
3752 if (req->tp_block_nr) {
/* Sanity tests and some calculations */
3754 err = -EBUSY;
3755 if (unlikely(rb->pg_vec))
3756 goto out;
3757
3758 switch (po->tp_version) {
3759 case TPACKET_V1:
3760 po->tp_hdrlen = TPACKET_HDRLEN;
3761 break;
3762 case TPACKET_V2:
3763 po->tp_hdrlen = TPACKET2_HDRLEN;
3764 break;
3765 case TPACKET_V3:
3766 po->tp_hdrlen = TPACKET3_HDRLEN;
3767 break;
3768 }
3769
3770 err = -EINVAL;
3771 if (unlikely((int)req->tp_block_size <= 0))
3772 goto out;
3773 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
3774 goto out;
3775 if (po->tp_version >= TPACKET_V3 &&
3776 req->tp_block_size <=
3777 BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
3778 goto out;
3779 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
3780 po->tp_reserve))
3781 goto out;
3782 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
3783 goto out;
3784
3785 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
3786 if (unlikely(rb->frames_per_block <= 0))
3787 goto out;
3788 if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr))
3789 goto out;
3790 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
3791 req->tp_frame_nr))
3792 goto out;
3793
3794 err = -ENOMEM;
3795 order = get_order(req->tp_block_size);
3796 pg_vec = alloc_pg_vec(req, order);
3797 if (unlikely(!pg_vec))
3798 goto out;
3799 switch (po->tp_version) {
3800 case TPACKET_V3:
/*
 * Only the receive side uses the V3 block descriptor queue;
 * a V3 transmit ring was already rejected above.
 */
3804 if (!tx_ring)
3805 init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
3806 break;
3807 default:
3808 break;
3809 }
3810 }
3811
3812 else {
3813 err = -EINVAL;
3814 if (unlikely(req->tp_frame_nr))
3815 goto out;
3816 }
3817
/* Detach the socket from the stack while the rings are swapped. */
3820 spin_lock(&po->bind_lock);
3821 was_running = po->running;
3822 num = po->num;
3823 if (was_running) {
3824 po->num = 0;
3825 __unregister_prot_hook(sk, false);
3826 }
3827 spin_unlock(&po->bind_lock);
3828
3829 synchronize_net();
3830
3831 err = -EBUSY;
3832 mutex_lock(&po->pg_vec_lock);
3833 if (closing || atomic_read(&po->mapped) == 0) {
3834 err = 0;
3835 spin_lock_bh(&rb_queue->lock);
3836 swap(rb->pg_vec, pg_vec);
3837 rb->frame_max = (req->tp_frame_nr - 1);
3838 rb->head = 0;
3839 rb->frame_size = req->tp_frame_size;
3840 spin_unlock_bh(&rb_queue->lock);
3841
3842 swap(rb->pg_vec_order, order);
3843 swap(rb->pg_vec_len, req->tp_block_nr);
3844
3845 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
3846 po->prot_hook.func = (po->rx_ring.pg_vec) ?
3847 tpacket_rcv : packet_rcv;
3848 skb_queue_purge(rb_queue);
3849 if (atomic_read(&po->mapped))
3850 pr_err("packet_mmap: vma is busy: %d\n",
3851 atomic_read(&po->mapped));
3852 }
3853 mutex_unlock(&po->pg_vec_lock);
3854
3855 spin_lock(&po->bind_lock);
3856 if (was_running) {
3857 po->num = num;
3858 register_prot_hook(sk);
3859 }
3860 spin_unlock(&po->bind_lock);
3861 if (closing && (po->tp_version > TPACKET_V2)) {
/* Only the receive side has a retire-block timer to shut down. */
3863 if (!tx_ring)
3864 prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
3865 }
3866
3867 if (pg_vec)
3868 free_pg_vec(pg_vec, order, req->tp_block_nr);
3869out:
3870 release_sock(sk);
3871 return err;
3872}
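/*
 * Passing an all-zero request (tp_block_nr == 0 and tp_frame_nr == 0)
 * through packet_set_ring() tears an existing ring down; packet_release()
 * relies on exactly that to free the rings when the socket is closed.
 */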
3873
3874static int packet_mmap(struct file *file, struct socket *sock,
3875 struct vm_area_struct *vma)
3876{
3877 struct sock *sk = sock->sk;
3878 struct packet_sock *po = pkt_sk(sk);
3879 unsigned long size, expected_size;
3880 struct packet_ring_buffer *rb;
3881 unsigned long start;
3882 int err = -EINVAL;
3883 int i;
3884
3885 if (vma->vm_pgoff)
3886 return -EINVAL;
3887
3888 mutex_lock(&po->pg_vec_lock);
3889
3890 expected_size = 0;
3891 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3892 if (rb->pg_vec) {
3893 expected_size += rb->pg_vec_len
3894 * rb->pg_vec_pages
3895 * PAGE_SIZE;
3896 }
3897 }
3898
3899 if (expected_size == 0)
3900 goto out;
3901
3902 size = vma->vm_end - vma->vm_start;
3903 if (size != expected_size)
3904 goto out;
3905
3906 start = vma->vm_start;
3907 for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
3908 if (rb->pg_vec == NULL)
3909 continue;
3910
3911 for (i = 0; i < rb->pg_vec_len; i++) {
3912 struct page *page;
3913 void *kaddr = rb->pg_vec[i].buffer;
3914 int pg_num;
3915
3916 for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
3917 page = pgv_to_page(kaddr);
3918 err = vm_insert_page(vma, start, page);
3919 if (unlikely(err))
3920 goto out;
3921 start += PAGE_SIZE;
3922 kaddr += PAGE_SIZE;
3923 }
3924 }
3925 }
3926
3927 atomic_inc(&po->mapped);
3928 vma->vm_ops = &packet_mmap_ops;
3929 err = 0;
3930
3931out:
3932 mutex_unlock(&po->pg_vec_lock);
3933 return err;
3934}
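/*
 * Illustrative userspace usage (a sketch continuing the TPACKET_V2 setup
 * shown after packet_setsockopt(); error handling omitted). If both an RX
 * and a TX ring exist they are mapped back to back in one contiguous area,
 * so the mmap length must equal the combined size checked above.
 *
 *	size_t ring_sz = (size_t)req.tp_block_size * req.tp_block_nr;
 *	char *ring = mmap(NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *	unsigned int i = 0;
 *
 *	for (;;) {
 *		struct tpacket2_hdr *hdr = (void *)(ring + i * req.tp_frame_size);
 *		while (!(hdr->tp_status & TP_STATUS_USER))
 *			poll(&pfd, 1, -1);
 *		// frame data starts at (char *)hdr + hdr->tp_mac, length hdr->tp_snaplen
 *		hdr->tp_status = TP_STATUS_KERNEL;	// hand the slot back to the kernel
 *		i = (i + 1) % req.tp_frame_nr;
 *	}
 */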
3935
3936static const struct proto_ops packet_ops_spkt = {
3937 .family = PF_PACKET,
3938 .owner = THIS_MODULE,
3939 .release = packet_release,
3940 .bind = packet_bind_spkt,
3941 .connect = sock_no_connect,
3942 .socketpair = sock_no_socketpair,
3943 .accept = sock_no_accept,
3944 .getname = packet_getname_spkt,
3945 .poll = datagram_poll,
3946 .ioctl = packet_ioctl,
3947 .listen = sock_no_listen,
3948 .shutdown = sock_no_shutdown,
3949 .setsockopt = sock_no_setsockopt,
3950 .getsockopt = sock_no_getsockopt,
3951 .sendmsg = packet_sendmsg_spkt,
3952 .recvmsg = packet_recvmsg,
3953 .mmap = sock_no_mmap,
3954 .sendpage = sock_no_sendpage,
3955};
3956
3957static const struct proto_ops packet_ops = {
3958 .family = PF_PACKET,
3959 .owner = THIS_MODULE,
3960 .release = packet_release,
3961 .bind = packet_bind,
3962 .connect = sock_no_connect,
3963 .socketpair = sock_no_socketpair,
3964 .accept = sock_no_accept,
3965 .getname = packet_getname,
3966 .poll = packet_poll,
3967 .ioctl = packet_ioctl,
3968 .listen = sock_no_listen,
3969 .shutdown = sock_no_shutdown,
3970 .setsockopt = packet_setsockopt,
3971 .getsockopt = packet_getsockopt,
3972 .sendmsg = packet_sendmsg,
3973 .recvmsg = packet_recvmsg,
3974 .mmap = packet_mmap,
3975 .sendpage = sock_no_sendpage,
3976};
3977
3978static const struct net_proto_family packet_family_ops = {
3979 .family = PF_PACKET,
3980 .create = packet_create,
3981 .owner = THIS_MODULE,
3982};
3983
3984static struct notifier_block packet_netdev_notifier = {
3985 .notifier_call = packet_notifier,
3986};
3987
3988#ifdef CONFIG_PROC_FS
3989
3990static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
3991 __acquires(RCU)
3992{
3993 struct net *net = seq_file_net(seq);
3994
3995 rcu_read_lock();
3996 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
3997}
3998
3999static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4000{
4001 struct net *net = seq_file_net(seq);
4002 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
4003}
4004
4005static void packet_seq_stop(struct seq_file *seq, void *v)
4006 __releases(RCU)
4007{
4008 rcu_read_unlock();
4009}
4010
4011static int packet_seq_show(struct seq_file *seq, void *v)
4012{
4013 if (v == SEQ_START_TOKEN)
4014 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
4015 else {
4016 struct sock *s = sk_entry(v);
4017 const struct packet_sock *po = pkt_sk(s);
4018
4019 seq_printf(seq,
4020 "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
4021 s,
4022 atomic_read(&s->sk_refcnt),
4023 s->sk_type,
4024 ntohs(po->num),
4025 po->ifindex,
4026 po->running,
4027 atomic_read(&s->sk_rmem_alloc),
4028 from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
4029 sock_i_ino(s));
4030 }
4031
4032 return 0;
4033}
4034
4035static const struct seq_operations packet_seq_ops = {
4036 .start = packet_seq_start,
4037 .next = packet_seq_next,
4038 .stop = packet_seq_stop,
4039 .show = packet_seq_show,
4040};
4041
4042static int packet_seq_open(struct inode *inode, struct file *file)
4043{
4044 return seq_open_net(inode, file, &packet_seq_ops,
4045 sizeof(struct seq_net_private));
4046}
4047
4048static const struct file_operations packet_seq_fops = {
4049 .owner = THIS_MODULE,
4050 .open = packet_seq_open,
4051 .read = seq_read,
4052 .llseek = seq_lseek,
4053 .release = seq_release_net,
4054};
4055
4056#endif
4057
4058static int __net_init packet_net_init(struct net *net)
4059{
4060 mutex_init(&net->packet.sklist_lock);
4061 INIT_HLIST_HEAD(&net->packet.sklist);
4062
4063 if (!proc_create("packet", 0, net->proc_net, &packet_seq_fops))
4064 return -ENOMEM;
4065
4066 return 0;
4067}
4068
4069static void __net_exit packet_net_exit(struct net *net)
4070{
4071 remove_proc_entry("packet", net->proc_net);
4072}
4073
4074static struct pernet_operations packet_net_ops = {
4075 .init = packet_net_init,
4076 .exit = packet_net_exit,
4077};
4078
4079
4080static void __exit packet_exit(void)
4081{
4082 unregister_netdevice_notifier_rh(&packet_netdev_notifier);
4083 unregister_pernet_subsys(&packet_net_ops);
4084 sock_unregister(PF_PACKET);
4085 proto_unregister(&packet_proto);
4086}
4087
4088static int __init packet_init(void)
4089{
4090 int rc = proto_register(&packet_proto, 0);
4091
4092 if (rc != 0)
4093 goto out;
4094
4095 sock_register(&packet_family_ops);
4096 register_pernet_subsys(&packet_net_ops);
4097 register_netdevice_notifier_rh(&packet_netdev_notifier);
4098out:
4099 return rc;
4100}
4101
4102module_init(packet_init);
4103module_exit(packet_exit);
4104MODULE_LICENSE("GPL");
4105MODULE_ALIAS_NETPROTO(PF_PACKET);
4106