/*
 *  TUN - Universal TUN/TAP device driver.
 *  Copyright (C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>
 */
37#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
38
39#define DRV_NAME "tun"
40#define DRV_VERSION "1.6"
41#define DRV_DESCRIPTION "Universal TUN/TAP device driver"
42#define DRV_COPYRIGHT "(C) 1999-2004 Max Krasnyansky <maxk@qualcomm.com>"
43
44#include <linux/module.h>
45#include <linux/errno.h>
46#include <linux/kernel.h>
47#include <linux/sched/signal.h>
48#include <linux/major.h>
49#include <linux/slab.h>
50#include <linux/poll.h>
51#include <linux/fcntl.h>
52#include <linux/init.h>
53#include <linux/skbuff.h>
54#include <linux/netdevice.h>
55#include <linux/etherdevice.h>
56#include <linux/miscdevice.h>
57#include <linux/ethtool.h>
58#include <linux/rtnetlink.h>
59#include <linux/compat.h>
60#include <linux/if.h>
61#include <linux/if_arp.h>
62#include <linux/if_ether.h>
63#include <linux/if_tun.h>
64#include <linux/if_vlan.h>
65#include <linux/crc32.h>
66#include <linux/nsproxy.h>
67#include <linux/virtio_net.h>
68#include <linux/rcupdate.h>
69#include <net/net_namespace.h>
70#include <net/netns/generic.h>
71#include <net/rtnetlink.h>
72#include <net/sock.h>
73#include <net/xdp.h>
74#include <linux/seq_file.h>
75#include <linux/uio.h>
76#include <linux/skb_array.h>
77#include <linux/bpf.h>
78#include <linux/bpf_trace.h>
79#include <linux/mutex.h>
80
81#include <linux/uaccess.h>
82#include <linux/proc_fs.h>
83
84static void tun_default_link_ksettings(struct net_device *dev,
85 struct ethtool_link_ksettings *cmd);
86
87
88
89
90#ifdef TUN_DEBUG
91static int debug;
92
93#define tun_debug(level, tun, fmt, args...) \
94do { \
95 if (tun->debug) \
96 netdev_printk(level, tun->dev, fmt, ##args); \
97} while (0)
98#define DBG1(level, fmt, args...) \
99do { \
100 if (debug == 2) \
101 printk(level fmt, ##args); \
102} while (0)
103#else
104#define tun_debug(level, tun, fmt, args...) \
105do { \
106 if (0) \
107 netdev_printk(level, tun->dev, fmt, ##args); \
108} while (0)
109#define DBG1(level, fmt, args...) \
110do { \
111 if (0) \
112 printk(level fmt, ##args); \
113} while (0)
114#endif
115
116#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
117
118
119
120
121
122
123#define TUN_FASYNC IFF_ATTACH_QUEUE
124
125#define TUN_VNET_LE 0x80000000
126#define TUN_VNET_BE 0x40000000
127
128#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
129 IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
130
131#define GOODCOPY_LEN 128
132
133#define FLT_EXACT_COUNT 8
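/* TAP hardware-address filter: up to FLT_EXACT_COUNT exact entries plus a
 * 64-bit hash (mask[2]) for the remaining multicast addresses.
 */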
134struct tap_filter {
135 unsigned int count;
136 u32 mask[2];
137 unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN];
138};
139
140
141
142#define MAX_TAP_QUEUES 256
143#define MAX_TAP_FLOWS 4096
144
145#define TUN_FLOW_EXPIRE (3 * HZ)
146
147struct tun_pcpu_stats {
148 u64 rx_packets;
149 u64 rx_bytes;
150 u64 tx_packets;
151 u64 tx_bytes;
152 struct u64_stats_sync syncp;
153 u32 rx_dropped;
154 u32 tx_dropped;
155 u32 rx_frame_errors;
156};
157
158
159
160
161
162
163
164
165
166
167
168
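/* Per-queue state for one open file descriptor of the tun character device.
 * tfile->tun is the attached device (RCU protected); "detached" and the
 * "next" list track queues that have been disabled but not yet closed.
 */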
169struct tun_file {
170 struct sock sk;
171 struct socket socket;
172 struct socket_wq wq;
173 struct tun_struct __rcu *tun;
174 struct fasync_struct *fasync;
175
176 unsigned int flags;
177 union {
178 u16 queue_index;
179 unsigned int ifindex;
180 };
181 struct napi_struct napi;
182 bool napi_enabled;
183 bool napi_frags_enabled;
184 struct mutex napi_mutex;
185 struct list_head next;
186 struct tun_struct *detached;
187 struct ptr_ring tx_ring;
188 struct xdp_rxq_info xdp_rxq;
189};
190
191struct tun_page {
192 struct page *page;
193 int count;
194};
195
196struct tun_flow_entry {
197 struct hlist_node hash_link;
198 struct rcu_head rcu;
199 struct tun_struct *tun;
200
201 u32 rxhash;
202 u32 rps_rxhash;
203 int queue_index;
204 unsigned long updated;
205};
206
207#define TUN_NUM_FLOW_ENTRIES 1024
208
209struct tun_prog {
210 struct rcu_head rcu;
211 struct bpf_prog *prog;
212};
213
214
215
216
217
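/* Per-device state.  tfiles[] holds the attached queues (RCU), flows[] is the
 * rxhash -> queue flow table used for automatic queue selection, and
 * steering_prog/filter_prog are optional eBPF programs run on transmit.
 */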
218struct tun_struct {
219 struct tun_file __rcu *tfiles[MAX_TAP_QUEUES];
220 unsigned int numqueues;
221 unsigned int flags;
222 kuid_t owner;
223 kgid_t group;
224
225 struct net_device *dev;
226 netdev_features_t set_features;
227#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
228 NETIF_F_TSO6)
229
230 int align;
231 int vnet_hdr_sz;
232 int sndbuf;
233 struct tap_filter txflt;
234 struct sock_fprog fprog;
235
236 bool filter_attached;
237#ifdef TUN_DEBUG
238 int debug;
239#endif
240 spinlock_t lock;
241 struct hlist_head flows[TUN_NUM_FLOW_ENTRIES];
242 struct timer_list flow_gc_timer;
243 unsigned long ageing_time;
244 unsigned int numdisabled;
245 struct list_head disabled;
246 void *security;
247 u32 flow_count;
248 u32 rx_batched;
249 struct tun_pcpu_stats __percpu *pcpu_stats;
250 struct bpf_prog __rcu *xdp_prog;
251 struct tun_prog __rcu *steering_prog;
252 struct tun_prog __rcu *filter_prog;
253 struct ethtool_link_ksettings link_ksettings;
254};
255
256struct veth {
257 __be16 h_vlan_proto;
258 __be16 h_vlan_TCI;
259};
260
261bool tun_is_xdp_frame(void *ptr)
262{
263 return (unsigned long)ptr & TUN_XDP_FLAG;
264}
265EXPORT_SYMBOL(tun_is_xdp_frame);
266
267void *tun_xdp_to_ptr(void *ptr)
268{
269 return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
270}
271EXPORT_SYMBOL(tun_xdp_to_ptr);
272
273void *tun_ptr_to_xdp(void *ptr)
274{
275 return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
276}
277EXPORT_SYMBOL(tun_ptr_to_xdp);
278
279static int tun_napi_receive(struct napi_struct *napi, int budget)
280{
281 struct tun_file *tfile = container_of(napi, struct tun_file, napi);
282 struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
283 struct sk_buff_head process_queue;
284 struct sk_buff *skb;
285 int received = 0;
286
287 __skb_queue_head_init(&process_queue);
288
289 spin_lock(&queue->lock);
290 skb_queue_splice_tail_init(queue, &process_queue);
291 spin_unlock(&queue->lock);
292
293 while (received < budget && (skb = __skb_dequeue(&process_queue))) {
294 napi_gro_receive(napi, skb);
295 ++received;
296 }
297
298 if (!skb_queue_empty(&process_queue)) {
299 spin_lock(&queue->lock);
300 skb_queue_splice(&process_queue, queue);
301 spin_unlock(&queue->lock);
302 }
303
304 return received;
305}
306
307static int tun_napi_poll(struct napi_struct *napi, int budget)
308{
309 unsigned int received;
310
311 received = tun_napi_receive(napi, budget);
312
313 if (received < budget)
314 napi_complete_done(napi, received);
315
316 return received;
317}
318
319static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
320 bool napi_en, bool napi_frags)
321{
322 tfile->napi_enabled = napi_en;
323 tfile->napi_frags_enabled = napi_en && napi_frags;
324 if (napi_en) {
325 netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
326 NAPI_POLL_WEIGHT);
327 napi_enable(&tfile->napi);
328 }
329}
330
331static void tun_napi_disable(struct tun_file *tfile)
332{
333 if (tfile->napi_enabled)
334 napi_disable(&tfile->napi);
335}
336
337static void tun_napi_del(struct tun_file *tfile)
338{
339 if (tfile->napi_enabled)
340 netif_napi_del(&tfile->napi);
341}
342
343static bool tun_napi_frags_enabled(const struct tun_file *tfile)
344{
345 return tfile->napi_frags_enabled;
346}
347
348#ifdef CONFIG_TUN_VNET_CROSS_LE
349static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
350{
351 return tun->flags & TUN_VNET_BE ? false :
352 virtio_legacy_is_little_endian();
353}
354
355static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
356{
357 int be = !!(tun->flags & TUN_VNET_BE);
358
359 if (put_user(be, argp))
360 return -EFAULT;
361
362 return 0;
363}
364
365static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
366{
367 int be;
368
369 if (get_user(be, argp))
370 return -EFAULT;
371
372 if (be)
373 tun->flags |= TUN_VNET_BE;
374 else
375 tun->flags &= ~TUN_VNET_BE;
376
377 return 0;
378}
379#else
380static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
381{
382 return virtio_legacy_is_little_endian();
383}
384
385static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
386{
387 return -EINVAL;
388}
389
390static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
391{
392 return -EINVAL;
393}
394#endif
395
396static inline bool tun_is_little_endian(struct tun_struct *tun)
397{
398 return tun->flags & TUN_VNET_LE ||
399 tun_legacy_is_little_endian(tun);
400}
401
402static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
403{
404 return __virtio16_to_cpu(tun_is_little_endian(tun), val);
405}
406
407static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
408{
409 return __cpu_to_virtio16(tun_is_little_endian(tun), val);
410}
411
412static inline u32 tun_hashfn(u32 rxhash)
413{
414 return rxhash & 0x3ff;
415}
416
417static struct tun_flow_entry *tun_flow_find(struct hlist_head *head, u32 rxhash)
418{
419 struct tun_flow_entry *e;
420
421 hlist_for_each_entry_rcu(e, head, hash_link) {
422 if (e->rxhash == rxhash)
423 return e;
424 }
425 return NULL;
426}
427
428static struct tun_flow_entry *tun_flow_create(struct tun_struct *tun,
429 struct hlist_head *head,
430 u32 rxhash, u16 queue_index)
431{
432 struct tun_flow_entry *e = kmalloc(sizeof(*e), GFP_ATOMIC);
433
434 if (e) {
435 tun_debug(KERN_INFO, tun, "create flow: hash %u index %u\n",
436 rxhash, queue_index);
437 e->updated = jiffies;
438 e->rxhash = rxhash;
439 e->rps_rxhash = 0;
440 e->queue_index = queue_index;
441 e->tun = tun;
442 hlist_add_head_rcu(&e->hash_link, head);
443 ++tun->flow_count;
444 }
445 return e;
446}
447
448static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e)
449{
450 tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
451 e->rxhash, e->queue_index);
452 hlist_del_rcu(&e->hash_link);
453 kfree_rcu(e, rcu);
454 --tun->flow_count;
455}
456
457static void tun_flow_flush(struct tun_struct *tun)
458{
459 int i;
460
461 spin_lock_bh(&tun->lock);
462 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
463 struct tun_flow_entry *e;
464 struct hlist_node *n;
465
466 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link)
467 tun_flow_delete(tun, e);
468 }
469 spin_unlock_bh(&tun->lock);
470}
471
472static void tun_flow_delete_by_queue(struct tun_struct *tun, u16 queue_index)
473{
474 int i;
475
476 spin_lock_bh(&tun->lock);
477 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
478 struct tun_flow_entry *e;
479 struct hlist_node *n;
480
481 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
482 if (e->queue_index == queue_index)
483 tun_flow_delete(tun, e);
484 }
485 }
486 spin_unlock_bh(&tun->lock);
487}
488
489static void tun_flow_cleanup(struct timer_list *t)
490{
491 struct tun_struct *tun = from_timer(tun, t, flow_gc_timer);
492 unsigned long delay = tun->ageing_time;
493 unsigned long next_timer = jiffies + delay;
494 unsigned long count = 0;
495 int i;
496
497 tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
498
499 spin_lock(&tun->lock);
500 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
501 struct tun_flow_entry *e;
502 struct hlist_node *n;
503
504 hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
505 unsigned long this_timer;
506
507 this_timer = e->updated + delay;
508 if (time_before_eq(this_timer, jiffies)) {
509 tun_flow_delete(tun, e);
510 continue;
511 }
512 count++;
513 if (time_before(this_timer, next_timer))
514 next_timer = this_timer;
515 }
516 }
517
518 if (count)
519 mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
520 spin_unlock(&tun->lock);
521}
522
523static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
524 struct tun_file *tfile)
525{
526 struct hlist_head *head;
527 struct tun_flow_entry *e;
528 unsigned long delay = tun->ageing_time;
529 u16 queue_index = tfile->queue_index;
530
531 head = &tun->flows[tun_hashfn(rxhash)];
532
533 rcu_read_lock();
534
535 e = tun_flow_find(head, rxhash);
536 if (likely(e)) {
537
538 e->queue_index = queue_index;
539 e->updated = jiffies;
540 sock_rps_record_flow_hash(e->rps_rxhash);
541 } else {
542 spin_lock_bh(&tun->lock);
543 if (!tun_flow_find(head, rxhash) &&
544 tun->flow_count < MAX_TAP_FLOWS)
545 tun_flow_create(tun, head, rxhash, queue_index);
546
547 if (!timer_pending(&tun->flow_gc_timer))
548 mod_timer(&tun->flow_gc_timer,
549 round_jiffies_up(jiffies + delay));
550 spin_unlock_bh(&tun->lock);
551 }
552
553 rcu_read_unlock();
554}
555
556
557
558
559
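/* Remember the most recent flow hash seen on the transmit path so that
 * tun_flow_update() can feed it to RPS via sock_rps_record_flow_hash().
 */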
560static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
561{
562 if (unlikely(e->rps_rxhash != hash))
563 e->rps_rxhash = hash;
564}
565
566
567
568
569
570
571
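/* Default (non-eBPF) queue selection: look the symmetric flow hash up in the
 * flow table, and fall back to spreading flows across the active queues by
 * hash when no entry exists.
 */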
572static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
573{
574 struct tun_flow_entry *e;
575 u32 txq = 0;
576 u32 numqueues = 0;
577
578 numqueues = READ_ONCE(tun->numqueues);
579
580 txq = __skb_get_hash_symmetric(skb);
581 e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
582 if (e) {
583 tun_flow_save_rps_rxhash(e, txq);
584 txq = e->queue_index;
585 } else {
586
587 txq = ((u64)txq * numqueues) >> 32;
588 }
589
590 return txq;
591}
592
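/* Queue selection via the user-supplied steering eBPF program; its return
 * value is reduced modulo the number of active queues.
 */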
593static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
594{
595 struct tun_prog *prog;
596 u32 numqueues;
597 u16 ret = 0;
598
599 numqueues = READ_ONCE(tun->numqueues);
600 if (!numqueues)
601 return 0;
602
603 prog = rcu_dereference(tun->steering_prog);
604 if (prog)
605 ret = bpf_prog_run_clear_cb(prog->prog, skb);
606
607 return ret % numqueues;
608}
609
610static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
611 struct net_device *sb_dev,
612 select_queue_fallback_t fallback)
613{
614 struct tun_struct *tun = netdev_priv(dev);
615 u16 ret;
616
617 rcu_read_lock();
618 if (rcu_dereference(tun->steering_prog))
619 ret = tun_ebpf_select_queue(tun, skb);
620 else
621 ret = tun_automq_select_queue(tun, skb);
622 rcu_read_unlock();
623
624 return ret;
625}
626
627static inline bool tun_not_capable(struct tun_struct *tun)
628{
629 const struct cred *cred = current_cred();
630 struct net *net = dev_net(tun->dev);
631
632 return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) ||
633 (gid_valid(tun->group) && !in_egroup_p(tun->group))) &&
634 !ns_capable(net->user_ns, CAP_NET_ADMIN);
635}
636
637static void tun_set_real_num_queues(struct tun_struct *tun)
638{
639 netif_set_real_num_tx_queues(tun->dev, tun->numqueues);
640 netif_set_real_num_rx_queues(tun->dev, tun->numqueues);
641}
642
643static void tun_disable_queue(struct tun_struct *tun, struct tun_file *tfile)
644{
645 tfile->detached = tun;
646 list_add_tail(&tfile->next, &tun->disabled);
647 ++tun->numdisabled;
648}
649
650static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
651{
652 struct tun_struct *tun = tfile->detached;
653
654 tfile->detached = NULL;
655 list_del_init(&tfile->next);
656 --tun->numdisabled;
657 return tun;
658}
659
660void tun_ptr_free(void *ptr)
661{
662 if (!ptr)
663 return;
664 if (tun_is_xdp_frame(ptr)) {
665 struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
666
667 xdp_return_frame(xdpf);
668 } else {
669 __skb_array_destroy_skb(ptr);
670 }
671}
672EXPORT_SYMBOL_GPL(tun_ptr_free);
673
674static void tun_queue_purge(struct tun_file *tfile)
675{
676 void *ptr;
677
678 while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
679 tun_ptr_free(ptr);
680
681 skb_queue_purge(&tfile->sk.sk_write_queue);
682 skb_queue_purge(&tfile->sk.sk_error_queue);
683}
684
685static void __tun_detach(struct tun_file *tfile, bool clean)
686{
687 struct tun_file *ntfile;
688 struct tun_struct *tun;
689
690 tun = rtnl_dereference(tfile->tun);
691
692 if (tun && clean) {
693 tun_napi_disable(tfile);
694 tun_napi_del(tfile);
695 }
696
697 if (tun && !tfile->detached) {
698 u16 index = tfile->queue_index;
699 BUG_ON(index >= tun->numqueues);
700
701 rcu_assign_pointer(tun->tfiles[index],
702 tun->tfiles[tun->numqueues - 1]);
703 ntfile = rtnl_dereference(tun->tfiles[index]);
704 ntfile->queue_index = index;
705 rcu_assign_pointer(tun->tfiles[tun->numqueues - 1],
706 NULL);
707
708 --tun->numqueues;
709 if (clean) {
710 RCU_INIT_POINTER(tfile->tun, NULL);
711 sock_put(&tfile->sk);
712 } else
713 tun_disable_queue(tun, tfile);
714
715 synchronize_net();
716 tun_flow_delete_by_queue(tun, tun->numqueues + 1);
717
718 tun_queue_purge(tfile);
719 tun_set_real_num_queues(tun);
720 } else if (tfile->detached && clean) {
721 tun = tun_enable_queue(tfile);
722 sock_put(&tfile->sk);
723 }
724
725 if (clean) {
726 if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
727 netif_carrier_off(tun->dev);
728
729 if (!(tun->flags & IFF_PERSIST) &&
730 tun->dev->reg_state == NETREG_REGISTERED)
731 unregister_netdevice(tun->dev);
732 }
733 if (tun)
734 xdp_rxq_info_unreg(&tfile->xdp_rxq);
735 ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
736 sock_put(&tfile->sk);
737 }
738}
739
740static void tun_detach(struct tun_file *tfile, bool clean)
741{
742 struct tun_struct *tun;
743 struct net_device *dev;
744
745 rtnl_lock();
746 tun = rtnl_dereference(tfile->tun);
747 dev = tun ? tun->dev : NULL;
748 __tun_detach(tfile, clean);
749 if (dev)
750 netdev_state_change(dev);
751 rtnl_unlock();
752}
753
754static void tun_detach_all(struct net_device *dev)
755{
756 struct tun_struct *tun = netdev_priv(dev);
757 struct tun_file *tfile, *tmp;
758 int i, n = tun->numqueues;
759
760 for (i = 0; i < n; i++) {
761 tfile = rtnl_dereference(tun->tfiles[i]);
762 BUG_ON(!tfile);
763 tun_napi_disable(tfile);
764 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
765 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
766 RCU_INIT_POINTER(tfile->tun, NULL);
767 --tun->numqueues;
768 }
769 list_for_each_entry(tfile, &tun->disabled, next) {
770 tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
771 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
772 RCU_INIT_POINTER(tfile->tun, NULL);
773 }
774 BUG_ON(tun->numqueues != 0);
775
776 synchronize_net();
777 for (i = 0; i < n; i++) {
778 tfile = rtnl_dereference(tun->tfiles[i]);
779 tun_napi_del(tfile);
780
781 tun_queue_purge(tfile);
782 xdp_rxq_info_unreg(&tfile->xdp_rxq);
783 sock_put(&tfile->sk);
784 }
785 list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
786 tun_enable_queue(tfile);
787 tun_queue_purge(tfile);
788 xdp_rxq_info_unreg(&tfile->xdp_rxq);
789 sock_put(&tfile->sk);
790 }
791 BUG_ON(tun->numdisabled != 0);
792
793 if (tun->flags & IFF_PERSIST)
794 module_put(THIS_MODULE);
795}
796
797static int tun_attach(struct tun_struct *tun, struct file *file,
798 bool skip_filter, bool napi, bool napi_frags)
799{
800 struct tun_file *tfile = file->private_data;
801 struct net_device *dev = tun->dev;
802 int err;
803
804 err = security_tun_dev_attach(tfile->socket.sk, tun->security);
805 if (err < 0)
806 goto out;
807
808 err = -EINVAL;
809 if (rtnl_dereference(tfile->tun) && !tfile->detached)
810 goto out;
811
812 err = -EBUSY;
813 if (!(tun->flags & IFF_MULTI_QUEUE) && tun->numqueues == 1)
814 goto out;
815
816 err = -E2BIG;
817 if (!tfile->detached &&
818 tun->numqueues + tun->numdisabled == MAX_TAP_QUEUES)
819 goto out;
820
821 err = 0;
822
823
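	/* Re-attach the socket filter when re-attaching to a persistent device. */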
824 if (!skip_filter && (tun->filter_attached == true)) {
825 lock_sock(tfile->socket.sk);
826 err = sk_attach_filter(&tun->fprog, tfile->socket.sk);
827 release_sock(tfile->socket.sk);
		if (err < 0)
829 goto out;
830 }
831
832 if (!tfile->detached &&
833 ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len,
834 GFP_KERNEL, tun_ptr_free)) {
835 err = -ENOMEM;
836 goto out;
837 }
838
839 tfile->queue_index = tun->numqueues;
840 tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
841
842 if (tfile->detached) {
843
844 WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
845
846 if (tfile->xdp_rxq.queue_index != tfile->queue_index)
847 tfile->xdp_rxq.queue_index = tfile->queue_index;
848 } else {
849
850 err = xdp_rxq_info_reg(&tfile->xdp_rxq,
851 tun->dev, tfile->queue_index);
852 if (err < 0)
853 goto out;
854 err = xdp_rxq_info_reg_mem_model(&tfile->xdp_rxq,
855 MEM_TYPE_PAGE_SHARED, NULL);
856 if (err < 0) {
857 xdp_rxq_info_unreg(&tfile->xdp_rxq);
858 goto out;
859 }
860 err = 0;
861 }
862
863 if (tfile->detached) {
864 tun_enable_queue(tfile);
865 } else {
866 sock_hold(&tfile->sk);
867 tun_napi_init(tun, tfile, napi, napi_frags);
868 }
869
870 if (rtnl_dereference(tun->xdp_prog))
871 sock_set_flag(&tfile->sk, SOCK_XDP);
872
873
874
875
876
877
878
879
880
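	/* Publish the queue: once tun->tfiles[] points at tfile and numqueues is
	 * bumped, tun_net_xmit() may start using it, so this is done last.
	 */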
881 rcu_assign_pointer(tfile->tun, tun);
882 rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
883 tun->numqueues++;
884 tun_set_real_num_queues(tun);
885out:
886 return err;
887}
888
889static struct tun_struct *tun_get(struct tun_file *tfile)
890{
891 struct tun_struct *tun;
892
893 rcu_read_lock();
894 tun = rcu_dereference(tfile->tun);
895 if (tun)
896 dev_hold(tun->dev);
897 rcu_read_unlock();
898
899 return tun;
900}
901
902static void tun_put(struct tun_struct *tun)
903{
904 dev_put(tun->dev);
905}
906
907
908static void addr_hash_set(u32 *mask, const u8 *addr)
909{
910 int n = ether_crc(ETH_ALEN, addr) >> 26;
911 mask[n >> 5] |= (1 << (n & 31));
912}
913
914static unsigned int addr_hash_test(const u32 *mask, const u8 *addr)
915{
916 int n = ether_crc(ETH_ALEN, addr) >> 26;
917 return mask[n >> 5] & (1 << (n & 31));
918}
919
920static int update_filter(struct tap_filter *filter, void __user *arg)
921{
922 struct { u8 u[ETH_ALEN]; } *addr;
923 struct tun_filter uf;
924 int err, alen, n, nexact;
925
926 if (copy_from_user(&uf, arg, sizeof(uf)))
927 return -EFAULT;
928
929 if (!uf.count) {
930
931 filter->count = 0;
932 return 0;
933 }
934
935 alen = ETH_ALEN * uf.count;
936 addr = memdup_user(arg + sizeof(uf), alen);
937 if (IS_ERR(addr))
938 return PTR_ERR(addr);
939
940
941
942
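	/* Disable the filter while it is being rewritten; readers then see either
	 * the old table or an empty one, never a half-written entry.
	 */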
943 filter->count = 0;
944 wmb();
945
946
947 for (n = 0; n < uf.count && n < FLT_EXACT_COUNT; n++)
948 memcpy(filter->addr[n], addr[n].u, ETH_ALEN);
949
950 nexact = n;
951
952
953
954 memset(filter->mask, 0, sizeof(filter->mask));
955 for (; n < uf.count; n++) {
956 if (!is_multicast_ether_addr(addr[n].u)) {
957 err = 0;
958 goto free_addr;
959 }
960 addr_hash_set(filter->mask, addr[n].u);
961 }
962
963
964
965 if ((uf.flags & TUN_FLT_ALLMULTI))
966 memset(filter->mask, ~0, sizeof(filter->mask));
967
968
969 wmb();
970 filter->count = nexact;
971
972
973 err = nexact;
974free_addr:
975 kfree(addr);
976 return err;
977}
978
979
980static int run_filter(struct tap_filter *filter, const struct sk_buff *skb)
981{
982
983
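	/* Accept the frame if its destination matches an exact entry, or, for
	 * multicast, if it hits the hash mask.
	 */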
984 struct ethhdr *eh = (struct ethhdr *) skb->data;
985 int i;
986
987
988 for (i = 0; i < filter->count; i++)
989 if (ether_addr_equal(eh->h_dest, filter->addr[i]))
990 return 1;
991
992
993 if (is_multicast_ether_addr(eh->h_dest))
994 return addr_hash_test(filter->mask, eh->h_dest);
995
996 return 0;
997}
998
999
1000
1001
1002
1003static int check_filter(struct tap_filter *filter, const struct sk_buff *skb)
1004{
1005 if (!filter->count)
1006 return 1;
1007
1008 return run_filter(filter, skb);
1009}
1010
1011
1012
1013static const struct ethtool_ops tun_ethtool_ops;
1014
1015
1016static void tun_net_uninit(struct net_device *dev)
1017{
1018 tun_detach_all(dev);
1019}
1020
1021
1022static int tun_net_open(struct net_device *dev)
1023{
1024 netif_tx_start_all_queues(dev);
1025
1026 return 0;
1027}
1028
1029
1030static int tun_net_close(struct net_device *dev)
1031{
1032 netif_tx_stop_all_queues(dev);
1033 return 0;
1034}
1035
1036
1037static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
1038{
1039#ifdef CONFIG_RPS
1040 if (tun->numqueues == 1 && static_branch_unlikely(&rps_needed)) {
1041
1042
1043
1044 struct tun_flow_entry *e;
1045 __u32 rxhash;
1046
1047 rxhash = __skb_get_hash_symmetric(skb);
1048 e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)], rxhash);
1049 if (e)
1050 tun_flow_save_rps_rxhash(e, rxhash);
1051 }
1052#endif
1053}
1054
1055static unsigned int run_ebpf_filter(struct tun_struct *tun,
1056 struct sk_buff *skb,
1057 int len)
1058{
1059 struct tun_prog *prog = rcu_dereference(tun->filter_prog);
1060
1061 if (prog)
1062 len = bpf_prog_run_clear_cb(prog->prog, skb);
1063
1064 return len;
1065}
1066
1067
1068static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
1069{
1070 struct tun_struct *tun = netdev_priv(dev);
1071 int txq = skb->queue_mapping;
1072 struct tun_file *tfile;
1073 int len = skb->len;
1074
1075 rcu_read_lock();
1076 tfile = rcu_dereference(tun->tfiles[txq]);
1077
1078
1079 if (!tfile)
1080 goto drop;
1081
1082 if (!rcu_dereference(tun->steering_prog))
1083 tun_automq_xmit(tun, skb);
1084
1085 tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
1086
1087 BUG_ON(!tfile);
1088
1089
1090
1091
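	/* Drop the packet if the TAP address filter rejects it. */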
1092 if (!check_filter(&tun->txflt, skb))
1093 goto drop;
1094
1095 if (tfile->socket.sk->sk_filter &&
1096 sk_filter(tfile->socket.sk, skb))
1097 goto drop;
1098
1099 len = run_ebpf_filter(tun, skb, len);
1100 if (len == 0 || pskb_trim(skb, len))
1101 goto drop;
1102
1103 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
1104 goto drop;
1105
1106 skb_tx_timestamp(skb);
1107
1108
1109
1110
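	/* Orphan the skb: the transmitting socket must not stay charged for the
	 * time the packet sits in this queue waiting for userspace.
	 */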
1111 skb_orphan(skb);
1112
1113 nf_reset(skb);
1114
1115 if (ptr_ring_produce(&tfile->tx_ring, skb))
1116 goto drop;
1117
1118
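	/* Notify and wake up the reader process. */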
1119 if (tfile->flags & TUN_FASYNC)
1120 kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1121 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1122
1123 rcu_read_unlock();
1124 return NETDEV_TX_OK;
1125
1126drop:
1127 this_cpu_inc(tun->pcpu_stats->tx_dropped);
1128 skb_tx_error(skb);
1129 kfree_skb(skb);
1130 rcu_read_unlock();
1131 return NET_XMIT_DROP;
1132}
1133
1134static void tun_net_mclist(struct net_device *dev)
1135{
1136
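	/* Nothing to do for multicast filtering here: every frame is handed to
	 * the userspace reader, so there is no hardware list to program.
	 */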
1137
1138
1139
1140
1141}
1142
1143static netdev_features_t tun_net_fix_features(struct net_device *dev,
1144 netdev_features_t features)
1145{
1146 struct tun_struct *tun = netdev_priv(dev);
1147
1148 return (features & tun->set_features) | (features & ~TUN_USER_FEATURES);
1149}
1150#ifdef CONFIG_NET_POLL_CONTROLLER
1151static void tun_poll_controller(struct net_device *dev)
1152{
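	/* Packets only enter this device through writes on the char device, so
	 * there is no conventional RX path for netpoll to service.  When
	 * IFF_NAPI is set, schedule each queue's NAPI instance so anything
	 * already queued gets processed.
	 */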
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167 struct tun_struct *tun = netdev_priv(dev);
1168
1169 if (tun->flags & IFF_NAPI) {
1170 struct tun_file *tfile;
1171 int i;
1172
1173 rcu_read_lock();
1174 for (i = 0; i < tun->numqueues; i++) {
1175 tfile = rcu_dereference(tun->tfiles[i]);
1176 if (!tun_napi_frags_enabled(tfile) &&
1177 tfile->napi_enabled)
1178 napi_schedule(&tfile->napi);
1179 }
1180 rcu_read_unlock();
1181 }
1182 return;
1183}
1184#endif
1185
1186static void tun_set_headroom(struct net_device *dev, int new_hr)
1187{
1188 struct tun_struct *tun = netdev_priv(dev);
1189
1190 if (new_hr < NET_SKB_PAD)
1191 new_hr = NET_SKB_PAD;
1192
1193 tun->align = new_hr;
1194}
1195
1196static void
1197tun_net_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
1198{
1199 u32 rx_dropped = 0, tx_dropped = 0, rx_frame_errors = 0;
1200 struct tun_struct *tun = netdev_priv(dev);
1201 struct tun_pcpu_stats *p;
1202 int i;
1203
1204 for_each_possible_cpu(i) {
1205 u64 rxpackets, rxbytes, txpackets, txbytes;
1206 unsigned int start;
1207
1208 p = per_cpu_ptr(tun->pcpu_stats, i);
1209 do {
1210 start = u64_stats_fetch_begin(&p->syncp);
1211 rxpackets = p->rx_packets;
1212 rxbytes = p->rx_bytes;
1213 txpackets = p->tx_packets;
1214 txbytes = p->tx_bytes;
1215 } while (u64_stats_fetch_retry(&p->syncp, start));
1216
1217 stats->rx_packets += rxpackets;
1218 stats->rx_bytes += rxbytes;
1219 stats->tx_packets += txpackets;
1220 stats->tx_bytes += txbytes;
1221
1222
1223 rx_dropped += p->rx_dropped;
1224 rx_frame_errors += p->rx_frame_errors;
1225 tx_dropped += p->tx_dropped;
1226 }
1227 stats->rx_dropped = rx_dropped;
1228 stats->rx_frame_errors = rx_frame_errors;
1229 stats->tx_dropped = tx_dropped;
1230}
1231
1232static int tun_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1233 struct netlink_ext_ack *extack)
1234{
1235 struct tun_struct *tun = netdev_priv(dev);
1236 struct tun_file *tfile;
1237 struct bpf_prog *old_prog;
1238 int i;
1239
1240 old_prog = rtnl_dereference(tun->xdp_prog);
1241 rcu_assign_pointer(tun->xdp_prog, prog);
1242 if (old_prog)
1243 bpf_prog_put(old_prog);
1244
1245 for (i = 0; i < tun->numqueues; i++) {
1246 tfile = rtnl_dereference(tun->tfiles[i]);
1247 if (prog)
1248 sock_set_flag(&tfile->sk, SOCK_XDP);
1249 else
1250 sock_reset_flag(&tfile->sk, SOCK_XDP);
1251 }
1252 list_for_each_entry(tfile, &tun->disabled, next) {
1253 if (prog)
1254 sock_set_flag(&tfile->sk, SOCK_XDP);
1255 else
1256 sock_reset_flag(&tfile->sk, SOCK_XDP);
1257 }
1258
1259 return 0;
1260}
1261
1262static u32 tun_xdp_query(struct net_device *dev)
1263{
1264 struct tun_struct *tun = netdev_priv(dev);
1265 const struct bpf_prog *xdp_prog;
1266
1267 xdp_prog = rtnl_dereference(tun->xdp_prog);
1268 if (xdp_prog)
1269 return xdp_prog->aux->id;
1270
1271 return 0;
1272}
1273
1274static int tun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
1275{
1276 switch (xdp->command) {
1277 case XDP_SETUP_PROG:
1278 return tun_xdp_set(dev, xdp->prog, xdp->extack);
1279 case XDP_QUERY_PROG:
1280 xdp->prog_id = tun_xdp_query(dev);
1281 return 0;
1282 default:
1283 return -EINVAL;
1284 }
1285}
1286
1287static int tun_net_change_carrier(struct net_device *dev, bool new_carrier)
1288{
1289 if (new_carrier) {
1290 struct tun_struct *tun = netdev_priv(dev);
1291
1292 if (!tun->numqueues)
1293 return -EPERM;
1294
1295 netif_carrier_on(dev);
1296 } else {
1297 netif_carrier_off(dev);
1298 }
1299 return 0;
1300}
1301
1302static const struct net_device_ops tun_netdev_ops = {
1303 .ndo_uninit = tun_net_uninit,
1304 .ndo_open = tun_net_open,
1305 .ndo_stop = tun_net_close,
1306 .ndo_start_xmit = tun_net_xmit,
1307 .ndo_fix_features = tun_net_fix_features,
1308 .ndo_select_queue = tun_select_queue,
1309#ifdef CONFIG_NET_POLL_CONTROLLER
1310 .ndo_poll_controller = tun_poll_controller,
1311#endif
1312 .ndo_set_rx_headroom = tun_set_headroom,
1313 .ndo_get_stats64 = tun_net_get_stats64,
1314 .ndo_change_carrier = tun_net_change_carrier,
1315};
1316
1317static void __tun_xdp_flush_tfile(struct tun_file *tfile)
1318{
1319
1320 if (tfile->flags & TUN_FASYNC)
1321 kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
1322 tfile->socket.sk->sk_data_ready(tfile->socket.sk);
1323}
1324
1325static int tun_xdp_xmit(struct net_device *dev, int n,
1326 struct xdp_frame **frames, u32 flags)
1327{
1328 struct tun_struct *tun = netdev_priv(dev);
1329 struct tun_file *tfile;
1330 u32 numqueues;
1331 int drops = 0;
1332 int cnt = n;
1333 int i;
1334
1335 if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
1336 return -EINVAL;
1337
1338 rcu_read_lock();
1339
1340resample:
1341 numqueues = READ_ONCE(tun->numqueues);
1342 if (!numqueues) {
1343 rcu_read_unlock();
1344 return -ENXIO;
1345 }
1346
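	/* Spread XDP transmit across the active queues using the current CPU. */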
1347 tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
1348 numqueues]);
1349 if (unlikely(!tfile))
1350 goto resample;
1351
1352 spin_lock(&tfile->tx_ring.producer_lock);
1353 for (i = 0; i < n; i++) {
1354 struct xdp_frame *xdp = frames[i];
1355
1356
1357
1358 void *frame = tun_xdp_to_ptr(xdp);
1359
1360 if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
1361 this_cpu_inc(tun->pcpu_stats->tx_dropped);
1362 xdp_return_frame_rx_napi(xdp);
1363 drops++;
1364 }
1365 }
1366 spin_unlock(&tfile->tx_ring.producer_lock);
1367
1368 if (flags & XDP_XMIT_FLUSH)
1369 __tun_xdp_flush_tfile(tfile);
1370
1371 rcu_read_unlock();
1372 return cnt - drops;
1373}
1374
1375static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
1376{
1377 struct xdp_frame *frame = convert_to_xdp_frame(xdp);
1378
1379 if (unlikely(!frame))
1380 return -EOVERFLOW;
1381
1382 return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
1383}
1384
1385static const struct net_device_ops tap_netdev_ops = {
1386 .ndo_uninit = tun_net_uninit,
1387 .ndo_open = tun_net_open,
1388 .ndo_stop = tun_net_close,
1389 .ndo_start_xmit = tun_net_xmit,
1390 .ndo_fix_features = tun_net_fix_features,
1391 .ndo_set_rx_mode = tun_net_mclist,
1392 .ndo_set_mac_address = eth_mac_addr,
1393 .ndo_validate_addr = eth_validate_addr,
1394 .ndo_select_queue = tun_select_queue,
1395#ifdef CONFIG_NET_POLL_CONTROLLER
1396 .ndo_poll_controller = tun_poll_controller,
1397#endif
1398 .ndo_features_check = passthru_features_check,
1399 .ndo_set_rx_headroom = tun_set_headroom,
1400 .ndo_get_stats64 = tun_net_get_stats64,
1401 .ndo_bpf = tun_xdp,
1402 .ndo_xdp_xmit = tun_xdp_xmit,
1403 .ndo_change_carrier = tun_net_change_carrier,
1404};
1405
1406static void tun_flow_init(struct tun_struct *tun)
1407{
1408 int i;
1409
1410 for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++)
1411 INIT_HLIST_HEAD(&tun->flows[i]);
1412
1413 tun->ageing_time = TUN_FLOW_EXPIRE;
1414 timer_setup(&tun->flow_gc_timer, tun_flow_cleanup, 0);
1415 mod_timer(&tun->flow_gc_timer,
1416 round_jiffies_up(jiffies + tun->ageing_time));
1417}
1418
1419static void tun_flow_uninit(struct tun_struct *tun)
1420{
1421 del_timer_sync(&tun->flow_gc_timer);
1422 tun_flow_flush(tun);
1423}
1424
1425#define MIN_MTU 68
1426#define MAX_MTU 65535
1427
1428
1429static void tun_net_init(struct net_device *dev)
1430{
1431 struct tun_struct *tun = netdev_priv(dev);
1432
1433 switch (tun->flags & TUN_TYPE_MASK) {
1434 case IFF_TUN:
1435 dev->netdev_ops = &tun_netdev_ops;
1436
1437
1438 dev->hard_header_len = 0;
1439 dev->addr_len = 0;
1440 dev->mtu = 1500;
1441
1442
1443 dev->type = ARPHRD_NONE;
1444 dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
1445 break;
1446
1447 case IFF_TAP:
1448 dev->netdev_ops = &tap_netdev_ops;
1449
1450 ether_setup(dev);
1451 dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1452 dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
1453
1454 eth_hw_addr_random(dev);
1455
1456 break;
1457 }
1458
1459 dev->min_mtu = MIN_MTU;
1460 dev->max_mtu = MAX_MTU - dev->hard_header_len;
1461}
1462
1463static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile)
1464{
1465 struct sock *sk = tfile->socket.sk;
1466
1467 return (tun->dev->flags & IFF_UP) && sock_writeable(sk);
1468}
1469
1470
1471
1472
1473static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
1474{
1475 struct tun_file *tfile = file->private_data;
1476 struct tun_struct *tun = tun_get(tfile);
1477 struct sock *sk;
1478 __poll_t mask = 0;
1479
1480 if (!tun)
1481 return EPOLLERR;
1482
1483 sk = tfile->socket.sk;
1484
1485 tun_debug(KERN_INFO, tun, "tun_chr_poll\n");
1486
1487 poll_wait(file, sk_sleep(sk), wait);
1488
1489 if (!ptr_ring_empty(&tfile->tx_ring))
1490 mask |= EPOLLIN | EPOLLRDNORM;
1491
1492
1493
1494
1495
1496
1497 if (tun_sock_writeable(tun, tfile) ||
1498 (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) &&
1499 tun_sock_writeable(tun, tfile)))
1500 mask |= EPOLLOUT | EPOLLWRNORM;
1501
1502 if (tun->dev->reg_state != NETREG_REGISTERED)
1503 mask = EPOLLERR;
1504
1505 tun_put(tun);
1506 return mask;
1507}
1508
1509static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
1510 size_t len,
1511 const struct iov_iter *it)
1512{
1513 struct sk_buff *skb;
1514 size_t linear;
1515 int err;
1516 int i;
1517
1518 if (it->nr_segs > MAX_SKB_FRAGS + 1)
1519 return ERR_PTR(-ENOMEM);
1520
1521 local_bh_disable();
1522 skb = napi_get_frags(&tfile->napi);
1523 local_bh_enable();
1524 if (!skb)
1525 return ERR_PTR(-ENOMEM);
1526
1527 linear = iov_iter_single_seg_count(it);
1528 err = __skb_grow(skb, linear);
1529 if (err)
1530 goto free;
1531
1532 skb->len = len;
1533 skb->data_len = len - linear;
1534 skb->truesize += skb->data_len;
1535
1536 for (i = 1; i < it->nr_segs; i++) {
		struct page_frag *pfrag = &current->task_frag;
1538 size_t fragsz = it->iov[i].iov_len;
1539
1540 if (fragsz == 0 || fragsz > PAGE_SIZE) {
1541 err = -EINVAL;
1542 goto free;
1543 }
1544
1545 if (!skb_page_frag_refill(fragsz, pfrag, GFP_KERNEL)) {
1546 err = -ENOMEM;
1547 goto free;
1548 }
1549
1550 skb_fill_page_desc(skb, i - 1, pfrag->page,
1551 pfrag->offset, fragsz);
1552 page_ref_inc(pfrag->page);
1553 pfrag->offset += fragsz;
1554 }
1555
1556 return skb;
1557free:
1558
1559 napi_free_frags(&tfile->napi);
1560 return ERR_PTR(err);
1561}
1562
1563
1564
1565static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
1566 size_t prepad, size_t len,
1567 size_t linear, int noblock)
1568{
1569 struct sock *sk = tfile->socket.sk;
1570 struct sk_buff *skb;
1571 int err;
1572
1573
1574 if (prepad + len < PAGE_SIZE || !linear)
1575 linear = len;
1576
1577 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1578 &err, 0);
1579 if (!skb)
1580 return ERR_PTR(err);
1581
1582 skb_reserve(skb, prepad);
1583 skb_put(skb, linear);
1584 skb->data_len = len - linear;
1585 skb->len += len - linear;
1586
1587 return skb;
1588}
1589
1590static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
1591 struct sk_buff *skb, int more)
1592{
1593 struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
1594 struct sk_buff_head process_queue;
1595 u32 rx_batched = tun->rx_batched;
1596 bool rcv = false;
1597
1598 if (!rx_batched || (!more && skb_queue_empty(queue))) {
1599 local_bh_disable();
1600 skb_record_rx_queue(skb, tfile->queue_index);
1601 netif_receive_skb(skb);
1602 local_bh_enable();
1603 return;
1604 }
1605
1606 spin_lock(&queue->lock);
1607 if (!more || skb_queue_len(queue) == rx_batched) {
1608 __skb_queue_head_init(&process_queue);
1609 skb_queue_splice_tail_init(queue, &process_queue);
1610 rcv = true;
1611 } else {
1612 __skb_queue_tail(queue, skb);
1613 }
1614 spin_unlock(&queue->lock);
1615
1616 if (rcv) {
1617 struct sk_buff *nskb;
1618
1619 local_bh_disable();
1620 while ((nskb = __skb_dequeue(&process_queue))) {
1621 skb_record_rx_queue(nskb, tfile->queue_index);
1622 netif_receive_skb(nskb);
1623 }
1624 skb_record_rx_queue(skb, tfile->queue_index);
1625 netif_receive_skb(skb);
1626 local_bh_enable();
1627 }
1628}
1629
1630static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
1631 int len, int noblock, bool zerocopy)
1632{
1633 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
1634 return false;
1635
1636 if (tfile->socket.sk->sk_sndbuf != INT_MAX)
1637 return false;
1638
1639 if (!noblock)
1640 return false;
1641
1642 if (zerocopy)
1643 return false;
1644
1645 if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
1646 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
1647 return false;
1648
1649 return true;
1650}
1651
1652static struct sk_buff *__tun_build_skb(struct page_frag *alloc_frag, char *buf,
1653 int buflen, int len, int pad)
1654{
1655 struct sk_buff *skb = build_skb(buf, buflen);
1656
1657 if (!skb)
1658 return ERR_PTR(-ENOMEM);
1659
1660 skb_reserve(skb, pad);
1661 skb_put(skb, len);
1662
1663 get_page(alloc_frag->page);
1664 alloc_frag->offset += buflen;
1665
1666 return skb;
1667}
1668
1669static int tun_xdp_act(struct tun_struct *tun, struct bpf_prog *xdp_prog,
1670 struct xdp_buff *xdp, u32 act)
1671{
1672 int err;
1673
1674 switch (act) {
1675 case XDP_REDIRECT:
1676 err = xdp_do_redirect(tun->dev, xdp, xdp_prog);
1677 if (err)
1678 return err;
1679 break;
1680 case XDP_TX:
1681 err = tun_xdp_tx(tun->dev, xdp);
1682 if (err < 0)
1683 return err;
1684 break;
1685 case XDP_PASS:
1686 break;
1687 default:
1688 bpf_warn_invalid_xdp_action(act);
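		/* fall through */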
1689
1690 case XDP_ABORTED:
1691 trace_xdp_exception(tun->dev, xdp_prog, act);
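		/* fall through */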
1692
1693 case XDP_DROP:
1694 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1695 break;
1696 }
1697
1698 return act;
1699}
1700
1701static struct sk_buff *tun_build_skb(struct tun_struct *tun,
1702 struct tun_file *tfile,
1703 struct iov_iter *from,
1704 struct virtio_net_hdr *hdr,
1705 int len, int *skb_xdp)
1706{
	struct page_frag *alloc_frag = &current->task_frag;
1708 struct bpf_prog *xdp_prog;
1709 int buflen = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1710 char *buf;
1711 size_t copied;
1712 int pad = TUN_RX_PAD;
1713 int err = 0;
1714
1715 rcu_read_lock();
1716 xdp_prog = rcu_dereference(tun->xdp_prog);
1717 if (xdp_prog)
1718 pad += XDP_PACKET_HEADROOM;
1719 buflen += SKB_DATA_ALIGN(len + pad);
1720 rcu_read_unlock();
1721
1722 alloc_frag->offset = ALIGN((u64)alloc_frag->offset, SMP_CACHE_BYTES);
1723 if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
1724 return ERR_PTR(-ENOMEM);
1725
1726 buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
1727 copied = copy_page_from_iter(alloc_frag->page,
1728 alloc_frag->offset + pad,
1729 len, from);
1730 if (copied != len)
1731 return ERR_PTR(-EFAULT);
1732
1733
1734
1735
1736
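	/* If the packet carries GSO metadata, or no XDP program is attached,
	 * build the skb here and let the caller run generic XDP on it later
	 * (*skb_xdp = 1).  Otherwise run native XDP on the raw buffer below.
	 */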
1737 if (hdr->gso_type || !xdp_prog) {
1738 *skb_xdp = 1;
1739 return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
1740 }
1741
1742 *skb_xdp = 0;
1743
1744 local_bh_disable();
1745 rcu_read_lock();
1746 xdp_prog = rcu_dereference(tun->xdp_prog);
1747 if (xdp_prog) {
1748 struct xdp_buff xdp;
1749 u32 act;
1750
1751 xdp.data_hard_start = buf;
1752 xdp.data = buf + pad;
1753 xdp_set_data_meta_invalid(&xdp);
1754 xdp.data_end = xdp.data + len;
1755 xdp.rxq = &tfile->xdp_rxq;
1756
1757 act = bpf_prog_run_xdp(xdp_prog, &xdp);
1758 if (act == XDP_REDIRECT || act == XDP_TX) {
1759 get_page(alloc_frag->page);
1760 alloc_frag->offset += buflen;
1761 }
1762 err = tun_xdp_act(tun, xdp_prog, &xdp, act);
1763 if (err < 0)
1764 goto err_xdp;
1765 if (err == XDP_REDIRECT)
1766 xdp_do_flush_map();
1767 if (err != XDP_PASS)
1768 goto out;
1769
1770 pad = xdp.data - xdp.data_hard_start;
1771 len = xdp.data_end - xdp.data;
1772 }
1773 rcu_read_unlock();
1774 local_bh_enable();
1775
1776 return __tun_build_skb(alloc_frag, buf, buflen, len, pad);
1777
1778err_xdp:
1779 put_page(alloc_frag->page);
1780out:
1781 rcu_read_unlock();
1782 local_bh_enable();
1783 return NULL;
1784}
1785
1786
1787static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
1788 void *msg_control, struct iov_iter *from,
1789 int noblock, bool more)
1790{
1791 struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
1792 struct sk_buff *skb;
1793 size_t total_len = iov_iter_count(from);
1794 size_t len = total_len, align = tun->align, linear;
1795 struct virtio_net_hdr gso = { 0 };
1796 struct tun_pcpu_stats *stats;
1797 int good_linear;
1798 int copylen;
1799 bool zerocopy = false;
1800 int err;
1801 u32 rxhash = 0;
1802 int skb_xdp = 1;
1803 bool frags = tun_napi_frags_enabled(tfile);
1804
1805 if (!(tun->flags & IFF_NO_PI)) {
1806 if (len < sizeof(pi))
1807 return -EINVAL;
1808 len -= sizeof(pi);
1809
1810 if (!copy_from_iter_full(&pi, sizeof(pi), from))
1811 return -EFAULT;
1812 }
1813
1814 if (tun->flags & IFF_VNET_HDR) {
1815 int vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
1816
1817 if (len < vnet_hdr_sz)
1818 return -EINVAL;
1819 len -= vnet_hdr_sz;
1820
1821 if (!copy_from_iter_full(&gso, sizeof(gso), from))
1822 return -EFAULT;
1823
1824 if ((gso.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1825 tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2 > tun16_to_cpu(tun, gso.hdr_len))
1826 gso.hdr_len = cpu_to_tun16(tun, tun16_to_cpu(tun, gso.csum_start) + tun16_to_cpu(tun, gso.csum_offset) + 2);
1827
1828 if (tun16_to_cpu(tun, gso.hdr_len) > len)
1829 return -EINVAL;
1830 iov_iter_advance(from, vnet_hdr_sz - sizeof(gso));
1831 }
1832
1833 if ((tun->flags & TUN_TYPE_MASK) == IFF_TAP) {
1834 align += NET_IP_ALIGN;
1835 if (unlikely(len < ETH_HLEN ||
1836 (gso.hdr_len && tun16_to_cpu(tun, gso.hdr_len) < ETH_HLEN)))
1837 return -EINVAL;
1838 }
1839
1840 good_linear = SKB_MAX_HEAD(align);
1841
1842 if (msg_control) {
1843 struct iov_iter i = *from;
1844
1845
1846
1847
1848
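		/* Zerocopy: copy only the header portion (up to GOODCOPY_LEN or the
		 * vnet header length) linearly and map the rest of the user pages as
		 * frags, provided they fit within MAX_SKB_FRAGS.
		 */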
1849 copylen = gso.hdr_len ? tun16_to_cpu(tun, gso.hdr_len) : GOODCOPY_LEN;
1850 if (copylen > good_linear)
1851 copylen = good_linear;
1852 linear = copylen;
1853 iov_iter_advance(&i, copylen);
1854 if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS)
1855 zerocopy = true;
1856 }
1857
1858 if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
1859
1860
1861
1862
1863 skb = tun_build_skb(tun, tfile, from, &gso, len, &skb_xdp);
1864 if (IS_ERR(skb)) {
1865 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1866 return PTR_ERR(skb);
1867 }
1868 if (!skb)
1869 return total_len;
1870 } else {
1871 if (!zerocopy) {
1872 copylen = len;
1873 if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
1874 linear = good_linear;
1875 else
1876 linear = tun16_to_cpu(tun, gso.hdr_len);
1877 }
1878
1879 if (frags) {
1880 mutex_lock(&tfile->napi_mutex);
1881 skb = tun_napi_alloc_frags(tfile, copylen, from);
1882
1883
1884
1885
1886 zerocopy = false;
1887 } else {
1888 skb = tun_alloc_skb(tfile, align, copylen, linear,
1889 noblock);
1890 }
1891
1892 if (IS_ERR(skb)) {
1893 if (PTR_ERR(skb) != -EAGAIN)
1894 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1895 if (frags)
1896 mutex_unlock(&tfile->napi_mutex);
1897 return PTR_ERR(skb);
1898 }
1899
1900 if (zerocopy)
1901 err = zerocopy_sg_from_iter(skb, from);
1902 else
1903 err = skb_copy_datagram_from_iter(skb, 0, from, len);
1904
1905 if (err) {
1906 err = -EFAULT;
1907drop:
1908 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1909 kfree_skb(skb);
1910 if (frags) {
1911 tfile->napi.skb = NULL;
1912 mutex_unlock(&tfile->napi_mutex);
1913 }
1914
1915 return err;
1916 }
1917 }
1918
1919 if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
1920 this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
1921 kfree_skb(skb);
1922 if (frags) {
1923 tfile->napi.skb = NULL;
1924 mutex_unlock(&tfile->napi_mutex);
1925 }
1926
1927 return -EINVAL;
1928 }
1929
1930 switch (tun->flags & TUN_TYPE_MASK) {
1931 case IFF_TUN:
1932 if (tun->flags & IFF_NO_PI) {
1933 u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
1934
1935 switch (ip_version) {
1936 case 4:
1937 pi.proto = htons(ETH_P_IP);
1938 break;
1939 case 6:
1940 pi.proto = htons(ETH_P_IPV6);
1941 break;
1942 default:
1943 this_cpu_inc(tun->pcpu_stats->rx_dropped);
1944 kfree_skb(skb);
1945 return -EINVAL;
1946 }
1947 }
1948
1949 skb_reset_mac_header(skb);
1950 skb->protocol = pi.proto;
1951 skb->dev = tun->dev;
1952 break;
1953 case IFF_TAP:
1954 if (!frags)
1955 skb->protocol = eth_type_trans(skb, tun->dev);
1956 break;
1957 }
1958
1959
1960 if (zerocopy) {
1961 skb_shinfo(skb)->destructor_arg = msg_control;
1962 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
1963 skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
1964 } else if (msg_control) {
1965 struct ubuf_info *uarg = msg_control;
1966 uarg->callback(uarg, false);
1967 }
1968
1969 skb_reset_network_header(skb);
1970 skb_probe_transport_header(skb);
1971
1972 if (skb_xdp) {
1973 struct bpf_prog *xdp_prog;
1974 int ret;
1975
1976 local_bh_disable();
1977 rcu_read_lock();
1978 xdp_prog = rcu_dereference(tun->xdp_prog);
1979 if (xdp_prog) {
1980 ret = do_xdp_generic(xdp_prog, skb);
1981 if (ret != XDP_PASS) {
1982 rcu_read_unlock();
1983 local_bh_enable();
1984 return total_len;
1985 }
1986 }
1987 rcu_read_unlock();
1988 local_bh_enable();
1989 }
1990
1991
1992
1993
1994
1995 if (!rcu_access_pointer(tun->steering_prog) && tun->numqueues > 1 &&
1996 !tfile->detached)
1997 rxhash = __skb_get_hash_symmetric(skb);
1998
1999 rcu_read_lock();
2000 if (unlikely(!(tun->dev->flags & IFF_UP))) {
2001 err = -EIO;
2002 rcu_read_unlock();
2003 goto drop;
2004 }
2005
2006 if (frags) {
2007
2008 u32 headlen = eth_get_headlen(tun->dev, skb->data,
2009 skb_headlen(skb));
2010
2011 if (unlikely(headlen > skb_headlen(skb))) {
2012 this_cpu_inc(tun->pcpu_stats->rx_dropped);
2013 napi_free_frags(&tfile->napi);
2014 rcu_read_unlock();
2015 mutex_unlock(&tfile->napi_mutex);
2016 WARN_ON(1);
2017 return -ENOMEM;
2018 }
2019
2020 local_bh_disable();
2021 napi_gro_frags(&tfile->napi);
2022 local_bh_enable();
2023 mutex_unlock(&tfile->napi_mutex);
2024 } else if (tfile->napi_enabled) {
2025 struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
2026 int queue_len;
2027
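		/* Bottom halves stay disabled from spin_lock_bh() until the
		 * local_bh_enable() below, so napi_schedule() runs with softirqs off.
		 */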
2028 spin_lock_bh(&queue->lock);
2029 __skb_queue_tail(queue, skb);
2030 queue_len = skb_queue_len(queue);
2031 spin_unlock(&queue->lock);
2032
2033 if (!more || queue_len > NAPI_POLL_WEIGHT)
2034 napi_schedule(&tfile->napi);
2035
2036 local_bh_enable();
2037 } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
2038 tun_rx_batched(tun, tfile, skb, more);
2039 } else {
2040 netif_rx_ni(skb);
2041 }
2042 rcu_read_unlock();
2043
2044 stats = get_cpu_ptr(tun->pcpu_stats);
2045 u64_stats_update_begin(&stats->syncp);
2046 stats->rx_packets++;
2047 stats->rx_bytes += len;
2048 u64_stats_update_end(&stats->syncp);
2049 put_cpu_ptr(stats);
2050
2051 if (rxhash)
2052 tun_flow_update(tun, rxhash, tfile);
2053
2054 return total_len;
2055}
2056
2057static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
2058{
2059 struct file *file = iocb->ki_filp;
2060 struct tun_file *tfile = file->private_data;
2061 struct tun_struct *tun = tun_get(tfile);
2062 ssize_t result;
2063
2064 if (!tun)
2065 return -EBADFD;
2066
2067 result = tun_get_user(tun, tfile, NULL, from,
2068 file->f_flags & O_NONBLOCK, false);
2069
2070 tun_put(tun);
2071 return result;
2072}
2073
2074static ssize_t tun_put_user_xdp(struct tun_struct *tun,
2075 struct tun_file *tfile,
2076 struct xdp_frame *xdp_frame,
2077 struct iov_iter *iter)
2078{
2079 int vnet_hdr_sz = 0;
2080 size_t size = xdp_frame->len;
2081 struct tun_pcpu_stats *stats;
2082 size_t ret;
2083
2084 if (tun->flags & IFF_VNET_HDR) {
2085 struct virtio_net_hdr gso = { 0 };
2086
2087 vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
2088 if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
2089 return -EINVAL;
2090 if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
2091 sizeof(gso)))
2092 return -EFAULT;
2093 iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
2094 }
2095
2096 ret = copy_to_iter(xdp_frame->data, size, iter) + vnet_hdr_sz;
2097
2098 stats = get_cpu_ptr(tun->pcpu_stats);
2099 u64_stats_update_begin(&stats->syncp);
2100 stats->tx_packets++;
2101 stats->tx_bytes += ret;
2102 u64_stats_update_end(&stats->syncp);
2103 put_cpu_ptr(tun->pcpu_stats);
2104
2105 return ret;
2106}
2107
2108
2109static ssize_t tun_put_user(struct tun_struct *tun,
2110 struct tun_file *tfile,
2111 struct sk_buff *skb,
2112 struct iov_iter *iter)
2113{
2114 struct tun_pi pi = { 0, skb->protocol };
2115 struct tun_pcpu_stats *stats;
2116 ssize_t total;
2117 int vlan_offset = 0;
2118 int vlan_hlen = 0;
2119 int vnet_hdr_sz = 0;
2120
2121 if (skb_vlan_tag_present(skb))
2122 vlan_hlen = VLAN_HLEN;
2123
2124 if (tun->flags & IFF_VNET_HDR)
2125 vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
2126
2127 total = skb->len + vlan_hlen + vnet_hdr_sz;
2128
2129 if (!(tun->flags & IFF_NO_PI)) {
2130 if (iov_iter_count(iter) < sizeof(pi))
2131 return -EINVAL;
2132
2133 total += sizeof(pi);
2134 if (iov_iter_count(iter) < total) {
2135
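			/* The buffer is too small for the whole packet: flag it as truncated. */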
2136 pi.flags |= TUN_PKT_STRIP;
2137 }
2138
2139 if (copy_to_iter(&pi, sizeof(pi), iter) != sizeof(pi))
2140 return -EFAULT;
2141 }
2142
2143 if (vnet_hdr_sz) {
2144 struct virtio_net_hdr gso;
2145
2146 if (iov_iter_count(iter) < vnet_hdr_sz)
2147 return -EINVAL;
2148
2149 if (virtio_net_hdr_from_skb(skb, &gso,
2150 tun_is_little_endian(tun), true,
2151 vlan_hlen)) {
2152 struct skb_shared_info *sinfo = skb_shinfo(skb);
2153 pr_err("unexpected GSO type: "
2154 "0x%x, gso_size %d, hdr_len %d\n",
2155 sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
2156 tun16_to_cpu(tun, gso.hdr_len));
2157 print_hex_dump(KERN_ERR, "tun: ",
2158 DUMP_PREFIX_NONE,
2159 16, 1, skb->head,
2160 min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
2161 WARN_ON_ONCE(1);
2162 return -EINVAL;
2163 }
2164
2165 if (copy_to_iter(&gso, sizeof(gso), iter) != sizeof(gso))
2166 return -EFAULT;
2167
2168 iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
2169 }
2170
2171 if (vlan_hlen) {
2172 int ret;
2173 struct veth veth;
2174
2175 veth.h_vlan_proto = skb->vlan_proto;
2176 veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
2177
2178 vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto);
2179
2180 ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset);
2181 if (ret || !iov_iter_count(iter))
2182 goto done;
2183
2184 ret = copy_to_iter(&veth, sizeof(veth), iter);
2185 if (ret != sizeof(veth) || !iov_iter_count(iter))
2186 goto done;
2187 }
2188
2189 skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset);
2190
2191done:
2192
2193 stats = get_cpu_ptr(tun->pcpu_stats);
2194 u64_stats_update_begin(&stats->syncp);
2195 stats->tx_packets++;
2196 stats->tx_bytes += skb->len + vlan_hlen;
2197 u64_stats_update_end(&stats->syncp);
2198 put_cpu_ptr(tun->pcpu_stats);
2199
2200 return total;
2201}
2202
2203static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
2204{
2205 DECLARE_WAITQUEUE(wait, current);
2206 void *ptr = NULL;
2207 int error = 0;
2208
2209 ptr = ptr_ring_consume(&tfile->tx_ring);
2210 if (ptr)
2211 goto out;
2212 if (noblock) {
2213 error = -EAGAIN;
2214 goto out;
2215 }
2216
2217 add_wait_queue(&tfile->wq.wait, &wait);
2218
2219 while (1) {
2220 set_current_state(TASK_INTERRUPTIBLE);
2221 ptr = ptr_ring_consume(&tfile->tx_ring);
2222 if (ptr)
2223 break;
2224 if (signal_pending(current)) {
2225 error = -ERESTARTSYS;
2226 break;
2227 }
2228 if (tfile->socket.sk->sk_shutdown & RCV_SHUTDOWN) {
2229 error = -EFAULT;
2230 break;
2231 }
2232
2233 schedule();
2234 }
2235
2236 __set_current_state(TASK_RUNNING);
2237 remove_wait_queue(&tfile->wq.wait, &wait);
2238
2239out:
2240 *err = error;
2241 return ptr;
2242}
2243
2244static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
2245 struct iov_iter *to,
2246 int noblock, void *ptr)
2247{
2248 ssize_t ret;
2249 int err;
2250
2251 tun_debug(KERN_INFO, tun, "tun_do_read\n");
2252
2253 if (!iov_iter_count(to)) {
2254 tun_ptr_free(ptr);
2255 return 0;
2256 }
2257
2258 if (!ptr) {
2259
2260 ptr = tun_ring_recv(tfile, noblock, &err);
2261 if (!ptr)
2262 return err;
2263 }
2264
2265 if (tun_is_xdp_frame(ptr)) {
2266 struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
2267
2268 ret = tun_put_user_xdp(tun, tfile, xdpf, to);
2269 xdp_return_frame(xdpf);
2270 } else {
2271 struct sk_buff *skb = ptr;
2272
2273 ret = tun_put_user(tun, tfile, skb, to);
2274 if (unlikely(ret < 0))
2275 kfree_skb(skb);
2276 else
2277 consume_skb(skb);
2278 }
2279
2280 return ret;
2281}
2282
2283static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
2284{
2285 struct file *file = iocb->ki_filp;
2286 struct tun_file *tfile = file->private_data;
2287 struct tun_struct *tun = tun_get(tfile);
2288 ssize_t len = iov_iter_count(to), ret;
2289
2290 if (!tun)
2291 return -EBADFD;
2292 ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
2293 ret = min_t(ssize_t, ret, len);
2294 if (ret > 0)
2295 iocb->ki_pos = ret;
2296 tun_put(tun);
2297 return ret;
2298}
2299
2300static void tun_prog_free(struct rcu_head *rcu)
2301{
2302 struct tun_prog *prog = container_of(rcu, struct tun_prog, rcu);
2303
2304 bpf_prog_destroy(prog->prog);
2305 kfree(prog);
2306}
2307
2308static int __tun_set_ebpf(struct tun_struct *tun,
2309 struct tun_prog __rcu **prog_p,
2310 struct bpf_prog *prog)
2311{
2312 struct tun_prog *old, *new = NULL;
2313
2314 if (prog) {
2315 new = kmalloc(sizeof(*new), GFP_KERNEL);
2316 if (!new)
2317 return -ENOMEM;
2318 new->prog = prog;
2319 }
2320
2321 spin_lock_bh(&tun->lock);
2322 old = rcu_dereference_protected(*prog_p,
2323 lockdep_is_held(&tun->lock));
2324 rcu_assign_pointer(*prog_p, new);
2325 spin_unlock_bh(&tun->lock);
2326
2327 if (old)
2328 call_rcu(&old->rcu, tun_prog_free);
2329
2330 return 0;
2331}
2332
2333static void tun_free_netdev(struct net_device *dev)
2334{
2335 struct tun_struct *tun = netdev_priv(dev);
2336
2337 BUG_ON(!(list_empty(&tun->disabled)));
2338 free_percpu(tun->pcpu_stats);
2339 tun_flow_uninit(tun);
2340 security_tun_dev_free_security(tun->security);
2341 __tun_set_ebpf(tun, &tun->steering_prog, NULL);
2342 __tun_set_ebpf(tun, &tun->filter_prog, NULL);
2343}
2344
2345static void tun_setup(struct net_device *dev)
2346{
2347 struct tun_struct *tun = netdev_priv(dev);
2348
2349 tun->owner = INVALID_UID;
2350 tun->group = INVALID_GID;
2351 tun_default_link_ksettings(dev, &tun->link_ksettings);
2352
2353 dev->ethtool_ops = &tun_ethtool_ops;
2354 dev->needs_free_netdev = true;
2355 dev->priv_destructor = tun_free_netdev;
2356
2357 dev->tx_queue_len = TUN_READQ_SIZE;
2358}
2359
2360
2361
2362
2363static int tun_validate(struct nlattr *tb[], struct nlattr *data[],
2364 struct netlink_ext_ack *extack)
2365{
2366 NL_SET_ERR_MSG(extack,
2367 "tun/tap creation via rtnetlink is not supported.");
2368 return -EOPNOTSUPP;
2369}
2370
2371static size_t tun_get_size(const struct net_device *dev)
2372{
2373 BUILD_BUG_ON(sizeof(u32) != sizeof(uid_t));
2374 BUILD_BUG_ON(sizeof(u32) != sizeof(gid_t));
2375
2376 return nla_total_size(sizeof(uid_t)) +
2377 nla_total_size(sizeof(gid_t)) +
2378 nla_total_size(sizeof(u8)) +
2379 nla_total_size(sizeof(u8)) +
2380 nla_total_size(sizeof(u8)) +
2381 nla_total_size(sizeof(u8)) +
2382 nla_total_size(sizeof(u8)) +
2383 nla_total_size(sizeof(u32)) +
2384 nla_total_size(sizeof(u32)) +
2385 0;
2386}
2387
2388static int tun_fill_info(struct sk_buff *skb, const struct net_device *dev)
2389{
2390 struct tun_struct *tun = netdev_priv(dev);
2391
2392 if (nla_put_u8(skb, IFLA_TUN_TYPE, tun->flags & TUN_TYPE_MASK))
2393 goto nla_put_failure;
2394 if (uid_valid(tun->owner) &&
2395 nla_put_u32(skb, IFLA_TUN_OWNER,
2396 from_kuid_munged(current_user_ns(), tun->owner)))
2397 goto nla_put_failure;
2398 if (gid_valid(tun->group) &&
2399 nla_put_u32(skb, IFLA_TUN_GROUP,
2400 from_kgid_munged(current_user_ns(), tun->group)))
2401 goto nla_put_failure;
2402 if (nla_put_u8(skb, IFLA_TUN_PI, !(tun->flags & IFF_NO_PI)))
2403 goto nla_put_failure;
2404 if (nla_put_u8(skb, IFLA_TUN_VNET_HDR, !!(tun->flags & IFF_VNET_HDR)))
2405 goto nla_put_failure;
2406 if (nla_put_u8(skb, IFLA_TUN_PERSIST, !!(tun->flags & IFF_PERSIST)))
2407 goto nla_put_failure;
2408 if (nla_put_u8(skb, IFLA_TUN_MULTI_QUEUE,
2409 !!(tun->flags & IFF_MULTI_QUEUE)))
2410 goto nla_put_failure;
2411 if (tun->flags & IFF_MULTI_QUEUE) {
2412 if (nla_put_u32(skb, IFLA_TUN_NUM_QUEUES, tun->numqueues))
2413 goto nla_put_failure;
2414 if (nla_put_u32(skb, IFLA_TUN_NUM_DISABLED_QUEUES,
2415 tun->numdisabled))
2416 goto nla_put_failure;
2417 }
2418
2419 return 0;
2420
2421nla_put_failure:
2422 return -EMSGSIZE;
2423}
2424
2425static struct rtnl_link_ops tun_link_ops __read_mostly = {
2426 .kind = DRV_NAME,
2427 .priv_size = sizeof(struct tun_struct),
2428 .setup = tun_setup,
2429 .validate = tun_validate,
2430 .get_size = tun_get_size,
2431 .fill_info = tun_fill_info,
2432};
2433
2434static void tun_sock_write_space(struct sock *sk)
2435{
2436 struct tun_file *tfile;
2437 wait_queue_head_t *wqueue;
2438
2439 if (!sock_writeable(sk))
2440 return;
2441
2442 if (!test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
2443 return;
2444
2445 wqueue = sk_sleep(sk);
2446 if (wqueue && waitqueue_active(wqueue))
2447 wake_up_interruptible_sync_poll(wqueue, EPOLLOUT |
2448 EPOLLWRNORM | EPOLLWRBAND);
2449
2450 tfile = container_of(sk, struct tun_file, sk);
2451 kill_fasync(&tfile->fasync, SIGIO, POLL_OUT);
2452}
2453
2454static void tun_put_page(struct tun_page *tpage)
2455{
2456 if (tpage->page)
2457 __page_frag_cache_drain(tpage->page, tpage->count);
2458}
2459
2460static int tun_xdp_one(struct tun_struct *tun,
2461 struct tun_file *tfile,
2462 struct xdp_buff *xdp, int *flush,
2463 struct tun_page *tpage)
2464{
2465 unsigned int datasize = xdp->data_end - xdp->data;
2466 struct tun_xdp_hdr *hdr = xdp->data_hard_start;
2467 struct virtio_net_hdr *gso = &hdr->gso;
2468 struct tun_pcpu_stats *stats;
2469 struct bpf_prog *xdp_prog;
2470 struct sk_buff *skb = NULL;
2471 u32 rxhash = 0, act;
2472 int buflen = hdr->buflen;
2473 int err = 0;
2474 bool skb_xdp = false;
2475 struct page *page;
2476
2477 xdp_prog = rcu_dereference(tun->xdp_prog);
2478 if (xdp_prog) {
2479 if (gso->gso_type) {
2480 skb_xdp = true;
2481 goto build;
2482 }
2483 xdp_set_data_meta_invalid(xdp);
2484 xdp->rxq = &tfile->xdp_rxq;
2485
2486 act = bpf_prog_run_xdp(xdp_prog, xdp);
2487 err = tun_xdp_act(tun, xdp_prog, xdp, act);
2488 if (err < 0) {
2489 put_page(virt_to_head_page(xdp->data));
2490 return err;
2491 }
2492
2493 switch (err) {
2494 case XDP_REDIRECT:
2495 *flush = true;
2496 /* fall through */
2497 case XDP_TX:
2498 return 0;
2499 case XDP_PASS:
2500 break;
2501 default:
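/* Frame was dropped (XDP_DROP or an unknown verdict): recycle its page.
 * Consecutive pages from the same frag cache are batched in tpage and
 * released in one go by tun_put_page().
 */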
2502 page = virt_to_head_page(xdp->data);
2503 if (tpage->page == page) {
2504 ++tpage->count;
2505 } else {
2506 tun_put_page(tpage);
2507 tpage->page = page;
2508 tpage->count = 1;
2509 }
2510 return 0;
2511 }
2512 }
2513
2514build:
2515 skb = build_skb(xdp->data_hard_start, buflen);
2516 if (!skb) {
2517 err = -ENOMEM;
2518 goto out;
2519 }
2520
2521 skb_reserve(skb, xdp->data - xdp->data_hard_start);
2522 skb_put(skb, xdp->data_end - xdp->data);
2523
2524 if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
2525 this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
2526 kfree_skb(skb);
2527 err = -EINVAL;
2528 goto out;
2529 }
2530
2531 skb->protocol = eth_type_trans(skb, tun->dev);
2532 skb_reset_network_header(skb);
2533 skb_probe_transport_header(skb);
2534
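/* The frame carried GSO metadata and skipped the native XDP run above;
 * let the generic (skb-based) XDP hook see it instead.
 */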
2535 if (skb_xdp) {
2536 err = do_xdp_generic(xdp_prog, skb);
2537 if (err != XDP_PASS)
2538 goto out;
2539 }
2540
2541 if (!rcu_dereference(tun->steering_prog))
2542 rxhash = __skb_get_hash_symmetric(skb);
2543
2544 skb_record_rx_queue(skb, tfile->queue_index);
2545 netif_receive_skb(skb);
2546
2547
2548
2549
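/* No need for get_cpu_ptr() here since this function is
 * always called with bh disabled
 */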
2550 stats = this_cpu_ptr(tun->pcpu_stats);
2551 u64_stats_update_begin(&stats->syncp);
2552 stats->rx_packets++;
2553 stats->rx_bytes += datasize;
2554 u64_stats_update_end(&stats->syncp);
2555
2556 if (rxhash)
2557 tun_flow_update(tun, rxhash, tfile);
2558
2559out:
2560 return err;
2561}
2562
2563static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
2564{
2565 int ret, i;
2566 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2567 struct tun_struct *tun = tun_get(tfile);
2568 struct tun_msg_ctl *ctl = m->msg_control;
2569 struct xdp_buff *xdp;
2570
2571 if (!tun)
2572 return -EBADFD;
2573
2574 if (ctl && (ctl->type == TUN_MSG_PTR)) {
2575 struct tun_page tpage;
2576 int n = ctl->num;
2577 int flush = 0;
2578
2579 memset(&tpage, 0, sizeof(tpage));
2580
2581 local_bh_disable();
2582 rcu_read_lock();
2583
2584 for (i = 0; i < n; i++) {
2585 xdp = &((struct xdp_buff *)ctl->ptr)[i];
2586 tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
2587 }
2588
2589 if (flush)
2590 xdp_do_flush_map();
2591
2592 rcu_read_unlock();
2593 local_bh_enable();
2594
2595 tun_put_page(&tpage);
2596
2597 ret = total_len;
2598 goto out;
2599 }
2600
2601 ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
2602 m->msg_flags & MSG_DONTWAIT,
2603 m->msg_flags & MSG_MORE);
2604out:
2605 tun_put(tun);
2606 return ret;
2607}
2608
2609static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
2610 int flags)
2611{
2612 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2613 struct tun_struct *tun = tun_get(tfile);
2614 void *ptr = m->msg_control;
2615 int ret;
2616
2617 if (!tun) {
2618 ret = -EBADFD;
2619 goto out_free;
2620 }
2621
2622 if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
2623 ret = -EINVAL;
2624 goto out_put_tun;
2625 }
2626 if (flags & MSG_ERRQUEUE) {
2627 ret = sock_recv_errqueue(sock->sk, m, total_len,
2628 SOL_PACKET, TUN_TX_TIMESTAMP);
2629 goto out;
2630 }
2631 ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
2632 if (ret > (ssize_t)total_len) {
2633 m->msg_flags |= MSG_TRUNC;
2634 ret = flags & MSG_TRUNC ? ret : total_len;
2635 }
2636out:
2637 tun_put(tun);
2638 return ret;
2639
2640out_put_tun:
2641 tun_put(tun);
2642out_free:
2643 tun_ptr_free(ptr);
2644 return ret;
2645}
2646
2647static int tun_ptr_peek_len(void *ptr)
2648{
2649 if (likely(ptr)) {
2650 if (tun_is_xdp_frame(ptr)) {
2651 struct xdp_frame *xdpf = tun_ptr_to_xdp(ptr);
2652
2653 return xdpf->len;
2654 }
2655 return __skb_array_len_with_tag(ptr);
2656 } else {
2657 return 0;
2658 }
2659}
2660
2661static int tun_peek_len(struct socket *sock)
2662{
2663 struct tun_file *tfile = container_of(sock, struct tun_file, socket);
2664 struct tun_struct *tun;
2665 int ret = 0;
2666
2667 tun = tun_get(tfile);
2668 if (!tun)
2669 return 0;
2670
2671 ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
2672 tun_put(tun);
2673
2674 return ret;
2675}
2676
2677
2678static const struct proto_ops tun_socket_ops = {
2679 .peek_len = tun_peek_len,
2680 .sendmsg = tun_sendmsg,
2681 .recvmsg = tun_recvmsg,
2682};
2683
2684static struct proto tun_proto = {
2685 .name = "tun",
2686 .owner = THIS_MODULE,
2687 .obj_size = sizeof(struct tun_file),
2688};
2689
2690static int tun_flags(struct tun_struct *tun)
2691{
2692 return tun->flags & (TUN_FEATURES | IFF_PERSIST | IFF_TUN | IFF_TAP);
2693}
2694
2695static ssize_t tun_show_flags(struct device *dev, struct device_attribute *attr,
2696 char *buf)
2697{
2698 struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2699 return sprintf(buf, "0x%x\n", tun_flags(tun));
2700}
2701
2702static ssize_t tun_show_owner(struct device *dev, struct device_attribute *attr,
2703 char *buf)
2704{
2705 struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2706 return uid_valid(tun->owner)?
2707 sprintf(buf, "%u\n",
2708 from_kuid_munged(current_user_ns(), tun->owner)):
2709 sprintf(buf, "-1\n");
2710}
2711
2712static ssize_t tun_show_group(struct device *dev, struct device_attribute *attr,
2713 char *buf)
2714{
2715 struct tun_struct *tun = netdev_priv(to_net_dev(dev));
2716 return gid_valid(tun->group) ?
2717 sprintf(buf, "%u\n",
2718 from_kgid_munged(current_user_ns(), tun->group)):
2719 sprintf(buf, "-1\n");
2720}
2721
2722static DEVICE_ATTR(tun_flags, 0444, tun_show_flags, NULL);
2723static DEVICE_ATTR(owner, 0444, tun_show_owner, NULL);
2724static DEVICE_ATTR(group, 0444, tun_show_group, NULL);
2725
2726static struct attribute *tun_dev_attrs[] = {
2727 &dev_attr_tun_flags.attr,
2728 &dev_attr_owner.attr,
2729 &dev_attr_group.attr,
2730 NULL
2731};
2732
2733static const struct attribute_group tun_attr_group = {
2734 .attrs = tun_dev_attrs
2735};
2736
2737static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
2738{
2739 struct tun_struct *tun;
2740 struct tun_file *tfile = file->private_data;
2741 struct net_device *dev;
2742 int err;
2743
2744 if (tfile->detached)
2745 return -EINVAL;
2746
2747 if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
2748 if (!capable(CAP_NET_ADMIN))
2749 return -EPERM;
2750
2751 if (!(ifr->ifr_flags & IFF_NAPI) ||
2752 (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
2753 return -EINVAL;
2754 }
2755
2756 dev = __dev_get_by_name(net, ifr->ifr_name);
2757 if (dev) {
2758 if (ifr->ifr_flags & IFF_TUN_EXCL)
2759 return -EBUSY;
2760 if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops)
2761 tun = netdev_priv(dev);
2762 else if ((ifr->ifr_flags & IFF_TAP) && dev->netdev_ops == &tap_netdev_ops)
2763 tun = netdev_priv(dev);
2764 else
2765 return -EINVAL;
2766
2767 if (!!(ifr->ifr_flags & IFF_MULTI_QUEUE) !=
2768 !!(tun->flags & IFF_MULTI_QUEUE))
2769 return -EINVAL;
2770
2771 if (tun_not_capable(tun))
2772 return -EPERM;
2773 err = security_tun_dev_open(tun->security);
2774 if (err < 0)
2775 return err;
2776
2777 err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
2778 ifr->ifr_flags & IFF_NAPI,
2779 ifr->ifr_flags & IFF_NAPI_FRAGS);
2780 if (err < 0)
2781 return err;
2782
2783 if (tun->flags & IFF_MULTI_QUEUE &&
2784 (tun->numqueues + tun->numdisabled > 1)) {
2785
2786
2787
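/* One or more queues has already been attached, no need
 * to initialize the device again.
 */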
2788 netdev_state_change(dev);
2789 return 0;
2790 }
2791
2792 tun->flags = (tun->flags & ~TUN_FEATURES) |
2793 (ifr->ifr_flags & TUN_FEATURES);
2794
2795 netdev_state_change(dev);
2796 } else {
2797 char *name;
2798 unsigned long flags = 0;
2799 int queues = ifr->ifr_flags & IFF_MULTI_QUEUE ?
2800 MAX_TAP_QUEUES : 1;
2801
2802 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2803 return -EPERM;
2804 err = security_tun_dev_create();
2805 if (err < 0)
2806 return err;
2807
2808
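/* Set dev type: TUN (layer 3) or TAP (layer 2) device */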
2809 if (ifr->ifr_flags & IFF_TUN) {
2810
2811 flags |= IFF_TUN;
2812 name = "tun%d";
2813 } else if (ifr->ifr_flags & IFF_TAP) {
2814
2815 flags |= IFF_TAP;
2816 name = "tap%d";
2817 } else
2818 return -EINVAL;
2819
2820 if (*ifr->ifr_name)
2821 name = ifr->ifr_name;
2822
2823 dev = alloc_netdev_mqs(sizeof(struct tun_struct), name,
2824 NET_NAME_UNKNOWN, tun_setup, queues,
2825 queues);
2826
2827 if (!dev)
2828 return -ENOMEM;
2829 err = dev_get_valid_name(net, dev, name);
2830 if (err < 0)
2831 goto err_free_dev;
2832
2833 dev_net_set(dev, net);
2834 dev->rtnl_link_ops = &tun_link_ops;
2835 dev->ifindex = tfile->ifindex;
2836 dev->sysfs_groups[0] = &tun_attr_group;
2837
2838 tun = netdev_priv(dev);
2839 tun->dev = dev;
2840 tun->flags = flags;
2841 tun->txflt.count = 0;
2842 tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
2843
2844 tun->align = NET_SKB_PAD;
2845 tun->filter_attached = false;
2846 tun->sndbuf = tfile->socket.sk->sk_sndbuf;
2847 tun->rx_batched = 0;
2848 RCU_INIT_POINTER(tun->steering_prog, NULL);
2849
2850 tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
2851 if (!tun->pcpu_stats) {
2852 err = -ENOMEM;
2853 goto err_free_dev;
2854 }
2855
2856 spin_lock_init(&tun->lock);
2857
2858 err = security_tun_dev_alloc_security(&tun->security);
2859 if (err < 0)
2860 goto err_free_stat;
2861
2862 tun_net_init(dev);
2863 tun_flow_init(tun);
2864
2865 dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST |
2866 TUN_USER_FEATURES | NETIF_F_HW_VLAN_CTAG_TX |
2867 NETIF_F_HW_VLAN_STAG_TX;
2868 dev->features = dev->hw_features | NETIF_F_LLTX;
2869 dev->vlan_features = dev->features &
2870 ~(NETIF_F_HW_VLAN_CTAG_TX |
2871 NETIF_F_HW_VLAN_STAG_TX);
2872
2873 tun->flags = (tun->flags & ~TUN_FEATURES) |
2874 (ifr->ifr_flags & TUN_FEATURES);
2875
2876 INIT_LIST_HEAD(&tun->disabled);
2877 err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
2878 ifr->ifr_flags & IFF_NAPI_FRAGS);
2879 if (err < 0)
2880 goto err_free_flow;
2881
2882 err = register_netdevice(tun->dev);
2883 if (err < 0)
2884 goto err_detach;
2885 }
2886
2887 netif_carrier_on(tun->dev);
2888
2889 tun_debug(KERN_INFO, tun, "tun_set_iff\n");
2890
2891
2892
2893
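/* Make sure persistent devices do not get stuck in
 * xoff state.
 */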
2894 if (netif_running(tun->dev))
2895 netif_tx_wake_all_queues(tun->dev);
2896
2897 strcpy(ifr->ifr_name, tun->dev->name);
2898 return 0;
2899
2900err_detach:
2901 tun_detach_all(dev);
2902
2903 goto err_free_dev;
2904
2905err_free_flow:
2906 tun_flow_uninit(tun);
2907 security_tun_dev_free_security(tun->security);
2908err_free_stat:
2909 free_percpu(tun->pcpu_stats);
2910err_free_dev:
2911 free_netdev(dev);
2912 return err;
2913}
2914
2915static void tun_get_iff(struct tun_struct *tun, struct ifreq *ifr)
2916{
2917 tun_debug(KERN_INFO, tun, "tun_get_iff\n");
2918
2919 strcpy(ifr->ifr_name, tun->dev->name);
2920
2921 ifr->ifr_flags = tun_flags(tun);
2922
2923}
2924
2925
2926
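/* This is like a cut-down ethtool ops, except done via tun fd so no
 * privs required.
 */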
2927static int set_offload(struct tun_struct *tun, unsigned long arg)
2928{
2929 netdev_features_t features = 0;
2930
2931 if (arg & TUN_F_CSUM) {
2932 features |= NETIF_F_HW_CSUM;
2933 arg &= ~TUN_F_CSUM;
2934
2935 if (arg & (TUN_F_TSO4|TUN_F_TSO6)) {
2936 if (arg & TUN_F_TSO_ECN) {
2937 features |= NETIF_F_TSO_ECN;
2938 arg &= ~TUN_F_TSO_ECN;
2939 }
2940 if (arg & TUN_F_TSO4)
2941 features |= NETIF_F_TSO;
2942 if (arg & TUN_F_TSO6)
2943 features |= NETIF_F_TSO6;
2944 arg &= ~(TUN_F_TSO4|TUN_F_TSO6);
2945 }
2946
2947 arg &= ~TUN_F_UFO;
2948 }
2949
2950
2951
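/* Any flags left over are unsupported; rejecting them lets userspace
 * probe for available offload features.
 */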
2952 if (arg)
2953 return -EINVAL;
2954
2955 tun->set_features = features;
2956 tun->dev->wanted_features &= ~TUN_USER_FEATURES;
2957 tun->dev->wanted_features |= features;
2958 netdev_update_features(tun->dev);
2959
2960 return 0;
2961}
2962
2963static void tun_detach_filter(struct tun_struct *tun, int n)
2964{
2965 int i;
2966 struct tun_file *tfile;
2967
2968 for (i = 0; i < n; i++) {
2969 tfile = rtnl_dereference(tun->tfiles[i]);
2970 lock_sock(tfile->socket.sk);
2971 sk_detach_filter(tfile->socket.sk);
2972 release_sock(tfile->socket.sk);
2973 }
2974
2975 tun->filter_attached = false;
2976}
2977
2978static int tun_attach_filter(struct tun_struct *tun)
2979{
2980 int i, ret = 0;
2981 struct tun_file *tfile;
2982
2983 for (i = 0; i < tun->numqueues; i++) {
2984 tfile = rtnl_dereference(tun->tfiles[i]);
2985 lock_sock(tfile->socket.sk);
2986 ret = sk_attach_filter(&tun->fprog, tfile->socket.sk);
2987 release_sock(tfile->socket.sk);
2988 if (ret) {
2989 tun_detach_filter(tun, i);
2990 return ret;
2991 }
2992 }
2993
2994 tun->filter_attached = true;
2995 return ret;
2996}
2997
2998static void tun_set_sndbuf(struct tun_struct *tun)
2999{
3000 struct tun_file *tfile;
3001 int i;
3002
3003 for (i = 0; i < tun->numqueues; i++) {
3004 tfile = rtnl_dereference(tun->tfiles[i]);
3005 tfile->socket.sk->sk_sndbuf = tun->sndbuf;
3006 }
3007}
3008
3009static int tun_set_queue(struct file *file, struct ifreq *ifr)
3010{
3011 struct tun_file *tfile = file->private_data;
3012 struct tun_struct *tun;
3013 int ret = 0;
3014
3015 rtnl_lock();
3016
3017 if (ifr->ifr_flags & IFF_ATTACH_QUEUE) {
3018 tun = tfile->detached;
3019 if (!tun) {
3020 ret = -EINVAL;
3021 goto unlock;
3022 }
3023 ret = security_tun_dev_attach_queue(tun->security);
3024 if (ret < 0)
3025 goto unlock;
3026 ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
3027 tun->flags & IFF_NAPI_FRAGS);
3028 } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
3029 tun = rtnl_dereference(tfile->tun);
3030 if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
3031 ret = -EINVAL;
3032 else
3033 __tun_detach(tfile, false);
3034 } else
3035 ret = -EINVAL;
3036
3037 if (ret >= 0)
3038 netdev_state_change(tun->dev);
3039
3040unlock:
3041 rtnl_unlock();
3042 return ret;
3043}
3044
3045static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog **prog_p,
3046 void __user *data)
3047{
3048 struct bpf_prog *prog;
3049 int fd;
3050
3051 if (copy_from_user(&fd, data, sizeof(fd)))
3052 return -EFAULT;
3053
3054 if (fd == -1) {
3055 prog = NULL;
3056 } else {
3057 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
3058 if (IS_ERR(prog))
3059 return PTR_ERR(prog);
3060 }
3061
3062 return __tun_set_ebpf(tun, prog_p, prog);
3063}
3064
3065static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
3066 unsigned long arg, int ifreq_len)
3067{
3068 struct tun_file *tfile = file->private_data;
3069 struct net *net = sock_net(&tfile->sk);
3070 struct tun_struct *tun;
3071 void __user* argp = (void __user*)arg;
3072 unsigned int ifindex, carrier;
3073 struct ifreq ifr;
3074 kuid_t owner;
3075 kgid_t group;
3076 int sndbuf;
3077 int vnet_hdr_sz;
3078 int le;
3079 int ret;
3080 bool do_notify = false;
3081
3082 if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
3083 (_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
3084 if (copy_from_user(&ifr, argp, ifreq_len))
3085 return -EFAULT;
3086 } else {
3087 memset(&ifr, 0, sizeof(ifr));
3088 }
3089 if (cmd == TUNGETFEATURES) {
3090
3091
3092
3093
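/* Currently this just means: "what IFF flags are valid?".
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF.
 */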
3094 return put_user(IFF_TUN | IFF_TAP | TUN_FEATURES,
3095 (unsigned int __user*)argp);
3096 } else if (cmd == TUNSETQUEUE) {
3097 return tun_set_queue(file, &ifr);
3098 } else if (cmd == SIOCGSKNS) {
3099 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3100 return -EPERM;
3101 return open_related_ns(&net->ns, get_net_ns);
3102 }
3103
3104 ret = 0;
3105 rtnl_lock();
3106
3107 tun = tun_get(tfile);
3108 if (cmd == TUNSETIFF) {
3109 ret = -EEXIST;
3110 if (tun)
3111 goto unlock;
3112
3113 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3114
3115 ret = tun_set_iff(net, file, &ifr);
3116
3117 if (ret)
3118 goto unlock;
3119
3120 if (copy_to_user(argp, &ifr, ifreq_len))
3121 ret = -EFAULT;
3122 goto unlock;
3123 }
3124 if (cmd == TUNSETIFINDEX) {
3125 ret = -EPERM;
3126 if (tun)
3127 goto unlock;
3128
3129 ret = -EFAULT;
3130 if (copy_from_user(&ifindex, argp, sizeof(ifindex)))
3131 goto unlock;
3132
3133 ret = 0;
3134 tfile->ifindex = ifindex;
3135 goto unlock;
3136 }
3137
3138 ret = -EBADFD;
3139 if (!tun)
3140 goto unlock;
3141
3142 tun_debug(KERN_INFO, tun, "tun_chr_ioctl cmd %u\n", cmd);
3143
3144 net = dev_net(tun->dev);
3145 ret = 0;
3146 switch (cmd) {
3147 case TUNGETIFF:
3148 tun_get_iff(tun, &ifr);
3149
3150 if (tfile->detached)
3151 ifr.ifr_flags |= IFF_DETACH_QUEUE;
3152 if (!tfile->socket.sk->sk_filter)
3153 ifr.ifr_flags |= IFF_NOFILTER;
3154
3155 if (copy_to_user(argp, &ifr, ifreq_len))
3156 ret = -EFAULT;
3157 break;
3158
3159 case TUNSETNOCSUM:
3160
3161
3162
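/* Disable/Enable checksum: historical ioctl, kept for
 * compatibility but otherwise ignored.
 */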
3163 tun_debug(KERN_INFO, tun, "ignored: set checksum %s\n",
3164 arg ? "disabled" : "enabled");
3165 break;
3166
3167 case TUNSETPERSIST:
3168
3169
3170
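/* Disable/Enable persist mode. Keep an extra reference to the
 * module to prevent the module being unprobed while it's still
 * in use.
 */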
3171 if (arg && !(tun->flags & IFF_PERSIST)) {
3172 tun->flags |= IFF_PERSIST;
3173 __module_get(THIS_MODULE);
3174 do_notify = true;
3175 }
3176 if (!arg && (tun->flags & IFF_PERSIST)) {
3177 tun->flags &= ~IFF_PERSIST;
3178 module_put(THIS_MODULE);
3179 do_notify = true;
3180 }
3181
3182 tun_debug(KERN_INFO, tun, "persist %s\n",
3183 arg ? "enabled" : "disabled");
3184 break;
3185
3186 case TUNSETOWNER:
3187
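/* Set owner of the device */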
3188 owner = make_kuid(current_user_ns(), arg);
3189 if (!uid_valid(owner)) {
3190 ret = -EINVAL;
3191 break;
3192 }
3193 tun->owner = owner;
3194 do_notify = true;
3195 tun_debug(KERN_INFO, tun, "owner set to %u\n",
3196 from_kuid(&init_user_ns, tun->owner));
3197 break;
3198
3199 case TUNSETGROUP:
3200
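/* Set group of the device */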
3201 group = make_kgid(current_user_ns(), arg);
3202 if (!gid_valid(group)) {
3203 ret = -EINVAL;
3204 break;
3205 }
3206 tun->group = group;
3207 do_notify = true;
3208 tun_debug(KERN_INFO, tun, "group set to %u\n",
3209 from_kgid(&init_user_ns, tun->group));
3210 break;
3211
3212 case TUNSETLINK:
3213
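/* Only allow setting the type when the interface is down */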
3214 if (tun->dev->flags & IFF_UP) {
3215 tun_debug(KERN_INFO, tun,
3216 "Linktype set failed because interface is up\n");
3217 ret = -EBUSY;
3218 } else {
3219 tun->dev->type = (int) arg;
3220 tun_debug(KERN_INFO, tun, "linktype set to %d\n",
3221 tun->dev->type);
3222 ret = 0;
3223 }
3224 break;
3225
3226#ifdef TUN_DEBUG
3227 case TUNSETDEBUG:
3228 tun->debug = arg;
3229 break;
3230#endif
3231 case TUNSETOFFLOAD:
3232 ret = set_offload(tun, arg);
3233 break;
3234
3235 case TUNSETTXFILTER:
3236
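/* Can be set only for TAPs */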
3237 ret = -EINVAL;
3238 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3239 break;
3240 ret = update_filter(&tun->txflt, (void __user *)arg);
3241 break;
3242
3243 case SIOCGIFHWADDR:
3244
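/* Get hw address */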
3245 memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN);
3246 ifr.ifr_hwaddr.sa_family = tun->dev->type;
3247 if (copy_to_user(argp, &ifr, ifreq_len))
3248 ret = -EFAULT;
3249 break;
3250
3251 case SIOCSIFHWADDR:
3252
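/* Set hw address */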
3253 tun_debug(KERN_DEBUG, tun, "set hw address: %pM\n",
3254 ifr.ifr_hwaddr.sa_data);
3255
3256 ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL);
3257 break;
3258
3259 case TUNGETSNDBUF:
3260 sndbuf = tfile->socket.sk->sk_sndbuf;
3261 if (copy_to_user(argp, &sndbuf, sizeof(sndbuf)))
3262 ret = -EFAULT;
3263 break;
3264
3265 case TUNSETSNDBUF:
3266 if (copy_from_user(&sndbuf, argp, sizeof(sndbuf))) {
3267 ret = -EFAULT;
3268 break;
3269 }
3270 if (sndbuf <= 0) {
3271 ret = -EINVAL;
3272 break;
3273 }
3274
3275 tun->sndbuf = sndbuf;
3276 tun_set_sndbuf(tun);
3277 break;
3278
3279 case TUNGETVNETHDRSZ:
3280 vnet_hdr_sz = tun->vnet_hdr_sz;
3281 if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
3282 ret = -EFAULT;
3283 break;
3284
3285 case TUNSETVNETHDRSZ:
3286 if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
3287 ret = -EFAULT;
3288 break;
3289 }
3290 if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
3291 ret = -EINVAL;
3292 break;
3293 }
3294
3295 tun->vnet_hdr_sz = vnet_hdr_sz;
3296 break;
3297
3298 case TUNGETVNETLE:
3299 le = !!(tun->flags & TUN_VNET_LE);
3300 if (put_user(le, (int __user *)argp))
3301 ret = -EFAULT;
3302 break;
3303
3304 case TUNSETVNETLE:
3305 if (get_user(le, (int __user *)argp)) {
3306 ret = -EFAULT;
3307 break;
3308 }
3309 if (le)
3310 tun->flags |= TUN_VNET_LE;
3311 else
3312 tun->flags &= ~TUN_VNET_LE;
3313 break;
3314
3315 case TUNGETVNETBE:
3316 ret = tun_get_vnet_be(tun, argp);
3317 break;
3318
3319 case TUNSETVNETBE:
3320 ret = tun_set_vnet_be(tun, argp);
3321 break;
3322
3323 case TUNATTACHFILTER:
3324
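/* Can be set only for TAPs */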
3325 ret = -EINVAL;
3326 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3327 break;
3328 ret = -EFAULT;
3329 if (copy_from_user(&tun->fprog, argp, sizeof(tun->fprog)))
3330 break;
3331
3332 ret = tun_attach_filter(tun);
3333 break;
3334
3335 case TUNDETACHFILTER:
3336
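/* Can be set only for TAPs */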
3337 ret = -EINVAL;
3338 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3339 break;
3340 ret = 0;
3341 tun_detach_filter(tun, tun->numqueues);
3342 break;
3343
3344 case TUNGETFILTER:
3345 ret = -EINVAL;
3346 if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
3347 break;
3348 ret = -EFAULT;
3349 if (copy_to_user(argp, &tun->fprog, sizeof(tun->fprog)))
3350 break;
3351 ret = 0;
3352 break;
3353
3354 case TUNSETSTEERINGEBPF:
3355 ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
3356 break;
3357
3358 case TUNSETFILTEREBPF:
3359 ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
3360 break;
3361
3362 case TUNSETCARRIER:
3363 ret = -EFAULT;
3364 if (copy_from_user(&carrier, argp, sizeof(carrier)))
3365 goto unlock;
3366
3367 ret = tun_net_change_carrier(tun->dev, (bool)carrier);
3368 break;
3369
3370 case TUNGETDEVNETNS:
3371 ret = -EPERM;
3372 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
3373 goto unlock;
3374 ret = open_related_ns(&net->ns, get_net_ns);
3375 break;
3376
3377 default:
3378 ret = -EINVAL;
3379 break;
3380 }
3381
3382 if (do_notify)
3383 netdev_state_change(tun->dev);
3384
3385unlock:
3386 rtnl_unlock();
3387 if (tun)
3388 tun_put(tun);
3389 return ret;
3390}
3391
3392static long tun_chr_ioctl(struct file *file,
3393 unsigned int cmd, unsigned long arg)
3394{
3395 return __tun_chr_ioctl(file, cmd, arg, sizeof (struct ifreq));
3396}
3397
3398#ifdef CONFIG_COMPAT
3399static long tun_chr_compat_ioctl(struct file *file,
3400 unsigned int cmd, unsigned long arg)
3401{
3402 switch (cmd) {
3403 case TUNSETIFF:
3404 case TUNGETIFF:
3405 case TUNSETTXFILTER:
3406 case TUNGETSNDBUF:
3407 case TUNSETSNDBUF:
3408 case SIOCGIFHWADDR:
3409 case SIOCSIFHWADDR:
3410 arg = (unsigned long)compat_ptr(arg);
3411 break;
3412 default:
3413 arg = (compat_ulong_t)arg;
3414 break;
3415 }
3416
3417
3418
3419
3420
3421
3422
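/* compat_ifreq is shorter than ifreq, so we must not access beyond
 * the end of that structure; passing sizeof(struct compat_ifreq) as
 * ifreq_len limits the copies accordingly. All fields that are used
 * in this driver are compatible.
 */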
3423 return __tun_chr_ioctl(file, cmd, arg, sizeof(struct compat_ifreq));
3424}
3425#endif
3426
3427static int tun_chr_fasync(int fd, struct file *file, int on)
3428{
3429 struct tun_file *tfile = file->private_data;
3430 int ret;
3431
3432 if ((ret = fasync_helper(fd, file, on, &tfile->fasync)) < 0)
3433 goto out;
3434
3435 if (on) {
3436 __f_setown(file, task_pid(current), PIDTYPE_TGID, 0);
3437 tfile->flags |= TUN_FASYNC;
3438 } else
3439 tfile->flags &= ~TUN_FASYNC;
3440 ret = 0;
3441out:
3442 return ret;
3443}
3444
3445static int tun_chr_open(struct inode *inode, struct file * file)
3446{
3447 struct net *net = current->nsproxy->net_ns;
3448 struct tun_file *tfile;
3449
3450 DBG1(KERN_INFO, "tunX: tun_chr_open\n");
3451
3452 tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
3453 &tun_proto, 0);
3454 if (!tfile)
3455 return -ENOMEM;
3456 if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) {
3457 sk_free(&tfile->sk);
3458 return -ENOMEM;
3459 }
3460
3461 mutex_init(&tfile->napi_mutex);
3462 RCU_INIT_POINTER(tfile->tun, NULL);
3463 tfile->flags = 0;
3464 tfile->ifindex = 0;
3465
3466 init_waitqueue_head(&tfile->wq.wait);
3467 RCU_INIT_POINTER(tfile->socket.wq, &tfile->wq);
3468
3469 tfile->socket.file = file;
3470 tfile->socket.ops = &tun_socket_ops;
3471
3472 sock_init_data(&tfile->socket, &tfile->sk);
3473
3474 tfile->sk.sk_write_space = tun_sock_write_space;
3475 tfile->sk.sk_sndbuf = INT_MAX;
3476
3477 file->private_data = tfile;
3478 INIT_LIST_HEAD(&tfile->next);
3479
3480 sock_set_flag(&tfile->sk, SOCK_ZEROCOPY);
3481
3482 return 0;
3483}
3484
3485static int tun_chr_close(struct inode *inode, struct file *file)
3486{
3487 struct tun_file *tfile = file->private_data;
3488
3489 tun_detach(tfile, true);
3490
3491 return 0;
3492}
3493
3494#ifdef CONFIG_PROC_FS
3495static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
3496{
3497 struct tun_file *tfile = file->private_data;
3498 struct tun_struct *tun;
3499 struct ifreq ifr;
3500
3501 memset(&ifr, 0, sizeof(ifr));
3502
3503 rtnl_lock();
3504 tun = tun_get(tfile);
3505 if (tun)
3506 tun_get_iff(tun, &ifr);
3507 rtnl_unlock();
3508
3509 if (tun)
3510 tun_put(tun);
3511
3512 seq_printf(m, "iff:\t%s\n", ifr.ifr_name);
3513}
3514#endif
3515
3516static const struct file_operations tun_fops = {
3517 .owner = THIS_MODULE,
3518 .llseek = no_llseek,
3519 .read_iter = tun_chr_read_iter,
3520 .write_iter = tun_chr_write_iter,
3521 .poll = tun_chr_poll,
3522 .unlocked_ioctl = tun_chr_ioctl,
3523#ifdef CONFIG_COMPAT
3524 .compat_ioctl = tun_chr_compat_ioctl,
3525#endif
3526 .open = tun_chr_open,
3527 .release = tun_chr_close,
3528 .fasync = tun_chr_fasync,
3529#ifdef CONFIG_PROC_FS
3530 .show_fdinfo = tun_chr_show_fdinfo,
3531#endif
3532};
3533
3534static struct miscdevice tun_miscdev = {
3535 .minor = TUN_MINOR,
3536 .name = "tun",
3537 .nodename = "net/tun",
3538 .fops = &tun_fops,
3539};
3540
3541
3542
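/* ethtool interface */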
3543static void tun_default_link_ksettings(struct net_device *dev,
3544 struct ethtool_link_ksettings *cmd)
3545{
3546 ethtool_link_ksettings_zero_link_mode(cmd, supported);
3547 ethtool_link_ksettings_zero_link_mode(cmd, advertising);
3548 cmd->base.speed = SPEED_10;
3549 cmd->base.duplex = DUPLEX_FULL;
3550 cmd->base.port = PORT_TP;
3551 cmd->base.phy_address = 0;
3552 cmd->base.autoneg = AUTONEG_DISABLE;
3553}
3554
3555static int tun_get_link_ksettings(struct net_device *dev,
3556 struct ethtool_link_ksettings *cmd)
3557{
3558 struct tun_struct *tun = netdev_priv(dev);
3559
3560 memcpy(cmd, &tun->link_ksettings, sizeof(*cmd));
3561 return 0;
3562}
3563
3564static int tun_set_link_ksettings(struct net_device *dev,
3565 const struct ethtool_link_ksettings *cmd)
3566{
3567 struct tun_struct *tun = netdev_priv(dev);
3568
3569 memcpy(&tun->link_ksettings, cmd, sizeof(*cmd));
3570 return 0;
3571}
3572
3573static void tun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
3574{
3575 struct tun_struct *tun = netdev_priv(dev);
3576
3577 strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
3578 strlcpy(info->version, DRV_VERSION, sizeof(info->version));
3579
3580 switch (tun->flags & TUN_TYPE_MASK) {
3581 case IFF_TUN:
3582 strlcpy(info->bus_info, "tun", sizeof(info->bus_info));
3583 break;
3584 case IFF_TAP:
3585 strlcpy(info->bus_info, "tap", sizeof(info->bus_info));
3586 break;
3587 }
3588}
3589
3590static u32 tun_get_msglevel(struct net_device *dev)
3591{
3592#ifdef TUN_DEBUG
3593 struct tun_struct *tun = netdev_priv(dev);
3594 return tun->debug;
3595#else
3596 return -EOPNOTSUPP;
3597#endif
3598}
3599
3600static void tun_set_msglevel(struct net_device *dev, u32 value)
3601{
3602#ifdef TUN_DEBUG
3603 struct tun_struct *tun = netdev_priv(dev);
3604 tun->debug = value;
3605#endif
3606}
3607
3608static int tun_get_coalesce(struct net_device *dev,
3609 struct ethtool_coalesce *ec)
3610{
3611 struct tun_struct *tun = netdev_priv(dev);
3612
3613 ec->rx_max_coalesced_frames = tun->rx_batched;
3614
3615 return 0;
3616}
3617
3618static int tun_set_coalesce(struct net_device *dev,
3619 struct ethtool_coalesce *ec)
3620{
3621 struct tun_struct *tun = netdev_priv(dev);
3622
3623 if (ec->rx_max_coalesced_frames > NAPI_POLL_WEIGHT)
3624 tun->rx_batched = NAPI_POLL_WEIGHT;
3625 else
3626 tun->rx_batched = ec->rx_max_coalesced_frames;
3627
3628 return 0;
3629}
3630
3631static const struct ethtool_ops tun_ethtool_ops = {
3632 .get_drvinfo = tun_get_drvinfo,
3633 .get_msglevel = tun_get_msglevel,
3634 .set_msglevel = tun_set_msglevel,
3635 .get_link = ethtool_op_get_link,
3636 .get_ts_info = ethtool_op_get_ts_info,
3637 .get_coalesce = tun_get_coalesce,
3638 .set_coalesce = tun_set_coalesce,
3639 .get_link_ksettings = tun_get_link_ksettings,
3640 .set_link_ksettings = tun_set_link_ksettings,
3641};
3642
3643static int tun_queue_resize(struct tun_struct *tun)
3644{
3645 struct net_device *dev = tun->dev;
3646 struct tun_file *tfile;
3647 struct ptr_ring **rings;
3648 int n = tun->numqueues + tun->numdisabled;
3649 int ret, i;
3650
3651 rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
3652 if (!rings)
3653 return -ENOMEM;
3654
3655 for (i = 0; i < tun->numqueues; i++) {
3656 tfile = rtnl_dereference(tun->tfiles[i]);
3657 rings[i] = &tfile->tx_ring;
3658 }
3659 list_for_each_entry(tfile, &tun->disabled, next)
3660 rings[i++] = &tfile->tx_ring;
3661
3662 ret = ptr_ring_resize_multiple(rings, n,
3663 dev->tx_queue_len, GFP_KERNEL,
3664 tun_ptr_free);
3665
3666 kfree(rings);
3667 return ret;
3668}
3669
3670static int tun_device_event(struct notifier_block *unused,
3671 unsigned long event, void *ptr)
3672{
3673 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3674 struct tun_struct *tun = netdev_priv(dev);
3675 int i;
3676
3677 if (dev->rtnl_link_ops != &tun_link_ops)
3678 return NOTIFY_DONE;
3679
3680 switch (event) {
3681 case NETDEV_CHANGE_TX_QUEUE_LEN:
3682 if (tun_queue_resize(tun))
3683 return NOTIFY_BAD;
3684 break;
3685 case NETDEV_UP:
3686 for (i = 0; i < tun->numqueues; i++) {
3687 struct tun_file *tfile;
3688
3689 tfile = rtnl_dereference(tun->tfiles[i]);
3690 tfile->socket.sk->sk_write_space(tfile->socket.sk);
3691 }
3692 break;
3693 default:
3694 break;
3695 }
3696
3697 return NOTIFY_DONE;
3698}
3699
3700static struct notifier_block tun_notifier_block __read_mostly = {
3701 .notifier_call = tun_device_event,
3702};
3703
3704static int __init tun_init(void)
3705{
3706 int ret = 0;
3707
3708 pr_info("%s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
3709
3710 ret = rtnl_link_register(&tun_link_ops);
3711 if (ret) {
3712 pr_err("Can't register link_ops\n");
3713 goto err_linkops;
3714 }
3715
3716 ret = misc_register(&tun_miscdev);
3717 if (ret) {
3718 pr_err("Can't register misc device %d\n", TUN_MINOR);
3719 goto err_misc;
3720 }
3721
3722 ret = register_netdevice_notifier(&tun_notifier_block);
3723 if (ret) {
3724 pr_err("Can't register netdevice notifier\n");
3725 goto err_notifier;
3726 }
3727
3728 return 0;
3729
3730err_notifier:
3731 misc_deregister(&tun_miscdev);
3732err_misc:
3733 rtnl_link_unregister(&tun_link_ops);
3734err_linkops:
3735 return ret;
3736}
3737
3738static void tun_cleanup(void)
3739{
3740 misc_deregister(&tun_miscdev);
3741 rtnl_link_unregister(&tun_link_ops);
3742 unregister_netdevice_notifier(&tun_notifier_block);
3743}
3744
3745
3746
3747
3748
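/* Get an underlying socket object from tun file. Returns error unless file is
 * attached to a device. The returned object works like a packet socket, it
 * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
 * managing the ref count.
 */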
3749struct socket *tun_get_socket(struct file *file)
3750{
3751 struct tun_file *tfile;
3752 if (file->f_op != &tun_fops)
3753 return ERR_PTR(-EINVAL);
3754 tfile = file->private_data;
3755 if (!tfile)
3756 return ERR_PTR(-EBADFD);
3757 return &tfile->socket;
3758}
3759EXPORT_SYMBOL_GPL(tun_get_socket);
3760
3761struct ptr_ring *tun_get_tx_ring(struct file *file)
3762{
3763 struct tun_file *tfile;
3764
3765 if (file->f_op != &tun_fops)
3766 return ERR_PTR(-EINVAL);
3767 tfile = file->private_data;
3768 if (!tfile)
3769 return ERR_PTR(-EBADFD);
3770 return &tfile->tx_ring;
3771}
3772EXPORT_SYMBOL_GPL(tun_get_tx_ring);
3773
3774module_init(tun_init);
3775module_exit(tun_cleanup);
3776MODULE_DESCRIPTION(DRV_DESCRIPTION);
3777MODULE_AUTHOR(DRV_COPYRIGHT);
3778MODULE_LICENSE("GPL");
3779MODULE_ALIAS_MISCDEV(TUN_MINOR);
3780MODULE_ALIAS("devname:net/tun");
3781