1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71#include <linux/uaccess.h>
72#include <linux/bitops.h>
73#include <linux/capability.h>
74#include <linux/cpu.h>
75#include <linux/types.h>
76#include <linux/kernel.h>
77#include <linux/hash.h>
78#include <linux/slab.h>
79#include <linux/sched.h>
80#include <linux/sched/mm.h>
81#include <linux/mutex.h>
82#include <linux/rwsem.h>
83#include <linux/string.h>
84#include <linux/mm.h>
85#include <linux/socket.h>
86#include <linux/sockios.h>
87#include <linux/errno.h>
88#include <linux/interrupt.h>
89#include <linux/if_ether.h>
90#include <linux/netdevice.h>
91#include <linux/etherdevice.h>
92#include <linux/ethtool.h>
93#include <linux/skbuff.h>
94#include <linux/bpf.h>
95#include <linux/bpf_trace.h>
96#include <net/net_namespace.h>
97#include <net/sock.h>
98#include <net/busy_poll.h>
99#include <linux/rtnetlink.h>
100#include <linux/stat.h>
101#include <net/dst.h>
102#include <net/dst_metadata.h>
103#include <net/pkt_sched.h>
104#include <net/pkt_cls.h>
105#include <net/checksum.h>
106#include <net/xfrm.h>
107#include <linux/highmem.h>
108#include <linux/init.h>
109#include <linux/module.h>
110#include <linux/netpoll.h>
111#include <linux/rcupdate.h>
112#include <linux/delay.h>
113#include <net/iw_handler.h>
114#include <asm/current.h>
115#include <linux/audit.h>
116#include <linux/dmaengine.h>
117#include <linux/err.h>
118#include <linux/ctype.h>
119#include <linux/if_arp.h>
120#include <linux/if_vlan.h>
121#include <linux/ip.h>
122#include <net/ip.h>
123#include <net/mpls.h>
124#include <linux/ipv6.h>
125#include <linux/in.h>
126#include <linux/jhash.h>
127#include <linux/random.h>
128#include <trace/events/napi.h>
129#include <trace/events/net.h>
130#include <trace/events/skb.h>
131#include <linux/inetdevice.h>
132#include <linux/cpu_rmap.h>
133#include <linux/static_key.h>
134#include <linux/hashtable.h>
135#include <linux/vmalloc.h>
136#include <linux/if_macvlan.h>
137#include <linux/errqueue.h>
138#include <linux/hrtimer.h>
139#include <linux/netfilter_ingress.h>
140#include <linux/crash_dump.h>
141#include <linux/sctp.h>
142#include <net/udp_tunnel.h>
143#include <linux/net_namespace.h>
144#include <linux/indirect_call_wrapper.h>
145#include <net/devlink.h>
146#include <linux/pm_runtime.h>
147
148#include "net-sysfs.h"
149
150#define MAX_GRO_SKBS 8
151
152
153#define GRO_MAX_HEAD (MAX_HEADER + 128)
154
155static DEFINE_SPINLOCK(ptype_lock);
156static DEFINE_SPINLOCK(offload_lock);
157struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
158struct list_head ptype_all __read_mostly;
159static struct list_head offload_base __read_mostly;
160
161static int netif_rx_internal(struct sk_buff *skb);
162static int call_netdevice_notifiers_info(unsigned long val,
163 struct netdev_notifier_info *info);
164static int call_netdevice_notifiers_extack(unsigned long val,
165 struct net_device *dev,
166 struct netlink_ext_ack *extack);
167static struct napi_struct *napi_by_id(unsigned int napi_id);
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188DEFINE_RWLOCK(dev_base_lock);
189EXPORT_SYMBOL(dev_base_lock);
190
191static DEFINE_MUTEX(ifalias_mutex);
192
193
194static DEFINE_SPINLOCK(napi_hash_lock);
195
196static unsigned int napi_gen_id = NR_CPUS;
197static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
198
199static DECLARE_RWSEM(devnet_rename_sem);
200
201static inline void dev_base_seq_inc(struct net *net)
202{
203 while (++net->dev_base_seq == 0)
204 ;
205}
206
207static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
208{
209 unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
210
211 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
212}
213
214static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
215{
216 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
217}
218
219static inline void rps_lock(struct softnet_data *sd)
220{
221#ifdef CONFIG_RPS
222 spin_lock(&sd->input_pkt_queue.lock);
223#endif
224}
225
226static inline void rps_unlock(struct softnet_data *sd)
227{
228#ifdef CONFIG_RPS
229 spin_unlock(&sd->input_pkt_queue.lock);
230#endif
231}
232
233static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
234 const char *name)
235{
236 struct netdev_name_node *name_node;
237
238 name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
239 if (!name_node)
240 return NULL;
241 INIT_HLIST_NODE(&name_node->hlist);
242 name_node->dev = dev;
243 name_node->name = name;
244 return name_node;
245}
246
247static struct netdev_name_node *
248netdev_name_node_head_alloc(struct net_device *dev)
249{
250 struct netdev_name_node *name_node;
251
252 name_node = netdev_name_node_alloc(dev, dev->name);
253 if (!name_node)
254 return NULL;
255 INIT_LIST_HEAD(&name_node->list);
256 return name_node;
257}
258
259static void netdev_name_node_free(struct netdev_name_node *name_node)
260{
261 kfree(name_node);
262}
263
264static void netdev_name_node_add(struct net *net,
265 struct netdev_name_node *name_node)
266{
267 hlist_add_head_rcu(&name_node->hlist,
268 dev_name_hash(net, name_node->name));
269}
270
271static void netdev_name_node_del(struct netdev_name_node *name_node)
272{
273 hlist_del_rcu(&name_node->hlist);
274}
275
276static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
277 const char *name)
278{
279 struct hlist_head *head = dev_name_hash(net, name);
280 struct netdev_name_node *name_node;
281
282 hlist_for_each_entry(name_node, head, hlist)
283 if (!strcmp(name_node->name, name))
284 return name_node;
285 return NULL;
286}
287
288static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
289 const char *name)
290{
291 struct hlist_head *head = dev_name_hash(net, name);
292 struct netdev_name_node *name_node;
293
294 hlist_for_each_entry_rcu(name_node, head, hlist)
295 if (!strcmp(name_node->name, name))
296 return name_node;
297 return NULL;
298}
299
300int netdev_name_node_alt_create(struct net_device *dev, const char *name)
301{
302 struct netdev_name_node *name_node;
303 struct net *net = dev_net(dev);
304
305 name_node = netdev_name_node_lookup(net, name);
306 if (name_node)
307 return -EEXIST;
308 name_node = netdev_name_node_alloc(dev, name);
309 if (!name_node)
310 return -ENOMEM;
311 netdev_name_node_add(net, name_node);
312
313 list_add_tail(&name_node->list, &dev->name_node->list);
314
315 return 0;
316}
317EXPORT_SYMBOL(netdev_name_node_alt_create);
318
319static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
320{
321 list_del(&name_node->list);
322 netdev_name_node_del(name_node);
323 kfree(name_node->name);
324 netdev_name_node_free(name_node);
325}
326
327int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
328{
329 struct netdev_name_node *name_node;
330 struct net *net = dev_net(dev);
331
332 name_node = netdev_name_node_lookup(net, name);
333 if (!name_node)
334 return -ENOENT;
335
336
337
338 if (name_node == dev->name_node || name_node->dev != dev)
339 return -EINVAL;
340
341 __netdev_name_node_alt_destroy(name_node);
342
343 return 0;
344}
345EXPORT_SYMBOL(netdev_name_node_alt_destroy);
346
347static void netdev_name_node_alt_flush(struct net_device *dev)
348{
349 struct netdev_name_node *name_node, *tmp;
350
351 list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
352 __netdev_name_node_alt_destroy(name_node);
353}
354
355
356static void list_netdevice(struct net_device *dev)
357{
358 struct net *net = dev_net(dev);
359
360 ASSERT_RTNL();
361
362 write_lock_bh(&dev_base_lock);
363 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
364 netdev_name_node_add(net, dev->name_node);
365 hlist_add_head_rcu(&dev->index_hlist,
366 dev_index_hash(net, dev->ifindex));
367 write_unlock_bh(&dev_base_lock);
368
369 dev_base_seq_inc(net);
370}
371
372
373
374
375static void unlist_netdevice(struct net_device *dev)
376{
377 ASSERT_RTNL();
378
379
380 write_lock_bh(&dev_base_lock);
381 list_del_rcu(&dev->dev_list);
382 netdev_name_node_del(dev->name_node);
383 hlist_del_rcu(&dev->index_hlist);
384 write_unlock_bh(&dev_base_lock);
385
386 dev_base_seq_inc(dev_net(dev));
387}
388
389
390
391
392
393static RAW_NOTIFIER_HEAD(netdev_chain);
394
395
396
397
398
399
400DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
401EXPORT_PER_CPU_SYMBOL(softnet_data);
402
403#ifdef CONFIG_LOCKDEP
404
405
406
407
408static const unsigned short netdev_lock_type[] = {
409 ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
410 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
411 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
412 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
413 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
414 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
415 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
416 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
417 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
418 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
419 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
420 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
421 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
422 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
423 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
424
425static const char *const netdev_lock_name[] = {
426 "_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
427 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
428 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
429 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
430 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
431 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
432 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
433 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
434 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
435 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
436 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
437 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
438 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
439 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
440 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
441
442static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
443static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
444
445static inline unsigned short netdev_lock_pos(unsigned short dev_type)
446{
447 int i;
448
449 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
450 if (netdev_lock_type[i] == dev_type)
451 return i;
452
453 return ARRAY_SIZE(netdev_lock_type) - 1;
454}
455
456static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
457 unsigned short dev_type)
458{
459 int i;
460
461 i = netdev_lock_pos(dev_type);
462 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
463 netdev_lock_name[i]);
464}
465
466static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
467{
468 int i;
469
470 i = netdev_lock_pos(dev->type);
471 lockdep_set_class_and_name(&dev->addr_list_lock,
472 &netdev_addr_lock_key[i],
473 netdev_lock_name[i]);
474}
475#else
476static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
477 unsigned short dev_type)
478{
479}
480
481static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
482{
483}
484#endif
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509static inline struct list_head *ptype_head(const struct packet_type *pt)
510{
511 if (pt->type == htons(ETH_P_ALL))
512 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
513 else
514 return pt->dev ? &pt->dev->ptype_specific :
515 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
516}
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531void dev_add_pack(struct packet_type *pt)
532{
533 struct list_head *head = ptype_head(pt);
534
535 spin_lock(&ptype_lock);
536 list_add_rcu(&pt->list, head);
537 spin_unlock(&ptype_lock);
538}
539EXPORT_SYMBOL(dev_add_pack);
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554void __dev_remove_pack(struct packet_type *pt)
555{
556 struct list_head *head = ptype_head(pt);
557 struct packet_type *pt1;
558
559 spin_lock(&ptype_lock);
560
561 list_for_each_entry(pt1, head, list) {
562 if (pt == pt1) {
563 list_del_rcu(&pt->list);
564 goto out;
565 }
566 }
567
568 pr_warn("dev_remove_pack: %p not found\n", pt);
569out:
570 spin_unlock(&ptype_lock);
571}
572EXPORT_SYMBOL(__dev_remove_pack);
573
574
575
576
577
578
579
580
581
582
583
584
585
586void dev_remove_pack(struct packet_type *pt)
587{
588 __dev_remove_pack(pt);
589
590 synchronize_net();
591}
592EXPORT_SYMBOL(dev_remove_pack);
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607void dev_add_offload(struct packet_offload *po)
608{
609 struct packet_offload *elem;
610
611 spin_lock(&offload_lock);
612 list_for_each_entry(elem, &offload_base, list) {
613 if (po->priority < elem->priority)
614 break;
615 }
616 list_add_rcu(&po->list, elem->list.prev);
617 spin_unlock(&offload_lock);
618}
619EXPORT_SYMBOL(dev_add_offload);
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634static void __dev_remove_offload(struct packet_offload *po)
635{
636 struct list_head *head = &offload_base;
637 struct packet_offload *po1;
638
639 spin_lock(&offload_lock);
640
641 list_for_each_entry(po1, head, list) {
642 if (po == po1) {
643 list_del_rcu(&po->list);
644 goto out;
645 }
646 }
647
648 pr_warn("dev_remove_offload: %p not found\n", po);
649out:
650 spin_unlock(&offload_lock);
651}
652
653
654
655
656
657
658
659
660
661
662
663
664
665void dev_remove_offload(struct packet_offload *po)
666{
667 __dev_remove_offload(po);
668
669 synchronize_net();
670}
671EXPORT_SYMBOL(dev_remove_offload);
672
673
674
675
676
677
678
679
680static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
681
682
683
684
685
686
687
688
689
690
691static int netdev_boot_setup_add(char *name, struct ifmap *map)
692{
693 struct netdev_boot_setup *s;
694 int i;
695
696 s = dev_boot_setup;
697 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
698 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
699 memset(s[i].name, 0, sizeof(s[i].name));
700 strlcpy(s[i].name, name, IFNAMSIZ);
701 memcpy(&s[i].map, map, sizeof(s[i].map));
702 break;
703 }
704 }
705
706 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
707}
708
709
710
711
712
713
714
715
716
717
718int netdev_boot_setup_check(struct net_device *dev)
719{
720 struct netdev_boot_setup *s = dev_boot_setup;
721 int i;
722
723 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
724 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
725 !strcmp(dev->name, s[i].name)) {
726 dev->irq = s[i].map.irq;
727 dev->base_addr = s[i].map.base_addr;
728 dev->mem_start = s[i].map.mem_start;
729 dev->mem_end = s[i].map.mem_end;
730 return 1;
731 }
732 }
733 return 0;
734}
735EXPORT_SYMBOL(netdev_boot_setup_check);
736
737
738
739
740
741
742
743
744
745
746
747
748unsigned long netdev_boot_base(const char *prefix, int unit)
749{
750 const struct netdev_boot_setup *s = dev_boot_setup;
751 char name[IFNAMSIZ];
752 int i;
753
754 sprintf(name, "%s%d", prefix, unit);
755
756
757
758
759
760 if (__dev_get_by_name(&init_net, name))
761 return 1;
762
763 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
764 if (!strcmp(name, s[i].name))
765 return s[i].map.base_addr;
766 return 0;
767}
768
769
770
771
772int __init netdev_boot_setup(char *str)
773{
774 int ints[5];
775 struct ifmap map;
776
777 str = get_options(str, ARRAY_SIZE(ints), ints);
778 if (!str || !*str)
779 return 0;
780
781
782 memset(&map, 0, sizeof(map));
783 if (ints[0] > 0)
784 map.irq = ints[1];
785 if (ints[0] > 1)
786 map.base_addr = ints[2];
787 if (ints[0] > 2)
788 map.mem_start = ints[3];
789 if (ints[0] > 3)
790 map.mem_end = ints[4];
791
792
793 return netdev_boot_setup_add(str, &map);
794}
795
796__setup("netdev=", netdev_boot_setup);
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812int dev_get_iflink(const struct net_device *dev)
813{
814 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
815 return dev->netdev_ops->ndo_get_iflink(dev);
816
817 return dev->ifindex;
818}
819EXPORT_SYMBOL(dev_get_iflink);
820
821
822
823
824
825
826
827
828
829
830int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
831{
832 struct ip_tunnel_info *info;
833
834 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
835 return -EINVAL;
836
837 info = skb_tunnel_info_unclone(skb);
838 if (!info)
839 return -ENOMEM;
840 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
841 return -EINVAL;
842
843 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
844}
845EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
846
847
848
849
850
851
852
853
854
855
856
857
858
859struct net_device *__dev_get_by_name(struct net *net, const char *name)
860{
861 struct netdev_name_node *node_name;
862
863 node_name = netdev_name_node_lookup(net, name);
864 return node_name ? node_name->dev : NULL;
865}
866EXPORT_SYMBOL(__dev_get_by_name);
867
868
869
870
871
872
873
874
875
876
877
878
879
880struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
881{
882 struct netdev_name_node *node_name;
883
884 node_name = netdev_name_node_lookup_rcu(net, name);
885 return node_name ? node_name->dev : NULL;
886}
887EXPORT_SYMBOL(dev_get_by_name_rcu);
888
889
890
891
892
893
894
895
896
897
898
899
900
901struct net_device *dev_get_by_name(struct net *net, const char *name)
902{
903 struct net_device *dev;
904
905 rcu_read_lock();
906 dev = dev_get_by_name_rcu(net, name);
907 if (dev)
908 dev_hold(dev);
909 rcu_read_unlock();
910 return dev;
911}
912EXPORT_SYMBOL(dev_get_by_name);
913
914
915
916
917
918
919
920
921
922
923
924
925
926struct net_device *__dev_get_by_index(struct net *net, int ifindex)
927{
928 struct net_device *dev;
929 struct hlist_head *head = dev_index_hash(net, ifindex);
930
931 hlist_for_each_entry(dev, head, index_hlist)
932 if (dev->ifindex == ifindex)
933 return dev;
934
935 return NULL;
936}
937EXPORT_SYMBOL(__dev_get_by_index);
938
939
940
941
942
943
944
945
946
947
948
949
950struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
951{
952 struct net_device *dev;
953 struct hlist_head *head = dev_index_hash(net, ifindex);
954
955 hlist_for_each_entry_rcu(dev, head, index_hlist)
956 if (dev->ifindex == ifindex)
957 return dev;
958
959 return NULL;
960}
961EXPORT_SYMBOL(dev_get_by_index_rcu);
962
963
964
965
966
967
968
969
970
971
972
973
974
975struct net_device *dev_get_by_index(struct net *net, int ifindex)
976{
977 struct net_device *dev;
978
979 rcu_read_lock();
980 dev = dev_get_by_index_rcu(net, ifindex);
981 if (dev)
982 dev_hold(dev);
983 rcu_read_unlock();
984 return dev;
985}
986EXPORT_SYMBOL(dev_get_by_index);
987
988
989
990
991
992
993
994
995
996
997
998struct net_device *dev_get_by_napi_id(unsigned int napi_id)
999{
1000 struct napi_struct *napi;
1001
1002 WARN_ON_ONCE(!rcu_read_lock_held());
1003
1004 if (napi_id < MIN_NAPI_ID)
1005 return NULL;
1006
1007 napi = napi_by_id(napi_id);
1008
1009 return napi ? napi->dev : NULL;
1010}
1011EXPORT_SYMBOL(dev_get_by_napi_id);
1012
1013
1014
1015
1016
1017
1018
1019int netdev_get_name(struct net *net, char *name, int ifindex)
1020{
1021 struct net_device *dev;
1022 int ret;
1023
1024 down_read(&devnet_rename_sem);
1025 rcu_read_lock();
1026
1027 dev = dev_get_by_index_rcu(net, ifindex);
1028 if (!dev) {
1029 ret = -ENODEV;
1030 goto out;
1031 }
1032
1033 strcpy(name, dev->name);
1034
1035 ret = 0;
1036out:
1037 rcu_read_unlock();
1038 up_read(&devnet_rename_sem);
1039 return ret;
1040}
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
1057 const char *ha)
1058{
1059 struct net_device *dev;
1060
1061 for_each_netdev_rcu(net, dev)
1062 if (dev->type == type &&
1063 !memcmp(dev->dev_addr, ha, dev->addr_len))
1064 return dev;
1065
1066 return NULL;
1067}
1068EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
1069
1070struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
1071{
1072 struct net_device *dev;
1073
1074 ASSERT_RTNL();
1075 for_each_netdev(net, dev)
1076 if (dev->type == type)
1077 return dev;
1078
1079 return NULL;
1080}
1081EXPORT_SYMBOL(__dev_getfirstbyhwtype);
1082
1083struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
1084{
1085 struct net_device *dev, *ret = NULL;
1086
1087 rcu_read_lock();
1088 for_each_netdev_rcu(net, dev)
1089 if (dev->type == type) {
1090 dev_hold(dev);
1091 ret = dev;
1092 break;
1093 }
1094 rcu_read_unlock();
1095 return ret;
1096}
1097EXPORT_SYMBOL(dev_getfirstbyhwtype);
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
1111 unsigned short mask)
1112{
1113 struct net_device *dev, *ret;
1114
1115 ASSERT_RTNL();
1116
1117 ret = NULL;
1118 for_each_netdev(net, dev) {
1119 if (((dev->flags ^ if_flags) & mask) == 0) {
1120 ret = dev;
1121 break;
1122 }
1123 }
1124 return ret;
1125}
1126EXPORT_SYMBOL(__dev_get_by_flags);
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136bool dev_valid_name(const char *name)
1137{
1138 if (*name == '\0')
1139 return false;
1140 if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
1141 return false;
1142 if (!strcmp(name, ".") || !strcmp(name, ".."))
1143 return false;
1144
1145 while (*name) {
1146 if (*name == '/' || *name == ':' || isspace(*name))
1147 return false;
1148 name++;
1149 }
1150 return true;
1151}
1152EXPORT_SYMBOL(dev_valid_name);
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1170{
1171 int i = 0;
1172 const char *p;
1173 const int max_netdevices = 8*PAGE_SIZE;
1174 unsigned long *inuse;
1175 struct net_device *d;
1176
1177 if (!dev_valid_name(name))
1178 return -EINVAL;
1179
1180 p = strchr(name, '%');
1181 if (p) {
1182
1183
1184
1185
1186
1187 if (p[1] != 'd' || strchr(p + 2, '%'))
1188 return -EINVAL;
1189
1190
1191 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1192 if (!inuse)
1193 return -ENOMEM;
1194
1195 for_each_netdev(net, d) {
1196 if (!sscanf(d->name, name, &i))
1197 continue;
1198 if (i < 0 || i >= max_netdevices)
1199 continue;
1200
1201
1202 snprintf(buf, IFNAMSIZ, name, i);
1203 if (!strncmp(buf, d->name, IFNAMSIZ))
1204 set_bit(i, inuse);
1205 }
1206
1207 i = find_first_zero_bit(inuse, max_netdevices);
1208 free_page((unsigned long) inuse);
1209 }
1210
1211 snprintf(buf, IFNAMSIZ, name, i);
1212 if (!__dev_get_by_name(net, buf))
1213 return i;
1214
1215
1216
1217
1218
1219 return -ENFILE;
1220}
1221
1222static int dev_alloc_name_ns(struct net *net,
1223 struct net_device *dev,
1224 const char *name)
1225{
1226 char buf[IFNAMSIZ];
1227 int ret;
1228
1229 BUG_ON(!net);
1230 ret = __dev_alloc_name(net, name, buf);
1231 if (ret >= 0)
1232 strlcpy(dev->name, buf, IFNAMSIZ);
1233 return ret;
1234}
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250int dev_alloc_name(struct net_device *dev, const char *name)
1251{
1252 return dev_alloc_name_ns(dev_net(dev), dev, name);
1253}
1254EXPORT_SYMBOL(dev_alloc_name);
1255
1256static int dev_get_valid_name(struct net *net, struct net_device *dev,
1257 const char *name)
1258{
1259 BUG_ON(!net);
1260
1261 if (!dev_valid_name(name))
1262 return -EINVAL;
1263
1264 if (strchr(name, '%'))
1265 return dev_alloc_name_ns(net, dev, name);
1266 else if (__dev_get_by_name(net, name))
1267 return -EEXIST;
1268 else if (dev->name != name)
1269 strlcpy(dev->name, name, IFNAMSIZ);
1270
1271 return 0;
1272}
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282int dev_change_name(struct net_device *dev, const char *newname)
1283{
1284 unsigned char old_assign_type;
1285 char oldname[IFNAMSIZ];
1286 int err = 0;
1287 int ret;
1288 struct net *net;
1289
1290 ASSERT_RTNL();
1291 BUG_ON(!dev_net(dev));
1292
1293 net = dev_net(dev);
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307 if (dev->flags & IFF_UP &&
1308 likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
1309 return -EBUSY;
1310
1311 down_write(&devnet_rename_sem);
1312
1313 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1314 up_write(&devnet_rename_sem);
1315 return 0;
1316 }
1317
1318 memcpy(oldname, dev->name, IFNAMSIZ);
1319
1320 err = dev_get_valid_name(net, dev, newname);
1321 if (err < 0) {
1322 up_write(&devnet_rename_sem);
1323 return err;
1324 }
1325
1326 if (oldname[0] && !strchr(oldname, '%'))
1327 netdev_info(dev, "renamed from %s\n", oldname);
1328
1329 old_assign_type = dev->name_assign_type;
1330 dev->name_assign_type = NET_NAME_RENAMED;
1331
1332rollback:
1333 ret = device_rename(&dev->dev, dev->name);
1334 if (ret) {
1335 memcpy(dev->name, oldname, IFNAMSIZ);
1336 dev->name_assign_type = old_assign_type;
1337 up_write(&devnet_rename_sem);
1338 return ret;
1339 }
1340
1341 up_write(&devnet_rename_sem);
1342
1343 netdev_adjacent_rename_links(dev, oldname);
1344
1345 write_lock_bh(&dev_base_lock);
1346 netdev_name_node_del(dev->name_node);
1347 write_unlock_bh(&dev_base_lock);
1348
1349 synchronize_rcu();
1350
1351 write_lock_bh(&dev_base_lock);
1352 netdev_name_node_add(net, dev->name_node);
1353 write_unlock_bh(&dev_base_lock);
1354
1355 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1356 ret = notifier_to_errno(ret);
1357
1358 if (ret) {
1359
1360 if (err >= 0) {
1361 err = ret;
1362 down_write(&devnet_rename_sem);
1363 memcpy(dev->name, oldname, IFNAMSIZ);
1364 memcpy(oldname, newname, IFNAMSIZ);
1365 dev->name_assign_type = old_assign_type;
1366 old_assign_type = NET_NAME_RENAMED;
1367 goto rollback;
1368 } else {
1369 pr_err("%s: name change rollback failed: %d\n",
1370 dev->name, ret);
1371 }
1372 }
1373
1374 return err;
1375}
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1386{
1387 struct dev_ifalias *new_alias = NULL;
1388
1389 if (len >= IFALIASZ)
1390 return -EINVAL;
1391
1392 if (len) {
1393 new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1394 if (!new_alias)
1395 return -ENOMEM;
1396
1397 memcpy(new_alias->ifalias, alias, len);
1398 new_alias->ifalias[len] = 0;
1399 }
1400
1401 mutex_lock(&ifalias_mutex);
1402 new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
1403 mutex_is_locked(&ifalias_mutex));
1404 mutex_unlock(&ifalias_mutex);
1405
1406 if (new_alias)
1407 kfree_rcu(new_alias, rcuhead);
1408
1409 return len;
1410}
1411EXPORT_SYMBOL(dev_set_alias);
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1423{
1424 const struct dev_ifalias *alias;
1425 int ret = 0;
1426
1427 rcu_read_lock();
1428 alias = rcu_dereference(dev->ifalias);
1429 if (alias)
1430 ret = snprintf(name, len, "%s", alias->ifalias);
1431 rcu_read_unlock();
1432
1433 return ret;
1434}
1435
1436
1437
1438
1439
1440
1441
1442void netdev_features_change(struct net_device *dev)
1443{
1444 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1445}
1446EXPORT_SYMBOL(netdev_features_change);
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456void netdev_state_change(struct net_device *dev)
1457{
1458 if (dev->flags & IFF_UP) {
1459 struct netdev_notifier_change_info change_info = {
1460 .info.dev = dev,
1461 };
1462
1463 call_netdevice_notifiers_info(NETDEV_CHANGE,
1464 &change_info.info);
1465 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1466 }
1467}
1468EXPORT_SYMBOL(netdev_state_change);
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480void netdev_notify_peers(struct net_device *dev)
1481{
1482 rtnl_lock();
1483 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1484 call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
1485 rtnl_unlock();
1486}
1487EXPORT_SYMBOL(netdev_notify_peers);
1488
1489static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1490{
1491 const struct net_device_ops *ops = dev->netdev_ops;
1492 int ret;
1493
1494 ASSERT_RTNL();
1495
1496 if (!netif_device_present(dev)) {
1497
1498 if (dev->dev.parent)
1499 pm_runtime_resume(dev->dev.parent);
1500 if (!netif_device_present(dev))
1501 return -ENODEV;
1502 }
1503
1504
1505
1506
1507
1508 netpoll_poll_disable(dev);
1509
1510 ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
1511 ret = notifier_to_errno(ret);
1512 if (ret)
1513 return ret;
1514
1515 set_bit(__LINK_STATE_START, &dev->state);
1516
1517 if (ops->ndo_validate_addr)
1518 ret = ops->ndo_validate_addr(dev);
1519
1520 if (!ret && ops->ndo_open)
1521 ret = ops->ndo_open(dev);
1522
1523 netpoll_poll_enable(dev);
1524
1525 if (ret)
1526 clear_bit(__LINK_STATE_START, &dev->state);
1527 else {
1528 dev->flags |= IFF_UP;
1529 dev_set_rx_mode(dev);
1530 dev_activate(dev);
1531 add_device_randomness(dev->dev_addr, dev->addr_len);
1532 }
1533
1534 return ret;
1535}
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1551{
1552 int ret;
1553
1554 if (dev->flags & IFF_UP)
1555 return 0;
1556
1557 ret = __dev_open(dev, extack);
1558 if (ret < 0)
1559 return ret;
1560
1561 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1562 call_netdevice_notifiers(NETDEV_UP, dev);
1563
1564 return ret;
1565}
1566EXPORT_SYMBOL(dev_open);
1567
1568static void __dev_close_many(struct list_head *head)
1569{
1570 struct net_device *dev;
1571
1572 ASSERT_RTNL();
1573 might_sleep();
1574
1575 list_for_each_entry(dev, head, close_list) {
1576
1577 netpoll_poll_disable(dev);
1578
1579 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1580
1581 clear_bit(__LINK_STATE_START, &dev->state);
1582
1583
1584
1585
1586
1587
1588
1589 smp_mb__after_atomic();
1590 }
1591
1592 dev_deactivate_many(head);
1593
1594 list_for_each_entry(dev, head, close_list) {
1595 const struct net_device_ops *ops = dev->netdev_ops;
1596
1597
1598
1599
1600
1601
1602
1603
1604 if (ops->ndo_stop)
1605 ops->ndo_stop(dev);
1606
1607 dev->flags &= ~IFF_UP;
1608 netpoll_poll_enable(dev);
1609 }
1610}
1611
1612static void __dev_close(struct net_device *dev)
1613{
1614 LIST_HEAD(single);
1615
1616 list_add(&dev->close_list, &single);
1617 __dev_close_many(&single);
1618 list_del(&single);
1619}
1620
1621void dev_close_many(struct list_head *head, bool unlink)
1622{
1623 struct net_device *dev, *tmp;
1624
1625
1626 list_for_each_entry_safe(dev, tmp, head, close_list)
1627 if (!(dev->flags & IFF_UP))
1628 list_del_init(&dev->close_list);
1629
1630 __dev_close_many(head);
1631
1632 list_for_each_entry_safe(dev, tmp, head, close_list) {
1633 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1634 call_netdevice_notifiers(NETDEV_DOWN, dev);
1635 if (unlink)
1636 list_del_init(&dev->close_list);
1637 }
1638}
1639EXPORT_SYMBOL(dev_close_many);
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650void dev_close(struct net_device *dev)
1651{
1652 if (dev->flags & IFF_UP) {
1653 LIST_HEAD(single);
1654
1655 list_add(&dev->close_list, &single);
1656 dev_close_many(&single, true);
1657 list_del(&single);
1658 }
1659}
1660EXPORT_SYMBOL(dev_close);
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671void dev_disable_lro(struct net_device *dev)
1672{
1673 struct net_device *lower_dev;
1674 struct list_head *iter;
1675
1676 dev->wanted_features &= ~NETIF_F_LRO;
1677 netdev_update_features(dev);
1678
1679 if (unlikely(dev->features & NETIF_F_LRO))
1680 netdev_WARN(dev, "failed to disable LRO!\n");
1681
1682 netdev_for_each_lower_dev(dev, lower_dev, iter)
1683 dev_disable_lro(lower_dev);
1684}
1685EXPORT_SYMBOL(dev_disable_lro);
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695static void dev_disable_gro_hw(struct net_device *dev)
1696{
1697 dev->wanted_features &= ~NETIF_F_GRO_HW;
1698 netdev_update_features(dev);
1699
1700 if (unlikely(dev->features & NETIF_F_GRO_HW))
1701 netdev_WARN(dev, "failed to disable GRO_HW!\n");
1702}
1703
1704const char *netdev_cmd_to_name(enum netdev_cmd cmd)
1705{
1706#define N(val) \
1707 case NETDEV_##val: \
1708 return "NETDEV_" __stringify(val);
1709 switch (cmd) {
1710 N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
1711 N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
1712 N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
1713 N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
1714 N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
1715 N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
1716 N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
1717 N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
1718 N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
1719 N(PRE_CHANGEADDR)
1720 }
1721#undef N
1722 return "UNKNOWN_NETDEV_EVENT";
1723}
1724EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
1725
1726static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1727 struct net_device *dev)
1728{
1729 struct netdev_notifier_info info = {
1730 .dev = dev,
1731 };
1732
1733 return nb->notifier_call(nb, val, &info);
1734}
1735
1736static int call_netdevice_register_notifiers(struct notifier_block *nb,
1737 struct net_device *dev)
1738{
1739 int err;
1740
1741 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1742 err = notifier_to_errno(err);
1743 if (err)
1744 return err;
1745
1746 if (!(dev->flags & IFF_UP))
1747 return 0;
1748
1749 call_netdevice_notifier(nb, NETDEV_UP, dev);
1750 return 0;
1751}
1752
1753static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
1754 struct net_device *dev)
1755{
1756 if (dev->flags & IFF_UP) {
1757 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1758 dev);
1759 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1760 }
1761 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1762}
1763
1764static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
1765 struct net *net)
1766{
1767 struct net_device *dev;
1768 int err;
1769
1770 for_each_netdev(net, dev) {
1771 err = call_netdevice_register_notifiers(nb, dev);
1772 if (err)
1773 goto rollback;
1774 }
1775 return 0;
1776
1777rollback:
1778 for_each_netdev_continue_reverse(net, dev)
1779 call_netdevice_unregister_notifiers(nb, dev);
1780 return err;
1781}
1782
1783static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
1784 struct net *net)
1785{
1786 struct net_device *dev;
1787
1788 for_each_netdev(net, dev)
1789 call_netdevice_unregister_notifiers(nb, dev);
1790}
1791
1792static int dev_boot_phase = 1;
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808int register_netdevice_notifier(struct notifier_block *nb)
1809{
1810 struct net *net;
1811 int err;
1812
1813
1814 down_write(&pernet_ops_rwsem);
1815 rtnl_lock();
1816 err = raw_notifier_chain_register(&netdev_chain, nb);
1817 if (err)
1818 goto unlock;
1819 if (dev_boot_phase)
1820 goto unlock;
1821 for_each_net(net) {
1822 err = call_netdevice_register_net_notifiers(nb, net);
1823 if (err)
1824 goto rollback;
1825 }
1826
1827unlock:
1828 rtnl_unlock();
1829 up_write(&pernet_ops_rwsem);
1830 return err;
1831
1832rollback:
1833 for_each_net_continue_reverse(net)
1834 call_netdevice_unregister_net_notifiers(nb, net);
1835
1836 raw_notifier_chain_unregister(&netdev_chain, nb);
1837 goto unlock;
1838}
1839EXPORT_SYMBOL(register_netdevice_notifier);
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855int unregister_netdevice_notifier(struct notifier_block *nb)
1856{
1857 struct net *net;
1858 int err;
1859
1860
1861 down_write(&pernet_ops_rwsem);
1862 rtnl_lock();
1863 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1864 if (err)
1865 goto unlock;
1866
1867 for_each_net(net)
1868 call_netdevice_unregister_net_notifiers(nb, net);
1869
1870unlock:
1871 rtnl_unlock();
1872 up_write(&pernet_ops_rwsem);
1873 return err;
1874}
1875EXPORT_SYMBOL(unregister_netdevice_notifier);
1876
1877static int __register_netdevice_notifier_net(struct net *net,
1878 struct notifier_block *nb,
1879 bool ignore_call_fail)
1880{
1881 int err;
1882
1883 err = raw_notifier_chain_register(&net->netdev_chain, nb);
1884 if (err)
1885 return err;
1886 if (dev_boot_phase)
1887 return 0;
1888
1889 err = call_netdevice_register_net_notifiers(nb, net);
1890 if (err && !ignore_call_fail)
1891 goto chain_unregister;
1892
1893 return 0;
1894
1895chain_unregister:
1896 raw_notifier_chain_unregister(&net->netdev_chain, nb);
1897 return err;
1898}
1899
1900static int __unregister_netdevice_notifier_net(struct net *net,
1901 struct notifier_block *nb)
1902{
1903 int err;
1904
1905 err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
1906 if (err)
1907 return err;
1908
1909 call_netdevice_unregister_net_notifiers(nb, net);
1910 return 0;
1911}
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
1929{
1930 int err;
1931
1932 rtnl_lock();
1933 err = __register_netdevice_notifier_net(net, nb, false);
1934 rtnl_unlock();
1935 return err;
1936}
1937EXPORT_SYMBOL(register_netdevice_notifier_net);
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955int unregister_netdevice_notifier_net(struct net *net,
1956 struct notifier_block *nb)
1957{
1958 int err;
1959
1960 rtnl_lock();
1961 err = __unregister_netdevice_notifier_net(net, nb);
1962 rtnl_unlock();
1963 return err;
1964}
1965EXPORT_SYMBOL(unregister_netdevice_notifier_net);
1966
1967int register_netdevice_notifier_dev_net(struct net_device *dev,
1968 struct notifier_block *nb,
1969 struct netdev_net_notifier *nn)
1970{
1971 int err;
1972
1973 rtnl_lock();
1974 err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
1975 if (!err) {
1976 nn->nb = nb;
1977 list_add(&nn->list, &dev->net_notifier_list);
1978 }
1979 rtnl_unlock();
1980 return err;
1981}
1982EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
1983
1984int unregister_netdevice_notifier_dev_net(struct net_device *dev,
1985 struct notifier_block *nb,
1986 struct netdev_net_notifier *nn)
1987{
1988 int err;
1989
1990 rtnl_lock();
1991 list_del(&nn->list);
1992 err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
1993 rtnl_unlock();
1994 return err;
1995}
1996EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
1997
1998static void move_netdevice_notifiers_dev_net(struct net_device *dev,
1999 struct net *net)
2000{
2001 struct netdev_net_notifier *nn;
2002
2003 list_for_each_entry(nn, &dev->net_notifier_list, list) {
2004 __unregister_netdevice_notifier_net(dev_net(dev), nn->nb);
2005 __register_netdevice_notifier_net(net, nn->nb, true);
2006 }
2007}
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018static int call_netdevice_notifiers_info(unsigned long val,
2019 struct netdev_notifier_info *info)
2020{
2021 struct net *net = dev_net(info->dev);
2022 int ret;
2023
2024 ASSERT_RTNL();
2025
2026
2027
2028
2029
2030 ret = raw_notifier_call_chain(&net->netdev_chain, val, info);
2031 if (ret & NOTIFY_STOP_MASK)
2032 return ret;
2033 return raw_notifier_call_chain(&netdev_chain, val, info);
2034}
2035
2036static int call_netdevice_notifiers_extack(unsigned long val,
2037 struct net_device *dev,
2038 struct netlink_ext_ack *extack)
2039{
2040 struct netdev_notifier_info info = {
2041 .dev = dev,
2042 .extack = extack,
2043 };
2044
2045 return call_netdevice_notifiers_info(val, &info);
2046}
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
2058{
2059 return call_netdevice_notifiers_extack(val, dev, NULL);
2060}
2061EXPORT_SYMBOL(call_netdevice_notifiers);
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072static int call_netdevice_notifiers_mtu(unsigned long val,
2073 struct net_device *dev, u32 arg)
2074{
2075 struct netdev_notifier_info_ext info = {
2076 .info.dev = dev,
2077 .ext.mtu = arg,
2078 };
2079
2080 BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
2081
2082 return call_netdevice_notifiers_info(val, &info.info);
2083}
2084
2085#ifdef CONFIG_NET_INGRESS
2086static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
2087
2088void net_inc_ingress_queue(void)
2089{
2090 static_branch_inc(&ingress_needed_key);
2091}
2092EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
2093
2094void net_dec_ingress_queue(void)
2095{
2096 static_branch_dec(&ingress_needed_key);
2097}
2098EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
2099#endif
2100
2101#ifdef CONFIG_NET_EGRESS
2102static DEFINE_STATIC_KEY_FALSE(egress_needed_key);
2103
2104void net_inc_egress_queue(void)
2105{
2106 static_branch_inc(&egress_needed_key);
2107}
2108EXPORT_SYMBOL_GPL(net_inc_egress_queue);
2109
2110void net_dec_egress_queue(void)
2111{
2112 static_branch_dec(&egress_needed_key);
2113}
2114EXPORT_SYMBOL_GPL(net_dec_egress_queue);
2115#endif
2116
2117static DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
2118#ifdef CONFIG_JUMP_LABEL
2119static atomic_t netstamp_needed_deferred;
2120static atomic_t netstamp_wanted;
2121static void netstamp_clear(struct work_struct *work)
2122{
2123 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
2124 int wanted;
2125
2126 wanted = atomic_add_return(deferred, &netstamp_wanted);
2127 if (wanted > 0)
2128 static_branch_enable(&netstamp_needed_key);
2129 else
2130 static_branch_disable(&netstamp_needed_key);
2131}
2132static DECLARE_WORK(netstamp_work, netstamp_clear);
2133#endif
2134
2135void net_enable_timestamp(void)
2136{
2137#ifdef CONFIG_JUMP_LABEL
2138 int wanted;
2139
2140 while (1) {
2141 wanted = atomic_read(&netstamp_wanted);
2142 if (wanted <= 0)
2143 break;
2144 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
2145 return;
2146 }
2147 atomic_inc(&netstamp_needed_deferred);
2148 schedule_work(&netstamp_work);
2149#else
2150 static_branch_inc(&netstamp_needed_key);
2151#endif
2152}
2153EXPORT_SYMBOL(net_enable_timestamp);
2154
2155void net_disable_timestamp(void)
2156{
2157#ifdef CONFIG_JUMP_LABEL
2158 int wanted;
2159
2160 while (1) {
2161 wanted = atomic_read(&netstamp_wanted);
2162 if (wanted <= 1)
2163 break;
2164 if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
2165 return;
2166 }
2167 atomic_dec(&netstamp_needed_deferred);
2168 schedule_work(&netstamp_work);
2169#else
2170 static_branch_dec(&netstamp_needed_key);
2171#endif
2172}
2173EXPORT_SYMBOL(net_disable_timestamp);
2174
2175static inline void net_timestamp_set(struct sk_buff *skb)
2176{
2177 skb->tstamp = 0;
2178 if (static_branch_unlikely(&netstamp_needed_key))
2179 __net_timestamp(skb);
2180}
2181
2182#define net_timestamp_check(COND, SKB) \
2183 if (static_branch_unlikely(&netstamp_needed_key)) { \
2184 if ((COND) && !(SKB)->tstamp) \
2185 __net_timestamp(SKB); \
2186 } \
2187
2188bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
2189{
2190 unsigned int len;
2191
2192 if (!(dev->flags & IFF_UP))
2193 return false;
2194
2195 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
2196 if (skb->len <= len)
2197 return true;
2198
2199
2200
2201
2202 if (skb_is_gso(skb))
2203 return true;
2204
2205 return false;
2206}
2207EXPORT_SYMBOL_GPL(is_skb_forwardable);
2208
2209int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
2210{
2211 int ret = ____dev_forward_skb(dev, skb);
2212
2213 if (likely(!ret)) {
2214 skb->protocol = eth_type_trans(skb, dev);
2215 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
2216 }
2217
2218 return ret;
2219}
2220EXPORT_SYMBOL_GPL(__dev_forward_skb);
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
2241{
2242 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
2243}
2244EXPORT_SYMBOL_GPL(dev_forward_skb);
2245
2246static inline int deliver_skb(struct sk_buff *skb,
2247 struct packet_type *pt_prev,
2248 struct net_device *orig_dev)
2249{
2250 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
2251 return -ENOMEM;
2252 refcount_inc(&skb->users);
2253 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2254}
2255
2256static inline void deliver_ptype_list_skb(struct sk_buff *skb,
2257 struct packet_type **pt,
2258 struct net_device *orig_dev,
2259 __be16 type,
2260 struct list_head *ptype_list)
2261{
2262 struct packet_type *ptype, *pt_prev = *pt;
2263
2264 list_for_each_entry_rcu(ptype, ptype_list, list) {
2265 if (ptype->type != type)
2266 continue;
2267 if (pt_prev)
2268 deliver_skb(skb, pt_prev, orig_dev);
2269 pt_prev = ptype;
2270 }
2271 *pt = pt_prev;
2272}
2273
2274static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
2275{
2276 if (!ptype->af_packet_priv || !skb->sk)
2277 return false;
2278
2279 if (ptype->id_match)
2280 return ptype->id_match(ptype, skb->sk);
2281 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
2282 return true;
2283
2284 return false;
2285}
2286
2287
2288
2289
2290
2291
2292bool dev_nit_active(struct net_device *dev)
2293{
2294 return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
2295}
2296EXPORT_SYMBOL_GPL(dev_nit_active);
2297
2298
2299
2300
2301
2302
2303void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
2304{
2305 struct packet_type *ptype;
2306 struct sk_buff *skb2 = NULL;
2307 struct packet_type *pt_prev = NULL;
2308 struct list_head *ptype_list = &ptype_all;
2309
2310 rcu_read_lock();
2311again:
2312 list_for_each_entry_rcu(ptype, ptype_list, list) {
2313 if (ptype->ignore_outgoing)
2314 continue;
2315
2316
2317
2318
2319 if (skb_loop_sk(ptype, skb))
2320 continue;
2321
2322 if (pt_prev) {
2323 deliver_skb(skb2, pt_prev, skb->dev);
2324 pt_prev = ptype;
2325 continue;
2326 }
2327
2328
2329 skb2 = skb_clone(skb, GFP_ATOMIC);
2330 if (!skb2)
2331 goto out_unlock;
2332
2333 net_timestamp_set(skb2);
2334
2335
2336
2337
2338
2339 skb_reset_mac_header(skb2);
2340
2341 if (skb_network_header(skb2) < skb2->data ||
2342 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
2343 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
2344 ntohs(skb2->protocol),
2345 dev->name);
2346 skb_reset_network_header(skb2);
2347 }
2348
2349 skb2->transport_header = skb2->network_header;
2350 skb2->pkt_type = PACKET_OUTGOING;
2351 pt_prev = ptype;
2352 }
2353
2354 if (ptype_list == &ptype_all) {
2355 ptype_list = &dev->ptype_all;
2356 goto again;
2357 }
2358out_unlock:
2359 if (pt_prev) {
2360 if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC))
2361 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
2362 else
2363 kfree_skb(skb2);
2364 }
2365 rcu_read_unlock();
2366}
2367EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382static void netif_setup_tc(struct net_device *dev, unsigned int txq)
2383{
2384 int i;
2385 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2386
2387
2388 if (tc->offset + tc->count > txq) {
2389 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
2390 dev->num_tc = 0;
2391 return;
2392 }
2393
2394
2395 for (i = 1; i < TC_BITMASK + 1; i++) {
2396 int q = netdev_get_prio_tc_map(dev, i);
2397
2398 tc = &dev->tc_to_txq[q];
2399 if (tc->offset + tc->count > txq) {
2400 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
2401 i, q);
2402 netdev_set_prio_tc_map(dev, i, 0);
2403 }
2404 }
2405}
2406
2407int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
2408{
2409 if (dev->num_tc) {
2410 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
2411 int i;
2412
2413
2414 for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
2415 if ((txq - tc->offset) < tc->count)
2416 return i;
2417 }
2418
2419
2420 return -1;
2421 }
2422
2423 return 0;
2424}
2425EXPORT_SYMBOL(netdev_txq_to_tc);
2426
2427#ifdef CONFIG_XPS
2428struct static_key xps_needed __read_mostly;
2429EXPORT_SYMBOL(xps_needed);
2430struct static_key xps_rxqs_needed __read_mostly;
2431EXPORT_SYMBOL(xps_rxqs_needed);
2432static DEFINE_MUTEX(xps_map_mutex);
2433#define xmap_dereference(P) \
2434 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
2435
2436static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
2437 int tci, u16 index)
2438{
2439 struct xps_map *map = NULL;
2440 int pos;
2441
2442 if (dev_maps)
2443 map = xmap_dereference(dev_maps->attr_map[tci]);
2444 if (!map)
2445 return false;
2446
2447 for (pos = map->len; pos--;) {
2448 if (map->queues[pos] != index)
2449 continue;
2450
2451 if (map->len > 1) {
2452 map->queues[pos] = map->queues[--map->len];
2453 break;
2454 }
2455
2456 RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
2457 kfree_rcu(map, rcu);
2458 return false;
2459 }
2460
2461 return true;
2462}
2463
2464static bool remove_xps_queue_cpu(struct net_device *dev,
2465 struct xps_dev_maps *dev_maps,
2466 int cpu, u16 offset, u16 count)
2467{
2468 int num_tc = dev->num_tc ? : 1;
2469 bool active = false;
2470 int tci;
2471
2472 for (tci = cpu * num_tc; num_tc--; tci++) {
2473 int i, j;
2474
2475 for (i = count, j = offset; i--; j++) {
2476 if (!remove_xps_queue(dev_maps, tci, j))
2477 break;
2478 }
2479
2480 active |= i < 0;
2481 }
2482
2483 return active;
2484}
2485
2486static void reset_xps_maps(struct net_device *dev,
2487 struct xps_dev_maps *dev_maps,
2488 bool is_rxqs_map)
2489{
2490 if (is_rxqs_map) {
2491 static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
2492 RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
2493 } else {
2494 RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
2495 }
2496 static_key_slow_dec_cpuslocked(&xps_needed);
2497 kfree_rcu(dev_maps, rcu);
2498}
2499
2500static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
2501 struct xps_dev_maps *dev_maps, unsigned int nr_ids,
2502 u16 offset, u16 count, bool is_rxqs_map)
2503{
2504 bool active = false;
2505 int i, j;
2506
2507 for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
2508 j < nr_ids;)
2509 active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
2510 count);
2511 if (!active)
2512 reset_xps_maps(dev, dev_maps, is_rxqs_map);
2513
2514 if (!is_rxqs_map) {
2515 for (i = offset + (count - 1); count--; i--) {
2516 netdev_queue_numa_node_write(
2517 netdev_get_tx_queue(dev, i),
2518 NUMA_NO_NODE);
2519 }
2520 }
2521}
2522
2523static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
2524 u16 count)
2525{
2526 const unsigned long *possible_mask = NULL;
2527 struct xps_dev_maps *dev_maps;
2528 unsigned int nr_ids;
2529
2530 if (!static_key_false(&xps_needed))
2531 return;
2532
2533 cpus_read_lock();
2534 mutex_lock(&xps_map_mutex);
2535
2536 if (static_key_false(&xps_rxqs_needed)) {
2537 dev_maps = xmap_dereference(dev->xps_rxqs_map);
2538 if (dev_maps) {
2539 nr_ids = dev->num_rx_queues;
2540 clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
2541 offset, count, true);
2542 }
2543 }
2544
2545 dev_maps = xmap_dereference(dev->xps_cpus_map);
2546 if (!dev_maps)
2547 goto out_no_maps;
2548
2549 if (num_possible_cpus() > 1)
2550 possible_mask = cpumask_bits(cpu_possible_mask);
2551 nr_ids = nr_cpu_ids;
2552 clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
2553 false);
2554
2555out_no_maps:
2556 mutex_unlock(&xps_map_mutex);
2557 cpus_read_unlock();
2558}
2559
2560static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
2561{
2562 netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
2563}
2564
2565static struct xps_map *expand_xps_map(struct xps_map *map, int attr_index,
2566 u16 index, bool is_rxqs_map)
2567{
2568 struct xps_map *new_map;
2569 int alloc_len = XPS_MIN_MAP_ALLOC;
2570 int i, pos;
2571
2572 for (pos = 0; map && pos < map->len; pos++) {
2573 if (map->queues[pos] != index)
2574 continue;
2575 return map;
2576 }
2577
2578
2579 if (map) {
2580 if (pos < map->alloc_len)
2581 return map;
2582
2583 alloc_len = map->alloc_len * 2;
2584 }
2585
2586
2587
2588
2589 if (is_rxqs_map)
2590 new_map = kzalloc(XPS_MAP_SIZE(alloc_len), GFP_KERNEL);
2591 else
2592 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2593 cpu_to_node(attr_index));
2594 if (!new_map)
2595 return NULL;
2596
2597 for (i = 0; i < pos; i++)
2598 new_map->queues[i] = map->queues[i];
2599 new_map->alloc_len = alloc_len;
2600 new_map->len = pos;
2601
2602 return new_map;
2603}
2604
2605
2606int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
2607 u16 index, bool is_rxqs_map)
2608{
2609 const unsigned long *online_mask = NULL, *possible_mask = NULL;
2610 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2611 int i, j, tci, numa_node_id = -2;
2612 int maps_sz, num_tc = 1, tc = 0;
2613 struct xps_map *map, *new_map;
2614 bool active = false;
2615 unsigned int nr_ids;
2616
2617 if (dev->num_tc) {
2618
2619 num_tc = dev->num_tc;
2620 if (num_tc < 0)
2621 return -EINVAL;
2622
2623
2624 dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
2625
2626 tc = netdev_txq_to_tc(dev, index);
2627 if (tc < 0)
2628 return -EINVAL;
2629 }
2630
2631 mutex_lock(&xps_map_mutex);
2632 if (is_rxqs_map) {
2633 maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
2634 dev_maps = xmap_dereference(dev->xps_rxqs_map);
2635 nr_ids = dev->num_rx_queues;
2636 } else {
2637 maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
2638 if (num_possible_cpus() > 1) {
2639 online_mask = cpumask_bits(cpu_online_mask);
2640 possible_mask = cpumask_bits(cpu_possible_mask);
2641 }
2642 dev_maps = xmap_dereference(dev->xps_cpus_map);
2643 nr_ids = nr_cpu_ids;
2644 }
2645
2646 if (maps_sz < L1_CACHE_BYTES)
2647 maps_sz = L1_CACHE_BYTES;
2648
2649
2650 for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
2651 j < nr_ids;) {
2652 if (!new_dev_maps)
2653 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2654 if (!new_dev_maps) {
2655 mutex_unlock(&xps_map_mutex);
2656 return -ENOMEM;
2657 }
2658
2659 tci = j * num_tc + tc;
2660 map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
2661 NULL;
2662
2663 map = expand_xps_map(map, j, index, is_rxqs_map);
2664 if (!map)
2665 goto error;
2666
2667 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2668 }
2669
2670 if (!new_dev_maps)
2671 goto out_no_new_maps;
2672
2673 if (!dev_maps) {
2674
2675 static_key_slow_inc_cpuslocked(&xps_needed);
2676 if (is_rxqs_map)
2677 static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
2678 }
2679
2680 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2681 j < nr_ids;) {
2682
2683 for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
2684
2685 map = xmap_dereference(dev_maps->attr_map[tci]);
2686 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2687 }
2688
2689
2690
2691
2692 tci = j * num_tc + tc;
2693
2694 if (netif_attr_test_mask(j, mask, nr_ids) &&
2695 netif_attr_test_online(j, online_mask, nr_ids)) {
2696
2697 int pos = 0;
2698
2699 map = xmap_dereference(new_dev_maps->attr_map[tci]);
2700 while ((pos < map->len) && (map->queues[pos] != index))
2701 pos++;
2702
2703 if (pos == map->len)
2704 map->queues[map->len++] = index;
2705#ifdef CONFIG_NUMA
2706 if (!is_rxqs_map) {
2707 if (numa_node_id == -2)
2708 numa_node_id = cpu_to_node(j);
2709 else if (numa_node_id != cpu_to_node(j))
2710 numa_node_id = -1;
2711 }
2712#endif
2713 } else if (dev_maps) {
2714
2715 map = xmap_dereference(dev_maps->attr_map[tci]);
2716 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2717 }
2718
2719
2720 for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
2721
2722 map = xmap_dereference(dev_maps->attr_map[tci]);
2723 RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
2724 }
2725 }
2726
2727 if (is_rxqs_map)
2728 rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
2729 else
2730 rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
2731
2732
2733 if (!dev_maps)
2734 goto out_no_old_maps;
2735
2736 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2737 j < nr_ids;) {
2738 for (i = num_tc, tci = j * num_tc; i--; tci++) {
2739 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2740 map = xmap_dereference(dev_maps->attr_map[tci]);
2741 if (map && map != new_map)
2742 kfree_rcu(map, rcu);
2743 }
2744 }
2745
2746 kfree_rcu(dev_maps, rcu);
2747
2748out_no_old_maps:
2749 dev_maps = new_dev_maps;
2750 active = true;
2751
2752out_no_new_maps:
2753 if (!is_rxqs_map) {
2754
2755 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2756 (numa_node_id >= 0) ?
2757 numa_node_id : NUMA_NO_NODE);
2758 }
2759
2760 if (!dev_maps)
2761 goto out_no_maps;
2762
2763
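	/* removes tx-queue from unused CPUs/rx-queues */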
2764 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2765 j < nr_ids;) {
2766 for (i = tc, tci = j * num_tc; i--; tci++)
2767 active |= remove_xps_queue(dev_maps, tci, index);
2768 if (!netif_attr_test_mask(j, mask, nr_ids) ||
2769 !netif_attr_test_online(j, online_mask, nr_ids))
2770 active |= remove_xps_queue(dev_maps, tci, index);
2771 for (i = num_tc - tc, tci++; --i; tci++)
2772 active |= remove_xps_queue(dev_maps, tci, index);
2773 }
2774
2775
2776 if (!active)
2777 reset_xps_maps(dev, dev_maps, is_rxqs_map);
2778
2779out_no_maps:
2780 mutex_unlock(&xps_map_mutex);
2781
2782 return 0;
2783error:
2784
2785 for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
2786 j < nr_ids;) {
2787 for (i = num_tc, tci = j * num_tc; i--; tci++) {
2788 new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
2789 map = dev_maps ?
2790 xmap_dereference(dev_maps->attr_map[tci]) :
2791 NULL;
2792 if (new_map && new_map != map)
2793 kfree(new_map);
2794 }
2795 }
2796
2797 mutex_unlock(&xps_map_mutex);
2798
2799 kfree(new_dev_maps);
2800 return -ENOMEM;
2801}
2802EXPORT_SYMBOL_GPL(__netif_set_xps_queue);
2803
2804int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2805 u16 index)
2806{
2807 int ret;
2808
2809 cpus_read_lock();
2810 ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
2811 cpus_read_unlock();
2812
2813 return ret;
2814}
2815EXPORT_SYMBOL(netif_set_xps_queue);
2816
2817#endif
2818static void netdev_unbind_all_sb_channels(struct net_device *dev)
2819{
2820 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2821
2822
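	/* Unbind any subordinate channels */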
2823 while (txq-- != &dev->_tx[0]) {
2824 if (txq->sb_dev)
2825 netdev_unbind_sb_channel(dev, txq->sb_dev);
2826 }
2827}
2828
2829void netdev_reset_tc(struct net_device *dev)
2830{
2831#ifdef CONFIG_XPS
2832 netif_reset_xps_queues_gt(dev, 0);
2833#endif
2834 netdev_unbind_all_sb_channels(dev);
2835
2836
2837 dev->num_tc = 0;
2838 memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
2839 memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
2840}
2841EXPORT_SYMBOL(netdev_reset_tc);
2842
2843int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
2844{
2845 if (tc >= dev->num_tc)
2846 return -EINVAL;
2847
2848#ifdef CONFIG_XPS
2849 netif_reset_xps_queues(dev, offset, count);
2850#endif
2851 dev->tc_to_txq[tc].count = count;
2852 dev->tc_to_txq[tc].offset = offset;
2853 return 0;
2854}
2855EXPORT_SYMBOL(netdev_set_tc_queue);
2856
2857int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
2858{
2859 if (num_tc > TC_MAX_QUEUE)
2860 return -EINVAL;
2861
2862#ifdef CONFIG_XPS
2863 netif_reset_xps_queues_gt(dev, 0);
2864#endif
2865 netdev_unbind_all_sb_channels(dev);
2866
2867 dev->num_tc = num_tc;
2868 return 0;
2869}
2870EXPORT_SYMBOL(netdev_set_num_tc);
2871
2872void netdev_unbind_sb_channel(struct net_device *dev,
2873 struct net_device *sb_dev)
2874{
2875 struct netdev_queue *txq = &dev->_tx[dev->num_tx_queues];
2876
2877#ifdef CONFIG_XPS
2878 netif_reset_xps_queues_gt(sb_dev, 0);
2879#endif
2880 memset(sb_dev->tc_to_txq, 0, sizeof(sb_dev->tc_to_txq));
2881 memset(sb_dev->prio_tc_map, 0, sizeof(sb_dev->prio_tc_map));
2882
2883 while (txq-- != &dev->_tx[0]) {
2884 if (txq->sb_dev == sb_dev)
2885 txq->sb_dev = NULL;
2886 }
2887}
2888EXPORT_SYMBOL(netdev_unbind_sb_channel);
2889
2890int netdev_bind_sb_channel_queue(struct net_device *dev,
2891 struct net_device *sb_dev,
2892 u8 tc, u16 count, u16 offset)
2893{
2894
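	/* Make certain the sb_dev and dev are already configured */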
2895 if (sb_dev->num_tc >= 0 || tc >= dev->num_tc)
2896 return -EINVAL;
2897
2898
2899 if ((offset + count) > dev->real_num_tx_queues)
2900 return -EINVAL;
2901
2902
2903 sb_dev->tc_to_txq[tc].count = count;
2904 sb_dev->tc_to_txq[tc].offset = offset;
2905
2906
2907
2908
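	/* Provide a way for a Tx queue to find the tc_to_txq map or
	 * XPS map for itself.
	 */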
2909 while (count--)
2910 netdev_get_tx_queue(dev, count + offset)->sb_dev = sb_dev;
2911
2912 return 0;
2913}
2914EXPORT_SYMBOL(netdev_bind_sb_channel_queue);
2915
2916int netdev_set_sb_channel(struct net_device *dev, u16 channel)
2917{
2918
2919 if (netif_is_multiqueue(dev))
2920 return -ENODEV;
2921
2922
2923
2924
2925
2926
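	/* We allow channels 1 - 32767 to be used for subordinate channels.
	 * Channel 0 is meant to be "native" mode and used only to represent
	 * the main root device.  We allow writing 0 to reset the device back
	 * to normal mode after being used as a subordinate channel.
	 */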
2927 if (channel > S16_MAX)
2928 return -EINVAL;
2929
2930 dev->num_tc = -channel;
2931
2932 return 0;
2933}
2934EXPORT_SYMBOL(netdev_set_sb_channel);
2935
2936
2937
2938
2939
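/*
 * netif_set_real_num_tx_queues - set the actual number of TX queues in use
 * @dev: network device
 * @txq: number of TX queues, in the range 1 .. dev->num_tx_queues
 *
 * Must be called under the rtnl lock once the device is registered.
 * Returns 0 on success or a negative errno code.  When the count is
 * reduced, the qdiscs and XPS maps of the queues above the new limit
 * are reset.
 */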
2940int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2941{
2942 bool disabling;
2943 int rc;
2944
2945 disabling = txq < dev->real_num_tx_queues;
2946
2947 if (txq < 1 || txq > dev->num_tx_queues)
2948 return -EINVAL;
2949
2950 if (dev->reg_state == NETREG_REGISTERED ||
2951 dev->reg_state == NETREG_UNREGISTERING) {
2952 ASSERT_RTNL();
2953
2954 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2955 txq);
2956 if (rc)
2957 return rc;
2958
2959 if (dev->num_tc)
2960 netif_setup_tc(dev, txq);
2961
2962 dev->real_num_tx_queues = txq;
2963
2964 if (disabling) {
2965 synchronize_net();
2966 qdisc_reset_all_tx_gt(dev, txq);
2967#ifdef CONFIG_XPS
2968 netif_reset_xps_queues_gt(dev, txq);
2969#endif
2970 }
2971 } else {
2972 dev->real_num_tx_queues = txq;
2973 }
2974
2975 return 0;
2976}
2977EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2978
2979#ifdef CONFIG_SYSFS
2989
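/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.
 */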
2990int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2991{
2992 int rc;
2993
2994 if (rxq < 1 || rxq > dev->num_rx_queues)
2995 return -EINVAL;
2996
2997 if (dev->reg_state == NETREG_REGISTERED) {
2998 ASSERT_RTNL();
2999
3000 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
3001 rxq);
3002 if (rc)
3003 return rc;
3004 }
3005
3006 dev->real_num_rx_queues = rxq;
3007 return 0;
3008}
3009EXPORT_SYMBOL(netif_set_real_num_rx_queues);
3010#endif
3011
3012
3013
3014
3015
3016
3017
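/**
 * netif_get_num_default_rss_queues - default number of RSS queues
 *
 * This routine should set an upper limit on the number of RSS queues
 * used by default by multiqueue devices.  A kdump kernel always gets
 * a single queue.
 */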
3018int netif_get_num_default_rss_queues(void)
3019{
3020 return is_kdump_kernel() ?
3021 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
3022}
3023EXPORT_SYMBOL(netif_get_num_default_rss_queues);
3024
3025static void __netif_reschedule(struct Qdisc *q)
3026{
3027 struct softnet_data *sd;
3028 unsigned long flags;
3029
3030 local_irq_save(flags);
3031 sd = this_cpu_ptr(&softnet_data);
3032 q->next_sched = NULL;
3033 *sd->output_queue_tailp = q;
3034 sd->output_queue_tailp = &q->next_sched;
3035 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3036 local_irq_restore(flags);
3037}
3038
3039void __netif_schedule(struct Qdisc *q)
3040{
3041 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
3042 __netif_reschedule(q);
3043}
3044EXPORT_SYMBOL(__netif_schedule);
3045
3046struct dev_kfree_skb_cb {
3047 enum skb_free_reason reason;
3048};
3049
3050static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
3051{
3052 return (struct dev_kfree_skb_cb *)skb->cb;
3053}
3054
3055void netif_schedule_queue(struct netdev_queue *txq)
3056{
3057 rcu_read_lock();
3058 if (!netif_xmit_stopped(txq)) {
3059 struct Qdisc *q = rcu_dereference(txq->qdisc);
3060
3061 __netif_schedule(q);
3062 }
3063 rcu_read_unlock();
3064}
3065EXPORT_SYMBOL(netif_schedule_queue);
3066
3067void netif_tx_wake_queue(struct netdev_queue *dev_queue)
3068{
3069 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
3070 struct Qdisc *q;
3071
3072 rcu_read_lock();
3073 q = rcu_dereference(dev_queue->qdisc);
3074 __netif_schedule(q);
3075 rcu_read_unlock();
3076 }
3077}
3078EXPORT_SYMBOL(netif_tx_wake_queue);
3079
3080void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
3081{
3082 unsigned long flags;
3083
3084 if (unlikely(!skb))
3085 return;
3086
3087 if (likely(refcount_read(&skb->users) == 1)) {
3088 smp_rmb();
3089 refcount_set(&skb->users, 0);
3090 } else if (likely(!refcount_dec_and_test(&skb->users))) {
3091 return;
3092 }
3093 get_kfree_skb_cb(skb)->reason = reason;
3094 local_irq_save(flags);
3095 skb->next = __this_cpu_read(softnet_data.completion_queue);
3096 __this_cpu_write(softnet_data.completion_queue, skb);
3097 raise_softirq_irqoff(NET_TX_SOFTIRQ);
3098 local_irq_restore(flags);
3099}
3100EXPORT_SYMBOL(__dev_kfree_skb_irq);
3101
3102void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
3103{
3104 if (in_irq() || irqs_disabled())
3105 __dev_kfree_skb_irq(skb, reason);
3106 else
3107 dev_kfree_skb(skb);
3108}
3109EXPORT_SYMBOL(__dev_kfree_skb_any);
3110
3111
3112
3113
3114
3115
3116
3117
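/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 * All TX queues are stopped if the device is currently running.
 */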
3118void netif_device_detach(struct net_device *dev)
3119{
3120 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
3121 netif_running(dev)) {
3122 netif_tx_stop_all_queues(dev);
3123 }
3124}
3125EXPORT_SYMBOL(netif_device_detach);
3126
3127
3128
3129
3130
3131
3132
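/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached from system and restart if needed.
 * TX queues are woken up and the watchdog is re-armed.
 */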
3133void netif_device_attach(struct net_device *dev)
3134{
3135 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
3136 netif_running(dev)) {
3137 netif_tx_wake_all_queues(dev);
3138 __netdev_watchdog_up(dev);
3139 }
3140}
3141EXPORT_SYMBOL(netif_device_attach);
3142
3143
3144
3145
3146
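/*
 * Returns a Tx hash for the given packet, scaled to the range of Tx
 * queues (or the subordinate device's traffic-class range) over which
 * the hash may be distributed.
 */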
3147static u16 skb_tx_hash(const struct net_device *dev,
3148 const struct net_device *sb_dev,
3149 struct sk_buff *skb)
3150{
3151 u32 hash;
3152 u16 qoffset = 0;
3153 u16 qcount = dev->real_num_tx_queues;
3154
3155 if (dev->num_tc) {
3156 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
3157
3158 qoffset = sb_dev->tc_to_txq[tc].offset;
3159 qcount = sb_dev->tc_to_txq[tc].count;
3160 }
3161
3162 if (skb_rx_queue_recorded(skb)) {
3163 hash = skb_get_rx_queue(skb);
3164 if (hash >= qoffset)
3165 hash -= qoffset;
3166 while (unlikely(hash >= qcount))
3167 hash -= qcount;
3168 return hash + qoffset;
3169 }
3170
3171 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
3172}
3173
3174static void skb_warn_bad_offload(const struct sk_buff *skb)
3175{
3176 static const netdev_features_t null_features;
3177 struct net_device *dev = skb->dev;
3178 const char *name = "";
3179
3180 if (!net_ratelimit())
3181 return;
3182
3183 if (dev) {
3184 if (dev->dev.parent)
3185 name = dev_driver_string(dev->dev.parent);
3186 else
3187 name = netdev_name(dev);
3188 }
3189 skb_dump(KERN_WARNING, skb, false);
3190 WARN(1, "%s: caps=(%pNF, %pNF)\n",
3191 name, dev ? &dev->features : &null_features,
3192 skb->sk ? &skb->sk->sk_route_caps : &null_features);
3193}
3194
3195
3196
3197
3198
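/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */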
3199int skb_checksum_help(struct sk_buff *skb)
3200{
3201 __wsum csum;
3202 int ret = 0, offset;
3203
3204 if (skb->ip_summed == CHECKSUM_COMPLETE)
3205 goto out_set_summed;
3206
3207 if (unlikely(skb_shinfo(skb)->gso_size)) {
3208 skb_warn_bad_offload(skb);
3209 return -EINVAL;
3210 }
3211
3212
3213
3214
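	/* Before computing a checksum, we should make sure no frag could
	 * be modified by an external entity : checksum could be wrong.
	 */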
3215 if (skb_has_shared_frag(skb)) {
3216 ret = __skb_linearize(skb);
3217 if (ret)
3218 goto out;
3219 }
3220
3221 offset = skb_checksum_start_offset(skb);
3222 BUG_ON(offset >= skb_headlen(skb));
3223 csum = skb_checksum(skb, offset, skb->len - offset, 0);
3224
3225 offset += skb->csum_offset;
3226 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
3227
3228 ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
3229 if (ret)
3230 goto out;
3231
3232 *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
3233out_set_summed:
3234 skb->ip_summed = CHECKSUM_NONE;
3235out:
3236 return ret;
3237}
3238EXPORT_SYMBOL(skb_checksum_help);
3239
3240int skb_crc32c_csum_help(struct sk_buff *skb)
3241{
3242 __le32 crc32c_csum;
3243 int ret = 0, offset, start;
3244
3245 if (skb->ip_summed != CHECKSUM_PARTIAL)
3246 goto out;
3247
3248 if (unlikely(skb_is_gso(skb)))
3249 goto out;
3250
3251
3252
3253
3254 if (unlikely(skb_has_shared_frag(skb))) {
3255 ret = __skb_linearize(skb);
3256 if (ret)
3257 goto out;
3258 }
3259 start = skb_checksum_start_offset(skb);
3260 offset = start + offsetof(struct sctphdr, checksum);
3261 if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
3262 ret = -EINVAL;
3263 goto out;
3264 }
3265
3266 ret = skb_ensure_writable(skb, offset + sizeof(__le32));
3267 if (ret)
3268 goto out;
3269
3270 crc32c_csum = cpu_to_le32(~__skb_checksum(skb, start,
3271 skb->len - start, ~(__u32)0,
3272 crc32c_csum_stub));
3273 *(__le32 *)(skb->data + offset) = crc32c_csum;
3274 skb->ip_summed = CHECKSUM_NONE;
3275 skb->csum_not_inet = 0;
3276out:
3277 return ret;
3278}
3279
3280__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
3281{
3282 __be16 type = skb->protocol;
3283
3284
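	/* Tunnel gso handlers can set protocol to ethernet. */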
3285 if (type == htons(ETH_P_TEB)) {
3286 struct ethhdr *eth;
3287
3288 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
3289 return 0;
3290
3291 eth = (struct ethhdr *)skb->data;
3292 type = eth->h_proto;
3293 }
3294
3295 return __vlan_get_protocol(skb, type, depth);
3296}
3297
3298
3299
3300
3301
3302
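/**
 *	skb_mac_gso_segment - mac layer segmentation handler
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 */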
3303struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
3304 netdev_features_t features)
3305{
3306 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
3307 struct packet_offload *ptype;
3308 int vlan_depth = skb->mac_len;
3309 __be16 type = skb_network_protocol(skb, &vlan_depth);
3310
3311 if (unlikely(!type))
3312 return ERR_PTR(-EINVAL);
3313
3314 __skb_pull(skb, vlan_depth);
3315
3316 rcu_read_lock();
3317 list_for_each_entry_rcu(ptype, &offload_base, list) {
3318 if (ptype->type == type && ptype->callbacks.gso_segment) {
3319 segs = ptype->callbacks.gso_segment(skb, features);
3320 break;
3321 }
3322 }
3323 rcu_read_unlock();
3324
3325 __skb_push(skb, skb->data - skb_mac_header(skb));
3326
3327 return segs;
3328}
3329EXPORT_SYMBOL(skb_mac_gso_segment);
3330
3331
3332
3333
3334static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
3335{
3336 if (tx_path)
3337 return skb->ip_summed != CHECKSUM_PARTIAL &&
3338 skb->ip_summed != CHECKSUM_UNNECESSARY;
3339
3340 return skb->ip_summed == CHECKSUM_NONE;
3341}
3342
3343
3355
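/**
 *	__skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *	@tx_path: whether it is called in TX path
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
 */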
3356struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
3357 netdev_features_t features, bool tx_path)
3358{
3359 struct sk_buff *segs;
3360
3361 if (unlikely(skb_needs_check(skb, tx_path))) {
3362 int err;
3363
3364
3365 err = skb_cow_head(skb, 0);
3366 if (err < 0)
3367 return ERR_PTR(err);
3368 }
3369
3370
3371
3372
3373
3374 if (features & NETIF_F_GSO_PARTIAL) {
3375 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
3376 struct net_device *dev = skb->dev;
3377
3378 partial_features |= dev->features & dev->gso_partial_features;
3379 if (!skb_gso_ok(skb, features | partial_features))
3380 features &= ~NETIF_F_GSO_PARTIAL;
3381 }
3382
3383 BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
3384 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
3385
3386 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
3387 SKB_GSO_CB(skb)->encap_level = 0;
3388
3389 skb_reset_mac_header(skb);
3390 skb_reset_mac_len(skb);
3391
3392 segs = skb_mac_gso_segment(skb, features);
3393
3394 if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
3395 skb_warn_bad_offload(skb);
3396
3397 return segs;
3398}
3399EXPORT_SYMBOL(__skb_gso_segment);
3400
3401
3402#ifdef CONFIG_BUG
3403void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
3404{
3405 if (net_ratelimit()) {
3406 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
3407 skb_dump(KERN_ERR, skb, true);
3408 dump_stack();
3409 }
3410}
3411EXPORT_SYMBOL(netdev_rx_csum_fault);
3412#endif
3413
3414
3415static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
3416{
3417#ifdef CONFIG_HIGHMEM
3418 int i;
3419
3420 if (!(dev->features & NETIF_F_HIGHDMA)) {
3421 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
3422 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
3423
3424 if (PageHighMem(skb_frag_page(frag)))
3425 return 1;
3426 }
3427 }
3428#endif
3429 return 0;
3430}
3431
3432
3433
3434
3435#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
3436static netdev_features_t net_mpls_features(struct sk_buff *skb,
3437 netdev_features_t features,
3438 __be16 type)
3439{
3440 if (eth_p_mpls(type))
3441 features &= skb->dev->mpls_features;
3442
3443 return features;
3444}
3445#else
3446static netdev_features_t net_mpls_features(struct sk_buff *skb,
3447 netdev_features_t features,
3448 __be16 type)
3449{
3450 return features;
3451}
3452#endif
3453
3454static netdev_features_t harmonize_features(struct sk_buff *skb,
3455 netdev_features_t features)
3456{
3457 __be16 type;
3458
3459 type = skb_network_protocol(skb, NULL);
3460 features = net_mpls_features(skb, features, type);
3461
3462 if (skb->ip_summed != CHECKSUM_NONE &&
3463 !can_checksum_protocol(features, type)) {
3464 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
3465 }
3466 if (illegal_highdma(skb->dev, skb))
3467 features &= ~NETIF_F_SG;
3468
3469 return features;
3470}
3471
3472netdev_features_t passthru_features_check(struct sk_buff *skb,
3473 struct net_device *dev,
3474 netdev_features_t features)
3475{
3476 return features;
3477}
3478EXPORT_SYMBOL(passthru_features_check);
3479
3480static netdev_features_t dflt_features_check(struct sk_buff *skb,
3481 struct net_device *dev,
3482 netdev_features_t features)
3483{
3484 return vlan_features_check(skb, features);
3485}
3486
3487static netdev_features_t gso_features_check(const struct sk_buff *skb,
3488 struct net_device *dev,
3489 netdev_features_t features)
3490{
3491 u16 gso_segs = skb_shinfo(skb)->gso_segs;
3492
3493 if (gso_segs > dev->gso_max_segs)
3494 return features & ~NETIF_F_GSO_MASK;
3495
3496
3497
3498
3499
3500
3501
3502 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
3503 features &= ~dev->gso_partial_features;
3504
3505
3506
3507
3508 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
3509 struct iphdr *iph = skb->encapsulation ?
3510 inner_ip_hdr(skb) : ip_hdr(skb);
3511
3512 if (!(iph->frag_off & htons(IP_DF)))
3513 features &= ~NETIF_F_TSO_MANGLEID;
3514 }
3515
3516 return features;
3517}
3518
3519netdev_features_t netif_skb_features(struct sk_buff *skb)
3520{
3521 struct net_device *dev = skb->dev;
3522 netdev_features_t features = dev->features;
3523
3524 if (skb_is_gso(skb))
3525 features = gso_features_check(skb, dev, features);
3526
3527
3528
3529
3530
3531 if (skb->encapsulation)
3532 features &= dev->hw_enc_features;
3533
3534 if (skb_vlan_tagged(skb))
3535 features = netdev_intersect_features(features,
3536 dev->vlan_features |
3537 NETIF_F_HW_VLAN_CTAG_TX |
3538 NETIF_F_HW_VLAN_STAG_TX);
3539
3540 if (dev->netdev_ops->ndo_features_check)
3541 features &= dev->netdev_ops->ndo_features_check(skb, dev,
3542 features);
3543 else
3544 features &= dflt_features_check(skb, dev, features);
3545
3546 return harmonize_features(skb, features);
3547}
3548EXPORT_SYMBOL(netif_skb_features);
3549
3550static int xmit_one(struct sk_buff *skb, struct net_device *dev,
3551 struct netdev_queue *txq, bool more)
3552{
3553 unsigned int len;
3554 int rc;
3555
3556 if (dev_nit_active(dev))
3557 dev_queue_xmit_nit(skb, dev);
3558
3559 len = skb->len;
3560 trace_net_dev_start_xmit(skb, dev);
3561 rc = netdev_start_xmit(skb, dev, txq, more);
3562 trace_net_dev_xmit(skb, rc, dev, len);
3563
3564 return rc;
3565}
3566
3567struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
3568 struct netdev_queue *txq, int *ret)
3569{
3570 struct sk_buff *skb = first;
3571 int rc = NETDEV_TX_OK;
3572
3573 while (skb) {
3574 struct sk_buff *next = skb->next;
3575
3576 skb_mark_not_on_list(skb);
3577 rc = xmit_one(skb, dev, txq, next != NULL);
3578 if (unlikely(!dev_xmit_complete(rc))) {
3579 skb->next = next;
3580 goto out;
3581 }
3582
3583 skb = next;
3584 if (netif_tx_queue_stopped(txq) && skb) {
3585 rc = NETDEV_TX_BUSY;
3586 break;
3587 }
3588 }
3589
3590out:
3591 *ret = rc;
3592 return skb;
3593}
3594
3595static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
3596 netdev_features_t features)
3597{
3598 if (skb_vlan_tag_present(skb) &&
3599 !vlan_hw_offload_capable(features, skb->vlan_proto))
3600 skb = __vlan_hwaccel_push_inside(skb);
3601 return skb;
3602}
3603
3604int skb_csum_hwoffload_help(struct sk_buff *skb,
3605 const netdev_features_t features)
3606{
3607 if (unlikely(skb->csum_not_inet))
3608 return !!(features & NETIF_F_SCTP_CRC) ? 0 :
3609 skb_crc32c_csum_help(skb);
3610
3611 return !!(features & NETIF_F_CSUM_MASK) ? 0 : skb_checksum_help(skb);
3612}
3613EXPORT_SYMBOL(skb_csum_hwoffload_help);
3614
3615static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev, bool *again)
3616{
3617 netdev_features_t features;
3618
3619 features = netif_skb_features(skb);
3620 skb = validate_xmit_vlan(skb, features);
3621 if (unlikely(!skb))
3622 goto out_null;
3623
3624 skb = sk_validate_xmit_skb(skb, dev);
3625 if (unlikely(!skb))
3626 goto out_null;
3627
3628 if (netif_needs_gso(skb, features)) {
3629 struct sk_buff *segs;
3630
3631 segs = skb_gso_segment(skb, features);
3632 if (IS_ERR(segs)) {
3633 goto out_kfree_skb;
3634 } else if (segs) {
3635 consume_skb(skb);
3636 skb = segs;
3637 }
3638 } else {
3639 if (skb_needs_linearize(skb, features) &&
3640 __skb_linearize(skb))
3641 goto out_kfree_skb;
3642
3643
3644
3645
3646
3647 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3648 if (skb->encapsulation)
3649 skb_set_inner_transport_header(skb,
3650 skb_checksum_start_offset(skb));
3651 else
3652 skb_set_transport_header(skb,
3653 skb_checksum_start_offset(skb));
3654 if (skb_csum_hwoffload_help(skb, features))
3655 goto out_kfree_skb;
3656 }
3657 }
3658
3659 skb = validate_xmit_xfrm(skb, features, again);
3660
3661 return skb;
3662
3663out_kfree_skb:
3664 kfree_skb(skb);
3665out_null:
3666 atomic_long_inc(&dev->tx_dropped);
3667 return NULL;
3668}
3669
3670struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again)
3671{
3672 struct sk_buff *next, *head = NULL, *tail;
3673
3674 for (; skb != NULL; skb = next) {
3675 next = skb->next;
3676 skb_mark_not_on_list(skb);
3677
3678
3679 skb->prev = skb;
3680
3681 skb = validate_xmit_skb(skb, dev, again);
3682 if (!skb)
3683 continue;
3684
3685 if (!head)
3686 head = skb;
3687 else
3688 tail->next = skb;
3689
3690
3691
3692 tail = skb->prev;
3693 }
3694 return head;
3695}
3696EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
3697
3698static void qdisc_pkt_len_init(struct sk_buff *skb)
3699{
3700 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3701
3702 qdisc_skb_cb(skb)->pkt_len = skb->len;
3703
3704
3705
3706
3707 if (shinfo->gso_size && skb_transport_header_was_set(skb)) {
3708 unsigned int hdr_len;
3709 u16 gso_segs = shinfo->gso_segs;
3710
3711
3712 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3713
3714
3715 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
3716 const struct tcphdr *th;
3717 struct tcphdr _tcphdr;
3718
3719 th = skb_header_pointer(skb, skb_transport_offset(skb),
3720 sizeof(_tcphdr), &_tcphdr);
3721 if (likely(th))
3722 hdr_len += __tcp_hdrlen(th);
3723 } else {
3724 struct udphdr _udphdr;
3725
3726 if (skb_header_pointer(skb, skb_transport_offset(skb),
3727 sizeof(_udphdr), &_udphdr))
3728 hdr_len += sizeof(struct udphdr);
3729 }
3730
3731 if (shinfo->gso_type & SKB_GSO_DODGY)
3732 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3733 shinfo->gso_size);
3734
3735 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3736 }
3737}
3738
3739static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3740 struct net_device *dev,
3741 struct netdev_queue *txq)
3742{
3743 spinlock_t *root_lock = qdisc_lock(q);
3744 struct sk_buff *to_free = NULL;
3745 bool contended;
3746 int rc;
3747
3748 qdisc_calculate_pkt_len(skb, q);
3749
3750 if (q->flags & TCQ_F_NOLOCK) {
3751 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3752 qdisc_run(q);
3753
3754 if (unlikely(to_free))
3755 kfree_skb_list(to_free);
3756 return rc;
3757 }
3758
3759
3760
3761
3762
3763
3764
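	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get qdisc main lock.
	 * This permits qdisc->running owner to get the lock more
	 * often and dequeue packets faster.
	 */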
3765 contended = qdisc_is_running(q);
3766 if (unlikely(contended))
3767 spin_lock(&q->busylock);
3768
3769 spin_lock(root_lock);
3770 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3771 __qdisc_drop(skb, &to_free);
3772 rc = NET_XMIT_DROP;
3773 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3774 qdisc_run_begin(q)) {
3775
3776
3777
3778
3779
3780
3781 qdisc_bstats_update(q, skb);
3782
3783 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3784 if (unlikely(contended)) {
3785 spin_unlock(&q->busylock);
3786 contended = false;
3787 }
3788 __qdisc_run(q);
3789 }
3790
3791 qdisc_run_end(q);
3792 rc = NET_XMIT_SUCCESS;
3793 } else {
3794 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3795 if (qdisc_run_begin(q)) {
3796 if (unlikely(contended)) {
3797 spin_unlock(&q->busylock);
3798 contended = false;
3799 }
3800 __qdisc_run(q);
3801 qdisc_run_end(q);
3802 }
3803 }
3804 spin_unlock(root_lock);
3805 if (unlikely(to_free))
3806 kfree_skb_list(to_free);
3807 if (unlikely(contended))
3808 spin_unlock(&q->busylock);
3809 return rc;
3810}
3811
3812#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3813static void skb_update_prio(struct sk_buff *skb)
3814{
3815 const struct netprio_map *map;
3816 const struct sock *sk;
3817 unsigned int prioidx;
3818
3819 if (skb->priority)
3820 return;
3821 map = rcu_dereference_bh(skb->dev->priomap);
3822 if (!map)
3823 return;
3824 sk = skb_to_full_sk(skb);
3825 if (!sk)
3826 return;
3827
3828 prioidx = sock_cgroup_prioidx(&sk->sk_cgrp_data);
3829
3830 if (prioidx < map->priomap_len)
3831 skb->priority = map->priomap[prioidx];
3832}
3833#else
3834#define skb_update_prio(skb)
3835#endif
3836
3837
3838
3839
3840
3841
3842
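/**
 * dev_loopback_xmit - loop back @skb
 * @net: network namespace this loopback is happening in
 * @sk:  sk needed to be a netfilter okfn
 * @skb: buffer to transmit
 */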
3843int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3844{
3845 skb_reset_mac_header(skb);
3846 __skb_pull(skb, skb_network_offset(skb));
3847 skb->pkt_type = PACKET_LOOPBACK;
3848 skb->ip_summed = CHECKSUM_UNNECESSARY;
3849 WARN_ON(!skb_dst(skb));
3850 skb_dst_force(skb);
3851 netif_rx_ni(skb);
3852 return 0;
3853}
3854EXPORT_SYMBOL(dev_loopback_xmit);
3855
3856#ifdef CONFIG_NET_EGRESS
3857static struct sk_buff *
3858sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3859{
3860 struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
3861 struct tcf_result cl_res;
3862
3863 if (!miniq)
3864 return skb;
3865
3866
3867 mini_qdisc_bstats_cpu_update(miniq, skb);
3868
3869 switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) {
3870 case TC_ACT_OK:
3871 case TC_ACT_RECLASSIFY:
3872 skb->tc_index = TC_H_MIN(cl_res.classid);
3873 break;
3874 case TC_ACT_SHOT:
3875 mini_qdisc_qstats_cpu_drop(miniq);
3876 *ret = NET_XMIT_DROP;
3877 kfree_skb(skb);
3878 return NULL;
3879 case TC_ACT_STOLEN:
3880 case TC_ACT_QUEUED:
3881 case TC_ACT_TRAP:
3882 *ret = NET_XMIT_SUCCESS;
3883 consume_skb(skb);
3884 return NULL;
3885 case TC_ACT_REDIRECT:
3886
3887 skb_do_redirect(skb);
3888 *ret = NET_XMIT_SUCCESS;
3889 return NULL;
3890 default:
3891 break;
3892 }
3893
3894 return skb;
3895}
3896#endif
3897
3898#ifdef CONFIG_XPS
3899static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
3900 struct xps_dev_maps *dev_maps, unsigned int tci)
3901{
3902 struct xps_map *map;
3903 int queue_index = -1;
3904
3905 if (dev->num_tc) {
3906 tci *= dev->num_tc;
3907 tci += netdev_get_prio_tc_map(dev, skb->priority);
3908 }
3909
3910 map = rcu_dereference(dev_maps->attr_map[tci]);
3911 if (map) {
3912 if (map->len == 1)
3913 queue_index = map->queues[0];
3914 else
3915 queue_index = map->queues[reciprocal_scale(
3916 skb_get_hash(skb), map->len)];
3917 if (unlikely(queue_index >= dev->real_num_tx_queues))
3918 queue_index = -1;
3919 }
3920 return queue_index;
3921}
3922#endif
3923
3924static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev,
3925 struct sk_buff *skb)
3926{
3927#ifdef CONFIG_XPS
3928 struct xps_dev_maps *dev_maps;
3929 struct sock *sk = skb->sk;
3930 int queue_index = -1;
3931
3932 if (!static_key_false(&xps_needed))
3933 return -1;
3934
3935 rcu_read_lock();
3936 if (!static_key_false(&xps_rxqs_needed))
3937 goto get_cpus_map;
3938
3939 dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
3940 if (dev_maps) {
3941 int tci = sk_rx_queue_get(sk);
3942
3943 if (tci >= 0 && tci < dev->num_rx_queues)
3944 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
3945 tci);
3946 }
3947
3948get_cpus_map:
3949 if (queue_index < 0) {
3950 dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
3951 if (dev_maps) {
3952 unsigned int tci = skb->sender_cpu - 1;
3953
3954 queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
3955 tci);
3956 }
3957 }
3958 rcu_read_unlock();
3959
3960 return queue_index;
3961#else
3962 return -1;
3963#endif
3964}
3965
3966u16 dev_pick_tx_zero(struct net_device *dev, struct sk_buff *skb,
3967 struct net_device *sb_dev)
3968{
3969 return 0;
3970}
3971EXPORT_SYMBOL(dev_pick_tx_zero);
3972
3973u16 dev_pick_tx_cpu_id(struct net_device *dev, struct sk_buff *skb,
3974 struct net_device *sb_dev)
3975{
3976 return (u16)raw_smp_processor_id() % dev->real_num_tx_queues;
3977}
3978EXPORT_SYMBOL(dev_pick_tx_cpu_id);
3979
3980u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
3981 struct net_device *sb_dev)
3982{
3983 struct sock *sk = skb->sk;
3984 int queue_index = sk_tx_queue_get(sk);
3985
3986 sb_dev = sb_dev ? : dev;
3987
3988 if (queue_index < 0 || skb->ooo_okay ||
3989 queue_index >= dev->real_num_tx_queues) {
3990 int new_index = get_xps_queue(dev, sb_dev, skb);
3991
3992 if (new_index < 0)
3993 new_index = skb_tx_hash(dev, sb_dev, skb);
3994
3995 if (queue_index != new_index && sk &&
3996 sk_fullsock(sk) &&
3997 rcu_access_pointer(sk->sk_dst_cache))
3998 sk_tx_queue_set(sk, new_index);
3999
4000 queue_index = new_index;
4001 }
4002
4003 return queue_index;
4004}
4005EXPORT_SYMBOL(netdev_pick_tx);
4006
4007struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
4008 struct sk_buff *skb,
4009 struct net_device *sb_dev)
4010{
4011 int queue_index = 0;
4012
4013#ifdef CONFIG_XPS
4014 u32 sender_cpu = skb->sender_cpu - 1;
4015
4016 if (sender_cpu >= (u32)NR_CPUS)
4017 skb->sender_cpu = raw_smp_processor_id() + 1;
4018#endif
4019
4020 if (dev->real_num_tx_queues != 1) {
4021 const struct net_device_ops *ops = dev->netdev_ops;
4022
4023 if (ops->ndo_select_queue)
4024 queue_index = ops->ndo_select_queue(dev, skb, sb_dev);
4025 else
4026 queue_index = netdev_pick_tx(dev, skb, sb_dev);
4027
4028 queue_index = netdev_cap_txqueue(dev, queue_index);
4029 }
4030
4031 skb_set_queue_mapping(skb, queue_index);
4032 return netdev_get_tx_queue(dev, queue_index);
4033}
4034
4060
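/**
 *	__dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *	@sb_dev: subordinate device used for L2 forwarding offload
 *
 *	Queue a buffer for transmission to a network device.  The caller
 *	must have set the device and priority and built the buffer before
 *	calling this function.  The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure.  A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 *	Regardless of the return value, the skb is consumed, so it is
 *	currently difficult to retry a send to this method.  (You can bump
 *	the ref count before sending to hold a reference for retry if you
 *	are careful.)
 *
 *	When calling this method, interrupts MUST be enabled.  This is
 *	because the BH enable code must have IRQs enabled so that it will
 *	not deadlock.
 */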
4061static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
4062{
4063 struct net_device *dev = skb->dev;
4064 struct netdev_queue *txq;
4065 struct Qdisc *q;
4066 int rc = -ENOMEM;
4067 bool again = false;
4068
4069 skb_reset_mac_header(skb);
4070
4071 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
4072 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
4073
4074
4075
4076
4077 rcu_read_lock_bh();
4078
4079 skb_update_prio(skb);
4080
4081 qdisc_pkt_len_init(skb);
4082#ifdef CONFIG_NET_CLS_ACT
4083 skb->tc_at_ingress = 0;
4084# ifdef CONFIG_NET_EGRESS
4085 if (static_branch_unlikely(&egress_needed_key)) {
4086 skb = sch_handle_egress(skb, &rc, dev);
4087 if (!skb)
4088 goto out;
4089 }
4090# endif
4091#endif
4092
4093
4094
4095 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
4096 skb_dst_drop(skb);
4097 else
4098 skb_dst_force(skb);
4099
4100 txq = netdev_core_pick_tx(dev, skb, sb_dev);
4101 q = rcu_dereference_bh(txq->qdisc);
4102
4103 trace_net_dev_queue(skb);
4104 if (q->enqueue) {
4105 rc = __dev_xmit_skb(skb, q, dev, txq);
4106 goto out;
4107 }
4108
4120
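	/* The device has no queue (common for software devices such as
	 * loopback and tunnels).  Transmit directly, taking the per-queue
	 * xmit lock and refusing to recurse back into this device from
	 * our own transmit path.
	 */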
4121 if (dev->flags & IFF_UP) {
4122 int cpu = smp_processor_id();
4123
4124 if (txq->xmit_lock_owner != cpu) {
4125 if (dev_xmit_recursion())
4126 goto recursion_alert;
4127
4128 skb = validate_xmit_skb(skb, dev, &again);
4129 if (!skb)
4130 goto out;
4131
4132 HARD_TX_LOCK(dev, txq, cpu);
4133
4134 if (!netif_xmit_stopped(txq)) {
4135 dev_xmit_recursion_inc();
4136 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
4137 dev_xmit_recursion_dec();
4138 if (dev_xmit_complete(rc)) {
4139 HARD_TX_UNLOCK(dev, txq);
4140 goto out;
4141 }
4142 }
4143 HARD_TX_UNLOCK(dev, txq);
4144 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
4145 dev->name);
4146 } else {
4147
4148
4149
4150recursion_alert:
4151 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
4152 dev->name);
4153 }
4154 }
4155
4156 rc = -ENETDOWN;
4157 rcu_read_unlock_bh();
4158
4159 atomic_long_inc(&dev->tx_dropped);
4160 kfree_skb_list(skb);
4161 return rc;
4162out:
4163 rcu_read_unlock_bh();
4164 return rc;
4165}
4166
4167int dev_queue_xmit(struct sk_buff *skb)
4168{
4169 return __dev_queue_xmit(skb, NULL);
4170}
4171EXPORT_SYMBOL(dev_queue_xmit);
4172
4173int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
4174{
4175 return __dev_queue_xmit(skb, sb_dev);
4176}
4177EXPORT_SYMBOL(dev_queue_xmit_accel);
4178
4179int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
4180{
4181 struct net_device *dev = skb->dev;
4182 struct sk_buff *orig_skb = skb;
4183 struct netdev_queue *txq;
4184 int ret = NETDEV_TX_BUSY;
4185 bool again = false;
4186
4187 if (unlikely(!netif_running(dev) ||
4188 !netif_carrier_ok(dev)))
4189 goto drop;
4190
4191 skb = validate_xmit_skb_list(skb, dev, &again);
4192 if (skb != orig_skb)
4193 goto drop;
4194
4195 skb_set_queue_mapping(skb, queue_id);
4196 txq = skb_get_tx_queue(dev, skb);
4197
4198 local_bh_disable();
4199
4200 dev_xmit_recursion_inc();
4201 HARD_TX_LOCK(dev, txq, smp_processor_id());
4202 if (!netif_xmit_frozen_or_drv_stopped(txq))
4203 ret = netdev_start_xmit(skb, dev, txq, false);
4204 HARD_TX_UNLOCK(dev, txq);
4205 dev_xmit_recursion_dec();
4206
4207 local_bh_enable();
4208
4209 if (!dev_xmit_complete(ret))
4210 kfree_skb(skb);
4211
4212 return ret;
4213drop:
4214 atomic_long_inc(&dev->tx_dropped);
4215 kfree_skb_list(skb);
4216 return NET_XMIT_DROP;
4217}
4218EXPORT_SYMBOL(dev_direct_xmit);
4219
4220
4221
4222
4223
4224int netdev_max_backlog __read_mostly = 1000;
4225EXPORT_SYMBOL(netdev_max_backlog);
4226
4227int netdev_tstamp_prequeue __read_mostly = 1;
4228int netdev_budget __read_mostly = 300;
4229
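/* Must be at least 2 jiffies to guarantee 1 jiffy timeout */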
4230unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
4231int weight_p __read_mostly = 64;
4232int dev_weight_rx_bias __read_mostly = 1;
4233int dev_weight_tx_bias __read_mostly = 1;
4234int dev_rx_weight __read_mostly = 64;
4235int dev_tx_weight __read_mostly = 64;
4236
4237int gro_normal_batch __read_mostly = 8;
4238
4239
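/* Called with irq disabled */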
4240static inline void ____napi_schedule(struct softnet_data *sd,
4241 struct napi_struct *napi)
4242{
4243 list_add_tail(&napi->poll_list, &sd->poll_list);
4244 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4245}
4246
4247#ifdef CONFIG_RPS
4248
4249
4250struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
4251EXPORT_SYMBOL(rps_sock_flow_table);
4252u32 rps_cpu_mask __read_mostly;
4253EXPORT_SYMBOL(rps_cpu_mask);
4254
4255struct static_key_false rps_needed __read_mostly;
4256EXPORT_SYMBOL(rps_needed);
4257struct static_key_false rfs_needed __read_mostly;
4258EXPORT_SYMBOL(rfs_needed);
4259
4260static struct rps_dev_flow *
4261set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
4262 struct rps_dev_flow *rflow, u16 next_cpu)
4263{
4264 if (next_cpu < nr_cpu_ids) {
4265#ifdef CONFIG_RFS_ACCEL
4266 struct netdev_rx_queue *rxqueue;
4267 struct rps_dev_flow_table *flow_table;
4268 struct rps_dev_flow *old_rflow;
4269 u32 flow_id;
4270 u16 rxq_index;
4271 int rc;
4272
4273
4274 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
4275 !(dev->features & NETIF_F_NTUPLE))
4276 goto out;
4277 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
4278 if (rxq_index == skb_get_rx_queue(skb))
4279 goto out;
4280
4281 rxqueue = dev->_rx + rxq_index;
4282 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4283 if (!flow_table)
4284 goto out;
4285 flow_id = skb_get_hash(skb) & flow_table->mask;
4286 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
4287 rxq_index, flow_id);
4288 if (rc < 0)
4289 goto out;
4290 old_rflow = rflow;
4291 rflow = &flow_table->flows[flow_id];
4292 rflow->filter = rc;
4293 if (old_rflow->filter == rflow->filter)
4294 old_rflow->filter = RPS_NO_FILTER;
4295 out:
4296#endif
4297 rflow->last_qtail =
4298 per_cpu(softnet_data, next_cpu).input_queue_head;
4299 }
4300
4301 rflow->cpu = next_cpu;
4302 return rflow;
4303}
4304
4305
4306
4307
4308
4309
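/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */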
4310static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
4311 struct rps_dev_flow **rflowp)
4312{
4313 const struct rps_sock_flow_table *sock_flow_table;
4314 struct netdev_rx_queue *rxqueue = dev->_rx;
4315 struct rps_dev_flow_table *flow_table;
4316 struct rps_map *map;
4317 int cpu = -1;
4318 u32 tcpu;
4319 u32 hash;
4320
4321 if (skb_rx_queue_recorded(skb)) {
4322 u16 index = skb_get_rx_queue(skb);
4323
4324 if (unlikely(index >= dev->real_num_rx_queues)) {
4325 WARN_ONCE(dev->real_num_rx_queues > 1,
4326 "%s received packet on queue %u, but number "
4327 "of RX queues is %u\n",
4328 dev->name, index, dev->real_num_rx_queues);
4329 goto done;
4330 }
4331 rxqueue += index;
4332 }
4333
4334
4335
4336 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4337 map = rcu_dereference(rxqueue->rps_map);
4338 if (!flow_table && !map)
4339 goto done;
4340
4341 skb_reset_network_header(skb);
4342 hash = skb_get_hash(skb);
4343 if (!hash)
4344 goto done;
4345
4346 sock_flow_table = rcu_dereference(rps_sock_flow_table);
4347 if (flow_table && sock_flow_table) {
4348 struct rps_dev_flow *rflow;
4349 u32 next_cpu;
4350 u32 ident;
4351
4352
4353 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
4354 if ((ident ^ hash) & ~rps_cpu_mask)
4355 goto try_rps;
4356
4357 next_cpu = ident & rps_cpu_mask;
4358
4359
4360
4361
4362 rflow = &flow_table->flows[hash & flow_table->mask];
4363 tcpu = rflow->cpu;
4364
4365
4366
4367
4368
4369
4370
4371
4372
4373
4374
4375
4376 if (unlikely(tcpu != next_cpu) &&
4377 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
4378 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
4379 rflow->last_qtail)) >= 0)) {
4380 tcpu = next_cpu;
4381 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
4382 }
4383
4384 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
4385 *rflowp = rflow;
4386 cpu = tcpu;
4387 goto done;
4388 }
4389 }
4390
4391try_rps:
4392
4393 if (map) {
4394 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
4395 if (cpu_online(tcpu)) {
4396 cpu = tcpu;
4397 goto done;
4398 }
4399 }
4400
4401done:
4402 return cpu;
4403}
4404
4405#ifdef CONFIG_RFS_ACCEL
4406
4417
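/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */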
4418bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
4419 u32 flow_id, u16 filter_id)
4420{
4421 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
4422 struct rps_dev_flow_table *flow_table;
4423 struct rps_dev_flow *rflow;
4424 bool expire = true;
4425 unsigned int cpu;
4426
4427 rcu_read_lock();
4428 flow_table = rcu_dereference(rxqueue->rps_flow_table);
4429 if (flow_table && flow_id <= flow_table->mask) {
4430 rflow = &flow_table->flows[flow_id];
4431 cpu = READ_ONCE(rflow->cpu);
4432 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
4433 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
4434 rflow->last_qtail) <
4435 (int)(10 * flow_table->mask)))
4436 expire = false;
4437 }
4438 rcu_read_unlock();
4439 return expire;
4440}
4441EXPORT_SYMBOL(rps_may_expire_flow);
4442
4443#endif
4444
4445
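/* Called from hardirq (IPI) context */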
4446static void rps_trigger_softirq(void *data)
4447{
4448 struct softnet_data *sd = data;
4449
4450 ____napi_schedule(sd, &sd->backlog);
4451 sd->received_rps++;
4452}
4453
4454#endif
4455
4456
4457
4458
4459
4460
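/*
 * Check if this softnet_data structure is another cpu one
 * If yes, queue it to our IPI list and return 1
 * If no, return 0
 */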
4461static int rps_ipi_queued(struct softnet_data *sd)
4462{
4463#ifdef CONFIG_RPS
4464 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
4465
4466 if (sd != mysd) {
4467 sd->rps_ipi_next = mysd->rps_ipi_list;
4468 mysd->rps_ipi_list = sd;
4469
4470 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4471 return 1;
4472 }
4473#endif
4474 return 0;
4475}
4476
4477#ifdef CONFIG_NET_FLOW_LIMIT
4478int netdev_flow_limit_table_len __read_mostly = (1 << 12);
4479#endif
4480
4481static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
4482{
4483#ifdef CONFIG_NET_FLOW_LIMIT
4484 struct sd_flow_limit *fl;
4485 struct softnet_data *sd;
4486 unsigned int old_flow, new_flow;
4487
4488 if (qlen < (netdev_max_backlog >> 1))
4489 return false;
4490
4491 sd = this_cpu_ptr(&softnet_data);
4492
4493 rcu_read_lock();
4494 fl = rcu_dereference(sd->flow_limit);
4495 if (fl) {
4496 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
4497 old_flow = fl->history[fl->history_head];
4498 fl->history[fl->history_head] = new_flow;
4499
4500 fl->history_head++;
4501 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
4502
4503 if (likely(fl->buckets[old_flow]))
4504 fl->buckets[old_flow]--;
4505
4506 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
4507 fl->count++;
4508 rcu_read_unlock();
4509 return true;
4510 }
4511 }
4512 rcu_read_unlock();
4513#endif
4514 return false;
4515}
4516
4517
4518
4519
4520
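/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */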
4521static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
4522 unsigned int *qtail)
4523{
4524 struct softnet_data *sd;
4525 unsigned long flags;
4526 unsigned int qlen;
4527
4528 sd = &per_cpu(softnet_data, cpu);
4529
4530 local_irq_save(flags);
4531
4532 rps_lock(sd);
4533 if (!netif_running(skb->dev))
4534 goto drop;
4535 qlen = skb_queue_len(&sd->input_pkt_queue);
4536 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
4537 if (qlen) {
4538enqueue:
4539 __skb_queue_tail(&sd->input_pkt_queue, skb);
4540 input_queue_tail_incr_save(sd, qtail);
4541 rps_unlock(sd);
4542 local_irq_restore(flags);
4543 return NET_RX_SUCCESS;
4544 }
4545
4546
4547
4548
4549 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
4550 if (!rps_ipi_queued(sd))
4551 ____napi_schedule(sd, &sd->backlog);
4552 }
4553 goto enqueue;
4554 }
4555
4556drop:
4557 sd->dropped++;
4558 rps_unlock(sd);
4559
4560 local_irq_restore(flags);
4561
4562 atomic_long_inc(&skb->dev->rx_dropped);
4563 kfree_skb(skb);
4564 return NET_RX_DROP;
4565}
4566
4567static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
4568{
4569 struct net_device *dev = skb->dev;
4570 struct netdev_rx_queue *rxqueue;
4571
4572 rxqueue = dev->_rx;
4573
4574 if (skb_rx_queue_recorded(skb)) {
4575 u16 index = skb_get_rx_queue(skb);
4576
4577 if (unlikely(index >= dev->real_num_rx_queues)) {
4578 WARN_ONCE(dev->real_num_rx_queues > 1,
4579 "%s received packet on queue %u, but number "
4580 "of RX queues is %u\n",
4581 dev->name, index, dev->real_num_rx_queues);
4582
4583 return rxqueue;
4584 }
4585 rxqueue += index;
4586 }
4587 return rxqueue;
4588}
4589
4590static u32 netif_receive_generic_xdp(struct sk_buff *skb,
4591 struct xdp_buff *xdp,
4592 struct bpf_prog *xdp_prog)
4593{
4594 struct netdev_rx_queue *rxqueue;
4595 void *orig_data, *orig_data_end;
4596 u32 metalen, act = XDP_DROP;
4597 __be16 orig_eth_type;
4598 struct ethhdr *eth;
4599 bool orig_bcast;
4600 int hlen, off;
4601 u32 mac_len;
4602
4603
4604
4605
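	/* Reinjected packets coming from act_mirred or similar should
	 * not get XDP generic processing.
	 */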
4606 if (skb_is_redirected(skb))
4607 return XDP_PASS;
4608
4609
4610
4611
4612
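	/* XDP packets must be linear and must have sufficient headroom
	 * of XDP_PACKET_HEADROOM bytes.  This is the guarantee that also
	 * native XDP provides, thus we need to do it here as well.
	 */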
4613 if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
4614 skb_headroom(skb) < XDP_PACKET_HEADROOM) {
4615 int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
4616 int troom = skb->tail + skb->data_len - skb->end;
4617
4618
4619
4620
4621 if (pskb_expand_head(skb,
4622 hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
4623 troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
4624 goto do_drop;
4625 if (skb_linearize(skb))
4626 goto do_drop;
4627 }
4628
4629
4630
4631
4632 mac_len = skb->data - skb_mac_header(skb);
4633 hlen = skb_headlen(skb) + mac_len;
4634 xdp->data = skb->data - mac_len;
4635 xdp->data_meta = xdp->data;
4636 xdp->data_end = xdp->data + hlen;
4637 xdp->data_hard_start = skb->data - skb_headroom(skb);
4638
4639
4640 xdp->frame_sz = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
4641 xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
4642
4643 orig_data_end = xdp->data_end;
4644 orig_data = xdp->data;
4645 eth = (struct ethhdr *)xdp->data;
4646 orig_bcast = is_multicast_ether_addr_64bits(eth->h_dest);
4647 orig_eth_type = eth->h_proto;
4648
4649 rxqueue = netif_get_rxqueue(skb);
4650 xdp->rxq = &rxqueue->xdp_rxq;
4651
4652 act = bpf_prog_run_xdp(xdp_prog, xdp);
4653
4654
4655 off = xdp->data - orig_data;
4656 if (off) {
4657 if (off > 0)
4658 __skb_pull(skb, off);
4659 else if (off < 0)
4660 __skb_push(skb, -off);
4661
4662 skb->mac_header += off;
4663 skb_reset_network_header(skb);
4664 }
4665
4666
4667 off = xdp->data_end - orig_data_end;
4668 if (off != 0) {
4669 skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
4670 skb->len += off;
4671 }
4672
4673
4674 eth = (struct ethhdr *)xdp->data;
4675 if ((orig_eth_type != eth->h_proto) ||
4676 (orig_bcast != is_multicast_ether_addr_64bits(eth->h_dest))) {
4677 __skb_push(skb, ETH_HLEN);
4678 skb->protocol = eth_type_trans(skb, skb->dev);
4679 }
4680
4681 switch (act) {
4682 case XDP_REDIRECT:
4683 case XDP_TX:
4684 __skb_push(skb, mac_len);
4685 break;
4686 case XDP_PASS:
4687 metalen = xdp->data - xdp->data_meta;
4688 if (metalen)
4689 skb_metadata_set(skb, metalen);
4690 break;
4691 default:
4692 bpf_warn_invalid_xdp_action(act);
4693 fallthrough;
4694 case XDP_ABORTED:
4695 trace_xdp_exception(skb->dev, xdp_prog, act);
4696 fallthrough;
4697 case XDP_DROP:
4698 do_drop:
4699 kfree_skb(skb);
4700 break;
4701 }
4702
4703 return act;
4704}
4705
4706
4707
4708
4709void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
4710{
4711 struct net_device *dev = skb->dev;
4712 struct netdev_queue *txq;
4713 bool free_skb = true;
4714 int cpu, rc;
4715
4716 txq = netdev_core_pick_tx(dev, skb, NULL);
4717 cpu = smp_processor_id();
4718 HARD_TX_LOCK(dev, txq, cpu);
4719 if (!netif_xmit_stopped(txq)) {
4720 rc = netdev_start_xmit(skb, dev, txq, 0);
4721 if (dev_xmit_complete(rc))
4722 free_skb = false;
4723 }
4724 HARD_TX_UNLOCK(dev, txq);
4725 if (free_skb) {
4726 trace_xdp_exception(dev, xdp_prog, XDP_TX);
4727 kfree_skb(skb);
4728 }
4729}
4730
4731static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key);
4732
4733int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb)
4734{
4735 if (xdp_prog) {
4736 struct xdp_buff xdp;
4737 u32 act;
4738 int err;
4739
4740 act = netif_receive_generic_xdp(skb, &xdp, xdp_prog);
4741 if (act != XDP_PASS) {
4742 switch (act) {
4743 case XDP_REDIRECT:
4744 err = xdp_do_generic_redirect(skb->dev, skb,
4745 &xdp, xdp_prog);
4746 if (err)
4747 goto out_redir;
4748 break;
4749 case XDP_TX:
4750 generic_xdp_tx(skb, xdp_prog);
4751 break;
4752 }
4753 return XDP_DROP;
4754 }
4755 }
4756 return XDP_PASS;
4757out_redir:
4758 kfree_skb(skb);
4759 return XDP_DROP;
4760}
4761EXPORT_SYMBOL_GPL(do_xdp_generic);
4762
4763static int netif_rx_internal(struct sk_buff *skb)
4764{
4765 int ret;
4766
4767 net_timestamp_check(netdev_tstamp_prequeue, skb);
4768
4769 trace_netif_rx(skb);
4770
4771#ifdef CONFIG_RPS
4772 if (static_branch_unlikely(&rps_needed)) {
4773 struct rps_dev_flow voidflow, *rflow = &voidflow;
4774 int cpu;
4775
4776 preempt_disable();
4777 rcu_read_lock();
4778
4779 cpu = get_rps_cpu(skb->dev, skb, &rflow);
4780 if (cpu < 0)
4781 cpu = smp_processor_id();
4782
4783 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4784
4785 rcu_read_unlock();
4786 preempt_enable();
4787 } else
4788#endif
4789 {
4790 unsigned int qtail;
4791
4792 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
4793 put_cpu();
4794 }
4795 return ret;
4796}
4797
4812
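/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds.  The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */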
4813int netif_rx(struct sk_buff *skb)
4814{
4815 int ret;
4816
4817 trace_netif_rx_entry(skb);
4818
4819 ret = netif_rx_internal(skb);
4820 trace_netif_rx_exit(ret);
4821
4822 return ret;
4823}
4824EXPORT_SYMBOL(netif_rx);
4825
4826int netif_rx_ni(struct sk_buff *skb)
4827{
4828 int err;
4829
4830 trace_netif_rx_ni_entry(skb);
4831
4832 preempt_disable();
4833 err = netif_rx_internal(skb);
4834 if (local_softirq_pending())
4835 do_softirq();
4836 preempt_enable();
4837 trace_netif_rx_ni_exit(err);
4838
4839 return err;
4840}
4841EXPORT_SYMBOL(netif_rx_ni);
4842
4843static __latent_entropy void net_tx_action(struct softirq_action *h)
4844{
4845 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
4846
4847 if (sd->completion_queue) {
4848 struct sk_buff *clist;
4849
4850 local_irq_disable();
4851 clist = sd->completion_queue;
4852 sd->completion_queue = NULL;
4853 local_irq_enable();
4854
4855 while (clist) {
4856 struct sk_buff *skb = clist;
4857
4858 clist = clist->next;
4859
4860 WARN_ON(refcount_read(&skb->users));
4861 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
4862 trace_consume_skb(skb);
4863 else
4864 trace_kfree_skb(skb, net_tx_action);
4865
4866 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
4867 __kfree_skb(skb);
4868 else
4869 __kfree_skb_defer(skb);
4870 }
4871
4872 __kfree_skb_flush();
4873 }
4874
4875 if (sd->output_queue) {
4876 struct Qdisc *head;
4877
4878 local_irq_disable();
4879 head = sd->output_queue;
4880 sd->output_queue = NULL;
4881 sd->output_queue_tailp = &sd->output_queue;
4882 local_irq_enable();
4883
4884 while (head) {
4885 struct Qdisc *q = head;
4886 spinlock_t *root_lock = NULL;
4887
4888 head = head->next_sched;
4889
4890 if (!(q->flags & TCQ_F_NOLOCK)) {
4891 root_lock = qdisc_lock(q);
4892 spin_lock(root_lock);
4893 }
4894
4895
4896
4897 smp_mb__before_atomic();
4898 clear_bit(__QDISC_STATE_SCHED, &q->state);
4899 qdisc_run(q);
4900 if (root_lock)
4901 spin_unlock(root_lock);
4902 }
4903 }
4904
4905 xfrm_dev_backlog(sd);
4906}
4907
4908#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
4909
4910int (*br_fdb_test_addr_hook)(struct net_device *dev,
4911 unsigned char *addr) __read_mostly;
4912EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
4913#endif
4914
4915static inline struct sk_buff *
4916sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
4917 struct net_device *orig_dev)
4918{
4919#ifdef CONFIG_NET_CLS_ACT
4920 struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
4921 struct tcf_result cl_res;
4922
4923
4924
4925
4926
4927
4928 if (!miniq)
4929 return skb;
4930
4931 if (*pt_prev) {
4932 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4933 *pt_prev = NULL;
4934 }
4935
4936 qdisc_skb_cb(skb)->pkt_len = skb->len;
4937 skb->tc_at_ingress = 1;
4938 mini_qdisc_bstats_cpu_update(miniq, skb);
4939
4940 switch (tcf_classify_ingress(skb, miniq->block, miniq->filter_list,
4941 &cl_res, false)) {
4942 case TC_ACT_OK:
4943 case TC_ACT_RECLASSIFY:
4944 skb->tc_index = TC_H_MIN(cl_res.classid);
4945 break;
4946 case TC_ACT_SHOT:
4947 mini_qdisc_qstats_cpu_drop(miniq);
4948 kfree_skb(skb);
4949 return NULL;
4950 case TC_ACT_STOLEN:
4951 case TC_ACT_QUEUED:
4952 case TC_ACT_TRAP:
4953 consume_skb(skb);
4954 return NULL;
4955 case TC_ACT_REDIRECT:
4956
4957
4958
4959
4960 __skb_push(skb, skb->mac_len);
4961 skb_do_redirect(skb);
4962 return NULL;
4963 case TC_ACT_CONSUMED:
4964 return NULL;
4965 default:
4966 break;
4967 }
4968#endif
4969 return skb;
4970}
4971
4972
4973
4974
4975
4976
4977
4978
4979
4980
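/**
 *	netdev_is_rx_handler_busy - check if receive handler is registered
 *	@dev: device to check
 *
 *	Check if a receive handler is already registered for a given device.
 *	Return true if there is one.
 *
 *	The caller must hold the rtnl_mutex.
 */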
4981bool netdev_is_rx_handler_busy(struct net_device *dev)
4982{
4983 ASSERT_RTNL();
4984 return dev && rtnl_dereference(dev->rx_handler);
4985}
4986EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
4987
5001
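/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device.  This handler will then be
 *	called from __netif_receive_skb.  A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
 */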
5002int netdev_rx_handler_register(struct net_device *dev,
5003 rx_handler_func_t *rx_handler,
5004 void *rx_handler_data)
5005{
5006 if (netdev_is_rx_handler_busy(dev))
5007 return -EBUSY;
5008
5009 if (dev->priv_flags & IFF_NO_RX_HANDLER)
5010 return -EINVAL;
5011
5012
5013 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
5014 rcu_assign_pointer(dev->rx_handler, rx_handler);
5015
5016 return 0;
5017}
5018EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
5019
5020
5021
5022
5023
5024
5025
5026
5027
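/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */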
5028void netdev_rx_handler_unregister(struct net_device *dev)
5029{
5030
5031 ASSERT_RTNL();
5032 RCU_INIT_POINTER(dev->rx_handler, NULL);
5033
5034
5035
5036
5037 synchronize_net();
5038 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
5039}
5040EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
5041
5042
5043
5044
5045
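/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special case pfmemalloc checks.
 */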
5046static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
5047{
5048 switch (skb->protocol) {
5049 case htons(ETH_P_ARP):
5050 case htons(ETH_P_IP):
5051 case htons(ETH_P_IPV6):
5052 case htons(ETH_P_8021Q):
5053 case htons(ETH_P_8021AD):
5054 return true;
5055 default:
5056 return false;
5057 }
5058}
5059
5060static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
5061 int *ret, struct net_device *orig_dev)
5062{
5063 if (nf_hook_ingress_active(skb)) {
5064 int ingress_retval;
5065
5066 if (*pt_prev) {
5067 *ret = deliver_skb(skb, *pt_prev, orig_dev);
5068 *pt_prev = NULL;
5069 }
5070
5071 rcu_read_lock();
5072 ingress_retval = nf_hook_ingress(skb);
5073 rcu_read_unlock();
5074 return ingress_retval;
5075 }
5076 return 0;
5077}
5078
5079static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
5080 struct packet_type **ppt_prev)
5081{
5082 struct packet_type *ptype, *pt_prev;
5083 rx_handler_func_t *rx_handler;
5084 struct sk_buff *skb = *pskb;
5085 struct net_device *orig_dev;
5086 bool deliver_exact = false;
5087 int ret = NET_RX_DROP;
5088 __be16 type;
5089
5090 net_timestamp_check(!netdev_tstamp_prequeue, skb);
5091
5092 trace_netif_receive_skb(skb);
5093
5094 orig_dev = skb->dev;
5095
5096 skb_reset_network_header(skb);
5097 if (!skb_transport_header_was_set(skb))
5098 skb_reset_transport_header(skb);
5099 skb_reset_mac_len(skb);
5100
5101 pt_prev = NULL;
5102
5103another_round:
5104 skb->skb_iif = skb->dev->ifindex;
5105
5106 __this_cpu_inc(softnet_data.processed);
5107
5108 if (static_branch_unlikely(&generic_xdp_needed_key)) {
5109 int ret2;
5110
5111 preempt_disable();
5112 ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb);
5113 preempt_enable();
5114
5115 if (ret2 != XDP_PASS) {
5116 ret = NET_RX_DROP;
5117 goto out;
5118 }
5119 skb_reset_mac_len(skb);
5120 }
5121
5122 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
5123 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
5124 skb = skb_vlan_untag(skb);
5125 if (unlikely(!skb))
5126 goto out;
5127 }
5128
5129 if (skb_skip_tc_classify(skb))
5130 goto skip_classify;
5131
5132 if (pfmemalloc)
5133 goto skip_taps;
5134
5135 list_for_each_entry_rcu(ptype, &ptype_all, list) {
5136 if (pt_prev)
5137 ret = deliver_skb(skb, pt_prev, orig_dev);
5138 pt_prev = ptype;
5139 }
5140
5141 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
5142 if (pt_prev)
5143 ret = deliver_skb(skb, pt_prev, orig_dev);
5144 pt_prev = ptype;
5145 }
5146
5147skip_taps:
5148#ifdef CONFIG_NET_INGRESS
5149 if (static_branch_unlikely(&ingress_needed_key)) {
5150 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
5151 if (!skb)
5152 goto out;
5153
5154 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
5155 goto out;
5156 }
5157#endif
5158 skb_reset_redirect(skb);
5159skip_classify:
5160 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
5161 goto drop;
5162
5163 if (skb_vlan_tag_present(skb)) {
5164 if (pt_prev) {
5165 ret = deliver_skb(skb, pt_prev, orig_dev);
5166 pt_prev = NULL;
5167 }
5168 if (vlan_do_receive(&skb))
5169 goto another_round;
5170 else if (unlikely(!skb))
5171 goto out;
5172 }
5173
5174 rx_handler = rcu_dereference(skb->dev->rx_handler);
5175 if (rx_handler) {
5176 if (pt_prev) {
5177 ret = deliver_skb(skb, pt_prev, orig_dev);
5178 pt_prev = NULL;
5179 }
5180 switch (rx_handler(&skb)) {
5181 case RX_HANDLER_CONSUMED:
5182 ret = NET_RX_SUCCESS;
5183 goto out;
5184 case RX_HANDLER_ANOTHER:
5185 goto another_round;
5186 case RX_HANDLER_EXACT:
5187 deliver_exact = true;
			fallthrough;
5188 case RX_HANDLER_PASS:
5189 break;
5190 default:
5191 BUG();
5192 }
5193 }
5194
5195 if (unlikely(skb_vlan_tag_present(skb))) {
5196check_vlan_id:
5197 if (skb_vlan_tag_get_id(skb)) {
5198
5199
5200
5201 skb->pkt_type = PACKET_OTHERHOST;
5202 } else if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
5203 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
5204
5205
5206
5207
5208 __vlan_hwaccel_clear_tag(skb);
5209 skb = skb_vlan_untag(skb);
5210 if (unlikely(!skb))
5211 goto out;
5212 if (vlan_do_receive(&skb))
5213
5214
5215
5216 goto another_round;
5217 else if (unlikely(!skb))
5218 goto out;
5219 else
5220
5221
5222
5223
5224 goto check_vlan_id;
5225 }
5226
5227
5228
5229
5230 __vlan_hwaccel_clear_tag(skb);
5231 }
5232
5233 type = skb->protocol;
5234
5235
5236 if (likely(!deliver_exact)) {
5237 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5238 &ptype_base[ntohs(type) &
5239 PTYPE_HASH_MASK]);
5240 }
5241
5242 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5243 &orig_dev->ptype_specific);
5244
5245 if (unlikely(skb->dev != orig_dev)) {
5246 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
5247 &skb->dev->ptype_specific);
5248 }
5249
5250 if (pt_prev) {
5251 if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
5252 goto drop;
5253 *ppt_prev = pt_prev;
5254 } else {
5255drop:
5256 if (!deliver_exact)
5257 atomic_long_inc(&skb->dev->rx_dropped);
5258 else
5259 atomic_long_inc(&skb->dev->rx_nohandler);
5260 kfree_skb(skb);
5261
5262
5263
5264 ret = NET_RX_DROP;
5265 }
5266
5267out:
/* The skb may have been replaced on the way here (VLAN untagging,
 * rx_handler); reflect the final pointer back to the caller so that
 * *pskb is valid whenever *ppt_prev has been set.
 */
5274 *pskb = skb;
5275 return ret;
5276}
5277
5278static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc)
5279{
5280 struct net_device *orig_dev = skb->dev;
5281 struct packet_type *pt_prev = NULL;
5282 int ret;
5283
5284 ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
5285 if (pt_prev)
5286 ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb,
5287 skb->dev, pt_prev, orig_dev);
5288 return ret;
5289}
5290
/**
 * netif_receive_skb_core - special purpose version of netif_receive_skb
 * @skb: buffer to process
 *
 * More direct receive version of netif_receive_skb().  It should
 * only be used by callers that have a need to skip RPS and Generic XDP.
 * Caller must also take care of handling if ``(page_is_)pfmemalloc``.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */
5306int netif_receive_skb_core(struct sk_buff *skb)
5307{
5308 int ret;
5309
5310 rcu_read_lock();
5311 ret = __netif_receive_skb_one_core(skb, false);
5312 rcu_read_unlock();
5313
5314 return ret;
5315}
5316EXPORT_SYMBOL(netif_receive_skb_core);
5317
5318static inline void __netif_receive_skb_list_ptype(struct list_head *head,
5319 struct packet_type *pt_prev,
5320 struct net_device *orig_dev)
5321{
5322 struct sk_buff *skb, *next;
5323
5324 if (!pt_prev)
5325 return;
5326 if (list_empty(head))
5327 return;
5328 if (pt_prev->list_func != NULL)
5329 INDIRECT_CALL_INET(pt_prev->list_func, ipv6_list_rcv,
5330 ip_list_rcv, head, pt_prev, orig_dev);
5331 else
5332 list_for_each_entry_safe(skb, next, head, list) {
5333 skb_list_del_init(skb);
5334 pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
5335 }
5336}
5337
5338static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemalloc)
5339{
/* Fast-path assumptions:
 * - There is no RX handler.
 * - Only one packet_type matches.
 * If either of these fails, we will end up doing some per-packet
 * processing in-line, then handling the 'last ptype' for the whole
 * sublist.  This can't cause out-of-order delivery to any single ptype,
 * because the 'last ptype' must be constant across the sublist, and all
 * other ptypes are handled per-packet.
 */
/* Current (common) ptype of sublist */
5350 struct packet_type *pt_curr = NULL;
5351
5352 struct net_device *od_curr = NULL;
5353 struct list_head sublist;
5354 struct sk_buff *skb, *next;
5355
5356 INIT_LIST_HEAD(&sublist);
5357 list_for_each_entry_safe(skb, next, head, list) {
5358 struct net_device *orig_dev = skb->dev;
5359 struct packet_type *pt_prev = NULL;
5360
5361 skb_list_del_init(skb);
5362 __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev);
5363 if (!pt_prev)
5364 continue;
5365 if (pt_curr != pt_prev || od_curr != orig_dev) {
5366
5367 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5368
5369 INIT_LIST_HEAD(&sublist);
5370 pt_curr = pt_prev;
5371 od_curr = orig_dev;
5372 }
5373 list_add_tail(&skb->list, &sublist);
5374 }
5375
5376
5377 __netif_receive_skb_list_ptype(&sublist, pt_curr, od_curr);
5378}
5379
5380static int __netif_receive_skb(struct sk_buff *skb)
5381{
5382 int ret;
5383
5384 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
5385 unsigned int noreclaim_flag;
5386
/*
 * PFMEMALLOC skbs are special, they should
 * - be delivered to SOCK_MEMALLOC sockets only
 * - stay away from userspace
 * - have bounded memory usage
 *
 * Use PF_MEMALLOC as this saves us from propagating the allocation
 * context down to all allocation sites.
 */
5396 noreclaim_flag = memalloc_noreclaim_save();
5397 ret = __netif_receive_skb_one_core(skb, true);
5398 memalloc_noreclaim_restore(noreclaim_flag);
5399 } else
5400 ret = __netif_receive_skb_one_core(skb, false);
5401
5402 return ret;
5403}
5404
5405static void __netif_receive_skb_list(struct list_head *head)
5406{
5407 unsigned long noreclaim_flag = 0;
5408 struct sk_buff *skb, *next;
5409 bool pfmemalloc = false;
5410
5411 list_for_each_entry_safe(skb, next, head, list) {
5412 if ((sk_memalloc_socks() && skb_pfmemalloc(skb)) != pfmemalloc) {
5413 struct list_head sublist;
5414
5415
5416 list_cut_before(&sublist, head, &skb->list);
5417 if (!list_empty(&sublist))
5418 __netif_receive_skb_list_core(&sublist, pfmemalloc);
5419 pfmemalloc = !pfmemalloc;
5420
5421 if (pfmemalloc)
5422 noreclaim_flag = memalloc_noreclaim_save();
5423 else
5424 memalloc_noreclaim_restore(noreclaim_flag);
5425 }
5426 }
5427
5428 if (!list_empty(head))
5429 __netif_receive_skb_list_core(head, pfmemalloc);
5430
5431 if (pfmemalloc)
5432 memalloc_noreclaim_restore(noreclaim_flag);
5433}
5434
5435static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
5436{
5437 struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
5438 struct bpf_prog *new = xdp->prog;
5439 int ret = 0;
5440
5441 if (new) {
5442 u32 i;
5443
/* Generic XDP does not work with DEVMAPs or CPUMAPs that can
 * have a bpf_prog installed on an entry.
 */
5447 for (i = 0; i < new->aux->used_map_cnt; i++) {
5448 if (dev_map_can_have_prog(new->aux->used_maps[i]))
5449 return -EINVAL;
5450 if (cpu_map_prog_allowed(new->aux->used_maps[i]))
5451 return -EINVAL;
5452 }
5453 }
5454
5455 switch (xdp->command) {
5456 case XDP_SETUP_PROG:
5457 rcu_assign_pointer(dev->xdp_prog, new);
5458 if (old)
5459 bpf_prog_put(old);
5460
5461 if (old && !new) {
5462 static_branch_dec(&generic_xdp_needed_key);
5463 } else if (new && !old) {
5464 static_branch_inc(&generic_xdp_needed_key);
5465 dev_disable_lro(dev);
5466 dev_disable_gro_hw(dev);
5467 }
5468 break;
5469
5470 default:
5471 ret = -EINVAL;
5472 break;
5473 }
5474
5475 return ret;
5476}
5477
5478static int netif_receive_skb_internal(struct sk_buff *skb)
5479{
5480 int ret;
5481
5482 net_timestamp_check(netdev_tstamp_prequeue, skb);
5483
5484 if (skb_defer_rx_timestamp(skb))
5485 return NET_RX_SUCCESS;
5486
5487 rcu_read_lock();
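/* With RPS, steer the packet to the backlog of the CPU chosen by
 * get_rps_cpu(); otherwise process it on this CPU.
 */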
5488#ifdef CONFIG_RPS
5489 if (static_branch_unlikely(&rps_needed)) {
5490 struct rps_dev_flow voidflow, *rflow = &voidflow;
5491 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5492
5493 if (cpu >= 0) {
5494 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5495 rcu_read_unlock();
5496 return ret;
5497 }
5498 }
5499#endif
5500 ret = __netif_receive_skb(skb);
5501 rcu_read_unlock();
5502 return ret;
5503}
5504
5505static void netif_receive_skb_list_internal(struct list_head *head)
5506{
5507 struct sk_buff *skb, *next;
5508 struct list_head sublist;
5509
5510 INIT_LIST_HEAD(&sublist);
5511 list_for_each_entry_safe(skb, next, head, list) {
5512 net_timestamp_check(netdev_tstamp_prequeue, skb);
5513 skb_list_del_init(skb);
5514 if (!skb_defer_rx_timestamp(skb))
5515 list_add_tail(&skb->list, &sublist);
5516 }
5517 list_splice_init(&sublist, head);
5518
5519 rcu_read_lock();
5520#ifdef CONFIG_RPS
5521 if (static_branch_unlikely(&rps_needed)) {
5522 list_for_each_entry_safe(skb, next, head, list) {
5523 struct rps_dev_flow voidflow, *rflow = &voidflow;
5524 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
5525
5526 if (cpu >= 0) {
5527
5528 skb_list_del_init(skb);
5529 enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
5530 }
5531 }
5532 }
5533#endif
5534 __netif_receive_skb_list(head);
5535 rcu_read_unlock();
5536}
5537
/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds. The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */
5553int netif_receive_skb(struct sk_buff *skb)
5554{
5555 int ret;
5556
5557 trace_netif_receive_skb_entry(skb);
5558
5559 ret = netif_receive_skb_internal(skb);
5560 trace_netif_receive_skb_exit(ret);
5561
5562 return ret;
5563}
5564EXPORT_SYMBOL(netif_receive_skb);
5565
/**
 * netif_receive_skb_list - process many receive buffers from network
 * @head: list of skbs to process.
 *
 * Since the return value of netif_receive_skb() is normally ignored, and
 * wouldn't be meaningful for a list, this function returns void.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 */
5576void netif_receive_skb_list(struct list_head *head)
5577{
5578 struct sk_buff *skb;
5579
5580 if (list_empty(head))
5581 return;
5582 if (trace_netif_receive_skb_list_entry_enabled()) {
5583 list_for_each_entry(skb, head, list)
5584 trace_netif_receive_skb_list_entry(skb);
5585 }
5586 netif_receive_skb_list_internal(head);
5587 trace_netif_receive_skb_list_exit(0);
5588}
5589EXPORT_SYMBOL(netif_receive_skb_list);
5590
5591static DEFINE_PER_CPU(struct work_struct, flush_works);
5592
/* Flush backlog queues of skbs whose device is being unregistered. */
5594static void flush_backlog(struct work_struct *work)
5595{
5596 struct sk_buff *skb, *tmp;
5597 struct softnet_data *sd;
5598
5599 local_bh_disable();
5600 sd = this_cpu_ptr(&softnet_data);
5601
5602 local_irq_disable();
5603 rps_lock(sd);
5604 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
5605 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5606 __skb_unlink(skb, &sd->input_pkt_queue);
5607 dev_kfree_skb_irq(skb);
5608 input_queue_head_incr(sd);
5609 }
5610 }
5611 rps_unlock(sd);
5612 local_irq_enable();
5613
5614 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
5615 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
5616 __skb_unlink(skb, &sd->process_queue);
5617 kfree_skb(skb);
5618 input_queue_head_incr(sd);
5619 }
5620 }
5621 local_bh_enable();
5622}
5623
5624static void flush_all_backlogs(void)
5625{
5626 unsigned int cpu;
5627
5628 get_online_cpus();
5629
5630 for_each_online_cpu(cpu)
5631 queue_work_on(cpu, system_highpri_wq,
5632 per_cpu_ptr(&flush_works, cpu));
5633
5634 for_each_online_cpu(cpu)
5635 flush_work(per_cpu_ptr(&flush_works, cpu));
5636
5637 put_online_cpus();
5638}
5639
/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
5641static void gro_normal_list(struct napi_struct *napi)
5642{
5643 if (!napi->rx_count)
5644 return;
5645 netif_receive_skb_list_internal(&napi->rx_list);
5646 INIT_LIST_HEAD(&napi->rx_list);
5647 napi->rx_count = 0;
5648}
5649
/* Queue one GRO_NORMAL SKB up for list processing.  If the batch size is
 * exceeded, pass the whole batch up to the stack.
 */
5653static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb)
5654{
5655 list_add_tail(&skb->list, &napi->rx_list);
5656 if (++napi->rx_count >= gro_normal_batch)
5657 gro_normal_list(napi);
5658}
5659
5660INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
5661INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
5662static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
5663{
5664 struct packet_offload *ptype;
5665 __be16 type = skb->protocol;
5666 struct list_head *head = &offload_base;
5667 int err = -ENOENT;
5668
5669 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
5670
5671 if (NAPI_GRO_CB(skb)->count == 1) {
5672 skb_shinfo(skb)->gso_size = 0;
5673 goto out;
5674 }
5675
5676 rcu_read_lock();
5677 list_for_each_entry_rcu(ptype, head, list) {
5678 if (ptype->type != type || !ptype->callbacks.gro_complete)
5679 continue;
5680
5681 err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
5682 ipv6_gro_complete, inet_gro_complete,
5683 skb, 0);
5684 break;
5685 }
5686 rcu_read_unlock();
5687
5688 if (err) {
5689 WARN_ON(&ptype->list == head);
5690 kfree_skb(skb);
5691 return NET_RX_SUCCESS;
5692 }
5693
5694out:
5695 gro_normal_one(napi, skb);
5696 return NET_RX_SUCCESS;
5697}
5698
5699static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
5700 bool flush_old)
5701{
5702 struct list_head *head = &napi->gro_hash[index].list;
5703 struct sk_buff *skb, *p;
5704
5705 list_for_each_entry_safe_reverse(skb, p, head, list) {
5706 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
5707 return;
5708 skb_list_del_init(skb);
5709 napi_gro_complete(napi, skb);
5710 napi->gro_hash[index].count--;
5711 }
5712
5713 if (!napi->gro_hash[index].count)
5714 __clear_bit(index, &napi->gro_bitmask);
5715}
5716
/* napi->gro_hash[].list contains packets ordered by age;
 * the youngest packets are at the head of each list.
 * Complete skbs in reverse order to reduce latencies.
 */
5721void napi_gro_flush(struct napi_struct *napi, bool flush_old)
5722{
5723 unsigned long bitmask = napi->gro_bitmask;
5724 unsigned int i, base = ~0U;
5725
5726 while ((i = ffs(bitmask)) != 0) {
5727 bitmask >>= i;
5728 base += i;
5729 __napi_gro_flush_chain(napi, base, flush_old);
5730 }
5731}
5732EXPORT_SYMBOL(napi_gro_flush);
5733
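/* Pick the GRO hash bucket for this skb and mark which held packets
 * belong to the same flow (same hash, device, VLAN tag and MAC header).
 */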
5734static struct list_head *gro_list_prepare(struct napi_struct *napi,
5735 struct sk_buff *skb)
5736{
5737 unsigned int maclen = skb->dev->hard_header_len;
5738 u32 hash = skb_get_hash_raw(skb);
5739 struct list_head *head;
5740 struct sk_buff *p;
5741
5742 head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
5743 list_for_each_entry(p, head, list) {
5744 unsigned long diffs;
5745
5746 NAPI_GRO_CB(p)->flush = 0;
5747
5748 if (hash != skb_get_hash_raw(p)) {
5749 NAPI_GRO_CB(p)->same_flow = 0;
5750 continue;
5751 }
5752
5753 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
5754 diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
5755 if (skb_vlan_tag_present(p))
5756 diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
5757 diffs |= skb_metadata_dst_cmp(p, skb);
5758 diffs |= skb_metadata_differs(p, skb);
5759 if (maclen == ETH_HLEN)
5760 diffs |= compare_ether_header(skb_mac_header(p),
5761 skb_mac_header(skb));
5762 else if (!diffs)
5763 diffs = memcmp(skb_mac_header(p),
5764 skb_mac_header(skb),
5765 maclen);
5766 NAPI_GRO_CB(p)->same_flow = !diffs;
5767 }
5768
5769 return head;
5770}
5771
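/* Initialise the GRO control block; if the skb has no linear data,
 * expose the first page fragment via frag0 so headers can be read
 * without pulling them into the linear area.
 */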
5772static void skb_gro_reset_offset(struct sk_buff *skb)
5773{
5774 const struct skb_shared_info *pinfo = skb_shinfo(skb);
5775 const skb_frag_t *frag0 = &pinfo->frags[0];
5776
5777 NAPI_GRO_CB(skb)->data_offset = 0;
5778 NAPI_GRO_CB(skb)->frag0 = NULL;
5779 NAPI_GRO_CB(skb)->frag0_len = 0;
5780
5781 if (!skb_headlen(skb) && pinfo->nr_frags &&
5782 !PageHighMem(skb_frag_page(frag0))) {
5783 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
5784 NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
5785 skb_frag_size(frag0),
5786 skb->end - skb->tail);
5787 }
5788}
5789
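/* Copy 'grow' bytes of header from frag0 into the skb's linear area and
 * shrink the first fragment accordingly (dropping it if it becomes empty).
 */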
5790static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
5791{
5792 struct skb_shared_info *pinfo = skb_shinfo(skb);
5793
5794 BUG_ON(skb->end - skb->tail < grow);
5795
5796 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
5797
5798 skb->data_len -= grow;
5799 skb->tail += grow;
5800
5801 skb_frag_off_add(&pinfo->frags[0], grow);
5802 skb_frag_size_sub(&pinfo->frags[0], grow);
5803
5804 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
5805 skb_frag_unref(skb, 0);
5806 memmove(pinfo->frags, pinfo->frags + 1,
5807 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
5808 }
5809}
5810
5811static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
5812{
5813 struct sk_buff *oldest;
5814
5815 oldest = list_last_entry(head, struct sk_buff, list);
5816
/* We are called with head length >= MAX_GRO_SKBS, so this is
 * impossible.
 */
5820 if (WARN_ON_ONCE(!oldest))
5821 return;
5822
/* Do not adjust napi->gro_hash[].count, the caller is adding a
 * new skb to the chain.
 */
5826 skb_list_del_init(oldest);
5827 napi_gro_complete(napi, oldest);
5828}
5829
5830INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
5831 struct sk_buff *));
5832INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
5833 struct sk_buff *));
5834static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
5835{
5836 u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
5837 struct list_head *head = &offload_base;
5838 struct packet_offload *ptype;
5839 __be16 type = skb->protocol;
5840 struct list_head *gro_head;
5841 struct sk_buff *pp = NULL;
5842 enum gro_result ret;
5843 int same_flow;
5844 int grow;
5845
5846 if (netif_elide_gro(skb->dev))
5847 goto normal;
5848
5849 gro_head = gro_list_prepare(napi, skb);
5850
5851 rcu_read_lock();
5852 list_for_each_entry_rcu(ptype, head, list) {
5853 if (ptype->type != type || !ptype->callbacks.gro_receive)
5854 continue;
5855
5856 skb_set_network_header(skb, skb_gro_offset(skb));
5857 skb_reset_mac_len(skb);
5858 NAPI_GRO_CB(skb)->same_flow = 0;
5859 NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
5860 NAPI_GRO_CB(skb)->free = 0;
5861 NAPI_GRO_CB(skb)->encap_mark = 0;
5862 NAPI_GRO_CB(skb)->recursion_counter = 0;
5863 NAPI_GRO_CB(skb)->is_fou = 0;
5864 NAPI_GRO_CB(skb)->is_atomic = 1;
5865 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
5866
5867
5868 switch (skb->ip_summed) {
5869 case CHECKSUM_COMPLETE:
5870 NAPI_GRO_CB(skb)->csum = skb->csum;
5871 NAPI_GRO_CB(skb)->csum_valid = 1;
5872 NAPI_GRO_CB(skb)->csum_cnt = 0;
5873 break;
5874 case CHECKSUM_UNNECESSARY:
5875 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
5876 NAPI_GRO_CB(skb)->csum_valid = 0;
5877 break;
5878 default:
5879 NAPI_GRO_CB(skb)->csum_cnt = 0;
5880 NAPI_GRO_CB(skb)->csum_valid = 0;
5881 }
5882
5883 pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
5884 ipv6_gro_receive, inet_gro_receive,
5885 gro_head, skb);
5886 break;
5887 }
5888 rcu_read_unlock();
5889
5890 if (&ptype->list == head)
5891 goto normal;
5892
5893 if (PTR_ERR(pp) == -EINPROGRESS) {
5894 ret = GRO_CONSUMED;
5895 goto ok;
5896 }
5897
5898 same_flow = NAPI_GRO_CB(skb)->same_flow;
5899 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
5900
5901 if (pp) {
5902 skb_list_del_init(pp);
5903 napi_gro_complete(napi, pp);
5904 napi->gro_hash[hash].count--;
5905 }
5906
5907 if (same_flow)
5908 goto ok;
5909
5910 if (NAPI_GRO_CB(skb)->flush)
5911 goto normal;
5912
5913 if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
5914 gro_flush_oldest(napi, gro_head);
5915 } else {
5916 napi->gro_hash[hash].count++;
5917 }
5918 NAPI_GRO_CB(skb)->count = 1;
5919 NAPI_GRO_CB(skb)->age = jiffies;
5920 NAPI_GRO_CB(skb)->last = skb;
5921 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
5922 list_add(&skb->list, gro_head);
5923 ret = GRO_HELD;
5924
5925pull:
5926 grow = skb_gro_offset(skb) - skb_headlen(skb);
5927 if (grow > 0)
5928 gro_pull_from_frag0(skb, grow);
5929ok:
5930 if (napi->gro_hash[hash].count) {
5931 if (!test_bit(hash, &napi->gro_bitmask))
5932 __set_bit(hash, &napi->gro_bitmask);
5933 } else if (test_bit(hash, &napi->gro_bitmask)) {
5934 __clear_bit(hash, &napi->gro_bitmask);
5935 }
5936
5937 return ret;
5938
5939normal:
5940 ret = GRO_NORMAL;
5941 goto pull;
5942}
5943
5944struct packet_offload *gro_find_receive_by_type(__be16 type)
5945{
5946 struct list_head *offload_head = &offload_base;
5947 struct packet_offload *ptype;
5948
5949 list_for_each_entry_rcu(ptype, offload_head, list) {
5950 if (ptype->type != type || !ptype->callbacks.gro_receive)
5951 continue;
5952 return ptype;
5953 }
5954 return NULL;
5955}
5956EXPORT_SYMBOL(gro_find_receive_by_type);
5957
5958struct packet_offload *gro_find_complete_by_type(__be16 type)
5959{
5960 struct list_head *offload_head = &offload_base;
5961 struct packet_offload *ptype;
5962
5963 list_for_each_entry_rcu(ptype, offload_head, list) {
5964 if (ptype->type != type || !ptype->callbacks.gro_complete)
5965 continue;
5966 return ptype;
5967 }
5968 return NULL;
5969}
5970EXPORT_SYMBOL(gro_find_complete_by_type);
5971
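/* The skb head was merged into another packet (NAPI_GRO_FREE_STOLEN_HEAD);
 * drop its dst and extensions and return the head to the slab cache.
 */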
5972static void napi_skb_free_stolen_head(struct sk_buff *skb)
5973{
5974 skb_dst_drop(skb);
5975 skb_ext_put(skb);
5976 kmem_cache_free(skbuff_head_cache, skb);
5977}
5978
5979static gro_result_t napi_skb_finish(struct napi_struct *napi,
5980 struct sk_buff *skb,
5981 gro_result_t ret)
5982{
5983 switch (ret) {
5984 case GRO_NORMAL:
5985 gro_normal_one(napi, skb);
5986 break;
5987
5988 case GRO_DROP:
5989 kfree_skb(skb);
5990 break;
5991
5992 case GRO_MERGED_FREE:
5993 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
5994 napi_skb_free_stolen_head(skb);
5995 else
5996 __kfree_skb(skb);
5997 break;
5998
5999 case GRO_HELD:
6000 case GRO_MERGED:
6001 case GRO_CONSUMED:
6002 break;
6003 }
6004
6005 return ret;
6006}
6007
6008gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
6009{
6010 gro_result_t ret;
6011
6012 skb_mark_napi_id(skb, napi);
6013 trace_napi_gro_receive_entry(skb);
6014
6015 skb_gro_reset_offset(skb);
6016
6017 ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
6018 trace_napi_gro_receive_exit(ret);
6019
6020 return ret;
6021}
6022EXPORT_SYMBOL(napi_gro_receive);
6023
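/* Recycle napi->skb for the next napi_gro_frags() call, undoing the
 * fields GRO and eth_type_trans() may have set.
 */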
6024static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
6025{
6026 if (unlikely(skb->pfmemalloc)) {
6027 consume_skb(skb);
6028 return;
6029 }
6030 __skb_pull(skb, skb_headlen(skb));
6031
6032 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
6033 __vlan_hwaccel_clear_tag(skb);
6034 skb->dev = napi->dev;
6035 skb->skb_iif = 0;
6036
6037
6038 skb->pkt_type = PACKET_HOST;
6039
6040 skb->encapsulation = 0;
6041 skb_shinfo(skb)->gso_type = 0;
6042 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
6043 skb_ext_reset(skb);
6044
6045 napi->skb = skb;
6046}
6047
6048struct sk_buff *napi_get_frags(struct napi_struct *napi)
6049{
6050 struct sk_buff *skb = napi->skb;
6051
6052 if (!skb) {
6053 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
6054 if (skb) {
6055 napi->skb = skb;
6056 skb_mark_napi_id(skb, napi);
6057 }
6058 }
6059 return skb;
6060}
6061EXPORT_SYMBOL(napi_get_frags);
6062
6063static gro_result_t napi_frags_finish(struct napi_struct *napi,
6064 struct sk_buff *skb,
6065 gro_result_t ret)
6066{
6067 switch (ret) {
6068 case GRO_NORMAL:
6069 case GRO_HELD:
6070 __skb_push(skb, ETH_HLEN);
6071 skb->protocol = eth_type_trans(skb, skb->dev);
6072 if (ret == GRO_NORMAL)
6073 gro_normal_one(napi, skb);
6074 break;
6075
6076 case GRO_DROP:
6077 napi_reuse_skb(napi, skb);
6078 break;
6079
6080 case GRO_MERGED_FREE:
6081 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
6082 napi_skb_free_stolen_head(skb);
6083 else
6084 napi_reuse_skb(napi, skb);
6085 break;
6086
6087 case GRO_MERGED:
6088 case GRO_CONSUMED:
6089 break;
6090 }
6091
6092 return ret;
6093}
6094
/* The upper GRO stack assumes the network header starts at gro_offset=0.
 * Drivers may call both napi_gro_frags() and napi_gro_receive(),
 * so copy the Ethernet header into skb->data to get a common layout.
 */
6099static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
6100{
6101 struct sk_buff *skb = napi->skb;
6102 const struct ethhdr *eth;
6103 unsigned int hlen = sizeof(*eth);
6104
6105 napi->skb = NULL;
6106
6107 skb_reset_mac_header(skb);
6108 skb_gro_reset_offset(skb);
6109
6110 if (unlikely(skb_gro_header_hard(skb, hlen))) {
6111 eth = skb_gro_header_slow(skb, hlen, 0);
6112 if (unlikely(!eth)) {
6113 net_warn_ratelimited("%s: dropping impossible skb from %s\n",
6114 __func__, napi->dev->name);
6115 napi_reuse_skb(napi, skb);
6116 return NULL;
6117 }
6118 } else {
6119 eth = (const struct ethhdr *)skb->data;
6120 gro_pull_from_frag0(skb, hlen);
6121 NAPI_GRO_CB(skb)->frag0 += hlen;
6122 NAPI_GRO_CB(skb)->frag0_len -= hlen;
6123 }
6124 __skb_pull(skb, hlen);
6125
/*
 * This works because the only protocols we care about don't require
 * special handling.
 * We'll fix it up properly in napi_frags_finish().
 */
6131 skb->protocol = eth->h_proto;
6132
6133 return skb;
6134}
6135
6136gro_result_t napi_gro_frags(struct napi_struct *napi)
6137{
6138 gro_result_t ret;
6139 struct sk_buff *skb = napi_frags_skb(napi);
6140
6141 if (!skb)
6142 return GRO_DROP;
6143
6144 trace_napi_gro_frags_entry(skb);
6145
6146 ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
6147 trace_napi_gro_frags_exit(ret);
6148
6149 return ret;
6150}
6151EXPORT_SYMBOL(napi_gro_frags);
6152
6153
6154
6155
6156__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
6157{
6158 __wsum wsum;
6159 __sum16 sum;
6160
6161 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
6162
6163
6164 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
6165
6166 if (likely(!sum)) {
6167 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
6168 !skb->csum_complete_sw)
6169 netdev_rx_csum_fault(skb->dev, skb);
6170 }
6171
6172 NAPI_GRO_CB(skb)->csum = wsum;
6173 NAPI_GRO_CB(skb)->csum_valid = 1;
6174
6175 return sum;
6176}
6177EXPORT_SYMBOL(__skb_gro_checksum_complete);
6178
6179static void net_rps_send_ipi(struct softnet_data *remsd)
6180{
6181#ifdef CONFIG_RPS
6182 while (remsd) {
6183 struct softnet_data *next = remsd->rps_ipi_next;
6184
6185 if (cpu_online(remsd->cpu))
6186 smp_call_function_single_async(remsd->cpu, &remsd->csd);
6187 remsd = next;
6188 }
6189#endif
6190}
6191
/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
6196static void net_rps_action_and_irq_enable(struct softnet_data *sd)
6197{
6198#ifdef CONFIG_RPS
6199 struct softnet_data *remsd = sd->rps_ipi_list;
6200
6201 if (remsd) {
6202 sd->rps_ipi_list = NULL;
6203
6204 local_irq_enable();
6205
6206
6207 net_rps_send_ipi(remsd);
6208 } else
6209#endif
6210 local_irq_enable();
6211}
6212
6213static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
6214{
6215#ifdef CONFIG_RPS
6216 return sd->rps_ipi_list != NULL;
6217#else
6218 return false;
6219#endif
6220}
6221
6222static int process_backlog(struct napi_struct *napi, int quota)
6223{
6224 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
6225 bool again = true;
6226 int work = 0;
6227
6228
/* Check if we have pending IPIs; it is better to send them now
 * than to wait until net_rx_action() ends.
 */
6231 if (sd_has_rps_ipi_waiting(sd)) {
6232 local_irq_disable();
6233 net_rps_action_and_irq_enable(sd);
6234 }
6235
6236 napi->weight = dev_rx_weight;
6237 while (again) {
6238 struct sk_buff *skb;
6239
6240 while ((skb = __skb_dequeue(&sd->process_queue))) {
6241 rcu_read_lock();
6242 __netif_receive_skb(skb);
6243 rcu_read_unlock();
6244 input_queue_head_incr(sd);
6245 if (++work >= quota)
6246 return work;
6247
6248 }
6249
6250 local_irq_disable();
6251 rps_lock(sd);
6252 if (skb_queue_empty(&sd->input_pkt_queue)) {
/*
 * Inline a custom version of __napi_complete().
 * Only the current cpu owns and manipulates this napi,
 * and NAPI_STATE_SCHED is the only possible flag set
 * on the backlog.
 * We can use a plain write instead of clear_bit(),
 * and we don't need an smp_mb() memory barrier.
 */
6261 napi->state = 0;
6262 again = false;
6263 } else {
6264 skb_queue_splice_tail_init(&sd->input_pkt_queue,
6265 &sd->process_queue);
6266 }
6267 rps_unlock(sd);
6268 local_irq_enable();
6269 }
6270
6271 return work;
6272}
6273
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
 */
6281void __napi_schedule(struct napi_struct *n)
6282{
6283 unsigned long flags;
6284
6285 local_irq_save(flags);
6286 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
6287 local_irq_restore(flags);
6288}
6289EXPORT_SYMBOL(__napi_schedule);
6290
/**
 * napi_schedule_prep - check if napi can be scheduled
 * @n: napi context
 *
 * Test if the NAPI routine is already running, and if not mark
 * it as running.  This is used as a condition variable to
 * ensure only one NAPI poll instance runs.  We also make
 * sure there is no pending NAPI disable.
 */
6300bool napi_schedule_prep(struct napi_struct *n)
6301{
6302 unsigned long val, new;
6303
6304 do {
6305 val = READ_ONCE(n->state);
6306 if (unlikely(val & NAPIF_STATE_DISABLE))
6307 return false;
6308 new = val | NAPIF_STATE_SCHED;
6309
/* Sets the STATE_MISSED bit if STATE_SCHED was already set.
 * This arithmetic form is equivalent to:
 *	if (val & NAPIF_STATE_SCHED)
 *		new |= NAPIF_STATE_MISSED;
 * but avoids a conditional branch.
 */
6316 new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
6317 NAPIF_STATE_MISSED;
6318 } while (cmpxchg(&n->state, val, new) != val);
6319
6320 return !(val & NAPIF_STATE_SCHED);
6321}
6322EXPORT_SYMBOL(napi_schedule_prep);
6323
/**
 * __napi_schedule_irqoff - schedule for receive
 * @n: entry to schedule
 *
 * Variant of __napi_schedule() assuming hard irqs are masked.
 */
6330void __napi_schedule_irqoff(struct napi_struct *n)
6331{
6332 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
6333}
6334EXPORT_SYMBOL(__napi_schedule_irqoff);
6335
6336bool napi_complete_done(struct napi_struct *n, int work_done)
6337{
6338 unsigned long flags, val, new, timeout = 0;
6339 bool ret = true;
6340
/*
 * 1) Don't let napi dequeue from the cpu poll list
 *    just in case it is running on a different cpu.
 * 2) If we are busy polling, do nothing here; we have
 *    the guarantee we will be called later.
 */
6347 if (unlikely(n->state & (NAPIF_STATE_NPSVC |
6348 NAPIF_STATE_IN_BUSY_POLL)))
6349 return false;
6350
6351 if (work_done) {
6352 if (n->gro_bitmask)
6353 timeout = READ_ONCE(n->dev->gro_flush_timeout);
6354 n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
6355 }
6356 if (n->defer_hard_irqs_count > 0) {
6357 n->defer_hard_irqs_count--;
6358 timeout = READ_ONCE(n->dev->gro_flush_timeout);
6359 if (timeout)
6360 ret = false;
6361 }
6362 if (n->gro_bitmask) {
/* When the NAPI instance uses a timeout and keeps postponing
 * it, we need to bound somehow the time packets are kept in
 * the GRO layer.
 */
6367 napi_gro_flush(n, !!timeout);
6368 }
6369
6370 gro_normal_list(n);
6371
6372 if (unlikely(!list_empty(&n->poll_list))) {
6373
6374 local_irq_save(flags);
6375 list_del_init(&n->poll_list);
6376 local_irq_restore(flags);
6377 }
6378
6379 do {
6380 val = READ_ONCE(n->state);
6381
6382 WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
6383
6384 new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
6385
/* If STATE_MISSED was set, leave STATE_SCHED set,
 * because we will call napi->poll() one more time.
 * (branchless equivalent of testing NAPIF_STATE_MISSED)
 */
6390 new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
6391 NAPIF_STATE_SCHED;
6392 } while (cmpxchg(&n->state, val, new) != val);
6393
6394 if (unlikely(val & NAPIF_STATE_MISSED)) {
6395 __napi_schedule(n);
6396 return false;
6397 }
6398
6399 if (timeout)
6400 hrtimer_start(&n->timer, ns_to_ktime(timeout),
6401 HRTIMER_MODE_REL_PINNED);
6402 return ret;
6403}
6404EXPORT_SYMBOL(napi_complete_done);
6405
/* must be called under rcu_read_lock(), as we don't take a reference */
6407static struct napi_struct *napi_by_id(unsigned int napi_id)
6408{
6409 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
6410 struct napi_struct *napi;
6411
6412 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
6413 if (napi->napi_id == napi_id)
6414 return napi;
6415
6416 return NULL;
6417}
6418
6419#if defined(CONFIG_NET_RX_BUSY_POLL)
6420
6421#define BUSY_POLL_BUDGET 8
6422
6423static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
6424{
6425 int rc;
6426
/* Busy polling means there is a high chance the device driver hard irq
 * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
 * set in napi_schedule_prep().
 * Since we are about to call napi->poll() once more, we can safely
 * clear NAPI_STATE_MISSED.
 *
 * Note: x86 could use a single "cmpxchg with pair of bits"
 * to perform the dance instead of 2 operations.
 */
6436 clear_bit(NAPI_STATE_MISSED, &napi->state);
6437 clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
6438
6439 local_bh_disable();
6440
6441
6442
6443
6444 rc = napi->poll(napi, BUSY_POLL_BUDGET);
6445
6446
6447
6448
6449 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
6450 netpoll_poll_unlock(have_poll_lock);
6451 if (rc == BUSY_POLL_BUDGET) {
6452
6453
6454
6455 gro_normal_list(napi);
6456 __napi_schedule(napi);
6457 }
6458 local_bh_enable();
6459}
6460
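/* Busy-poll the napi matching @napi_id until loop_end() says to stop
 * (with a NULL loop_end, poll just once).  Ownership is taken via
 * NAPI_STATE_SCHED so it does not race with the regular softirq poll.
 */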
6461void napi_busy_loop(unsigned int napi_id,
6462 bool (*loop_end)(void *, unsigned long),
6463 void *loop_end_arg)
6464{
6465 unsigned long start_time = loop_end ? busy_loop_current_time() : 0;
6466 int (*napi_poll)(struct napi_struct *napi, int budget);
6467 void *have_poll_lock = NULL;
6468 struct napi_struct *napi;
6469
6470restart:
6471 napi_poll = NULL;
6472
6473 rcu_read_lock();
6474
6475 napi = napi_by_id(napi_id);
6476 if (!napi)
6477 goto out;
6478
6479 preempt_disable();
6480 for (;;) {
6481 int work = 0;
6482
6483 local_bh_disable();
6484 if (!napi_poll) {
6485 unsigned long val = READ_ONCE(napi->state);
6486
6487
6488
6489
6490 if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED |
6491 NAPIF_STATE_IN_BUSY_POLL))
6492 goto count;
6493 if (cmpxchg(&napi->state, val,
6494 val | NAPIF_STATE_IN_BUSY_POLL |
6495 NAPIF_STATE_SCHED) != val)
6496 goto count;
6497 have_poll_lock = netpoll_poll_lock(napi);
6498 napi_poll = napi->poll;
6499 }
6500 work = napi_poll(napi, BUSY_POLL_BUDGET);
6501 trace_napi_poll(napi, work, BUSY_POLL_BUDGET);
6502 gro_normal_list(napi);
6503count:
6504 if (work > 0)
6505 __NET_ADD_STATS(dev_net(napi->dev),
6506 LINUX_MIB_BUSYPOLLRXPACKETS, work);
6507 local_bh_enable();
6508
6509 if (!loop_end || loop_end(loop_end_arg, start_time))
6510 break;
6511
6512 if (unlikely(need_resched())) {
6513 if (napi_poll)
6514 busy_poll_stop(napi, have_poll_lock);
6515 preempt_enable();
6516 rcu_read_unlock();
6517 cond_resched();
6518 if (loop_end(loop_end_arg, start_time))
6519 return;
6520 goto restart;
6521 }
6522 cpu_relax();
6523 }
6524 if (napi_poll)
6525 busy_poll_stop(napi, have_poll_lock);
6526 preempt_enable();
6527out:
6528 rcu_read_unlock();
6529}
6530EXPORT_SYMBOL(napi_busy_loop);
6531
6532#endif
6533
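/* Assign the napi a unique id (>= MIN_NAPI_ID) and add it to the hash
 * used by busy polling to look NAPIs up by id.
 */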
6534static void napi_hash_add(struct napi_struct *napi)
6535{
6536 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
6537 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
6538 return;
6539
6540 spin_lock(&napi_hash_lock);
6541
6542
6543 do {
6544 if (unlikely(++napi_gen_id < MIN_NAPI_ID))
6545 napi_gen_id = MIN_NAPI_ID;
6546 } while (napi_by_id(napi_gen_id));
6547 napi->napi_id = napi_gen_id;
6548
6549 hlist_add_head_rcu(&napi->napi_hash_node,
6550 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
6551
6552 spin_unlock(&napi_hash_lock);
6553}
6554
/* Warning: the caller is responsible for making sure an RCU grace period
 * is respected before freeing the memory containing @napi.
 */
6558bool napi_hash_del(struct napi_struct *napi)
6559{
6560 bool rcu_sync_needed = false;
6561
6562 spin_lock(&napi_hash_lock);
6563
6564 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
6565 rcu_sync_needed = true;
6566 hlist_del_rcu(&napi->napi_hash_node);
6567 }
6568 spin_unlock(&napi_hash_lock);
6569 return rcu_sync_needed;
6570}
6571EXPORT_SYMBOL_GPL(napi_hash_del);
6572
6573static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
6574{
6575 struct napi_struct *napi;
6576
6577 napi = container_of(timer, struct napi_struct, timer);
6578
/* Note: we use a relaxed variant of napi_schedule_prep(), not setting
 * NAPI_STATE_MISSED, since we do not react to a device IRQ here.
 */
6582 if (!napi_disable_pending(napi) &&
6583 !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
6584 __napi_schedule_irqoff(napi);
6585
6586 return HRTIMER_NORESTART;
6587}
6588
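/* Reset every GRO hash bucket and the bitmask of non-empty buckets. */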
6589static void init_gro_hash(struct napi_struct *napi)
6590{
6591 int i;
6592
6593 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6594 INIT_LIST_HEAD(&napi->gro_hash[i].list);
6595 napi->gro_hash[i].count = 0;
6596 }
6597 napi->gro_bitmask = 0;
6598}
6599
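/* netif_napi_add - initialise a NAPI context and attach it to @dev.
 * @poll is the driver's poll callback and @weight its per-round budget
 * (normally NAPI_POLL_WEIGHT).  The context starts with SCHED | NPSVC
 * set, i.e. effectively disabled, so a typical driver pairs this with a
 * later napi_enable() once the device is ready to receive.
 */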
6600void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
6601 int (*poll)(struct napi_struct *, int), int weight)
6602{
6603 INIT_LIST_HEAD(&napi->poll_list);
6604 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
6605 napi->timer.function = napi_watchdog;
6606 init_gro_hash(napi);
6607 napi->skb = NULL;
6608 INIT_LIST_HEAD(&napi->rx_list);
6609 napi->rx_count = 0;
6610 napi->poll = poll;
6611 if (weight > NAPI_POLL_WEIGHT)
6612 netdev_err_once(dev, "%s() called with weight %d\n", __func__,
6613 weight);
6614 napi->weight = weight;
6615 napi->dev = dev;
6616#ifdef CONFIG_NETPOLL
6617 napi->poll_owner = -1;
6618#endif
6619 set_bit(NAPI_STATE_SCHED, &napi->state);
6620 set_bit(NAPI_STATE_NPSVC, &napi->state);
6621 list_add_rcu(&napi->dev_list, &dev->napi_list);
6622 napi_hash_add(napi);
6623}
6624EXPORT_SYMBOL(netif_napi_add);
6625
6626void napi_disable(struct napi_struct *n)
6627{
6628 might_sleep();
6629 set_bit(NAPI_STATE_DISABLE, &n->state);
6630
6631 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
6632 msleep(1);
6633 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
6634 msleep(1);
6635
6636 hrtimer_cancel(&n->timer);
6637
6638 clear_bit(NAPI_STATE_DISABLE, &n->state);
6639}
6640EXPORT_SYMBOL(napi_disable);
6641
6642static void flush_gro_hash(struct napi_struct *napi)
6643{
6644 int i;
6645
6646 for (i = 0; i < GRO_HASH_BUCKETS; i++) {
6647 struct sk_buff *skb, *n;
6648
6649 list_for_each_entry_safe(skb, n, &napi->gro_hash[i].list, list)
6650 kfree_skb(skb);
6651 napi->gro_hash[i].count = 0;
6652 }
6653}
6654
/* Must be called in process context */
6656void netif_napi_del(struct napi_struct *napi)
6657{
6658 might_sleep();
6659 if (napi_hash_del(napi))
6660 synchronize_net();
6661 list_del_init(&napi->dev_list);
6662 napi_free_frags(napi);
6663
6664 flush_gro_hash(napi);
6665 napi->gro_bitmask = 0;
6666}
6667EXPORT_SYMBOL(netif_napi_del);
6668
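/* Run one NAPI poll round: call ->poll() with the instance's weight and,
 * if the whole budget was used, put it back on @repoll for another pass.
 * Returns the amount of work done.
 */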
6669static int napi_poll(struct napi_struct *n, struct list_head *repoll)
6670{
6671 void *have;
6672 int work, weight;
6673
6674 list_del_init(&n->poll_list);
6675
6676 have = netpoll_poll_lock(n);
6677
6678 weight = n->weight;
6679
/* This NAPI_STATE_SCHED test is for avoiding a race
 * with netpoll's poll_napi().  Only the entity which
 * obtains the lock and sees NAPI_STATE_SCHED set will
 * actually make the ->poll() call.  Therefore we avoid
 * accidentally calling ->poll() when NAPI is not scheduled.
 */
6686 work = 0;
6687 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
6688 work = n->poll(n, weight);
6689 trace_napi_poll(n, work, weight);
6690 }
6691
6692 if (unlikely(work > weight))
6693 pr_err_once("NAPI poll function %pS returned %d, exceeding its budget of %d.\n",
6694 n->poll, work, weight);
6695
6696 if (likely(work < weight))
6697 goto out_unlock;
6698
/* Drivers must not modify the NAPI state if they
 * consume the entire weight.  In such cases this code
 * still "owns" the NAPI instance and therefore can
 * move the instance around on the list at will.
 */
6704 if (unlikely(napi_disable_pending(n))) {
6705 napi_complete(n);
6706 goto out_unlock;
6707 }
6708
6709 if (n->gro_bitmask) {
/* flush too old packets
 * If HZ < 1000, flush all packets.
 */
6713 napi_gro_flush(n, HZ >= 1000);
6714 }
6715
6716 gro_normal_list(n);
6717
/* Some drivers may have called napi_schedule
 * prior to exhausting their budget.
 */
6721 if (unlikely(!list_empty(&n->poll_list))) {
6722 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
6723 n->dev ? n->dev->name : "backlog");
6724 goto out_unlock;
6725 }
6726
6727 list_add_tail(&n->poll_list, repoll);
6728
6729out_unlock:
6730 netpoll_poll_unlock(have);
6731
6732 return work;
6733}
6734
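/* NET_RX softirq handler: poll scheduled NAPI instances until either the
 * packet budget (netdev_budget) or the time limit (netdev_budget_usecs)
 * is exhausted, then reschedule the softirq if work remains.
 */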
6735static __latent_entropy void net_rx_action(struct softirq_action *h)
6736{
6737 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
6738 unsigned long time_limit = jiffies +
6739 usecs_to_jiffies(netdev_budget_usecs);
6740 int budget = netdev_budget;
6741 LIST_HEAD(list);
6742 LIST_HEAD(repoll);
6743
6744 local_irq_disable();
6745 list_splice_init(&sd->poll_list, &list);
6746 local_irq_enable();
6747
6748 for (;;) {
6749 struct napi_struct *n;
6750
6751 if (list_empty(&list)) {
6752 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
6753 goto out;
6754 break;
6755 }
6756
6757 n = list_first_entry(&list, struct napi_struct, poll_list);
6758 budget -= napi_poll(n, &repoll);
6759
/* If the softirq window (packet budget or time limit) is
 * exhausted then punt: leave the rest for the next round.
 */
6764 if (unlikely(budget <= 0 ||
6765 time_after_eq(jiffies, time_limit))) {
6766 sd->time_squeeze++;
6767 break;
6768 }
6769 }
6770
6771 local_irq_disable();
6772
6773 list_splice_tail_init(&sd->poll_list, &list);
6774 list_splice_tail(&repoll, &list);
6775 list_splice(&list, &sd->poll_list);
6776 if (!list_empty(&sd->poll_list))
6777 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
6778
6779 net_rps_action_and_irq_enable(sd);
6780out:
6781 __kfree_skb_flush();
6782}
6783
6784struct netdev_adjacent {
6785 struct net_device *dev;
6786
/* upper master flag, there can only be one master device per list */
6788 bool master;
6789
/* lookup ignore flag */
6791 bool ignore;
6792
/* counter for the number of times this device was added to us */
6794 u16 ref_nr;
6795
/* private field for the users */
6797 void *private;
6798
6799 struct list_head list;
6800 struct rcu_head rcu;
6801};
6802
6803static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
6804 struct list_head *adj_list)
6805{
6806 struct netdev_adjacent *adj;
6807
6808 list_for_each_entry(adj, adj_list, list) {
6809 if (adj->dev == adj_dev)
6810 return adj;
6811 }
6812 return NULL;
6813}
6814
6815static int ____netdev_has_upper_dev(struct net_device *upper_dev,
6816 struct netdev_nested_priv *priv)
6817{
6818 struct net_device *dev = (struct net_device *)priv->data;
6819
6820 return upper_dev == dev;
6821}
6822
/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is. The whole upper device chain is walked.
 * The caller must hold the RTNL lock.
 */
6832bool netdev_has_upper_dev(struct net_device *dev,
6833 struct net_device *upper_dev)
6834{
6835 struct netdev_nested_priv priv = {
6836 .data = (void *)upper_dev,
6837 };
6838
6839 ASSERT_RTNL();
6840
6841 return netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6842 &priv);
6843}
6844EXPORT_SYMBOL(netdev_has_upper_dev);
6845
/**
 * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is. Note that this checks the entire upper device chain.
 * The caller must hold the RCU read lock.
 */
6856bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
6857 struct net_device *upper_dev)
6858{
6859 struct netdev_nested_priv priv = {
6860 .data = (void *)upper_dev,
6861 };
6862
6863 return !!netdev_walk_all_upper_dev_rcu(dev, ____netdev_has_upper_dev,
6864 &priv);
6865}
6866EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
6867
6868
6869
6870
6871
6872
6873
6874
6875bool netdev_has_any_upper_dev(struct net_device *dev)
6876{
6877 ASSERT_RTNL();
6878
6879 return !list_empty(&dev->adj_list.upper);
6880}
6881EXPORT_SYMBOL(netdev_has_any_upper_dev);
6882
6883
6884
6885
6886
6887
6888
6889
6890struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
6891{
6892 struct netdev_adjacent *upper;
6893
6894 ASSERT_RTNL();
6895
6896 if (list_empty(&dev->adj_list.upper))
6897 return NULL;
6898
6899 upper = list_first_entry(&dev->adj_list.upper,
6900 struct netdev_adjacent, list);
6901 if (likely(upper->master))
6902 return upper->dev;
6903 return NULL;
6904}
6905EXPORT_SYMBOL(netdev_master_upper_dev_get);
6906
6907static struct net_device *__netdev_master_upper_dev_get(struct net_device *dev)
6908{
6909 struct netdev_adjacent *upper;
6910
6911 ASSERT_RTNL();
6912
6913 if (list_empty(&dev->adj_list.upper))
6914 return NULL;
6915
6916 upper = list_first_entry(&dev->adj_list.upper,
6917 struct netdev_adjacent, list);
6918 if (likely(upper->master) && !upper->ignore)
6919 return upper->dev;
6920 return NULL;
6921}
6922
6923
6924
6925
6926
6927
6928
6929
6930static bool netdev_has_any_lower_dev(struct net_device *dev)
6931{
6932 ASSERT_RTNL();
6933
6934 return !list_empty(&dev->adj_list.lower);
6935}
6936
6937void *netdev_adjacent_get_private(struct list_head *adj_list)
6938{
6939 struct netdev_adjacent *adj;
6940
6941 adj = list_entry(adj_list, struct netdev_adjacent, list);
6942
6943 return adj->private;
6944}
6945EXPORT_SYMBOL(netdev_adjacent_get_private);
6946
6947
6948
6949
6950
6951
6952
6953
6954
6955struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
6956 struct list_head **iter)
6957{
6958 struct netdev_adjacent *upper;
6959
6960 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6961
6962 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6963
6964 if (&upper->list == &dev->adj_list.upper)
6965 return NULL;
6966
6967 *iter = &upper->list;
6968
6969 return upper->dev;
6970}
6971EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
6972
6973static struct net_device *__netdev_next_upper_dev(struct net_device *dev,
6974 struct list_head **iter,
6975 bool *ignore)
6976{
6977 struct netdev_adjacent *upper;
6978
6979 upper = list_entry((*iter)->next, struct netdev_adjacent, list);
6980
6981 if (&upper->list == &dev->adj_list.upper)
6982 return NULL;
6983
6984 *iter = &upper->list;
6985 *ignore = upper->ignore;
6986
6987 return upper->dev;
6988}
6989
6990static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
6991 struct list_head **iter)
6992{
6993 struct netdev_adjacent *upper;
6994
6995 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
6996
6997 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
6998
6999 if (&upper->list == &dev->adj_list.upper)
7000 return NULL;
7001
7002 *iter = &upper->list;
7003
7004 return upper->dev;
7005}
7006
7007static int __netdev_walk_all_upper_dev(struct net_device *dev,
7008 int (*fn)(struct net_device *dev,
7009 struct netdev_nested_priv *priv),
7010 struct netdev_nested_priv *priv)
7011{
7012 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7013 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7014 int ret, cur = 0;
7015 bool ignore;
7016
7017 now = dev;
7018 iter = &dev->adj_list.upper;
7019
7020 while (1) {
7021 if (now != dev) {
7022 ret = fn(now, priv);
7023 if (ret)
7024 return ret;
7025 }
7026
7027 next = NULL;
7028 while (1) {
7029 udev = __netdev_next_upper_dev(now, &iter, &ignore);
7030 if (!udev)
7031 break;
7032 if (ignore)
7033 continue;
7034
7035 next = udev;
7036 niter = &udev->adj_list.upper;
7037 dev_stack[cur] = now;
7038 iter_stack[cur++] = iter;
7039 break;
7040 }
7041
7042 if (!next) {
7043 if (!cur)
7044 return 0;
7045 next = dev_stack[--cur];
7046 niter = iter_stack[cur];
7047 }
7048
7049 now = next;
7050 iter = niter;
7051 }
7052
7053 return 0;
7054}
7055
7056int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
7057 int (*fn)(struct net_device *dev,
7058 struct netdev_nested_priv *priv),
7059 struct netdev_nested_priv *priv)
7060{
7061 struct net_device *udev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7062 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7063 int ret, cur = 0;
7064
7065 now = dev;
7066 iter = &dev->adj_list.upper;
7067
7068 while (1) {
7069 if (now != dev) {
7070 ret = fn(now, priv);
7071 if (ret)
7072 return ret;
7073 }
7074
7075 next = NULL;
7076 while (1) {
7077 udev = netdev_next_upper_dev_rcu(now, &iter);
7078 if (!udev)
7079 break;
7080
7081 next = udev;
7082 niter = &udev->adj_list.upper;
7083 dev_stack[cur] = now;
7084 iter_stack[cur++] = iter;
7085 break;
7086 }
7087
7088 if (!next) {
7089 if (!cur)
7090 return 0;
7091 next = dev_stack[--cur];
7092 niter = iter_stack[cur];
7093 }
7094
7095 now = next;
7096 iter = niter;
7097 }
7098
7099 return 0;
7100}
7101EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
7102
7103static bool __netdev_has_upper_dev(struct net_device *dev,
7104 struct net_device *upper_dev)
7105{
7106 struct netdev_nested_priv priv = {
7107 .flags = 0,
7108 .data = (void *)upper_dev,
7109 };
7110
7111 ASSERT_RTNL();
7112
7113 return __netdev_walk_all_upper_dev(dev, ____netdev_has_upper_dev,
7114 &priv);
7115}
7116
7117
7118
7119
7120
7121
7122
7123
7124
7125
7126
7127
7128void *netdev_lower_get_next_private(struct net_device *dev,
7129 struct list_head **iter)
7130{
7131 struct netdev_adjacent *lower;
7132
7133 lower = list_entry(*iter, struct netdev_adjacent, list);
7134
7135 if (&lower->list == &dev->adj_list.lower)
7136 return NULL;
7137
7138 *iter = lower->list.next;
7139
7140 return lower->private;
7141}
7142EXPORT_SYMBOL(netdev_lower_get_next_private);
7143
7144
7145
7146
7147
7148
7149
7150
7151
7152
7153
7154void *netdev_lower_get_next_private_rcu(struct net_device *dev,
7155 struct list_head **iter)
7156{
7157 struct netdev_adjacent *lower;
7158
7159 WARN_ON_ONCE(!rcu_read_lock_held());
7160
7161 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
7162
7163 if (&lower->list == &dev->adj_list.lower)
7164 return NULL;
7165
7166 *iter = &lower->list;
7167
7168 return lower->private;
7169}
7170EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
7171
7172
7173
7174
7175
7176
7177
7178
7179
7180
7181
7182
7183void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
7184{
7185 struct netdev_adjacent *lower;
7186
7187 lower = list_entry(*iter, struct netdev_adjacent, list);
7188
7189 if (&lower->list == &dev->adj_list.lower)
7190 return NULL;
7191
7192 *iter = lower->list.next;
7193
7194 return lower->dev;
7195}
7196EXPORT_SYMBOL(netdev_lower_get_next);
7197
7198static struct net_device *netdev_next_lower_dev(struct net_device *dev,
7199 struct list_head **iter)
7200{
7201 struct netdev_adjacent *lower;
7202
7203 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
7204
7205 if (&lower->list == &dev->adj_list.lower)
7206 return NULL;
7207
7208 *iter = &lower->list;
7209
7210 return lower->dev;
7211}
7212
7213static struct net_device *__netdev_next_lower_dev(struct net_device *dev,
7214 struct list_head **iter,
7215 bool *ignore)
7216{
7217 struct netdev_adjacent *lower;
7218
7219 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
7220
7221 if (&lower->list == &dev->adj_list.lower)
7222 return NULL;
7223
7224 *iter = &lower->list;
7225 *ignore = lower->ignore;
7226
7227 return lower->dev;
7228}
7229
7230int netdev_walk_all_lower_dev(struct net_device *dev,
7231 int (*fn)(struct net_device *dev,
7232 struct netdev_nested_priv *priv),
7233 struct netdev_nested_priv *priv)
7234{
7235 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7236 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7237 int ret, cur = 0;
7238
7239 now = dev;
7240 iter = &dev->adj_list.lower;
7241
7242 while (1) {
7243 if (now != dev) {
7244 ret = fn(now, priv);
7245 if (ret)
7246 return ret;
7247 }
7248
7249 next = NULL;
7250 while (1) {
7251 ldev = netdev_next_lower_dev(now, &iter);
7252 if (!ldev)
7253 break;
7254
7255 next = ldev;
7256 niter = &ldev->adj_list.lower;
7257 dev_stack[cur] = now;
7258 iter_stack[cur++] = iter;
7259 break;
7260 }
7261
7262 if (!next) {
7263 if (!cur)
7264 return 0;
7265 next = dev_stack[--cur];
7266 niter = iter_stack[cur];
7267 }
7268
7269 now = next;
7270 iter = niter;
7271 }
7272
7273 return 0;
7274}
7275EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
7276
7277static int __netdev_walk_all_lower_dev(struct net_device *dev,
7278 int (*fn)(struct net_device *dev,
7279 struct netdev_nested_priv *priv),
7280 struct netdev_nested_priv *priv)
7281{
7282 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7283 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7284 int ret, cur = 0;
7285 bool ignore;
7286
7287 now = dev;
7288 iter = &dev->adj_list.lower;
7289
7290 while (1) {
7291 if (now != dev) {
7292 ret = fn(now, priv);
7293 if (ret)
7294 return ret;
7295 }
7296
7297 next = NULL;
7298 while (1) {
7299 ldev = __netdev_next_lower_dev(now, &iter, &ignore);
7300 if (!ldev)
7301 break;
7302 if (ignore)
7303 continue;
7304
7305 next = ldev;
7306 niter = &ldev->adj_list.lower;
7307 dev_stack[cur] = now;
7308 iter_stack[cur++] = iter;
7309 break;
7310 }
7311
7312 if (!next) {
7313 if (!cur)
7314 return 0;
7315 next = dev_stack[--cur];
7316 niter = iter_stack[cur];
7317 }
7318
7319 now = next;
7320 iter = niter;
7321 }
7322
7323 return 0;
7324}
7325
7326struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
7327 struct list_head **iter)
7328{
7329 struct netdev_adjacent *lower;
7330
7331 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
7332 if (&lower->list == &dev->adj_list.lower)
7333 return NULL;
7334
7335 *iter = &lower->list;
7336
7337 return lower->dev;
7338}
7339EXPORT_SYMBOL(netdev_next_lower_dev_rcu);
7340
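/* Depth helpers: return the maximum upper/lower nesting level among the
 * not-ignored adjacent devices, used to keep dev->upper_level and
 * dev->lower_level consistent and bounded by MAX_NEST_DEV.
 */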
7341static u8 __netdev_upper_depth(struct net_device *dev)
7342{
7343 struct net_device *udev;
7344 struct list_head *iter;
7345 u8 max_depth = 0;
7346 bool ignore;
7347
7348 for (iter = &dev->adj_list.upper,
7349 udev = __netdev_next_upper_dev(dev, &iter, &ignore);
7350 udev;
7351 udev = __netdev_next_upper_dev(dev, &iter, &ignore)) {
7352 if (ignore)
7353 continue;
7354 if (max_depth < udev->upper_level)
7355 max_depth = udev->upper_level;
7356 }
7357
7358 return max_depth;
7359}
7360
7361static u8 __netdev_lower_depth(struct net_device *dev)
7362{
7363 struct net_device *ldev;
7364 struct list_head *iter;
7365 u8 max_depth = 0;
7366 bool ignore;
7367
7368 for (iter = &dev->adj_list.lower,
7369 ldev = __netdev_next_lower_dev(dev, &iter, &ignore);
7370 ldev;
7371 ldev = __netdev_next_lower_dev(dev, &iter, &ignore)) {
7372 if (ignore)
7373 continue;
7374 if (max_depth < ldev->lower_level)
7375 max_depth = ldev->lower_level;
7376 }
7377
7378 return max_depth;
7379}
7380
7381static int __netdev_update_upper_level(struct net_device *dev,
7382 struct netdev_nested_priv *__unused)
7383{
7384 dev->upper_level = __netdev_upper_depth(dev) + 1;
7385 return 0;
7386}
7387
7388static int __netdev_update_lower_level(struct net_device *dev,
7389 struct netdev_nested_priv *priv)
7390{
7391 dev->lower_level = __netdev_lower_depth(dev) + 1;
7392
7393#ifdef CONFIG_LOCKDEP
7394 if (!priv)
7395 return 0;
7396
7397 if (priv->flags & NESTED_SYNC_IMM)
7398 dev->nested_level = dev->lower_level - 1;
7399 if (priv->flags & NESTED_SYNC_TODO)
7400 net_unlink_todo(dev);
7401#endif
7402 return 0;
7403}
7404
7405int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
7406 int (*fn)(struct net_device *dev,
7407 struct netdev_nested_priv *priv),
7408 struct netdev_nested_priv *priv)
7409{
7410 struct net_device *ldev, *next, *now, *dev_stack[MAX_NEST_DEV + 1];
7411 struct list_head *niter, *iter, *iter_stack[MAX_NEST_DEV + 1];
7412 int ret, cur = 0;
7413
7414 now = dev;
7415 iter = &dev->adj_list.lower;
7416
7417 while (1) {
7418 if (now != dev) {
7419 ret = fn(now, priv);
7420 if (ret)
7421 return ret;
7422 }
7423
7424 next = NULL;
7425 while (1) {
7426 ldev = netdev_next_lower_dev_rcu(now, &iter);
7427 if (!ldev)
7428 break;
7429
7430 next = ldev;
7431 niter = &ldev->adj_list.lower;
7432 dev_stack[cur] = now;
7433 iter_stack[cur++] = iter;
7434 break;
7435 }
7436
7437 if (!next) {
7438 if (!cur)
7439 return 0;
7440 next = dev_stack[--cur];
7441 niter = iter_stack[cur];
7442 }
7443
7444 now = next;
7445 iter = niter;
7446 }
7447
7448 return 0;
7449}
7450EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
7451
7452
7453
7454
7455
7456
7457
7458
7459
7460
7461void *netdev_lower_get_first_private_rcu(struct net_device *dev)
7462{
7463 struct netdev_adjacent *lower;
7464
7465 lower = list_first_or_null_rcu(&dev->adj_list.lower,
7466 struct netdev_adjacent, list);
7467 if (lower)
7468 return lower->private;
7469 return NULL;
7470}
7471EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
7472
7473
7474
7475
7476
7477
7478
7479
7480struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
7481{
7482 struct netdev_adjacent *upper;
7483
7484 upper = list_first_or_null_rcu(&dev->adj_list.upper,
7485 struct netdev_adjacent, list);
7486 if (upper && likely(upper->master))
7487 return upper->dev;
7488 return NULL;
7489}
7490EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
7491
7492static int netdev_adjacent_sysfs_add(struct net_device *dev,
7493 struct net_device *adj_dev,
7494 struct list_head *dev_list)
7495{
7496 char linkname[IFNAMSIZ+7];
7497
7498 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7499 "upper_%s" : "lower_%s", adj_dev->name);
7500 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
7501 linkname);
7502}
7503static void netdev_adjacent_sysfs_del(struct net_device *dev,
7504 char *name,
7505 struct list_head *dev_list)
7506{
7507 char linkname[IFNAMSIZ+7];
7508
7509 sprintf(linkname, dev_list == &dev->adj_list.upper ?
7510 "upper_%s" : "lower_%s", name);
7511 sysfs_remove_link(&(dev->dev.kobj), linkname);
7512}
7513
7514static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
7515 struct net_device *adj_dev,
7516 struct list_head *dev_list)
7517{
7518 return (dev_list == &dev->adj_list.upper ||
7519 dev_list == &dev->adj_list.lower) &&
7520 net_eq(dev_net(dev), dev_net(adj_dev));
7521}
7522
7523static int __netdev_adjacent_dev_insert(struct net_device *dev,
7524 struct net_device *adj_dev,
7525 struct list_head *dev_list,
7526 void *private, bool master)
7527{
7528 struct netdev_adjacent *adj;
7529 int ret;
7530
7531 adj = __netdev_find_adj(adj_dev, dev_list);
7532
7533 if (adj) {
7534 adj->ref_nr += 1;
7535 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
7536 dev->name, adj_dev->name, adj->ref_nr);
7537
7538 return 0;
7539 }
7540
7541 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
7542 if (!adj)
7543 return -ENOMEM;
7544
7545 adj->dev = adj_dev;
7546 adj->master = master;
7547 adj->ref_nr = 1;
7548 adj->private = private;
7549 adj->ignore = false;
7550 dev_hold(adj_dev);
7551
7552 pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
7553 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
7554
7555 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
7556 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
7557 if (ret)
7558 goto free_adj;
7559 }
7560
7561
7562 if (master) {
7563 ret = sysfs_create_link(&(dev->dev.kobj),
7564 &(adj_dev->dev.kobj), "master");
7565 if (ret)
7566 goto remove_symlinks;
7567
7568 list_add_rcu(&adj->list, dev_list);
7569 } else {
7570 list_add_tail_rcu(&adj->list, dev_list);
7571 }
7572
7573 return 0;
7574
7575remove_symlinks:
7576 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7577 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7578free_adj:
7579 kfree(adj);
7580 dev_put(adj_dev);
7581
7582 return ret;
7583}
7584
7585static void __netdev_adjacent_dev_remove(struct net_device *dev,
7586 struct net_device *adj_dev,
7587 u16 ref_nr,
7588 struct list_head *dev_list)
7589{
7590 struct netdev_adjacent *adj;
7591
7592 pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
7593 dev->name, adj_dev->name, ref_nr);
7594
7595 adj = __netdev_find_adj(adj_dev, dev_list);
7596
7597 if (!adj) {
7598 pr_err("Adjacency does not exist for device %s from %s\n",
7599 dev->name, adj_dev->name);
7600 WARN_ON(1);
7601 return;
7602 }
7603
7604 if (adj->ref_nr > ref_nr) {
7605 pr_debug("adjacency: %s to %s ref_nr - %d = %d\n",
7606 dev->name, adj_dev->name, ref_nr,
7607 adj->ref_nr - ref_nr);
7608 adj->ref_nr -= ref_nr;
7609 return;
7610 }
7611
7612 if (adj->master)
7613 sysfs_remove_link(&(dev->dev.kobj), "master");
7614
7615 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
7616 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
7617
7618 list_del_rcu(&adj->list);
7619 pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
7620 adj_dev->name, dev->name, adj_dev->name);
7621 dev_put(adj_dev);
7622 kfree_rcu(adj, rcu);
7623}
7624
7625static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
7626 struct net_device *upper_dev,
7627 struct list_head *up_list,
7628 struct list_head *down_list,
7629 void *private, bool master)
7630{
7631 int ret;
7632
7633 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list,
7634 private, master);
7635 if (ret)
7636 return ret;
7637
7638 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list,
7639 private, false);
7640 if (ret) {
7641 __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list);
7642 return ret;
7643 }
7644
7645 return 0;
7646}
7647
7648static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
7649 struct net_device *upper_dev,
7650 u16 ref_nr,
7651 struct list_head *up_list,
7652 struct list_head *down_list)
7653{
7654 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
7655 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
7656}
7657
7658static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
7659 struct net_device *upper_dev,
7660 void *private, bool master)
7661{
7662 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
7663 &dev->adj_list.upper,
7664 &upper_dev->adj_list.lower,
7665 private, master);
7666}
7667
7668static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
7669 struct net_device *upper_dev)
7670{
7671 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
7672 &dev->adj_list.upper,
7673 &upper_dev->adj_list.lower);
7674}
7675
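/* Core of netdev_upper_dev_link(): validate that the link creates no loop
 * and does not exceed MAX_NEST_DEV, notify NETDEV_PRECHANGEUPPER, insert
 * the adjacency, notify NETDEV_CHANGEUPPER, then refresh the nesting
 * levels of both devices (rolling back on notifier failure).
 */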
7676static int __netdev_upper_dev_link(struct net_device *dev,
7677 struct net_device *upper_dev, bool master,
7678 void *upper_priv, void *upper_info,
7679 struct netdev_nested_priv *priv,
7680 struct netlink_ext_ack *extack)
7681{
7682 struct netdev_notifier_changeupper_info changeupper_info = {
7683 .info = {
7684 .dev = dev,
7685 .extack = extack,
7686 },
7687 .upper_dev = upper_dev,
7688 .master = master,
7689 .linking = true,
7690 .upper_info = upper_info,
7691 };
7692 struct net_device *master_dev;
7693 int ret = 0;
7694
7695 ASSERT_RTNL();
7696
7697 if (dev == upper_dev)
7698 return -EBUSY;
7699
/* To prevent loops, check that dev is not already an upper device of upper_dev. */
7701 if (__netdev_has_upper_dev(upper_dev, dev))
7702 return -EBUSY;
7703
7704 if ((dev->lower_level + upper_dev->upper_level) > MAX_NEST_DEV)
7705 return -EMLINK;
7706
7707 if (!master) {
7708 if (__netdev_has_upper_dev(dev, upper_dev))
7709 return -EEXIST;
7710 } else {
7711 master_dev = __netdev_master_upper_dev_get(dev);
7712 if (master_dev)
7713 return master_dev == upper_dev ? -EEXIST : -EBUSY;
7714 }
7715
7716 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7717 &changeupper_info.info);
7718 ret = notifier_to_errno(ret);
7719 if (ret)
7720 return ret;
7721
7722 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
7723 master);
7724 if (ret)
7725 return ret;
7726
7727 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7728 &changeupper_info.info);
7729 ret = notifier_to_errno(ret);
7730 if (ret)
7731 goto rollback;
7732
7733 __netdev_update_upper_level(dev, NULL);
7734 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7735
7736 __netdev_update_lower_level(upper_dev, priv);
7737 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7738 priv);
7739
7740 return 0;
7741
7742rollback:
7743 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7744
7745 return ret;
7746}
7747
7748
7749
7750
7751
7752
7753
7754
7755
7756
7757
7758
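/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @extack: netlink extended ack
 *
 * Adds a link to device which is upper to this one. The caller must hold
 * the RTNL lock. On a failure a negative errno code is returned.
 * On success the reference counts are adjusted and the function
 * returns zero.
 */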
7759int netdev_upper_dev_link(struct net_device *dev,
7760 struct net_device *upper_dev,
7761 struct netlink_ext_ack *extack)
7762{
7763 struct netdev_nested_priv priv = {
7764 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7765 .data = NULL,
7766 };
7767
7768 return __netdev_upper_dev_link(dev, upper_dev, false,
7769 NULL, NULL, &priv, extack);
7770}
7771EXPORT_SYMBOL(netdev_upper_dev_link);
7772
7773
7774
7775
7776
7777
7778
7779
7780
7781
7782
7783
7784
7785
7786
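/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @upper_priv: upper device private
 * @upper_info: upper info to be passed down via notifier
 * @extack: netlink extended ack
 *
 * Adds a link to device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */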
7787int netdev_master_upper_dev_link(struct net_device *dev,
7788 struct net_device *upper_dev,
7789 void *upper_priv, void *upper_info,
7790 struct netlink_ext_ack *extack)
7791{
7792 struct netdev_nested_priv priv = {
7793 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7794 .data = NULL,
7795 };
7796
7797 return __netdev_upper_dev_link(dev, upper_dev, true,
7798 upper_priv, upper_info, &priv, extack);
7799}
7800EXPORT_SYMBOL(netdev_master_upper_dev_link);
7801
7802static void __netdev_upper_dev_unlink(struct net_device *dev,
7803 struct net_device *upper_dev,
7804 struct netdev_nested_priv *priv)
7805{
7806 struct netdev_notifier_changeupper_info changeupper_info = {
7807 .info = {
7808 .dev = dev,
7809 },
7810 .upper_dev = upper_dev,
7811 .linking = false,
7812 };
7813
7814 ASSERT_RTNL();
7815
7816 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
7817
7818 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
7819 &changeupper_info.info);
7820
7821 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
7822
7823 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
7824 &changeupper_info.info);
7825
7826 __netdev_update_upper_level(dev, NULL);
7827 __netdev_walk_all_lower_dev(dev, __netdev_update_upper_level, NULL);
7828
7829 __netdev_update_lower_level(upper_dev, priv);
7830 __netdev_walk_all_upper_dev(upper_dev, __netdev_update_lower_level,
7831 priv);
7832}
7833
7834
7835
7836
7837
7838
7839
7840
7841
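/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to device which is upper to this one. The caller must hold
 * the RTNL lock.
 */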
7842void netdev_upper_dev_unlink(struct net_device *dev,
7843 struct net_device *upper_dev)
7844{
7845 struct netdev_nested_priv priv = {
7846 .flags = NESTED_SYNC_TODO,
7847 .data = NULL,
7848 };
7849
7850 __netdev_upper_dev_unlink(dev, upper_dev, &priv);
7851}
7852EXPORT_SYMBOL(netdev_upper_dev_unlink);
7853
7854static void __netdev_adjacent_dev_set(struct net_device *upper_dev,
7855 struct net_device *lower_dev,
7856 bool val)
7857{
7858 struct netdev_adjacent *adj;
7859
7860 adj = __netdev_find_adj(lower_dev, &upper_dev->adj_list.lower);
7861 if (adj)
7862 adj->ignore = val;
7863
7864 adj = __netdev_find_adj(upper_dev, &lower_dev->adj_list.upper);
7865 if (adj)
7866 adj->ignore = val;
7867}
7868
7869static void netdev_adjacent_dev_disable(struct net_device *upper_dev,
7870 struct net_device *lower_dev)
7871{
7872 __netdev_adjacent_dev_set(upper_dev, lower_dev, true);
7873}
7874
7875static void netdev_adjacent_dev_enable(struct net_device *upper_dev,
7876 struct net_device *lower_dev)
7877{
7878 __netdev_adjacent_dev_set(upper_dev, lower_dev, false);
7879}
7880
7881int netdev_adjacent_change_prepare(struct net_device *old_dev,
7882 struct net_device *new_dev,
7883 struct net_device *dev,
7884 struct netlink_ext_ack *extack)
7885{
7886 struct netdev_nested_priv priv = {
7887 .flags = 0,
7888 .data = NULL,
7889 };
7890 int err;
7891
7892 if (!new_dev)
7893 return 0;
7894
7895 if (old_dev && new_dev != old_dev)
7896 netdev_adjacent_dev_disable(dev, old_dev);
7897 err = __netdev_upper_dev_link(new_dev, dev, false, NULL, NULL, &priv,
7898 extack);
7899 if (err) {
7900 if (old_dev && new_dev != old_dev)
7901 netdev_adjacent_dev_enable(dev, old_dev);
7902 return err;
7903 }
7904
7905 return 0;
7906}
7907EXPORT_SYMBOL(netdev_adjacent_change_prepare);
7908
7909void netdev_adjacent_change_commit(struct net_device *old_dev,
7910 struct net_device *new_dev,
7911 struct net_device *dev)
7912{
7913 struct netdev_nested_priv priv = {
7914 .flags = NESTED_SYNC_IMM | NESTED_SYNC_TODO,
7915 .data = NULL,
7916 };
7917
7918 if (!new_dev || !old_dev)
7919 return;
7920
7921 if (new_dev == old_dev)
7922 return;
7923
7924 netdev_adjacent_dev_enable(dev, old_dev);
7925 __netdev_upper_dev_unlink(old_dev, dev, &priv);
7926}
7927EXPORT_SYMBOL(netdev_adjacent_change_commit);
7928
7929void netdev_adjacent_change_abort(struct net_device *old_dev,
7930 struct net_device *new_dev,
7931 struct net_device *dev)
7932{
7933 struct netdev_nested_priv priv = {
7934 .flags = 0,
7935 .data = NULL,
7936 };
7937
7938 if (!new_dev)
7939 return;
7940
7941 if (old_dev && new_dev != old_dev)
7942 netdev_adjacent_dev_enable(dev, old_dev);
7943
7944 __netdev_upper_dev_unlink(new_dev, dev, &priv);
7945}
7946EXPORT_SYMBOL(netdev_adjacent_change_abort);
7947
7948
7949
7950
7951
7952
7953
7954
7955
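/**
 * netdev_bonding_info_change - Dispatch event about slave change
 * @dev: device
 * @bonding_info: info to dispatch
 *
 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */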
7956void netdev_bonding_info_change(struct net_device *dev,
7957 struct netdev_bonding_info *bonding_info)
7958{
7959 struct netdev_notifier_bonding_info info = {
7960 .info.dev = dev,
7961 };
7962
7963 memcpy(&info.bonding_info, bonding_info,
7964 sizeof(struct netdev_bonding_info));
7965 call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
7966 &info.info);
7967}
7968EXPORT_SYMBOL(netdev_bonding_info_change);
7969
7970
7971
7972
7973
7974
7975
7976
7977
7978
7979
7980
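/**
 * netdev_get_xmit_slave - Get the xmit slave of master device
 * @dev: device
 * @skb: the packet
 * @all_slaves: assume all the slaves are active
 *
 * The reference counters are not incremented, so the caller must be
 * careful with locks. %NULL is returned if no slave is found.
 */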
7981struct net_device *netdev_get_xmit_slave(struct net_device *dev,
7982 struct sk_buff *skb,
7983 bool all_slaves)
7984{
7985 const struct net_device_ops *ops = dev->netdev_ops;
7986
7987 if (!ops->ndo_get_xmit_slave)
7988 return NULL;
7989 return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
7990}
7991EXPORT_SYMBOL(netdev_get_xmit_slave);
7992
7993static void netdev_adjacent_add_links(struct net_device *dev)
7994{
7995 struct netdev_adjacent *iter;
7996
7997 struct net *net = dev_net(dev);
7998
7999 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8000 if (!net_eq(net, dev_net(iter->dev)))
8001 continue;
8002 netdev_adjacent_sysfs_add(iter->dev, dev,
8003 &iter->dev->adj_list.lower);
8004 netdev_adjacent_sysfs_add(dev, iter->dev,
8005 &dev->adj_list.upper);
8006 }
8007
8008 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8009 if (!net_eq(net, dev_net(iter->dev)))
8010 continue;
8011 netdev_adjacent_sysfs_add(iter->dev, dev,
8012 &iter->dev->adj_list.upper);
8013 netdev_adjacent_sysfs_add(dev, iter->dev,
8014 &dev->adj_list.lower);
8015 }
8016}
8017
8018static void netdev_adjacent_del_links(struct net_device *dev)
8019{
8020 struct netdev_adjacent *iter;
8021
8022 struct net *net = dev_net(dev);
8023
8024 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8025 if (!net_eq(net, dev_net(iter->dev)))
8026 continue;
8027 netdev_adjacent_sysfs_del(iter->dev, dev->name,
8028 &iter->dev->adj_list.lower);
8029 netdev_adjacent_sysfs_del(dev, iter->dev->name,
8030 &dev->adj_list.upper);
8031 }
8032
8033 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8034 if (!net_eq(net, dev_net(iter->dev)))
8035 continue;
8036 netdev_adjacent_sysfs_del(iter->dev, dev->name,
8037 &iter->dev->adj_list.upper);
8038 netdev_adjacent_sysfs_del(dev, iter->dev->name,
8039 &dev->adj_list.lower);
8040 }
8041}
8042
8043void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
8044{
8045 struct netdev_adjacent *iter;
8046
8047 struct net *net = dev_net(dev);
8048
8049 list_for_each_entry(iter, &dev->adj_list.upper, list) {
8050 if (!net_eq(net, dev_net(iter->dev)))
8051 continue;
8052 netdev_adjacent_sysfs_del(iter->dev, oldname,
8053 &iter->dev->adj_list.lower);
8054 netdev_adjacent_sysfs_add(iter->dev, dev,
8055 &iter->dev->adj_list.lower);
8056 }
8057
8058 list_for_each_entry(iter, &dev->adj_list.lower, list) {
8059 if (!net_eq(net, dev_net(iter->dev)))
8060 continue;
8061 netdev_adjacent_sysfs_del(iter->dev, oldname,
8062 &iter->dev->adj_list.upper);
8063 netdev_adjacent_sysfs_add(iter->dev, dev,
8064 &iter->dev->adj_list.upper);
8065 }
8066}
8067
8068void *netdev_lower_dev_get_private(struct net_device *dev,
8069 struct net_device *lower_dev)
8070{
8071 struct netdev_adjacent *lower;
8072
8073 if (!lower_dev)
8074 return NULL;
8075 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
8076 if (!lower)
8077 return NULL;
8078
8079 return lower->private;
8080}
8081EXPORT_SYMBOL(netdev_lower_dev_get_private);
8082
8083
8084
8085
8086
8087
8088
8089
8090
8091
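/**
 * netdev_lower_state_changed - Dispatch event about lower device state change
 * @lower_dev: device
 * @lower_state_info: state to dispatch
 *
 * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */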
8092void netdev_lower_state_changed(struct net_device *lower_dev,
8093 void *lower_state_info)
8094{
8095 struct netdev_notifier_changelowerstate_info changelowerstate_info = {
8096 .info.dev = lower_dev,
8097 };
8098
8099 ASSERT_RTNL();
8100 changelowerstate_info.lower_state_info = lower_state_info;
8101 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
8102 &changelowerstate_info.info);
8103}
8104EXPORT_SYMBOL(netdev_lower_state_changed);
8105
8106static void dev_change_rx_flags(struct net_device *dev, int flags)
8107{
8108 const struct net_device_ops *ops = dev->netdev_ops;
8109
8110 if (ops->ndo_change_rx_flags)
8111 ops->ndo_change_rx_flags(dev, flags);
8112}
8113
8114static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
8115{
8116 unsigned int old_flags = dev->flags;
8117 kuid_t uid;
8118 kgid_t gid;
8119
8120 ASSERT_RTNL();
8121
8122 dev->flags |= IFF_PROMISC;
8123 dev->promiscuity += inc;
8124 if (dev->promiscuity == 0) {
8125
8126
8127
8128
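		/* Avoid overflow.
		 * If inc causes the counter to overflow, leave promiscuity
		 * untouched and return an error.
		 */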
8129 if (inc < 0)
8130 dev->flags &= ~IFF_PROMISC;
8131 else {
8132 dev->promiscuity -= inc;
8133 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
8134 dev->name);
8135 return -EOVERFLOW;
8136 }
8137 }
8138 if (dev->flags != old_flags) {
8139 pr_info("device %s %s promiscuous mode\n",
8140 dev->name,
8141 dev->flags & IFF_PROMISC ? "entered" : "left");
8142 if (audit_enabled) {
8143 current_uid_gid(&uid, &gid);
8144 audit_log(audit_context(), GFP_ATOMIC,
8145 AUDIT_ANOM_PROMISCUOUS,
8146 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
8147 dev->name, (dev->flags & IFF_PROMISC),
8148 (old_flags & IFF_PROMISC),
8149 from_kuid(&init_user_ns, audit_get_loginuid(current)),
8150 from_kuid(&init_user_ns, uid),
8151 from_kgid(&init_user_ns, gid),
8152 audit_get_sessionid(current));
8153 }
8154
8155 dev_change_rx_flags(dev, IFF_PROMISC);
8156 }
8157 if (notify)
8158 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
8159 return 0;
8160}
8161
8162
8163
8164
8165
8166
8167
8168
8169
8170
8171
8172
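/**
 *	dev_set_promiscuity - update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */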
8173int dev_set_promiscuity(struct net_device *dev, int inc)
8174{
8175 unsigned int old_flags = dev->flags;
8176 int err;
8177
8178 err = __dev_set_promiscuity(dev, inc, true);
8179 if (err < 0)
8180 return err;
8181 if (dev->flags != old_flags)
8182 dev_set_rx_mode(dev);
8183 return err;
8184}
8185EXPORT_SYMBOL(dev_set_promiscuity);
8186
8187static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
8188{
8189 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
8190
8191 ASSERT_RTNL();
8192
8193 dev->flags |= IFF_ALLMULTI;
8194 dev->allmulti += inc;
8195 if (dev->allmulti == 0) {
8196
8197
8198
8199
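		/* Avoid overflow.
		 * If inc causes the counter to overflow, leave allmulti
		 * untouched and return an error.
		 */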
8200 if (inc < 0)
8201 dev->flags &= ~IFF_ALLMULTI;
8202 else {
8203 dev->allmulti -= inc;
8204 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
8205 dev->name);
8206 return -EOVERFLOW;
8207 }
8208 }
8209 if (dev->flags ^ old_flags) {
8210 dev_change_rx_flags(dev, IFF_ALLMULTI);
8211 dev_set_rx_mode(dev);
8212 if (notify)
8213 __dev_notify_flags(dev, old_flags,
8214 dev->gflags ^ old_gflags);
8215 }
8216 return 0;
8217}
8218
8219
8220
8221
8222
8223
8224
8225
8226
8227
8228
8229
8230
8231
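/**
 *	dev_set_allmulti - update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */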
8232int dev_set_allmulti(struct net_device *dev, int inc)
8233{
8234 return __dev_set_allmulti(dev, inc, true);
8235}
8236EXPORT_SYMBOL(dev_set_allmulti);
8237
8238
8239
8240
8241
8242
8243
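/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */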
8244void __dev_set_rx_mode(struct net_device *dev)
8245{
8246 const struct net_device_ops *ops = dev->netdev_ops;
8247
8248
8249 if (!(dev->flags&IFF_UP))
8250 return;
8251
8252 if (!netif_device_present(dev))
8253 return;
8254
8255 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
8256
8257
8258
8259 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
8260 __dev_set_promiscuity(dev, 1, false);
8261 dev->uc_promisc = true;
8262 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
8263 __dev_set_promiscuity(dev, -1, false);
8264 dev->uc_promisc = false;
8265 }
8266 }
8267
8268 if (ops->ndo_set_rx_mode)
8269 ops->ndo_set_rx_mode(dev);
8270}
8271
8272void dev_set_rx_mode(struct net_device *dev)
8273{
8274 netif_addr_lock_bh(dev);
8275 __dev_set_rx_mode(dev);
8276 netif_addr_unlock_bh(dev);
8277}
8278
8279
8280
8281
8282
8283
8284
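/**
 *	dev_get_flags - get flags reported to userspace
 *	@dev: device
 *
 *	Get the combination of flag bits exported through APIs to userspace.
 */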
8285unsigned int dev_get_flags(const struct net_device *dev)
8286{
8287 unsigned int flags;
8288
8289 flags = (dev->flags & ~(IFF_PROMISC |
8290 IFF_ALLMULTI |
8291 IFF_RUNNING |
8292 IFF_LOWER_UP |
8293 IFF_DORMANT)) |
8294 (dev->gflags & (IFF_PROMISC |
8295 IFF_ALLMULTI));
8296
8297 if (netif_running(dev)) {
8298 if (netif_oper_up(dev))
8299 flags |= IFF_RUNNING;
8300 if (netif_carrier_ok(dev))
8301 flags |= IFF_LOWER_UP;
8302 if (netif_dormant(dev))
8303 flags |= IFF_DORMANT;
8304 }
8305
8306 return flags;
8307}
8308EXPORT_SYMBOL(dev_get_flags);
8309
8310int __dev_change_flags(struct net_device *dev, unsigned int flags,
8311 struct netlink_ext_ack *extack)
8312{
8313 unsigned int old_flags = dev->flags;
8314 int ret;
8315
8316 ASSERT_RTNL();
8317
8318
8319
8320
8321
8322 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
8323 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
8324 IFF_AUTOMEDIA)) |
8325 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
8326 IFF_ALLMULTI));
8327
8328
8329
8330
8331
8332 if ((old_flags ^ flags) & IFF_MULTICAST)
8333 dev_change_rx_flags(dev, IFF_MULTICAST);
8334
8335 dev_set_rx_mode(dev);
8336
8337
8338
8339
8340
8341
8342
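	/*
	 *	Have we downed the interface? We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */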
8343 ret = 0;
8344 if ((old_flags ^ flags) & IFF_UP) {
8345 if (old_flags & IFF_UP)
8346 __dev_close(dev);
8347 else
8348 ret = __dev_open(dev, extack);
8349 }
8350
8351 if ((flags ^ dev->gflags) & IFF_PROMISC) {
8352 int inc = (flags & IFF_PROMISC) ? 1 : -1;
8353 unsigned int old_flags = dev->flags;
8354
8355 dev->gflags ^= IFF_PROMISC;
8356
8357 if (__dev_set_promiscuity(dev, inc, false) >= 0)
8358 if (dev->flags != old_flags)
8359 dev_set_rx_mode(dev);
8360 }
8361
8362
8363
8364
8365
8366 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
8367 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
8368
8369 dev->gflags ^= IFF_ALLMULTI;
8370 __dev_set_allmulti(dev, inc, false);
8371 }
8372
8373 return ret;
8374}
8375
8376void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
8377 unsigned int gchanges)
8378{
8379 unsigned int changes = dev->flags ^ old_flags;
8380
8381 if (gchanges)
8382 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
8383
8384 if (changes & IFF_UP) {
8385 if (dev->flags & IFF_UP)
8386 call_netdevice_notifiers(NETDEV_UP, dev);
8387 else
8388 call_netdevice_notifiers(NETDEV_DOWN, dev);
8389 }
8390
8391 if (dev->flags & IFF_UP &&
8392 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
8393 struct netdev_notifier_change_info change_info = {
8394 .info = {
8395 .dev = dev,
8396 },
8397 .flags_changed = changes,
8398 };
8399
8400 call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
8401 }
8402}
8403
8404
8405
8406
8407
8408
8409
8410
8411
8412
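/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *	@extack: netlink extended ack
 *
 *	Change settings on device based state flags. The flags are
 *	in the userspace exported format.
 */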
8413int dev_change_flags(struct net_device *dev, unsigned int flags,
8414 struct netlink_ext_ack *extack)
8415{
8416 int ret;
8417 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
8418
8419 ret = __dev_change_flags(dev, flags, extack);
8420 if (ret < 0)
8421 return ret;
8422
8423 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
8424 __dev_notify_flags(dev, old_flags, changes);
8425 return ret;
8426}
8427EXPORT_SYMBOL(dev_change_flags);
8428
8429int __dev_set_mtu(struct net_device *dev, int new_mtu)
8430{
8431 const struct net_device_ops *ops = dev->netdev_ops;
8432
8433 if (ops->ndo_change_mtu)
8434 return ops->ndo_change_mtu(dev, new_mtu);
8435
8436
8437 WRITE_ONCE(dev->mtu, new_mtu);
8438 return 0;
8439}
8440EXPORT_SYMBOL(__dev_set_mtu);
8441
8442int dev_validate_mtu(struct net_device *dev, int new_mtu,
8443 struct netlink_ext_ack *extack)
8444{
8445
8446 if (new_mtu < 0 || new_mtu < dev->min_mtu) {
8447 NL_SET_ERR_MSG(extack, "mtu less than device minimum");
8448 return -EINVAL;
8449 }
8450
8451 if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
8452 NL_SET_ERR_MSG(extack, "mtu greater than device maximum");
8453 return -EINVAL;
8454 }
8455 return 0;
8456}
8457
8458
8459
8460
8461
8462
8463
8464
8465
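/**
 *	dev_set_mtu_ext - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *	@extack: netlink extended ack
 *
 *	Change the maximum transfer size of the network device.
 */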
8466int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
8467 struct netlink_ext_ack *extack)
8468{
8469 int err, orig_mtu;
8470
8471 if (new_mtu == dev->mtu)
8472 return 0;
8473
8474 err = dev_validate_mtu(dev, new_mtu, extack);
8475 if (err)
8476 return err;
8477
8478 if (!netif_device_present(dev))
8479 return -ENODEV;
8480
8481 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
8482 err = notifier_to_errno(err);
8483 if (err)
8484 return err;
8485
8486 orig_mtu = dev->mtu;
8487 err = __dev_set_mtu(dev, new_mtu);
8488
8489 if (!err) {
8490 err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8491 orig_mtu);
8492 err = notifier_to_errno(err);
8493 if (err) {
8494
8495
8496
8497 __dev_set_mtu(dev, orig_mtu);
8498 call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
8499 new_mtu);
8500 }
8501 }
8502 return err;
8503}
8504
8505int dev_set_mtu(struct net_device *dev, int new_mtu)
8506{
8507 struct netlink_ext_ack extack;
8508 int err;
8509
8510 memset(&extack, 0, sizeof(extack));
8511 err = dev_set_mtu_ext(dev, new_mtu, &extack);
8512 if (err && extack._msg)
8513 net_err_ratelimited("%s: %s\n", dev->name, extack._msg);
8514 return err;
8515}
8516EXPORT_SYMBOL(dev_set_mtu);
8517
8518
8519
8520
8521
8522
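/**
 *	dev_change_tx_queue_len - Change TX queue length of a netdevice
 *	@dev: device
 *	@new_len: new tx queue length
 */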
8523int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
8524{
8525 unsigned int orig_len = dev->tx_queue_len;
8526 int res;
8527
8528 if (new_len != (unsigned int)new_len)
8529 return -ERANGE;
8530
8531 if (new_len != orig_len) {
8532 dev->tx_queue_len = new_len;
8533 res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
8534 res = notifier_to_errno(res);
8535 if (res)
8536 goto err_rollback;
8537 res = dev_qdisc_change_tx_queue_len(dev);
8538 if (res)
8539 goto err_rollback;
8540 }
8541
8542 return 0;
8543
8544err_rollback:
8545 netdev_err(dev, "refused to change device tx_queue_len\n");
8546 dev->tx_queue_len = orig_len;
8547 return res;
8548}
8549
8550
8551
8552
8553
8554
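/**
 *	dev_set_group - Change group this device belongs to
 *	@dev: device
 *	@new_group: group this device should belong to
 */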
8555void dev_set_group(struct net_device *dev, int new_group)
8556{
8557 dev->group = new_group;
8558}
8559EXPORT_SYMBOL(dev_set_group);
8560
8561
8562
8563
8564
8565
8566
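/**
 *	dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
 *	@dev: device
 *	@addr: new address
 *	@extack: netlink extended ack
 */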
8567int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
8568 struct netlink_ext_ack *extack)
8569{
8570 struct netdev_notifier_pre_changeaddr_info info = {
8571 .info.dev = dev,
8572 .info.extack = extack,
8573 .dev_addr = addr,
8574 };
8575 int rc;
8576
8577 rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
8578 return notifier_to_errno(rc);
8579}
8580EXPORT_SYMBOL(dev_pre_changeaddr_notify);
8581
8582
8583
8584
8585
8586
8587
8588
8589
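/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *	@extack: netlink extended ack
 *
 *	Change the hardware (MAC) address of the device.
 */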
8590int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
8591 struct netlink_ext_ack *extack)
8592{
8593 const struct net_device_ops *ops = dev->netdev_ops;
8594 int err;
8595
8596 if (!ops->ndo_set_mac_address)
8597 return -EOPNOTSUPP;
8598 if (sa->sa_family != dev->type)
8599 return -EINVAL;
8600 if (!netif_device_present(dev))
8601 return -ENODEV;
8602 err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
8603 if (err)
8604 return err;
8605 err = ops->ndo_set_mac_address(dev, sa);
8606 if (err)
8607 return err;
8608 dev->addr_assign_type = NET_ADDR_SET;
8609 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
8610 add_device_randomness(dev->dev_addr, dev->addr_len);
8611 return 0;
8612}
8613EXPORT_SYMBOL(dev_set_mac_address);
8614
8615
8616
8617
8618
8619
8620
8621
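/**
 *	dev_change_carrier - Change device carrier
 *	@dev: device
 *	@new_carrier: new value
 *
 *	Change device carrier
 */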
8622int dev_change_carrier(struct net_device *dev, bool new_carrier)
8623{
8624 const struct net_device_ops *ops = dev->netdev_ops;
8625
8626 if (!ops->ndo_change_carrier)
8627 return -EOPNOTSUPP;
8628 if (!netif_device_present(dev))
8629 return -ENODEV;
8630 return ops->ndo_change_carrier(dev, new_carrier);
8631}
8632EXPORT_SYMBOL(dev_change_carrier);
8633
8634
8635
8636
8637
8638
8639
8640
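/**
 *	dev_get_phys_port_id - Get device physical port ID
 *	@dev: device
 *	@ppid: port ID
 *
 *	Get device physical port ID
 */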
8641int dev_get_phys_port_id(struct net_device *dev,
8642 struct netdev_phys_item_id *ppid)
8643{
8644 const struct net_device_ops *ops = dev->netdev_ops;
8645
8646 if (!ops->ndo_get_phys_port_id)
8647 return -EOPNOTSUPP;
8648 return ops->ndo_get_phys_port_id(dev, ppid);
8649}
8650EXPORT_SYMBOL(dev_get_phys_port_id);
8651
8652
8653
8654
8655
8656
8657
8658
8659
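/**
 *	dev_get_phys_port_name - Get device physical port name
 *	@dev: device
 *	@name: port name
 *	@len: limit of bytes to copy to name
 *
 *	Get device physical port name
 */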
8660int dev_get_phys_port_name(struct net_device *dev,
8661 char *name, size_t len)
8662{
8663 const struct net_device_ops *ops = dev->netdev_ops;
8664 int err;
8665
8666 if (ops->ndo_get_phys_port_name) {
8667 err = ops->ndo_get_phys_port_name(dev, name, len);
8668 if (err != -EOPNOTSUPP)
8669 return err;
8670 }
8671 return devlink_compat_phys_port_name_get(dev, name, len);
8672}
8673EXPORT_SYMBOL(dev_get_phys_port_name);
8674
8675
8676
8677
8678
8679
8680
8681
8682
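/**
 *	dev_get_port_parent_id - Get the device's port parent identifier
 *	@dev: network device
 *	@ppid: pointer to a storage for the port's parent identifier
 *	@recurse: allow/disallow recursion to lower devices
 *
 *	Get the device's port parent identifier
 */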
8683int dev_get_port_parent_id(struct net_device *dev,
8684 struct netdev_phys_item_id *ppid,
8685 bool recurse)
8686{
8687 const struct net_device_ops *ops = dev->netdev_ops;
8688 struct netdev_phys_item_id first = { };
8689 struct net_device *lower_dev;
8690 struct list_head *iter;
8691 int err;
8692
8693 if (ops->ndo_get_port_parent_id) {
8694 err = ops->ndo_get_port_parent_id(dev, ppid);
8695 if (err != -EOPNOTSUPP)
8696 return err;
8697 }
8698
8699 err = devlink_compat_switch_id_get(dev, ppid);
8700 if (!err || err != -EOPNOTSUPP)
8701 return err;
8702
8703 if (!recurse)
8704 return -EOPNOTSUPP;
8705
8706 netdev_for_each_lower_dev(dev, lower_dev, iter) {
8707 err = dev_get_port_parent_id(lower_dev, ppid, recurse);
8708 if (err)
8709 break;
8710 if (!first.id_len)
8711 first = *ppid;
8712 else if (memcmp(&first, ppid, sizeof(*ppid)))
8713 return -EOPNOTSUPP;
8714 }
8715
8716 return err;
8717}
8718EXPORT_SYMBOL(dev_get_port_parent_id);
8719
8720
8721
8722
8723
8724
8725
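/**
 *	netdev_port_same_parent_id - Indicate if two network devices have
 *	the same port parent identifier
 *	@a: first network device
 *	@b: second network device
 */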
8726bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
8727{
8728 struct netdev_phys_item_id a_id = { };
8729 struct netdev_phys_item_id b_id = { };
8730
8731 if (dev_get_port_parent_id(a, &a_id, true) ||
8732 dev_get_port_parent_id(b, &b_id, true))
8733 return false;
8734
8735 return netdev_phys_item_id_same(&a_id, &b_id);
8736}
8737EXPORT_SYMBOL(netdev_port_same_parent_id);
8738
8739
8740
8741
8742
8743
8744
8745
8746
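/**
 *	dev_change_proto_down - update protocol port state information
 *	@dev: device
 *	@proto_down: new value
 *
 *	This info can be used by switch drivers to set the phys state of the
 *	port.
 */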
8747int dev_change_proto_down(struct net_device *dev, bool proto_down)
8748{
8749 const struct net_device_ops *ops = dev->netdev_ops;
8750
8751 if (!ops->ndo_change_proto_down)
8752 return -EOPNOTSUPP;
8753 if (!netif_device_present(dev))
8754 return -ENODEV;
8755 return ops->ndo_change_proto_down(dev, proto_down);
8756}
8757EXPORT_SYMBOL(dev_change_proto_down);
8758
8759
8760
8761
8762
8763
8764
8765
8766
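/**
 *	dev_change_proto_down_generic - generic implementation for
 *	ndo_change_proto_down that sets carrier according to
 *	proto_down.
 *
 *	@dev: device
 *	@proto_down: new value
 */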
8767int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
8768{
8769 if (proto_down)
8770 netif_carrier_off(dev);
8771 else
8772 netif_carrier_on(dev);
8773 dev->proto_down = proto_down;
8774 return 0;
8775}
8776EXPORT_SYMBOL(dev_change_proto_down_generic);
8777
8778
8779
8780
8781
8782
8783
8784
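/**
 *	dev_change_proto_down_reason - proto down reason
 *	@dev: device
 *	@mask: proto down reason mask
 *	@value: proto down value
 *
 *	With a zero @mask the whole reason word is replaced by @value;
 *	otherwise only the bits set in @mask are updated from @value.
 */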
8785void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
8786 u32 value)
8787{
8788 int b;
8789
8790 if (!mask) {
8791 dev->proto_down_reason = value;
8792 } else {
8793 for_each_set_bit(b, &mask, 32) {
8794 if (value & (1 << b))
8795 dev->proto_down_reason |= BIT(b);
8796 else
8797 dev->proto_down_reason &= ~BIT(b);
8798 }
8799 }
8800}
8801EXPORT_SYMBOL(dev_change_proto_down_reason);
8802
8803struct bpf_xdp_link {
8804 struct bpf_link link;
8805 struct net_device *dev;
8806 int flags;
8807};
8808
8809static enum bpf_xdp_mode dev_xdp_mode(struct net_device *dev, u32 flags)
8810{
8811 if (flags & XDP_FLAGS_HW_MODE)
8812 return XDP_MODE_HW;
8813 if (flags & XDP_FLAGS_DRV_MODE)
8814 return XDP_MODE_DRV;
8815 if (flags & XDP_FLAGS_SKB_MODE)
8816 return XDP_MODE_SKB;
8817 return dev->netdev_ops->ndo_bpf ? XDP_MODE_DRV : XDP_MODE_SKB;
8818}
8819
8820static bpf_op_t dev_xdp_bpf_op(struct net_device *dev, enum bpf_xdp_mode mode)
8821{
8822 switch (mode) {
8823 case XDP_MODE_SKB:
8824 return generic_xdp_install;
8825 case XDP_MODE_DRV:
8826 case XDP_MODE_HW:
8827 return dev->netdev_ops->ndo_bpf;
8828 default:
8829 return NULL;
8830	}
8831}
8832
8833static struct bpf_xdp_link *dev_xdp_link(struct net_device *dev,
8834 enum bpf_xdp_mode mode)
8835{
8836 return dev->xdp_state[mode].link;
8837}
8838
8839static struct bpf_prog *dev_xdp_prog(struct net_device *dev,
8840 enum bpf_xdp_mode mode)
8841{
8842 struct bpf_xdp_link *link = dev_xdp_link(dev, mode);
8843
8844 if (link)
8845 return link->link.prog;
8846 return dev->xdp_state[mode].prog;
8847}
8848
8849u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode)
8850{
8851 struct bpf_prog *prog = dev_xdp_prog(dev, mode);
8852
8853 return prog ? prog->aux->id : 0;
8854}
8855
8856static void dev_xdp_set_link(struct net_device *dev, enum bpf_xdp_mode mode,
8857 struct bpf_xdp_link *link)
8858{
8859 dev->xdp_state[mode].link = link;
8860 dev->xdp_state[mode].prog = NULL;
8861}
8862
8863static void dev_xdp_set_prog(struct net_device *dev, enum bpf_xdp_mode mode,
8864 struct bpf_prog *prog)
8865{
8866 dev->xdp_state[mode].link = NULL;
8867 dev->xdp_state[mode].prog = prog;
8868}
8869
8870static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode,
8871 bpf_op_t bpf_op, struct netlink_ext_ack *extack,
8872 u32 flags, struct bpf_prog *prog)
8873{
8874 struct netdev_bpf xdp;
8875 int err;
8876
8877 memset(&xdp, 0, sizeof(xdp));
8878 xdp.command = mode == XDP_MODE_HW ? XDP_SETUP_PROG_HW : XDP_SETUP_PROG;
8879 xdp.extack = extack;
8880 xdp.flags = flags;
8881 xdp.prog = prog;
8882
8883
8884
8885
8886
8887
8888
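	/* The driver takes over the reference to @prog, so grab an extra
	 * one here and drop it again if the install fails.
	 */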
8889 if (prog)
8890 bpf_prog_inc(prog);
8891 err = bpf_op(dev, &xdp);
8892 if (err) {
8893 if (prog)
8894 bpf_prog_put(prog);
8895 return err;
8896 }
8897
8898 if (mode != XDP_MODE_HW)
8899 bpf_prog_change_xdp(dev_xdp_prog(dev, mode), prog);
8900
8901 return 0;
8902}
8903
8904static void dev_xdp_uninstall(struct net_device *dev)
8905{
8906 struct bpf_xdp_link *link;
8907 struct bpf_prog *prog;
8908 enum bpf_xdp_mode mode;
8909 bpf_op_t bpf_op;
8910
8911 ASSERT_RTNL();
8912
8913 for (mode = XDP_MODE_SKB; mode < __MAX_XDP_MODE; mode++) {
8914 prog = dev_xdp_prog(dev, mode);
8915 if (!prog)
8916 continue;
8917
8918 bpf_op = dev_xdp_bpf_op(dev, mode);
8919 if (!bpf_op)
8920 continue;
8921
8922 WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
8923
8924
8925 link = dev_xdp_link(dev, mode);
8926 if (link)
8927 link->dev = NULL;
8928 else
8929 bpf_prog_put(prog);
8930
8931 dev_xdp_set_link(dev, mode, NULL);
8932 }
8933}
8934
8935static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack,
8936 struct bpf_xdp_link *link, struct bpf_prog *new_prog,
8937 struct bpf_prog *old_prog, u32 flags)
8938{
8939 struct bpf_prog *cur_prog;
8940 enum bpf_xdp_mode mode;
8941 bpf_op_t bpf_op;
8942 int err;
8943
8944 ASSERT_RTNL();
8945
8946
8947 if (link && (new_prog || old_prog))
8948 return -EINVAL;
8949
8950 if (link && (flags & ~XDP_FLAGS_MODES)) {
8951 NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment");
8952 return -EINVAL;
8953 }
8954
8955 if (hweight32(flags & XDP_FLAGS_MODES) > 1) {
8956 NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set");
8957 return -EINVAL;
8958 }
8959
8960 if (old_prog && !(flags & XDP_FLAGS_REPLACE)) {
8961 NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified");
8962 return -EINVAL;
8963 }
8964
8965 mode = dev_xdp_mode(dev, flags);
8966
8967 if (dev_xdp_link(dev, mode)) {
8968 NL_SET_ERR_MSG(extack, "Can't replace active BPF XDP link");
8969 return -EBUSY;
8970 }
8971
8972 cur_prog = dev_xdp_prog(dev, mode);
8973
8974 if (link && cur_prog) {
8975 NL_SET_ERR_MSG(extack, "Can't replace active XDP program with BPF link");
8976 return -EBUSY;
8977 }
8978 if ((flags & XDP_FLAGS_REPLACE) && cur_prog != old_prog) {
8979 NL_SET_ERR_MSG(extack, "Active program does not match expected");
8980 return -EEXIST;
8981 }
8982
8983
8984 if (link)
8985 new_prog = link->link.prog;
8986
8987 if (new_prog) {
8988 bool offload = mode == XDP_MODE_HW;
8989 enum bpf_xdp_mode other_mode = mode == XDP_MODE_SKB
8990 ? XDP_MODE_DRV : XDP_MODE_SKB;
8991
8992 if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && cur_prog) {
8993 NL_SET_ERR_MSG(extack, "XDP program already attached");
8994 return -EBUSY;
8995 }
8996 if (!offload && dev_xdp_prog(dev, other_mode)) {
8997 NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
8998 return -EEXIST;
8999 }
9000 if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
9001 NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
9002 return -EINVAL;
9003 }
9004 if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
9005			NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs cannot be attached to a device");
9006 return -EINVAL;
9007 }
9008 if (new_prog->expected_attach_type == BPF_XDP_CPUMAP) {
9009			NL_SET_ERR_MSG(extack, "BPF_XDP_CPUMAP programs cannot be attached to a device");
9010 return -EINVAL;
9011 }
9012 }
9013
9014
9015 if (new_prog != cur_prog) {
9016 bpf_op = dev_xdp_bpf_op(dev, mode);
9017 if (!bpf_op) {
9018 NL_SET_ERR_MSG(extack, "Underlying driver does not support XDP in native mode");
9019 return -EOPNOTSUPP;
9020 }
9021
9022 err = dev_xdp_install(dev, mode, bpf_op, extack, flags, new_prog);
9023 if (err)
9024 return err;
9025 }
9026
9027 if (link)
9028 dev_xdp_set_link(dev, mode, link);
9029 else
9030 dev_xdp_set_prog(dev, mode, new_prog);
9031 if (cur_prog)
9032 bpf_prog_put(cur_prog);
9033
9034 return 0;
9035}
9036
9037static int dev_xdp_attach_link(struct net_device *dev,
9038 struct netlink_ext_ack *extack,
9039 struct bpf_xdp_link *link)
9040{
9041 return dev_xdp_attach(dev, extack, link, NULL, NULL, link->flags);
9042}
9043
9044static int dev_xdp_detach_link(struct net_device *dev,
9045 struct netlink_ext_ack *extack,
9046 struct bpf_xdp_link *link)
9047{
9048 enum bpf_xdp_mode mode;
9049 bpf_op_t bpf_op;
9050
9051 ASSERT_RTNL();
9052
9053 mode = dev_xdp_mode(dev, link->flags);
9054 if (dev_xdp_link(dev, mode) != link)
9055 return -EINVAL;
9056
9057 bpf_op = dev_xdp_bpf_op(dev, mode);
9058 WARN_ON(dev_xdp_install(dev, mode, bpf_op, NULL, 0, NULL));
9059 dev_xdp_set_link(dev, mode, NULL);
9060 return 0;
9061}
9062
9063static void bpf_xdp_link_release(struct bpf_link *link)
9064{
9065 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9066
9067 rtnl_lock();
9068
9069
9070
9071
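	/* If racing with net_device's tear down, xdp_link->dev might be
	 * already NULL, in which case link was already auto-detached.
	 */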
9072 if (xdp_link->dev) {
9073 WARN_ON(dev_xdp_detach_link(xdp_link->dev, NULL, xdp_link));
9074 xdp_link->dev = NULL;
9075 }
9076
9077 rtnl_unlock();
9078}
9079
9080static int bpf_xdp_link_detach(struct bpf_link *link)
9081{
9082 bpf_xdp_link_release(link);
9083 return 0;
9084}
9085
9086static void bpf_xdp_link_dealloc(struct bpf_link *link)
9087{
9088 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9089
9090 kfree(xdp_link);
9091}
9092
9093static void bpf_xdp_link_show_fdinfo(const struct bpf_link *link,
9094 struct seq_file *seq)
9095{
9096 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9097 u32 ifindex = 0;
9098
9099 rtnl_lock();
9100 if (xdp_link->dev)
9101 ifindex = xdp_link->dev->ifindex;
9102 rtnl_unlock();
9103
9104 seq_printf(seq, "ifindex:\t%u\n", ifindex);
9105}
9106
9107static int bpf_xdp_link_fill_link_info(const struct bpf_link *link,
9108 struct bpf_link_info *info)
9109{
9110 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9111 u32 ifindex = 0;
9112
9113 rtnl_lock();
9114 if (xdp_link->dev)
9115 ifindex = xdp_link->dev->ifindex;
9116 rtnl_unlock();
9117
9118 info->xdp.ifindex = ifindex;
9119 return 0;
9120}
9121
9122static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog,
9123 struct bpf_prog *old_prog)
9124{
9125 struct bpf_xdp_link *xdp_link = container_of(link, struct bpf_xdp_link, link);
9126 enum bpf_xdp_mode mode;
9127 bpf_op_t bpf_op;
9128 int err = 0;
9129
9130 rtnl_lock();
9131
9132
9133 if (!xdp_link->dev) {
9134 err = -ENOLINK;
9135 goto out_unlock;
9136 }
9137
9138 if (old_prog && link->prog != old_prog) {
9139 err = -EPERM;
9140 goto out_unlock;
9141 }
9142 old_prog = link->prog;
9143 if (old_prog == new_prog) {
9144
9145 bpf_prog_put(new_prog);
9146 goto out_unlock;
9147 }
9148
9149 mode = dev_xdp_mode(xdp_link->dev, xdp_link->flags);
9150 bpf_op = dev_xdp_bpf_op(xdp_link->dev, mode);
9151 err = dev_xdp_install(xdp_link->dev, mode, bpf_op, NULL,
9152 xdp_link->flags, new_prog);
9153 if (err)
9154 goto out_unlock;
9155
9156 old_prog = xchg(&link->prog, new_prog);
9157 bpf_prog_put(old_prog);
9158
9159out_unlock:
9160 rtnl_unlock();
9161 return err;
9162}
9163
9164static const struct bpf_link_ops bpf_xdp_link_lops = {
9165 .release = bpf_xdp_link_release,
9166 .dealloc = bpf_xdp_link_dealloc,
9167 .detach = bpf_xdp_link_detach,
9168 .show_fdinfo = bpf_xdp_link_show_fdinfo,
9169 .fill_link_info = bpf_xdp_link_fill_link_info,
9170 .update_prog = bpf_xdp_link_update,
9171};
9172
9173int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
9174{
9175 struct net *net = current->nsproxy->net_ns;
9176 struct bpf_link_primer link_primer;
9177 struct bpf_xdp_link *link;
9178 struct net_device *dev;
9179 int err, fd;
9180
9181 dev = dev_get_by_index(net, attr->link_create.target_ifindex);
9182 if (!dev)
9183 return -EINVAL;
9184
9185 link = kzalloc(sizeof(*link), GFP_USER);
9186 if (!link) {
9187 err = -ENOMEM;
9188 goto out_put_dev;
9189 }
9190
9191 bpf_link_init(&link->link, BPF_LINK_TYPE_XDP, &bpf_xdp_link_lops, prog);
9192 link->dev = dev;
9193 link->flags = attr->link_create.flags;
9194
9195 err = bpf_link_prime(&link->link, &link_primer);
9196 if (err) {
9197 kfree(link);
9198 goto out_put_dev;
9199 }
9200
9201 rtnl_lock();
9202 err = dev_xdp_attach_link(dev, NULL, link);
9203 rtnl_unlock();
9204
9205 if (err) {
9206 bpf_link_cleanup(&link_primer);
9207 goto out_put_dev;
9208 }
9209
9210 fd = bpf_link_settle(&link_primer);
9211
9212 dev_put(dev);
9213 return fd;
9214
9215out_put_dev:
9216 dev_put(dev);
9217 return err;
9218}
9219
9220
9221
9222
9223
9224
9225
9226
9227
9228
9229
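/**
 *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
 *	@dev: device
 *	@extack: netlink extended ack
 *	@fd: new program fd or negative value to clear
 *	@expected_fd: old program fd that userspace expects to replace or clear
 *	@flags: xdp-related flags
 *
 *	Set or clear a bpf program for a device
 */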
9230int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
9231 int fd, int expected_fd, u32 flags)
9232{
9233 enum bpf_xdp_mode mode = dev_xdp_mode(dev, flags);
9234 struct bpf_prog *new_prog = NULL, *old_prog = NULL;
9235 int err;
9236
9237 ASSERT_RTNL();
9238
9239 if (fd >= 0) {
9240 new_prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
9241 mode != XDP_MODE_SKB);
9242 if (IS_ERR(new_prog))
9243 return PTR_ERR(new_prog);
9244 }
9245
9246 if (expected_fd >= 0) {
9247 old_prog = bpf_prog_get_type_dev(expected_fd, BPF_PROG_TYPE_XDP,
9248 mode != XDP_MODE_SKB);
9249 if (IS_ERR(old_prog)) {
9250 err = PTR_ERR(old_prog);
9251 old_prog = NULL;
9252 goto err_out;
9253 }
9254 }
9255
9256 err = dev_xdp_attach(dev, extack, NULL, new_prog, old_prog, flags);
9257
9258err_out:
9259 if (err && new_prog)
9260 bpf_prog_put(new_prog);
9261 if (old_prog)
9262 bpf_prog_put(old_prog);
9263 return err;
9264}
9265
9266
9267
9268
9269
9270
9271
9272
9273
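/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */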
9274static int dev_new_index(struct net *net)
9275{
9276 int ifindex = net->ifindex;
9277
9278 for (;;) {
9279 if (++ifindex <= 0)
9280 ifindex = 1;
9281 if (!__dev_get_by_index(net, ifindex))
9282 return net->ifindex = ifindex;
9283 }
9284}
9285
9286
9287static LIST_HEAD(net_todo_list);
9288DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
9289
9290static void net_set_todo(struct net_device *dev)
9291{
9292 list_add_tail(&dev->todo_list, &net_todo_list);
9293 dev_net(dev)->dev_unreg_count++;
9294}
9295
9296static void rollback_registered_many(struct list_head *head)
9297{
9298 struct net_device *dev, *tmp;
9299 LIST_HEAD(close_head);
9300
9301 BUG_ON(dev_boot_phase);
9302 ASSERT_RTNL();
9303
9304 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
9305
9306
9307
9308
9309 if (dev->reg_state == NETREG_UNINITIALIZED) {
9310 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
9311 dev->name, dev);
9312
9313 WARN_ON(1);
9314 list_del(&dev->unreg_list);
9315 continue;
9316 }
9317 dev->dismantle = true;
9318 BUG_ON(dev->reg_state != NETREG_REGISTERED);
9319 }
9320
9321
9322 list_for_each_entry(dev, head, unreg_list)
9323 list_add_tail(&dev->close_list, &close_head);
9324 dev_close_many(&close_head, true);
9325
9326 list_for_each_entry(dev, head, unreg_list) {
9327
9328 unlist_netdevice(dev);
9329
9330 dev->reg_state = NETREG_UNREGISTERING;
9331 }
9332 flush_all_backlogs();
9333
9334 synchronize_net();
9335
9336 list_for_each_entry(dev, head, unreg_list) {
9337 struct sk_buff *skb = NULL;
9338
9339
9340 dev_shutdown(dev);
9341
9342 dev_xdp_uninstall(dev);
9343
9344
9345
9346
9347 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
9348
9349 if (!dev->rtnl_link_ops ||
9350 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
9351 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
9352 GFP_KERNEL, NULL, 0);
9353
9354
9355
9356
9357 dev_uc_flush(dev);
9358 dev_mc_flush(dev);
9359
9360 netdev_name_node_alt_flush(dev);
9361 netdev_name_node_free(dev->name_node);
9362
9363 if (dev->netdev_ops->ndo_uninit)
9364 dev->netdev_ops->ndo_uninit(dev);
9365
9366 if (skb)
9367 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
9368
9369
9370 WARN_ON(netdev_has_any_upper_dev(dev));
9371 WARN_ON(netdev_has_any_lower_dev(dev));
9372
9373
9374 netdev_unregister_kobject(dev);
9375#ifdef CONFIG_XPS
9376
9377 netif_reset_xps_queues_gt(dev, 0);
9378#endif
9379 }
9380
9381 synchronize_net();
9382
9383 list_for_each_entry(dev, head, unreg_list)
9384 dev_put(dev);
9385}
9386
9387static void rollback_registered(struct net_device *dev)
9388{
9389 LIST_HEAD(single);
9390
9391 list_add(&dev->unreg_list, &single);
9392 rollback_registered_many(&single);
9393 list_del(&single);
9394}
9395
9396static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
9397 struct net_device *upper, netdev_features_t features)
9398{
9399 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9400 netdev_features_t feature;
9401 int feature_bit;
9402
9403 for_each_netdev_feature(upper_disables, feature_bit) {
9404 feature = __NETIF_F_BIT(feature_bit);
9405 if (!(upper->wanted_features & feature)
9406 && (features & feature)) {
9407 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
9408 &feature, upper->name);
9409 features &= ~feature;
9410 }
9411 }
9412
9413 return features;
9414}
9415
9416static void netdev_sync_lower_features(struct net_device *upper,
9417 struct net_device *lower, netdev_features_t features)
9418{
9419 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
9420 netdev_features_t feature;
9421 int feature_bit;
9422
9423 for_each_netdev_feature(upper_disables, feature_bit) {
9424 feature = __NETIF_F_BIT(feature_bit);
9425 if (!(features & feature) && (lower->features & feature)) {
9426 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
9427 &feature, lower->name);
9428 lower->wanted_features &= ~feature;
9429 __netdev_update_features(lower);
9430
9431 if (unlikely(lower->features & feature))
9432 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
9433 &feature, lower->name);
9434 else
9435 netdev_features_change(lower);
9436 }
9437 }
9438}
9439
9440static netdev_features_t netdev_fix_features(struct net_device *dev,
9441 netdev_features_t features)
9442{
9443
9444 if ((features & NETIF_F_HW_CSUM) &&
9445 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
9446 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
9447 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
9448 }
9449
9450
9451 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
9452 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
9453 features &= ~NETIF_F_ALL_TSO;
9454 }
9455
9456 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
9457 !(features & NETIF_F_IP_CSUM)) {
9458 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
9459 features &= ~NETIF_F_TSO;
9460 features &= ~NETIF_F_TSO_ECN;
9461 }
9462
9463 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
9464 !(features & NETIF_F_IPV6_CSUM)) {
9465 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
9466 features &= ~NETIF_F_TSO6;
9467 }
9468
9469
9470 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
9471 features &= ~NETIF_F_TSO_MANGLEID;
9472
9473
9474 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
9475 features &= ~NETIF_F_TSO_ECN;
9476
9477
9478 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
9479 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
9480 features &= ~NETIF_F_GSO;
9481 }
9482
9483
9484 if ((features & dev->gso_partial_features) &&
9485 !(features & NETIF_F_GSO_PARTIAL)) {
9486 netdev_dbg(dev,
9487 "Dropping partially supported GSO features since no GSO partial.\n");
9488 features &= ~dev->gso_partial_features;
9489 }
9490
9491 if (!(features & NETIF_F_RXCSUM)) {
9492
9493
9494
9495
9496
9497 if (features & NETIF_F_GRO_HW) {
9498 netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
9499 features &= ~NETIF_F_GRO_HW;
9500 }
9501 }
9502
9503
9504 if (features & NETIF_F_RXFCS) {
9505 if (features & NETIF_F_LRO) {
9506 netdev_dbg(dev, "Dropping LRO feature since RX-FCS is requested.\n");
9507 features &= ~NETIF_F_LRO;
9508 }
9509
9510 if (features & NETIF_F_GRO_HW) {
9511 netdev_dbg(dev, "Dropping HW-GRO feature since RX-FCS is requested.\n");
9512 features &= ~NETIF_F_GRO_HW;
9513 }
9514 }
9515
9516 return features;
9517}
9518
9519int __netdev_update_features(struct net_device *dev)
9520{
9521 struct net_device *upper, *lower;
9522 netdev_features_t features;
9523 struct list_head *iter;
9524 int err = -1;
9525
9526 ASSERT_RTNL();
9527
9528 features = netdev_get_wanted_features(dev);
9529
9530 if (dev->netdev_ops->ndo_fix_features)
9531 features = dev->netdev_ops->ndo_fix_features(dev, features);
9532
9533
9534 features = netdev_fix_features(dev, features);
9535
9536
9537 netdev_for_each_upper_dev_rcu(dev, upper, iter)
9538 features = netdev_sync_upper_features(dev, upper, features);
9539
9540 if (dev->features == features)
9541 goto sync_lower;
9542
9543 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
9544 &dev->features, &features);
9545
9546 if (dev->netdev_ops->ndo_set_features)
9547 err = dev->netdev_ops->ndo_set_features(dev, features);
9548 else
9549 err = 0;
9550
9551 if (unlikely(err < 0)) {
9552 netdev_err(dev,
9553 "set_features() failed (%d); wanted %pNF, left %pNF\n",
9554 err, &features, &dev->features);
9555
9556
9557
9558 return -1;
9559 }
9560
9561sync_lower:
9562
9563
9564
9565 netdev_for_each_lower_dev(dev, lower, iter)
9566 netdev_sync_lower_features(dev, lower, features);
9567
9568 if (!err) {
9569 netdev_features_t diff = features ^ dev->features;
9570
9571 if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
9572
9573
9574
9575
9576
9577
9578
9579 if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
9580 dev->features = features;
9581 udp_tunnel_get_rx_info(dev);
9582 } else {
9583 udp_tunnel_drop_rx_info(dev);
9584 }
9585 }
9586
9587 if (diff & NETIF_F_HW_VLAN_CTAG_FILTER) {
9588 if (features & NETIF_F_HW_VLAN_CTAG_FILTER) {
9589 dev->features = features;
9590 err |= vlan_get_rx_ctag_filter_info(dev);
9591 } else {
9592 vlan_drop_rx_ctag_filter_info(dev);
9593 }
9594 }
9595
9596 if (diff & NETIF_F_HW_VLAN_STAG_FILTER) {
9597 if (features & NETIF_F_HW_VLAN_STAG_FILTER) {
9598 dev->features = features;
9599 err |= vlan_get_rx_stag_filter_info(dev);
9600 } else {
9601 vlan_drop_rx_stag_filter_info(dev);
9602 }
9603 }
9604
9605 dev->features = features;
9606 }
9607
9608 return err < 0 ? 0 : 1;
9609}
9610
9611
9612
9613
9614
9615
9616
9617
9618
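/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications if it
 *	has changed. Should be called after driver or hardware dependent
 *	conditions might have changed that influence the features.
 */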
9619void netdev_update_features(struct net_device *dev)
9620{
9621 if (__netdev_update_features(dev))
9622 netdev_features_change(dev);
9623}
9624EXPORT_SYMBOL(netdev_update_features);
9625
9626
9627
9628
9629
9630
9631
9632
9633
9634
9635
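/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications even
 *	if they have not changed. Should be called instead of
 *	netdev_update_features() if also dev->vlan_features might
 *	have changed to allow the changes to be propagated to stacked
 *	VLAN devices.
 */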
9636void netdev_change_features(struct net_device *dev)
9637{
9638 __netdev_update_features(dev);
9639 netdev_features_change(dev);
9640}
9641EXPORT_SYMBOL(netdev_change_features);
9642
9643
9644
9645
9646
9647
9648
9649
9650
9651
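/**
 *	netif_stacked_transfer_operstate -	transfer operstate
 *	@rootdev: the root or lower level device to transfer state from
 *	@dev: the device to transfer operstate to
 *
 *	Transfer operational state from root to device. This is normally
 *	called when a stacking relationship exists between the root
 *	device and the device (a leaf device).
 */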
9652void netif_stacked_transfer_operstate(const struct net_device *rootdev,
9653 struct net_device *dev)
9654{
9655 if (rootdev->operstate == IF_OPER_DORMANT)
9656 netif_dormant_on(dev);
9657 else
9658 netif_dormant_off(dev);
9659
9660 if (rootdev->operstate == IF_OPER_TESTING)
9661 netif_testing_on(dev);
9662 else
9663 netif_testing_off(dev);
9664
9665 if (netif_carrier_ok(rootdev))
9666 netif_carrier_on(dev);
9667 else
9668 netif_carrier_off(dev);
9669}
9670EXPORT_SYMBOL(netif_stacked_transfer_operstate);
9671
9672static int netif_alloc_rx_queues(struct net_device *dev)
9673{
9674 unsigned int i, count = dev->num_rx_queues;
9675 struct netdev_rx_queue *rx;
9676 size_t sz = count * sizeof(*rx);
9677 int err = 0;
9678
9679 BUG_ON(count < 1);
9680
9681 rx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
9682 if (!rx)
9683 return -ENOMEM;
9684
9685 dev->_rx = rx;
9686
9687 for (i = 0; i < count; i++) {
9688 rx[i].dev = dev;
9689
9690
9691 err = xdp_rxq_info_reg(&rx[i].xdp_rxq, dev, i);
9692 if (err < 0)
9693 goto err_rxq_info;
9694 }
9695 return 0;
9696
9697err_rxq_info:
9698
9699 while (i--)
9700 xdp_rxq_info_unreg(&rx[i].xdp_rxq);
9701 kvfree(dev->_rx);
9702 dev->_rx = NULL;
9703 return err;
9704}
9705
9706static void netif_free_rx_queues(struct net_device *dev)
9707{
9708 unsigned int i, count = dev->num_rx_queues;
9709
9710
9711 if (!dev->_rx)
9712 return;
9713
9714 for (i = 0; i < count; i++)
9715 xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq);
9716
9717 kvfree(dev->_rx);
9718}
9719
9720static void netdev_init_one_queue(struct net_device *dev,
9721 struct netdev_queue *queue, void *_unused)
9722{
9723
9724 spin_lock_init(&queue->_xmit_lock);
9725 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
9726 queue->xmit_lock_owner = -1;
9727 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
9728 queue->dev = dev;
9729#ifdef CONFIG_BQL
9730 dql_init(&queue->dql, HZ);
9731#endif
9732}
9733
9734static void netif_free_tx_queues(struct net_device *dev)
9735{
9736 kvfree(dev->_tx);
9737}
9738
9739static int netif_alloc_netdev_queues(struct net_device *dev)
9740{
9741 unsigned int count = dev->num_tx_queues;
9742 struct netdev_queue *tx;
9743 size_t sz = count * sizeof(*tx);
9744
9745 if (count < 1 || count > 0xffff)
9746 return -EINVAL;
9747
9748 tx = kvzalloc(sz, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
9749 if (!tx)
9750 return -ENOMEM;
9751
9752 dev->_tx = tx;
9753
9754 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
9755 spin_lock_init(&dev->tx_global_lock);
9756
9757 return 0;
9758}
9759
9760void netif_tx_stop_all_queues(struct net_device *dev)
9761{
9762 unsigned int i;
9763
9764 for (i = 0; i < dev->num_tx_queues; i++) {
9765 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
9766
9767 netif_tx_stop_queue(txq);
9768 }
9769}
9770EXPORT_SYMBOL(netif_tx_stop_all_queues);
9771
9772
9773
9774
9775
9776
9777
9778
9779
9780
9781
9782
9783
9784
9785
9786
9787
9788
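/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 */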
9789int register_netdevice(struct net_device *dev)
9790{
9791 int ret;
9792 struct net *net = dev_net(dev);
9793
9794 BUILD_BUG_ON(sizeof(netdev_features_t) * BITS_PER_BYTE <
9795 NETDEV_FEATURE_COUNT);
9796 BUG_ON(dev_boot_phase);
9797 ASSERT_RTNL();
9798
9799 might_sleep();
9800
9801
9802 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
9803 BUG_ON(!net);
9804
9805 ret = ethtool_check_ops(dev->ethtool_ops);
9806 if (ret)
9807 return ret;
9808
9809 spin_lock_init(&dev->addr_list_lock);
9810 netdev_set_addr_lockdep_class(dev);
9811
9812 ret = dev_get_valid_name(net, dev, dev->name);
9813 if (ret < 0)
9814 goto out;
9815
9816 ret = -ENOMEM;
9817 dev->name_node = netdev_name_node_head_alloc(dev);
9818 if (!dev->name_node)
9819 goto out;
9820
9821
9822 if (dev->netdev_ops->ndo_init) {
9823 ret = dev->netdev_ops->ndo_init(dev);
9824 if (ret) {
9825 if (ret > 0)
9826 ret = -EIO;
9827 goto err_free_name;
9828 }
9829 }
9830
9831 if (((dev->hw_features | dev->features) &
9832 NETIF_F_HW_VLAN_CTAG_FILTER) &&
9833 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
9834 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
9835 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
9836 ret = -EINVAL;
9837 goto err_uninit;
9838 }
9839
9840 ret = -EBUSY;
9841 if (!dev->ifindex)
9842 dev->ifindex = dev_new_index(net);
9843 else if (__dev_get_by_index(net, dev->ifindex))
9844 goto err_uninit;
9845
9846
9847
9848
9849 dev->hw_features |= (NETIF_F_SOFT_FEATURES | NETIF_F_SOFT_FEATURES_OFF);
9850 dev->features |= NETIF_F_SOFT_FEATURES;
9851
9852 if (dev->netdev_ops->ndo_udp_tunnel_add) {
9853 dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
9854 dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
9855 }
9856
9857 dev->wanted_features = dev->features & dev->hw_features;
9858
9859 if (!(dev->flags & IFF_LOOPBACK))
9860 dev->hw_features |= NETIF_F_NOCACHE_COPY;
9861
9862
9863
9864
9865
9866
9867 if (dev->hw_features & NETIF_F_TSO)
9868 dev->hw_features |= NETIF_F_TSO_MANGLEID;
9869 if (dev->vlan_features & NETIF_F_TSO)
9870 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
9871 if (dev->mpls_features & NETIF_F_TSO)
9872 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
9873 if (dev->hw_enc_features & NETIF_F_TSO)
9874 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
9875
9876
9877
9878 dev->vlan_features |= NETIF_F_HIGHDMA;
9879
9880
9881
9882 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
9883
9884
9885
9886 dev->mpls_features |= NETIF_F_SG;
9887
9888 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
9889 ret = notifier_to_errno(ret);
9890 if (ret)
9891 goto err_uninit;
9892
9893 ret = netdev_register_kobject(dev);
9894 if (ret) {
9895 dev->reg_state = NETREG_UNREGISTERED;
9896 goto err_uninit;
9897 }
9898 dev->reg_state = NETREG_REGISTERED;
9899
9900 __netdev_update_features(dev);
9901
9902
9903
9904
9905
9906
9907 set_bit(__LINK_STATE_PRESENT, &dev->state);
9908
9909 linkwatch_init_dev(dev);
9910
9911 dev_init_scheduler(dev);
9912 dev_hold(dev);
9913 list_netdevice(dev);
9914 add_device_randomness(dev->dev_addr, dev->addr_len);
9915
9916
9917
9918
9919
9920 if (dev->addr_assign_type == NET_ADDR_PERM)
9921 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
9922
9923
9924 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
9925 ret = notifier_to_errno(ret);
9926 if (ret) {
9927 rollback_registered(dev);
9928 rcu_barrier();
9929
9930 dev->reg_state = NETREG_UNREGISTERED;
9931
9932
9933
9934
9935
9936
9937 kobject_put(&dev->dev.kobj);
9938 }
9939
9940
9941
9942
9943 if (!dev->rtnl_link_ops ||
9944 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
9945 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
9946
9947out:
9948 return ret;
9949
9950err_uninit:
9951 if (dev->netdev_ops->ndo_uninit)
9952 dev->netdev_ops->ndo_uninit(dev);
9953 if (dev->priv_destructor)
9954 dev->priv_destructor(dev);
9955err_free_name:
9956 netdev_name_node_free(dev->name_node);
9957 goto out;
9958}
9959EXPORT_SYMBOL(register_netdevice);
9960
9961
9962
9963
9964
9965
9966
9967
9968
9969
9970
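/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */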
9971int init_dummy_netdev(struct net_device *dev)
9972{
9973
9974
9975
9976
9977
9978 memset(dev, 0, sizeof(struct net_device));
9979
9980
9981
9982
9983 dev->reg_state = NETREG_DUMMY;
9984
9985
9986 INIT_LIST_HEAD(&dev->napi_list);
9987
9988
9989 set_bit(__LINK_STATE_PRESENT, &dev->state);
9990 set_bit(__LINK_STATE_START, &dev->state);
9991
9992
9993 dev_net_set(dev, &init_net);
9994
9995
9996
9997
9998
9999
10000 return 0;
10001}
10002EXPORT_SYMBOL_GPL(init_dummy_netdev);
10003
10004
10005
10006
10007
10008
10009
10010
10011
10012
10013
10014
10015
10016
10017
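/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice() that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev().
 */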
10018int register_netdev(struct net_device *dev)
10019{
10020 int err;
10021
10022 if (rtnl_lock_killable())
10023 return -EINTR;
10024 err = register_netdevice(dev);
10025 rtnl_unlock();
10026 return err;
10027}
10028EXPORT_SYMBOL(register_netdev);
10029
10030int netdev_refcnt_read(const struct net_device *dev)
10031{
10032 int i, refcnt = 0;
10033
10034 for_each_possible_cpu(i)
10035 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
10036 return refcnt;
10037}
10038EXPORT_SYMBOL(netdev_refcnt_read);
10039
10040
10041
10042
10043
10044
10045
10046
10047
10048
10049
10050
10051
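/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */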
10052static void netdev_wait_allrefs(struct net_device *dev)
10053{
10054 unsigned long rebroadcast_time, warning_time;
10055 int refcnt;
10056
10057 linkwatch_forget_dev(dev);
10058
10059 rebroadcast_time = warning_time = jiffies;
10060 refcnt = netdev_refcnt_read(dev);
10061
10062 while (refcnt != 0) {
10063 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
10064 rtnl_lock();
10065
10066
10067 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
10068
10069 __rtnl_unlock();
10070 rcu_barrier();
10071 rtnl_lock();
10072
10073 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
10074 &dev->state)) {
10075
10076
10077
10078
10079
10080
10081 linkwatch_run_queue();
10082 }
10083
10084 __rtnl_unlock();
10085
10086 rebroadcast_time = jiffies;
10087 }
10088
10089 msleep(250);
10090
10091 refcnt = netdev_refcnt_read(dev);
10092
10093 if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
10094 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
10095 dev->name, refcnt);
10096 warning_time = jiffies;
10097 }
10098 }
10099}
10100
10101
10102
10103
10104
10105
10106
10107
10108
10109
10110
10111
10112
10113
10114
10115
10116
10117
10118
10119
10120
10121
10122
10123
10124
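/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */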
10125void netdev_run_todo(void)
10126{
10127 struct list_head list;
10128#ifdef CONFIG_LOCKDEP
10129 struct list_head unlink_list;
10130
10131 list_replace_init(&net_unlink_list, &unlink_list);
10132
10133 while (!list_empty(&unlink_list)) {
10134 struct net_device *dev = list_first_entry(&unlink_list,
10135 struct net_device,
10136 unlink_list);
10137 list_del(&dev->unlink_list);
10138 dev->nested_level = dev->lower_level - 1;
10139 }
10140#endif
10141
10142
10143 list_replace_init(&net_todo_list, &list);
10144
10145 __rtnl_unlock();
10146
10147
10148
10149 if (!list_empty(&list))
10150 rcu_barrier();
10151
10152 while (!list_empty(&list)) {
10153 struct net_device *dev
10154 = list_first_entry(&list, struct net_device, todo_list);
10155 list_del(&dev->todo_list);
10156
10157 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
10158 pr_err("network todo '%s' but state %d\n",
10159 dev->name, dev->reg_state);
10160 dump_stack();
10161 continue;
10162 }
10163
10164 dev->reg_state = NETREG_UNREGISTERED;
10165
10166 netdev_wait_allrefs(dev);
10167
10168
10169 BUG_ON(netdev_refcnt_read(dev));
10170 BUG_ON(!list_empty(&dev->ptype_all));
10171 BUG_ON(!list_empty(&dev->ptype_specific));
10172 WARN_ON(rcu_access_pointer(dev->ip_ptr));
10173 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
10174#if IS_ENABLED(CONFIG_DECNET)
10175 WARN_ON(dev->dn_ptr);
10176#endif
10177 if (dev->priv_destructor)
10178 dev->priv_destructor(dev);
10179 if (dev->needs_free_netdev)
10180 free_netdev(dev);
10181
10182
10183 rtnl_lock();
10184 dev_net(dev)->dev_unreg_count--;
10185 __rtnl_unlock();
10186 wake_up(&netdev_unregistering_wq);
10187
10188
10189 kobject_put(&dev->dev.kobj);
10190 }
10191}
10192
10193
10194
10195
10196
10197
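/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
 * all the same fields in the same order as net_device_stats, with only
 * the type differing, but rtnl_link_stats64 may have additional fields
 * at the end for newer counters.
 */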
10198void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
10199 const struct net_device_stats *netdev_stats)
10200{
10201#if BITS_PER_LONG == 64
10202 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
10203 memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
10204
10205 memset((char *)stats64 + sizeof(*netdev_stats), 0,
10206 sizeof(*stats64) - sizeof(*netdev_stats));
10207#else
10208 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
10209 const unsigned long *src = (const unsigned long *)netdev_stats;
10210 u64 *dst = (u64 *)stats64;
10211
10212 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
10213 for (i = 0; i < n; i++)
10214 dst[i] = src[i];
10215
10216 memset((char *)stats64 + n * sizeof(u64), 0,
10217 sizeof(*stats64) - n * sizeof(u64));
10218#endif
10219}
10220EXPORT_SYMBOL(netdev_stats_to_stats64);
10221
/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */
10232struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
10233 struct rtnl_link_stats64 *storage)
10234{
10235 const struct net_device_ops *ops = dev->netdev_ops;
10236
10237 if (ops->ndo_get_stats64) {
10238 memset(storage, 0, sizeof(*storage));
10239 ops->ndo_get_stats64(dev, storage);
10240 } else if (ops->ndo_get_stats) {
10241 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
10242 } else {
10243 netdev_stats_to_stats64(storage, &dev->stats);
10244 }
10245 storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
10246 storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
10247 storage->rx_nohandler += (unsigned long)atomic_long_read(&dev->rx_nohandler);
10248 return storage;
10249}
10250EXPORT_SYMBOL(dev_get_stats);
10251
10252struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
10253{
10254 struct netdev_queue *queue = dev_ingress_queue(dev);
10255
10256#ifdef CONFIG_NET_CLS_ACT
10257 if (queue)
10258 return queue;
10259 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
10260 if (!queue)
10261 return NULL;
10262 netdev_init_one_queue(dev, queue, NULL);
10263 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
10264 queue->qdisc_sleeping = &noop_qdisc;
10265 rcu_assign_pointer(dev->ingress_queue, queue);
10266#endif
10267 return queue;
10268}
10269
10270static const struct ethtool_ops default_ethtool_ops;
10271
10272void netdev_set_default_ethtool_ops(struct net_device *dev,
10273 const struct ethtool_ops *ops)
10274{
10275 if (dev->ethtool_ops == &default_ethtool_ops)
10276 dev->ethtool_ops = ops;
10277}
10278EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
10279
10280void netdev_freemem(struct net_device *dev)
10281{
10282 char *addr = (char *)dev - dev->padded;
10283
10284 kvfree(addr);
10285}
10286
/**
 * alloc_netdev_mqs - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @name_assign_type: origin of device name
 * @setup: callback to initialize device
 * @txqs: the number of TX subqueues to allocate
 * @rxqs: the number of RX subqueues to allocate
 *
 * Allocates a struct net_device with a private data area for driver use
 * and performs basic initialization.  Also allocates subqueue structs
 * for each queue on the device.
 */
10300struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
10301 unsigned char name_assign_type,
10302 void (*setup)(struct net_device *),
10303 unsigned int txqs, unsigned int rxqs)
10304{
10305 struct net_device *dev;
10306 unsigned int alloc_size;
10307 struct net_device *p;
10308
10309 BUG_ON(strlen(name) >= sizeof(dev->name));
10310
10311 if (txqs < 1) {
10312 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
10313 return NULL;
10314 }
10315
10316 if (rxqs < 1) {
10317 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
10318 return NULL;
10319 }
10320
10321 alloc_size = sizeof(struct net_device);
10322 if (sizeof_priv) {
10323
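		/* ensure 32-byte alignment of private area */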
10324 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
10325 alloc_size += sizeof_priv;
10326 }
10327
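	/* ensure 32-byte alignment of whole construct */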
10328 alloc_size += NETDEV_ALIGN - 1;
10329
10330 p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
10331 if (!p)
10332 return NULL;
10333
10334 dev = PTR_ALIGN(p, NETDEV_ALIGN);
10335 dev->padded = (char *)dev - (char *)p;
10336
10337 dev->pcpu_refcnt = alloc_percpu(int);
10338 if (!dev->pcpu_refcnt)
10339 goto free_dev;
10340
10341 if (dev_addr_init(dev))
10342 goto free_pcpu;
10343
10344 dev_mc_init(dev);
10345 dev_uc_init(dev);
10346
10347 dev_net_set(dev, &init_net);
10348
10349 dev->gso_max_size = GSO_MAX_SIZE;
10350 dev->gso_max_segs = GSO_MAX_SEGS;
10351 dev->upper_level = 1;
10352 dev->lower_level = 1;
10353#ifdef CONFIG_LOCKDEP
10354 dev->nested_level = 0;
10355 INIT_LIST_HEAD(&dev->unlink_list);
10356#endif
10357
10358 INIT_LIST_HEAD(&dev->napi_list);
10359 INIT_LIST_HEAD(&dev->unreg_list);
10360 INIT_LIST_HEAD(&dev->close_list);
10361 INIT_LIST_HEAD(&dev->link_watch_list);
10362 INIT_LIST_HEAD(&dev->adj_list.upper);
10363 INIT_LIST_HEAD(&dev->adj_list.lower);
10364 INIT_LIST_HEAD(&dev->ptype_all);
10365 INIT_LIST_HEAD(&dev->ptype_specific);
10366 INIT_LIST_HEAD(&dev->net_notifier_list);
10367#ifdef CONFIG_NET_SCHED
10368 hash_init(dev->qdisc_hash);
10369#endif
10370 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
10371 setup(dev);
10372
10373 if (!dev->tx_queue_len) {
10374 dev->priv_flags |= IFF_NO_QUEUE;
10375 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
10376 }
10377
10378 dev->num_tx_queues = txqs;
10379 dev->real_num_tx_queues = txqs;
10380 if (netif_alloc_netdev_queues(dev))
10381 goto free_all;
10382
10383 dev->num_rx_queues = rxqs;
10384 dev->real_num_rx_queues = rxqs;
10385 if (netif_alloc_rx_queues(dev))
10386 goto free_all;
10387
10388 strcpy(dev->name, name);
10389 dev->name_assign_type = name_assign_type;
10390 dev->group = INIT_NETDEV_GROUP;
10391 if (!dev->ethtool_ops)
10392 dev->ethtool_ops = &default_ethtool_ops;
10393
10394 nf_hook_ingress_init(dev);
10395
10396 return dev;
10397
10398free_all:
10399 free_netdev(dev);
10400 return NULL;
10401
10402free_pcpu:
10403 free_percpu(dev->pcpu_refcnt);
10404free_dev:
10405 netdev_freemem(dev);
10406 return NULL;
10407}
10408EXPORT_SYMBOL(alloc_netdev_mqs);
10409
/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released. If this
 * is the last reference then it will be freed. Must be called in process
 * context.
 */
10419void free_netdev(struct net_device *dev)
10420{
10421 struct napi_struct *p, *n;
10422
10423 might_sleep();
10424 netif_free_tx_queues(dev);
10425 netif_free_rx_queues(dev);
10426
10427 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
10428
10429
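	/* Flush device addresses */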
10430 dev_addr_flush(dev);
10431
10432 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
10433 netif_napi_del(p);
10434
10435 free_percpu(dev->pcpu_refcnt);
10436 dev->pcpu_refcnt = NULL;
10437 free_percpu(dev->xdp_bulkq);
10438 dev->xdp_bulkq = NULL;
10439
10440
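	/* Compatibility with error handling in drivers */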
10441 if (dev->reg_state == NETREG_UNINITIALIZED) {
10442 netdev_freemem(dev);
10443 return;
10444 }
10445
10446 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
10447 dev->reg_state = NETREG_RELEASED;
10448
10449
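	/* will free via device release */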
10450 put_device(&dev->dev);
10451}
10452EXPORT_SYMBOL(free_netdev);
10453
/**
 *	synchronize_net - Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
10460void synchronize_net(void)
10461{
10462 might_sleep();
10463 if (rtnl_is_locked())
10464 synchronize_rcu_expedited();
10465 else
10466 synchronize_rcu();
10467}
10468EXPORT_SYMBOL(synchronize_net);
10469
/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head is not NULL, the device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */
10483void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
10484{
10485 ASSERT_RTNL();
10486
10487 if (head) {
10488 list_move_tail(&dev->unreg_list, head);
10489 } else {
10490 rollback_registered(dev);
10491
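		/* Finish processing unregister after unlock */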
10492 net_set_todo(dev);
10493 }
10494}
10495EXPORT_SYMBOL(unregister_netdevice_queue);
10496
/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 *
 *	Note: As most callers use a stack allocated list_head,
 *	we force a list_del() to make sure the stack won't be corrupted later.
 */
10504void unregister_netdevice_many(struct list_head *head)
10505{
10506 struct net_device *dev;
10507
10508 if (!list_empty(head)) {
10509 rollback_registered_many(head);
10510 list_for_each_entry(dev, head, unreg_list)
10511 net_set_todo(dev);
10512 list_del(head);
10513 }
10514}
10515EXPORT_SYMBOL(unregister_netdevice_many);
10516
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
10528void unregister_netdev(struct net_device *dev)
10529{
10530 rtnl_lock();
10531 unregister_netdevice(dev);
10532 rtnl_unlock();
10533}
10534EXPORT_SYMBOL(unregister_netdev);
10535
/**
 *	dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: if not NULL, name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */
10550int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
10551{
10552 struct net *net_old = dev_net(dev);
10553 int err, new_nsid, new_ifindex;
10554
10555 ASSERT_RTNL();
10556
10557
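	/* Don't allow namespace local devices to be moved. */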
10558 err = -EINVAL;
10559 if (dev->features & NETIF_F_NETNS_LOCAL)
10560 goto out;
10561
10562
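	/* Ensure the device has been registered */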
10563 if (dev->reg_state != NETREG_REGISTERED)
10564 goto out;
10565
10566
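	/* Get out if there is nothing to do */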
10567 err = 0;
10568 if (net_eq(net_old, net))
10569 goto out;
10570
	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
10574 err = -EEXIST;
10575 if (__dev_get_by_name(net, dev->name)) {
10576
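		/* We get here if we can't use the current device name */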
10577 if (!pat)
10578 goto out;
10579 err = dev_get_valid_name(net, dev, pat);
10580 if (err < 0)
10581 goto out;
10582 }
10583
	/* And now run a mini version of register_netdevice()
	 * and unregister_netdevice().
	 */

	/* If the device is running, close it first. */
10589 dev_close(dev);
10590
10591
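	/* And unlink it from the device chain */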
10592 unlist_netdevice(dev);
10593
10594 synchronize_net();
10595
10596
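	/* Shutdown queueing discipline. */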
10597 dev_shutdown(dev);
10598
	/* Notify protocols that we are about to destroy this device.
	 * They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */
10606 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
10607 rcu_barrier();
10608
10609 new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
10610
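	/* If there is an ifindex conflict assign a new one */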
10611 if (__dev_get_by_index(net, dev->ifindex))
10612 new_ifindex = dev_new_index(net);
10613 else
10614 new_ifindex = dev->ifindex;
10615
10616 rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
10617 new_ifindex);
10618
	/*
	 *	Flush the unicast and multicast chains
	 */
10622 dev_uc_flush(dev);
10623 dev_mc_flush(dev);
10624
10625
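	/* Send a netdev-removed uevent to the old namespace */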
10626 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
10627 netdev_adjacent_del_links(dev);
10628
10629
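	/* Move per-net netdevice notifiers that are following the netdevice */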
10630 move_netdevice_notifiers_dev_net(dev, net);
10631
10632
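	/* Actually switch the network namespace */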
10633 dev_net_set(dev, net);
10634 dev->ifindex = new_ifindex;
10635
10636
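	/* Send a netdev-add uevent to the new namespace */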
10637 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
10638 netdev_adjacent_add_links(dev);
10639
10640
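	/* Fixup kobjects */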
10641 err = device_rename(&dev->dev, dev->name);
10642 WARN_ON(err);
10643
	/* Adapt owner in case the owning user namespace of the target network
	 * namespace is different from the original one.
	 */
10647 err = netdev_change_owner(dev, net_old, net);
10648 WARN_ON(err);
10649
10650
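	/* Add the device back in the hashes */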
10651 list_netdevice(dev);
10652
10653
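	/* Notify protocols that a new device appeared. */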
10654 call_netdevice_notifiers(NETDEV_REGISTER, dev);
10655
	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully set up before sending notifications.
	 */
10660 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
10661
10662 synchronize_net();
10663 err = 0;
10664out:
10665 return err;
10666}
10667EXPORT_SYMBOL_GPL(dev_change_net_namespace);
10668
10669static int dev_cpu_dead(unsigned int oldcpu)
10670{
10671 struct sk_buff **list_skb;
10672 struct sk_buff *skb;
10673 unsigned int cpu;
10674 struct softnet_data *sd, *oldsd, *remsd = NULL;
10675
10676 local_irq_disable();
10677 cpu = smp_processor_id();
10678 sd = &per_cpu(softnet_data, cpu);
10679 oldsd = &per_cpu(softnet_data, oldcpu);
10680
10681
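	/* Find end of our completion_queue. */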
10682 list_skb = &sd->completion_queue;
10683 while (*list_skb)
10684 list_skb = &(*list_skb)->next;
10685
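	/* Append completion queue from offline CPU. */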
10686 *list_skb = oldsd->completion_queue;
10687 oldsd->completion_queue = NULL;
10688
10689
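	/* Append output queue from offline CPU. */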
10690 if (oldsd->output_queue) {
10691 *sd->output_queue_tailp = oldsd->output_queue;
10692 sd->output_queue_tailp = oldsd->output_queue_tailp;
10693 oldsd->output_queue = NULL;
10694 oldsd->output_queue_tailp = &oldsd->output_queue;
10695 }
10696
	/* Append NAPI poll list from offline CPU, with one exception:
	 * process_backlog() must be called by the cpu owning the percpu backlog.
	 * We properly handle process_queue & input_pkt_queue later.
	 */
10700 while (!list_empty(&oldsd->poll_list)) {
10701 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
10702 struct napi_struct,
10703 poll_list);
10704
10705 list_del_init(&napi->poll_list);
10706 if (napi->poll == process_backlog)
10707 napi->state = 0;
10708 else
10709 ____napi_schedule(sd, napi);
10710 }
10711
10712 raise_softirq_irqoff(NET_TX_SOFTIRQ);
10713 local_irq_enable();
10714
10715#ifdef CONFIG_RPS
10716 remsd = oldsd->rps_ipi_list;
10717 oldsd->rps_ipi_list = NULL;
10718#endif
10719
10720 net_rps_send_ipi(remsd);
10721
10722
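	/* Process offline CPU's input_pkt_queue */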
10723 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
10724 netif_rx_ni(skb);
10725 input_queue_head_incr(oldsd);
10726 }
10727 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
10728 netif_rx_ni(skb);
10729 input_queue_head_incr(oldsd);
10730 }
10731
10732 return 0;
10733}
10734
/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
10745netdev_features_t netdev_increment_features(netdev_features_t all,
10746 netdev_features_t one, netdev_features_t mask)
10747{
10748 if (mask & NETIF_F_HW_CSUM)
10749 mask |= NETIF_F_CSUM_MASK;
10750 mask |= NETIF_F_VLAN_CHALLENGED;
10751
10752 all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
10753 all &= one | ~NETIF_F_ALL_FOR_ALL;
10754
10755
10756 if (all & NETIF_F_HW_CSUM)
10757 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
10758
10759 return all;
10760}
10761EXPORT_SYMBOL(netdev_increment_features);
10762
10763static struct hlist_head * __net_init netdev_create_hash(void)
10764{
10765 int i;
10766 struct hlist_head *hash;
10767
10768 hash = kmalloc_array(NETDEV_HASHENTRIES, sizeof(*hash), GFP_KERNEL);
10769 if (hash != NULL)
10770 for (i = 0; i < NETDEV_HASHENTRIES; i++)
10771 INIT_HLIST_HEAD(&hash[i]);
10772
10773 return hash;
10774}
10775
10776
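/* Initialize per network namespace state */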
10777static int __net_init netdev_init(struct net *net)
10778{
10779 BUILD_BUG_ON(GRO_HASH_BUCKETS >
10780 8 * sizeof_field(struct napi_struct, gro_bitmask));
10781
10782 if (net != &init_net)
10783 INIT_LIST_HEAD(&net->dev_base_head);
10784
10785 net->dev_name_head = netdev_create_hash();
10786 if (net->dev_name_head == NULL)
10787 goto err_name;
10788
10789 net->dev_index_head = netdev_create_hash();
10790 if (net->dev_index_head == NULL)
10791 goto err_idx;
10792
10793 RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
10794
10795 return 0;
10796
10797err_idx:
10798 kfree(net->dev_name_head);
10799err_name:
10800 return -ENOMEM;
10801}
10802
/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine the network driver for the device.
 */
10809const char *netdev_drivername(const struct net_device *dev)
10810{
10811 const struct device_driver *driver;
10812 const struct device *parent;
10813 const char *empty = "";
10814
10815 parent = dev->dev.parent;
10816 if (!parent)
10817 return empty;
10818
10819 driver = parent->driver;
10820 if (driver && driver->name)
10821 return driver->name;
10822 return empty;
10823}
10824
10825static void __netdev_printk(const char *level, const struct net_device *dev,
10826 struct va_format *vaf)
10827{
10828 if (dev && dev->dev.parent) {
10829 dev_printk_emit(level[1] - '0',
10830 dev->dev.parent,
10831 "%s %s %s%s: %pV",
10832 dev_driver_string(dev->dev.parent),
10833 dev_name(dev->dev.parent),
10834 netdev_name(dev), netdev_reg_state(dev),
10835 vaf);
10836 } else if (dev) {
10837 printk("%s%s%s: %pV",
10838 level, netdev_name(dev), netdev_reg_state(dev), vaf);
10839 } else {
10840 printk("%s(NULL net_device): %pV", level, vaf);
10841 }
10842}
10843
10844void netdev_printk(const char *level, const struct net_device *dev,
10845 const char *format, ...)
10846{
10847 struct va_format vaf;
10848 va_list args;
10849
10850 va_start(args, format);
10851
10852 vaf.fmt = format;
10853 vaf.va = &args;
10854
10855 __netdev_printk(level, dev, &vaf);
10856
10857 va_end(args);
10858}
10859EXPORT_SYMBOL(netdev_printk);
10860
10861#define define_netdev_printk_level(func, level) \
10862void func(const struct net_device *dev, const char *fmt, ...) \
10863{ \
10864 struct va_format vaf; \
10865 va_list args; \
10866 \
10867 va_start(args, fmt); \
10868 \
10869 vaf.fmt = fmt; \
10870 vaf.va = &args; \
10871 \
10872 __netdev_printk(level, dev, &vaf); \
10873 \
10874 va_end(args); \
10875} \
10876EXPORT_SYMBOL(func);
10877
10878define_netdev_printk_level(netdev_emerg, KERN_EMERG);
10879define_netdev_printk_level(netdev_alert, KERN_ALERT);
10880define_netdev_printk_level(netdev_crit, KERN_CRIT);
10881define_netdev_printk_level(netdev_err, KERN_ERR);
10882define_netdev_printk_level(netdev_warn, KERN_WARNING);
10883define_netdev_printk_level(netdev_notice, KERN_NOTICE);
10884define_netdev_printk_level(netdev_info, KERN_INFO);
10885
10886static void __net_exit netdev_exit(struct net *net)
10887{
10888 kfree(net->dev_name_head);
10889 kfree(net->dev_index_head);
10890 if (net != &init_net)
10891 WARN_ON_ONCE(!list_empty(&net->dev_base_head));
10892}
10893
10894static struct pernet_operations __net_initdata netdev_net_ops = {
10895 .init = netdev_init,
10896 .exit = netdev_exit,
10897};
10898
10899static void __net_exit default_device_exit(struct net *net)
10900{
10901 struct net_device *dev, *aux;
10902
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
10906 rtnl_lock();
10907 for_each_netdev_safe(net, dev, aux) {
10908 int err;
10909 char fb_name[IFNAMSIZ];
10910
10911
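		/* Ignore unmoveable devices (i.e. loopback) */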
10912 if (dev->features & NETIF_F_NETNS_LOCAL)
10913 continue;
10914
10915
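		/* Leave virtual devices for the generic cleanup */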
10916 if (dev->rtnl_link_ops)
10917 continue;
10918
10919
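		/* Push remaining network devices to init_net */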
10920 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
10921 if (__dev_get_by_name(&init_net, fb_name))
10922 snprintf(fb_name, IFNAMSIZ, "dev%%d");
10923 err = dev_change_net_namespace(dev, &init_net, fb_name);
10924 if (err) {
10925 pr_emerg("%s: failed to move %s to init_net: %d\n",
10926 __func__, dev->name, err);
10927 BUG();
10928 }
10929 }
10930 rtnl_unlock();
10931}
10932
10933static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
10934{
	/* Return with the rtnl_lock held when there are no network
	 * devices unregistering in any network namespace in net_list.
	 */
10938 struct net *net;
10939 bool unregistering;
10940 DEFINE_WAIT_FUNC(wait, woken_wake_function);
10941
10942 add_wait_queue(&netdev_unregistering_wq, &wait);
10943 for (;;) {
10944 unregistering = false;
10945 rtnl_lock();
10946 list_for_each_entry(net, net_list, exit_list) {
10947 if (net->dev_unreg_count > 0) {
10948 unregistering = true;
10949 break;
10950 }
10951 }
10952 if (!unregistering)
10953 break;
10954 __rtnl_unlock();
10955
10956 wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
10957 }
10958 remove_wait_queue(&netdev_unregistering_wq, &wait);
10959}
10960
10961static void __net_exit default_device_exit_batch(struct list_head *net_list)
10962{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
10968 struct net_device *dev;
10969 struct net *net;
10970 LIST_HEAD(dev_kill_list);
10971
	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed,
	 * wait here for all pending unregistrations to complete
	 * before unregistering the loopback device and allowing the
	 * network namespace to be freed.
	 *
	 * The netdev todo list containing all network device
	 * unregistrations that happen in default_device_exit_batch
	 * will run in the rtnl_unlock() at the end of
	 * default_device_exit_batch.
	 */
10983 rtnl_lock_unregistering(net_list);
10984 list_for_each_entry(net, net_list, exit_list) {
10985 for_each_netdev_reverse(net, dev) {
10986 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
10987 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
10988 else
10989 unregister_netdevice_queue(dev, &dev_kill_list);
10990 }
10991 }
10992 unregister_netdevice_many(&dev_kill_list);
10993 rtnl_unlock();
10994}
10995
10996static struct pernet_operations __net_initdata default_device_ops = {
10997 .exit = default_device_exit,
10998 .exit_batch = default_device_exit_batch,
10999};
11000
/*
 *	Initialize the DEV module: set up the per-CPU packet receive queues,
 *	register the pernet subsystems for network devices and the loopback
 *	device, and open the NET_TX/NET_RX softirqs.
 *
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
11012static int __init net_dev_init(void)
11013{
11014 int i, rc = -ENOMEM;
11015
11016 BUG_ON(!dev_boot_phase);
11017
11018 if (dev_proc_init())
11019 goto out;
11020
11021 if (netdev_kobject_init())
11022 goto out;
11023
11024 INIT_LIST_HEAD(&ptype_all);
11025 for (i = 0; i < PTYPE_HASH_SIZE; i++)
11026 INIT_LIST_HEAD(&ptype_base[i]);
11027
11028 INIT_LIST_HEAD(&offload_base);
11029
11030 if (register_pernet_subsys(&netdev_net_ops))
11031 goto out;
11032
	/*
	 *	Initialise the packet receive queues.
	 */
11036
11037 for_each_possible_cpu(i) {
11038 struct work_struct *flush = per_cpu_ptr(&flush_works, i);
11039 struct softnet_data *sd = &per_cpu(softnet_data, i);
11040
11041 INIT_WORK(flush, flush_backlog);
11042
11043 skb_queue_head_init(&sd->input_pkt_queue);
11044 skb_queue_head_init(&sd->process_queue);
11045#ifdef CONFIG_XFRM_OFFLOAD
11046 skb_queue_head_init(&sd->xfrm_backlog);
11047#endif
11048 INIT_LIST_HEAD(&sd->poll_list);
11049 sd->output_queue_tailp = &sd->output_queue;
11050#ifdef CONFIG_RPS
11051 sd->csd.func = rps_trigger_softirq;
11052 sd->csd.info = sd;
11053 sd->cpu = i;
11054#endif
11055
11056 init_gro_hash(&sd->backlog);
11057 sd->backlog.poll = process_backlog;
11058 sd->backlog.weight = weight_p;
11059 }
11060
11061 dev_boot_phase = 0;
11062
	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device the first device on the list of
	 * network devices, so that it is the first device that appears
	 * and the last one that disappears.
	 */
11072 if (register_pernet_device(&loopback_net_ops))
11073 goto out;
11074
11075 if (register_pernet_device(&default_device_ops))
11076 goto out;
11077
11078 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
11079 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
11080
11081 rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
11082 NULL, dev_cpu_dead);
11083 WARN_ON(rc < 0);
11084 rc = 0;
11085out:
11086 return rc;
11087}
11088
11089subsys_initcall(net_dev_init);
11090