/*
 * Protocol-independent network device support routines: device list
 * management and lookup, packet type handlers, device open/close,
 * notifier chains and the core transmit path helpers.
 */
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <linux/bpf.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
#include <linux/crash_dump.h>

#include "net-sysfs.h"

/* Maximum number of packets held on a GRO list before it is flushed. */
#define MAX_GRO_SKBS 8

/* Linear header space GRO may need to pull for the largest header. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;	/* Taps */
static struct list_head offload_base __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
					 struct net_device *dev,
					 struct netdev_notifier_info *info);

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock().
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);

static seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0)
		;
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);
}

/* Device list removal.
 * The caller must respect an RCU grace period before freeing/reusing dev.
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
272
273#ifdef CONFIG_LOCKDEP
274
275
276
277
278static const unsigned short netdev_lock_type[] =
279 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
280 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
281 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
282 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
283 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
284 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
285 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
286 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
287 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
288 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
289 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
290 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
291 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
292 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
293 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
294
295static const char *const netdev_lock_name[] =
296 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
297 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
298 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
299 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
300 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
301 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
302 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
303 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
304 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
305 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
306 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
307 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
308 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
309 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
310 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
311
312static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
313static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
314
315static inline unsigned short netdev_lock_pos(unsigned short dev_type)
316{
317 int i;
318
319 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
320 if (netdev_lock_type[i] == dev_type)
321 return i;
322
323 return ARRAY_SIZE(netdev_lock_type) - 1;
324}
325
326static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
327 unsigned short dev_type)
328{
329 int i;
330
331 i = netdev_lock_pos(dev_type);
332 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
333 netdev_lock_name[i]);
334}
335
336static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
337{
338 int i;
339
340 i = netdev_lock_pos(dev->type);
341 lockdep_set_class_and_name(&dev->addr_list_lock,
342 &netdev_addr_lock_key[i],
343 netdev_lock_name[i]);
344}
345#else
346static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
347 unsigned short dev_type)
348{
349}
350static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
351{
352}
353#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 * ETH_P_ALL taps live either on the global ptype_all list or, when bound
 * to a device, on that device's ptype_all list.  Every other handler is
 * hashed by protocol number into ptype_base, or kept on the device's
 * ptype_specific list when bound to a single device.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return pt->dev ? &pt->dev->ptype_all : &ptype_all;
	else
		return pt->dev ? &pt->dev->ptype_specific :
				 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475void dev_add_offload(struct packet_offload *po)
476{
477 struct packet_offload *elem;
478
479 spin_lock(&offload_lock);
480 list_for_each_entry(elem, &offload_base, list) {
481 if (po->priority < elem->priority)
482 break;
483 }
484 list_add_rcu(&po->list, elem->list.prev);
485 spin_unlock(&offload_lock);
486}
487EXPORT_SYMBOL(dev_add_offload);
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502static void __dev_remove_offload(struct packet_offload *po)
503{
504 struct list_head *head = &offload_base;
505 struct packet_offload *po1;
506
507 spin_lock(&offload_lock);
508
509 list_for_each_entry(po1, head, list) {
510 if (po == po1) {
511 list_del_rcu(&po->list);
512 goto out;
513 }
514 }
515
516 pr_warn("dev_remove_offload: %p not found\n", po);
517out:
518 spin_unlock(&offload_lock);
519}
520
521
522
523
524
525
526
527
528
529
530
531
532
533void dev_remove_offload(struct packet_offload *po)
534{
535 __dev_remove_offload(po);
536
537 synchronize_net();
538}
539EXPORT_SYMBOL(dev_remove_offload);
540
541
542
543
544
545
546
547
548static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
549
550
551
552
553
554
555
556
557
558
559static int netdev_boot_setup_add(char *name, struct ifmap *map)
560{
561 struct netdev_boot_setup *s;
562 int i;
563
564 s = dev_boot_setup;
565 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
566 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
567 memset(s[i].name, 0, sizeof(s[i].name));
568 strlcpy(s[i].name, name, IFNAMSIZ);
569 memcpy(&s[i].map, map, sizeof(s[i].map));
570 break;
571 }
572 }
573
574 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
575}
576
577
578
579
580
581
582
583
584
585
586int netdev_boot_setup_check(struct net_device *dev)
587{
588 struct netdev_boot_setup *s = dev_boot_setup;
589 int i;
590
591 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
592 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
593 !strcmp(dev->name, s[i].name)) {
594 dev->irq = s[i].map.irq;
595 dev->base_addr = s[i].map.base_addr;
596 dev->mem_start = s[i].map.mem_start;
597 dev->mem_end = s[i].map.mem_end;
598 return 1;
599 }
600 }
601 return 0;
602}
603EXPORT_SYMBOL(netdev_boot_setup_check);
604
605
606
607
608
609
610
611
612
613
614
615
616unsigned long netdev_boot_base(const char *prefix, int unit)
617{
618 const struct netdev_boot_setup *s = dev_boot_setup;
619 char name[IFNAMSIZ];
620 int i;
621
622 sprintf(name, "%s%d", prefix, unit);
623
624
625
626
627
628 if (__dev_get_by_name(&init_net, name))
629 return 1;
630
631 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
632 if (!strcmp(name, s[i].name))
633 return s[i].map.base_addr;
634 return 0;
635}
636
637
638
639
640int __init netdev_boot_setup(char *str)
641{
642 int ints[5];
643 struct ifmap map;
644
645 str = get_options(str, ARRAY_SIZE(ints), ints);
646 if (!str || !*str)
647 return 0;
648
649
650 memset(&map, 0, sizeof(map));
651 if (ints[0] > 0)
652 map.irq = ints[1];
653 if (ints[0] > 1)
654 map.base_addr = ints[2];
655 if (ints[0] > 2)
656 map.mem_start = ints[3];
657 if (ints[0] > 3)
658 map.mem_end = ints[4];
659
660
661 return netdev_boot_setup_add(str, &map);
662}
663
664__setup("netdev=", netdev_boot_setup);
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680int dev_get_iflink(const struct net_device *dev)
681{
682 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
683 return dev->netdev_ops->ndo_get_iflink(dev);
684
685 return dev->ifindex;
686}
687EXPORT_SYMBOL(dev_get_iflink);
688
689
690
691
692
693
694
695
696
697
698int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
699{
700 struct ip_tunnel_info *info;
701
702 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
703 return -EINVAL;
704
705 info = skb_tunnel_info_unclone(skb);
706 if (!info)
707 return -ENOMEM;
708 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
709 return -EINVAL;
710
711 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
712}
713EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under the RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold the RCU read lock.
 */

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
789
790
791
792
793
794
795
796
797
798
799
800
801
802struct net_device *__dev_get_by_index(struct net *net, int ifindex)
803{
804 struct net_device *dev;
805 struct hlist_head *head = dev_index_hash(net, ifindex);
806
807 hlist_for_each_entry(dev, head, index_hlist)
808 if (dev->ifindex == ifindex)
809 return dev;
810
811 return NULL;
812}
813EXPORT_SYMBOL(__dev_get_by_index);
814
815
816
817
818
819
820
821
822
823
824
825
826struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
827{
828 struct net_device *dev;
829 struct hlist_head *head = dev_index_hash(net, ifindex);
830
831 hlist_for_each_entry_rcu(dev, head, index_hlist)
832 if (dev->ifindex == ifindex)
833 return dev;
834
835 return NULL;
836}
837EXPORT_SYMBOL(dev_get_by_index_rcu);
838
839
840
841
842
843
844
845
846
847
848
849
850
851struct net_device *dev_get_by_index(struct net *net, int ifindex)
852{
853 struct net_device *dev;
854
855 rcu_read_lock();
856 dev = dev_get_by_index_rcu(net, ifindex);
857 if (dev)
858 dev_hold(dev);
859 rcu_read_unlock();
860 return dev;
861}
862EXPORT_SYMBOL(dev_get_by_index);
863
864
865
866
867
868
869
870
871
872
873
874int netdev_get_name(struct net *net, char *name, int ifindex)
875{
876 struct net_device *dev;
877 unsigned int seq;
878
879retry:
880 seq = raw_seqcount_begin(&devnet_rename_seq);
881 rcu_read_lock();
882 dev = dev_get_by_index_rcu(net, ifindex);
883 if (!dev) {
884 rcu_read_unlock();
885 return -ENODEV;
886 }
887
888 strcpy(name, dev->name);
889 rcu_read_unlock();
890 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
891 cond_resched();
892 goto retry;
893 }
894
895 return 0;
896}
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
913 const char *ha)
914{
915 struct net_device *dev;
916
917 for_each_netdev_rcu(net, dev)
918 if (dev->type == type &&
919 !memcmp(dev->dev_addr, ha, dev->addr_len))
920 return dev;
921
922 return NULL;
923}
924EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
925
926struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
927{
928 struct net_device *dev;
929
930 ASSERT_RTNL();
931 for_each_netdev(net, dev)
932 if (dev->type == type)
933 return dev;
934
935 return NULL;
936}
937EXPORT_SYMBOL(__dev_getfirstbyhwtype);
938
939struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
940{
941 struct net_device *dev, *ret = NULL;
942
943 rcu_read_lock();
944 for_each_netdev_rcu(net, dev)
945 if (dev->type == type) {
946 dev_hold(dev);
947 ret = dev;
948 break;
949 }
950 rcu_read_unlock();
951 return ret;
952}
953EXPORT_SYMBOL(dev_getfirstbyhwtype);
954
955
956
957
958
959
960
961
962
963
964
965
966struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
967 unsigned short mask)
968{
969 struct net_device *dev, *ret;
970
971 ASSERT_RTNL();
972
973 ret = NULL;
974 for_each_netdev(net, dev) {
975 if (((dev->flags ^ if_flags) & mask) == 0) {
976 ret = dev;
977 break;
978 }
979 }
980 return ret;
981}
982EXPORT_SYMBOL(__dev_get_by_flags);

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || *name == ':' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1026{
1027 int i = 0;
1028 const char *p;
1029 const int max_netdevices = 8*PAGE_SIZE;
1030 unsigned long *inuse;
1031 struct net_device *d;
1032
1033 p = strnchr(name, IFNAMSIZ-1, '%');
1034 if (p) {
1035
1036
1037
1038
1039
1040 if (p[1] != 'd' || strchr(p + 2, '%'))
1041 return -EINVAL;
1042
1043
1044 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1045 if (!inuse)
1046 return -ENOMEM;
1047
1048 for_each_netdev(net, d) {
1049 if (!sscanf(d->name, name, &i))
1050 continue;
1051 if (i < 0 || i >= max_netdevices)
1052 continue;
1053
1054
1055 snprintf(buf, IFNAMSIZ, name, i);
1056 if (!strncmp(buf, d->name, IFNAMSIZ))
1057 set_bit(i, inuse);
1058 }
1059
1060 i = find_first_zero_bit(inuse, max_netdevices);
1061 free_page((unsigned long) inuse);
1062 }
1063
1064 if (buf != name)
1065 snprintf(buf, IFNAMSIZ, name, i);
1066 if (!__dev_get_by_name(net, buf))
1067 return i;
1068
1069
1070
1071
1072
1073 return -ENFILE;
1074}
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090int dev_alloc_name(struct net_device *dev, const char *name)
1091{
1092 char buf[IFNAMSIZ];
1093 struct net *net;
1094 int ret;
1095
1096 BUG_ON(!dev_net(dev));
1097 net = dev_net(dev);
1098 ret = __dev_alloc_name(net, name, buf);
1099 if (ret >= 0)
1100 strlcpy(dev->name, buf, IFNAMSIZ);
1101 return ret;
1102}
1103EXPORT_SYMBOL(dev_alloc_name);
1104
1105static int dev_alloc_name_ns(struct net *net,
1106 struct net_device *dev,
1107 const char *name)
1108{
1109 char buf[IFNAMSIZ];
1110 int ret;
1111
1112 ret = __dev_alloc_name(net, name, buf);
1113 if (ret >= 0)
1114 strlcpy(dev->name, buf, IFNAMSIZ);
1115 return ret;
1116}
1117
1118static int dev_get_valid_name(struct net *net,
1119 struct net_device *dev,
1120 const char *name)
1121{
1122 BUG_ON(!net);
1123
1124 if (!dev_valid_name(name))
1125 return -EINVAL;
1126
1127 if (strchr(name, '%'))
1128 return dev_alloc_name_ns(net, dev, name);
1129 else if (__dev_get_by_name(net, name))
1130 return -EEXIST;
1131 else if (dev->name != name)
1132 strlcpy(dev->name, name, IFNAMSIZ);
1133
1134 return 0;
1135}
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145int dev_change_name(struct net_device *dev, const char *newname)
1146{
1147 unsigned char old_assign_type;
1148 char oldname[IFNAMSIZ];
1149 int err = 0;
1150 int ret;
1151 struct net *net;
1152
1153 ASSERT_RTNL();
1154 BUG_ON(!dev_net(dev));
1155
1156 net = dev_net(dev);
1157 if (dev->flags & IFF_UP)
1158 return -EBUSY;
1159
1160 write_seqcount_begin(&devnet_rename_seq);
1161
1162 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1163 write_seqcount_end(&devnet_rename_seq);
1164 return 0;
1165 }
1166
1167 memcpy(oldname, dev->name, IFNAMSIZ);
1168
1169 err = dev_get_valid_name(net, dev, newname);
1170 if (err < 0) {
1171 write_seqcount_end(&devnet_rename_seq);
1172 return err;
1173 }
1174
1175 if (oldname[0] && !strchr(oldname, '%'))
1176 netdev_info(dev, "renamed from %s\n", oldname);
1177
1178 old_assign_type = dev->name_assign_type;
1179 dev->name_assign_type = NET_NAME_RENAMED;
1180
1181rollback:
1182 ret = device_rename(&dev->dev, dev->name);
1183 if (ret) {
1184 memcpy(dev->name, oldname, IFNAMSIZ);
1185 dev->name_assign_type = old_assign_type;
1186 write_seqcount_end(&devnet_rename_seq);
1187 return ret;
1188 }
1189
1190 write_seqcount_end(&devnet_rename_seq);
1191
1192 netdev_adjacent_rename_links(dev, oldname);
1193
1194 write_lock_bh(&dev_base_lock);
1195 hlist_del_rcu(&dev->name_hlist);
1196 write_unlock_bh(&dev_base_lock);
1197
1198 synchronize_rcu();
1199
1200 write_lock_bh(&dev_base_lock);
1201 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1202 write_unlock_bh(&dev_base_lock);
1203
1204 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1205 ret = notifier_to_errno(ret);
1206
1207 if (ret) {
1208
1209 if (err >= 0) {
1210 err = ret;
1211 write_seqcount_begin(&devnet_rename_seq);
1212 memcpy(dev->name, oldname, IFNAMSIZ);
1213 memcpy(oldname, newname, IFNAMSIZ);
1214 dev->name_assign_type = old_assign_type;
1215 old_assign_type = NET_NAME_RENAMED;
1216 goto rollback;
1217 } else {
1218 pr_err("%s: name change rollback failed: %d\n",
1219 dev->name, ret);
1220 }
1221 }
1222
1223 return err;
1224}
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1235{
1236 char *new_ifalias;
1237
1238 ASSERT_RTNL();
1239
1240 if (len >= IFALIASZ)
1241 return -EINVAL;
1242
1243 if (!len) {
1244 kfree(dev->ifalias);
1245 dev->ifalias = NULL;
1246 return 0;
1247 }
1248
1249 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1250 if (!new_ifalias)
1251 return -ENOMEM;
1252 dev->ifalias = new_ifalias;
1253
1254 strlcpy(dev->ifalias, alias, len+1);
1255 return len;
1256}
1257
1258
1259
1260
1261
1262
1263
1264
1265void netdev_features_change(struct net_device *dev)
1266{
1267 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1268}
1269EXPORT_SYMBOL(netdev_features_change);
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279void netdev_state_change(struct net_device *dev)
1280{
1281 if (dev->flags & IFF_UP) {
1282 struct netdev_notifier_change_info change_info;
1283
1284 change_info.flags_changed = 0;
1285 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1286 &change_info.info);
1287 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1288 }
1289}
1290EXPORT_SYMBOL(netdev_state_change);
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302void netdev_notify_peers(struct net_device *dev)
1303{
1304 rtnl_lock();
1305 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1306 rtnl_unlock();
1307}
1308EXPORT_SYMBOL(netdev_notify_peers);
1309
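/* Bring the device up: run the NETDEV_PRE_UP notifiers, validate the
 * hardware address, call the driver's ndo_open(), set IFF_UP and activate
 * the qdiscs.  The NETDEV_UP notification and the RTM_NEWLINK message are
 * left to the caller (dev_open()/dev_change_flags()).  Called with RTNL held.
 */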
1310static int __dev_open(struct net_device *dev)
1311{
1312 const struct net_device_ops *ops = dev->netdev_ops;
1313 int ret;
1314
1315 ASSERT_RTNL();
1316
1317 if (!netif_device_present(dev))
1318 return -ENODEV;
1319
1320
1321
1322
1323
1324 netpoll_poll_disable(dev);
1325
1326 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1327 ret = notifier_to_errno(ret);
1328 if (ret)
1329 return ret;
1330
1331 set_bit(__LINK_STATE_START, &dev->state);
1332
1333 if (ops->ndo_validate_addr)
1334 ret = ops->ndo_validate_addr(dev);
1335
1336 if (!ret && ops->ndo_open)
1337 ret = ops->ndo_open(dev);
1338
1339 netpoll_poll_enable(dev);
1340
1341 if (ret)
1342 clear_bit(__LINK_STATE_START, &dev->state);
1343 else {
1344 dev->flags |= IFF_UP;
1345 dev_set_rx_mode(dev);
1346 dev_activate(dev);
1347 add_device_randomness(dev->dev_addr, dev->addr_len);
1348 }
1349
1350 return ret;
1351}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);
1382
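/* Take a list of devices down: send NETDEV_GOING_DOWN, stop the transmit
 * path, clear __LINK_STATE_START, deactivate the qdiscs and call each
 * driver's ndo_stop().  NETDEV_DOWN and the netlink notification are sent
 * by dev_close_many().  Called with RTNL held; may sleep.
 */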
1383static int __dev_close_many(struct list_head *head)
1384{
1385 struct net_device *dev;
1386
1387 ASSERT_RTNL();
1388 might_sleep();
1389
1390 list_for_each_entry(dev, head, close_list) {
1391
1392 netpoll_poll_disable(dev);
1393
1394 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1395
1396 clear_bit(__LINK_STATE_START, &dev->state);
1397
1398
1399
1400
1401
1402
1403
1404 smp_mb__after_atomic();
1405 }
1406
1407 dev_deactivate_many(head);
1408
1409 list_for_each_entry(dev, head, close_list) {
1410 const struct net_device_ops *ops = dev->netdev_ops;
1411
1412
1413
1414
1415
1416
1417
1418
1419 if (ops->ndo_stop)
1420 ops->ndo_stop(dev);
1421
1422 dev->flags &= ~IFF_UP;
1423 netpoll_poll_enable(dev);
1424 }
1425
1426 return 0;
1427}
1428
1429static int __dev_close(struct net_device *dev)
1430{
1431 int retval;
1432 LIST_HEAD(single);
1433
1434 list_add(&dev->close_list, &single);
1435 retval = __dev_close_many(&single);
1436 list_del(&single);
1437
1438 return retval;
1439}
1440
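/* Shut down every device on @head that is currently up.  When @unlink is
 * true the devices are also removed from the close_list after the
 * NETDEV_DOWN notification has been sent.
 */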
1441int dev_close_many(struct list_head *head, bool unlink)
1442{
1443 struct net_device *dev, *tmp;
1444
1445
1446 list_for_each_entry_safe(dev, tmp, head, close_list)
1447 if (!(dev->flags & IFF_UP))
1448 list_del_init(&dev->close_list);
1449
1450 __dev_close_many(head);
1451
1452 list_for_each_entry_safe(dev, tmp, head, close_list) {
1453 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1454 call_netdevice_notifiers(NETDEV_DOWN, dev);
1455 if (unlink)
1456 list_del_init(&dev->close_list);
1457 }
1458
1459 return 0;
1460}
1461EXPORT_SYMBOL(dev_close_many);

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		list_add(&dev->close_list, &single);
		dev_close_many(&single, true);
		list_del(&single);
	}
	return 0;
}
EXPORT_SYMBOL(dev_close);
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494void dev_disable_lro(struct net_device *dev)
1495{
1496 struct net_device *lower_dev;
1497 struct list_head *iter;
1498
1499 dev->wanted_features &= ~NETIF_F_LRO;
1500 netdev_update_features(dev);
1501
1502 if (unlikely(dev->features & NETIF_F_LRO))
1503 netdev_WARN(dev, "failed to disable LRO!\n");
1504
1505 netdev_for_each_lower_dev(dev, lower_dev, iter)
1506 dev_disable_lro(lower_dev);
1507}
1508EXPORT_SYMBOL(dev_disable_lro);
1509
1510static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1511 struct net_device *dev)
1512{
1513 struct netdev_notifier_info info;
1514
1515 netdev_notifier_info_init(&info, dev);
1516 return nb->notifier_call(nb, val, &info);
1517}
1518
1519static int dev_boot_phase = 1;
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535int register_netdevice_notifier(struct notifier_block *nb)
1536{
1537 struct net_device *dev;
1538 struct net_device *last;
1539 struct net *net;
1540 int err;
1541
1542 rtnl_lock();
1543 err = raw_notifier_chain_register(&netdev_chain, nb);
1544 if (err)
1545 goto unlock;
1546 if (dev_boot_phase)
1547 goto unlock;
1548 for_each_net(net) {
1549 for_each_netdev(net, dev) {
1550 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1551 err = notifier_to_errno(err);
1552 if (err)
1553 goto rollback;
1554
1555 if (!(dev->flags & IFF_UP))
1556 continue;
1557
1558 call_netdevice_notifier(nb, NETDEV_UP, dev);
1559 }
1560 }
1561
1562unlock:
1563 rtnl_unlock();
1564 return err;
1565
1566rollback:
1567 last = dev;
1568 for_each_net(net) {
1569 for_each_netdev(net, dev) {
1570 if (dev == last)
1571 goto outroll;
1572
1573 if (dev->flags & IFF_UP) {
1574 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1575 dev);
1576 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1577 }
1578 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1579 }
1580 }
1581
1582outroll:
1583 raw_notifier_chain_unregister(&netdev_chain, nb);
1584 goto unlock;
1585}
1586EXPORT_SYMBOL(register_netdevice_notifier);
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602int unregister_netdevice_notifier(struct notifier_block *nb)
1603{
1604 struct net_device *dev;
1605 struct net *net;
1606 int err;
1607
1608 rtnl_lock();
1609 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1610 if (err)
1611 goto unlock;
1612
1613 for_each_net(net) {
1614 for_each_netdev(net, dev) {
1615 if (dev->flags & IFF_UP) {
1616 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1617 dev);
1618 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1619 }
1620 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1621 }
1622 }
1623unlock:
1624 rtnl_unlock();
1625 return err;
1626}
1627EXPORT_SYMBOL(unregister_netdevice_notifier);
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639static int call_netdevice_notifiers_info(unsigned long val,
1640 struct net_device *dev,
1641 struct netdev_notifier_info *info)
1642{
1643 ASSERT_RTNL();
1644 netdev_notifier_info_init(info, dev);
1645 return raw_notifier_call_chain(&netdev_chain, val, info);
1646}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1658{
1659 struct netdev_notifier_info info;
1660
1661 return call_netdevice_notifiers_info(val, dev, &info);
1662}
1663EXPORT_SYMBOL(call_netdevice_notifiers);
1664
1665#ifdef CONFIG_NET_INGRESS
1666static struct static_key ingress_needed __read_mostly;
1667
1668void net_inc_ingress_queue(void)
1669{
1670 static_key_slow_inc(&ingress_needed);
1671}
1672EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
1673
1674void net_dec_ingress_queue(void)
1675{
1676 static_key_slow_dec(&ingress_needed);
1677}
1678EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
1679#endif
1680
1681#ifdef CONFIG_NET_EGRESS
1682static struct static_key egress_needed __read_mostly;
1683
1684void net_inc_egress_queue(void)
1685{
1686 static_key_slow_inc(&egress_needed);
1687}
1688EXPORT_SYMBOL_GPL(net_inc_egress_queue);
1689
1690void net_dec_egress_queue(void)
1691{
1692 static_key_slow_dec(&egress_needed);
1693}
1694EXPORT_SYMBOL_GPL(net_dec_egress_queue);
1695#endif
1696
1697static struct static_key netstamp_needed __read_mostly;
1698#ifdef HAVE_JUMP_LABEL
1699
1700
1701
1702
1703static atomic_t netstamp_needed_deferred;
1704#endif
1705
1706void net_enable_timestamp(void)
1707{
1708#ifdef HAVE_JUMP_LABEL
1709 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1710
1711 if (deferred) {
1712 while (--deferred)
1713 static_key_slow_dec(&netstamp_needed);
1714 return;
1715 }
1716#endif
1717 static_key_slow_inc(&netstamp_needed);
1718}
1719EXPORT_SYMBOL(net_enable_timestamp);
1720
1721void net_disable_timestamp(void)
1722{
1723#ifdef HAVE_JUMP_LABEL
1724 if (in_interrupt()) {
1725 atomic_inc(&netstamp_needed_deferred);
1726 return;
1727 }
1728#endif
1729 static_key_slow_dec(&netstamp_needed);
1730}
1731EXPORT_SYMBOL(net_disable_timestamp);
1732
static inline void net_timestamp_set(struct sk_buff *skb)
{
	skb->tstamp.tv64 = 0;
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

#define net_timestamp_check(COND, SKB)			\
	if (static_key_false(&netstamp_needed)) {	\
		if ((COND) && !(SKB)->tstamp.tv64)	\
			__net_timestamp(SKB);		\
	}						\

1746bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
1747{
1748 unsigned int len;
1749
1750 if (!(dev->flags & IFF_UP))
1751 return false;
1752
1753 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1754 if (skb->len <= len)
1755 return true;
1756
1757
1758
1759
1760 if (skb_is_gso(skb))
1761 return true;
1762
1763 return false;
1764}
1765EXPORT_SYMBOL_GPL(is_skb_forwardable);
1766
1767int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1768{
1769 int ret = ____dev_forward_skb(dev, skb);
1770
1771 if (likely(!ret)) {
1772 skb->protocol = eth_type_trans(skb, dev);
1773 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1774 }
1775
1776 return ret;
1777}
1778EXPORT_SYMBOL_GPL(__dev_forward_skb);
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1799{
1800 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
1801}
1802EXPORT_SYMBOL_GPL(dev_forward_skb);
1803
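/* Hand a packet to a single protocol handler, taking an extra reference on
 * the skb so the handler may consume it.  Returns -ENOMEM if zerocopy frags
 * cannot be detached from userspace first.
 */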
1804static inline int deliver_skb(struct sk_buff *skb,
1805 struct packet_type *pt_prev,
1806 struct net_device *orig_dev)
1807{
1808 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1809 return -ENOMEM;
1810 atomic_inc(&skb->users);
1811 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1812}
1813
1814static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1815 struct packet_type **pt,
1816 struct net_device *orig_dev,
1817 __be16 type,
1818 struct list_head *ptype_list)
1819{
1820 struct packet_type *ptype, *pt_prev = *pt;
1821
1822 list_for_each_entry_rcu(ptype, ptype_list, list) {
1823 if (ptype->type != type)
1824 continue;
1825 if (pt_prev)
1826 deliver_skb(skb, pt_prev, orig_dev);
1827 pt_prev = ptype;
1828 }
1829 *pt = pt_prev;
1830}
1831
1832static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1833{
1834 if (!ptype->af_packet_priv || !skb->sk)
1835 return false;
1836
1837 if (ptype->id_match)
1838 return ptype->id_match(ptype, skb->sk);
1839 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1840 return true;
1841
1842 return false;
1843}
1844
1845
1846
1847
1848
1849
1850void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1851{
1852 struct packet_type *ptype;
1853 struct sk_buff *skb2 = NULL;
1854 struct packet_type *pt_prev = NULL;
1855 struct list_head *ptype_list = &ptype_all;
1856
1857 rcu_read_lock();
1858again:
1859 list_for_each_entry_rcu(ptype, ptype_list, list) {
1860
1861
1862
1863 if (skb_loop_sk(ptype, skb))
1864 continue;
1865
1866 if (pt_prev) {
1867 deliver_skb(skb2, pt_prev, skb->dev);
1868 pt_prev = ptype;
1869 continue;
1870 }
1871
1872
1873 skb2 = skb_clone(skb, GFP_ATOMIC);
1874 if (!skb2)
1875 goto out_unlock;
1876
1877 net_timestamp_set(skb2);
1878
1879
1880
1881
1882
1883 skb_reset_mac_header(skb2);
1884
1885 if (skb_network_header(skb2) < skb2->data ||
1886 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1887 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1888 ntohs(skb2->protocol),
1889 dev->name);
1890 skb_reset_network_header(skb2);
1891 }
1892
1893 skb2->transport_header = skb2->network_header;
1894 skb2->pkt_type = PACKET_OUTGOING;
1895 pt_prev = ptype;
1896 }
1897
1898 if (ptype_list == &ptype_all) {
1899 ptype_list = &dev->ptype_all;
1900 goto again;
1901 }
1902out_unlock:
1903 if (pt_prev)
1904 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1905 rcu_read_unlock();
1906}
1907EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1923{
1924 int i;
1925 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1926
1927
1928 if (tc->offset + tc->count > txq) {
1929 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1930 dev->num_tc = 0;
1931 return;
1932 }
1933
1934
1935 for (i = 1; i < TC_BITMASK + 1; i++) {
1936 int q = netdev_get_prio_tc_map(dev, i);
1937
1938 tc = &dev->tc_to_txq[q];
1939 if (tc->offset + tc->count > txq) {
1940 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1941 i, q);
1942 netdev_set_prio_tc_map(dev, i, 0);
1943 }
1944 }
1945}
1946
1947#ifdef CONFIG_XPS
1948static DEFINE_MUTEX(xps_map_mutex);
1949#define xmap_dereference(P) \
1950 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1951
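/* Remove transmit queue @index from @cpu's XPS map.  Returns the (possibly
 * shrunk) map, or NULL if the map became empty and was freed.
 */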
1952static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1953 int cpu, u16 index)
1954{
1955 struct xps_map *map = NULL;
1956 int pos;
1957
1958 if (dev_maps)
1959 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1960
1961 for (pos = 0; map && pos < map->len; pos++) {
1962 if (map->queues[pos] == index) {
1963 if (map->len > 1) {
1964 map->queues[pos] = map->queues[--map->len];
1965 } else {
1966 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1967 kfree_rcu(map, rcu);
1968 map = NULL;
1969 }
1970 break;
1971 }
1972 }
1973
1974 return map;
1975}
1976
1977static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1978{
1979 struct xps_dev_maps *dev_maps;
1980 int cpu, i;
1981 bool active = false;
1982
1983 mutex_lock(&xps_map_mutex);
1984 dev_maps = xmap_dereference(dev->xps_maps);
1985
1986 if (!dev_maps)
1987 goto out_no_maps;
1988
1989 for_each_possible_cpu(cpu) {
1990 for (i = index; i < dev->num_tx_queues; i++) {
1991 if (!remove_xps_queue(dev_maps, cpu, i))
1992 break;
1993 }
1994 if (i == dev->num_tx_queues)
1995 active = true;
1996 }
1997
1998 if (!active) {
1999 RCU_INIT_POINTER(dev->xps_maps, NULL);
2000 kfree_rcu(dev_maps, rcu);
2001 }
2002
2003 for (i = index; i < dev->num_tx_queues; i++)
2004 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
2005 NUMA_NO_NODE);
2006
2007out_no_maps:
2008 mutex_unlock(&xps_map_mutex);
2009}
2010
2011static struct xps_map *expand_xps_map(struct xps_map *map,
2012 int cpu, u16 index)
2013{
2014 struct xps_map *new_map;
2015 int alloc_len = XPS_MIN_MAP_ALLOC;
2016 int i, pos;
2017
2018 for (pos = 0; map && pos < map->len; pos++) {
2019 if (map->queues[pos] != index)
2020 continue;
2021 return map;
2022 }
2023
2024
2025 if (map) {
2026 if (pos < map->alloc_len)
2027 return map;
2028
2029 alloc_len = map->alloc_len * 2;
2030 }
2031
2032
2033 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2034 cpu_to_node(cpu));
2035 if (!new_map)
2036 return NULL;
2037
2038 for (i = 0; i < pos; i++)
2039 new_map->queues[i] = map->queues[i];
2040 new_map->alloc_len = alloc_len;
2041 new_map->len = pos;
2042
2043 return new_map;
2044}
2045
2046int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2047 u16 index)
2048{
2049 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2050 struct xps_map *map, *new_map;
2051 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
2052 int cpu, numa_node_id = -2;
2053 bool active = false;
2054
2055 mutex_lock(&xps_map_mutex);
2056
2057 dev_maps = xmap_dereference(dev->xps_maps);
2058
2059
2060 for_each_online_cpu(cpu) {
2061 if (!cpumask_test_cpu(cpu, mask))
2062 continue;
2063
2064 if (!new_dev_maps)
2065 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2066 if (!new_dev_maps) {
2067 mutex_unlock(&xps_map_mutex);
2068 return -ENOMEM;
2069 }
2070
2071 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2072 NULL;
2073
2074 map = expand_xps_map(map, cpu, index);
2075 if (!map)
2076 goto error;
2077
2078 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2079 }
2080
2081 if (!new_dev_maps)
2082 goto out_no_new_maps;
2083
2084 for_each_possible_cpu(cpu) {
2085 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2086
2087 int pos = 0;
2088
2089 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2090 while ((pos < map->len) && (map->queues[pos] != index))
2091 pos++;
2092
2093 if (pos == map->len)
2094 map->queues[map->len++] = index;
2095#ifdef CONFIG_NUMA
2096 if (numa_node_id == -2)
2097 numa_node_id = cpu_to_node(cpu);
2098 else if (numa_node_id != cpu_to_node(cpu))
2099 numa_node_id = -1;
2100#endif
2101 } else if (dev_maps) {
2102
2103 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2104 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2105 }
2106
2107 }
2108
2109 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2110
2111
2112 if (dev_maps) {
2113 for_each_possible_cpu(cpu) {
2114 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2115 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2116 if (map && map != new_map)
2117 kfree_rcu(map, rcu);
2118 }
2119
2120 kfree_rcu(dev_maps, rcu);
2121 }
2122
2123 dev_maps = new_dev_maps;
2124 active = true;
2125
2126out_no_new_maps:
2127
2128 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2129 (numa_node_id >= 0) ? numa_node_id :
2130 NUMA_NO_NODE);
2131
2132 if (!dev_maps)
2133 goto out_no_maps;
2134
2135
2136 for_each_possible_cpu(cpu) {
2137 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2138 continue;
2139
2140 if (remove_xps_queue(dev_maps, cpu, index))
2141 active = true;
2142 }
2143
2144
2145 if (!active) {
2146 RCU_INIT_POINTER(dev->xps_maps, NULL);
2147 kfree_rcu(dev_maps, rcu);
2148 }
2149
2150out_no_maps:
2151 mutex_unlock(&xps_map_mutex);
2152
2153 return 0;
2154error:
2155
2156 for_each_possible_cpu(cpu) {
2157 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2158 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2159 NULL;
2160 if (new_map && new_map != map)
2161 kfree(new_map);
2162 }
2163
2164 mutex_unlock(&xps_map_mutex);
2165
2166 kfree(new_dev_maps);
2167 return -ENOMEM;
2168}
2169EXPORT_SYMBOL(netif_set_xps_queue);
2170
2171#endif
2172
2173
2174
2175
2176int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2177{
2178 int rc;
2179
2180 if (txq < 1 || txq > dev->num_tx_queues)
2181 return -EINVAL;
2182
2183 if (dev->reg_state == NETREG_REGISTERED ||
2184 dev->reg_state == NETREG_UNREGISTERING) {
2185 ASSERT_RTNL();
2186
2187 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2188 txq);
2189 if (rc)
2190 return rc;
2191
2192 if (dev->num_tc)
2193 netif_setup_tc(dev, txq);
2194
2195 if (txq < dev->real_num_tx_queues) {
2196 qdisc_reset_all_tx_gt(dev, txq);
2197#ifdef CONFIG_XPS
2198 netif_reset_xps_queues_gt(dev, txq);
2199#endif
2200 }
2201 }
2202
2203 dev->real_num_tx_queues = txq;
2204 return 0;
2205}
2206EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2207
2208#ifdef CONFIG_SYSFS
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2220{
2221 int rc;
2222
2223 if (rxq < 1 || rxq > dev->num_rx_queues)
2224 return -EINVAL;
2225
2226 if (dev->reg_state == NETREG_REGISTERED) {
2227 ASSERT_RTNL();
2228
2229 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2230 rxq);
2231 if (rc)
2232 return rc;
2233 }
2234
2235 dev->real_num_rx_queues = rxq;
2236 return 0;
2237}
2238EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2239#endif
2240
2241
2242
2243
2244
2245
2246
2247int netif_get_num_default_rss_queues(void)
2248{
2249 return is_kdump_kernel() ?
2250 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2251}
2252EXPORT_SYMBOL(netif_get_num_default_rss_queues);
2253
2254static void __netif_reschedule(struct Qdisc *q)
2255{
2256 struct softnet_data *sd;
2257 unsigned long flags;
2258
2259 local_irq_save(flags);
2260 sd = this_cpu_ptr(&softnet_data);
2261 q->next_sched = NULL;
2262 *sd->output_queue_tailp = q;
2263 sd->output_queue_tailp = &q->next_sched;
2264 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2265 local_irq_restore(flags);
2266}
2267
2268void __netif_schedule(struct Qdisc *q)
2269{
2270 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2271 __netif_reschedule(q);
2272}
2273EXPORT_SYMBOL(__netif_schedule);
2274
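/* Deferred skb free: the free reason is stashed in skb->cb and the skb is
 * queued on the per-cpu completion_queue, to be freed from the TX softirq.
 * This lets drivers release skbs from hard-irq context.
 */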
2275struct dev_kfree_skb_cb {
2276 enum skb_free_reason reason;
2277};
2278
2279static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2280{
2281 return (struct dev_kfree_skb_cb *)skb->cb;
2282}
2283
2284void netif_schedule_queue(struct netdev_queue *txq)
2285{
2286 rcu_read_lock();
2287 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2288 struct Qdisc *q = rcu_dereference(txq->qdisc);
2289
2290 __netif_schedule(q);
2291 }
2292 rcu_read_unlock();
2293}
2294EXPORT_SYMBOL(netif_schedule_queue);
2295
2296
2297
2298
2299
2300
2301
2302
2303void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2304{
2305 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2306
2307 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
2308 struct Qdisc *q;
2309
2310 rcu_read_lock();
2311 q = rcu_dereference(txq->qdisc);
2312 __netif_schedule(q);
2313 rcu_read_unlock();
2314 }
2315}
2316EXPORT_SYMBOL(netif_wake_subqueue);
2317
2318void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2319{
2320 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2321 struct Qdisc *q;
2322
2323 rcu_read_lock();
2324 q = rcu_dereference(dev_queue->qdisc);
2325 __netif_schedule(q);
2326 rcu_read_unlock();
2327 }
2328}
2329EXPORT_SYMBOL(netif_tx_wake_queue);
2330
2331void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2332{
2333 unsigned long flags;
2334
2335 if (likely(atomic_read(&skb->users) == 1)) {
2336 smp_rmb();
2337 atomic_set(&skb->users, 0);
2338 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2339 return;
2340 }
2341 get_kfree_skb_cb(skb)->reason = reason;
2342 local_irq_save(flags);
2343 skb->next = __this_cpu_read(softnet_data.completion_queue);
2344 __this_cpu_write(softnet_data.completion_queue, skb);
2345 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2346 local_irq_restore(flags);
2347}
2348EXPORT_SYMBOL(__dev_kfree_skb_irq);
2349
2350void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2351{
2352 if (in_irq() || irqs_disabled())
2353 __dev_kfree_skb_irq(skb, reason);
2354 else
2355 dev_kfree_skb(skb);
2356}
2357EXPORT_SYMBOL(__dev_kfree_skb_any);
2358
2359
2360
2361
2362
2363
2364
2365
2366void netif_device_detach(struct net_device *dev)
2367{
2368 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2369 netif_running(dev)) {
2370 netif_tx_stop_all_queues(dev);
2371 }
2372}
2373EXPORT_SYMBOL(netif_device_detach);
2374
2375
2376
2377
2378
2379
2380
2381void netif_device_attach(struct net_device *dev)
2382{
2383 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2384 netif_running(dev)) {
2385 netif_tx_wake_all_queues(dev);
2386 __netdev_watchdog_up(dev);
2387 }
2388}
2389EXPORT_SYMBOL(netif_device_attach);
2390
2391
2392
2393
2394
2395u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
2396 unsigned int num_tx_queues)
2397{
2398 u32 hash;
2399 u16 qoffset = 0;
2400 u16 qcount = num_tx_queues;
2401
2402 if (skb_rx_queue_recorded(skb)) {
2403 hash = skb_get_rx_queue(skb);
2404 while (unlikely(hash >= num_tx_queues))
2405 hash -= num_tx_queues;
2406 return hash;
2407 }
2408
2409 if (dev->num_tc) {
2410 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2411 qoffset = dev->tc_to_txq[tc].offset;
2412 qcount = dev->tc_to_txq[tc].count;
2413 }
2414
2415 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
2416}
2417EXPORT_SYMBOL(__skb_tx_hash);
2418
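/* Rate-limited warning for packets that reach the GSO/checksum code with
 * offload state the device cannot handle; dumps device and skb geometry.
 */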
2419static void skb_warn_bad_offload(const struct sk_buff *skb)
2420{
2421 static const netdev_features_t null_features;
2422 struct net_device *dev = skb->dev;
2423 const char *name = "";
2424
2425 if (!net_ratelimit())
2426 return;
2427
2428 if (dev) {
2429 if (dev->dev.parent)
2430 name = dev_driver_string(dev->dev.parent);
2431 else
2432 name = netdev_name(dev);
2433 }
2434 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2435 "gso_type=%d ip_summed=%d\n",
2436 name, dev ? &dev->features : &null_features,
2437 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2438 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2439 skb_shinfo(skb)->gso_type, skb->ip_summed);
2440}
2441
2442
2443
2444
2445
2446int skb_checksum_help(struct sk_buff *skb)
2447{
2448 __wsum csum;
2449 int ret = 0, offset;
2450
2451 if (skb->ip_summed == CHECKSUM_COMPLETE)
2452 goto out_set_summed;
2453
2454 if (unlikely(skb_shinfo(skb)->gso_size)) {
2455 skb_warn_bad_offload(skb);
2456 return -EINVAL;
2457 }
2458
2459
2460
2461
2462 if (skb_has_shared_frag(skb)) {
2463 ret = __skb_linearize(skb);
2464 if (ret)
2465 goto out;
2466 }
2467
2468 offset = skb_checksum_start_offset(skb);
2469 BUG_ON(offset >= skb_headlen(skb));
2470 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2471
2472 offset += skb->csum_offset;
2473 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2474
2475 if (skb_cloned(skb) &&
2476 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2477 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2478 if (ret)
2479 goto out;
2480 }
2481
2482 *(__sum16 *)(skb->data + offset) = csum_fold(csum) ?: CSUM_MANGLED_0;
2483out_set_summed:
2484 skb->ip_summed = CHECKSUM_NONE;
2485out:
2486 return ret;
2487}
2488EXPORT_SYMBOL(skb_checksum_help);
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511bool __skb_csum_offload_chk(struct sk_buff *skb,
2512 const struct skb_csum_offl_spec *spec,
2513 bool *csum_encapped,
2514 bool csum_help)
2515{
2516 struct iphdr *iph;
2517 struct ipv6hdr *ipv6;
2518 void *nhdr;
2519 int protocol;
2520 u8 ip_proto;
2521
2522 if (skb->protocol == htons(ETH_P_8021Q) ||
2523 skb->protocol == htons(ETH_P_8021AD)) {
2524 if (!spec->vlan_okay)
2525 goto need_help;
2526 }
2527
2528
2529
2530
2531
2532
2533 if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
2534
2535 protocol = eproto_to_ipproto(vlan_get_protocol(skb));
2536 nhdr = skb_network_header(skb);
2537 *csum_encapped = false;
2538 if (spec->no_not_encapped)
2539 goto need_help;
2540 } else if (skb->encapsulation && spec->encap_okay &&
2541 skb_checksum_start_offset(skb) ==
2542 skb_inner_transport_offset(skb)) {
2543
2544 *csum_encapped = true;
2545 switch (skb->inner_protocol_type) {
2546 case ENCAP_TYPE_ETHER:
2547 protocol = eproto_to_ipproto(skb->inner_protocol);
2548 break;
2549 case ENCAP_TYPE_IPPROTO:
2550 protocol = skb->inner_protocol;
2551 break;
2552 }
2553 nhdr = skb_inner_network_header(skb);
2554 } else {
2555 goto need_help;
2556 }
2557
2558 switch (protocol) {
2559 case IPPROTO_IP:
2560 if (!spec->ipv4_okay)
2561 goto need_help;
2562 iph = nhdr;
2563 ip_proto = iph->protocol;
2564 if (iph->ihl != 5 && !spec->ip_options_okay)
2565 goto need_help;
2566 break;
2567 case IPPROTO_IPV6:
2568 if (!spec->ipv6_okay)
2569 goto need_help;
2570 if (spec->no_encapped_ipv6 && *csum_encapped)
2571 goto need_help;
2572 ipv6 = nhdr;
2573 nhdr += sizeof(*ipv6);
2574 ip_proto = ipv6->nexthdr;
2575 break;
2576 default:
2577 goto need_help;
2578 }
2579
2580ip_proto_again:
2581 switch (ip_proto) {
2582 case IPPROTO_TCP:
2583 if (!spec->tcp_okay ||
2584 skb->csum_offset != offsetof(struct tcphdr, check))
2585 goto need_help;
2586 break;
2587 case IPPROTO_UDP:
2588 if (!spec->udp_okay ||
2589 skb->csum_offset != offsetof(struct udphdr, check))
2590 goto need_help;
2591 break;
2592 case IPPROTO_SCTP:
2593 if (!spec->sctp_okay ||
2594 skb->csum_offset != offsetof(struct sctphdr, checksum))
2595 goto cant_help;
2596 break;
2597 case NEXTHDR_HOP:
2598 case NEXTHDR_ROUTING:
2599 case NEXTHDR_DEST: {
2600 u8 *opthdr = nhdr;
2601
2602 if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
2603 goto need_help;
2604
2605 ip_proto = opthdr[0];
2606 nhdr += (opthdr[1] + 1) << 3;
2607
2608 goto ip_proto_again;
2609 }
2610 default:
2611 goto need_help;
2612 }
2613
2614
2615 return true;
2616
2617need_help:
2618 if (csum_help && !skb_shinfo(skb)->gso_size)
2619 skb_checksum_help(skb);
2620cant_help:
2621 return false;
2622}
2623EXPORT_SYMBOL(__skb_csum_offload_chk);
2624
2625__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2626{
2627 __be16 type = skb->protocol;
2628
2629
2630 if (type == htons(ETH_P_TEB)) {
2631 struct ethhdr *eth;
2632
2633 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2634 return 0;
2635
2636 eth = (struct ethhdr *)skb_mac_header(skb);
2637 type = eth->h_proto;
2638 }
2639
2640 return __vlan_get_protocol(skb, type, depth);
2641}
2642
2643
2644
2645
2646
2647
2648struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2649 netdev_features_t features)
2650{
2651 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2652 struct packet_offload *ptype;
2653 int vlan_depth = skb->mac_len;
2654 __be16 type = skb_network_protocol(skb, &vlan_depth);
2655
2656 if (unlikely(!type))
2657 return ERR_PTR(-EINVAL);
2658
2659 __skb_pull(skb, vlan_depth);
2660
2661 rcu_read_lock();
2662 list_for_each_entry_rcu(ptype, &offload_base, list) {
2663 if (ptype->type == type && ptype->callbacks.gso_segment) {
2664 segs = ptype->callbacks.gso_segment(skb, features);
2665 break;
2666 }
2667 }
2668 rcu_read_unlock();
2669
2670 __skb_push(skb, skb->data - skb_mac_header(skb));
2671
2672 return segs;
2673}
2674EXPORT_SYMBOL(skb_mac_gso_segment);

/* openvswitch calls this on the rx path as well, so the tx and rx cases
 * need different checksum checks.
 */
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
	if (tx_path)
		return skb->ip_summed != CHECKSUM_PARTIAL;
	else
		return skb->ip_summed == CHECKSUM_NONE;
}
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2701 netdev_features_t features, bool tx_path)
2702{
2703 if (unlikely(skb_needs_check(skb, tx_path))) {
2704 int err;
2705
2706 skb_warn_bad_offload(skb);
2707
2708 err = skb_cow_head(skb, 0);
2709 if (err < 0)
2710 return ERR_PTR(err);
2711 }
2712
2713
2714
2715
2716
2717 if (features & NETIF_F_GSO_PARTIAL) {
2718 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
2719 struct net_device *dev = skb->dev;
2720
2721 partial_features |= dev->features & dev->gso_partial_features;
2722 if (!skb_gso_ok(skb, features | partial_features))
2723 features &= ~NETIF_F_GSO_PARTIAL;
2724 }
2725
2726 BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
2727 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
2728
2729 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2730 SKB_GSO_CB(skb)->encap_level = 0;
2731
2732 skb_reset_mac_header(skb);
2733 skb_reset_mac_len(skb);
2734
2735 return skb_mac_gso_segment(skb, features);
2736}
2737EXPORT_SYMBOL(__skb_gso_segment);
2738
2739
2740#ifdef CONFIG_BUG
2741void netdev_rx_csum_fault(struct net_device *dev)
2742{
2743 if (net_ratelimit()) {
2744 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2745 dump_stack();
2746 }
2747}
2748EXPORT_SYMBOL(netdev_rx_csum_fault);
2749#endif
2750
2751
2752
2753
2754
2755
2756static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2757{
2758#ifdef CONFIG_HIGHMEM
2759 int i;
2760 if (!(dev->features & NETIF_F_HIGHDMA)) {
2761 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2762 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2763 if (PageHighMem(skb_frag_page(frag)))
2764 return 1;
2765 }
2766 }
2767
2768 if (PCI_DMA_BUS_IS_PHYS) {
2769 struct device *pdev = dev->dev.parent;
2770
2771 if (!pdev)
2772 return 0;
2773 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2774 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2775 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2776 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2777 return 1;
2778 }
2779 }
2780#endif
2781 return 0;
2782}
2783
2784
2785
2786
2787#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
2788static netdev_features_t net_mpls_features(struct sk_buff *skb,
2789 netdev_features_t features,
2790 __be16 type)
2791{
2792 if (eth_p_mpls(type))
2793 features &= skb->dev->mpls_features;
2794
2795 return features;
2796}
2797#else
2798static netdev_features_t net_mpls_features(struct sk_buff *skb,
2799 netdev_features_t features,
2800 __be16 type)
2801{
2802 return features;
2803}
2804#endif
2805
2806static netdev_features_t harmonize_features(struct sk_buff *skb,
2807 netdev_features_t features)
2808{
2809 int tmp;
2810 __be16 type;
2811
2812 type = skb_network_protocol(skb, &tmp);
2813 features = net_mpls_features(skb, features, type);
2814
2815 if (skb->ip_summed != CHECKSUM_NONE &&
2816 !can_checksum_protocol(features, type)) {
2817 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2818 } else if (illegal_highdma(skb->dev, skb)) {
2819 features &= ~NETIF_F_SG;
2820 }
2821
2822 return features;
2823}
2824
2825netdev_features_t passthru_features_check(struct sk_buff *skb,
2826 struct net_device *dev,
2827 netdev_features_t features)
2828{
2829 return features;
2830}
2831EXPORT_SYMBOL(passthru_features_check);
2832
2833static netdev_features_t dflt_features_check(const struct sk_buff *skb,
2834 struct net_device *dev,
2835 netdev_features_t features)
2836{
2837 return vlan_features_check(skb, features);
2838}
2839
2840static netdev_features_t gso_features_check(const struct sk_buff *skb,
2841 struct net_device *dev,
2842 netdev_features_t features)
2843{
2844 u16 gso_segs = skb_shinfo(skb)->gso_segs;
2845
2846 if (gso_segs > dev->gso_max_segs)
2847 return features & ~NETIF_F_GSO_MASK;
2848
2849
2850
2851
2852
2853
2854
2855 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
2856 features &= ~dev->gso_partial_features;
2857
2858
2859
2860
2861 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
2862 struct iphdr *iph = skb->encapsulation ?
2863 inner_ip_hdr(skb) : ip_hdr(skb);
2864
2865 if (!(iph->frag_off & htons(IP_DF)))
2866 features &= ~NETIF_F_TSO_MANGLEID;
2867 }
2868
2869 return features;
2870}
2871
2872netdev_features_t netif_skb_features(struct sk_buff *skb)
2873{
2874 struct net_device *dev = skb->dev;
2875 netdev_features_t features = dev->features;
2876
2877 if (skb_is_gso(skb))
2878 features = gso_features_check(skb, dev, features);
2879
2880
2881
2882
2883
2884 if (skb->encapsulation)
2885 features &= dev->hw_enc_features;
2886
2887 if (skb_vlan_tagged(skb))
2888 features = netdev_intersect_features(features,
2889 dev->vlan_features |
2890 NETIF_F_HW_VLAN_CTAG_TX |
2891 NETIF_F_HW_VLAN_STAG_TX);
2892
2893 if (dev->netdev_ops->ndo_features_check)
2894 features &= dev->netdev_ops->ndo_features_check(skb, dev,
2895 features);
2896 else
2897 features &= dflt_features_check(skb, dev, features);
2898
2899 return harmonize_features(skb, features);
2900}
2901EXPORT_SYMBOL(netif_skb_features);
2902
2903static int xmit_one(struct sk_buff *skb, struct net_device *dev,
2904 struct netdev_queue *txq, bool more)
2905{
2906 unsigned int len;
2907 int rc;
2908
2909 if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
2910 dev_queue_xmit_nit(skb, dev);
2911
2912 len = skb->len;
2913 trace_net_dev_start_xmit(skb, dev);
2914 rc = netdev_start_xmit(skb, dev, txq, more);
2915 trace_net_dev_xmit(skb, rc, dev, len);
2916
2917 return rc;
2918}
2919
2920struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
2921 struct netdev_queue *txq, int *ret)
2922{
2923 struct sk_buff *skb = first;
2924 int rc = NETDEV_TX_OK;
2925
2926 while (skb) {
2927 struct sk_buff *next = skb->next;
2928
2929 skb->next = NULL;
2930 rc = xmit_one(skb, dev, txq, next != NULL);
2931 if (unlikely(!dev_xmit_complete(rc))) {
2932 skb->next = next;
2933 goto out;
2934 }
2935
2936 skb = next;
2937 if (netif_xmit_stopped(txq) && skb) {
2938 rc = NETDEV_TX_BUSY;
2939 break;
2940 }
2941 }
2942
2943out:
2944 *ret = rc;
2945 return skb;
2946}
2947
2948static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2949 netdev_features_t features)
2950{
2951 if (skb_vlan_tag_present(skb) &&
2952 !vlan_hw_offload_capable(features, skb->vlan_proto))
2953 skb = __vlan_hwaccel_push_inside(skb);
2954 return skb;
2955}
2956
2957static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
2958{
2959 netdev_features_t features;
2960
2961 features = netif_skb_features(skb);
2962 skb = validate_xmit_vlan(skb, features);
2963 if (unlikely(!skb))
2964 goto out_null;
2965
2966 if (netif_needs_gso(skb, features)) {
2967 struct sk_buff *segs;
2968
2969 segs = skb_gso_segment(skb, features);
2970 if (IS_ERR(segs)) {
2971 goto out_kfree_skb;
2972 } else if (segs) {
2973 consume_skb(skb);
2974 skb = segs;
2975 }
2976 } else {
2977 if (skb_needs_linearize(skb, features) &&
2978 __skb_linearize(skb))
2979 goto out_kfree_skb;
2980
		/* If packet is not checksummed and device does not
		 * support checksumming for this protocol, complete
		 * checksumming here.
		 */
2985 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2986 if (skb->encapsulation)
2987 skb_set_inner_transport_header(skb,
2988 skb_checksum_start_offset(skb));
2989 else
2990 skb_set_transport_header(skb,
2991 skb_checksum_start_offset(skb));
2992 if (!(features & NETIF_F_CSUM_MASK) &&
2993 skb_checksum_help(skb))
2994 goto out_kfree_skb;
2995 }
2996 }
2997
2998 return skb;
2999
3000out_kfree_skb:
3001 kfree_skb(skb);
3002out_null:
3003 atomic_long_inc(&dev->tx_dropped);
3004 return NULL;
3005}
3006
3007struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
3008{
3009 struct sk_buff *next, *head = NULL, *tail;
3010
3011 for (; skb != NULL; skb = next) {
3012 next = skb->next;
3013 skb->next = NULL;
3014
		/* Seed skb->prev so that the "tail = skb->prev" below is
		 * correct even when validate_xmit_skb() does not segment
		 * the skb.
		 */
		skb->prev = skb;
3017
3018 skb = validate_xmit_skb(skb, dev);
3019 if (!skb)
3020 continue;
3021
3022 if (!head)
3023 head = skb;
3024 else
3025 tail->next = skb;
3026
		/* If the skb was segmented, skb->prev points at the last
		 * segment; otherwise it still points at the skb itself
		 * (seeded above).
		 */
		tail = skb->prev;
3030 }
3031 return head;
3032}
3033EXPORT_SYMBOL_GPL(validate_xmit_skb_list);
3034
3035static void qdisc_pkt_len_init(struct sk_buff *skb)
3036{
3037 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3038
3039 qdisc_skb_cb(skb)->pkt_len = skb->len;
3040
	/* To get more precise estimation of bytes sent on wire,
	 * we add to pkt_len the headers size of all segments
	 */
3044 if (shinfo->gso_size) {
3045 unsigned int hdr_len;
3046 u16 gso_segs = shinfo->gso_segs;
3047
3048
3049 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3050
3051
3052 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
3053 hdr_len += tcp_hdrlen(skb);
3054 else
3055 hdr_len += sizeof(struct udphdr);
3056
3057 if (shinfo->gso_type & SKB_GSO_DODGY)
3058 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3059 shinfo->gso_size);
3060
3061 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3062 }
3063}
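
/*
 * Worked example for the GSO accounting above (illustrative numbers only):
 * a TCP GSO skb carrying 46 segments of gso_size 1448 behind 54 bytes of
 * Ethernet + IPv4 + TCP headers has skb->len = 46 * 1448 + 54 = 66662.
 * With hdr_len = 54 and gso_segs = 46, pkt_len becomes
 * 66662 + (46 - 1) * 54 = 69092, i.e. 46 * (1448 + 54) bytes, the number
 * of bytes that will actually hit the wire once the skb is segmented.
 */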
3064
3065static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3066 struct net_device *dev,
3067 struct netdev_queue *txq)
3068{
3069 spinlock_t *root_lock = qdisc_lock(q);
3070 struct sk_buff *to_free = NULL;
3071 bool contended;
3072 int rc;
3073
3074 qdisc_calculate_pkt_len(skb, q);
3075
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get qdisc main lock.
	 * This permits qdisc->running owner to get the lock more
	 * often and dequeue packets faster.
	 */
3081 contended = qdisc_is_running(q);
3082 if (unlikely(contended))
3083 spin_lock(&q->busylock);
3084
3085 spin_lock(root_lock);
3086 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3087 __qdisc_drop(skb, &to_free);
3088 rc = NET_XMIT_DROP;
3089 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3090 qdisc_run_begin(q)) {
		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */
3097 qdisc_bstats_update(q, skb);
3098
3099 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3100 if (unlikely(contended)) {
3101 spin_unlock(&q->busylock);
3102 contended = false;
3103 }
3104 __qdisc_run(q);
3105 } else
3106 qdisc_run_end(q);
3107
3108 rc = NET_XMIT_SUCCESS;
3109 } else {
3110 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3111 if (qdisc_run_begin(q)) {
3112 if (unlikely(contended)) {
3113 spin_unlock(&q->busylock);
3114 contended = false;
3115 }
3116 __qdisc_run(q);
3117 }
3118 }
3119 spin_unlock(root_lock);
3120 if (unlikely(to_free))
3121 kfree_skb_list(to_free);
3122 if (unlikely(contended))
3123 spin_unlock(&q->busylock);
3124 return rc;
3125}
3126
3127#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3128static void skb_update_prio(struct sk_buff *skb)
3129{
3130 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
3131
3132 if (!skb->priority && skb->sk && map) {
3133 unsigned int prioidx =
3134 sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
3135
3136 if (prioidx < map->priomap_len)
3137 skb->priority = map->priomap[prioidx];
3138 }
3139}
3140#else
3141#define skb_update_prio(skb)
3142#endif
3143
3144DEFINE_PER_CPU(int, xmit_recursion);
3145EXPORT_SYMBOL(xmit_recursion);
3146
/**
 *	dev_loopback_xmit - loop back @skb
 *	@net: network namespace this loopback is happening in
 *	@sk: sk needed to be a netfilter okfn
 *	@skb: buffer to transmit
 */
3153int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3154{
3155 skb_reset_mac_header(skb);
3156 __skb_pull(skb, skb_network_offset(skb));
3157 skb->pkt_type = PACKET_LOOPBACK;
3158 skb->ip_summed = CHECKSUM_UNNECESSARY;
3159 WARN_ON(!skb_dst(skb));
3160 skb_dst_force(skb);
3161 netif_rx_ni(skb);
3162 return 0;
3163}
3164EXPORT_SYMBOL(dev_loopback_xmit);
3165
3166#ifdef CONFIG_NET_EGRESS
3167static struct sk_buff *
3168sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3169{
3170 struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
3171 struct tcf_result cl_res;
3172
3173 if (!cl)
3174 return skb;
3175
3176
3177
3178
3179 qdisc_bstats_cpu_update(cl->q, skb);
3180
3181 switch (tc_classify(skb, cl, &cl_res, false)) {
3182 case TC_ACT_OK:
3183 case TC_ACT_RECLASSIFY:
3184 skb->tc_index = TC_H_MIN(cl_res.classid);
3185 break;
3186 case TC_ACT_SHOT:
3187 qdisc_qstats_cpu_drop(cl->q);
3188 *ret = NET_XMIT_DROP;
3189 kfree_skb(skb);
3190 return NULL;
3191 case TC_ACT_STOLEN:
3192 case TC_ACT_QUEUED:
3193 *ret = NET_XMIT_SUCCESS;
3194 consume_skb(skb);
3195 return NULL;
3196 case TC_ACT_REDIRECT:
3197
3198 skb_do_redirect(skb);
3199 *ret = NET_XMIT_SUCCESS;
3200 return NULL;
3201 default:
3202 break;
3203 }
3204
3205 return skb;
3206}
3207#endif
3208
3209static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
3210{
3211#ifdef CONFIG_XPS
3212 struct xps_dev_maps *dev_maps;
3213 struct xps_map *map;
3214 int queue_index = -1;
3215
3216 rcu_read_lock();
3217 dev_maps = rcu_dereference(dev->xps_maps);
3218 if (dev_maps) {
3219 map = rcu_dereference(
3220 dev_maps->cpu_map[skb->sender_cpu - 1]);
3221 if (map) {
3222 if (map->len == 1)
3223 queue_index = map->queues[0];
3224 else
3225 queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
3226 map->len)];
3227 if (unlikely(queue_index >= dev->real_num_tx_queues))
3228 queue_index = -1;
3229 }
3230 }
3231 rcu_read_unlock();
3232
3233 return queue_index;
3234#else
3235 return -1;
3236#endif
3237}
3238
3239static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
3240{
3241 struct sock *sk = skb->sk;
3242 int queue_index = sk_tx_queue_get(sk);
3243
3244 if (queue_index < 0 || skb->ooo_okay ||
3245 queue_index >= dev->real_num_tx_queues) {
3246 int new_index = get_xps_queue(dev, skb);
3247 if (new_index < 0)
3248 new_index = skb_tx_hash(dev, skb);
3249
3250 if (queue_index != new_index && sk &&
3251 sk_fullsock(sk) &&
3252 rcu_access_pointer(sk->sk_dst_cache))
3253 sk_tx_queue_set(sk, new_index);
3254
3255 queue_index = new_index;
3256 }
3257
3258 return queue_index;
3259}
3260
3261struct netdev_queue *netdev_pick_tx(struct net_device *dev,
3262 struct sk_buff *skb,
3263 void *accel_priv)
3264{
3265 int queue_index = 0;
3266
3267#ifdef CONFIG_XPS
3268 u32 sender_cpu = skb->sender_cpu - 1;
3269
3270 if (sender_cpu >= (u32)NR_CPUS)
3271 skb->sender_cpu = raw_smp_processor_id() + 1;
3272#endif
3273
3274 if (dev->real_num_tx_queues != 1) {
3275 const struct net_device_ops *ops = dev->netdev_ops;
3276 if (ops->ndo_select_queue)
3277 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
3278 __netdev_pick_tx);
3279 else
3280 queue_index = __netdev_pick_tx(dev, skb);
3281
3282 if (!accel_priv)
3283 queue_index = netdev_cap_txqueue(dev, queue_index);
3284 }
3285
3286 skb_set_queue_mapping(skb, queue_index);
3287 return netdev_get_tx_queue(dev, queue_index);
3288}
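
/*
 * Queue selection order used above: a driver-provided ndo_select_queue()
 * wins if present; otherwise __netdev_pick_tx() consults the socket's
 * cached queue mapping, then XPS (get_xps_queue()), and finally falls
 * back to skb_tx_hash().  The result is clamped to real_num_tx_queues
 * unless an L2 accel caller supplied its own accel_priv.
 */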
3289
3290
/**
 *	__dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *	@accel_priv: private data used for L2 forwarding offload
 *
 *	Queue a buffer for transmission to a network device.  The caller
 *	must have set the device and priority and built the buffer before
 *	calling this function.  The function can be called from an
 *	interrupt, but interrupts must be enabled, because the BH
 *	enable/disable done below would otherwise deadlock.
 *
 *	Regardless of the return value the skb is consumed, so it must not
 *	be referenced after this call.  A negative errno code is returned
 *	on failure; note that queue disciplines may also return positive
 *	values such as NET_XMIT_DROP, and that even a successful return
 *	does not guarantee the frame will be transmitted, since it may
 *	still be dropped due to congestion or traffic shaping.
 */
3316static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
3317{
3318 struct net_device *dev = skb->dev;
3319 struct netdev_queue *txq;
3320 struct Qdisc *q;
3321 int rc = -ENOMEM;
3322
3323 skb_reset_mac_header(skb);
3324
3325 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
3326 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
3327
3328
3329
3330
3331 rcu_read_lock_bh();
3332
3333 skb_update_prio(skb);
3334
3335 qdisc_pkt_len_init(skb);
3336#ifdef CONFIG_NET_CLS_ACT
3337 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
3338# ifdef CONFIG_NET_EGRESS
3339 if (static_key_false(&egress_needed)) {
3340 skb = sch_handle_egress(skb, &rc, dev);
3341 if (!skb)
3342 goto out;
3343 }
3344# endif
3345#endif
3346
3347
3348
3349 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
3350 skb_dst_drop(skb);
3351 else
3352 skb_dst_force(skb);
3353
3354 txq = netdev_pick_tx(dev, skb, accel_priv);
3355 q = rcu_dereference_bh(txq->qdisc);
3356
3357 trace_net_dev_queue(skb);
3358 if (q->enqueue) {
3359 rc = __dev_xmit_skb(skb, q, dev, txq);
3360 goto out;
3361 }
3362
	/* The device has no queue.  Common case for software devices:
	 * loopback, all the sorts of tunnels...
	 *
	 * Really, it is unlikely that netif_tx_lock protection is
	 * necessary here (loopback and IP tunnels are clean ignoring
	 * statistics counters), but some callers may rely on the
	 * protection taken below, so keep the per-queue xmit lock and
	 * the recursion guard.
	 */
3375 if (dev->flags & IFF_UP) {
3376 int cpu = smp_processor_id();
3377
3378 if (txq->xmit_lock_owner != cpu) {
3379 if (unlikely(__this_cpu_read(xmit_recursion) >
3380 XMIT_RECURSION_LIMIT))
3381 goto recursion_alert;
3382
3383 skb = validate_xmit_skb(skb, dev);
3384 if (!skb)
3385 goto out;
3386
3387 HARD_TX_LOCK(dev, txq, cpu);
3388
3389 if (!netif_xmit_stopped(txq)) {
3390 __this_cpu_inc(xmit_recursion);
3391 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
3392 __this_cpu_dec(xmit_recursion);
3393 if (dev_xmit_complete(rc)) {
3394 HARD_TX_UNLOCK(dev, txq);
3395 goto out;
3396 }
3397 }
3398 HARD_TX_UNLOCK(dev, txq);
3399 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
3400 dev->name);
3401 } else {
			/* Recursion is detected! It is possible,
			 * unfortunately
			 */
3405recursion_alert:
3406 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
3407 dev->name);
3408 }
3409 }
3410
3411 rc = -ENETDOWN;
3412 rcu_read_unlock_bh();
3413
3414 atomic_long_inc(&dev->tx_dropped);
3415 kfree_skb_list(skb);
3416 return rc;
3417out:
3418 rcu_read_unlock_bh();
3419 return rc;
3420}
3421
3422int dev_queue_xmit(struct sk_buff *skb)
3423{
3424 return __dev_queue_xmit(skb, NULL);
3425}
3426EXPORT_SYMBOL(dev_queue_xmit);
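
/*
 * Minimal calling sketch for protocol code handing a fully built frame to
 * the device layer (illustrative only):
 *
 *	skb->dev = dev;
 *	skb->priority = prio;
 *	rc = dev_queue_xmit(skb);
 *
 * On return the skb must not be touched again; rc is NET_XMIT_SUCCESS, a
 * positive congestion indication such as NET_XMIT_DROP or NET_XMIT_CN, or
 * a negative errno (e.g. -ENETDOWN from __dev_queue_xmit() above).
 */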
3427
3428int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
3429{
3430 return __dev_queue_xmit(skb, accel_priv);
3431}
3432EXPORT_SYMBOL(dev_queue_xmit_accel);
3433
3434
3435
3436
3437
3438
3439int netdev_max_backlog __read_mostly = 1000;
3440EXPORT_SYMBOL(netdev_max_backlog);
3441
3442int netdev_tstamp_prequeue __read_mostly = 1;
3443int netdev_budget __read_mostly = 300;
3444int weight_p __read_mostly = 64;
3445
3446
3447static inline void ____napi_schedule(struct softnet_data *sd,
3448 struct napi_struct *napi)
3449{
3450 list_add_tail(&napi->poll_list, &sd->poll_list);
3451 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3452}
3453
3454#ifdef CONFIG_RPS
3455
3456
3457struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3458EXPORT_SYMBOL(rps_sock_flow_table);
3459u32 rps_cpu_mask __read_mostly;
3460EXPORT_SYMBOL(rps_cpu_mask);
3461
3462struct static_key rps_needed __read_mostly;
3463EXPORT_SYMBOL(rps_needed);
3464
3465static struct rps_dev_flow *
3466set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3467 struct rps_dev_flow *rflow, u16 next_cpu)
3468{
3469 if (next_cpu < nr_cpu_ids) {
3470#ifdef CONFIG_RFS_ACCEL
3471 struct netdev_rx_queue *rxqueue;
3472 struct rps_dev_flow_table *flow_table;
3473 struct rps_dev_flow *old_rflow;
3474 u32 flow_id;
3475 u16 rxq_index;
3476 int rc;
3477
3478
3479 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3480 !(dev->features & NETIF_F_NTUPLE))
3481 goto out;
3482 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3483 if (rxq_index == skb_get_rx_queue(skb))
3484 goto out;
3485
3486 rxqueue = dev->_rx + rxq_index;
3487 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3488 if (!flow_table)
3489 goto out;
3490 flow_id = skb_get_hash(skb) & flow_table->mask;
3491 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3492 rxq_index, flow_id);
3493 if (rc < 0)
3494 goto out;
3495 old_rflow = rflow;
3496 rflow = &flow_table->flows[flow_id];
3497 rflow->filter = rc;
3498 if (old_rflow->filter == rflow->filter)
3499 old_rflow->filter = RPS_NO_FILTER;
3500 out:
3501#endif
3502 rflow->last_qtail =
3503 per_cpu(softnet_data, next_cpu).input_queue_head;
3504 }
3505
3506 rflow->cpu = next_cpu;
3507 return rflow;
3508}
3509
3510
3511
3512
3513
3514
3515static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3516 struct rps_dev_flow **rflowp)
3517{
3518 const struct rps_sock_flow_table *sock_flow_table;
3519 struct netdev_rx_queue *rxqueue = dev->_rx;
3520 struct rps_dev_flow_table *flow_table;
3521 struct rps_map *map;
3522 int cpu = -1;
3523 u32 tcpu;
3524 u32 hash;
3525
3526 if (skb_rx_queue_recorded(skb)) {
3527 u16 index = skb_get_rx_queue(skb);
3528
3529 if (unlikely(index >= dev->real_num_rx_queues)) {
3530 WARN_ONCE(dev->real_num_rx_queues > 1,
3531 "%s received packet on queue %u, but number "
3532 "of RX queues is %u\n",
3533 dev->name, index, dev->real_num_rx_queues);
3534 goto done;
3535 }
3536 rxqueue += index;
3537 }
3538
3539
3540
3541 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3542 map = rcu_dereference(rxqueue->rps_map);
3543 if (!flow_table && !map)
3544 goto done;
3545
3546 skb_reset_network_header(skb);
3547 hash = skb_get_hash(skb);
3548 if (!hash)
3549 goto done;
3550
3551 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3552 if (flow_table && sock_flow_table) {
3553 struct rps_dev_flow *rflow;
3554 u32 next_cpu;
3555 u32 ident;
3556
3557
3558 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
3559 if ((ident ^ hash) & ~rps_cpu_mask)
3560 goto try_rps;
3561
3562 next_cpu = ident & rps_cpu_mask;
3563
3564
3565
3566
3567 rflow = &flow_table->flows[hash & flow_table->mask];
3568 tcpu = rflow->cpu;
3569
3570
		/*
		 * If the desired CPU (where last recvmsg was done) is
		 * different from current CPU (one in the rx-queue flow
		 * table entry), switch if one of the following holds:
		 *   - Current CPU is unset (>= nr_cpu_ids).
		 *   - Current CPU is offline.
		 *   - The current CPU's queue tail has advanced beyond the
		 *     last packet that was enqueued using this table entry.
		 *     This guarantees that all previous packets for the flow
		 *     have been dequeued, thus preserving in order delivery.
		 */
3581 if (unlikely(tcpu != next_cpu) &&
3582 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
3583 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3584 rflow->last_qtail)) >= 0)) {
3585 tcpu = next_cpu;
3586 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3587 }
3588
3589 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
3590 *rflowp = rflow;
3591 cpu = tcpu;
3592 goto done;
3593 }
3594 }
3595
3596try_rps:
3597
3598 if (map) {
3599 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3600 if (cpu_online(tcpu)) {
3601 cpu = tcpu;
3602 goto done;
3603 }
3604 }
3605
3606done:
3607 return cpu;
3608}
3609
3610#ifdef CONFIG_RFS_ACCEL
/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */
3623bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3624 u32 flow_id, u16 filter_id)
3625{
3626 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3627 struct rps_dev_flow_table *flow_table;
3628 struct rps_dev_flow *rflow;
3629 bool expire = true;
3630 unsigned int cpu;
3631
3632 rcu_read_lock();
3633 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3634 if (flow_table && flow_id <= flow_table->mask) {
3635 rflow = &flow_table->flows[flow_id];
3636 cpu = ACCESS_ONCE(rflow->cpu);
3637 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
3638 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3639 rflow->last_qtail) <
3640 (int)(10 * flow_table->mask)))
3641 expire = false;
3642 }
3643 rcu_read_unlock();
3644 return expire;
3645}
3646EXPORT_SYMBOL(rps_may_expire_flow);
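
/*
 * A driver with accelerated RFS would typically walk its installed
 * hardware filters from a periodic task and reclaim any filter for which
 * rps_may_expire_flow() returns true (illustrative sketch; the filter
 * table entry "f" and its fields are hypothetical driver state):
 *
 *	if (rps_may_expire_flow(dev, f->rxq_index, f->flow_id, f->filter_id))
 *		remove the hardware steering rule and free the entry;
 */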
3647
3648#endif
3649
3650
3651static void rps_trigger_softirq(void *data)
3652{
3653 struct softnet_data *sd = data;
3654
3655 ____napi_schedule(sd, &sd->backlog);
3656 sd->received_rps++;
3657}
3658
3659#endif
3660
3661
3662
3663
3664
3665
3666static int rps_ipi_queued(struct softnet_data *sd)
3667{
3668#ifdef CONFIG_RPS
3669 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
3670
3671 if (sd != mysd) {
3672 sd->rps_ipi_next = mysd->rps_ipi_list;
3673 mysd->rps_ipi_list = sd;
3674
3675 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3676 return 1;
3677 }
3678#endif
3679 return 0;
3680}
3681
3682#ifdef CONFIG_NET_FLOW_LIMIT
3683int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3684#endif
3685
3686static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3687{
3688#ifdef CONFIG_NET_FLOW_LIMIT
3689 struct sd_flow_limit *fl;
3690 struct softnet_data *sd;
3691 unsigned int old_flow, new_flow;
3692
3693 if (qlen < (netdev_max_backlog >> 1))
3694 return false;
3695
3696 sd = this_cpu_ptr(&softnet_data);
3697
3698 rcu_read_lock();
3699 fl = rcu_dereference(sd->flow_limit);
3700 if (fl) {
3701 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3702 old_flow = fl->history[fl->history_head];
3703 fl->history[fl->history_head] = new_flow;
3704
3705 fl->history_head++;
3706 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3707
3708 if (likely(fl->buckets[old_flow]))
3709 fl->buckets[old_flow]--;
3710
3711 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3712 fl->count++;
3713 rcu_read_unlock();
3714 return true;
3715 }
3716 }
3717 rcu_read_unlock();
3718#endif
3719 return false;
3720}
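
/*
 * The flow limit above only kicks in once the backlog is at least half
 * full.  It keeps a rolling history of the most recent FLOW_LIMIT_HISTORY
 * packets that reached a half-full backlog, bucketed by flow hash; a new
 * packet is dropped when its flow alone accounts for more than half of
 * that history, so a single elephant flow cannot monopolise the per-cpu
 * backlog while lighter flows keep getting through.
 */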
3721
3722
3723
3724
3725
3726static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3727 unsigned int *qtail)
3728{
3729 struct softnet_data *sd;
3730 unsigned long flags;
3731 unsigned int qlen;
3732
3733 sd = &per_cpu(softnet_data, cpu);
3734
3735 local_irq_save(flags);
3736
3737 rps_lock(sd);
3738 if (!netif_running(skb->dev))
3739 goto drop;
3740 qlen = skb_queue_len(&sd->input_pkt_queue);
3741 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3742 if (qlen) {
3743enqueue:
3744 __skb_queue_tail(&sd->input_pkt_queue, skb);
3745 input_queue_tail_incr_save(sd, qtail);
3746 rps_unlock(sd);
3747 local_irq_restore(flags);
3748 return NET_RX_SUCCESS;
3749 }
3750
3751
3752
3753
3754 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3755 if (!rps_ipi_queued(sd))
3756 ____napi_schedule(sd, &sd->backlog);
3757 }
3758 goto enqueue;
3759 }
3760
3761drop:
3762 sd->dropped++;
3763 rps_unlock(sd);
3764
3765 local_irq_restore(flags);
3766
3767 atomic_long_inc(&skb->dev->rx_dropped);
3768 kfree_skb(skb);
3769 return NET_RX_DROP;
3770}
3771
3772static int netif_rx_internal(struct sk_buff *skb)
3773{
3774 int ret;
3775
3776 net_timestamp_check(netdev_tstamp_prequeue, skb);
3777
3778 trace_netif_rx(skb);
3779#ifdef CONFIG_RPS
3780 if (static_key_false(&rps_needed)) {
3781 struct rps_dev_flow voidflow, *rflow = &voidflow;
3782 int cpu;
3783
3784 preempt_disable();
3785 rcu_read_lock();
3786
3787 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3788 if (cpu < 0)
3789 cpu = smp_processor_id();
3790
3791 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3792
3793 rcu_read_unlock();
3794 preempt_enable();
3795 } else
3796#endif
3797 {
3798 unsigned int qtail;
3799 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3800 put_cpu();
3801 }
3802 return ret;
3803}
3804
3805
/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it
 *	for the upper (protocol) levels to process.  It always succeeds.
 *	The buffer may be dropped during processing for congestion control
 *	or by the protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */
3820int netif_rx(struct sk_buff *skb)
3821{
3822 trace_netif_rx_entry(skb);
3823
3824 return netif_rx_internal(skb);
3825}
3826EXPORT_SYMBOL(netif_rx);
3827
3828int netif_rx_ni(struct sk_buff *skb)
3829{
3830 int err;
3831
3832 trace_netif_rx_ni_entry(skb);
3833
3834 preempt_disable();
3835 err = netif_rx_internal(skb);
3836 if (local_softirq_pending())
3837 do_softirq();
3838 preempt_enable();
3839
3840 return err;
3841}
3842EXPORT_SYMBOL(netif_rx_ni);
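
/*
 * netif_rx_ni() is the process-context variant of netif_rx(): it disables
 * preemption around the enqueue and, if the enqueue raised NET_RX_SOFTIRQ,
 * runs the pending softirqs before returning, so a driver receiving in
 * process context does not leave the backlog unserviced.
 */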
3843
3844static __latent_entropy void net_tx_action(struct softirq_action *h)
3845{
3846 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
3847
3848 if (sd->completion_queue) {
3849 struct sk_buff *clist;
3850
3851 local_irq_disable();
3852 clist = sd->completion_queue;
3853 sd->completion_queue = NULL;
3854 local_irq_enable();
3855
3856 while (clist) {
3857 struct sk_buff *skb = clist;
3858 clist = clist->next;
3859
3860 WARN_ON(atomic_read(&skb->users));
3861 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3862 trace_consume_skb(skb);
3863 else
3864 trace_kfree_skb(skb, net_tx_action);
3865
3866 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
3867 __kfree_skb(skb);
3868 else
3869 __kfree_skb_defer(skb);
3870 }
3871
3872 __kfree_skb_flush();
3873 }
3874
3875 if (sd->output_queue) {
3876 struct Qdisc *head;
3877
3878 local_irq_disable();
3879 head = sd->output_queue;
3880 sd->output_queue = NULL;
3881 sd->output_queue_tailp = &sd->output_queue;
3882 local_irq_enable();
3883
3884 while (head) {
3885 struct Qdisc *q = head;
3886 spinlock_t *root_lock;
3887
3888 head = head->next_sched;
3889
3890 root_lock = qdisc_lock(q);
3891 spin_lock(root_lock);
3892
3893
3894
3895 smp_mb__before_atomic();
3896 clear_bit(__QDISC_STATE_SCHED, &q->state);
3897 qdisc_run(q);
3898 spin_unlock(root_lock);
3899 }
3900 }
3901}
3902
3903#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE)
3904
3905int (*br_fdb_test_addr_hook)(struct net_device *dev,
3906 unsigned char *addr) __read_mostly;
3907EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3908#endif
3909
3910static inline struct sk_buff *
3911sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
3912 struct net_device *orig_dev)
3913{
3914#ifdef CONFIG_NET_CLS_ACT
3915 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
3916 struct tcf_result cl_res;
3917
	/* If there's at least one ingress present somewhere (so
	 * we get here via enabled static key), remaining devices
	 * that are not configured with an ingress qdisc will bail
	 * out here.
	 */
3923 if (!cl)
3924 return skb;
3925 if (*pt_prev) {
3926 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3927 *pt_prev = NULL;
3928 }
3929
3930 qdisc_skb_cb(skb)->pkt_len = skb->len;
3931 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3932 qdisc_bstats_cpu_update(cl->q, skb);
3933
3934 switch (tc_classify(skb, cl, &cl_res, false)) {
3935 case TC_ACT_OK:
3936 case TC_ACT_RECLASSIFY:
3937 skb->tc_index = TC_H_MIN(cl_res.classid);
3938 break;
3939 case TC_ACT_SHOT:
3940 qdisc_qstats_cpu_drop(cl->q);
3941 kfree_skb(skb);
3942 return NULL;
3943 case TC_ACT_STOLEN:
3944 case TC_ACT_QUEUED:
3945 consume_skb(skb);
3946 return NULL;
3947 case TC_ACT_REDIRECT:
3948
3949
3950
3951
3952 __skb_push(skb, skb->mac_len);
3953 skb_do_redirect(skb);
3954 return NULL;
3955 default:
3956 break;
3957 }
3958#endif
3959 return skb;
3960}
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971bool netdev_is_rx_handler_busy(struct net_device *dev)
3972{
3973 ASSERT_RTNL();
3974 return dev && rtnl_dereference(dev->rx_handler);
3975}
3976EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
3977
3978
/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device. This handler will then be
 *	called from __netif_receive_skb. A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
 */
3992int netdev_rx_handler_register(struct net_device *dev,
3993 rx_handler_func_t *rx_handler,
3994 void *rx_handler_data)
3995{
3996 ASSERT_RTNL();
3997
3998 if (dev->rx_handler)
3999 return -EBUSY;
4000
4001
4002 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
4003 rcu_assign_pointer(dev->rx_handler, rx_handler);
4004
4005 return 0;
4006}
4007EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
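
/*
 * Typical use by an upper device such as a bridge or bonding master
 * (illustrative sketch only; mydev_handle_frame and priv are hypothetical):
 *
 *	static rx_handler_result_t mydev_handle_frame(struct sk_buff **pskb)
 *	{
 *		struct sk_buff *skb = *pskb;
 *
 *		... consume, redirect or pass on skb ...
 *		return RX_HANDLER_CONSUMED;	(or RX_HANDLER_PASS, etc.)
 *	}
 *
 *	rtnl_lock();
 *	err = netdev_rx_handler_register(port_dev, mydev_handle_frame, priv);
 *	rtnl_unlock();
 */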
4008
4009
/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */
4017void netdev_rx_handler_unregister(struct net_device *dev)
4018{
4019
4020 ASSERT_RTNL();
4021 RCU_INIT_POINTER(dev->rx_handler, NULL);
4022
4023
4024
4025
4026 synchronize_net();
4027 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
4028}
4029EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
4030
4031
4032
4033
4034
4035static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
4036{
4037 switch (skb->protocol) {
4038 case htons(ETH_P_ARP):
4039 case htons(ETH_P_IP):
4040 case htons(ETH_P_IPV6):
4041 case htons(ETH_P_8021Q):
4042 case htons(ETH_P_8021AD):
4043 return true;
4044 default:
4045 return false;
4046 }
4047}
4048
4049static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
4050 int *ret, struct net_device *orig_dev)
4051{
4052#ifdef CONFIG_NETFILTER_INGRESS
4053 if (nf_hook_ingress_active(skb)) {
4054 int ingress_retval;
4055
4056 if (*pt_prev) {
4057 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4058 *pt_prev = NULL;
4059 }
4060
4061 rcu_read_lock();
4062 ingress_retval = nf_hook_ingress(skb);
4063 rcu_read_unlock();
4064 return ingress_retval;
4065 }
4066#endif
4067 return 0;
4068}
4069
4070static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
4071{
4072 struct packet_type *ptype, *pt_prev;
4073 rx_handler_func_t *rx_handler;
4074 struct net_device *orig_dev;
4075 bool deliver_exact = false;
4076 int ret = NET_RX_DROP;
4077 __be16 type;
4078
4079 net_timestamp_check(!netdev_tstamp_prequeue, skb);
4080
4081 trace_netif_receive_skb(skb);
4082
4083 orig_dev = skb->dev;
4084
4085 skb_reset_network_header(skb);
4086 if (!skb_transport_header_was_set(skb))
4087 skb_reset_transport_header(skb);
4088 skb_reset_mac_len(skb);
4089
4090 pt_prev = NULL;
4091
4092another_round:
4093 skb->skb_iif = skb->dev->ifindex;
4094
4095 __this_cpu_inc(softnet_data.processed);
4096
4097 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
4098 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
4099 skb = skb_vlan_untag(skb);
4100 if (unlikely(!skb))
4101 goto out;
4102 }
4103
4104#ifdef CONFIG_NET_CLS_ACT
4105 if (skb->tc_verd & TC_NCLS) {
4106 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
4107 goto ncls;
4108 }
4109#endif
4110
4111 if (pfmemalloc)
4112 goto skip_taps;
4113
4114 list_for_each_entry_rcu(ptype, &ptype_all, list) {
4115 if (pt_prev)
4116 ret = deliver_skb(skb, pt_prev, orig_dev);
4117 pt_prev = ptype;
4118 }
4119
4120 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
4121 if (pt_prev)
4122 ret = deliver_skb(skb, pt_prev, orig_dev);
4123 pt_prev = ptype;
4124 }
4125
4126skip_taps:
4127#ifdef CONFIG_NET_INGRESS
4128 if (static_key_false(&ingress_needed)) {
4129 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
4130 if (!skb)
4131 goto out;
4132
4133 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
4134 goto out;
4135 }
4136#endif
4137#ifdef CONFIG_NET_CLS_ACT
4138 skb->tc_verd = 0;
4139ncls:
4140#endif
4141 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
4142 goto drop;
4143
4144 if (skb_vlan_tag_present(skb)) {
4145 if (pt_prev) {
4146 ret = deliver_skb(skb, pt_prev, orig_dev);
4147 pt_prev = NULL;
4148 }
4149 if (vlan_do_receive(&skb))
4150 goto another_round;
4151 else if (unlikely(!skb))
4152 goto out;
4153 }
4154
4155 rx_handler = rcu_dereference(skb->dev->rx_handler);
4156 if (rx_handler) {
4157 if (pt_prev) {
4158 ret = deliver_skb(skb, pt_prev, orig_dev);
4159 pt_prev = NULL;
4160 }
4161 switch (rx_handler(&skb)) {
4162 case RX_HANDLER_CONSUMED:
4163 ret = NET_RX_SUCCESS;
4164 goto out;
4165 case RX_HANDLER_ANOTHER:
4166 goto another_round;
4167 case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
4170 break;
4171 default:
4172 BUG();
4173 }
4174 }
4175
4176 if (unlikely(skb_vlan_tag_present(skb))) {
4177 if (skb_vlan_tag_get_id(skb))
4178 skb->pkt_type = PACKET_OTHERHOST;
4179
4180
4181
4182
4183 skb->vlan_tci = 0;
4184 }
4185
4186 type = skb->protocol;
4187
4188
4189 if (likely(!deliver_exact)) {
4190 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4191 &ptype_base[ntohs(type) &
4192 PTYPE_HASH_MASK]);
4193 }
4194
4195 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4196 &orig_dev->ptype_specific);
4197
4198 if (unlikely(skb->dev != orig_dev)) {
4199 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4200 &skb->dev->ptype_specific);
4201 }
4202
4203 if (pt_prev) {
4204 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
4205 goto drop;
4206 else
4207 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4208 } else {
4209drop:
4210 if (!deliver_exact)
4211 atomic_long_inc(&skb->dev->rx_dropped);
4212 else
4213 atomic_long_inc(&skb->dev->rx_nohandler);
4214 kfree_skb(skb);
4215
4216
4217
4218 ret = NET_RX_DROP;
4219 }
4220
4221out:
4222 return ret;
4223}
4224
4225static int __netif_receive_skb(struct sk_buff *skb)
4226{
4227 int ret;
4228
4229 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
4230 unsigned long pflags = current->flags;
4231
		/*
		 * PFMEMALLOC skbs are special, they should
		 * - be delivered to SOCK_MEMALLOC sockets only
		 * - stay away from userspace
		 * - have bounded memory usage
		 *
		 * Use PF_MEMALLOC as this saves us from propagating the
		 * allocation context down to all allocation sites.
		 */
4241 current->flags |= PF_MEMALLOC;
4242 ret = __netif_receive_skb_core(skb, true);
4243 tsk_restore_flags(current, pflags, PF_MEMALLOC);
4244 } else
4245 ret = __netif_receive_skb_core(skb, false);
4246
4247 return ret;
4248}
4249
4250static int netif_receive_skb_internal(struct sk_buff *skb)
4251{
4252 int ret;
4253
4254 net_timestamp_check(netdev_tstamp_prequeue, skb);
4255
4256 if (skb_defer_rx_timestamp(skb))
4257 return NET_RX_SUCCESS;
4258
4259 rcu_read_lock();
4260
4261#ifdef CONFIG_RPS
4262 if (static_key_false(&rps_needed)) {
4263 struct rps_dev_flow voidflow, *rflow = &voidflow;
4264 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
4265
4266 if (cpu >= 0) {
4267 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4268 rcu_read_unlock();
4269 return ret;
4270 }
4271 }
4272#endif
4273 ret = __netif_receive_skb(skb);
4274 rcu_read_unlock();
4275 return ret;
4276}
4277
4278
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
4293int netif_receive_skb(struct sk_buff *skb)
4294{
4295 trace_netif_receive_skb_entry(skb);
4296
4297 return netif_receive_skb_internal(skb);
4298}
4299EXPORT_SYMBOL(netif_receive_skb);
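
/*
 * Unlike netif_rx(), which only queues the buffer to the per-cpu backlog,
 * netif_receive_skb() processes the frame immediately in the caller's
 * softirq context, which is why NAPI drivers feed packets through it (or
 * through napi_gro_receive() below) from their poll routine.
 */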
4300
4301DEFINE_PER_CPU(struct work_struct, flush_works);
4302
4303
4304static void flush_backlog(struct work_struct *work)
4305{
4306 struct sk_buff *skb, *tmp;
4307 struct softnet_data *sd;
4308
4309 local_bh_disable();
4310 sd = this_cpu_ptr(&softnet_data);
4311
4312 local_irq_disable();
4313 rps_lock(sd);
4314 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
4315 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
4316 __skb_unlink(skb, &sd->input_pkt_queue);
4317 kfree_skb(skb);
4318 input_queue_head_incr(sd);
4319 }
4320 }
4321 rps_unlock(sd);
4322 local_irq_enable();
4323
4324 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
4325 if (skb->dev->reg_state == NETREG_UNREGISTERING) {
4326 __skb_unlink(skb, &sd->process_queue);
4327 kfree_skb(skb);
4328 input_queue_head_incr(sd);
4329 }
4330 }
4331 local_bh_enable();
4332}
4333
4334static void flush_all_backlogs(void)
4335{
4336 unsigned int cpu;
4337
4338 get_online_cpus();
4339
4340 for_each_online_cpu(cpu)
4341 queue_work_on(cpu, system_highpri_wq,
4342 per_cpu_ptr(&flush_works, cpu));
4343
4344 for_each_online_cpu(cpu)
4345 flush_work(per_cpu_ptr(&flush_works, cpu));
4346
4347 put_online_cpus();
4348}
4349
4350static int napi_gro_complete(struct sk_buff *skb)
4351{
4352 struct packet_offload *ptype;
4353 __be16 type = skb->protocol;
4354 struct list_head *head = &offload_base;
4355 int err = -ENOENT;
4356
4357 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
4358
4359 if (NAPI_GRO_CB(skb)->count == 1) {
4360 skb_shinfo(skb)->gso_size = 0;
4361 goto out;
4362 }
4363
4364 rcu_read_lock();
4365 list_for_each_entry_rcu(ptype, head, list) {
4366 if (ptype->type != type || !ptype->callbacks.gro_complete)
4367 continue;
4368
4369 err = ptype->callbacks.gro_complete(skb, 0);
4370 break;
4371 }
4372 rcu_read_unlock();
4373
4374 if (err) {
4375 WARN_ON(&ptype->list == head);
4376 kfree_skb(skb);
4377 return NET_RX_SUCCESS;
4378 }
4379
4380out:
4381 return netif_receive_skb_internal(skb);
4382}
4383
4384
4385
4386
4387
4388void napi_gro_flush(struct napi_struct *napi, bool flush_old)
4389{
4390 struct sk_buff *skb, *prev = NULL;
4391
4392
4393 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
4394 skb->prev = prev;
4395 prev = skb;
4396 }
4397
4398 for (skb = prev; skb; skb = prev) {
4399 skb->next = NULL;
4400
4401 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
4402 return;
4403
4404 prev = skb->prev;
4405 napi_gro_complete(skb);
4406 napi->gro_count--;
4407 }
4408
4409 napi->gro_list = NULL;
4410}
4411EXPORT_SYMBOL(napi_gro_flush);
4412
4413static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4414{
4415 struct sk_buff *p;
4416 unsigned int maclen = skb->dev->hard_header_len;
4417 u32 hash = skb_get_hash_raw(skb);
4418
4419 for (p = napi->gro_list; p; p = p->next) {
4420 unsigned long diffs;
4421
4422 NAPI_GRO_CB(p)->flush = 0;
4423
4424 if (hash != skb_get_hash_raw(p)) {
4425 NAPI_GRO_CB(p)->same_flow = 0;
4426 continue;
4427 }
4428
4429 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4430 diffs |= p->vlan_tci ^ skb->vlan_tci;
4431 diffs |= skb_metadata_dst_cmp(p, skb);
4432 if (maclen == ETH_HLEN)
4433 diffs |= compare_ether_header(skb_mac_header(p),
4434 skb_mac_header(skb));
4435 else if (!diffs)
4436 diffs = memcmp(skb_mac_header(p),
4437 skb_mac_header(skb),
4438 maclen);
4439 NAPI_GRO_CB(p)->same_flow = !diffs;
4440 }
4441}
4442
4443static void skb_gro_reset_offset(struct sk_buff *skb)
4444{
4445 const struct skb_shared_info *pinfo = skb_shinfo(skb);
4446 const skb_frag_t *frag0 = &pinfo->frags[0];
4447
4448 NAPI_GRO_CB(skb)->data_offset = 0;
4449 NAPI_GRO_CB(skb)->frag0 = NULL;
4450 NAPI_GRO_CB(skb)->frag0_len = 0;
4451
4452 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
4453 pinfo->nr_frags &&
4454 !PageHighMem(skb_frag_page(frag0))) {
4455 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
4456 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
4457 }
4458}
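
/*
 * The frag0 fast path set up above lets the GRO header helpers
 * (skb_gro_header_fast() and friends) read protocol headers straight out
 * of the first page fragment when the skb has an empty linear area, the
 * common case for napi_gro_frags() users.  gro_pull_from_frag0() below
 * copies whatever header bytes were consumed back into the linear area
 * before the skb leaves the GRO layer.
 */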
4459
4460static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
4461{
4462 struct skb_shared_info *pinfo = skb_shinfo(skb);
4463
4464 BUG_ON(skb->end - skb->tail < grow);
4465
4466 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
4467
4468 skb->data_len -= grow;
4469 skb->tail += grow;
4470
4471 pinfo->frags[0].page_offset += grow;
4472 skb_frag_size_sub(&pinfo->frags[0], grow);
4473
4474 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
4475 skb_frag_unref(skb, 0);
4476 memmove(pinfo->frags, pinfo->frags + 1,
4477 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
4478 }
4479}
4480
4481static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4482{
4483 struct sk_buff **pp = NULL;
4484 struct packet_offload *ptype;
4485 __be16 type = skb->protocol;
4486 struct list_head *head = &offload_base;
4487 int same_flow;
4488 enum gro_result ret;
4489 int grow;
4490
4491 if (!(skb->dev->features & NETIF_F_GRO))
4492 goto normal;
4493
4494 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
4495 goto normal;
4496
4497 gro_list_prepare(napi, skb);
4498
4499 rcu_read_lock();
4500 list_for_each_entry_rcu(ptype, head, list) {
4501 if (ptype->type != type || !ptype->callbacks.gro_receive)
4502 continue;
4503
4504 skb_set_network_header(skb, skb_gro_offset(skb));
4505 skb_reset_mac_len(skb);
4506 NAPI_GRO_CB(skb)->same_flow = 0;
4507 NAPI_GRO_CB(skb)->flush = 0;
4508 NAPI_GRO_CB(skb)->free = 0;
4509 NAPI_GRO_CB(skb)->encap_mark = 0;
4510 NAPI_GRO_CB(skb)->recursion_counter = 0;
4511 NAPI_GRO_CB(skb)->is_fou = 0;
4512 NAPI_GRO_CB(skb)->is_atomic = 1;
4513 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
4514
4515
4516 switch (skb->ip_summed) {
4517 case CHECKSUM_COMPLETE:
4518 NAPI_GRO_CB(skb)->csum = skb->csum;
4519 NAPI_GRO_CB(skb)->csum_valid = 1;
4520 NAPI_GRO_CB(skb)->csum_cnt = 0;
4521 break;
4522 case CHECKSUM_UNNECESSARY:
4523 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
4524 NAPI_GRO_CB(skb)->csum_valid = 0;
4525 break;
4526 default:
4527 NAPI_GRO_CB(skb)->csum_cnt = 0;
4528 NAPI_GRO_CB(skb)->csum_valid = 0;
4529 }
4530
4531 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
4532 break;
4533 }
4534 rcu_read_unlock();
4535
4536 if (&ptype->list == head)
4537 goto normal;
4538
4539 same_flow = NAPI_GRO_CB(skb)->same_flow;
4540 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
4541
4542 if (pp) {
4543 struct sk_buff *nskb = *pp;
4544
4545 *pp = nskb->next;
4546 nskb->next = NULL;
4547 napi_gro_complete(nskb);
4548 napi->gro_count--;
4549 }
4550
4551 if (same_flow)
4552 goto ok;
4553
4554 if (NAPI_GRO_CB(skb)->flush)
4555 goto normal;
4556
4557 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
4558 struct sk_buff *nskb = napi->gro_list;
4559
4560
4561 while (nskb->next) {
4562 pp = &nskb->next;
4563 nskb = *pp;
4564 }
4565 *pp = NULL;
4566 nskb->next = NULL;
4567 napi_gro_complete(nskb);
4568 } else {
4569 napi->gro_count++;
4570 }
4571 NAPI_GRO_CB(skb)->count = 1;
4572 NAPI_GRO_CB(skb)->age = jiffies;
4573 NAPI_GRO_CB(skb)->last = skb;
4574 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
4575 skb->next = napi->gro_list;
4576 napi->gro_list = skb;
4577 ret = GRO_HELD;
4578
4579pull:
4580 grow = skb_gro_offset(skb) - skb_headlen(skb);
4581 if (grow > 0)
4582 gro_pull_from_frag0(skb, grow);
4583ok:
4584 return ret;
4585
4586normal:
4587 ret = GRO_NORMAL;
4588 goto pull;
4589}
4590
4591struct packet_offload *gro_find_receive_by_type(__be16 type)
4592{
4593 struct list_head *offload_head = &offload_base;
4594 struct packet_offload *ptype;
4595
4596 list_for_each_entry_rcu(ptype, offload_head, list) {
4597 if (ptype->type != type || !ptype->callbacks.gro_receive)
4598 continue;
4599 return ptype;
4600 }
4601 return NULL;
4602}
4603EXPORT_SYMBOL(gro_find_receive_by_type);
4604
4605struct packet_offload *gro_find_complete_by_type(__be16 type)
4606{
4607 struct list_head *offload_head = &offload_base;
4608 struct packet_offload *ptype;
4609
4610 list_for_each_entry_rcu(ptype, offload_head, list) {
4611 if (ptype->type != type || !ptype->callbacks.gro_complete)
4612 continue;
4613 return ptype;
4614 }
4615 return NULL;
4616}
4617EXPORT_SYMBOL(gro_find_complete_by_type);
4618
4619static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4620{
4621 switch (ret) {
4622 case GRO_NORMAL:
4623 if (netif_receive_skb_internal(skb))
4624 ret = GRO_DROP;
4625 break;
4626
4627 case GRO_DROP:
4628 kfree_skb(skb);
4629 break;
4630
4631 case GRO_MERGED_FREE:
4632 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
4633 skb_dst_drop(skb);
4634 kmem_cache_free(skbuff_head_cache, skb);
4635 } else {
4636 __kfree_skb(skb);
4637 }
4638 break;
4639
4640 case GRO_HELD:
4641 case GRO_MERGED:
4642 break;
4643 }
4644
4645 return ret;
4646}
4647
4648gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4649{
4650 skb_mark_napi_id(skb, napi);
4651 trace_napi_gro_receive_entry(skb);
4652
4653 skb_gro_reset_offset(skb);
4654
4655 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
4656}
4657EXPORT_SYMBOL(napi_gro_receive);
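
/*
 * Drivers call napi_gro_receive() from their NAPI poll routine for every
 * completed receive descriptor instead of netif_receive_skb(); packets
 * belonging to the same flow are then merged on napi->gro_list and flushed
 * as one large skb from napi_complete_done()/napi_gro_flush().
 */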
4658
4659static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
4660{
4661 if (unlikely(skb->pfmemalloc)) {
4662 consume_skb(skb);
4663 return;
4664 }
4665 __skb_pull(skb, skb_headlen(skb));
4666
4667 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
4668 skb->vlan_tci = 0;
4669 skb->dev = napi->dev;
4670 skb->skb_iif = 0;
4671 skb->encapsulation = 0;
4672 skb_shinfo(skb)->gso_type = 0;
4673 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
4674
4675 napi->skb = skb;
4676}
4677
4678struct sk_buff *napi_get_frags(struct napi_struct *napi)
4679{
4680 struct sk_buff *skb = napi->skb;
4681
4682 if (!skb) {
4683 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
4684 if (skb) {
4685 napi->skb = skb;
4686 skb_mark_napi_id(skb, napi);
4687 }
4688 }
4689 return skb;
4690}
4691EXPORT_SYMBOL(napi_get_frags);
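
/*
 * Page-flipping drivers that never build a linear skb can use the pair
 * napi_get_frags()/napi_gro_frags() instead of napi_gro_receive()
 * (illustrative sketch only; the rx descriptor handling is hypothetical):
 *
 *	skb = napi_get_frags(napi);
 *	if (!skb)
 *		... drop the hardware buffer ...
 *	skb_fill_page_desc(skb, 0, page, offset, len);
 *	skb->len += len;
 *	skb->data_len += len;
 *	skb->truesize += truesize;
 *	napi_gro_frags(napi);
 *
 * napi_gro_frags() pulls the Ethernet header out of the fragment itself,
 * so the driver never calls eth_type_trans().
 */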
4692
4693static gro_result_t napi_frags_finish(struct napi_struct *napi,
4694 struct sk_buff *skb,
4695 gro_result_t ret)
4696{
4697 switch (ret) {
4698 case GRO_NORMAL:
4699 case GRO_HELD:
4700 __skb_push(skb, ETH_HLEN);
4701 skb->protocol = eth_type_trans(skb, skb->dev);
4702 if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
4703 ret = GRO_DROP;
4704 break;
4705
4706 case GRO_DROP:
4707 case GRO_MERGED_FREE:
4708 napi_reuse_skb(napi, skb);
4709 break;
4710
4711 case GRO_MERGED:
4712 break;
4713 }
4714
4715 return ret;
4716}
4717
4718
4719
4720
4721
4722static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4723{
4724 struct sk_buff *skb = napi->skb;
4725 const struct ethhdr *eth;
4726 unsigned int hlen = sizeof(*eth);
4727
4728 napi->skb = NULL;
4729
4730 skb_reset_mac_header(skb);
4731 skb_gro_reset_offset(skb);
4732
4733 eth = skb_gro_header_fast(skb, 0);
4734 if (unlikely(skb_gro_header_hard(skb, hlen))) {
4735 eth = skb_gro_header_slow(skb, hlen, 0);
4736 if (unlikely(!eth)) {
4737 net_warn_ratelimited("%s: dropping impossible skb from %s\n",
4738 __func__, napi->dev->name);
4739 napi_reuse_skb(napi, skb);
4740 return NULL;
4741 }
4742 } else {
4743 gro_pull_from_frag0(skb, hlen);
4744 NAPI_GRO_CB(skb)->frag0 += hlen;
4745 NAPI_GRO_CB(skb)->frag0_len -= hlen;
4746 }
4747 __skb_pull(skb, hlen);
4748
4749
4750
4751
4752
4753
4754 skb->protocol = eth->h_proto;
4755
4756 return skb;
4757}
4758
4759gro_result_t napi_gro_frags(struct napi_struct *napi)
4760{
4761 struct sk_buff *skb = napi_frags_skb(napi);
4762
4763 if (!skb)
4764 return GRO_DROP;
4765
4766 trace_napi_gro_frags_entry(skb);
4767
4768 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4769}
4770EXPORT_SYMBOL(napi_gro_frags);
4771
4772
4773
4774
4775__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
4776{
4777 __wsum wsum;
4778 __sum16 sum;
4779
4780 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
4781
4782
4783 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
4784 if (likely(!sum)) {
4785 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
4786 !skb->csum_complete_sw)
4787 netdev_rx_csum_fault(skb->dev);
4788 }
4789
4790 NAPI_GRO_CB(skb)->csum = wsum;
4791 NAPI_GRO_CB(skb)->csum_valid = 1;
4792
4793 return sum;
4794}
4795EXPORT_SYMBOL(__skb_gro_checksum_complete);
4796
4797
4798
4799
4800
4801static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4802{
4803#ifdef CONFIG_RPS
4804 struct softnet_data *remsd = sd->rps_ipi_list;
4805
4806 if (remsd) {
4807 sd->rps_ipi_list = NULL;
4808
4809 local_irq_enable();
4810
4811
4812 while (remsd) {
4813 struct softnet_data *next = remsd->rps_ipi_next;
4814
4815 if (cpu_online(remsd->cpu))
4816 smp_call_function_single_async(remsd->cpu,
4817 &remsd->csd);
4818 remsd = next;
4819 }
4820 } else
4821#endif
4822 local_irq_enable();
4823}
4824
4825static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
4826{
4827#ifdef CONFIG_RPS
4828 return sd->rps_ipi_list != NULL;
4829#else
4830 return false;
4831#endif
4832}
4833
4834static int process_backlog(struct napi_struct *napi, int quota)
4835{
4836 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4837 bool again = true;
4838 int work = 0;
4839
4840
4841
4842
4843 if (sd_has_rps_ipi_waiting(sd)) {
4844 local_irq_disable();
4845 net_rps_action_and_irq_enable(sd);
4846 }
4847
4848 napi->weight = weight_p;
4849 while (again) {
4850 struct sk_buff *skb;
4851
4852 while ((skb = __skb_dequeue(&sd->process_queue))) {
4853 rcu_read_lock();
4854 __netif_receive_skb(skb);
4855 rcu_read_unlock();
4856 input_queue_head_incr(sd);
4857 if (++work >= quota)
4858 return work;
4859
4860 }
4861
4862 local_irq_disable();
4863 rps_lock(sd);
4864 if (skb_queue_empty(&sd->input_pkt_queue)) {
4865
			/*
			 * Inline a custom version of __napi_complete().
			 * Only the current cpu owns and manipulates this
			 * napi, and NAPI_STATE_SCHED is the only possible
			 * flag set on backlog.
			 * We can use a plain write instead of clear_bit(),
			 * and we don't need an smp_mb() memory barrier.
			 */
4873 napi->state = 0;
4874 again = false;
4875 } else {
4876 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4877 &sd->process_queue);
4878 }
4879 rps_unlock(sd);
4880 local_irq_enable();
4881 }
4882
4883 return work;
4884}
4885
4886
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
 */
4893void __napi_schedule(struct napi_struct *n)
4894{
4895 unsigned long flags;
4896
4897 local_irq_save(flags);
4898 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4899 local_irq_restore(flags);
4900}
4901EXPORT_SYMBOL(__napi_schedule);
4902
4903
/**
 * __napi_schedule_irqoff - schedule for receive
 * @n: entry to schedule
 *
 * Variant of __napi_schedule() assuming hard irqs are masked
 */
4909void __napi_schedule_irqoff(struct napi_struct *n)
4910{
4911 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4912}
4913EXPORT_SYMBOL(__napi_schedule_irqoff);
4914
4915void __napi_complete(struct napi_struct *n)
4916{
4917 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4918
4919 list_del_init(&n->poll_list);
4920 smp_mb__before_atomic();
4921 clear_bit(NAPI_STATE_SCHED, &n->state);
4922}
4923EXPORT_SYMBOL(__napi_complete);
4924
4925void napi_complete_done(struct napi_struct *n, int work_done)
4926{
4927 unsigned long flags;
4928
4929
4930
4931
4932
4933 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4934 return;
4935
4936 if (n->gro_list) {
4937 unsigned long timeout = 0;
4938
4939 if (work_done)
4940 timeout = n->dev->gro_flush_timeout;
4941
4942 if (timeout)
4943 hrtimer_start(&n->timer, ns_to_ktime(timeout),
4944 HRTIMER_MODE_REL_PINNED);
4945 else
4946 napi_gro_flush(n, false);
4947 }
4948 if (likely(list_empty(&n->poll_list))) {
4949 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
4950 } else {
4951
4952 local_irq_save(flags);
4953 __napi_complete(n);
4954 local_irq_restore(flags);
4955 }
4956}
4957EXPORT_SYMBOL(napi_complete_done);
4958
4959
4960static struct napi_struct *napi_by_id(unsigned int napi_id)
4961{
4962 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4963 struct napi_struct *napi;
4964
4965 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4966 if (napi->napi_id == napi_id)
4967 return napi;
4968
4969 return NULL;
4970}
4971
4972#if defined(CONFIG_NET_RX_BUSY_POLL)
4973#define BUSY_POLL_BUDGET 8
4974bool sk_busy_loop(struct sock *sk, int nonblock)
4975{
4976 unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
4977 int (*busy_poll)(struct napi_struct *dev);
4978 struct napi_struct *napi;
4979 int rc = false;
4980
4981 rcu_read_lock();
4982
4983 napi = napi_by_id(sk->sk_napi_id);
4984 if (!napi)
4985 goto out;
4986
4987
4988 busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
4989
4990 do {
4991 rc = 0;
4992 local_bh_disable();
4993 if (busy_poll) {
4994 rc = busy_poll(napi);
4995 } else if (napi_schedule_prep(napi)) {
4996 void *have = netpoll_poll_lock(napi);
4997
4998 if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
4999 rc = napi->poll(napi, BUSY_POLL_BUDGET);
5000 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
5001 if (rc == BUSY_POLL_BUDGET) {
5002 napi_complete_done(napi, rc);
5003 napi_schedule(napi);
5004 }
5005 }
5006 netpoll_poll_unlock(have);
5007 }
5008 if (rc > 0)
5009 __NET_ADD_STATS(sock_net(sk),
5010 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
5011 local_bh_enable();
5012
5013 if (rc == LL_FLUSH_FAILED)
5014 break;
5015
5016 cpu_relax();
5017 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
5018 !need_resched() && !busy_loop_timeout(end_time));
5019
5020 rc = !skb_queue_empty(&sk->sk_receive_queue);
5021out:
5022 rcu_read_unlock();
5023 return rc;
5024}
5025EXPORT_SYMBOL(sk_busy_loop);
5026
5027#endif
5028
5029void napi_hash_add(struct napi_struct *napi)
5030{
5031 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
5032 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
5033 return;
5034
5035 spin_lock(&napi_hash_lock);
5036
5037
5038 do {
5039 if (unlikely(++napi_gen_id < NR_CPUS + 1))
5040 napi_gen_id = NR_CPUS + 1;
5041 } while (napi_by_id(napi_gen_id));
5042 napi->napi_id = napi_gen_id;
5043
5044 hlist_add_head_rcu(&napi->napi_hash_node,
5045 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
5046
5047 spin_unlock(&napi_hash_lock);
5048}
5049EXPORT_SYMBOL_GPL(napi_hash_add);
5050
5051
5052
5053
5054bool napi_hash_del(struct napi_struct *napi)
5055{
5056 bool rcu_sync_needed = false;
5057
5058 spin_lock(&napi_hash_lock);
5059
5060 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
5061 rcu_sync_needed = true;
5062 hlist_del_rcu(&napi->napi_hash_node);
5063 }
5064 spin_unlock(&napi_hash_lock);
5065 return rcu_sync_needed;
5066}
5067EXPORT_SYMBOL_GPL(napi_hash_del);
5068
5069static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
5070{
5071 struct napi_struct *napi;
5072
5073 napi = container_of(timer, struct napi_struct, timer);
5074 if (napi->gro_list)
5075 napi_schedule(napi);
5076
5077 return HRTIMER_NORESTART;
5078}
5079
5080void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
5081 int (*poll)(struct napi_struct *, int), int weight)
5082{
5083 INIT_LIST_HEAD(&napi->poll_list);
5084 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
5085 napi->timer.function = napi_watchdog;
5086 napi->gro_count = 0;
5087 napi->gro_list = NULL;
5088 napi->skb = NULL;
5089 napi->poll = poll;
5090 if (weight > NAPI_POLL_WEIGHT)
5091 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
5092 weight, dev->name);
5093 napi->weight = weight;
5094 list_add(&napi->dev_list, &dev->napi_list);
5095 napi->dev = dev;
5096#ifdef CONFIG_NETPOLL
5097 spin_lock_init(&napi->poll_lock);
5098 napi->poll_owner = -1;
5099#endif
5100 set_bit(NAPI_STATE_SCHED, &napi->state);
5101 napi_hash_add(napi);
5102}
5103EXPORT_SYMBOL(netif_napi_add);
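
/*
 * Typical driver wiring for the NAPI instance registered above
 * (illustrative sketch only; the mydrv_* names and priv layout are
 * hypothetical):
 *
 *	static int mydrv_poll(struct napi_struct *napi, int budget)
 *	{
 *		int work = mydrv_clean_rx_ring(napi, budget);
 *
 *		if (work < budget) {
 *			napi_complete_done(napi, work);
 *			mydrv_enable_rx_irq(priv);
 *		}
 *		return work;
 *	}
 *
 *	netif_napi_add(netdev, &priv->napi, mydrv_poll, NAPI_POLL_WEIGHT);
 *	napi_enable(&priv->napi);
 *
 * The interrupt handler then calls napi_schedule(&priv->napi) after
 * masking further rx interrupts.
 */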
5104
5105void napi_disable(struct napi_struct *n)
5106{
5107 might_sleep();
5108 set_bit(NAPI_STATE_DISABLE, &n->state);
5109
5110 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
5111 msleep(1);
5112 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
5113 msleep(1);
5114
5115 hrtimer_cancel(&n->timer);
5116
5117 clear_bit(NAPI_STATE_DISABLE, &n->state);
5118}
5119EXPORT_SYMBOL(napi_disable);
5120
5121
5122void netif_napi_del(struct napi_struct *napi)
5123{
5124 might_sleep();
5125 if (napi_hash_del(napi))
5126 synchronize_net();
5127 list_del_init(&napi->dev_list);
5128 napi_free_frags(napi);
5129
5130 kfree_skb_list(napi->gro_list);
5131 napi->gro_list = NULL;
5132 napi->gro_count = 0;
5133}
5134EXPORT_SYMBOL(netif_napi_del);
5135
5136static int napi_poll(struct napi_struct *n, struct list_head *repoll)
5137{
5138 void *have;
5139 int work, weight;
5140
5141 list_del_init(&n->poll_list);
5142
5143 have = netpoll_poll_lock(n);
5144
5145 weight = n->weight;
5146
	/* This NAPI_STATE_SCHED test is for avoiding a race
	 * with netpoll's poll_napi().  Only the entity which
	 * obtains the lock and sees NAPI_STATE_SCHED set will
	 * actually make the ->poll() call.  Therefore we avoid
	 * accidentally calling ->poll() when NAPI is not scheduled.
	 */
5153 work = 0;
5154 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
5155 work = n->poll(n, weight);
5156 trace_napi_poll(n, work, weight);
5157 }
5158
5159 WARN_ON_ONCE(work > weight);
5160
5161 if (likely(work < weight))
5162 goto out_unlock;
5163
5164
5165
5166
5167
5168
5169 if (unlikely(napi_disable_pending(n))) {
5170 napi_complete(n);
5171 goto out_unlock;
5172 }
5173
5174 if (n->gro_list) {
5175
5176
5177
5178 napi_gro_flush(n, HZ >= 1000);
5179 }
5180
5181
5182
5183
5184 if (unlikely(!list_empty(&n->poll_list))) {
5185 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
5186 n->dev ? n->dev->name : "backlog");
5187 goto out_unlock;
5188 }
5189
5190 list_add_tail(&n->poll_list, repoll);
5191
5192out_unlock:
5193 netpoll_poll_unlock(have);
5194
5195 return work;
5196}
5197
5198static __latent_entropy void net_rx_action(struct softirq_action *h)
5199{
5200 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
5201 unsigned long time_limit = jiffies + 2;
5202 int budget = netdev_budget;
5203 LIST_HEAD(list);
5204 LIST_HEAD(repoll);
5205
5206 local_irq_disable();
5207 list_splice_init(&sd->poll_list, &list);
5208 local_irq_enable();
5209
5210 for (;;) {
5211 struct napi_struct *n;
5212
5213 if (list_empty(&list)) {
5214 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
5215 return;
5216 break;
5217 }
5218
5219 n = list_first_entry(&list, struct napi_struct, poll_list);
5220 budget -= napi_poll(n, &repoll);
5221
5222
		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies since that still gives
		 * an average latency of 1.5/HZ.
		 */
5226 if (unlikely(budget <= 0 ||
5227 time_after_eq(jiffies, time_limit))) {
5228 sd->time_squeeze++;
5229 break;
5230 }
5231 }
5232
5233 __kfree_skb_flush();
5234 local_irq_disable();
5235
5236 list_splice_tail_init(&sd->poll_list, &list);
5237 list_splice_tail(&repoll, &list);
5238 list_splice(&list, &sd->poll_list);
5239 if (!list_empty(&sd->poll_list))
5240 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
5241
5242 net_rps_action_and_irq_enable(sd);
5243}
5244
5245struct netdev_adjacent {
5246 struct net_device *dev;
5247
5248
5249 bool master;
5250
5251
5252 u16 ref_nr;
5253
5254
5255 void *private;
5256
5257 struct list_head list;
5258 struct rcu_head rcu;
5259};
5260
5261static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
5262 struct list_head *adj_list)
5263{
5264 struct netdev_adjacent *adj;
5265
5266 list_for_each_entry(adj, adj_list, list) {
5267 if (adj->dev == adj_dev)
5268 return adj;
5269 }
5270 return NULL;
5271}
5272
5273
/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is.  The caller must hold the RTNL lock.
 */
5282bool netdev_has_upper_dev(struct net_device *dev,
5283 struct net_device *upper_dev)
5284{
5285 ASSERT_RTNL();
5286
5287 return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
5288}
5289EXPORT_SYMBOL(netdev_has_upper_dev);
5290
5291
5292
5293
5294
5295
5296
5297
5298static bool netdev_has_any_upper_dev(struct net_device *dev)
5299{
5300 ASSERT_RTNL();
5301
5302 return !list_empty(&dev->all_adj_list.upper);
5303}
5304
5305
5306
5307
5308
5309
5310
5311
5312struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
5313{
5314 struct netdev_adjacent *upper;
5315
5316 ASSERT_RTNL();
5317
5318 if (list_empty(&dev->adj_list.upper))
5319 return NULL;
5320
5321 upper = list_first_entry(&dev->adj_list.upper,
5322 struct netdev_adjacent, list);
5323 if (likely(upper->master))
5324 return upper->dev;
5325 return NULL;
5326}
5327EXPORT_SYMBOL(netdev_master_upper_dev_get);
5328
5329void *netdev_adjacent_get_private(struct list_head *adj_list)
5330{
5331 struct netdev_adjacent *adj;
5332
5333 adj = list_entry(adj_list, struct netdev_adjacent, list);
5334
5335 return adj->private;
5336}
5337EXPORT_SYMBOL(netdev_adjacent_get_private);
5338
5339
5340
5341
5342
5343
5344
5345
5346
5347struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
5348 struct list_head **iter)
5349{
5350 struct netdev_adjacent *upper;
5351
5352 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5353
5354 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5355
5356 if (&upper->list == &dev->adj_list.upper)
5357 return NULL;
5358
5359 *iter = &upper->list;
5360
5361 return upper->dev;
5362}
5363EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
5364
5365
5366
5367
5368
5369
5370
5371
5372
5373struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
5374 struct list_head **iter)
5375{
5376 struct netdev_adjacent *upper;
5377
5378 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5379
5380 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5381
5382 if (&upper->list == &dev->all_adj_list.upper)
5383 return NULL;
5384
5385 *iter = &upper->list;
5386
5387 return upper->dev;
5388}
5389EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
5390
5391
5392
5393
5394
5395
5396
5397
5398
5399
5400
5401
5402void *netdev_lower_get_next_private(struct net_device *dev,
5403 struct list_head **iter)
5404{
5405 struct netdev_adjacent *lower;
5406
5407 lower = list_entry(*iter, struct netdev_adjacent, list);
5408
5409 if (&lower->list == &dev->adj_list.lower)
5410 return NULL;
5411
5412 *iter = lower->list.next;
5413
5414 return lower->private;
5415}
5416EXPORT_SYMBOL(netdev_lower_get_next_private);
5417
5418
5419
5420
5421
5422
5423
5424
5425
5426
5427
5428void *netdev_lower_get_next_private_rcu(struct net_device *dev,
5429 struct list_head **iter)
5430{
5431 struct netdev_adjacent *lower;
5432
5433 WARN_ON_ONCE(!rcu_read_lock_held());
5434
5435 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5436
5437 if (&lower->list == &dev->adj_list.lower)
5438 return NULL;
5439
5440 *iter = &lower->list;
5441
5442 return lower->private;
5443}
EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);

/**
 * netdev_lower_get_next - Get the next device from the lower neighbour
 *                         list
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent from the dev's lower neighbour
 * list, starting from iter position. The caller must hold RTNL lock or
 * its own locking that guarantees that the neighbour lower
 * list will remain unchanged.
 */
void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5458{
5459 struct netdev_adjacent *lower;
5460
5461 lower = list_entry(*iter, struct netdev_adjacent, list);
5462
5463 if (&lower->list == &dev->adj_list.lower)
5464 return NULL;
5465
5466 *iter = lower->list.next;
5467
5468 return lower->dev;
5469}
5470EXPORT_SYMBOL(netdev_lower_get_next);
5471
5472
5473
5474
5475
5476
5477
5478
5479
5480
5481
5482struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
5483{
5484 struct netdev_adjacent *lower;
5485
5486 lower = list_entry(*iter, struct netdev_adjacent, list);
5487
5488 if (&lower->list == &dev->all_adj_list.lower)
5489 return NULL;
5490
5491 *iter = lower->list.next;
5492
5493 return lower->dev;
5494}
5495EXPORT_SYMBOL(netdev_all_lower_get_next);
5496
5497
5498
5499
5500
5501
5502
5503
5504
5505
5506struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
5507 struct list_head **iter)
5508{
5509 struct netdev_adjacent *lower;
5510
5511 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5512
5513 if (&lower->list == &dev->all_adj_list.lower)
5514 return NULL;
5515
5516 *iter = &lower->list;
5517
5518 return lower->dev;
5519}
5520EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
5521
5522
5523
5524
5525
5526
5527
5528
5529
5530
5531void *netdev_lower_get_first_private_rcu(struct net_device *dev)
5532{
5533 struct netdev_adjacent *lower;
5534
5535 lower = list_first_or_null_rcu(&dev->adj_list.lower,
5536 struct netdev_adjacent, list);
5537 if (lower)
5538 return lower->private;
5539 return NULL;
5540}
EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);

/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return pointer to it or NULL in case
 * it's not there. The caller must hold the RCU read lock.
 */
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
5551{
5552 struct netdev_adjacent *upper;
5553
5554 upper = list_first_or_null_rcu(&dev->adj_list.upper,
5555 struct netdev_adjacent, list);
5556 if (upper && likely(upper->master))
5557 return upper->dev;
5558 return NULL;
5559}
5560EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
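
/* Illustrative sketch (not part of this file): a hypothetical datapath
 * caller resolving its master device under RCU protection, which is the
 * intended use of the _rcu variant above.
 *
 *	rcu_read_lock();
 *	master = netdev_master_upper_dev_get_rcu(slave_dev);
 *	if (master)
 *		pr_debug("%s is enslaved to %s\n",
 *			 slave_dev->name, master->name);
 *	rcu_read_unlock();
 */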
5561
5562static int netdev_adjacent_sysfs_add(struct net_device *dev,
5563 struct net_device *adj_dev,
5564 struct list_head *dev_list)
5565{
5566 char linkname[IFNAMSIZ+7];
5567 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5568 "upper_%s" : "lower_%s", adj_dev->name);
5569 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
5570 linkname);
5571}
5572static void netdev_adjacent_sysfs_del(struct net_device *dev,
5573 char *name,
5574 struct list_head *dev_list)
5575{
5576 char linkname[IFNAMSIZ+7];
5577 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5578 "upper_%s" : "lower_%s", name);
5579 sysfs_remove_link(&(dev->dev.kobj), linkname);
5580}
5581
5582static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
5583 struct net_device *adj_dev,
5584 struct list_head *dev_list)
5585{
5586 return (dev_list == &dev->adj_list.upper ||
5587 dev_list == &dev->adj_list.lower) &&
5588 net_eq(dev_net(dev), dev_net(adj_dev));
5589}
5590
5591static int __netdev_adjacent_dev_insert(struct net_device *dev,
5592 struct net_device *adj_dev,
5593 u16 ref_nr,
5594 struct list_head *dev_list,
5595 void *private, bool master)
5596{
5597 struct netdev_adjacent *adj;
5598 int ret;
5599
5600 adj = __netdev_find_adj(adj_dev, dev_list);
5601
5602 if (adj) {
5603 adj->ref_nr += ref_nr;
5604 return 0;
5605 }
5606
5607 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
5608 if (!adj)
5609 return -ENOMEM;
5610
5611 adj->dev = adj_dev;
5612 adj->master = master;
5613 adj->ref_nr = ref_nr;
5614 adj->private = private;
5615 dev_hold(adj_dev);
5616
5617 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
5618 adj_dev->name, dev->name, adj_dev->name);
5619
5620 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
5621 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
5622 if (ret)
5623 goto free_adj;
5624 }
5625
5626
5627 if (master) {
5628 ret = sysfs_create_link(&(dev->dev.kobj),
5629 &(adj_dev->dev.kobj), "master");
5630 if (ret)
5631 goto remove_symlinks;
5632
5633 list_add_rcu(&adj->list, dev_list);
5634 } else {
5635 list_add_tail_rcu(&adj->list, dev_list);
5636 }
5637
5638 return 0;
5639
5640remove_symlinks:
5641 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5642 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5643free_adj:
5644 kfree(adj);
5645 dev_put(adj_dev);
5646
5647 return ret;
5648}
5649
5650static void __netdev_adjacent_dev_remove(struct net_device *dev,
5651 struct net_device *adj_dev,
5652 u16 ref_nr,
5653 struct list_head *dev_list)
5654{
5655 struct netdev_adjacent *adj;
5656
5657 adj = __netdev_find_adj(adj_dev, dev_list);
5658
5659 if (!adj) {
5660 pr_err("tried to remove device %s from %s\n",
5661 dev->name, adj_dev->name);
5662 BUG();
5663 }
5664
5665 if (adj->ref_nr > ref_nr) {
5666 pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
5667 ref_nr, adj->ref_nr-ref_nr);
5668 adj->ref_nr -= ref_nr;
5669 return;
5670 }
5671
5672 if (adj->master)
5673 sysfs_remove_link(&(dev->dev.kobj), "master");
5674
5675 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5676 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5677
5678 list_del_rcu(&adj->list);
5679 pr_debug("dev_put for %s, because link removed from %s to %s\n",
5680 adj_dev->name, dev->name, adj_dev->name);
5681 dev_put(adj_dev);
5682 kfree_rcu(adj, rcu);
5683}
5684
5685static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5686 struct net_device *upper_dev,
5687 u16 ref_nr,
5688 struct list_head *up_list,
5689 struct list_head *down_list,
5690 void *private, bool master)
5691{
5692 int ret;
5693
5694 ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list,
5695 private, master);
5696 if (ret)
5697 return ret;
5698
5699 ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list,
5700 private, false);
5701 if (ret) {
5702 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
5703 return ret;
5704 }
5705
5706 return 0;
5707}
5708
5709static int __netdev_adjacent_dev_link(struct net_device *dev,
5710 struct net_device *upper_dev,
5711 u16 ref_nr)
5712{
5713 return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr,
5714 &dev->all_adj_list.upper,
5715 &upper_dev->all_adj_list.lower,
5716 NULL, false);
5717}
5718
5719static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5720 struct net_device *upper_dev,
5721 u16 ref_nr,
5722 struct list_head *up_list,
5723 struct list_head *down_list)
5724{
5725 __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list);
5726 __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list);
5727}
5728
5729static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5730 struct net_device *upper_dev,
5731 u16 ref_nr)
5732{
5733 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr,
5734 &dev->all_adj_list.upper,
5735 &upper_dev->all_adj_list.lower);
5736}
5737
5738static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5739 struct net_device *upper_dev,
5740 void *private, bool master)
5741{
5742 int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1);
5743
5744 if (ret)
5745 return ret;
5746
5747 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1,
5748 &dev->adj_list.upper,
5749 &upper_dev->adj_list.lower,
5750 private, master);
5751 if (ret) {
5752 __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
5753 return ret;
5754 }
5755
5756 return 0;
5757}
5758
5759static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5760 struct net_device *upper_dev)
5761{
5762 __netdev_adjacent_dev_unlink(dev, upper_dev, 1);
5763 __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1,
5764 &dev->adj_list.upper,
5765 &upper_dev->adj_list.lower);
5766}
5767
5768static int __netdev_upper_dev_link(struct net_device *dev,
5769 struct net_device *upper_dev, bool master,
5770 void *upper_priv, void *upper_info)
5771{
5772 struct netdev_notifier_changeupper_info changeupper_info;
5773 struct netdev_adjacent *i, *j, *to_i, *to_j;
5774 int ret = 0;
5775
5776 ASSERT_RTNL();
5777
5778 if (dev == upper_dev)
5779 return -EBUSY;
5780
5781
5782 if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
5783 return -EBUSY;
5784
5785 if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
5786 return -EEXIST;
5787
5788 if (master && netdev_master_upper_dev_get(dev))
5789 return -EBUSY;
5790
5791 changeupper_info.upper_dev = upper_dev;
5792 changeupper_info.master = master;
5793 changeupper_info.linking = true;
5794 changeupper_info.upper_info = upper_info;
5795
5796 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5797 &changeupper_info.info);
5798 ret = notifier_to_errno(ret);
5799 if (ret)
5800 return ret;
5801
5802 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
5803 master);
5804 if (ret)
5805 return ret;
5806
5807
5808
5809
5810
5811
5812 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5813 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5814 pr_debug("Interlinking %s with %s, non-neighbour\n",
5815 i->dev->name, j->dev->name);
5816 ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
5817 if (ret)
5818 goto rollback_mesh;
5819 }
5820 }
5821
5822
5823 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5824 pr_debug("linking %s's upper device %s with %s\n",
5825 upper_dev->name, i->dev->name, dev->name);
5826 ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
5827 if (ret)
5828 goto rollback_upper_mesh;
5829 }
5830
5831
5832 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5833 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5834 i->dev->name, upper_dev->name);
5835 ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
5836 if (ret)
5837 goto rollback_lower_mesh;
5838 }
5839
5840 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5841 &changeupper_info.info);
5842 ret = notifier_to_errno(ret);
5843 if (ret)
5844 goto rollback_lower_mesh;
5845
5846 return 0;
5847
5848rollback_lower_mesh:
5849 to_i = i;
5850 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5851 if (i == to_i)
5852 break;
5853 __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
5854 }
5855
5856 i = NULL;
5857
5858rollback_upper_mesh:
5859 to_i = i;
5860 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5861 if (i == to_i)
5862 break;
5863 __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
5864 }
5865
5866 i = j = NULL;
5867
5868rollback_mesh:
5869 to_i = i;
5870 to_j = j;
5871 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5872 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5873 if (i == to_i && j == to_j)
5874 break;
5875 __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
5876 }
5877 if (i == to_i)
5878 break;
5879 }
5880
5881 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5882
5883 return ret;
5884}
5885
5886
5887
5888
5889
5890
5891
5892
5893
5894
5895
5896int netdev_upper_dev_link(struct net_device *dev,
5897 struct net_device *upper_dev)
5898{
5899 return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
5900}
EXPORT_SYMBOL(netdev_upper_dev_link);

/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @upper_priv: upper device private
 * @upper_info: upper info to be passed down via notifier
 *
 * Adds a link to device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */
int netdev_master_upper_dev_link(struct net_device *dev,
				 struct net_device *upper_dev,
				 void *upper_priv, void *upper_info)
5919{
5920 return __netdev_upper_dev_link(dev, upper_dev, true,
5921 upper_priv, upper_info);
5922}
EXPORT_SYMBOL(netdev_master_upper_dev_link);

/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to device which is upper to this one. The caller must hold
 * the RTNL lock.
 */
void netdev_upper_dev_unlink(struct net_device *dev,
			     struct net_device *upper_dev)
5935{
5936 struct netdev_notifier_changeupper_info changeupper_info;
5937 struct netdev_adjacent *i, *j;
5938 ASSERT_RTNL();
5939
5940 changeupper_info.upper_dev = upper_dev;
5941 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
5942 changeupper_info.linking = false;
5943
5944 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5945 &changeupper_info.info);
5946
5947 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5948
5949
5950
5951
5952
5953 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5954 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5955 __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
5956
5957
5958
5959
5960 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5961 __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
5962
5963 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5964 __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
5965
5966 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5967 &changeupper_info.info);
5968}
5969EXPORT_SYMBOL(netdev_upper_dev_unlink);
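
/* Illustrative sketch (hypothetical caller, not from this file): a master
 * driver enslaving a lower port under RTNL and unlinking it on release;
 * port_dev and master_dev are placeholders.
 *
 *	ASSERT_RTNL();
 *	err = netdev_master_upper_dev_link(port_dev, master_dev, NULL, NULL);
 *	if (err)
 *		return err;
 *	...
 *	netdev_upper_dev_unlink(port_dev, master_dev);
 */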
5970
5971
5972
5973
5974
5975
5976
5977
5978
5979void netdev_bonding_info_change(struct net_device *dev,
5980 struct netdev_bonding_info *bonding_info)
5981{
5982 struct netdev_notifier_bonding_info info;
5983
5984 memcpy(&info.bonding_info, bonding_info,
5985 sizeof(struct netdev_bonding_info));
5986 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
5987 &info.info);
5988}
5989EXPORT_SYMBOL(netdev_bonding_info_change);
5990
5991static void netdev_adjacent_add_links(struct net_device *dev)
5992{
5993 struct netdev_adjacent *iter;
5994
5995 struct net *net = dev_net(dev);
5996
5997 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5998 if (!net_eq(net, dev_net(iter->dev)))
5999 continue;
6000 netdev_adjacent_sysfs_add(iter->dev, dev,
6001 &iter->dev->adj_list.lower);
6002 netdev_adjacent_sysfs_add(dev, iter->dev,
6003 &dev->adj_list.upper);
6004 }
6005
6006 list_for_each_entry(iter, &dev->adj_list.lower, list) {
6007 if (!net_eq(net, dev_net(iter->dev)))
6008 continue;
6009 netdev_adjacent_sysfs_add(iter->dev, dev,
6010 &iter->dev->adj_list.upper);
6011 netdev_adjacent_sysfs_add(dev, iter->dev,
6012 &dev->adj_list.lower);
6013 }
6014}
6015
6016static void netdev_adjacent_del_links(struct net_device *dev)
6017{
6018 struct netdev_adjacent *iter;
6019
6020 struct net *net = dev_net(dev);
6021
6022 list_for_each_entry(iter, &dev->adj_list.upper, list) {
6023 if (!net_eq(net, dev_net(iter->dev)))
6024 continue;
6025 netdev_adjacent_sysfs_del(iter->dev, dev->name,
6026 &iter->dev->adj_list.lower);
6027 netdev_adjacent_sysfs_del(dev, iter->dev->name,
6028 &dev->adj_list.upper);
6029 }
6030
6031 list_for_each_entry(iter, &dev->adj_list.lower, list) {
6032 if (!net_eq(net, dev_net(iter->dev)))
6033 continue;
6034 netdev_adjacent_sysfs_del(iter->dev, dev->name,
6035 &iter->dev->adj_list.upper);
6036 netdev_adjacent_sysfs_del(dev, iter->dev->name,
6037 &dev->adj_list.lower);
6038 }
6039}
6040
6041void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
6042{
6043 struct netdev_adjacent *iter;
6044
6045 struct net *net = dev_net(dev);
6046
6047 list_for_each_entry(iter, &dev->adj_list.upper, list) {
6048 if (!net_eq(net, dev_net(iter->dev)))
6049 continue;
6050 netdev_adjacent_sysfs_del(iter->dev, oldname,
6051 &iter->dev->adj_list.lower);
6052 netdev_adjacent_sysfs_add(iter->dev, dev,
6053 &iter->dev->adj_list.lower);
6054 }
6055
6056 list_for_each_entry(iter, &dev->adj_list.lower, list) {
6057 if (!net_eq(net, dev_net(iter->dev)))
6058 continue;
6059 netdev_adjacent_sysfs_del(iter->dev, oldname,
6060 &iter->dev->adj_list.upper);
6061 netdev_adjacent_sysfs_add(iter->dev, dev,
6062 &iter->dev->adj_list.upper);
6063 }
6064}
6065
6066void *netdev_lower_dev_get_private(struct net_device *dev,
6067 struct net_device *lower_dev)
6068{
6069 struct netdev_adjacent *lower;
6070
6071 if (!lower_dev)
6072 return NULL;
6073 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
6074 if (!lower)
6075 return NULL;
6076
6077 return lower->private;
6078}
6079EXPORT_SYMBOL(netdev_lower_dev_get_private);
6080
6081
6082int dev_get_nest_level(struct net_device *dev)
6083{
6084 struct net_device *lower = NULL;
6085 struct list_head *iter;
6086 int max_nest = -1;
6087 int nest;
6088
6089 ASSERT_RTNL();
6090
6091 netdev_for_each_lower_dev(dev, lower, iter) {
6092 nest = dev_get_nest_level(lower);
6093 if (max_nest < nest)
6094 max_nest = nest;
6095 }
6096
6097 return max_nest + 1;
6098}
6099EXPORT_SYMBOL(dev_get_nest_level);
6100
6101
6102
6103
6104
6105
6106
6107
6108
6109void netdev_lower_state_changed(struct net_device *lower_dev,
6110 void *lower_state_info)
6111{
6112 struct netdev_notifier_changelowerstate_info changelowerstate_info;
6113
6114 ASSERT_RTNL();
6115 changelowerstate_info.lower_state_info = lower_state_info;
6116 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
6117 &changelowerstate_info.info);
6118}
6119EXPORT_SYMBOL(netdev_lower_state_changed);
6120
6121int netdev_default_l2upper_neigh_construct(struct net_device *dev,
6122 struct neighbour *n)
6123{
6124 struct net_device *lower_dev, *stop_dev;
6125 struct list_head *iter;
6126 int err;
6127
6128 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6129 if (!lower_dev->netdev_ops->ndo_neigh_construct)
6130 continue;
6131 err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
6132 if (err) {
6133 stop_dev = lower_dev;
6134 goto rollback;
6135 }
6136 }
6137 return 0;
6138
6139rollback:
6140 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6141 if (lower_dev == stop_dev)
6142 break;
6143 if (!lower_dev->netdev_ops->ndo_neigh_destroy)
6144 continue;
6145 lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
6146 }
6147 return err;
6148}
6149EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
6150
6151void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
6152 struct neighbour *n)
6153{
6154 struct net_device *lower_dev;
6155 struct list_head *iter;
6156
6157 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6158 if (!lower_dev->netdev_ops->ndo_neigh_destroy)
6159 continue;
6160 lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
6161 }
6162}
6163EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
6164
6165static void dev_change_rx_flags(struct net_device *dev, int flags)
6166{
6167 const struct net_device_ops *ops = dev->netdev_ops;
6168
6169 if (ops->ndo_change_rx_flags)
6170 ops->ndo_change_rx_flags(dev, flags);
6171}
6172
6173static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
6174{
6175 unsigned int old_flags = dev->flags;
6176 kuid_t uid;
6177 kgid_t gid;
6178
6179 ASSERT_RTNL();
6180
6181 dev->flags |= IFF_PROMISC;
6182 dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
6190 else {
6191 dev->promiscuity -= inc;
6192 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
6193 dev->name);
6194 return -EOVERFLOW;
6195 }
6196 }
6197 if (dev->flags != old_flags) {
6198 pr_info("device %s %s promiscuous mode\n",
6199 dev->name,
6200 dev->flags & IFF_PROMISC ? "entered" : "left");
6201 if (audit_enabled) {
6202 current_uid_gid(&uid, &gid);
6203 audit_log(current->audit_context, GFP_ATOMIC,
6204 AUDIT_ANOM_PROMISCUOUS,
6205 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
6206 dev->name, (dev->flags & IFF_PROMISC),
6207 (old_flags & IFF_PROMISC),
6208 from_kuid(&init_user_ns, audit_get_loginuid(current)),
6209 from_kuid(&init_user_ns, uid),
6210 from_kgid(&init_user_ns, gid),
6211 audit_get_sessionid(current));
6212 }
6213
6214 dev_change_rx_flags(dev, IFF_PROMISC);
6215 }
6216 if (notify)
6217 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
6218 return 0;
6219}
6220
6221
6222
6223
6224
6225
6226
6227
6228
6229
6230
6231
6232int dev_set_promiscuity(struct net_device *dev, int inc)
6233{
6234 unsigned int old_flags = dev->flags;
6235 int err;
6236
6237 err = __dev_set_promiscuity(dev, inc, true);
6238 if (err < 0)
6239 return err;
6240 if (dev->flags != old_flags)
6241 dev_set_rx_mode(dev);
6242 return err;
6243}
6244EXPORT_SYMBOL(dev_set_promiscuity);
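
/* Illustrative sketch (hypothetical caller): promiscuity is a counter, so
 * every increment must eventually be balanced by a decrement, both done
 * under RTNL.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	// enter promiscuous mode
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		// drop our reference again
 *	rtnl_unlock();
 */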
6245
6246static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
6247{
6248 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
6249
6250 ASSERT_RTNL();
6251
6252 dev->flags |= IFF_ALLMULTI;
6253 dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
6261 else {
6262 dev->allmulti -= inc;
6263 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
6264 dev->name);
6265 return -EOVERFLOW;
6266 }
6267 }
6268 if (dev->flags ^ old_flags) {
6269 dev_change_rx_flags(dev, IFF_ALLMULTI);
6270 dev_set_rx_mode(dev);
6271 if (notify)
6272 __dev_notify_flags(dev, old_flags,
6273 dev->gflags ^ old_gflags);
6274 }
6275 return 0;
6276}
6277
6278
6279
6280
6281
6282
6283
6284
6285
6286
6287
6288
6289
6290
6291int dev_set_allmulti(struct net_device *dev, int inc)
6292{
6293 return __dev_set_allmulti(dev, inc, true);
6294}
6295EXPORT_SYMBOL(dev_set_allmulti);
6296
6297
6298
6299
6300
6301
6302
6303void __dev_set_rx_mode(struct net_device *dev)
6304{
	const struct net_device_ops *ops = dev->netdev_ops;

	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags & IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
		/* Unicast addresses changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
6319 __dev_set_promiscuity(dev, 1, false);
6320 dev->uc_promisc = true;
6321 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
6322 __dev_set_promiscuity(dev, -1, false);
6323 dev->uc_promisc = false;
6324 }
6325 }
6326
6327 if (ops->ndo_set_rx_mode)
6328 ops->ndo_set_rx_mode(dev);
6329}
6330
6331void dev_set_rx_mode(struct net_device *dev)
6332{
6333 netif_addr_lock_bh(dev);
6334 __dev_set_rx_mode(dev);
6335 netif_addr_unlock_bh(dev);
6336}
6337
6338
6339
6340
6341
6342
6343
6344unsigned int dev_get_flags(const struct net_device *dev)
6345{
6346 unsigned int flags;
6347
6348 flags = (dev->flags & ~(IFF_PROMISC |
6349 IFF_ALLMULTI |
6350 IFF_RUNNING |
6351 IFF_LOWER_UP |
6352 IFF_DORMANT)) |
6353 (dev->gflags & (IFF_PROMISC |
6354 IFF_ALLMULTI));
6355
6356 if (netif_running(dev)) {
6357 if (netif_oper_up(dev))
6358 flags |= IFF_RUNNING;
6359 if (netif_carrier_ok(dev))
6360 flags |= IFF_LOWER_UP;
6361 if (netif_dormant(dev))
6362 flags |= IFF_DORMANT;
6363 }
6364
6365 return flags;
6366}
6367EXPORT_SYMBOL(dev_get_flags);
6368
6369int __dev_change_flags(struct net_device *dev, unsigned int flags)
6370{
6371 unsigned int old_flags = dev->flags;
6372 int ret;
6373
6374 ASSERT_RTNL();
6375
6376
6377
6378
6379
6380 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
6381 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
6382 IFF_AUTOMEDIA)) |
6383 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
6384 IFF_ALLMULTI));
6385
6386
6387
6388
6389
6390 if ((old_flags ^ flags) & IFF_MULTICAST)
6391 dev_change_rx_flags(dev, IFF_MULTICAST);
6392
6393 dev_set_rx_mode(dev);
6394
6395
6396
6397
6398
6399
6400
6401 ret = 0;
6402 if ((old_flags ^ flags) & IFF_UP)
6403 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
6404
6405 if ((flags ^ dev->gflags) & IFF_PROMISC) {
6406 int inc = (flags & IFF_PROMISC) ? 1 : -1;
6407 unsigned int old_flags = dev->flags;
6408
6409 dev->gflags ^= IFF_PROMISC;
6410
6411 if (__dev_set_promiscuity(dev, inc, false) >= 0)
6412 if (dev->flags != old_flags)
6413 dev_set_rx_mode(dev);
6414 }
6415
6416
6417
6418
6419
6420 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
6421 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
6422
6423 dev->gflags ^= IFF_ALLMULTI;
6424 __dev_set_allmulti(dev, inc, false);
6425 }
6426
6427 return ret;
6428}
6429
6430void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
6431 unsigned int gchanges)
6432{
6433 unsigned int changes = dev->flags ^ old_flags;
6434
6435 if (gchanges)
6436 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
6437
6438 if (changes & IFF_UP) {
6439 if (dev->flags & IFF_UP)
6440 call_netdevice_notifiers(NETDEV_UP, dev);
6441 else
6442 call_netdevice_notifiers(NETDEV_DOWN, dev);
6443 }
6444
6445 if (dev->flags & IFF_UP &&
6446 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
6447 struct netdev_notifier_change_info change_info;
6448
6449 change_info.flags_changed = changes;
6450 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
6451 &change_info.info);
6452 }
6453}
6454
6455
6456
6457
6458
6459
6460
6461
6462
6463int dev_change_flags(struct net_device *dev, unsigned int flags)
6464{
6465 int ret;
6466 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
6467
6468 ret = __dev_change_flags(dev, flags);
6469 if (ret < 0)
6470 return ret;
6471
6472 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
6473 __dev_notify_flags(dev, old_flags, changes);
6474 return ret;
6475}
6476EXPORT_SYMBOL(dev_change_flags);
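
/* Illustrative sketch (hypothetical caller): flags are updated with a
 * read-modify-write cycle under RTNL, here administratively bringing the
 * interface up.
 *
 *	rtnl_lock();
 *	flags = dev_get_flags(dev);
 *	err = dev_change_flags(dev, flags | IFF_UP);
 *	rtnl_unlock();
 */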
6477
6478static int __dev_set_mtu(struct net_device *dev, int new_mtu)
6479{
6480 const struct net_device_ops *ops = dev->netdev_ops;
6481
6482 if (ops->ndo_change_mtu)
6483 return ops->ndo_change_mtu(dev, new_mtu);
6484
6485 dev->mtu = new_mtu;
6486 return 0;
6487}
6488
6489
6490
6491
6492
6493
6494
6495
6496int dev_set_mtu(struct net_device *dev, int new_mtu)
6497{
6498 int err, orig_mtu;
6499
6500 if (new_mtu == dev->mtu)
		return 0;

	/* MTU must be positive. */
	if (new_mtu < 0)
6505 return -EINVAL;
6506
6507 if (!netif_device_present(dev))
6508 return -ENODEV;
6509
6510 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
6511 err = notifier_to_errno(err);
6512 if (err)
6513 return err;
6514
6515 orig_mtu = dev->mtu;
6516 err = __dev_set_mtu(dev, new_mtu);
6517
6518 if (!err) {
6519 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6520 err = notifier_to_errno(err);
		if (err) {
			/* setting mtu back and notifying everyone again,
			 * so that they have a chance to revert changes.
			 */
			__dev_set_mtu(dev, orig_mtu);
6526 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6527 }
6528 }
6529 return err;
6530}
6531EXPORT_SYMBOL(dev_set_mtu);
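
/* Illustrative sketch (hypothetical caller): changing the MTU from process
 * context while holding RTNL, as the notifier chain expects; the value 9000
 * is only an example.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 *	if (err)
 *		pr_warn("MTU change rejected: %d\n", err);
 */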
6532
6533
6534
6535
6536
6537
6538void dev_set_group(struct net_device *dev, int new_group)
6539{
6540 dev->group = new_group;
6541}
EXPORT_SYMBOL(dev_set_group);

/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *
 *	Change the hardware (MAC) address of the device
 */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
6552{
6553 const struct net_device_ops *ops = dev->netdev_ops;
6554 int err;
6555
6556 if (!ops->ndo_set_mac_address)
6557 return -EOPNOTSUPP;
6558 if (sa->sa_family != dev->type)
6559 return -EINVAL;
6560 if (!netif_device_present(dev))
6561 return -ENODEV;
6562 err = ops->ndo_set_mac_address(dev, sa);
6563 if (err)
6564 return err;
6565 dev->addr_assign_type = NET_ADDR_SET;
6566 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
6567 add_device_randomness(dev->dev_addr, dev->addr_len);
6568 return 0;
6569}
6570EXPORT_SYMBOL(dev_set_mac_address);
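
/* Illustrative sketch (hypothetical caller): the new address is passed as a
 * struct sockaddr whose family must match dev->type; new_mac is a
 * placeholder buffer of ETH_ALEN bytes for an Ethernet device.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */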
6571
6572
6573
6574
6575
6576
6577
6578
6579int dev_change_carrier(struct net_device *dev, bool new_carrier)
6580{
6581 const struct net_device_ops *ops = dev->netdev_ops;
6582
6583 if (!ops->ndo_change_carrier)
6584 return -EOPNOTSUPP;
6585 if (!netif_device_present(dev))
6586 return -ENODEV;
6587 return ops->ndo_change_carrier(dev, new_carrier);
6588}
6589EXPORT_SYMBOL(dev_change_carrier);
6590
6591
6592
6593
6594
6595
6596
6597
6598int dev_get_phys_port_id(struct net_device *dev,
6599 struct netdev_phys_item_id *ppid)
6600{
6601 const struct net_device_ops *ops = dev->netdev_ops;
6602
6603 if (!ops->ndo_get_phys_port_id)
6604 return -EOPNOTSUPP;
6605 return ops->ndo_get_phys_port_id(dev, ppid);
6606}
6607EXPORT_SYMBOL(dev_get_phys_port_id);
6608
6609
6610
6611
6612
6613
6614
6615
6616
6617int dev_get_phys_port_name(struct net_device *dev,
6618 char *name, size_t len)
6619{
6620 const struct net_device_ops *ops = dev->netdev_ops;
6621
6622 if (!ops->ndo_get_phys_port_name)
6623 return -EOPNOTSUPP;
6624 return ops->ndo_get_phys_port_name(dev, name, len);
6625}
6626EXPORT_SYMBOL(dev_get_phys_port_name);
6627
6628
6629
6630
6631
6632
6633
6634
6635
6636int dev_change_proto_down(struct net_device *dev, bool proto_down)
6637{
6638 const struct net_device_ops *ops = dev->netdev_ops;
6639
6640 if (!ops->ndo_change_proto_down)
6641 return -EOPNOTSUPP;
6642 if (!netif_device_present(dev))
6643 return -ENODEV;
6644 return ops->ndo_change_proto_down(dev, proto_down);
6645}
EXPORT_SYMBOL(dev_change_proto_down);

/**
 *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
 *	@dev: device
 *	@fd: new program fd or negative value to clear
 *
 *	Set or clear a bpf program for a device
 */
int dev_change_xdp_fd(struct net_device *dev, int fd)
6656{
6657 const struct net_device_ops *ops = dev->netdev_ops;
6658 struct bpf_prog *prog = NULL;
6659 struct netdev_xdp xdp = {};
6660 int err;
6661
6662 if (!ops->ndo_xdp)
6663 return -EOPNOTSUPP;
6664 if (fd >= 0) {
6665 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
6666 if (IS_ERR(prog))
6667 return PTR_ERR(prog);
6668 }
6669
6670 xdp.command = XDP_SETUP_PROG;
6671 xdp.prog = prog;
6672 err = ops->ndo_xdp(dev, &xdp);
6673 if (err < 0 && prog)
6674 bpf_prog_put(prog);
6675
6676 return err;
6677}
6678EXPORT_SYMBOL(dev_change_xdp_fd);
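
/* Illustrative sketch (hypothetical caller): attaching an XDP program whose
 * file descriptor came from user space, then detaching it again with a
 * negative fd; prog_fd is a placeholder.
 *
 *	rtnl_lock();
 *	err = dev_change_xdp_fd(dev, prog_fd);	// attach
 *	...
 *	err = dev_change_xdp_fd(dev, -1);	// detach
 *	rtnl_unlock();
 */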
6679
6680
6681
6682
6683
6684
6685
6686
6687
6688static int dev_new_index(struct net *net)
6689{
6690 int ifindex = net->ifindex;
6691 for (;;) {
6692 if (++ifindex <= 0)
6693 ifindex = 1;
6694 if (!__dev_get_by_index(net, ifindex))
6695 return net->ifindex = ifindex;
6696 }
6697}
6698
6699
6700static LIST_HEAD(net_todo_list);
6701DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
6702
6703static void net_set_todo(struct net_device *dev)
6704{
6705 list_add_tail(&dev->todo_list, &net_todo_list);
6706 dev_net(dev)->dev_unreg_count++;
6707}
6708
6709static void rollback_registered_many(struct list_head *head)
6710{
6711 struct net_device *dev, *tmp;
6712 LIST_HEAD(close_head);
6713
6714 BUG_ON(dev_boot_phase);
6715 ASSERT_RTNL();
6716
6717 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
6718
6719
6720
6721
6722 if (dev->reg_state == NETREG_UNINITIALIZED) {
6723 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
6724 dev->name, dev);
6725
6726 WARN_ON(1);
6727 list_del(&dev->unreg_list);
6728 continue;
6729 }
6730 dev->dismantle = true;
6731 BUG_ON(dev->reg_state != NETREG_REGISTERED);
6732 }
6733
6734
6735 list_for_each_entry(dev, head, unreg_list)
6736 list_add_tail(&dev->close_list, &close_head);
6737 dev_close_many(&close_head, true);
6738
6739 list_for_each_entry(dev, head, unreg_list) {
6740
6741 unlist_netdevice(dev);
6742
6743 dev->reg_state = NETREG_UNREGISTERING;
6744 }
6745 flush_all_backlogs();
6746
6747 synchronize_net();
6748
6749 list_for_each_entry(dev, head, unreg_list) {
6750 struct sk_buff *skb = NULL;
6751
6752
6753 dev_shutdown(dev);
6754
6755
6756
6757
6758
6759 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6760
6761 if (!dev->rtnl_link_ops ||
6762 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6763 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
6764 GFP_KERNEL);
6765
6766
6767
6768
6769 dev_uc_flush(dev);
6770 dev_mc_flush(dev);
6771
6772 if (dev->netdev_ops->ndo_uninit)
6773 dev->netdev_ops->ndo_uninit(dev);
6774
6775 if (skb)
6776 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
6777
6778
6779 WARN_ON(netdev_has_any_upper_dev(dev));
6780
6781
6782 netdev_unregister_kobject(dev);
6783#ifdef CONFIG_XPS
6784
6785 netif_reset_xps_queues_gt(dev, 0);
6786#endif
6787 }
6788
6789 synchronize_net();
6790
6791 list_for_each_entry(dev, head, unreg_list)
6792 dev_put(dev);
6793}
6794
6795static void rollback_registered(struct net_device *dev)
6796{
6797 LIST_HEAD(single);
6798
6799 list_add(&dev->unreg_list, &single);
6800 rollback_registered_many(&single);
6801 list_del(&single);
6802}
6803
6804static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
6805 struct net_device *upper, netdev_features_t features)
6806{
6807 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6808 netdev_features_t feature;
6809 int feature_bit;
6810
6811 for_each_netdev_feature(&upper_disables, feature_bit) {
6812 feature = __NETIF_F_BIT(feature_bit);
6813 if (!(upper->wanted_features & feature)
6814 && (features & feature)) {
6815 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
6816 &feature, upper->name);
6817 features &= ~feature;
6818 }
6819 }
6820
6821 return features;
6822}
6823
6824static void netdev_sync_lower_features(struct net_device *upper,
6825 struct net_device *lower, netdev_features_t features)
6826{
6827 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6828 netdev_features_t feature;
6829 int feature_bit;
6830
6831 for_each_netdev_feature(&upper_disables, feature_bit) {
6832 feature = __NETIF_F_BIT(feature_bit);
6833 if (!(features & feature) && (lower->features & feature)) {
6834 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
6835 &feature, lower->name);
6836 lower->wanted_features &= ~feature;
6837 netdev_update_features(lower);
6838
6839 if (unlikely(lower->features & feature))
6840 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
6841 &feature, lower->name);
6842 }
6843 }
6844}
6845
6846static netdev_features_t netdev_fix_features(struct net_device *dev,
6847 netdev_features_t features)
6848{
6849
6850 if ((features & NETIF_F_HW_CSUM) &&
6851 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
6852 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
6853 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
6854 }
6855
6856
6857 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
6858 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
6859 features &= ~NETIF_F_ALL_TSO;
6860 }
6861
6862 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
6863 !(features & NETIF_F_IP_CSUM)) {
6864 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
6865 features &= ~NETIF_F_TSO;
6866 features &= ~NETIF_F_TSO_ECN;
6867 }
6868
6869 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
6870 !(features & NETIF_F_IPV6_CSUM)) {
6871 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
6872 features &= ~NETIF_F_TSO6;
6873 }
6874
6875
6876 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
6877 features &= ~NETIF_F_TSO_MANGLEID;
6878
6879
6880 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6881 features &= ~NETIF_F_TSO_ECN;
6882
6883
6884 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
6885 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
6886 features &= ~NETIF_F_GSO;
6887 }
6888
6889
6890 if (features & NETIF_F_UFO) {
6891
6892 if (!(features & NETIF_F_HW_CSUM) &&
6893 ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
6894 (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
6895 netdev_dbg(dev,
6896 "Dropping NETIF_F_UFO since no checksum offload features.\n");
6897 features &= ~NETIF_F_UFO;
6898 }
6899
6900 if (!(features & NETIF_F_SG)) {
6901 netdev_dbg(dev,
6902 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
6903 features &= ~NETIF_F_UFO;
6904 }
6905 }
6906
6907
6908 if ((features & dev->gso_partial_features) &&
6909 !(features & NETIF_F_GSO_PARTIAL)) {
6910 netdev_dbg(dev,
6911 "Dropping partially supported GSO features since no GSO partial.\n");
6912 features &= ~dev->gso_partial_features;
6913 }
6914
6915#ifdef CONFIG_NET_RX_BUSY_POLL
6916 if (dev->netdev_ops->ndo_busy_poll)
6917 features |= NETIF_F_BUSY_POLL;
6918 else
6919#endif
6920 features &= ~NETIF_F_BUSY_POLL;
6921
6922 return features;
6923}
6924
6925int __netdev_update_features(struct net_device *dev)
6926{
6927 struct net_device *upper, *lower;
6928 netdev_features_t features;
6929 struct list_head *iter;
6930 int err = -1;
6931
6932 ASSERT_RTNL();
6933
6934 features = netdev_get_wanted_features(dev);
6935
6936 if (dev->netdev_ops->ndo_fix_features)
6937 features = dev->netdev_ops->ndo_fix_features(dev, features);
6938
6939
6940 features = netdev_fix_features(dev, features);
6941
6942
6943 netdev_for_each_upper_dev_rcu(dev, upper, iter)
6944 features = netdev_sync_upper_features(dev, upper, features);
6945
6946 if (dev->features == features)
6947 goto sync_lower;
6948
6949 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6950 &dev->features, &features);
6951
6952 if (dev->netdev_ops->ndo_set_features)
6953 err = dev->netdev_ops->ndo_set_features(dev, features);
6954 else
6955 err = 0;
6956
6957 if (unlikely(err < 0)) {
6958 netdev_err(dev,
6959 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6960 err, &features, &dev->features);
6961
6962
6963
6964 return -1;
6965 }
6966
6967sync_lower:
6968
6969
6970
6971 netdev_for_each_lower_dev(dev, lower, iter)
6972 netdev_sync_lower_features(dev, lower, features);
6973
6974 if (!err)
6975 dev->features = features;
6976
6977 return err < 0 ? 0 : 1;
6978}
6979
6980
6981
6982
6983
6984
6985
6986
6987
6988void netdev_update_features(struct net_device *dev)
6989{
6990 if (__netdev_update_features(dev))
6991 netdev_features_change(dev);
6992}
EXPORT_SYMBOL(netdev_update_features);

/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications even
 *	if they have not changed. Should be called instead of
 *	netdev_update_features() if also dev->vlan_features might
 *	have changed to allow the changes to be propagated to stacked
 *	VLAN devices.
 */
void netdev_change_features(struct net_device *dev)
7006{
7007 __netdev_update_features(dev);
7008 netdev_features_change(dev);
7009}
7010EXPORT_SYMBOL(netdev_change_features);
7011
7012
7013
7014
7015
7016
7017
7018
7019
7020
7021void netif_stacked_transfer_operstate(const struct net_device *rootdev,
7022 struct net_device *dev)
7023{
7024 if (rootdev->operstate == IF_OPER_DORMANT)
7025 netif_dormant_on(dev);
7026 else
7027 netif_dormant_off(dev);
7028
7029 if (netif_carrier_ok(rootdev)) {
7030 if (!netif_carrier_ok(dev))
7031 netif_carrier_on(dev);
7032 } else {
7033 if (netif_carrier_ok(dev))
7034 netif_carrier_off(dev);
7035 }
7036}
7037EXPORT_SYMBOL(netif_stacked_transfer_operstate);
7038
7039#ifdef CONFIG_SYSFS
7040static int netif_alloc_rx_queues(struct net_device *dev)
7041{
7042 unsigned int i, count = dev->num_rx_queues;
7043 struct netdev_rx_queue *rx;
7044 size_t sz = count * sizeof(*rx);
7045
7046 BUG_ON(count < 1);
7047
7048 rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7049 if (!rx) {
7050 rx = vzalloc(sz);
7051 if (!rx)
7052 return -ENOMEM;
7053 }
7054 dev->_rx = rx;
7055
7056 for (i = 0; i < count; i++)
7057 rx[i].dev = dev;
7058 return 0;
7059}
7060#endif
7061
7062static void netdev_init_one_queue(struct net_device *dev,
7063 struct netdev_queue *queue, void *_unused)
7064{
7065
7066 spin_lock_init(&queue->_xmit_lock);
7067 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
7068 queue->xmit_lock_owner = -1;
7069 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
7070 queue->dev = dev;
7071#ifdef CONFIG_BQL
7072 dql_init(&queue->dql, HZ);
7073#endif
7074}
7075
7076static void netif_free_tx_queues(struct net_device *dev)
7077{
7078 kvfree(dev->_tx);
7079}
7080
7081static int netif_alloc_netdev_queues(struct net_device *dev)
7082{
7083 unsigned int count = dev->num_tx_queues;
7084 struct netdev_queue *tx;
7085 size_t sz = count * sizeof(*tx);
7086
7087 if (count < 1 || count > 0xffff)
7088 return -EINVAL;
7089
7090 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7091 if (!tx) {
7092 tx = vzalloc(sz);
7093 if (!tx)
7094 return -ENOMEM;
7095 }
7096 dev->_tx = tx;
7097
7098 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
7099 spin_lock_init(&dev->tx_global_lock);
7100
7101 return 0;
7102}
7103
7104void netif_tx_stop_all_queues(struct net_device *dev)
7105{
7106 unsigned int i;
7107
7108 for (i = 0; i < dev->num_tx_queues; i++) {
7109 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
7110 netif_tx_stop_queue(txq);
7111 }
7112}
EXPORT_SYMBOL(netif_tx_stop_all_queues);

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */
int register_netdevice(struct net_device *dev)
7133{
7134 int ret;
7135 struct net *net = dev_net(dev);
7136
7137 BUG_ON(dev_boot_phase);
7138 ASSERT_RTNL();
7139
7140 might_sleep();
7141
7142
7143 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
7144 BUG_ON(!net);
7145
7146 spin_lock_init(&dev->addr_list_lock);
7147 netdev_set_addr_lockdep_class(dev);
7148
7149 ret = dev_get_valid_name(net, dev, dev->name);
7150 if (ret < 0)
7151 goto out;
7152
7153
7154 if (dev->netdev_ops->ndo_init) {
7155 ret = dev->netdev_ops->ndo_init(dev);
7156 if (ret) {
7157 if (ret > 0)
7158 ret = -EIO;
7159 goto out;
7160 }
7161 }
7162
7163 if (((dev->hw_features | dev->features) &
7164 NETIF_F_HW_VLAN_CTAG_FILTER) &&
7165 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
7166 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
7167 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
7168 ret = -EINVAL;
7169 goto err_uninit;
7170 }
7171
7172 ret = -EBUSY;
7173 if (!dev->ifindex)
7174 dev->ifindex = dev_new_index(net);
7175 else if (__dev_get_by_index(net, dev->ifindex))
7176 goto err_uninit;
7177
7178
7179
7180
7181 dev->hw_features |= NETIF_F_SOFT_FEATURES;
7182 dev->features |= NETIF_F_SOFT_FEATURES;
7183 dev->wanted_features = dev->features & dev->hw_features;
7184
7185 if (!(dev->flags & IFF_LOOPBACK))
7186 dev->hw_features |= NETIF_F_NOCACHE_COPY;
7187
7188
7189
7190
7191
7192
7193 if (dev->hw_features & NETIF_F_TSO)
7194 dev->hw_features |= NETIF_F_TSO_MANGLEID;
7195 if (dev->vlan_features & NETIF_F_TSO)
7196 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
7197 if (dev->mpls_features & NETIF_F_TSO)
7198 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
7199 if (dev->hw_enc_features & NETIF_F_TSO)
7200 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
7201
7202
7203
7204 dev->vlan_features |= NETIF_F_HIGHDMA;
7205
7206
7207
7208 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
7209
7210
7211
7212 dev->mpls_features |= NETIF_F_SG;
7213
7214 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
7215 ret = notifier_to_errno(ret);
7216 if (ret)
7217 goto err_uninit;
7218
7219 ret = netdev_register_kobject(dev);
7220 if (ret)
7221 goto err_uninit;
7222 dev->reg_state = NETREG_REGISTERED;
7223
7224 __netdev_update_features(dev);
7225
7226
7227
7228
7229
7230
7231 set_bit(__LINK_STATE_PRESENT, &dev->state);
7232
7233 linkwatch_init_dev(dev);
7234
7235 dev_init_scheduler(dev);
7236 dev_hold(dev);
7237 list_netdevice(dev);
7238 add_device_randomness(dev->dev_addr, dev->addr_len);
7239
7240
7241
7242
7243
7244 if (dev->addr_assign_type == NET_ADDR_PERM)
7245 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
7246
7247
7248 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
7249 ret = notifier_to_errno(ret);
7250 if (ret) {
7251 rollback_registered(dev);
7252 dev->reg_state = NETREG_UNREGISTERED;
7253 }
7254
7255
7256
7257
7258 if (!dev->rtnl_link_ops ||
7259 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
7260 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7261
7262out:
7263 return ret;
7264
7265err_uninit:
7266 if (dev->netdev_ops->ndo_uninit)
7267 dev->netdev_ops->ndo_uninit(dev);
7268 goto out;
7269}
7270EXPORT_SYMBOL(register_netdevice);
7271
7272
7273
7274
7275
7276
7277
7278
7279
7280
7281
7282int init_dummy_netdev(struct net_device *dev)
7283{
7284
7285
7286
7287
7288
7289 memset(dev, 0, sizeof(struct net_device));
7290
7291
7292
7293
7294 dev->reg_state = NETREG_DUMMY;
7295
7296
7297 INIT_LIST_HEAD(&dev->napi_list);
7298
7299
7300 set_bit(__LINK_STATE_PRESENT, &dev->state);
7301 set_bit(__LINK_STATE_START, &dev->state);
7302
7303
7304
7305
7306
7307
7308 return 0;
7309}
EXPORT_SYMBOL_GPL(init_dummy_netdev);

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */
int register_netdev(struct net_device *dev)
7327{
7328 int err;
7329
7330 rtnl_lock();
7331 err = register_netdevice(dev);
7332 rtnl_unlock();
7333 return err;
7334}
7335EXPORT_SYMBOL(register_netdev);
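
/* Illustrative sketch (hypothetical driver probe, not from this file):
 * typical allocation and registration with error handling; struct my_priv
 * and my_netdev_ops are placeholders.
 *
 *	dev = alloc_etherdev(sizeof(struct my_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &my_netdev_ops;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */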
7336
7337int netdev_refcnt_read(const struct net_device *dev)
7338{
7339 int i, refcnt = 0;
7340
7341 for_each_possible_cpu(i)
7342 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
7343 return refcnt;
7344}
7345EXPORT_SYMBOL(netdev_refcnt_read);
7346
7347
7348
7349
7350
7351
7352
7353
7354
7355
7356
7357
7358
7359static void netdev_wait_allrefs(struct net_device *dev)
7360{
7361 unsigned long rebroadcast_time, warning_time;
7362 int refcnt;
7363
7364 linkwatch_forget_dev(dev);
7365
7366 rebroadcast_time = warning_time = jiffies;
7367 refcnt = netdev_refcnt_read(dev);
7368
7369 while (refcnt != 0) {
7370 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
7371 rtnl_lock();
7372
7373
7374 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
7375
7376 __rtnl_unlock();
7377 rcu_barrier();
7378 rtnl_lock();
7379
7380 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7381 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
7382 &dev->state)) {
7383
7384
7385
7386
7387
7388
7389 linkwatch_run_queue();
7390 }
7391
7392 __rtnl_unlock();
7393
7394 rebroadcast_time = jiffies;
7395 }
7396
7397 msleep(250);
7398
7399 refcnt = netdev_refcnt_read(dev);
7400
7401 if (time_after(jiffies, warning_time + 10 * HZ)) {
7402 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
7403 dev->name, refcnt);
7404 warning_time = jiffies;
7405 }
7406 }
7407}
7408
7409
7410
7411
7412
7413
7414
7415
7416
7417
7418
7419
7420
7421
7422
7423
7424
7425
7426
7427
7428
7429
7430
7431
7432
7433void netdev_run_todo(void)
7434{
7435 struct list_head list;
7436
7437
7438 list_replace_init(&net_todo_list, &list);
7439
7440 __rtnl_unlock();
7441
7442
7443
7444 if (!list_empty(&list))
7445 rcu_barrier();
7446
7447 while (!list_empty(&list)) {
7448 struct net_device *dev
7449 = list_first_entry(&list, struct net_device, todo_list);
7450 list_del(&dev->todo_list);
7451
7452 rtnl_lock();
7453 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7454 __rtnl_unlock();
7455
7456 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
7457 pr_err("network todo '%s' but state %d\n",
7458 dev->name, dev->reg_state);
7459 dump_stack();
7460 continue;
7461 }
7462
7463 dev->reg_state = NETREG_UNREGISTERED;
7464
7465 netdev_wait_allrefs(dev);
7466
7467
7468 BUG_ON(netdev_refcnt_read(dev));
7469 BUG_ON(!list_empty(&dev->ptype_all));
7470 BUG_ON(!list_empty(&dev->ptype_specific));
7471 WARN_ON(rcu_access_pointer(dev->ip_ptr));
7472 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
7473 WARN_ON(dev->dn_ptr);
7474
7475 if (dev->destructor)
7476 dev->destructor(dev);
7477
7478
7479 rtnl_lock();
7480 dev_net(dev)->dev_unreg_count--;
7481 __rtnl_unlock();
7482 wake_up(&netdev_unregistering_wq);
7483
7484
7485 kobject_put(&dev->dev.kobj);
7486 }
7487}
7488
7489
7490
7491
7492
7493
7494void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
7495 const struct net_device_stats *netdev_stats)
7496{
7497#if BITS_PER_LONG == 64
7498 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
7499 memcpy(stats64, netdev_stats, sizeof(*stats64));
7500
7501 memset((char *)stats64 + sizeof(*netdev_stats), 0,
7502 sizeof(*stats64) - sizeof(*netdev_stats));
7503#else
7504 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
7505 const unsigned long *src = (const unsigned long *)netdev_stats;
7506 u64 *dst = (u64 *)stats64;
7507
7508 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
7509 for (i = 0; i < n; i++)
7510 dst[i] = src[i];
7511
7512 memset((char *)stats64 + n * sizeof(u64), 0,
7513 sizeof(*stats64) - n * sizeof(u64));
7514#endif
7515}
EXPORT_SYMBOL(netdev_stats_to_stats64);

/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */
struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
					struct rtnl_link_stats64 *storage)
7530{
7531 const struct net_device_ops *ops = dev->netdev_ops;
7532
7533 if (ops->ndo_get_stats64) {
7534 memset(storage, 0, sizeof(*storage));
7535 ops->ndo_get_stats64(dev, storage);
7536 } else if (ops->ndo_get_stats) {
7537 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
7538 } else {
7539 netdev_stats_to_stats64(storage, &dev->stats);
7540 }
7541 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
7542 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
7543 storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
7544 return storage;
7545}
7546EXPORT_SYMBOL(dev_get_stats);
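
/* Illustrative sketch (hypothetical caller): dev_get_stats() fills a
 * caller-provided buffer, so stack storage is fine.
 *
 *	struct rtnl_link_stats64 stats;
 *
 *	dev_get_stats(dev, &stats);
 *	pr_info("%s: %llu packets received\n",
 *		dev->name, (unsigned long long)stats.rx_packets);
 */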
7547
7548struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
7549{
7550 struct netdev_queue *queue = dev_ingress_queue(dev);
7551
7552#ifdef CONFIG_NET_CLS_ACT
7553 if (queue)
7554 return queue;
7555 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
7556 if (!queue)
7557 return NULL;
7558 netdev_init_one_queue(dev, queue, NULL);
7559 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
7560 queue->qdisc_sleeping = &noop_qdisc;
7561 rcu_assign_pointer(dev->ingress_queue, queue);
7562#endif
7563 return queue;
7564}
7565
7566static const struct ethtool_ops default_ethtool_ops;
7567
7568void netdev_set_default_ethtool_ops(struct net_device *dev,
7569 const struct ethtool_ops *ops)
7570{
7571 if (dev->ethtool_ops == &default_ethtool_ops)
7572 dev->ethtool_ops = ops;
7573}
7574EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
7575
7576void netdev_freemem(struct net_device *dev)
7577{
7578 char *addr = (char *)dev - dev->padded;
7579
7580 kvfree(addr);
7581}
7582
7583
7584
7585
7586
7587
7588
7589
7590
7591
7592
7593
7594
7595
7596struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7597 unsigned char name_assign_type,
7598 void (*setup)(struct net_device *),
7599 unsigned int txqs, unsigned int rxqs)
7600{
7601 struct net_device *dev;
7602 size_t alloc_size;
7603 struct net_device *p;
7604
7605 BUG_ON(strlen(name) >= sizeof(dev->name));
7606
7607 if (txqs < 1) {
7608 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
7609 return NULL;
7610 }
7611
7612#ifdef CONFIG_SYSFS
7613 if (rxqs < 1) {
7614 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
7615 return NULL;
7616 }
7617#endif
7618
7619 alloc_size = sizeof(struct net_device);
7620 if (sizeof_priv) {
7621
7622 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
7623 alloc_size += sizeof_priv;
7624 }
7625
7626 alloc_size += NETDEV_ALIGN - 1;
7627
7628 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7629 if (!p)
7630 p = vzalloc(alloc_size);
7631 if (!p)
7632 return NULL;
7633
7634 dev = PTR_ALIGN(p, NETDEV_ALIGN);
7635 dev->padded = (char *)dev - (char *)p;
7636
7637 dev->pcpu_refcnt = alloc_percpu(int);
7638 if (!dev->pcpu_refcnt)
7639 goto free_dev;
7640
7641 if (dev_addr_init(dev))
7642 goto free_pcpu;
7643
7644 dev_mc_init(dev);
7645 dev_uc_init(dev);
7646
7647 dev_net_set(dev, &init_net);
7648
7649 dev->gso_max_size = GSO_MAX_SIZE;
7650 dev->gso_max_segs = GSO_MAX_SEGS;
7651
7652 INIT_LIST_HEAD(&dev->napi_list);
7653 INIT_LIST_HEAD(&dev->unreg_list);
7654 INIT_LIST_HEAD(&dev->close_list);
7655 INIT_LIST_HEAD(&dev->link_watch_list);
7656 INIT_LIST_HEAD(&dev->adj_list.upper);
7657 INIT_LIST_HEAD(&dev->adj_list.lower);
7658 INIT_LIST_HEAD(&dev->all_adj_list.upper);
7659 INIT_LIST_HEAD(&dev->all_adj_list.lower);
7660 INIT_LIST_HEAD(&dev->ptype_all);
7661 INIT_LIST_HEAD(&dev->ptype_specific);
7662#ifdef CONFIG_NET_SCHED
7663 hash_init(dev->qdisc_hash);
7664#endif
7665 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
7666 setup(dev);
7667
7668 if (!dev->tx_queue_len) {
7669 dev->priv_flags |= IFF_NO_QUEUE;
7670 dev->tx_queue_len = 1;
7671 }
7672
7673 dev->num_tx_queues = txqs;
7674 dev->real_num_tx_queues = txqs;
7675 if (netif_alloc_netdev_queues(dev))
7676 goto free_all;
7677
7678#ifdef CONFIG_SYSFS
7679 dev->num_rx_queues = rxqs;
7680 dev->real_num_rx_queues = rxqs;
7681 if (netif_alloc_rx_queues(dev))
7682 goto free_all;
7683#endif
7684
7685 strcpy(dev->name, name);
7686 dev->name_assign_type = name_assign_type;
7687 dev->group = INIT_NETDEV_GROUP;
7688 if (!dev->ethtool_ops)
7689 dev->ethtool_ops = &default_ethtool_ops;
7690
7691 nf_hook_ingress_init(dev);
7692
7693 return dev;
7694
7695free_all:
7696 free_netdev(dev);
7697 return NULL;
7698
7699free_pcpu:
7700 free_percpu(dev->pcpu_refcnt);
7701free_dev:
7702 netdev_freemem(dev);
7703 return NULL;
7704}
EXPORT_SYMBOL(alloc_netdev_mqs);

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released. If this
 *	is the last reference then it will be freed.
 *	Must be called in process context.
 */
void free_netdev(struct net_device *dev)
7717{
7718 struct napi_struct *p, *n;
7719
7720 might_sleep();
7721 netif_free_tx_queues(dev);
7722#ifdef CONFIG_SYSFS
7723 kvfree(dev->_rx);
7724#endif
7725
7726 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
7727
7728
7729 dev_addr_flush(dev);
7730
7731 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
7732 netif_napi_del(p);
7733
7734 free_percpu(dev->pcpu_refcnt);
7735 dev->pcpu_refcnt = NULL;
7736
7737
7738 if (dev->reg_state == NETREG_UNINITIALIZED) {
7739 netdev_freemem(dev);
7740 return;
7741 }
7742
7743 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
7744 dev->reg_state = NETREG_RELEASED;
7745
7746
7747 put_device(&dev->dev);
7748}
EXPORT_SYMBOL(free_netdev);

/**
 *	synchronize_net - Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
7758{
7759 might_sleep();
7760 if (rtnl_is_locked())
7761 synchronize_rcu_expedited();
7762 else
7763 synchronize_rcu();
7764}
EXPORT_SYMBOL(synchronize_net);

/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
7781{
7782 ASSERT_RTNL();
7783
7784 if (head) {
7785 list_move_tail(&dev->unreg_list, head);
7786 } else {
7787 rollback_registered(dev);
7788
7789 net_set_todo(dev);
7790 }
7791}
EXPORT_SYMBOL(unregister_netdevice_queue);

/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 *
 *	Note: As most callers use a stack allocated list_head,
 *	we force a list_del() to make sure stack won't be corrupted later.
 */
void unregister_netdevice_many(struct list_head *head)
7802{
7803 struct net_device *dev;
7804
7805 if (!list_empty(head)) {
7806 rollback_registered_many(head);
7807 list_for_each_entry(dev, head, unreg_list)
7808 net_set_todo(dev);
7809 list_del(head);
7810 }
7811}
EXPORT_SYMBOL(unregister_netdevice_many);

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore. In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
7826{
7827 rtnl_lock();
7828 unregister_netdevice(dev);
7829 rtnl_unlock();
7830}
7831EXPORT_SYMBOL(unregister_netdev);
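
/* Illustrative sketch (hypothetical driver remove): teardown mirrors the
 * registration path; unregister first (takes RTNL and may sleep), then
 * free the device once it is fully unregistered.
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */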

/**
 *	dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: if not NULL, name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace-local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	if (__dev_get_by_name(net, dev->name)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (dev_get_valid_name(net, dev, pat) < 0)
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice/unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols that we are about to destroy
	 * this device. They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
	rcu_barrier();
	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_uc_flush(dev);
	dev_mc_flush(dev);

	/* Send a netdev-removed uevent to the old namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
	netdev_adjacent_del_links(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex))
		dev->ifindex = dev_new_index(net);

	/* Send a netdev-add uevent to the new namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
	netdev_adjacent_add_links(dev);

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully set up before sending notifications.
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);

	synchronize_net();
	err = 0;
out:
	return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);
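/*
 * Rough usage sketch (the lookup of the target namespace is omitted and
 * the "eth%d" fallback pattern is only an example): hold RTNL and pass a
 * name pattern in case the current name is already taken in the target
 * namespace, as default_device_exit() below does with its "dev%d"
 * fallback.
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, target_net, "eth%d");
 *	rtnl_unlock();
 */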

/* Migrate queued packets and NAPI state away from a CPU that has gone
 * offline, and hand them to the CPU running this notifier.
 */
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Append output queue from offline CPU. */
	if (oldsd->output_queue) {
		*sd->output_queue_tailp = oldsd->output_queue;
		sd->output_queue_tailp = oldsd->output_queue_tailp;
		oldsd->output_queue = NULL;
		oldsd->output_queue_tailp = &oldsd->output_queue;
	}

	/* Append NAPI poll list from offline CPU, with one exception:
	 * process_backlog() must be called by the CPU owning the percpu
	 * backlog.  Its process_queue and input_pkt_queue are drained below.
	 */
	while (!list_empty(&oldsd->poll_list)) {
		struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
							    struct napi_struct,
							    poll_list);

		list_del_init(&napi->poll_list);
		if (napi->poll == process_backlog)
			napi->state = 0;
		else
			____napi_schedule(sd, napi);
	}

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx_ni(skb);
		input_queue_head_incr(oldsd);
	}
	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx_ni(skb);
		input_queue_head_incr(oldsd);
	}

	return NOTIFY_OK;
}

/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask.  Returns the new feature set.
 */
netdev_features_t netdev_increment_features(netdev_features_t all,
	netdev_features_t one, netdev_features_t mask)
{
	if (mask & NETIF_F_HW_CSUM)
		mask |= NETIF_F_CSUM_MASK;
	mask |= NETIF_F_VLAN_CHALLENGED;

	all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
	all &= one | ~NETIF_F_ALL_FOR_ALL;

	/* If one device supports hw checksumming, set for all. */
	if (all & NETIF_F_HW_CSUM)
		all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
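/*
 * Sketch of how an aggregating (master) device would fold the feature
 * sets of its lower devices together (the FOO_* constants and list names
 * are illustrative; see callers such as the bonding and team drivers for
 * the real thing):
 *
 *	netdev_features_t features = FOO_INITIAL_FEATURES;
 *
 *	list_for_each_entry(lower, &master->lower_list, list)
 *		features = netdev_increment_features(features,
 *						     lower->dev->features,
 *						     FOO_FEATURE_MASK);
 */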

static struct hlist_head * __net_init netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	if (net != &init_net)
		INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine network driver for device.
 */
const char *netdev_drivername(const struct net_device *dev)
{
	const struct device_driver *driver;
	const struct device *parent;
	const char *empty = "";

	parent = dev->dev.parent;
	if (!parent)
		return empty;

	driver = parent->driver;
	if (driver && driver->name)
		return driver->name;
	return empty;
}
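/*
 * Mostly used in diagnostics; the TX watchdog, for instance, includes it
 * so a hung-queue report identifies the responsible driver.  Purely
 * illustrative call:
 *
 *	netdev_warn(dev, "driver %s seems stuck\n", netdev_drivername(dev));
 */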

static void __netdev_printk(const char *level, const struct net_device *dev,
			    struct va_format *vaf)
{
	if (dev && dev->dev.parent) {
		dev_printk_emit(level[1] - '0',
				dev->dev.parent,
				"%s %s %s%s: %pV",
				dev_driver_string(dev->dev.parent),
				dev_name(dev->dev.parent),
				netdev_name(dev), netdev_reg_state(dev),
				vaf);
	} else if (dev) {
		printk("%s%s%s: %pV",
		       level, netdev_name(dev), netdev_reg_state(dev), vaf);
	} else {
		printk("%s(NULL net_device): %pV", level, vaf);
	}
}

void netdev_printk(const char *level, const struct net_device *dev,
		   const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	__netdev_printk(level, dev, &vaf);

	va_end(args);
}
EXPORT_SYMBOL(netdev_printk);

/* Generate one exported wrapper per log level: netdev_emerg() through
 * netdev_info().
 */
#define define_netdev_printk_level(func, level)			\
void func(const struct net_device *dev, const char *fmt, ...)		\
{									\
	struct va_format vaf;						\
	va_list args;							\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	__netdev_printk(level, dev, &vaf);				\
									\
	va_end(args);							\
}									\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);
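/*
 * Drivers normally use these wrappers instead of raw printk() so every
 * message is prefixed with the bus device, driver and interface name.
 * Illustrative calls (the values are made up):
 *
 *	netdev_err(dev, "failed to start queue, error %d\n", err);
 *	netdev_info(dev, "link up, %u Mbps full duplex\n", speed);
 */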

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Leave virtual devices for the generic cleanup */
		if (dev->rtnl_link_ops)
			continue;

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			pr_emerg("%s: failed to move %s to init_net: %d\n",
				 __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
	/* Return with the rtnl lock held when there are no
	 * network devices unregistering in any network namespace
	 * in net_list.
	 */
	struct net *net;
	bool unregistering;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&netdev_unregistering_wq, &wait);
	for (;;) {
		unregistering = false;
		rtnl_lock();
		list_for_each_entry(net, net_list, exit_list) {
			if (net->dev_unreg_count > 0) {
				unregistering = true;
				break;
			}
		}
		if (!unregistering)
			break;
		__rtnl_unlock();

		wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
	}
	remove_wait_queue(&netdev_unregistering_wq, &wait);
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed,
	 * wait here for all pending unregistrations to complete
	 * before unregistering the loopback device and allowing the
	 * network namespace to be freed.
	 *
	 * The netdev todo list containing all network device
	 * unregistrations that happen in default_device_exit_batch
	 * will run in the rtnl_unlock() at the end of
	 * default_device_exit_batch.
	 */
	rtnl_lock_unregistering(net_list);
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

/*
 *	Initialize the DEV module.  At boot time this walks the device list
 *	and unhooks any devices that fail to initialise (normally hardware
 *	not present) and leaves us with a valid list of present and active
 *	devices.
 *
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct work_struct *flush = per_cpu_ptr(&flush_works, i);
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		INIT_WORK(flush, flush_backlog);

		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device is
	 * present in a network namespace, the loopback device must be
	 * present too.  Since we now dynamically allocate and free the
	 * loopback device, ensure this invariant is maintained by keeping
	 * the loopback device the first device on the list of network
	 * devices: the first device that appears and the last network
	 * device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_subsys_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);