/*
 *	NET3	Protocol independent device support routines.
 */
75#include <asm/uaccess.h>
76#include <linux/bitops.h>
77#include <linux/capability.h>
78#include <linux/cpu.h>
79#include <linux/types.h>
80#include <linux/kernel.h>
81#include <linux/hash.h>
82#include <linux/slab.h>
83#include <linux/sched.h>
84#include <linux/mutex.h>
85#include <linux/string.h>
86#include <linux/mm.h>
87#include <linux/socket.h>
88#include <linux/sockios.h>
89#include <linux/errno.h>
90#include <linux/interrupt.h>
91#include <linux/if_ether.h>
92#include <linux/netdevice.h>
93#include <linux/etherdevice.h>
94#include <linux/ethtool.h>
95#include <linux/notifier.h>
96#include <linux/skbuff.h>
97#include <net/net_namespace.h>
98#include <net/sock.h>
99#include <linux/rtnetlink.h>
100#include <linux/stat.h>
101#include <net/dst.h>
102#include <net/pkt_sched.h>
103#include <net/checksum.h>
104#include <net/xfrm.h>
105#include <linux/highmem.h>
106#include <linux/init.h>
107#include <linux/module.h>
108#include <linux/netpoll.h>
109#include <linux/rcupdate.h>
110#include <linux/delay.h>
111#include <net/iw_handler.h>
112#include <asm/current.h>
113#include <linux/audit.h>
114#include <linux/dmaengine.h>
115#include <linux/err.h>
116#include <linux/ctype.h>
117#include <linux/if_arp.h>
118#include <linux/if_vlan.h>
119#include <linux/ip.h>
120#include <net/ip.h>
121#include <net/mpls.h>
122#include <linux/ipv6.h>
123#include <linux/in.h>
124#include <linux/jhash.h>
125#include <linux/random.h>
126#include <trace/events/napi.h>
127#include <trace/events/net.h>
128#include <trace/events/skb.h>
129#include <linux/pci.h>
130#include <linux/inetdevice.h>
131#include <linux/cpu_rmap.h>
132#include <linux/static_key.h>
133#include <linux/hashtable.h>
134#include <linux/vmalloc.h>
135#include <linux/if_macvlan.h>
136#include <linux/errqueue.h>
137#include <linux/hrtimer.h>
138
139#include "net-sysfs.h"
140

#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

147static DEFINE_SPINLOCK(ptype_lock);
148static DEFINE_SPINLOCK(offload_lock);
149struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
150struct list_head ptype_all __read_mostly;
151static struct list_head offload_base __read_mostly;
152
153static int netif_rx_internal(struct sk_buff *skb);
154static int call_netdevice_notifiers_info(unsigned long val,
155 struct net_device *dev,
156 struct netdev_notifier_info *info);
157

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
177DEFINE_RWLOCK(dev_base_lock);
178EXPORT_SYMBOL(dev_base_lock);

/* protects napi_hash addition/deletion and napi_gen_id */
181static DEFINE_SPINLOCK(napi_hash_lock);
182
183static unsigned int napi_gen_id;
184static DEFINE_HASHTABLE(napi_hash, 8);
185
186static seqcount_t devnet_rename_seq;
187
188static inline void dev_base_seq_inc(struct net *net)
189{
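	/* Bump the generation counter sampled by rtnetlink dumps to detect
	 * concurrent changes to the device list; the loop skips zero.
	 */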
190 while (++net->dev_base_seq == 0);
191}
192
193static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
194{
195 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
196
197 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
198}
199
200static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
201{
202 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
203}
204
205static inline void rps_lock(struct softnet_data *sd)
206{
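	/* The backlog input_pkt_queue is only touched by remote CPUs when
	 * RPS is enabled, so the lock is needed only in that configuration.
	 */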
207#ifdef CONFIG_RPS
208 spin_lock(&sd->input_pkt_queue.lock);
209#endif
210}
211
212static inline void rps_unlock(struct softnet_data *sd)
213{
214#ifdef CONFIG_RPS
215 spin_unlock(&sd->input_pkt_queue.lock);
216#endif
217}

/* Device list insertion */
220static void list_netdevice(struct net_device *dev)
221{
222 struct net *net = dev_net(dev);
223
224 ASSERT_RTNL();
225
226 write_lock_bh(&dev_base_lock);
227 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
228 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
229 hlist_add_head_rcu(&dev->index_hlist,
230 dev_index_hash(net, dev->ifindex));
231 write_unlock_bh(&dev_base_lock);
232
233 dev_base_seq_inc(net);
234}
235
/* Device list removal
 * caller must hold the rtnl semaphore
 */
239static void unlist_netdevice(struct net_device *dev)
240{
241 ASSERT_RTNL();
242
243
244 write_lock_bh(&dev_base_lock);
245 list_del_rcu(&dev->dev_list);
246 hlist_del_rcu(&dev->name_hlist);
247 hlist_del_rcu(&dev->index_hlist);
248 write_unlock_bh(&dev_base_lock);
249
250 dev_base_seq_inc(dev_net(dev));
251}
252
/*
 *	Our notifier list
 */
257static RAW_NOTIFIER_HEAD(netdev_chain);
258
/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */
264DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
265EXPORT_PER_CPU_SYMBOL(softnet_data);
266
267#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
272static const unsigned short netdev_lock_type[] =
273 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
274 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
275 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
276 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
277 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
278 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
279 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
280 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
281 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
282 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
283 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
284 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
285 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
286 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
287 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
288
289static const char *const netdev_lock_name[] =
290 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
291 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
292 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
293 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
294 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
295 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
296 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
297 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
298 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
299 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
300 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
301 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
302 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
303 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
304 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
305
306static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
307static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
308
309static inline unsigned short netdev_lock_pos(unsigned short dev_type)
310{
311 int i;
312
313 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
314 if (netdev_lock_type[i] == dev_type)
315 return i;
316
317 return ARRAY_SIZE(netdev_lock_type) - 1;
318}
319
320static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
321 unsigned short dev_type)
322{
323 int i;
324
325 i = netdev_lock_pos(dev_type);
326 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
327 netdev_lock_name[i]);
328}
329
330static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
331{
332 int i;
333
334 i = netdev_lock_pos(dev->type);
335 lockdep_set_class_and_name(&dev->addr_list_lock,
336 &netdev_addr_lock_key[i],
337 netdev_lock_name[i]);
338}
339#else
340static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
341 unsigned short dev_type)
342{
343}
344static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
345{
346}
347#endif
348
/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 */

371static inline struct list_head *ptype_head(const struct packet_type *pt)
372{
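	/* ETH_P_ALL taps live on their own list; everything else is hashed
	 * by protocol number into ptype_base.
	 */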
373 if (pt->type == htons(ETH_P_ALL))
374 return &ptype_all;
375 else
376 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
377}
378

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */
392void dev_add_pack(struct packet_type *pt)
393{
394 struct list_head *head = ptype_head(pt);
395
396 spin_lock(&ptype_lock);
397 list_add_rcu(&pt->list, head);
398 spin_unlock(&ptype_lock);
399}
400EXPORT_SYMBOL(dev_add_pack);
401

/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
415void __dev_remove_pack(struct packet_type *pt)
416{
417 struct list_head *head = ptype_head(pt);
418 struct packet_type *pt1;
419
420 spin_lock(&ptype_lock);
421
422 list_for_each_entry(pt1, head, list) {
423 if (pt == pt1) {
424 list_del_rcu(&pt->list);
425 goto out;
426 }
427 }
428
429 pr_warn("dev_remove_pack: %p not found\n", pt);
430out:
431 spin_unlock(&ptype_lock);
432}
433EXPORT_SYMBOL(__dev_remove_pack);
434

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
447void dev_remove_pack(struct packet_type *pt)
448{
449 __dev_remove_pack(pt);
450
451 synchronize_net();
452}
453EXPORT_SYMBOL(dev_remove_pack);
454

/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&packet_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new offload handlers (until the next received packet).
 */
468void dev_add_offload(struct packet_offload *po)
469{
470 struct list_head *head = &offload_base;
471
472 spin_lock(&offload_lock);
473 list_add_rcu(&po->list, head);
474 spin_unlock(&offload_lock);
475}
476EXPORT_SYMBOL(dev_add_offload);
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491static void __dev_remove_offload(struct packet_offload *po)
492{
493 struct list_head *head = &offload_base;
494 struct packet_offload *po1;
495
496 spin_lock(&offload_lock);
497
498 list_for_each_entry(po1, head, list) {
499 if (po == po1) {
500 list_del_rcu(&po->list);
501 goto out;
502 }
503 }
504
505 pr_warn("dev_remove_offload: %p not found\n", po);
506out:
507 spin_unlock(&offload_lock);
508}
509

/**
 *	dev_remove_offload	 - remove packet offload handler
 *	@po: packet offload declaration
 *
 *	Remove a packet offload handler that was previously added to the kernel
 *	offload handlers by dev_add_offload(). The passed &packet_offload is
 *	removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
522void dev_remove_offload(struct packet_offload *po)
523{
524 __dev_remove_offload(po);
525
526 synchronize_net();
527}
528EXPORT_SYMBOL(dev_remove_offload);
529
/*******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
537static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
538
539
540
541
542
543
544
545
546
547
548static int netdev_boot_setup_add(char *name, struct ifmap *map)
549{
550 struct netdev_boot_setup *s;
551 int i;
552
553 s = dev_boot_setup;
554 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
555 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
556 memset(s[i].name, 0, sizeof(s[i].name));
557 strlcpy(s[i].name, name, IFNAMSIZ);
558 memcpy(&s[i].map, map, sizeof(s[i].map));
559 break;
560 }
561 }
562
563 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
564}
565

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
575int netdev_boot_setup_check(struct net_device *dev)
576{
577 struct netdev_boot_setup *s = dev_boot_setup;
578 int i;
579
580 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
581 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
582 !strcmp(dev->name, s[i].name)) {
583 dev->irq = s[i].map.irq;
584 dev->base_addr = s[i].map.base_addr;
585 dev->mem_start = s[i].map.mem_start;
586 dev->mem_end = s[i].map.mem_end;
587 return 1;
588 }
589 }
590 return 0;
591}
592EXPORT_SYMBOL(netdev_boot_setup_check);
593
594
595
596
597
598
599
600
601
602
603
604
605unsigned long netdev_boot_base(const char *prefix, int unit)
606{
607 const struct netdev_boot_setup *s = dev_boot_setup;
608 char name[IFNAMSIZ];
609 int i;
610
611 sprintf(name, "%s%d", prefix, unit);
612
613
614
615
616
617 if (__dev_get_by_name(&init_net, name))
618 return 1;
619
620 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
621 if (!strcmp(name, s[i].name))
622 return s[i].map.base_addr;
623 return 0;
624}
625
626
627
628
629int __init netdev_boot_setup(char *str)
630{
631 int ints[5];
632 struct ifmap map;
633
634 str = get_options(str, ARRAY_SIZE(ints), ints);
635 if (!str || !*str)
636 return 0;
637
638
639 memset(&map, 0, sizeof(map));
640 if (ints[0] > 0)
641 map.irq = ints[1];
642 if (ints[0] > 1)
643 map.base_addr = ints[2];
644 if (ints[0] > 2)
645 map.mem_start = ints[3];
646 if (ints[0] > 3)
647 map.mem_end = ints[4];
648
649
650 return netdev_boot_setup_add(str, &map);
651}
652
653__setup("netdev=", netdev_boot_setup);
654
/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */
673struct net_device *__dev_get_by_name(struct net *net, const char *name)
674{
675 struct net_device *dev;
676 struct hlist_head *head = dev_name_hash(net, name);
677
678 hlist_for_each_entry(dev, head, name_hlist)
679 if (!strncmp(dev->name, name, IFNAMSIZ))
680 return dev;
681
682 return NULL;
683}
684EXPORT_SYMBOL(__dev_get_by_name);
685

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */
698struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
699{
700 struct net_device *dev;
701 struct hlist_head *head = dev_name_hash(net, name);
702
703 hlist_for_each_entry_rcu(dev, head, name_hlist)
704 if (!strncmp(dev->name, name, IFNAMSIZ))
705 return dev;
706
707 return NULL;
708}
709EXPORT_SYMBOL(dev_get_by_name_rcu);
710

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */
723struct net_device *dev_get_by_name(struct net *net, const char *name)
724{
725 struct net_device *dev;
726
727 rcu_read_lock();
728 dev = dev_get_by_name_rcu(net, name);
729 if (dev)
730 dev_hold(dev);
731 rcu_read_unlock();
732 return dev;
733}
734EXPORT_SYMBOL(dev_get_by_name);
735

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */
748struct net_device *__dev_get_by_index(struct net *net, int ifindex)
749{
750 struct net_device *dev;
751 struct hlist_head *head = dev_index_hash(net, ifindex);
752
753 hlist_for_each_entry(dev, head, index_hlist)
754 if (dev->ifindex == ifindex)
755 return dev;
756
757 return NULL;
758}
759EXPORT_SYMBOL(__dev_get_by_index);
760

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */
772struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
773{
774 struct net_device *dev;
775 struct hlist_head *head = dev_index_hash(net, ifindex);
776
777 hlist_for_each_entry_rcu(dev, head, index_hlist)
778 if (dev->ifindex == ifindex)
779 return dev;
780
781 return NULL;
782}
783EXPORT_SYMBOL(dev_get_by_index_rcu);
784

/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */
797struct net_device *dev_get_by_index(struct net *net, int ifindex)
798{
799 struct net_device *dev;
800
801 rcu_read_lock();
802 dev = dev_get_by_index_rcu(net, ifindex);
803 if (dev)
804 dev_hold(dev);
805 rcu_read_unlock();
806 return dev;
807}
808EXPORT_SYMBOL(dev_get_by_index);
809

/**
 *	netdev_get_name - get a netdevice name, knowing its ifindex.
 *	@net: network namespace
 *	@name: a pointer to the buffer where the name will be stored.
 *	@ifindex: the ifindex of the interface to get the name from.
 *
 *	The use of raw_seqcount_begin() and cond_resched() before
 *	retrying is required as we want to give the writers a chance
 *	to complete before proceeding.
 */
820int netdev_get_name(struct net *net, char *name, int ifindex)
821{
822 struct net_device *dev;
823 unsigned int seq;
824
825retry:
826 seq = raw_seqcount_begin(&devnet_rename_seq);
827 rcu_read_lock();
828 dev = dev_get_by_index_rcu(net, ifindex);
829 if (!dev) {
830 rcu_read_unlock();
831 return -ENODEV;
832 }
833
834 strcpy(name, dev->name);
835 rcu_read_unlock();
836 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
837 cond_resched();
838 goto retry;
839 }
840
841 return 0;
842}
843

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 */
858struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
859 const char *ha)
860{
861 struct net_device *dev;
862
863 for_each_netdev_rcu(net, dev)
864 if (dev->type == type &&
865 !memcmp(dev->dev_addr, ha, dev->addr_len))
866 return dev;
867
868 return NULL;
869}
870EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
871
872struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
873{
874 struct net_device *dev;
875
876 ASSERT_RTNL();
877 for_each_netdev(net, dev)
878 if (dev->type == type)
879 return dev;
880
881 return NULL;
882}
883EXPORT_SYMBOL(__dev_getfirstbyhwtype);
884
885struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
886{
887 struct net_device *dev, *ret = NULL;
888
889 rcu_read_lock();
890 for_each_netdev_rcu(net, dev)
891 if (dev->type == type) {
892 dev_hold(dev);
893 ret = dev;
894 break;
895 }
896 rcu_read_unlock();
897 return ret;
898}
899EXPORT_SYMBOL(dev_getfirstbyhwtype);
900
901
902
903
904
905
906
907
908
909
910
911
912struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
913 unsigned short mask)
914{
915 struct net_device *dev, *ret;
916
917 ASSERT_RTNL();
918
919 ret = NULL;
920 for_each_netdev(net, dev) {
921 if (((dev->flags ^ if_flags) & mask) == 0) {
922 ret = dev;
923 break;
924 }
925 }
926 return ret;
927}
928EXPORT_SYMBOL(__dev_get_by_flags);
929

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
938bool dev_valid_name(const char *name)
939{
940 if (*name == '\0')
941 return false;
942 if (strlen(name) >= IFNAMSIZ)
943 return false;
944 if (!strcmp(name, ".") || !strcmp(name, ".."))
945 return false;
946
947 while (*name) {
948 if (*name == '/' || isspace(*name))
949 return false;
950 name++;
951 }
952 return true;
953}
954EXPORT_SYMBOL(dev_valid_name);
955

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	races with other registerings.
 *
 *	Returns the number of the unit assigned or a negative errno code.
 */
971static int __dev_alloc_name(struct net *net, const char *name, char *buf)
972{
973 int i = 0;
974 const char *p;
975 const int max_netdevices = 8*PAGE_SIZE;
976 unsigned long *inuse;
977 struct net_device *d;
978
979 p = strnchr(name, IFNAMSIZ-1, '%');
980 if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
986 if (p[1] != 'd' || strchr(p + 2, '%'))
987 return -EINVAL;
988
989
990 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
991 if (!inuse)
992 return -ENOMEM;
993
994 for_each_netdev(net, d) {
995 if (!sscanf(d->name, name, &i))
996 continue;
997 if (i < 0 || i >= max_netdevices)
998 continue;
999
1000
1001 snprintf(buf, IFNAMSIZ, name, i);
1002 if (!strncmp(buf, d->name, IFNAMSIZ))
1003 set_bit(i, inuse);
1004 }
1005
1006 i = find_first_zero_bit(inuse, max_netdevices);
1007 free_page((unsigned long) inuse);
1008 }
1009
1010 if (buf != name)
1011 snprintf(buf, IFNAMSIZ, name, i);
1012 if (!__dev_get_by_name(net, buf))
1013 return i;
1014
1015
1016
1017
1018
1019 return -ENFILE;
1020}
1021

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	races with other registerings.
 *
 *	Returns the number of the unit assigned or a negative errno code.
 */
1036int dev_alloc_name(struct net_device *dev, const char *name)
1037{
1038 char buf[IFNAMSIZ];
1039 struct net *net;
1040 int ret;
1041
1042 BUG_ON(!dev_net(dev));
1043 net = dev_net(dev);
1044 ret = __dev_alloc_name(net, name, buf);
1045 if (ret >= 0)
1046 strlcpy(dev->name, buf, IFNAMSIZ);
1047 return ret;
1048}
1049EXPORT_SYMBOL(dev_alloc_name);
1050
1051static int dev_alloc_name_ns(struct net *net,
1052 struct net_device *dev,
1053 const char *name)
1054{
1055 char buf[IFNAMSIZ];
1056 int ret;
1057
1058 ret = __dev_alloc_name(net, name, buf);
1059 if (ret >= 0)
1060 strlcpy(dev->name, buf, IFNAMSIZ);
1061 return ret;
1062}
1063
1064static int dev_get_valid_name(struct net *net,
1065 struct net_device *dev,
1066 const char *name)
1067{
1068 BUG_ON(!net);
1069
1070 if (!dev_valid_name(name))
1071 return -EINVAL;
1072
1073 if (strchr(name, '%'))
1074 return dev_alloc_name_ns(net, dev, name);
1075 else if (__dev_get_by_name(net, name))
1076 return -EEXIST;
1077 else if (dev->name != name)
1078 strlcpy(dev->name, name, IFNAMSIZ);
1079
1080 return 0;
1081}
1082

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
1091int dev_change_name(struct net_device *dev, const char *newname)
1092{
1093 unsigned char old_assign_type;
1094 char oldname[IFNAMSIZ];
1095 int err = 0;
1096 int ret;
1097 struct net *net;
1098
1099 ASSERT_RTNL();
1100 BUG_ON(!dev_net(dev));
1101
1102 net = dev_net(dev);
1103 if (dev->flags & IFF_UP)
1104 return -EBUSY;
1105
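	/* Take the rename seqcount so that netdev_get_name() callers can
	 * detect a rename in progress and retry.
	 */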
1106 write_seqcount_begin(&devnet_rename_seq);
1107
1108 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1109 write_seqcount_end(&devnet_rename_seq);
1110 return 0;
1111 }
1112
1113 memcpy(oldname, dev->name, IFNAMSIZ);
1114
1115 err = dev_get_valid_name(net, dev, newname);
1116 if (err < 0) {
1117 write_seqcount_end(&devnet_rename_seq);
1118 return err;
1119 }
1120
1121 if (oldname[0] && !strchr(oldname, '%'))
1122 netdev_info(dev, "renamed from %s\n", oldname);
1123
1124 old_assign_type = dev->name_assign_type;
1125 dev->name_assign_type = NET_NAME_RENAMED;
1126
1127rollback:
1128 ret = device_rename(&dev->dev, dev->name);
1129 if (ret) {
1130 memcpy(dev->name, oldname, IFNAMSIZ);
1131 dev->name_assign_type = old_assign_type;
1132 write_seqcount_end(&devnet_rename_seq);
1133 return ret;
1134 }
1135
1136 write_seqcount_end(&devnet_rename_seq);
1137
1138 netdev_adjacent_rename_links(dev, oldname);
1139
1140 write_lock_bh(&dev_base_lock);
1141 hlist_del_rcu(&dev->name_hlist);
1142 write_unlock_bh(&dev_base_lock);
1143
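	/* Wait for RCU readers that may still be walking the old name hash
	 * chain before the node is reused for the new name below.
	 */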
1144 synchronize_rcu();
1145
1146 write_lock_bh(&dev_base_lock);
1147 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1148 write_unlock_bh(&dev_base_lock);
1149
1150 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1151 ret = notifier_to_errno(ret);
1152
1153 if (ret) {
1154
1155 if (err >= 0) {
1156 err = ret;
1157 write_seqcount_begin(&devnet_rename_seq);
1158 memcpy(dev->name, oldname, IFNAMSIZ);
1159 memcpy(oldname, newname, IFNAMSIZ);
1160 dev->name_assign_type = old_assign_type;
1161 old_assign_type = NET_NAME_RENAMED;
1162 goto rollback;
1163 } else {
1164 pr_err("%s: name change rollback failed: %d\n",
1165 dev->name, ret);
1166 }
1167 }
1168
1169 return err;
1170}
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1181{
1182 char *new_ifalias;
1183
1184 ASSERT_RTNL();
1185
1186 if (len >= IFALIASZ)
1187 return -EINVAL;
1188
1189 if (!len) {
1190 kfree(dev->ifalias);
1191 dev->ifalias = NULL;
1192 return 0;
1193 }
1194
1195 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1196 if (!new_ifalias)
1197 return -ENOMEM;
1198 dev->ifalias = new_ifalias;
1199
1200 strlcpy(dev->ifalias, alias, len+1);
1201 return len;
1202}
1203

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
1211void netdev_features_change(struct net_device *dev)
1212{
1213 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1214}
1215EXPORT_SYMBOL(netdev_features_change);
1216

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
1225void netdev_state_change(struct net_device *dev)
1226{
1227 if (dev->flags & IFF_UP) {
1228 struct netdev_notifier_change_info change_info;
1229
1230 change_info.flags_changed = 0;
1231 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1232 &change_info.info);
1233 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1234 }
1235}
1236EXPORT_SYMBOL(netdev_state_change);
1237

/**
 *	netdev_notify_peers - notify network peers about existence of @dev
 *	@dev: network device
 *
 *	Generate traffic such that interested network peers are aware of
 *	@dev, such as by generating a gratuitous ARP. This may be used when
 *	a device wants to inform the rest of the network about some sort of
 *	reconfiguration such as a failover event or virtual machine
 *	migration.
 */
1248void netdev_notify_peers(struct net_device *dev)
1249{
1250 rtnl_lock();
1251 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1252 rtnl_unlock();
1253}
1254EXPORT_SYMBOL(netdev_notify_peers);
1255
1256static int __dev_open(struct net_device *dev)
1257{
1258 const struct net_device_ops *ops = dev->netdev_ops;
1259 int ret;
1260
1261 ASSERT_RTNL();
1262
1263 if (!netif_device_present(dev))
1264 return -ENODEV;
1265
1266
1267
1268
1269
1270 netpoll_poll_disable(dev);
1271
1272 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1273 ret = notifier_to_errno(ret);
1274 if (ret)
1275 return ret;
1276
1277 set_bit(__LINK_STATE_START, &dev->state);
1278
1279 if (ops->ndo_validate_addr)
1280 ret = ops->ndo_validate_addr(dev);
1281
1282 if (!ret && ops->ndo_open)
1283 ret = ops->ndo_open(dev);
1284
1285 netpoll_poll_enable(dev);
1286
1287 if (ret)
1288 clear_bit(__LINK_STATE_START, &dev->state);
1289 else {
1290 dev->flags |= IFF_UP;
1291 dev_set_rx_mode(dev);
1292 dev_activate(dev);
1293 add_device_randomness(dev->dev_addr, dev->addr_len);
1294 }
1295
1296 return ret;
1297}
1298

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
1311int dev_open(struct net_device *dev)
1312{
1313 int ret;
1314
1315 if (dev->flags & IFF_UP)
1316 return 0;
1317
1318 ret = __dev_open(dev);
1319 if (ret < 0)
1320 return ret;
1321
1322 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1323 call_netdevice_notifiers(NETDEV_UP, dev);
1324
1325 return ret;
1326}
1327EXPORT_SYMBOL(dev_open);
1328
1329static int __dev_close_many(struct list_head *head)
1330{
1331 struct net_device *dev;
1332
1333 ASSERT_RTNL();
1334 might_sleep();
1335
1336 list_for_each_entry(dev, head, close_list) {
1337
1338 netpoll_poll_disable(dev);
1339
1340 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1341
1342 clear_bit(__LINK_STATE_START, &dev->state);
1343
1344
1345
1346
1347
1348
1349
1350 smp_mb__after_atomic();
1351 }
1352
1353 dev_deactivate_many(head);
1354
1355 list_for_each_entry(dev, head, close_list) {
1356 const struct net_device_ops *ops = dev->netdev_ops;
1357
1358
1359
1360
1361
1362
1363
1364
1365 if (ops->ndo_stop)
1366 ops->ndo_stop(dev);
1367
1368 dev->flags &= ~IFF_UP;
1369 netpoll_poll_enable(dev);
1370 }
1371
1372 return 0;
1373}
1374
1375static int __dev_close(struct net_device *dev)
1376{
1377 int retval;
1378 LIST_HEAD(single);
1379
1380 list_add(&dev->close_list, &single);
1381 retval = __dev_close_many(&single);
1382 list_del(&single);
1383
1384 return retval;
1385}
1386
1387static int dev_close_many(struct list_head *head)
1388{
1389 struct net_device *dev, *tmp;
1390
1391
1392 list_for_each_entry_safe(dev, tmp, head, close_list)
1393 if (!(dev->flags & IFF_UP))
1394 list_del_init(&dev->close_list);
1395
1396 __dev_close_many(head);
1397
1398 list_for_each_entry_safe(dev, tmp, head, close_list) {
1399 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1400 call_netdevice_notifiers(NETDEV_DOWN, dev);
1401 list_del_init(&dev->close_list);
1402 }
1403
1404 return 0;
1405}
1406

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
1416int dev_close(struct net_device *dev)
1417{
1418 if (dev->flags & IFF_UP) {
1419 LIST_HEAD(single);
1420
1421 list_add(&dev->close_list, &single);
1422 dev_close_many(&single);
1423 list_del(&single);
1424 }
1425 return 0;
1426}
1427EXPORT_SYMBOL(dev_close);
1428

/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
1438void dev_disable_lro(struct net_device *dev)
1439{
1440 struct net_device *lower_dev;
1441 struct list_head *iter;
1442
1443 dev->wanted_features &= ~NETIF_F_LRO;
1444 netdev_update_features(dev);
1445
1446 if (unlikely(dev->features & NETIF_F_LRO))
1447 netdev_WARN(dev, "failed to disable LRO!\n");
1448
1449 netdev_for_each_lower_dev(dev, lower_dev, iter)
1450 dev_disable_lro(lower_dev);
1451}
1452EXPORT_SYMBOL(dev_disable_lro);
1453
1454static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1455 struct net_device *dev)
1456{
1457 struct netdev_notifier_info info;
1458
1459 netdev_notifier_info_init(&info, dev);
1460 return nb->notifier_call(nb, val, &info);
1461}
1462
1463static int dev_boot_phase = 1;
1464

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */
1479int register_netdevice_notifier(struct notifier_block *nb)
1480{
1481 struct net_device *dev;
1482 struct net_device *last;
1483 struct net *net;
1484 int err;
1485
1486 rtnl_lock();
1487 err = raw_notifier_chain_register(&netdev_chain, nb);
1488 if (err)
1489 goto unlock;
1490 if (dev_boot_phase)
1491 goto unlock;
1492 for_each_net(net) {
1493 for_each_netdev(net, dev) {
1494 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1495 err = notifier_to_errno(err);
1496 if (err)
1497 goto rollback;
1498
1499 if (!(dev->flags & IFF_UP))
1500 continue;
1501
1502 call_netdevice_notifier(nb, NETDEV_UP, dev);
1503 }
1504 }
1505
1506unlock:
1507 rtnl_unlock();
1508 return err;
1509
1510rollback:
1511 last = dev;
1512 for_each_net(net) {
1513 for_each_netdev(net, dev) {
1514 if (dev == last)
1515 goto outroll;
1516
1517 if (dev->flags & IFF_UP) {
1518 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1519 dev);
1520 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1521 }
1522 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1523 }
1524 }
1525
1526outroll:
1527 raw_notifier_chain_unregister(&netdev_chain, nb);
1528 goto unlock;
1529}
1530EXPORT_SYMBOL(register_netdevice_notifier);
1531

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked into the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 *
 *	After unregistering unregister and down device events are synthesized
 *	for all devices on the device list to the removed notifier to remove
 *	the need for special case cleanup code.
 */
1546int unregister_netdevice_notifier(struct notifier_block *nb)
1547{
1548 struct net_device *dev;
1549 struct net *net;
1550 int err;
1551
1552 rtnl_lock();
1553 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1554 if (err)
1555 goto unlock;
1556
1557 for_each_net(net) {
1558 for_each_netdev(net, dev) {
1559 if (dev->flags & IFF_UP) {
1560 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1561 dev);
1562 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1563 }
1564 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1565 }
1566 }
1567unlock:
1568 rtnl_unlock();
1569 return err;
1570}
1571EXPORT_SYMBOL(unregister_netdevice_notifier);
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583static int call_netdevice_notifiers_info(unsigned long val,
1584 struct net_device *dev,
1585 struct netdev_notifier_info *info)
1586{
1587 ASSERT_RTNL();
1588 netdev_notifier_info_init(info, dev);
1589 return raw_notifier_call_chain(&netdev_chain, val, info);
1590}
1591

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */
1601int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1602{
1603 struct netdev_notifier_info info;
1604
1605 return call_netdevice_notifiers_info(val, dev, &info);
1606}
1607EXPORT_SYMBOL(call_netdevice_notifiers);
1608
1609static struct static_key netstamp_needed __read_mostly;
1610#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
 * If net_disable_timestamp() is called from irq context, defer the
 * static_key_slow_dec() calls.
 */
1615static atomic_t netstamp_needed_deferred;
1616#endif
1617
1618void net_enable_timestamp(void)
1619{
1620#ifdef HAVE_JUMP_LABEL
1621 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1622
1623 if (deferred) {
1624 while (--deferred)
1625 static_key_slow_dec(&netstamp_needed);
1626 return;
1627 }
1628#endif
1629 static_key_slow_inc(&netstamp_needed);
1630}
1631EXPORT_SYMBOL(net_enable_timestamp);
1632
1633void net_disable_timestamp(void)
1634{
1635#ifdef HAVE_JUMP_LABEL
1636 if (in_interrupt()) {
1637 atomic_inc(&netstamp_needed_deferred);
1638 return;
1639 }
1640#endif
1641 static_key_slow_dec(&netstamp_needed);
1642}
1643EXPORT_SYMBOL(net_disable_timestamp);
1644
1645static inline void net_timestamp_set(struct sk_buff *skb)
1646{
1647 skb->tstamp.tv64 = 0;
1648 if (static_key_false(&netstamp_needed))
1649 __net_timestamp(skb);
1650}
1651
1652#define net_timestamp_check(COND, SKB) \
1653 if (static_key_false(&netstamp_needed)) { \
1654 if ((COND) && !(SKB)->tstamp.tv64) \
1655 __net_timestamp(SKB); \
1656 } \
1657
1658bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb)
1659{
1660 unsigned int len;
1661
1662 if (!(dev->flags & IFF_UP))
1663 return false;
1664
1665 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1666 if (skb->len <= len)
1667 return true;
1668
	/* if TSO is enabled, we don't care about the length as the packet
	 * could be forwarded without being segmented before
	 */
1672 if (skb_is_gso(skb))
1673 return true;
1674
1675 return false;
1676}
1677EXPORT_SYMBOL_GPL(is_skb_forwardable);
1678
1679int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1680{
1681 if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
1682 if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
1683 atomic_long_inc(&dev->rx_dropped);
1684 kfree_skb(skb);
1685 return NET_RX_DROP;
1686 }
1687 }
1688
1689 if (unlikely(!is_skb_forwardable(dev, skb))) {
1690 atomic_long_inc(&dev->rx_dropped);
1691 kfree_skb(skb);
1692 return NET_RX_DROP;
1693 }
1694
1695 skb_scrub_packet(skb, true);
1696 skb->protocol = eth_type_trans(skb, dev);
1697 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1698
1699 return 0;
1700}
1701EXPORT_SYMBOL_GPL(__dev_forward_skb);
1702

/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
1721int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1722{
1723 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
1724}
1725EXPORT_SYMBOL_GPL(dev_forward_skb);
1726
1727static inline int deliver_skb(struct sk_buff *skb,
1728 struct packet_type *pt_prev,
1729 struct net_device *orig_dev)
1730{
1731 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1732 return -ENOMEM;
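	/* The handler consumes a reference, so take one on behalf of the
	 * caller before invoking it.
	 */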
1733 atomic_inc(&skb->users);
1734 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1735}
1736
1737static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1738{
1739 if (!ptype->af_packet_priv || !skb->sk)
1740 return false;
1741
1742 if (ptype->id_match)
1743 return ptype->id_match(ptype, skb->sk);
1744 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1745 return true;
1746
1747 return false;
1748}
1749

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */
1755static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1756{
1757 struct packet_type *ptype;
1758 struct sk_buff *skb2 = NULL;
1759 struct packet_type *pt_prev = NULL;
1760
1761 rcu_read_lock();
1762 list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from.
		 */
1766 if ((ptype->dev == dev || !ptype->dev) &&
1767 (!skb_loop_sk(ptype, skb))) {
1768 if (pt_prev) {
1769 deliver_skb(skb2, pt_prev, skb->dev);
1770 pt_prev = ptype;
1771 continue;
1772 }
1773
1774 skb2 = skb_clone(skb, GFP_ATOMIC);
1775 if (!skb2)
1776 break;
1777
1778 net_timestamp_set(skb2);
1779
1780
1781
1782
1783
1784 skb_reset_mac_header(skb2);
1785
1786 if (skb_network_header(skb2) < skb2->data ||
1787 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1788 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1789 ntohs(skb2->protocol),
1790 dev->name);
1791 skb_reset_network_header(skb2);
1792 }
1793
1794 skb2->transport_header = skb2->network_header;
1795 skb2->pkt_type = PACKET_OUTGOING;
1796 pt_prev = ptype;
1797 }
1798 }
1799 if (pt_prev)
1800 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1801 rcu_read_unlock();
1802}
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1818{
1819 int i;
1820 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1821
1822
1823 if (tc->offset + tc->count > txq) {
1824 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1825 dev->num_tc = 0;
1826 return;
1827 }
1828
1829
1830 for (i = 1; i < TC_BITMASK + 1; i++) {
1831 int q = netdev_get_prio_tc_map(dev, i);
1832
1833 tc = &dev->tc_to_txq[q];
1834 if (tc->offset + tc->count > txq) {
1835 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1836 i, q);
1837 netdev_set_prio_tc_map(dev, i, 0);
1838 }
1839 }
1840}
1841
1842#ifdef CONFIG_XPS
1843static DEFINE_MUTEX(xps_map_mutex);
1844#define xmap_dereference(P) \
1845 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1846
1847static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1848 int cpu, u16 index)
1849{
1850 struct xps_map *map = NULL;
1851 int pos;
1852
1853 if (dev_maps)
1854 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1855
1856 for (pos = 0; map && pos < map->len; pos++) {
1857 if (map->queues[pos] == index) {
1858 if (map->len > 1) {
1859 map->queues[pos] = map->queues[--map->len];
1860 } else {
1861 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1862 kfree_rcu(map, rcu);
1863 map = NULL;
1864 }
1865 break;
1866 }
1867 }
1868
1869 return map;
1870}
1871
1872static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1873{
1874 struct xps_dev_maps *dev_maps;
1875 int cpu, i;
1876 bool active = false;
1877
1878 mutex_lock(&xps_map_mutex);
1879 dev_maps = xmap_dereference(dev->xps_maps);
1880
1881 if (!dev_maps)
1882 goto out_no_maps;
1883
1884 for_each_possible_cpu(cpu) {
1885 for (i = index; i < dev->num_tx_queues; i++) {
1886 if (!remove_xps_queue(dev_maps, cpu, i))
1887 break;
1888 }
1889 if (i == dev->num_tx_queues)
1890 active = true;
1891 }
1892
1893 if (!active) {
1894 RCU_INIT_POINTER(dev->xps_maps, NULL);
1895 kfree_rcu(dev_maps, rcu);
1896 }
1897
1898 for (i = index; i < dev->num_tx_queues; i++)
1899 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
1900 NUMA_NO_NODE);
1901
1902out_no_maps:
1903 mutex_unlock(&xps_map_mutex);
1904}
1905
1906static struct xps_map *expand_xps_map(struct xps_map *map,
1907 int cpu, u16 index)
1908{
1909 struct xps_map *new_map;
1910 int alloc_len = XPS_MIN_MAP_ALLOC;
1911 int i, pos;
1912
1913 for (pos = 0; map && pos < map->len; pos++) {
1914 if (map->queues[pos] != index)
1915 continue;
1916 return map;
1917 }
1918
1919
1920 if (map) {
1921 if (pos < map->alloc_len)
1922 return map;
1923
1924 alloc_len = map->alloc_len * 2;
1925 }
1926
1927
1928 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
1929 cpu_to_node(cpu));
1930 if (!new_map)
1931 return NULL;
1932
1933 for (i = 0; i < pos; i++)
1934 new_map->queues[i] = map->queues[i];
1935 new_map->alloc_len = alloc_len;
1936 new_map->len = pos;
1937
1938 return new_map;
1939}
1940
1941int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
1942 u16 index)
1943{
1944 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
1945 struct xps_map *map, *new_map;
1946 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
1947 int cpu, numa_node_id = -2;
1948 bool active = false;
1949
1950 mutex_lock(&xps_map_mutex);
1951
1952 dev_maps = xmap_dereference(dev->xps_maps);
1953
1954
1955 for_each_online_cpu(cpu) {
1956 if (!cpumask_test_cpu(cpu, mask))
1957 continue;
1958
1959 if (!new_dev_maps)
1960 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
1961 if (!new_dev_maps) {
1962 mutex_unlock(&xps_map_mutex);
1963 return -ENOMEM;
1964 }
1965
1966 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
1967 NULL;
1968
1969 map = expand_xps_map(map, cpu, index);
1970 if (!map)
1971 goto error;
1972
1973 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
1974 }
1975
1976 if (!new_dev_maps)
1977 goto out_no_new_maps;
1978
1979 for_each_possible_cpu(cpu) {
1980 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
1981
1982 int pos = 0;
1983
1984 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
1985 while ((pos < map->len) && (map->queues[pos] != index))
1986 pos++;
1987
1988 if (pos == map->len)
1989 map->queues[map->len++] = index;
1990#ifdef CONFIG_NUMA
1991 if (numa_node_id == -2)
1992 numa_node_id = cpu_to_node(cpu);
1993 else if (numa_node_id != cpu_to_node(cpu))
1994 numa_node_id = -1;
1995#endif
1996 } else if (dev_maps) {
1997
1998 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1999 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2000 }
2001
2002 }
2003
2004 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2005
2006
2007 if (dev_maps) {
2008 for_each_possible_cpu(cpu) {
2009 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2010 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2011 if (map && map != new_map)
2012 kfree_rcu(map, rcu);
2013 }
2014
2015 kfree_rcu(dev_maps, rcu);
2016 }
2017
2018 dev_maps = new_dev_maps;
2019 active = true;
2020
2021out_no_new_maps:
2022
2023 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2024 (numa_node_id >= 0) ? numa_node_id :
2025 NUMA_NO_NODE);
2026
2027 if (!dev_maps)
2028 goto out_no_maps;
2029
2030
2031 for_each_possible_cpu(cpu) {
2032 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2033 continue;
2034
2035 if (remove_xps_queue(dev_maps, cpu, index))
2036 active = true;
2037 }
2038
2039
2040 if (!active) {
2041 RCU_INIT_POINTER(dev->xps_maps, NULL);
2042 kfree_rcu(dev_maps, rcu);
2043 }
2044
2045out_no_maps:
2046 mutex_unlock(&xps_map_mutex);
2047
2048 return 0;
2049error:
2050
2051 for_each_possible_cpu(cpu) {
2052 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2053 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2054 NULL;
2055 if (new_map && new_map != map)
2056 kfree(new_map);
2057 }
2058
2059 mutex_unlock(&xps_map_mutex);
2060
2061 kfree(new_dev_maps);
2062 return -ENOMEM;
2063}
2064EXPORT_SYMBOL(netif_set_xps_queue);
2065
2066#endif
2067
2068
2069
2070
2071int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2072{
2073 int rc;
2074
2075 if (txq < 1 || txq > dev->num_tx_queues)
2076 return -EINVAL;
2077
2078 if (dev->reg_state == NETREG_REGISTERED ||
2079 dev->reg_state == NETREG_UNREGISTERING) {
2080 ASSERT_RTNL();
2081
2082 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2083 txq);
2084 if (rc)
2085 return rc;
2086
2087 if (dev->num_tc)
2088 netif_setup_tc(dev, txq);
2089
2090 if (txq < dev->real_num_tx_queues) {
2091 qdisc_reset_all_tx_gt(dev, txq);
2092#ifdef CONFIG_XPS
2093 netif_reset_xps_queues_gt(dev, txq);
2094#endif
2095 }
2096 }
2097
2098 dev->real_num_tx_queues = txq;
2099 return 0;
2100}
2101EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2102
2103#ifdef CONFIG_SYSFS
/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */
2114int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2115{
2116 int rc;
2117
2118 if (rxq < 1 || rxq > dev->num_rx_queues)
2119 return -EINVAL;
2120
2121 if (dev->reg_state == NETREG_REGISTERED) {
2122 ASSERT_RTNL();
2123
2124 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2125 rxq);
2126 if (rc)
2127 return rc;
2128 }
2129
2130 dev->real_num_rx_queues = rxq;
2131 return 0;
2132}
2133EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2134#endif
2135
2136
2137
2138
2139
2140
2141
2142int netif_get_num_default_rss_queues(void)
2143{
2144 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2145}
2146EXPORT_SYMBOL(netif_get_num_default_rss_queues);
2147
2148static inline void __netif_reschedule(struct Qdisc *q)
2149{
2150 struct softnet_data *sd;
2151 unsigned long flags;
2152
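	/* Append the qdisc to this CPU's output queue and kick the TX
	 * softirq to run it.
	 */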
2153 local_irq_save(flags);
2154 sd = this_cpu_ptr(&softnet_data);
2155 q->next_sched = NULL;
2156 *sd->output_queue_tailp = q;
2157 sd->output_queue_tailp = &q->next_sched;
2158 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2159 local_irq_restore(flags);
2160}
2161
2162void __netif_schedule(struct Qdisc *q)
2163{
2164 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2165 __netif_reschedule(q);
2166}
2167EXPORT_SYMBOL(__netif_schedule);
2168
2169struct dev_kfree_skb_cb {
2170 enum skb_free_reason reason;
2171};
2172
2173static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2174{
2175 return (struct dev_kfree_skb_cb *)skb->cb;
2176}
2177
2178void netif_schedule_queue(struct netdev_queue *txq)
2179{
2180 rcu_read_lock();
2181 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2182 struct Qdisc *q = rcu_dereference(txq->qdisc);
2183
2184 __netif_schedule(q);
2185 }
2186 rcu_read_unlock();
2187}
2188EXPORT_SYMBOL(netif_schedule_queue);
2189

/**
 *	netif_wake_subqueue - allow sending packets on subqueue
 *	@dev: network device
 *	@queue_index: sub queue index
 *
 *	Resume individual transmit queue of a device with multiple transmit
 *	queues.
 */
2197void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2198{
2199 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2200
2201 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
2202 struct Qdisc *q;
2203
2204 rcu_read_lock();
2205 q = rcu_dereference(txq->qdisc);
2206 __netif_schedule(q);
2207 rcu_read_unlock();
2208 }
2209}
2210EXPORT_SYMBOL(netif_wake_subqueue);
2211
2212void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2213{
2214 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2215 struct Qdisc *q;
2216
2217 rcu_read_lock();
2218 q = rcu_dereference(dev_queue->qdisc);
2219 __netif_schedule(q);
2220 rcu_read_unlock();
2221 }
2222}
2223EXPORT_SYMBOL(netif_tx_wake_queue);
2224
2225void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2226{
2227 unsigned long flags;
2228
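	/* Fast path: if we are the only user just drop the count to zero,
	 * otherwise release our reference and bail unless it was the last.
	 */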
2229 if (likely(atomic_read(&skb->users) == 1)) {
2230 smp_rmb();
2231 atomic_set(&skb->users, 0);
2232 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2233 return;
2234 }
2235 get_kfree_skb_cb(skb)->reason = reason;
2236 local_irq_save(flags);
2237 skb->next = __this_cpu_read(softnet_data.completion_queue);
2238 __this_cpu_write(softnet_data.completion_queue, skb);
2239 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2240 local_irq_restore(flags);
2241}
2242EXPORT_SYMBOL(__dev_kfree_skb_irq);
2243
2244void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2245{
2246 if (in_irq() || irqs_disabled())
2247 __dev_kfree_skb_irq(skb, reason);
2248 else
2249 dev_kfree_skb(skb);
2250}
2251EXPORT_SYMBOL(__dev_kfree_skb_any);
2252

/**
 *	netif_device_detach - mark device as removed
 *	@dev: network device
 *
 *	Mark device as removed from system and therefore no longer available.
 */
2260void netif_device_detach(struct net_device *dev)
2261{
2262 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2263 netif_running(dev)) {
2264 netif_tx_stop_all_queues(dev);
2265 }
2266}
2267EXPORT_SYMBOL(netif_device_detach);
2268
/**
 *	netif_device_attach - mark device as attached
 *	@dev: network device
 *
 *	Mark device as attached from system and restart if needed.
 */
2275void netif_device_attach(struct net_device *dev)
2276{
2277 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2278 netif_running(dev)) {
2279 netif_tx_wake_all_queues(dev);
2280 __netdev_watchdog_up(dev);
2281 }
2282}
2283EXPORT_SYMBOL(netif_device_attach);
2284
2285static void skb_warn_bad_offload(const struct sk_buff *skb)
2286{
2287 static const netdev_features_t null_features = 0;
2288 struct net_device *dev = skb->dev;
2289 const char *driver = "";
2290
2291 if (!net_ratelimit())
2292 return;
2293
2294 if (dev && dev->dev.parent)
2295 driver = dev_driver_string(dev->dev.parent);
2296
2297 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2298 "gso_type=%d ip_summed=%d\n",
2299 driver, dev ? &dev->features : &null_features,
2300 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2301 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2302 skb_shinfo(skb)->gso_type, skb->ip_summed);
2303}
2304
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
2309int skb_checksum_help(struct sk_buff *skb)
2310{
2311 __wsum csum;
2312 int ret = 0, offset;
2313
2314 if (skb->ip_summed == CHECKSUM_COMPLETE)
2315 goto out_set_summed;
2316
2317 if (unlikely(skb_shinfo(skb)->gso_size)) {
2318 skb_warn_bad_offload(skb);
2319 return -EINVAL;
2320 }
2321
2322
2323
2324
2325 if (skb_has_shared_frag(skb)) {
2326 ret = __skb_linearize(skb);
2327 if (ret)
2328 goto out;
2329 }
2330
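	/* Checksum the data from csum_start to the end of the packet and
	 * store the folded result at csum_start + csum_offset.
	 */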
2331 offset = skb_checksum_start_offset(skb);
2332 BUG_ON(offset >= skb_headlen(skb));
2333 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2334
2335 offset += skb->csum_offset;
2336 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2337
2338 if (skb_cloned(skb) &&
2339 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2340 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2341 if (ret)
2342 goto out;
2343 }
2344
2345 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2346out_set_summed:
2347 skb->ip_summed = CHECKSUM_NONE;
2348out:
2349 return ret;
2350}
2351EXPORT_SYMBOL(skb_checksum_help);
2352
2353__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2354{
2355 __be16 type = skb->protocol;
2356
2357
2358 if (type == htons(ETH_P_TEB)) {
2359 struct ethhdr *eth;
2360
2361 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2362 return 0;
2363
2364 eth = (struct ethhdr *)skb_mac_header(skb);
2365 type = eth->h_proto;
2366 }
2367
2368 return __vlan_get_protocol(skb, type, depth);
2369}
2370
2371
2372
2373
2374
2375
2376struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2377 netdev_features_t features)
2378{
2379 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2380 struct packet_offload *ptype;
2381 int vlan_depth = skb->mac_len;
2382 __be16 type = skb_network_protocol(skb, &vlan_depth);
2383
2384 if (unlikely(!type))
2385 return ERR_PTR(-EINVAL);
2386
2387 __skb_pull(skb, vlan_depth);
2388
2389 rcu_read_lock();
2390 list_for_each_entry_rcu(ptype, &offload_base, list) {
2391 if (ptype->type == type && ptype->callbacks.gso_segment) {
2392 segs = ptype->callbacks.gso_segment(skb, features);
2393 break;
2394 }
2395 }
2396 rcu_read_unlock();
2397
2398 __skb_push(skb, skb->data - skb_mac_header(skb));
2399
2400 return segs;
2401}
2402EXPORT_SYMBOL(skb_mac_gso_segment);
2403
2404
2405
2406
2407static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2408{
2409 if (tx_path)
2410 return skb->ip_summed != CHECKSUM_PARTIAL;
2411 else
2412 return skb->ip_summed == CHECKSUM_NONE;
2413}
2414
/**
 *	__skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *	@tx_path: whether it is called in TX path
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
2426struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2427 netdev_features_t features, bool tx_path)
2428{
2429 if (unlikely(skb_needs_check(skb, tx_path))) {
2430 int err;
2431
2432 skb_warn_bad_offload(skb);
2433
2434 err = skb_cow_head(skb, 0);
2435 if (err < 0)
2436 return ERR_PTR(err);
2437 }
2438
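	/* Record the current headroom so the segmentation code can find the
	 * MAC header in the resulting segments, and start with no
	 * encapsulation levels.
	 */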
2439 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2440 SKB_GSO_CB(skb)->encap_level = 0;
2441
2442 skb_reset_mac_header(skb);
2443 skb_reset_mac_len(skb);
2444
2445 return skb_mac_gso_segment(skb, features);
2446}
2447EXPORT_SYMBOL(__skb_gso_segment);
2448
2449
2450#ifdef CONFIG_BUG
2451void netdev_rx_csum_fault(struct net_device *dev)
2452{
2453 if (net_ratelimit()) {
2454 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2455 dump_stack();
2456 }
2457}
2458EXPORT_SYMBOL(netdev_rx_csum_fault);
2459#endif
2460
2461
2462
2463
2464
2465
2466static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2467{
2468#ifdef CONFIG_HIGHMEM
2469 int i;
2470 if (!(dev->features & NETIF_F_HIGHDMA)) {
2471 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2472 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2473 if (PageHighMem(skb_frag_page(frag)))
2474 return 1;
2475 }
2476 }
2477
2478 if (PCI_DMA_BUS_IS_PHYS) {
2479 struct device *pdev = dev->dev.parent;
2480
2481 if (!pdev)
2482 return 0;
2483 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2484 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2485 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2486 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2487 return 1;
2488 }
2489 }
2490#endif
2491 return 0;
2492}
2493
2494
2495
2496
2497#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
2498static netdev_features_t net_mpls_features(struct sk_buff *skb,
2499 netdev_features_t features,
2500 __be16 type)
2501{
2502 if (eth_p_mpls(type))
2503 features &= skb->dev->mpls_features;
2504
2505 return features;
2506}
2507#else
2508static netdev_features_t net_mpls_features(struct sk_buff *skb,
2509 netdev_features_t features,
2510 __be16 type)
2511{
2512 return features;
2513}
2514#endif
2515
2516static netdev_features_t harmonize_features(struct sk_buff *skb,
2517 netdev_features_t features)
2518{
2519 int tmp;
2520 __be16 type;
2521
2522 type = skb_network_protocol(skb, &tmp);
2523 features = net_mpls_features(skb, features, type);
2524
2525 if (skb->ip_summed != CHECKSUM_NONE &&
2526 !can_checksum_protocol(features, type)) {
2527 features &= ~NETIF_F_ALL_CSUM;
2528 } else if (illegal_highdma(skb->dev, skb)) {
2529 features &= ~NETIF_F_SG;
2530 }
2531
2532 return features;
2533}
2534
2535netdev_features_t netif_skb_features(struct sk_buff *skb)
2536{
2537 struct net_device *dev = skb->dev;
2538 netdev_features_t features = dev->features;
2539 u16 gso_segs = skb_shinfo(skb)->gso_segs;
2540 __be16 protocol = skb->protocol;
2541
2542 if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
2543 features &= ~NETIF_F_GSO_MASK;
2544
	/* If encapsulation offload request, verify we are testing
	 * hardware encapsulation features instead of standard
	 * features for the netdev
	 */
2549 if (skb->encapsulation)
2550 features &= dev->hw_enc_features;
2551
2552 if (!vlan_tx_tag_present(skb)) {
2553 if (unlikely(protocol == htons(ETH_P_8021Q) ||
2554 protocol == htons(ETH_P_8021AD))) {
2555 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2556 protocol = veh->h_vlan_encapsulated_proto;
2557 } else {
2558 goto finalize;
2559 }
2560 }
2561
2562 features = netdev_intersect_features(features,
2563 dev->vlan_features |
2564 NETIF_F_HW_VLAN_CTAG_TX |
2565 NETIF_F_HW_VLAN_STAG_TX);
2566
2567 if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
2568 features = netdev_intersect_features(features,
2569 NETIF_F_SG |
2570 NETIF_F_HIGHDMA |
2571 NETIF_F_FRAGLIST |
2572 NETIF_F_GEN_CSUM |
2573 NETIF_F_HW_VLAN_CTAG_TX |
2574 NETIF_F_HW_VLAN_STAG_TX);
2575
2576finalize:
2577 if (dev->netdev_ops->ndo_features_check)
2578 features &= dev->netdev_ops->ndo_features_check(skb, dev,
2579 features);
2580
2581 return harmonize_features(skb, features);
2582}
2583EXPORT_SYMBOL(netif_skb_features);
2584
2585static int xmit_one(struct sk_buff *skb, struct net_device *dev,
2586 struct netdev_queue *txq, bool more)
2587{
2588 unsigned int len;
2589 int rc;
2590
2591 if (!list_empty(&ptype_all))
2592 dev_queue_xmit_nit(skb, dev);
2593
2594 len = skb->len;
2595 trace_net_dev_start_xmit(skb, dev);
2596 rc = netdev_start_xmit(skb, dev, txq, more);
2597 trace_net_dev_xmit(skb, rc, dev, len);
2598
2599 return rc;
2600}
2601
2602struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
2603 struct netdev_queue *txq, int *ret)
2604{
2605 struct sk_buff *skb = first;
2606 int rc = NETDEV_TX_OK;
2607
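	/* Transmit the list one skb at a time; stop early if the driver
	 * reports an error or the queue gets stopped.
	 */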
2608 while (skb) {
2609 struct sk_buff *next = skb->next;
2610
2611 skb->next = NULL;
2612 rc = xmit_one(skb, dev, txq, next != NULL);
2613 if (unlikely(!dev_xmit_complete(rc))) {
2614 skb->next = next;
2615 goto out;
2616 }
2617
2618 skb = next;
2619 if (netif_xmit_stopped(txq) && skb) {
2620 rc = NETDEV_TX_BUSY;
2621 break;
2622 }
2623 }
2624
2625out:
2626 *ret = rc;
2627 return skb;
2628}
2629
2630static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2631 netdev_features_t features)
2632{
2633 if (vlan_tx_tag_present(skb) &&
2634 !vlan_hw_offload_capable(features, skb->vlan_proto))
2635 skb = __vlan_hwaccel_push_inside(skb);
2636 return skb;
2637}
2638
2639static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
2640{
2641 netdev_features_t features;
2642
2643 if (skb->next)
2644 return skb;
2645
2646 features = netif_skb_features(skb);
2647 skb = validate_xmit_vlan(skb, features);
2648 if (unlikely(!skb))
2649 goto out_null;
2650
2651 if (netif_needs_gso(dev, skb, features)) {
2652 struct sk_buff *segs;
2653
2654 segs = skb_gso_segment(skb, features);
2655 if (IS_ERR(segs)) {
2656 goto out_kfree_skb;
2657 } else if (segs) {
2658 consume_skb(skb);
2659 skb = segs;
2660 }
2661 } else {
2662 if (skb_needs_linearize(skb, features) &&
2663 __skb_linearize(skb))
2664 goto out_kfree_skb;
2665
2666
2667
2668
2669
2670 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2671 if (skb->encapsulation)
2672 skb_set_inner_transport_header(skb,
2673 skb_checksum_start_offset(skb));
2674 else
2675 skb_set_transport_header(skb,
2676 skb_checksum_start_offset(skb));
2677 if (!(features & NETIF_F_ALL_CSUM) &&
2678 skb_checksum_help(skb))
2679 goto out_kfree_skb;
2680 }
2681 }
2682
2683 return skb;
2684
2685out_kfree_skb:
2686 kfree_skb(skb);
2687out_null:
2688 return NULL;
2689}
2690
2691struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
2692{
2693 struct sk_buff *next, *head = NULL, *tail;
2694
2695 for (; skb != NULL; skb = next) {
2696 next = skb->next;
2697 skb->next = NULL;
2698
2699
2700 skb->prev = skb;
2701
2702 skb = validate_xmit_skb(skb, dev);
2703 if (!skb)
2704 continue;
2705
2706 if (!head)
2707 head = skb;
2708 else
2709 tail->next = skb;
2710
2711
2712
2713 tail = skb->prev;
2714 }
2715 return head;
2716}
2717
2718static void qdisc_pkt_len_init(struct sk_buff *skb)
2719{
2720 const struct skb_shared_info *shinfo = skb_shinfo(skb);
2721
2722 qdisc_skb_cb(skb)->pkt_len = skb->len;
2723
2724
2725
2726
2727 if (shinfo->gso_size) {
2728 unsigned int hdr_len;
2729 u16 gso_segs = shinfo->gso_segs;
2730
2731
2732 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
2733
2734
2735 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
2736 hdr_len += tcp_hdrlen(skb);
2737 else
2738 hdr_len += sizeof(struct udphdr);
2739
2740 if (shinfo->gso_type & SKB_GSO_DODGY)
2741 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
2742 shinfo->gso_size);
2743
2744 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
2745 }
2746}
2747
2748static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
2749 struct net_device *dev,
2750 struct netdev_queue *txq)
2751{
2752 spinlock_t *root_lock = qdisc_lock(q);
2753 bool contended;
2754 int rc;
2755
2756 qdisc_pkt_len_init(skb);
2757 qdisc_calculate_pkt_len(skb, q);
2758
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get qdisc main lock.
	 * This permits qdisc->running owner to get the lock more
	 * often and dequeue packets faster.
	 */
2764 contended = qdisc_is_running(q);
2765 if (unlikely(contended))
2766 spin_lock(&q->busylock);
2767
2768 spin_lock(root_lock);
2769 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2770 kfree_skb(skb);
2771 rc = NET_XMIT_DROP;
2772 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
2773 qdisc_run_begin(q)) {
2774
2775
2776
2777
2778
2779
2780 qdisc_bstats_update(q, skb);
2781
2782 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
2783 if (unlikely(contended)) {
2784 spin_unlock(&q->busylock);
2785 contended = false;
2786 }
2787 __qdisc_run(q);
2788 } else
2789 qdisc_run_end(q);
2790
2791 rc = NET_XMIT_SUCCESS;
2792 } else {
2793 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
2794 if (qdisc_run_begin(q)) {
2795 if (unlikely(contended)) {
2796 spin_unlock(&q->busylock);
2797 contended = false;
2798 }
2799 __qdisc_run(q);
2800 }
2801 }
2802 spin_unlock(root_lock);
2803 if (unlikely(contended))
2804 spin_unlock(&q->busylock);
2805 return rc;
2806}
2807
2808#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
2809static void skb_update_prio(struct sk_buff *skb)
2810{
2811 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
2812
2813 if (!skb->priority && skb->sk && map) {
2814 unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
2815
2816 if (prioidx < map->priomap_len)
2817 skb->priority = map->priomap[prioidx];
2818 }
2819}
2820#else
2821#define skb_update_prio(skb)
2822#endif
2823
2824static DEFINE_PER_CPU(int, xmit_recursion);
2825#define RECURSION_LIMIT 10
2826
2827
2828
2829
2830
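/* dev_loopback_xmit - loop an skb back into the local receive path:
 * mark it as a loopback packet, skip checksum validation and feed it
 * to netif_rx_ni().
 */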
2831int dev_loopback_xmit(struct sk_buff *skb)
2832{
2833 skb_reset_mac_header(skb);
2834 __skb_pull(skb, skb_network_offset(skb));
2835 skb->pkt_type = PACKET_LOOPBACK;
2836 skb->ip_summed = CHECKSUM_UNNECESSARY;
2837 WARN_ON(!skb_dst(skb));
2838 skb_dst_force(skb);
2839 netif_rx_ni(skb);
2840 return 0;
2841}
2842EXPORT_SYMBOL(dev_loopback_xmit);
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
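/**
 * __dev_queue_xmit - transmit a buffer
 * @skb: buffer to transmit
 * @accel_priv: private data used for L2 forwarding offload
 *
 * Queue a buffer for transmission to a network device.  The caller must
 * have set the device and priority and built the buffer before calling
 * this function.  The function can be called from an interrupt, but
 * interrupts must be enabled when it is called.
 *
 * A negative errno code is returned on failure.  Success does not
 * guarantee the frame will be transmitted, as it may be dropped due to
 * congestion or traffic shaping; positive qdisc return codes such as
 * NET_XMIT_DROP are also possible.  Regardless of the return value,
 * the skb is consumed.
 */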
2870static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
2871{
2872 struct net_device *dev = skb->dev;
2873 struct netdev_queue *txq;
2874 struct Qdisc *q;
2875 int rc = -ENOMEM;
2876
2877 skb_reset_mac_header(skb);
2878
2879 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
2880 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
2881
2882
2883
2884
2885 rcu_read_lock_bh();
2886
2887 skb_update_prio(skb);
2888
2889
2890
2891
2892 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
2893 skb_dst_drop(skb);
2894 else
2895 skb_dst_force(skb);
2896
2897 txq = netdev_pick_tx(dev, skb, accel_priv);
2898 q = rcu_dereference_bh(txq->qdisc);
2899
2900#ifdef CONFIG_NET_CLS_ACT
2901 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2902#endif
2903 trace_net_dev_queue(skb);
2904 if (q->enqueue) {
2905 rc = __dev_xmit_skb(skb, q, dev, txq);
2906 goto out;
2907 }
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
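/* The device has no queue (q->enqueue == NULL).  This is the common
 * case for software devices such as loopback and tunnels.  Take the
 * per-queue xmit lock ourselves and transmit directly, but refuse to
 * recurse on the same CPU to avoid deadlocks and dead loops on
 * misconfigured virtual devices.
 */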
2921 if (dev->flags & IFF_UP) {
2922 int cpu = smp_processor_id();
2923
2924 if (txq->xmit_lock_owner != cpu) {
2925
2926 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2927 goto recursion_alert;
2928
2929 skb = validate_xmit_skb(skb, dev);
2930 if (!skb)
2931 goto drop;
2932
2933 HARD_TX_LOCK(dev, txq, cpu);
2934
2935 if (!netif_xmit_stopped(txq)) {
2936 __this_cpu_inc(xmit_recursion);
2937 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
2938 __this_cpu_dec(xmit_recursion);
2939 if (dev_xmit_complete(rc)) {
2940 HARD_TX_UNLOCK(dev, txq);
2941 goto out;
2942 }
2943 }
2944 HARD_TX_UNLOCK(dev, txq);
2945 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2946 dev->name);
2947 } else {
2948
2949
2950
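/* Recursion detected: this CPU already holds the xmit lock for this
 * queue, so trying to queue here again would deadlock.
 */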
2951recursion_alert:
2952 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2953 dev->name);
2954 }
2955 }
2956
2957 rc = -ENETDOWN;
2958drop:
2959 rcu_read_unlock_bh();
2960
2961 atomic_long_inc(&dev->tx_dropped);
2962 kfree_skb_list(skb);
2963 return rc;
2964out:
2965 rcu_read_unlock_bh();
2966 return rc;
2967}
2968
2969int dev_queue_xmit(struct sk_buff *skb)
2970{
2971 return __dev_queue_xmit(skb, NULL);
2972}
2973EXPORT_SYMBOL(dev_queue_xmit);
2974
2975int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
2976{
2977 return __dev_queue_xmit(skb, accel_priv);
2978}
2979EXPORT_SYMBOL(dev_queue_xmit_accel);
2980
2981
2982
2983
2984
2985
2986int netdev_max_backlog __read_mostly = 1000;
2987EXPORT_SYMBOL(netdev_max_backlog);
2988
2989int netdev_tstamp_prequeue __read_mostly = 1;
2990int netdev_budget __read_mostly = 300;
2991int weight_p __read_mostly = 64;
2992
2993
2994static inline void ____napi_schedule(struct softnet_data *sd,
2995 struct napi_struct *napi)
2996{
2997 list_add_tail(&napi->poll_list, &sd->poll_list);
2998 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2999}
3000
3001#ifdef CONFIG_RPS
3002
3003
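/* One global flow table that all flow-based protocols share. */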
3004struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3005EXPORT_SYMBOL(rps_sock_flow_table);
3006
3007struct static_key rps_needed __read_mostly;
3008
3009static struct rps_dev_flow *
3010set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3011 struct rps_dev_flow *rflow, u16 next_cpu)
3012{
3013 if (next_cpu != RPS_NO_CPU) {
3014#ifdef CONFIG_RFS_ACCEL
3015 struct netdev_rx_queue *rxqueue;
3016 struct rps_dev_flow_table *flow_table;
3017 struct rps_dev_flow *old_rflow;
3018 u32 flow_id;
3019 u16 rxq_index;
3020 int rc;
3021
3022
3023 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3024 !(dev->features & NETIF_F_NTUPLE))
3025 goto out;
3026 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3027 if (rxq_index == skb_get_rx_queue(skb))
3028 goto out;
3029
3030 rxqueue = dev->_rx + rxq_index;
3031 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3032 if (!flow_table)
3033 goto out;
3034 flow_id = skb_get_hash(skb) & flow_table->mask;
3035 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3036 rxq_index, flow_id);
3037 if (rc < 0)
3038 goto out;
3039 old_rflow = rflow;
3040 rflow = &flow_table->flows[flow_id];
3041 rflow->filter = rc;
3042 if (old_rflow->filter == rflow->filter)
3043 old_rflow->filter = RPS_NO_FILTER;
3044 out:
3045#endif
3046 rflow->last_qtail =
3047 per_cpu(softnet_data, next_cpu).input_queue_head;
3048 }
3049
3050 rflow->cpu = next_cpu;
3051 return rflow;
3052}
3053
3054
3055
3056
3057
3058
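/*
 * get_rps_cpu is called from netif_rx_internal/netif_receive_skb and
 * returns the target CPU from the RPS map of the receiving queue for
 * a given skb, or -1 if no remote CPU should be used.
 * rcu_read_lock must be held on entry.
 */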
3059static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3060 struct rps_dev_flow **rflowp)
3061{
3062 struct netdev_rx_queue *rxqueue;
3063 struct rps_map *map;
3064 struct rps_dev_flow_table *flow_table;
3065 struct rps_sock_flow_table *sock_flow_table;
3066 int cpu = -1;
3067 u16 tcpu;
3068 u32 hash;
3069
3070 if (skb_rx_queue_recorded(skb)) {
3071 u16 index = skb_get_rx_queue(skb);
3072 if (unlikely(index >= dev->real_num_rx_queues)) {
3073 WARN_ONCE(dev->real_num_rx_queues > 1,
3074 "%s received packet on queue %u, but number "
3075 "of RX queues is %u\n",
3076 dev->name, index, dev->real_num_rx_queues);
3077 goto done;
3078 }
3079 rxqueue = dev->_rx + index;
3080 } else
3081 rxqueue = dev->_rx;
3082
3083 map = rcu_dereference(rxqueue->rps_map);
3084 if (map) {
3085 if (map->len == 1 &&
3086 !rcu_access_pointer(rxqueue->rps_flow_table)) {
3087 tcpu = map->cpus[0];
3088 if (cpu_online(tcpu))
3089 cpu = tcpu;
3090 goto done;
3091 }
3092 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
3093 goto done;
3094 }
3095
3096 skb_reset_network_header(skb);
3097 hash = skb_get_hash(skb);
3098 if (!hash)
3099 goto done;
3100
3101 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3102 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3103 if (flow_table && sock_flow_table) {
3104 u16 next_cpu;
3105 struct rps_dev_flow *rflow;
3106
3107 rflow = &flow_table->flows[hash & flow_table->mask];
3108 tcpu = rflow->cpu;
3109
3110 next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask];
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
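/*
 * If the desired CPU (where the last recvmsg was done) differs from
 * the CPU currently recorded in the flow table entry, switch to it
 * only if one of the following holds:
 *   - the current CPU is unset (RPS_NO_CPU)
 *   - the current CPU is offline
 *   - the current CPU's queue head has advanced past the last packet
 *     enqueued for this flow, which guarantees all previous packets
 *     have been dequeued and in-order delivery is preserved.
 */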
3123 if (unlikely(tcpu != next_cpu) &&
3124 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3125 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3126 rflow->last_qtail)) >= 0)) {
3127 tcpu = next_cpu;
3128 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3129 }
3130
3131 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3132 *rflowp = rflow;
3133 cpu = tcpu;
3134 goto done;
3135 }
3136 }
3137
3138 if (map) {
3139 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3140 if (cpu_online(tcpu)) {
3141 cpu = tcpu;
3142 goto done;
3143 }
3144 }
3145
3146done:
3147 return cpu;
3148}
3149
3150#ifdef CONFIG_RFS_ACCEL
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
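/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */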
3163bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3164 u32 flow_id, u16 filter_id)
3165{
3166 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3167 struct rps_dev_flow_table *flow_table;
3168 struct rps_dev_flow *rflow;
3169 bool expire = true;
3170 int cpu;
3171
3172 rcu_read_lock();
3173 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3174 if (flow_table && flow_id <= flow_table->mask) {
3175 rflow = &flow_table->flows[flow_id];
3176 cpu = ACCESS_ONCE(rflow->cpu);
3177 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3178 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3179 rflow->last_qtail) <
3180 (int)(10 * flow_table->mask)))
3181 expire = false;
3182 }
3183 rcu_read_unlock();
3184 return expire;
3185}
3186EXPORT_SYMBOL(rps_may_expire_flow);
3187
3188#endif
3189
3190
3191static void rps_trigger_softirq(void *data)
3192{
3193 struct softnet_data *sd = data;
3194
3195 ____napi_schedule(sd, &sd->backlog);
3196 sd->received_rps++;
3197}
3198
3199#endif
3200
3201
3202
3203
3204
3205
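/*
 * Check if this softnet_data structure belongs to another CPU.  If so,
 * chain it onto our IPI list and return 1; the IPI is sent when
 * net_rx_action() finishes.  Otherwise return 0.
 */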
3206static int rps_ipi_queued(struct softnet_data *sd)
3207{
3208#ifdef CONFIG_RPS
3209 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
3210
3211 if (sd != mysd) {
3212 sd->rps_ipi_next = mysd->rps_ipi_list;
3213 mysd->rps_ipi_list = sd;
3214
3215 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3216 return 1;
3217 }
3218#endif
3219 return 0;
3220}
3221
3222#ifdef CONFIG_NET_FLOW_LIMIT
3223int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3224#endif
3225
3226static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3227{
3228#ifdef CONFIG_NET_FLOW_LIMIT
3229 struct sd_flow_limit *fl;
3230 struct softnet_data *sd;
3231 unsigned int old_flow, new_flow;
3232
3233 if (qlen < (netdev_max_backlog >> 1))
3234 return false;
3235
3236 sd = this_cpu_ptr(&softnet_data);
3237
3238 rcu_read_lock();
3239 fl = rcu_dereference(sd->flow_limit);
3240 if (fl) {
3241 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3242 old_flow = fl->history[fl->history_head];
3243 fl->history[fl->history_head] = new_flow;
3244
3245 fl->history_head++;
3246 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3247
3248 if (likely(fl->buckets[old_flow]))
3249 fl->buckets[old_flow]--;
3250
3251 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3252 fl->count++;
3253 rcu_read_unlock();
3254 return true;
3255 }
3256 }
3257 rcu_read_unlock();
3258#endif
3259 return false;
3260}
3261
3262
3263
3264
3265
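/*
 * enqueue_to_backlog is called to queue an skb to a per-CPU backlog
 * queue (may be a remote CPU queue).
 */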
3266static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3267 unsigned int *qtail)
3268{
3269 struct softnet_data *sd;
3270 unsigned long flags;
3271 unsigned int qlen;
3272
3273 sd = &per_cpu(softnet_data, cpu);
3274
3275 local_irq_save(flags);
3276
3277 rps_lock(sd);
3278 qlen = skb_queue_len(&sd->input_pkt_queue);
3279 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3280 if (qlen) {
3281enqueue:
3282 __skb_queue_tail(&sd->input_pkt_queue, skb);
3283 input_queue_tail_incr_save(sd, qtail);
3284 rps_unlock(sd);
3285 local_irq_restore(flags);
3286 return NET_RX_SUCCESS;
3287 }
3288
3289
3290
3291
3292 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3293 if (!rps_ipi_queued(sd))
3294 ____napi_schedule(sd, &sd->backlog);
3295 }
3296 goto enqueue;
3297 }
3298
3299 sd->dropped++;
3300 rps_unlock(sd);
3301
3302 local_irq_restore(flags);
3303
3304 atomic_long_inc(&skb->dev->rx_dropped);
3305 kfree_skb(skb);
3306 return NET_RX_DROP;
3307}
3308
3309static int netif_rx_internal(struct sk_buff *skb)
3310{
3311 int ret;
3312
3313 net_timestamp_check(netdev_tstamp_prequeue, skb);
3314
3315 trace_netif_rx(skb);
3316#ifdef CONFIG_RPS
3317 if (static_key_false(&rps_needed)) {
3318 struct rps_dev_flow voidflow, *rflow = &voidflow;
3319 int cpu;
3320
3321 preempt_disable();
3322 rcu_read_lock();
3323
3324 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3325 if (cpu < 0)
3326 cpu = smp_processor_id();
3327
3328 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3329
3330 rcu_read_unlock();
3331 preempt_enable();
3332 } else
3333#endif
3334 {
3335 unsigned int qtail;
3336 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3337 put_cpu();
3338 }
3339 return ret;
3340}
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
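/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it
 * for the upper (protocol) levels to process.  It always succeeds.
 * The buffer may be dropped during processing for congestion control
 * or by the protocol layers.
 *
 * Return values:
 * NET_RX_SUCCESS	(no congestion)
 * NET_RX_DROP		(packet was dropped)
 */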
3357int netif_rx(struct sk_buff *skb)
3358{
3359 trace_netif_rx_entry(skb);
3360
3361 return netif_rx_internal(skb);
3362}
3363EXPORT_SYMBOL(netif_rx);
3364
3365int netif_rx_ni(struct sk_buff *skb)
3366{
3367 int err;
3368
3369 trace_netif_rx_ni_entry(skb);
3370
3371 preempt_disable();
3372 err = netif_rx_internal(skb);
3373 if (local_softirq_pending())
3374 do_softirq();
3375 preempt_enable();
3376
3377 return err;
3378}
3379EXPORT_SYMBOL(netif_rx_ni);
3380
3381static void net_tx_action(struct softirq_action *h)
3382{
3383 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
3384
3385 if (sd->completion_queue) {
3386 struct sk_buff *clist;
3387
3388 local_irq_disable();
3389 clist = sd->completion_queue;
3390 sd->completion_queue = NULL;
3391 local_irq_enable();
3392
3393 while (clist) {
3394 struct sk_buff *skb = clist;
3395 clist = clist->next;
3396
3397 WARN_ON(atomic_read(&skb->users));
3398 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3399 trace_consume_skb(skb);
3400 else
3401 trace_kfree_skb(skb, net_tx_action);
3402 __kfree_skb(skb);
3403 }
3404 }
3405
3406 if (sd->output_queue) {
3407 struct Qdisc *head;
3408
3409 local_irq_disable();
3410 head = sd->output_queue;
3411 sd->output_queue = NULL;
3412 sd->output_queue_tailp = &sd->output_queue;
3413 local_irq_enable();
3414
3415 while (head) {
3416 struct Qdisc *q = head;
3417 spinlock_t *root_lock;
3418
3419 head = head->next_sched;
3420
3421 root_lock = qdisc_lock(q);
3422 if (spin_trylock(root_lock)) {
3423 smp_mb__before_atomic();
3424 clear_bit(__QDISC_STATE_SCHED,
3425 &q->state);
3426 qdisc_run(q);
3427 spin_unlock(root_lock);
3428 } else {
3429 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3430 &q->state)) {
3431 __netif_reschedule(q);
3432 } else {
3433 smp_mb__before_atomic();
3434 clear_bit(__QDISC_STATE_SCHED,
3435 &q->state);
3436 }
3437 }
3438 }
3439 }
3440}
3441
3442#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3443 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3444
3445int (*br_fdb_test_addr_hook)(struct net_device *dev,
3446 unsigned char *addr) __read_mostly;
3447EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3448#endif
3449
3450#ifdef CONFIG_NET_CLS_ACT
3451
3452
3453
3454
3455
3456
3457
3458
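/* Run the ingress qdisc attached to the receiving device on the skb
 * and return the resulting traffic-control verdict (TC_ACT_*).
 */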
3459static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3460{
3461 struct net_device *dev = skb->dev;
3462 u32 ttl = G_TC_RTTL(skb->tc_verd);
3463 int result = TC_ACT_OK;
3464 struct Qdisc *q;
3465
3466 if (unlikely(MAX_RED_LOOP < ttl++)) {
3467 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3468 skb->skb_iif, dev->ifindex);
3469 return TC_ACT_SHOT;
3470 }
3471
3472 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3473 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3474
3475 q = rcu_dereference(rxq->qdisc);
3476 if (q != &noop_qdisc) {
3477 spin_lock(qdisc_lock(q));
3478 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3479 result = qdisc_enqueue_root(skb, q);
3480 spin_unlock(qdisc_lock(q));
3481 }
3482
3483 return result;
3484}
3485
3486static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3487 struct packet_type **pt_prev,
3488 int *ret, struct net_device *orig_dev)
3489{
3490 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3491
3492 if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
3493 goto out;
3494
3495 if (*pt_prev) {
3496 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3497 *pt_prev = NULL;
3498 }
3499
3500 switch (ing_filter(skb, rxq)) {
3501 case TC_ACT_SHOT:
3502 case TC_ACT_STOLEN:
3503 kfree_skb(skb);
3504 return NULL;
3505 }
3506
3507out:
3508 skb->tc_verd = 0;
3509 return skb;
3510}
3511#endif
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
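/**
 * netdev_rx_handler_register - register receive handler
 * @dev: device to register a handler for
 * @rx_handler: receive handler to register
 * @rx_handler_data: data pointer that is used by rx handler
 *
 * Register a receive handler for a device.  This handler will then be
 * called from __netif_receive_skb.  A negative errno code is returned
 * on a failure.
 *
 * The caller must hold the rtnl_mutex.
 *
 * For a general description of rx_handler, see enum rx_handler_result.
 */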
3527int netdev_rx_handler_register(struct net_device *dev,
3528 rx_handler_func_t *rx_handler,
3529 void *rx_handler_data)
3530{
3531 ASSERT_RTNL();
3532
3533 if (dev->rx_handler)
3534 return -EBUSY;
3535
3536
3537 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3538 rcu_assign_pointer(dev->rx_handler, rx_handler);
3539
3540 return 0;
3541}
3542EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3543
3544
3545
3546
3547
3548
3549
3550
3551
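/**
 * netdev_rx_handler_unregister - unregister receive handler
 * @dev: device to unregister a handler from
 *
 * Unregister a receive handler from a device.
 *
 * The caller must hold the rtnl_mutex.
 */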
3552void netdev_rx_handler_unregister(struct net_device *dev)
3553{
3554
3555 ASSERT_RTNL();
3556 RCU_INIT_POINTER(dev->rx_handler, NULL);
3557
3558
3559
3560
3561 synchronize_net();
3562 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3563}
3564EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3565
3566
3567
3568
3569
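/*
 * Limit which protocols may consume pfmemalloc (memory reserve) skbs
 * to those needed to make memory reclaim over the network work.
 */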
3570static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3571{
3572 switch (skb->protocol) {
3573 case htons(ETH_P_ARP):
3574 case htons(ETH_P_IP):
3575 case htons(ETH_P_IPV6):
3576 case htons(ETH_P_8021Q):
3577 case htons(ETH_P_8021AD):
3578 return true;
3579 default:
3580 return false;
3581 }
3582}
3583
3584static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3585{
3586 struct packet_type *ptype, *pt_prev;
3587 rx_handler_func_t *rx_handler;
3588 struct net_device *orig_dev;
3589 struct net_device *null_or_dev;
3590 bool deliver_exact = false;
3591 int ret = NET_RX_DROP;
3592 __be16 type;
3593
3594 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3595
3596 trace_netif_receive_skb(skb);
3597
3598 orig_dev = skb->dev;
3599
3600 skb_reset_network_header(skb);
3601 if (!skb_transport_header_was_set(skb))
3602 skb_reset_transport_header(skb);
3603 skb_reset_mac_len(skb);
3604
3605 pt_prev = NULL;
3606
3607 rcu_read_lock();
3608
3609another_round:
3610 skb->skb_iif = skb->dev->ifindex;
3611
3612 __this_cpu_inc(softnet_data.processed);
3613
3614 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3615 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3616 skb = skb_vlan_untag(skb);
3617 if (unlikely(!skb))
3618 goto unlock;
3619 }
3620
3621#ifdef CONFIG_NET_CLS_ACT
3622 if (skb->tc_verd & TC_NCLS) {
3623 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3624 goto ncls;
3625 }
3626#endif
3627
3628 if (pfmemalloc)
3629 goto skip_taps;
3630
3631 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3632 if (!ptype->dev || ptype->dev == skb->dev) {
3633 if (pt_prev)
3634 ret = deliver_skb(skb, pt_prev, orig_dev);
3635 pt_prev = ptype;
3636 }
3637 }
3638
3639skip_taps:
3640#ifdef CONFIG_NET_CLS_ACT
3641 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3642 if (!skb)
3643 goto unlock;
3644ncls:
3645#endif
3646
3647 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3648 goto drop;
3649
3650 if (vlan_tx_tag_present(skb)) {
3651 if (pt_prev) {
3652 ret = deliver_skb(skb, pt_prev, orig_dev);
3653 pt_prev = NULL;
3654 }
3655 if (vlan_do_receive(&skb))
3656 goto another_round;
3657 else if (unlikely(!skb))
3658 goto unlock;
3659 }
3660
3661 rx_handler = rcu_dereference(skb->dev->rx_handler);
3662 if (rx_handler) {
3663 if (pt_prev) {
3664 ret = deliver_skb(skb, pt_prev, orig_dev);
3665 pt_prev = NULL;
3666 }
3667 switch (rx_handler(&skb)) {
3668 case RX_HANDLER_CONSUMED:
3669 ret = NET_RX_SUCCESS;
3670 goto unlock;
3671 case RX_HANDLER_ANOTHER:
3672 goto another_round;
3673 case RX_HANDLER_EXACT:
3674 deliver_exact = true;
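/* fall through */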
3675 case RX_HANDLER_PASS:
3676 break;
3677 default:
3678 BUG();
3679 }
3680 }
3681
3682 if (unlikely(vlan_tx_tag_present(skb))) {
3683 if (vlan_tx_tag_get_id(skb))
3684 skb->pkt_type = PACKET_OTHERHOST;
3685
3686
3687
3688
3689 skb->vlan_tci = 0;
3690 }
3691
3692
3693 null_or_dev = deliver_exact ? skb->dev : NULL;
3694
3695 type = skb->protocol;
3696 list_for_each_entry_rcu(ptype,
3697 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3698 if (ptype->type == type &&
3699 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3700 ptype->dev == orig_dev)) {
3701 if (pt_prev)
3702 ret = deliver_skb(skb, pt_prev, orig_dev);
3703 pt_prev = ptype;
3704 }
3705 }
3706
3707 if (pt_prev) {
3708 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3709 goto drop;
3710 else
3711 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3712 } else {
3713drop:
3714 atomic_long_inc(&skb->dev->rx_dropped);
3715 kfree_skb(skb);
3716
3717
3718
3719 ret = NET_RX_DROP;
3720 }
3721
3722unlock:
3723 rcu_read_unlock();
3724 return ret;
3725}
3726
3727static int __netif_receive_skb(struct sk_buff *skb)
3728{
3729 int ret;
3730
3731 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3732 unsigned long pflags = current->flags;
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
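/*
 * PFMEMALLOC skbs are special: they must only be delivered to sockets
 * that can allocate from the memory reserves (SOCK_MEMALLOC), stay
 * away from userspace, and have bounded memory usage.  Setting
 * PF_MEMALLOC here saves us from propagating that allocation context
 * down to every allocation site on the receive path.
 */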
3743 current->flags |= PF_MEMALLOC;
3744 ret = __netif_receive_skb_core(skb, true);
3745 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3746 } else
3747 ret = __netif_receive_skb_core(skb, false);
3748
3749 return ret;
3750}
3751
3752static int netif_receive_skb_internal(struct sk_buff *skb)
3753{
3754 net_timestamp_check(netdev_tstamp_prequeue, skb);
3755
3756 if (skb_defer_rx_timestamp(skb))
3757 return NET_RX_SUCCESS;
3758
3759#ifdef CONFIG_RPS
3760 if (static_key_false(&rps_needed)) {
3761 struct rps_dev_flow voidflow, *rflow = &voidflow;
3762 int cpu, ret;
3763
3764 rcu_read_lock();
3765
3766 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3767
3768 if (cpu >= 0) {
3769 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3770 rcu_read_unlock();
3771 return ret;
3772 }
3773 rcu_read_unlock();
3774 }
3775#endif
3776 return __netif_receive_skb(skb);
3777}
3778
3779
3780
3781
3782
3783
3784
3785
3786
3787
3788
3789
3790
3791
3792
3793
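/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds.  The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * This function may only be called from softirq context and
 * interrupts should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */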
3794int netif_receive_skb(struct sk_buff *skb)
3795{
3796 trace_netif_receive_skb_entry(skb);
3797
3798 return netif_receive_skb_internal(skb);
3799}
3800EXPORT_SYMBOL(netif_receive_skb);
3801
3802
3803
3804
3805static void flush_backlog(void *arg)
3806{
3807 struct net_device *dev = arg;
3808 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
3809 struct sk_buff *skb, *tmp;
3810
3811 rps_lock(sd);
3812 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3813 if (skb->dev == dev) {
3814 __skb_unlink(skb, &sd->input_pkt_queue);
3815 kfree_skb(skb);
3816 input_queue_head_incr(sd);
3817 }
3818 }
3819 rps_unlock(sd);
3820
3821 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3822 if (skb->dev == dev) {
3823 __skb_unlink(skb, &sd->process_queue);
3824 kfree_skb(skb);
3825 input_queue_head_incr(sd);
3826 }
3827 }
3828}
3829
3830static int napi_gro_complete(struct sk_buff *skb)
3831{
3832 struct packet_offload *ptype;
3833 __be16 type = skb->protocol;
3834 struct list_head *head = &offload_base;
3835 int err = -ENOENT;
3836
3837 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3838
3839 if (NAPI_GRO_CB(skb)->count == 1) {
3840 skb_shinfo(skb)->gso_size = 0;
3841 goto out;
3842 }
3843
3844 rcu_read_lock();
3845 list_for_each_entry_rcu(ptype, head, list) {
3846 if (ptype->type != type || !ptype->callbacks.gro_complete)
3847 continue;
3848
3849 err = ptype->callbacks.gro_complete(skb, 0);
3850 break;
3851 }
3852 rcu_read_unlock();
3853
3854 if (err) {
3855 WARN_ON(&ptype->list == head);
3856 kfree_skb(skb);
3857 return NET_RX_SUCCESS;
3858 }
3859
3860out:
3861 return netif_receive_skb_internal(skb);
3862}
3863
3864
3865
3866
3867
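/* napi->gro_list contains packets ordered by age, youngest at the
 * head.  Complete skbs in reverse order (oldest first) to reduce
 * latencies; with flush_old, stop at packets that arrived this jiffy.
 */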
3868void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3869{
3870 struct sk_buff *skb, *prev = NULL;
3871
3872
3873 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3874 skb->prev = prev;
3875 prev = skb;
3876 }
3877
3878 for (skb = prev; skb; skb = prev) {
3879 skb->next = NULL;
3880
3881 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3882 return;
3883
3884 prev = skb->prev;
3885 napi_gro_complete(skb);
3886 napi->gro_count--;
3887 }
3888
3889 napi->gro_list = NULL;
3890}
3891EXPORT_SYMBOL(napi_gro_flush);
3892
3893static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3894{
3895 struct sk_buff *p;
3896 unsigned int maclen = skb->dev->hard_header_len;
3897 u32 hash = skb_get_hash_raw(skb);
3898
3899 for (p = napi->gro_list; p; p = p->next) {
3900 unsigned long diffs;
3901
3902 NAPI_GRO_CB(p)->flush = 0;
3903
3904 if (hash != skb_get_hash_raw(p)) {
3905 NAPI_GRO_CB(p)->same_flow = 0;
3906 continue;
3907 }
3908
3909 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3910 diffs |= p->vlan_tci ^ skb->vlan_tci;
3911 if (maclen == ETH_HLEN)
3912 diffs |= compare_ether_header(skb_mac_header(p),
3913 skb_mac_header(skb));
3914 else if (!diffs)
3915 diffs = memcmp(skb_mac_header(p),
3916 skb_mac_header(skb),
3917 maclen);
3918 NAPI_GRO_CB(p)->same_flow = !diffs;
3919 }
3920}
3921
3922static void skb_gro_reset_offset(struct sk_buff *skb)
3923{
3924 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3925 const skb_frag_t *frag0 = &pinfo->frags[0];
3926
3927 NAPI_GRO_CB(skb)->data_offset = 0;
3928 NAPI_GRO_CB(skb)->frag0 = NULL;
3929 NAPI_GRO_CB(skb)->frag0_len = 0;
3930
3931 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3932 pinfo->nr_frags &&
3933 !PageHighMem(skb_frag_page(frag0))) {
3934 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3935 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3936 }
3937}
3938
3939static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
3940{
3941 struct skb_shared_info *pinfo = skb_shinfo(skb);
3942
3943 BUG_ON(skb->end - skb->tail < grow);
3944
3945 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3946
3947 skb->data_len -= grow;
3948 skb->tail += grow;
3949
3950 pinfo->frags[0].page_offset += grow;
3951 skb_frag_size_sub(&pinfo->frags[0], grow);
3952
3953 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
3954 skb_frag_unref(skb, 0);
3955 memmove(pinfo->frags, pinfo->frags + 1,
3956 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
3957 }
3958}
3959
3960static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3961{
3962 struct sk_buff **pp = NULL;
3963 struct packet_offload *ptype;
3964 __be16 type = skb->protocol;
3965 struct list_head *head = &offload_base;
3966 int same_flow;
3967 enum gro_result ret;
3968 int grow;
3969
3970 if (!(skb->dev->features & NETIF_F_GRO))
3971 goto normal;
3972
3973 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
3974 goto normal;
3975
3976 gro_list_prepare(napi, skb);
3977
3978 rcu_read_lock();
3979 list_for_each_entry_rcu(ptype, head, list) {
3980 if (ptype->type != type || !ptype->callbacks.gro_receive)
3981 continue;
3982
3983 skb_set_network_header(skb, skb_gro_offset(skb));
3984 skb_reset_mac_len(skb);
3985 NAPI_GRO_CB(skb)->same_flow = 0;
3986 NAPI_GRO_CB(skb)->flush = 0;
3987 NAPI_GRO_CB(skb)->free = 0;
3988 NAPI_GRO_CB(skb)->udp_mark = 0;
3989
3990
3991 switch (skb->ip_summed) {
3992 case CHECKSUM_COMPLETE:
3993 NAPI_GRO_CB(skb)->csum = skb->csum;
3994 NAPI_GRO_CB(skb)->csum_valid = 1;
3995 NAPI_GRO_CB(skb)->csum_cnt = 0;
3996 break;
3997 case CHECKSUM_UNNECESSARY:
3998 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
3999 NAPI_GRO_CB(skb)->csum_valid = 0;
4000 break;
4001 default:
4002 NAPI_GRO_CB(skb)->csum_cnt = 0;
4003 NAPI_GRO_CB(skb)->csum_valid = 0;
4004 }
4005
4006 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
4007 break;
4008 }
4009 rcu_read_unlock();
4010
4011 if (&ptype->list == head)
4012 goto normal;
4013
4014 same_flow = NAPI_GRO_CB(skb)->same_flow;
4015 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
4016
4017 if (pp) {
4018 struct sk_buff *nskb = *pp;
4019
4020 *pp = nskb->next;
4021 nskb->next = NULL;
4022 napi_gro_complete(nskb);
4023 napi->gro_count--;
4024 }
4025
4026 if (same_flow)
4027 goto ok;
4028
4029 if (NAPI_GRO_CB(skb)->flush)
4030 goto normal;
4031
4032 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
4033 struct sk_buff *nskb = napi->gro_list;
4034
4035
4036 while (nskb->next) {
4037 pp = &nskb->next;
4038 nskb = *pp;
4039 }
4040 *pp = NULL;
4041 nskb->next = NULL;
4042 napi_gro_complete(nskb);
4043 } else {
4044 napi->gro_count++;
4045 }
4046 NAPI_GRO_CB(skb)->count = 1;
4047 NAPI_GRO_CB(skb)->age = jiffies;
4048 NAPI_GRO_CB(skb)->last = skb;
4049 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
4050 skb->next = napi->gro_list;
4051 napi->gro_list = skb;
4052 ret = GRO_HELD;
4053
4054pull:
4055 grow = skb_gro_offset(skb) - skb_headlen(skb);
4056 if (grow > 0)
4057 gro_pull_from_frag0(skb, grow);
4058ok:
4059 return ret;
4060
4061normal:
4062 ret = GRO_NORMAL;
4063 goto pull;
4064}
4065
4066struct packet_offload *gro_find_receive_by_type(__be16 type)
4067{
4068 struct list_head *offload_head = &offload_base;
4069 struct packet_offload *ptype;
4070
4071 list_for_each_entry_rcu(ptype, offload_head, list) {
4072 if (ptype->type != type || !ptype->callbacks.gro_receive)
4073 continue;
4074 return ptype;
4075 }
4076 return NULL;
4077}
4078EXPORT_SYMBOL(gro_find_receive_by_type);
4079
4080struct packet_offload *gro_find_complete_by_type(__be16 type)
4081{
4082 struct list_head *offload_head = &offload_base;
4083 struct packet_offload *ptype;
4084
4085 list_for_each_entry_rcu(ptype, offload_head, list) {
4086 if (ptype->type != type || !ptype->callbacks.gro_complete)
4087 continue;
4088 return ptype;
4089 }
4090 return NULL;
4091}
4092EXPORT_SYMBOL(gro_find_complete_by_type);
4093
4094static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4095{
4096 switch (ret) {
4097 case GRO_NORMAL:
4098 if (netif_receive_skb_internal(skb))
4099 ret = GRO_DROP;
4100 break;
4101
4102 case GRO_DROP:
4103 kfree_skb(skb);
4104 break;
4105
4106 case GRO_MERGED_FREE:
4107 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
4108 kmem_cache_free(skbuff_head_cache, skb);
4109 else
4110 __kfree_skb(skb);
4111 break;
4112
4113 case GRO_HELD:
4114 case GRO_MERGED:
4115 break;
4116 }
4117
4118 return ret;
4119}
4120
4121gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4122{
4123 trace_napi_gro_receive_entry(skb);
4124
4125 skb_gro_reset_offset(skb);
4126
4127 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
4128}
4129EXPORT_SYMBOL(napi_gro_receive);
4130
4131static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
4132{
4133 if (unlikely(skb->pfmemalloc)) {
4134 consume_skb(skb);
4135 return;
4136 }
4137 __skb_pull(skb, skb_headlen(skb));
4138
4139 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
4140 skb->vlan_tci = 0;
4141 skb->dev = napi->dev;
4142 skb->skb_iif = 0;
4143 skb->encapsulation = 0;
4144 skb_shinfo(skb)->gso_type = 0;
4145 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
4146
4147 napi->skb = skb;
4148}
4149
4150struct sk_buff *napi_get_frags(struct napi_struct *napi)
4151{
4152 struct sk_buff *skb = napi->skb;
4153
4154 if (!skb) {
4155 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
4156 napi->skb = skb;
4157 }
4158 return skb;
4159}
4160EXPORT_SYMBOL(napi_get_frags);
4161
4162static gro_result_t napi_frags_finish(struct napi_struct *napi,
4163 struct sk_buff *skb,
4164 gro_result_t ret)
4165{
4166 switch (ret) {
4167 case GRO_NORMAL:
4168 case GRO_HELD:
4169 __skb_push(skb, ETH_HLEN);
4170 skb->protocol = eth_type_trans(skb, skb->dev);
4171 if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
4172 ret = GRO_DROP;
4173 break;
4174
4175 case GRO_DROP:
4176 case GRO_MERGED_FREE:
4177 napi_reuse_skb(napi, skb);
4178 break;
4179
4180 case GRO_MERGED:
4181 break;
4182 }
4183
4184 return ret;
4185}
4186
4187
4188
4189
4190
4191static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4192{
4193 struct sk_buff *skb = napi->skb;
4194 const struct ethhdr *eth;
4195 unsigned int hlen = sizeof(*eth);
4196
4197 napi->skb = NULL;
4198
4199 skb_reset_mac_header(skb);
4200 skb_gro_reset_offset(skb);
4201
4202 eth = skb_gro_header_fast(skb, 0);
4203 if (unlikely(skb_gro_header_hard(skb, hlen))) {
4204 eth = skb_gro_header_slow(skb, hlen, 0);
4205 if (unlikely(!eth)) {
4206 napi_reuse_skb(napi, skb);
4207 return NULL;
4208 }
4209 } else {
4210 gro_pull_from_frag0(skb, hlen);
4211 NAPI_GRO_CB(skb)->frag0 += hlen;
4212 NAPI_GRO_CB(skb)->frag0_len -= hlen;
4213 }
4214 __skb_pull(skb, hlen);
4215
4216
4217
4218
4219
4220
4221 skb->protocol = eth->h_proto;
4222
4223 return skb;
4224}
4225
4226gro_result_t napi_gro_frags(struct napi_struct *napi)
4227{
4228 struct sk_buff *skb = napi_frags_skb(napi);
4229
4230 if (!skb)
4231 return GRO_DROP;
4232
4233 trace_napi_gro_frags_entry(skb);
4234
4235 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4236}
4237EXPORT_SYMBOL(napi_gro_frags);
4238
4239
4240
4241
4242__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
4243{
4244 __wsum wsum;
4245 __sum16 sum;
4246
4247 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
4248
4249
4250 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
4251 if (likely(!sum)) {
4252 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
4253 !skb->csum_complete_sw)
4254 netdev_rx_csum_fault(skb->dev);
4255 }
4256
4257 NAPI_GRO_CB(skb)->csum = wsum;
4258 NAPI_GRO_CB(skb)->csum_valid = 1;
4259
4260 return sum;
4261}
4262EXPORT_SYMBOL(__skb_gro_checksum_complete);
4263
4264
4265
4266
4267
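/*
 * net_rps_action_and_irq_enable sends any pending IPIs for RPS.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */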
4268static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4269{
4270#ifdef CONFIG_RPS
4271 struct softnet_data *remsd = sd->rps_ipi_list;
4272
4273 if (remsd) {
4274 sd->rps_ipi_list = NULL;
4275
4276 local_irq_enable();
4277
4278
4279 while (remsd) {
4280 struct softnet_data *next = remsd->rps_ipi_next;
4281
4282 if (cpu_online(remsd->cpu))
4283 smp_call_function_single_async(remsd->cpu,
4284 &remsd->csd);
4285 remsd = next;
4286 }
4287 } else
4288#endif
4289 local_irq_enable();
4290}
4291
4292static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
4293{
4294#ifdef CONFIG_RPS
4295 return sd->rps_ipi_list != NULL;
4296#else
4297 return false;
4298#endif
4299}
4300
4301static int process_backlog(struct napi_struct *napi, int quota)
4302{
4303 int work = 0;
4304 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4305
4306
4307
4308
4309 if (sd_has_rps_ipi_waiting(sd)) {
4310 local_irq_disable();
4311 net_rps_action_and_irq_enable(sd);
4312 }
4313
4314 napi->weight = weight_p;
4315 local_irq_disable();
4316 while (1) {
4317 struct sk_buff *skb;
4318
4319 while ((skb = __skb_dequeue(&sd->process_queue))) {
4320 local_irq_enable();
4321 __netif_receive_skb(skb);
4322 local_irq_disable();
4323 input_queue_head_incr(sd);
4324 if (++work >= quota) {
4325 local_irq_enable();
4326 return work;
4327 }
4328 }
4329
4330 rps_lock(sd);
4331 if (skb_queue_empty(&sd->input_pkt_queue)) {
4332
4333
4334
4335
4336
4337
4338
4339
4340 napi->state = 0;
4341 rps_unlock(sd);
4342
4343 break;
4344 }
4345
4346 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4347 &sd->process_queue);
4348 rps_unlock(sd);
4349 }
4350 local_irq_enable();
4351
4352 return work;
4353}
4354
4355
4356
4357
4358
4359
4360
4361
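/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
 */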
4362void __napi_schedule(struct napi_struct *n)
4363{
4364 unsigned long flags;
4365
4366 local_irq_save(flags);
4367 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4368 local_irq_restore(flags);
4369}
4370EXPORT_SYMBOL(__napi_schedule);
4371
4372
4373
4374
4375
4376
4377
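/**
 * __napi_schedule_irqoff - schedule for receive
 * @n: entry to schedule
 *
 * Variant of __napi_schedule() assuming hard irqs are masked.
 */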
4378void __napi_schedule_irqoff(struct napi_struct *n)
4379{
4380 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4381}
4382EXPORT_SYMBOL(__napi_schedule_irqoff);
4383
4384void __napi_complete(struct napi_struct *n)
4385{
4386 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4387
4388 list_del_init(&n->poll_list);
4389 smp_mb__before_atomic();
4390 clear_bit(NAPI_STATE_SCHED, &n->state);
4391}
4392EXPORT_SYMBOL(__napi_complete);
4393
4394void napi_complete_done(struct napi_struct *n, int work_done)
4395{
4396 unsigned long flags;
4397
4398
4399
4400
4401
4402 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4403 return;
4404
4405 if (n->gro_list) {
4406 unsigned long timeout = 0;
4407
4408 if (work_done)
4409 timeout = n->dev->gro_flush_timeout;
4410
4411 if (timeout)
4412 hrtimer_start(&n->timer, ns_to_ktime(timeout),
4413 HRTIMER_MODE_REL_PINNED);
4414 else
4415 napi_gro_flush(n, false);
4416 }
4417 if (likely(list_empty(&n->poll_list))) {
4418 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
4419 } else {
4420
4421 local_irq_save(flags);
4422 __napi_complete(n);
4423 local_irq_restore(flags);
4424 }
4425}
4426EXPORT_SYMBOL(napi_complete_done);
4427
4428
4429struct napi_struct *napi_by_id(unsigned int napi_id)
4430{
4431 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4432 struct napi_struct *napi;
4433
4434 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4435 if (napi->napi_id == napi_id)
4436 return napi;
4437
4438 return NULL;
4439}
4440EXPORT_SYMBOL_GPL(napi_by_id);
4441
4442void napi_hash_add(struct napi_struct *napi)
4443{
4444 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4445
4446 spin_lock(&napi_hash_lock);
4447
4448
4449
4450
4451 napi->napi_id = 0;
4452 while (!napi->napi_id) {
4453 napi->napi_id = ++napi_gen_id;
4454 if (napi_by_id(napi->napi_id))
4455 napi->napi_id = 0;
4456 }
4457
4458 hlist_add_head_rcu(&napi->napi_hash_node,
4459 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4460
4461 spin_unlock(&napi_hash_lock);
4462 }
4463}
4464EXPORT_SYMBOL_GPL(napi_hash_add);
4465
4466
4467
4468
4469void napi_hash_del(struct napi_struct *napi)
4470{
4471 spin_lock(&napi_hash_lock);
4472
4473 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4474 hlist_del_rcu(&napi->napi_hash_node);
4475
4476 spin_unlock(&napi_hash_lock);
4477}
4478EXPORT_SYMBOL_GPL(napi_hash_del);
4479
4480static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
4481{
4482 struct napi_struct *napi;
4483
4484 napi = container_of(timer, struct napi_struct, timer);
4485 if (napi->gro_list)
4486 napi_schedule(napi);
4487
4488 return HRTIMER_NORESTART;
4489}
4490
4491void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4492 int (*poll)(struct napi_struct *, int), int weight)
4493{
4494 INIT_LIST_HEAD(&napi->poll_list);
4495 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
4496 napi->timer.function = napi_watchdog;
4497 napi->gro_count = 0;
4498 napi->gro_list = NULL;
4499 napi->skb = NULL;
4500 napi->poll = poll;
4501 if (weight > NAPI_POLL_WEIGHT)
4502 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4503 weight, dev->name);
4504 napi->weight = weight;
4505 list_add(&napi->dev_list, &dev->napi_list);
4506 napi->dev = dev;
4507#ifdef CONFIG_NETPOLL
4508 spin_lock_init(&napi->poll_lock);
4509 napi->poll_owner = -1;
4510#endif
4511 set_bit(NAPI_STATE_SCHED, &napi->state);
4512}
4513EXPORT_SYMBOL(netif_napi_add);
4514
4515void napi_disable(struct napi_struct *n)
4516{
4517 might_sleep();
4518 set_bit(NAPI_STATE_DISABLE, &n->state);
4519
4520 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
4521 msleep(1);
4522
4523 hrtimer_cancel(&n->timer);
4524
4525 clear_bit(NAPI_STATE_DISABLE, &n->state);
4526}
4527EXPORT_SYMBOL(napi_disable);
4528
4529void netif_napi_del(struct napi_struct *napi)
4530{
4531 list_del_init(&napi->dev_list);
4532 napi_free_frags(napi);
4533
4534 kfree_skb_list(napi->gro_list);
4535 napi->gro_list = NULL;
4536 napi->gro_count = 0;
4537}
4538EXPORT_SYMBOL(netif_napi_del);
4539
4540static int napi_poll(struct napi_struct *n, struct list_head *repoll)
4541{
4542 void *have;
4543 int work, weight;
4544
4545 list_del_init(&n->poll_list);
4546
4547 have = netpoll_poll_lock(n);
4548
4549 weight = n->weight;
4550
4551
4552
4553
4554
4555
4556
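/* This NAPI_STATE_SCHED test is to avoid a race with netpoll's
 * poll_napi().  Only the entity which obtains the lock and sees
 * NAPI_STATE_SCHED set will actually make the ->poll() call, so we
 * avoid accidentally calling ->poll() when NAPI is not scheduled.
 */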
4557 work = 0;
4558 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4559 work = n->poll(n, weight);
4560 trace_napi_poll(n);
4561 }
4562
4563 WARN_ON_ONCE(work > weight);
4564
4565 if (likely(work < weight))
4566 goto out_unlock;
4567
4568
4569
4570
4571
4572
4573 if (unlikely(napi_disable_pending(n))) {
4574 napi_complete(n);
4575 goto out_unlock;
4576 }
4577
4578 if (n->gro_list) {
4579
4580
4581
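/* Flush packets that have sat in gro_list too long; if HZ < 1000 a
 * jiffy is already at least a millisecond, so flush everything.
 */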
4582 napi_gro_flush(n, HZ >= 1000);
4583 }
4584
4585
4586
4587
4588 if (unlikely(!list_empty(&n->poll_list))) {
4589 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
4590 n->dev ? n->dev->name : "backlog");
4591 goto out_unlock;
4592 }
4593
4594 list_add_tail(&n->poll_list, repoll);
4595
4596out_unlock:
4597 netpoll_poll_unlock(have);
4598
4599 return work;
4600}
4601
4602static void net_rx_action(struct softirq_action *h)
4603{
4604 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
4605 unsigned long time_limit = jiffies + 2;
4606 int budget = netdev_budget;
4607 LIST_HEAD(list);
4608 LIST_HEAD(repoll);
4609
4610 local_irq_disable();
4611 list_splice_init(&sd->poll_list, &list);
4612 local_irq_enable();
4613
4614 for (;;) {
4615 struct napi_struct *n;
4616
4617 if (list_empty(&list)) {
4618 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
4619 return;
4620 break;
4621 }
4622
4623 n = list_first_entry(&list, struct napi_struct, poll_list);
4624 budget -= napi_poll(n, &repoll);
4625
4626
4627
4628
4629
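/* If the softirq window is exhausted then punt.
 * Allow this to run for 2 jiffies since each pass may take
 * several ms of CPU time.
 */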
4630 if (unlikely(budget <= 0 ||
4631 time_after_eq(jiffies, time_limit))) {
4632 sd->time_squeeze++;
4633 break;
4634 }
4635 }
4636
4637 local_irq_disable();
4638
4639 list_splice_tail_init(&sd->poll_list, &list);
4640 list_splice_tail(&repoll, &list);
4641 list_splice(&list, &sd->poll_list);
4642 if (!list_empty(&sd->poll_list))
4643 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4644
4645 net_rps_action_and_irq_enable(sd);
4646}
4647
4648struct netdev_adjacent {
4649 struct net_device *dev;
4650
4651
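/* upper master flag, there can only be one master device per list */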
4652 bool master;
4653
4654
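/* counter for the number of times this device was added to us */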
4655 u16 ref_nr;
4656
4657
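/* private field for the users */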
4658 void *private;
4659
4660 struct list_head list;
4661 struct rcu_head rcu;
4662};
4663
4664static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4665 struct net_device *adj_dev,
4666 struct list_head *adj_list)
4667{
4668 struct netdev_adjacent *adj;
4669
4670 list_for_each_entry(adj, adj_list, list) {
4671 if (adj->dev == adj_dev)
4672 return adj;
4673 }
4674 return NULL;
4675}
4676
4677
4678
4679
4680
4681
4682
4683
4684
4685
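/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and
 * return true in case it is.  Note that this checks the entire upper
 * device chain.  The caller must hold the RTNL lock.
 */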
4686bool netdev_has_upper_dev(struct net_device *dev,
4687 struct net_device *upper_dev)
4688{
4689 ASSERT_RTNL();
4690
4691 return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
4692}
4693EXPORT_SYMBOL(netdev_has_upper_dev);
4694
4695
4696
4697
4698
4699
4700
4701
4702static bool netdev_has_any_upper_dev(struct net_device *dev)
4703{
4704 ASSERT_RTNL();
4705
4706 return !list_empty(&dev->all_adj_list.upper);
4707}
4708
4709
4710
4711
4712
4713
4714
4715
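/**
 * netdev_master_upper_dev_get - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return a pointer to it, or NULL if
 * there is none.  The caller must hold the RTNL lock.
 */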
4716struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4717{
4718 struct netdev_adjacent *upper;
4719
4720 ASSERT_RTNL();
4721
4722 if (list_empty(&dev->adj_list.upper))
4723 return NULL;
4724
4725 upper = list_first_entry(&dev->adj_list.upper,
4726 struct netdev_adjacent, list);
4727 if (likely(upper->master))
4728 return upper->dev;
4729 return NULL;
4730}
4731EXPORT_SYMBOL(netdev_master_upper_dev_get);
4732
4733void *netdev_adjacent_get_private(struct list_head *adj_list)
4734{
4735 struct netdev_adjacent *adj;
4736
4737 adj = list_entry(adj_list, struct netdev_adjacent, list);
4738
4739 return adj->private;
4740}
4741EXPORT_SYMBOL(netdev_adjacent_get_private);
4742
4743
4744
4745
4746
4747
4748
4749
4750
4751struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
4752 struct list_head **iter)
4753{
4754 struct netdev_adjacent *upper;
4755
4756 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
4757
4758 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4759
4760 if (&upper->list == &dev->adj_list.upper)
4761 return NULL;
4762
4763 *iter = &upper->list;
4764
4765 return upper->dev;
4766}
4767EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
4768
4769
4770
4771
4772
4773
4774
4775
4776
4777struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
4778 struct list_head **iter)
4779{
4780 struct netdev_adjacent *upper;
4781
4782 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
4783
4784 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4785
4786 if (&upper->list == &dev->all_adj_list.upper)
4787 return NULL;
4788
4789 *iter = &upper->list;
4790
4791 return upper->dev;
4792}
4793EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
4794
4795
4796
4797
4798
4799
4800
4801
4802
4803
4804
4805
4806void *netdev_lower_get_next_private(struct net_device *dev,
4807 struct list_head **iter)
4808{
4809 struct netdev_adjacent *lower;
4810
4811 lower = list_entry(*iter, struct netdev_adjacent, list);
4812
4813 if (&lower->list == &dev->adj_list.lower)
4814 return NULL;
4815
4816 *iter = lower->list.next;
4817
4818 return lower->private;
4819}
4820EXPORT_SYMBOL(netdev_lower_get_next_private);
4821
4822
4823
4824
4825
4826
4827
4828
4829
4830
4831
4832void *netdev_lower_get_next_private_rcu(struct net_device *dev,
4833 struct list_head **iter)
4834{
4835 struct netdev_adjacent *lower;
4836
4837 WARN_ON_ONCE(!rcu_read_lock_held());
4838
4839 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4840
4841 if (&lower->list == &dev->adj_list.lower)
4842 return NULL;
4843
4844 *iter = &lower->list;
4845
4846 return lower->private;
4847}
4848EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
4849
4850
4851
4852
4853
4854
4855
4856
4857
4858
4859
4860
4861void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
4862{
4863 struct netdev_adjacent *lower;
4864
4865 lower = list_entry((*iter)->next, struct netdev_adjacent, list);
4866
4867 if (&lower->list == &dev->adj_list.lower)
4868 return NULL;
4869
4870 *iter = &lower->list;
4871
4872 return lower->dev;
4873}
4874EXPORT_SYMBOL(netdev_lower_get_next);
4875
4876
4877
4878
4879
4880
4881
4882
4883
4884
4885void *netdev_lower_get_first_private_rcu(struct net_device *dev)
4886{
4887 struct netdev_adjacent *lower;
4888
4889 lower = list_first_or_null_rcu(&dev->adj_list.lower,
4890 struct netdev_adjacent, list);
4891 if (lower)
4892 return lower->private;
4893 return NULL;
4894}
4895EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
4896
4897
4898
4899
4900
4901
4902
4903
4904struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4905{
4906 struct netdev_adjacent *upper;
4907
4908 upper = list_first_or_null_rcu(&dev->adj_list.upper,
4909 struct netdev_adjacent, list);
4910 if (upper && likely(upper->master))
4911 return upper->dev;
4912 return NULL;
4913}
4914EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4915
4916static int netdev_adjacent_sysfs_add(struct net_device *dev,
4917 struct net_device *adj_dev,
4918 struct list_head *dev_list)
4919{
4920 char linkname[IFNAMSIZ+7];
4921 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4922 "upper_%s" : "lower_%s", adj_dev->name);
4923 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
4924 linkname);
4925}
4926static void netdev_adjacent_sysfs_del(struct net_device *dev,
4927 char *name,
4928 struct list_head *dev_list)
4929{
4930 char linkname[IFNAMSIZ+7];
4931 sprintf(linkname, dev_list == &dev->adj_list.upper ?
4932 "upper_%s" : "lower_%s", name);
4933 sysfs_remove_link(&(dev->dev.kobj), linkname);
4934}
4935
4936static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
4937 struct net_device *adj_dev,
4938 struct list_head *dev_list)
4939{
4940 return (dev_list == &dev->adj_list.upper ||
4941 dev_list == &dev->adj_list.lower) &&
4942 net_eq(dev_net(dev), dev_net(adj_dev));
4943}
4944
4945static int __netdev_adjacent_dev_insert(struct net_device *dev,
4946 struct net_device *adj_dev,
4947 struct list_head *dev_list,
4948 void *private, bool master)
4949{
4950 struct netdev_adjacent *adj;
4951 int ret;
4952
4953 adj = __netdev_find_adj(dev, adj_dev, dev_list);
4954
4955 if (adj) {
4956 adj->ref_nr++;
4957 return 0;
4958 }
4959
4960 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4961 if (!adj)
4962 return -ENOMEM;
4963
4964 adj->dev = adj_dev;
4965 adj->master = master;
4966 adj->ref_nr = 1;
4967 adj->private = private;
4968 dev_hold(adj_dev);
4969
4970 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
4971 adj_dev->name, dev->name, adj_dev->name);
4972
4973 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
4974 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
4975 if (ret)
4976 goto free_adj;
4977 }
4978
4979
4980 if (master) {
4981 ret = sysfs_create_link(&(dev->dev.kobj),
4982 &(adj_dev->dev.kobj), "master");
4983 if (ret)
4984 goto remove_symlinks;
4985
4986 list_add_rcu(&adj->list, dev_list);
4987 } else {
4988 list_add_tail_rcu(&adj->list, dev_list);
4989 }
4990
4991 return 0;
4992
4993remove_symlinks:
4994 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
4995 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
4996free_adj:
4997 kfree(adj);
4998 dev_put(adj_dev);
4999
5000 return ret;
5001}
5002
5003static void __netdev_adjacent_dev_remove(struct net_device *dev,
5004 struct net_device *adj_dev,
5005 struct list_head *dev_list)
5006{
5007 struct netdev_adjacent *adj;
5008
5009 adj = __netdev_find_adj(dev, adj_dev, dev_list);
5010
5011 if (!adj) {
5012 pr_err("tried to remove device %s from %s\n",
5013 dev->name, adj_dev->name);
5014 BUG();
5015 }
5016
5017 if (adj->ref_nr > 1) {
5018 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
5019 adj->ref_nr-1);
5020 adj->ref_nr--;
5021 return;
5022 }
5023
5024 if (adj->master)
5025 sysfs_remove_link(&(dev->dev.kobj), "master");
5026
5027 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5028 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5029
5030 list_del_rcu(&adj->list);
5031 pr_debug("dev_put for %s, because link removed from %s to %s\n",
5032 adj_dev->name, dev->name, adj_dev->name);
5033 dev_put(adj_dev);
5034 kfree_rcu(adj, rcu);
5035}
5036
5037static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5038 struct net_device *upper_dev,
5039 struct list_head *up_list,
5040 struct list_head *down_list,
5041 void *private, bool master)
5042{
5043 int ret;
5044
5045 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
5046 master);
5047 if (ret)
5048 return ret;
5049
5050 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
5051 false);
5052 if (ret) {
5053 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5054 return ret;
5055 }
5056
5057 return 0;
5058}
5059
5060static int __netdev_adjacent_dev_link(struct net_device *dev,
5061 struct net_device *upper_dev)
5062{
5063 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
5064 &dev->all_adj_list.upper,
5065 &upper_dev->all_adj_list.lower,
5066 NULL, false);
5067}
5068
5069static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5070 struct net_device *upper_dev,
5071 struct list_head *up_list,
5072 struct list_head *down_list)
5073{
5074 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5075 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
5076}
5077
5078static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5079 struct net_device *upper_dev)
5080{
5081 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5082 &dev->all_adj_list.upper,
5083 &upper_dev->all_adj_list.lower);
5084}
5085
5086static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5087 struct net_device *upper_dev,
5088 void *private, bool master)
5089{
5090 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
5091
5092 if (ret)
5093 return ret;
5094
5095 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
5096 &dev->adj_list.upper,
5097 &upper_dev->adj_list.lower,
5098 private, master);
5099 if (ret) {
5100 __netdev_adjacent_dev_unlink(dev, upper_dev);
5101 return ret;
5102 }
5103
5104 return 0;
5105}
5106
5107static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5108 struct net_device *upper_dev)
5109{
5110 __netdev_adjacent_dev_unlink(dev, upper_dev);
5111 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5112 &dev->adj_list.upper,
5113 &upper_dev->adj_list.lower);
5114}
5115
5116static int __netdev_upper_dev_link(struct net_device *dev,
5117 struct net_device *upper_dev, bool master,
5118 void *private)
5119{
5120 struct netdev_adjacent *i, *j, *to_i, *to_j;
5121 int ret = 0;
5122
5123 ASSERT_RTNL();
5124
5125 if (dev == upper_dev)
5126 return -EBUSY;
5127
5128
5129 if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
5130 return -EBUSY;
5131
5132 if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
5133 return -EEXIST;
5134
5135 if (master && netdev_master_upper_dev_get(dev))
5136 return -EBUSY;
5137
5138 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
5139 master);
5140 if (ret)
5141 return ret;
5142
5143
5144
5145
5146
5147
5148 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5149 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5150 pr_debug("Interlinking %s with %s, non-neighbour\n",
5151 i->dev->name, j->dev->name);
5152 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
5153 if (ret)
5154 goto rollback_mesh;
5155 }
5156 }
5157
5158
5159 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5160 pr_debug("linking %s's upper device %s with %s\n",
5161 upper_dev->name, i->dev->name, dev->name);
5162 ret = __netdev_adjacent_dev_link(dev, i->dev);
5163 if (ret)
5164 goto rollback_upper_mesh;
5165 }
5166
5167
5168 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5169 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5170 i->dev->name, upper_dev->name);
5171 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
5172 if (ret)
5173 goto rollback_lower_mesh;
5174 }
5175
5176 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
5177 return 0;
5178
5179rollback_lower_mesh:
5180 to_i = i;
5181 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5182 if (i == to_i)
5183 break;
5184 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5185 }
5186
5187 i = NULL;
5188
5189rollback_upper_mesh:
5190 to_i = i;
5191 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5192 if (i == to_i)
5193 break;
5194 __netdev_adjacent_dev_unlink(dev, i->dev);
5195 }
5196
5197 i = j = NULL;
5198
5199rollback_mesh:
5200 to_i = i;
5201 to_j = j;
5202 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5203 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5204 if (i == to_i && j == to_j)
5205 break;
5206 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5207 }
5208 if (i == to_i)
5209 break;
5210 }
5211
5212 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5213
5214 return ret;
5215}
5216
5217
5218
5219
5220
5221
5222
5223
5224
5225
5226
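/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to a device which is upper to this one.  The caller must
 * hold the RTNL lock.  On a failure a negative errno code is returned;
 * on success the reference counts are adjusted and zero is returned.
 */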
5227int netdev_upper_dev_link(struct net_device *dev,
5228 struct net_device *upper_dev)
5229{
5230 return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
5231}
5232EXPORT_SYMBOL(netdev_upper_dev_link);
5233
5234
5235
5236
5237
5238
5239
5240
5241
5242
5243
5244
5245int netdev_master_upper_dev_link(struct net_device *dev,
5246 struct net_device *upper_dev)
5247{
5248 return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
5249}
5250EXPORT_SYMBOL(netdev_master_upper_dev_link);
5251
5252int netdev_master_upper_dev_link_private(struct net_device *dev,
5253 struct net_device *upper_dev,
5254 void *private)
5255{
5256 return __netdev_upper_dev_link(dev, upper_dev, true, private);
5257}
5258EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
5259
5260
5261
5262
5263
5264
5265
5266
5267
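/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to a device which is upper to this one.  The caller
 * must hold the RTNL lock.
 */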
5268void netdev_upper_dev_unlink(struct net_device *dev,
5269 struct net_device *upper_dev)
5270{
5271 struct netdev_adjacent *i, *j;
5272 ASSERT_RTNL();
5273
5274 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5275
5276
5277
5278
5279
5280 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5281 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5282 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5283
5284
5285
5286
5287 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5288 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5289
5290 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5291 __netdev_adjacent_dev_unlink(dev, i->dev);
5292
5293 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
5294}
5295EXPORT_SYMBOL(netdev_upper_dev_unlink);
5296
5297static void netdev_adjacent_add_links(struct net_device *dev)
5298{
5299 struct netdev_adjacent *iter;
5300
5301 struct net *net = dev_net(dev);
5302
5303 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5304 if (!net_eq(net, dev_net(iter->dev)))
5305 continue;
5306 netdev_adjacent_sysfs_add(iter->dev, dev,
5307 &iter->dev->adj_list.lower);
5308 netdev_adjacent_sysfs_add(dev, iter->dev,
5309 &dev->adj_list.upper);
5310 }
5311
5312 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5313 if (!net_eq(net, dev_net(iter->dev)))
5314 continue;
5315 netdev_adjacent_sysfs_add(iter->dev, dev,
5316 &iter->dev->adj_list.upper);
5317 netdev_adjacent_sysfs_add(dev, iter->dev,
5318 &dev->adj_list.lower);
5319 }
5320}
5321
5322static void netdev_adjacent_del_links(struct net_device *dev)
5323{
5324 struct netdev_adjacent *iter;
5325
5326 struct net *net = dev_net(dev);
5327
5328 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5329 if (!net_eq(net, dev_net(iter->dev)))
5330 continue;
5331 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5332 &iter->dev->adj_list.lower);
5333 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5334 &dev->adj_list.upper);
5335 }
5336
5337 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5338 if (!net_eq(net, dev_net(iter->dev)))
5339 continue;
5340 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5341 &iter->dev->adj_list.upper);
5342 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5343 &dev->adj_list.lower);
5344 }
5345}
5346
5347void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
5348{
5349 struct netdev_adjacent *iter;
5350
5351 struct net *net = dev_net(dev);
5352
5353 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5354		if (!net_eq(net, dev_net(iter->dev)))
5355 continue;
5356 netdev_adjacent_sysfs_del(iter->dev, oldname,
5357 &iter->dev->adj_list.lower);
5358 netdev_adjacent_sysfs_add(iter->dev, dev,
5359 &iter->dev->adj_list.lower);
5360 }
5361
5362 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5363		if (!net_eq(net, dev_net(iter->dev)))
5364 continue;
5365 netdev_adjacent_sysfs_del(iter->dev, oldname,
5366 &iter->dev->adj_list.upper);
5367 netdev_adjacent_sysfs_add(iter->dev, dev,
5368 &iter->dev->adj_list.upper);
5369 }
5370}
5371
5372void *netdev_lower_dev_get_private(struct net_device *dev,
5373 struct net_device *lower_dev)
5374{
5375 struct netdev_adjacent *lower;
5376
5377 if (!lower_dev)
5378 return NULL;
5379 lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
5380 if (!lower)
5381 return NULL;
5382
5383 return lower->private;
5384}
5385EXPORT_SYMBOL(netdev_lower_dev_get_private);
5386
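/**
 * dev_get_nest_level - get the nesting level of a device
 * @dev: device to inspect
 * @type_check: predicate selecting which device types are counted
 *
 * Recursively walks @dev's lower devices and returns the maximum nesting
 * depth, counting only devices for which @type_check() returns true.
 * Must be called with the RTNL lock held.
 */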
5388int dev_get_nest_level(struct net_device *dev,
5389 bool (*type_check)(struct net_device *dev))
5390{
5391 struct net_device *lower = NULL;
5392 struct list_head *iter;
5393 int max_nest = -1;
5394 int nest;
5395
5396 ASSERT_RTNL();
5397
5398 netdev_for_each_lower_dev(dev, lower, iter) {
5399 nest = dev_get_nest_level(lower, type_check);
5400 if (max_nest < nest)
5401 max_nest = nest;
5402 }
5403
5404 if (type_check(dev))
5405 max_nest++;
5406
5407 return max_nest;
5408}
5409EXPORT_SYMBOL(dev_get_nest_level);
5410
5411static void dev_change_rx_flags(struct net_device *dev, int flags)
5412{
5413 const struct net_device_ops *ops = dev->netdev_ops;
5414
5415 if (ops->ndo_change_rx_flags)
5416 ops->ndo_change_rx_flags(dev, flags);
5417}
5418
5419static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
5420{
5421 unsigned int old_flags = dev->flags;
5422 kuid_t uid;
5423 kgid_t gid;
5424
5425 ASSERT_RTNL();
5426
5427 dev->flags |= IFF_PROMISC;
5428 dev->promiscuity += inc;
5429 if (dev->promiscuity == 0) {
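		/* Promiscuity count hit zero: either a negative inc dropped
		 * the last reference, in which case IFF_PROMISC is cleared,
		 * or a positive inc overflowed the unsigned counter, in which
		 * case the change is backed out and -EOVERFLOW is returned.
		 */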
5434 if (inc < 0)
5435 dev->flags &= ~IFF_PROMISC;
5436 else {
5437 dev->promiscuity -= inc;
5438 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
5439 dev->name);
5440 return -EOVERFLOW;
5441 }
5442 }
5443 if (dev->flags != old_flags) {
5444 pr_info("device %s %s promiscuous mode\n",
5445 dev->name,
5446 dev->flags & IFF_PROMISC ? "entered" : "left");
5447 if (audit_enabled) {
5448 current_uid_gid(&uid, &gid);
5449 audit_log(current->audit_context, GFP_ATOMIC,
5450 AUDIT_ANOM_PROMISCUOUS,
5451 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
5452 dev->name, (dev->flags & IFF_PROMISC),
5453 (old_flags & IFF_PROMISC),
5454 from_kuid(&init_user_ns, audit_get_loginuid(current)),
5455 from_kuid(&init_user_ns, uid),
5456 from_kgid(&init_user_ns, gid),
5457 audit_get_sessionid(current));
5458 }
5459
5460 dev_change_rx_flags(dev, IFF_PROMISC);
5461 }
5462 if (notify)
5463 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
5464 return 0;
5465}
5466
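/**
 * dev_set_promiscuity - update promiscuity count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove promiscuity from a device. While the count in the device
 * remains above zero the interface remains promiscuous. Once it hits zero
 * the device reverts back to normal filtering operation. A negative inc
 * value is used to drop promiscuity on the device.
 * Return 0 if successful or a negative errno code on error.
 */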
5478int dev_set_promiscuity(struct net_device *dev, int inc)
5479{
5480 unsigned int old_flags = dev->flags;
5481 int err;
5482
5483 err = __dev_set_promiscuity(dev, inc, true);
5484 if (err < 0)
5485 return err;
5486 if (dev->flags != old_flags)
5487 dev_set_rx_mode(dev);
5488 return err;
5489}
5490EXPORT_SYMBOL(dev_set_promiscuity);
5491
5492static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
5493{
5494 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
5495
5496 ASSERT_RTNL();
5497
5498 dev->flags |= IFF_ALLMULTI;
5499 dev->allmulti += inc;
5500 if (dev->allmulti == 0) {
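		/* Allmulti count hit zero: either a negative inc dropped the
		 * last reference, in which case IFF_ALLMULTI is cleared, or a
		 * positive inc overflowed the unsigned counter, in which case
		 * the change is backed out and -EOVERFLOW is returned.
		 */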
5505 if (inc < 0)
5506 dev->flags &= ~IFF_ALLMULTI;
5507 else {
5508 dev->allmulti -= inc;
5509 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
5510 dev->name);
5511 return -EOVERFLOW;
5512 }
5513 }
5514 if (dev->flags ^ old_flags) {
5515 dev_change_rx_flags(dev, IFF_ALLMULTI);
5516 dev_set_rx_mode(dev);
5517 if (notify)
5518 __dev_notify_flags(dev, old_flags,
5519 dev->gflags ^ old_gflags);
5520 }
5521 return 0;
5522}
5523
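/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface keeps listening to
 * all multicast addresses. Once it hits zero the device reverts back to
 * normal filtering operation. A negative @inc value is used to drop the
 * counter when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 */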
5537int dev_set_allmulti(struct net_device *dev, int inc)
5538{
5539 return __dev_set_allmulti(dev, inc, true);
5540}
5541EXPORT_SYMBOL(dev_set_allmulti);
5542
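/*
 * Upload unicast and multicast address lists to the device and configure
 * RX filtering. When the device doesn't support unicast filtering it is
 * put in promiscuous mode while additional unicast addresses are present.
 */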
5549void __dev_set_rx_mode(struct net_device *dev)
5550{
5551 const struct net_device_ops *ops = dev->netdev_ops;
5552
5553
5554 if (!(dev->flags&IFF_UP))
5555 return;
5556
5557 if (!netif_device_present(dev))
5558 return;
5559
5560 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
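		/* The device cannot filter extra unicast addresses itself,
		 * so enter promiscuous mode while the unicast list is
		 * non-empty and leave it again once the list drains.
		 */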
5564 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
5565 __dev_set_promiscuity(dev, 1, false);
5566 dev->uc_promisc = true;
5567 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
5568 __dev_set_promiscuity(dev, -1, false);
5569 dev->uc_promisc = false;
5570 }
5571 }
5572
5573 if (ops->ndo_set_rx_mode)
5574 ops->ndo_set_rx_mode(dev);
5575}
5576
5577void dev_set_rx_mode(struct net_device *dev)
5578{
5579 netif_addr_lock_bh(dev);
5580 __dev_set_rx_mode(dev);
5581 netif_addr_unlock_bh(dev);
5582}
5583
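/**
 * dev_get_flags - get flags reported to userspace
 * @dev: device
 *
 * Get the combination of flag bits exported through APIs to userspace:
 * the device flags merged with the IFF_PROMISC/IFF_ALLMULTI state kept
 * in dev->gflags and the current RUNNING/LOWER_UP/DORMANT state.
 */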
5590unsigned int dev_get_flags(const struct net_device *dev)
5591{
5592 unsigned int flags;
5593
5594 flags = (dev->flags & ~(IFF_PROMISC |
5595 IFF_ALLMULTI |
5596 IFF_RUNNING |
5597 IFF_LOWER_UP |
5598 IFF_DORMANT)) |
5599 (dev->gflags & (IFF_PROMISC |
5600 IFF_ALLMULTI));
5601
5602 if (netif_running(dev)) {
5603 if (netif_oper_up(dev))
5604 flags |= IFF_RUNNING;
5605 if (netif_carrier_ok(dev))
5606 flags |= IFF_LOWER_UP;
5607 if (netif_dormant(dev))
5608 flags |= IFF_DORMANT;
5609 }
5610
5611 return flags;
5612}
5613EXPORT_SYMBOL(dev_get_flags);
5614
5615int __dev_change_flags(struct net_device *dev, unsigned int flags)
5616{
5617 unsigned int old_flags = dev->flags;
5618 int ret;
5619
5620 ASSERT_RTNL();
5621
5622
5623
5624
5625
5626 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5627 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5628 IFF_AUTOMEDIA)) |
5629 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5630 IFF_ALLMULTI));
5631
5632
5633
5634
5635
5636 if ((old_flags ^ flags) & IFF_MULTICAST)
5637 dev_change_rx_flags(dev, IFF_MULTICAST);
5638
5639 dev_set_rx_mode(dev);
5640
5641
5642
5643
5644
5645
5646
5647 ret = 0;
5648 if ((old_flags ^ flags) & IFF_UP)
5649 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5650
5651 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5652 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5653 unsigned int old_flags = dev->flags;
5654
5655 dev->gflags ^= IFF_PROMISC;
5656
5657 if (__dev_set_promiscuity(dev, inc, false) >= 0)
5658 if (dev->flags != old_flags)
5659 dev_set_rx_mode(dev);
5660 }
5661
5662
5663
5664
5665
5666 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5667 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5668
5669 dev->gflags ^= IFF_ALLMULTI;
5670 __dev_set_allmulti(dev, inc, false);
5671 }
5672
5673 return ret;
5674}
5675
5676void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
5677 unsigned int gchanges)
5678{
5679 unsigned int changes = dev->flags ^ old_flags;
5680
5681 if (gchanges)
5682 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
5683
5684 if (changes & IFF_UP) {
5685 if (dev->flags & IFF_UP)
5686 call_netdevice_notifiers(NETDEV_UP, dev);
5687 else
5688 call_netdevice_notifiers(NETDEV_DOWN, dev);
5689 }
5690
5691 if (dev->flags & IFF_UP &&
5692 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5693 struct netdev_notifier_change_info change_info;
5694
5695 change_info.flags_changed = changes;
5696 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5697 &change_info.info);
5698 }
5699}
5700
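/**
 * dev_change_flags - change device settings
 * @dev: device
 * @flags: device state flags
 *
 * Change settings on a device based on state flags in the userspace
 * exported format. Returns zero on success and a negative errno code on
 * failure; on success the appropriate notifications are sent.
 */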
5709int dev_change_flags(struct net_device *dev, unsigned int flags)
5710{
5711 int ret;
5712 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
5713
5714 ret = __dev_change_flags(dev, flags);
5715 if (ret < 0)
5716 return ret;
5717
5718 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
5719 __dev_notify_flags(dev, old_flags, changes);
5720 return ret;
5721}
5722EXPORT_SYMBOL(dev_change_flags);
5723
5724static int __dev_set_mtu(struct net_device *dev, int new_mtu)
5725{
5726 const struct net_device_ops *ops = dev->netdev_ops;
5727
5728 if (ops->ndo_change_mtu)
5729 return ops->ndo_change_mtu(dev, new_mtu);
5730
5731 dev->mtu = new_mtu;
5732 return 0;
5733}
5734
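/**
 * dev_set_mtu - Change maximum transfer unit
 * @dev: device
 * @new_mtu: new transfer unit
 *
 * Change the maximum transfer size of the network device. Notifiers are
 * given a chance to veto the change both before and after it is applied;
 * if the change is refused afterwards the old MTU is restored.
 * Returns zero on success and a negative errno code on failure.
 */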
5742int dev_set_mtu(struct net_device *dev, int new_mtu)
5743{
5744 int err, orig_mtu;
5745
5746 if (new_mtu == dev->mtu)
5747 return 0;
5748
5749
5750 if (new_mtu < 0)
5751 return -EINVAL;
5752
5753 if (!netif_device_present(dev))
5754 return -ENODEV;
5755
5756 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
5757 err = notifier_to_errno(err);
5758 if (err)
5759 return err;
5760
5761 orig_mtu = dev->mtu;
5762 err = __dev_set_mtu(dev, new_mtu);
5763
5764 if (!err) {
5765 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5766 err = notifier_to_errno(err);
5767 if (err) {
5768
5769
5770
5771 __dev_set_mtu(dev, orig_mtu);
5772 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5773 }
5774 }
5775 return err;
5776}
5777EXPORT_SYMBOL(dev_set_mtu);
5778
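/**
 * dev_set_group - Change group this device belongs to
 * @dev: device
 * @new_group: group this device should belong to
 */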
5784void dev_set_group(struct net_device *dev, int new_group)
5785{
5786 dev->group = new_group;
5787}
5788EXPORT_SYMBOL(dev_set_group);
5789
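/**
 * dev_set_mac_address - Change Media Access Control Address
 * @dev: device
 * @sa: new address
 *
 * Change the hardware (MAC) address of the device. The address family in
 * @sa must match the device type. Returns zero on success and a negative
 * errno code on failure; on success a %NETDEV_CHANGEADDR notification is
 * sent.
 */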
5797int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5798{
5799 const struct net_device_ops *ops = dev->netdev_ops;
5800 int err;
5801
5802 if (!ops->ndo_set_mac_address)
5803 return -EOPNOTSUPP;
5804 if (sa->sa_family != dev->type)
5805 return -EINVAL;
5806 if (!netif_device_present(dev))
5807 return -ENODEV;
5808 err = ops->ndo_set_mac_address(dev, sa);
5809 if (err)
5810 return err;
5811 dev->addr_assign_type = NET_ADDR_SET;
5812 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5813 add_device_randomness(dev->dev_addr, dev->addr_len);
5814 return 0;
5815}
5816EXPORT_SYMBOL(dev_set_mac_address);
5817
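/**
 * dev_change_carrier - Change device carrier
 * @dev: device
 * @new_carrier: new value
 *
 * Change device carrier via the driver's ndo_change_carrier operation.
 * Returns -EOPNOTSUPP if the driver does not implement it and -ENODEV
 * if the device is not present.
 */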
5825int dev_change_carrier(struct net_device *dev, bool new_carrier)
5826{
5827 const struct net_device_ops *ops = dev->netdev_ops;
5828
5829 if (!ops->ndo_change_carrier)
5830 return -EOPNOTSUPP;
5831 if (!netif_device_present(dev))
5832 return -ENODEV;
5833 return ops->ndo_change_carrier(dev, new_carrier);
5834}
5835EXPORT_SYMBOL(dev_change_carrier);
5836
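/**
 * dev_get_phys_port_id - Get device physical port ID
 * @dev: device
 * @ppid: port ID
 *
 * Get the device physical port ID from the driver. Returns -EOPNOTSUPP
 * if the driver does not implement ndo_get_phys_port_id.
 */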
5844int dev_get_phys_port_id(struct net_device *dev,
5845 struct netdev_phys_item_id *ppid)
5846{
5847 const struct net_device_ops *ops = dev->netdev_ops;
5848
5849 if (!ops->ndo_get_phys_port_id)
5850 return -EOPNOTSUPP;
5851 return ops->ndo_get_phys_port_id(dev, ppid);
5852}
5853EXPORT_SYMBOL(dev_get_phys_port_id);
5854
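/**
 * dev_new_index - allocate an ifindex
 * @net: the applicable net namespace
 *
 * Returns a suitable unique value for a new device interface number.
 * The caller must hold the rtnl semaphore or the dev_base_lock to be
 * sure it remains unique.
 */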
5863static int dev_new_index(struct net *net)
5864{
5865 int ifindex = net->ifindex;
5866 for (;;) {
5867 if (++ifindex <= 0)
5868 ifindex = 1;
5869 if (!__dev_get_by_index(net, ifindex))
5870 return net->ifindex = ifindex;
5871 }
5872}
5873
5874
5875static LIST_HEAD(net_todo_list);
5876DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5877
5878static void net_set_todo(struct net_device *dev)
5879{
5880 list_add_tail(&dev->todo_list, &net_todo_list);
5881 dev_net(dev)->dev_unreg_count++;
5882}
5883
5884static void rollback_registered_many(struct list_head *head)
5885{
5886 struct net_device *dev, *tmp;
5887 LIST_HEAD(close_head);
5888
5889 BUG_ON(dev_boot_phase);
5890 ASSERT_RTNL();
5891
5892 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5893
5894
5895
5896
5897 if (dev->reg_state == NETREG_UNINITIALIZED) {
5898 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5899 dev->name, dev);
5900
5901 WARN_ON(1);
5902 list_del(&dev->unreg_list);
5903 continue;
5904 }
5905 dev->dismantle = true;
5906 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5907 }
5908
5909
5910 list_for_each_entry(dev, head, unreg_list)
5911 list_add_tail(&dev->close_list, &close_head);
5912 dev_close_many(&close_head);
5913
5914 list_for_each_entry(dev, head, unreg_list) {
5915
5916 unlist_netdevice(dev);
5917
5918 dev->reg_state = NETREG_UNREGISTERING;
5919 }
5920
5921 synchronize_net();
5922
5923 list_for_each_entry(dev, head, unreg_list) {
5924 struct sk_buff *skb = NULL;
5925
5926
5927 dev_shutdown(dev);
5928
5929
5930
5931
5932
5933 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5934
5935 if (!dev->rtnl_link_ops ||
5936 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5937 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
5938 GFP_KERNEL);
5939
5940
5941
5942
5943 dev_uc_flush(dev);
5944 dev_mc_flush(dev);
5945
5946 if (dev->netdev_ops->ndo_uninit)
5947 dev->netdev_ops->ndo_uninit(dev);
5948
5949 if (skb)
5950 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
5951
5952
5953 WARN_ON(netdev_has_any_upper_dev(dev));
5954
5955
5956 netdev_unregister_kobject(dev);
5957#ifdef CONFIG_XPS
5958
5959 netif_reset_xps_queues_gt(dev, 0);
5960#endif
5961 }
5962
5963 synchronize_net();
5964
5965 list_for_each_entry(dev, head, unreg_list)
5966 dev_put(dev);
5967}
5968
5969static void rollback_registered(struct net_device *dev)
5970{
5971 LIST_HEAD(single);
5972
5973 list_add(&dev->unreg_list, &single);
5974 rollback_registered_many(&single);
5975 list_del(&single);
5976}
5977
5978static netdev_features_t netdev_fix_features(struct net_device *dev,
5979 netdev_features_t features)
5980{
5981
5982 if ((features & NETIF_F_HW_CSUM) &&
5983 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5984 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5985 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5986 }
5987
5988
5989 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5990 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5991 features &= ~NETIF_F_ALL_TSO;
5992 }
5993
5994 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5995 !(features & NETIF_F_IP_CSUM)) {
5996 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5997 features &= ~NETIF_F_TSO;
5998 features &= ~NETIF_F_TSO_ECN;
5999 }
6000
6001 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
6002 !(features & NETIF_F_IPV6_CSUM)) {
6003 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
6004 features &= ~NETIF_F_TSO6;
6005 }
6006
6007
6008 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6009 features &= ~NETIF_F_TSO_ECN;
6010
6011
6012 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
6013 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
6014 features &= ~NETIF_F_GSO;
6015 }
6016
6017
6018 if (features & NETIF_F_UFO) {
6019
6020 if (!((features & NETIF_F_GEN_CSUM) ||
6021 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
6022 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
6023 netdev_dbg(dev,
6024 "Dropping NETIF_F_UFO since no checksum offload features.\n");
6025 features &= ~NETIF_F_UFO;
6026 }
6027
6028 if (!(features & NETIF_F_SG)) {
6029 netdev_dbg(dev,
6030 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
6031 features &= ~NETIF_F_UFO;
6032 }
6033 }
6034
6035#ifdef CONFIG_NET_RX_BUSY_POLL
6036 if (dev->netdev_ops->ndo_busy_poll)
6037 features |= NETIF_F_BUSY_POLL;
6038 else
6039#endif
6040 features &= ~NETIF_F_BUSY_POLL;
6041
6042 return features;
6043}
6044
6045int __netdev_update_features(struct net_device *dev)
6046{
6047 netdev_features_t features;
6048 int err = 0;
6049
6050 ASSERT_RTNL();
6051
6052 features = netdev_get_wanted_features(dev);
6053
6054 if (dev->netdev_ops->ndo_fix_features)
6055 features = dev->netdev_ops->ndo_fix_features(dev, features);
6056
6057
6058 features = netdev_fix_features(dev, features);
6059
6060 if (dev->features == features)
6061 return 0;
6062
6063 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6064 &dev->features, &features);
6065
6066 if (dev->netdev_ops->ndo_set_features)
6067 err = dev->netdev_ops->ndo_set_features(dev, features);
6068
6069 if (unlikely(err < 0)) {
6070 netdev_err(dev,
6071 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6072 err, &features, &dev->features);
6073 return -1;
6074 }
6075
6076 if (!err)
6077 dev->features = features;
6078
6079 return 1;
6080}
6081
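/**
 * netdev_update_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculate dev->features and send notifications if it changed.
 * Should be called after driver or hardware dependent conditions that
 * influence the features might have changed.
 */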
6090void netdev_update_features(struct net_device *dev)
6091{
6092 if (__netdev_update_features(dev))
6093 netdev_features_change(dev);
6094}
6095EXPORT_SYMBOL(netdev_update_features);
6096
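/**
 * netdev_change_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculate dev->features and unconditionally send a feature-change
 * notification, even if the feature set did not change.
 */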
6107void netdev_change_features(struct net_device *dev)
6108{
6109 __netdev_update_features(dev);
6110 netdev_features_change(dev);
6111}
6112EXPORT_SYMBOL(netdev_change_features);
6113
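/**
 * netif_stacked_transfer_operstate - transfer operstate
 * @rootdev: the root or lower level device to transfer state from
 * @dev: the device to transfer operstate to
 *
 * Transfer the dormant and carrier state from @rootdev to @dev. Normally
 * called when a stacking relationship exists between the two devices.
 */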
6123void netif_stacked_transfer_operstate(const struct net_device *rootdev,
6124 struct net_device *dev)
6125{
6126 if (rootdev->operstate == IF_OPER_DORMANT)
6127 netif_dormant_on(dev);
6128 else
6129 netif_dormant_off(dev);
6130
6131 if (netif_carrier_ok(rootdev)) {
6132 if (!netif_carrier_ok(dev))
6133 netif_carrier_on(dev);
6134 } else {
6135 if (netif_carrier_ok(dev))
6136 netif_carrier_off(dev);
6137 }
6138}
6139EXPORT_SYMBOL(netif_stacked_transfer_operstate);
6140
6141#ifdef CONFIG_SYSFS
6142static int netif_alloc_rx_queues(struct net_device *dev)
6143{
6144 unsigned int i, count = dev->num_rx_queues;
6145 struct netdev_rx_queue *rx;
6146
6147 BUG_ON(count < 1);
6148
6149 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
6150 if (!rx)
6151 return -ENOMEM;
6152
6153 dev->_rx = rx;
6154
6155 for (i = 0; i < count; i++)
6156 rx[i].dev = dev;
6157 return 0;
6158}
6159#endif
6160
6161static void netdev_init_one_queue(struct net_device *dev,
6162 struct netdev_queue *queue, void *_unused)
6163{
6164
6165 spin_lock_init(&queue->_xmit_lock);
6166 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
6167 queue->xmit_lock_owner = -1;
6168 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
6169 queue->dev = dev;
6170#ifdef CONFIG_BQL
6171 dql_init(&queue->dql, HZ);
6172#endif
6173}
6174
6175static void netif_free_tx_queues(struct net_device *dev)
6176{
6177 kvfree(dev->_tx);
6178}
6179
6180static int netif_alloc_netdev_queues(struct net_device *dev)
6181{
6182 unsigned int count = dev->num_tx_queues;
6183 struct netdev_queue *tx;
6184 size_t sz = count * sizeof(*tx);
6185
6186 BUG_ON(count < 1 || count > 0xffff);
6187
6188 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6189 if (!tx) {
6190 tx = vzalloc(sz);
6191 if (!tx)
6192 return -ENOMEM;
6193 }
6194 dev->_tx = tx;
6195
6196 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
6197 spin_lock_init(&dev->tx_global_lock);
6198
6199 return 0;
6200}
6201
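/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * Callers must hold the rtnl semaphore. You may want
 * register_netdev() instead of this.
 */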
6219int register_netdevice(struct net_device *dev)
6220{
6221 int ret;
6222 struct net *net = dev_net(dev);
6223
6224 BUG_ON(dev_boot_phase);
6225 ASSERT_RTNL();
6226
6227 might_sleep();
6228
6229
6230 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
6231 BUG_ON(!net);
6232
6233 spin_lock_init(&dev->addr_list_lock);
6234 netdev_set_addr_lockdep_class(dev);
6235
6236 dev->iflink = -1;
6237
6238 ret = dev_get_valid_name(net, dev, dev->name);
6239 if (ret < 0)
6240 goto out;
6241
6242
6243 if (dev->netdev_ops->ndo_init) {
6244 ret = dev->netdev_ops->ndo_init(dev);
6245 if (ret) {
6246 if (ret > 0)
6247 ret = -EIO;
6248 goto out;
6249 }
6250 }
6251
6252 if (((dev->hw_features | dev->features) &
6253 NETIF_F_HW_VLAN_CTAG_FILTER) &&
6254 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
6255 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
6256 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
6257 ret = -EINVAL;
6258 goto err_uninit;
6259 }
6260
6261 ret = -EBUSY;
6262 if (!dev->ifindex)
6263 dev->ifindex = dev_new_index(net);
6264 else if (__dev_get_by_index(net, dev->ifindex))
6265 goto err_uninit;
6266
6267 if (dev->iflink == -1)
6268 dev->iflink = dev->ifindex;
6269
6270
6271
6272
6273 dev->hw_features |= NETIF_F_SOFT_FEATURES;
6274 dev->features |= NETIF_F_SOFT_FEATURES;
6275 dev->wanted_features = dev->features & dev->hw_features;
6276
6277	if (!(dev->flags & IFF_LOOPBACK))
6278		dev->hw_features |= NETIF_F_NOCACHE_COPY;
6280
6281
6282
6283 dev->vlan_features |= NETIF_F_HIGHDMA;
6284
6285
6286
6287 dev->hw_enc_features |= NETIF_F_SG;
6288
6289
6290
6291 dev->mpls_features |= NETIF_F_SG;
6292
6293 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
6294 ret = notifier_to_errno(ret);
6295 if (ret)
6296 goto err_uninit;
6297
6298 ret = netdev_register_kobject(dev);
6299 if (ret)
6300 goto err_uninit;
6301 dev->reg_state = NETREG_REGISTERED;
6302
6303 __netdev_update_features(dev);
6304
6305
6306
6307
6308
6309
6310 set_bit(__LINK_STATE_PRESENT, &dev->state);
6311
6312 linkwatch_init_dev(dev);
6313
6314 dev_init_scheduler(dev);
6315 dev_hold(dev);
6316 list_netdevice(dev);
6317 add_device_randomness(dev->dev_addr, dev->addr_len);
6318
6319
6320
6321
6322
6323 if (dev->addr_assign_type == NET_ADDR_PERM)
6324 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
6325
6326
6327 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
6328 ret = notifier_to_errno(ret);
6329 if (ret) {
6330 rollback_registered(dev);
6331 dev->reg_state = NETREG_UNREGISTERED;
6332 }
6333
6334
6335
6336
6337 if (!dev->rtnl_link_ops ||
6338 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6339 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
6340
6341out:
6342 return ret;
6343
6344err_uninit:
6345 if (dev->netdev_ops->ndo_uninit)
6346 dev->netdev_ops->ndo_uninit(dev);
6347 goto out;
6348}
6349EXPORT_SYMBOL(register_netdevice);
6350
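/**
 * init_dummy_netdev - init a dummy network device for NAPI
 * @dev: device to init
 *
 * This takes a network device structure and initializes the minimum
 * amount of fields so it can be used to schedule NAPI polls without
 * registering a full blown interface.
 */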
6361int init_dummy_netdev(struct net_device *dev)
6362{
6363
6364
6365
6366
6367
6368 memset(dev, 0, sizeof(struct net_device));
6369
6370
6371
6372
6373 dev->reg_state = NETREG_DUMMY;
6374
6375
6376 INIT_LIST_HEAD(&dev->napi_list);
6377
6378
6379 set_bit(__LINK_STATE_PRESENT, &dev->state);
6380 set_bit(__LINK_STATE_START, &dev->state);
6381
6382
6383
6384
6385
6386
6387 return 0;
6388}
6389EXPORT_SYMBOL_GPL(init_dummy_netdev);
6390
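/**
 * register_netdev - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. This is a wrapper around register_netdevice() that also
 * takes care of acquiring and releasing the rtnl lock.
 * 0 is returned on success, a negative errno code on failure.
 */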
6405int register_netdev(struct net_device *dev)
6406{
6407 int err;
6408
6409 rtnl_lock();
6410 err = register_netdevice(dev);
6411 rtnl_unlock();
6412 return err;
6413}
6414EXPORT_SYMBOL(register_netdev);
6415
6416int netdev_refcnt_read(const struct net_device *dev)
6417{
6418 int i, refcnt = 0;
6419
6420 for_each_possible_cpu(i)
6421 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
6422 return refcnt;
6423}
6424EXPORT_SYMBOL(netdev_refcnt_read);
6425
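/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */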
6438static void netdev_wait_allrefs(struct net_device *dev)
6439{
6440 unsigned long rebroadcast_time, warning_time;
6441 int refcnt;
6442
6443 linkwatch_forget_dev(dev);
6444
6445 rebroadcast_time = warning_time = jiffies;
6446 refcnt = netdev_refcnt_read(dev);
6447
6448 while (refcnt != 0) {
6449 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
6450 rtnl_lock();
6451
6452
6453 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6454
6455 __rtnl_unlock();
6456 rcu_barrier();
6457 rtnl_lock();
6458
6459 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6460 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
6461 &dev->state)) {
6462
6463
6464
6465
6466
6467
6468 linkwatch_run_queue();
6469 }
6470
6471 __rtnl_unlock();
6472
6473 rebroadcast_time = jiffies;
6474 }
6475
6476 msleep(250);
6477
6478 refcnt = netdev_refcnt_read(dev);
6479
6480 if (time_after(jiffies, warning_time + 10 * HZ)) {
6481 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
6482 dev->name, refcnt);
6483 warning_time = jiffies;
6484 }
6485 }
6486}
6487
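/* netdev_run_todo - finish unregistering devices
 *
 * Invoked by rtnl_unlock() after the RTNL lock has been dropped. Walks
 * the devices queued on net_todo_list by net_set_todo(), sends the final
 * NETDEV_UNREGISTER_FINAL notification, waits until all references to
 * each device are gone and then releases it.
 */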
6512void netdev_run_todo(void)
6513{
6514 struct list_head list;
6515
6516
6517 list_replace_init(&net_todo_list, &list);
6518
6519 __rtnl_unlock();
6520
6521
6522
6523 if (!list_empty(&list))
6524 rcu_barrier();
6525
6526 while (!list_empty(&list)) {
6527 struct net_device *dev
6528 = list_first_entry(&list, struct net_device, todo_list);
6529 list_del(&dev->todo_list);
6530
6531 rtnl_lock();
6532 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6533 __rtnl_unlock();
6534
6535 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
6536 pr_err("network todo '%s' but state %d\n",
6537 dev->name, dev->reg_state);
6538 dump_stack();
6539 continue;
6540 }
6541
6542 dev->reg_state = NETREG_UNREGISTERED;
6543
6544 on_each_cpu(flush_backlog, dev, 1);
6545
6546 netdev_wait_allrefs(dev);
6547
6548
6549 BUG_ON(netdev_refcnt_read(dev));
6550 WARN_ON(rcu_access_pointer(dev->ip_ptr));
6551 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
6552 WARN_ON(dev->dn_ptr);
6553
6554 if (dev->destructor)
6555 dev->destructor(dev);
6556
6557
6558 rtnl_lock();
6559 dev_net(dev)->dev_unreg_count--;
6560 __rtnl_unlock();
6561 wake_up(&netdev_unregistering_wq);
6562
6563
6564 kobject_put(&dev->dev.kobj);
6565 }
6566}
6567
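/* Convert net_device_stats to rtnl_link_stats64. They have the same
 * fields in the same order, with only the type differing.
 */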
6571void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
6572 const struct net_device_stats *netdev_stats)
6573{
6574#if BITS_PER_LONG == 64
6575 BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
6576 memcpy(stats64, netdev_stats, sizeof(*stats64));
6577#else
6578 size_t i, n = sizeof(*stats64) / sizeof(u64);
6579 const unsigned long *src = (const unsigned long *)netdev_stats;
6580 u64 *dst = (u64 *)stats64;
6581
6582 BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
6583 sizeof(*stats64) / sizeof(u64));
6584 for (i = 0; i < n; i++)
6585 dst[i] = src[i];
6586#endif
6587}
6588EXPORT_SYMBOL(netdev_stats_to_stats64);
6589
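/**
 * dev_get_stats - get network device statistics
 * @dev: device to get statistics from
 * @storage: place to store stats
 *
 * Get network statistics from device. Return @storage.
 * The device driver may provide its own method by setting
 * dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 * otherwise the internal statistics structure is used.
 */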
6600struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
6601 struct rtnl_link_stats64 *storage)
6602{
6603 const struct net_device_ops *ops = dev->netdev_ops;
6604
6605 if (ops->ndo_get_stats64) {
6606 memset(storage, 0, sizeof(*storage));
6607 ops->ndo_get_stats64(dev, storage);
6608 } else if (ops->ndo_get_stats) {
6609 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
6610 } else {
6611 netdev_stats_to_stats64(storage, &dev->stats);
6612 }
6613 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
6614 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
6615 return storage;
6616}
6617EXPORT_SYMBOL(dev_get_stats);
6618
6619struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
6620{
6621 struct netdev_queue *queue = dev_ingress_queue(dev);
6622
6623#ifdef CONFIG_NET_CLS_ACT
6624 if (queue)
6625 return queue;
6626 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
6627 if (!queue)
6628 return NULL;
6629 netdev_init_one_queue(dev, queue, NULL);
6630 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
6631 queue->qdisc_sleeping = &noop_qdisc;
6632 rcu_assign_pointer(dev->ingress_queue, queue);
6633#endif
6634 return queue;
6635}
6636
6637static const struct ethtool_ops default_ethtool_ops;
6638
6639void netdev_set_default_ethtool_ops(struct net_device *dev,
6640 const struct ethtool_ops *ops)
6641{
6642 if (dev->ethtool_ops == &default_ethtool_ops)
6643 dev->ethtool_ops = ops;
6644}
6645EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
6646
6647void netdev_freemem(struct net_device *dev)
6648{
6649 char *addr = (char *)dev - dev->padded;
6650
6651 kvfree(addr);
6652}
6653
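/**
 * alloc_netdev_mqs - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @name_assign_type: origin of device name
 * @setup: callback to initialize device
 * @txqs: the number of TX subqueues to allocate
 * @rxqs: the number of RX subqueues to allocate
 *
 * Allocates a struct net_device with a private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device.
 */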
6667struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
6668 unsigned char name_assign_type,
6669 void (*setup)(struct net_device *),
6670 unsigned int txqs, unsigned int rxqs)
6671{
6672 struct net_device *dev;
6673 size_t alloc_size;
6674 struct net_device *p;
6675
6676 BUG_ON(strlen(name) >= sizeof(dev->name));
6677
6678 if (txqs < 1) {
6679 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
6680 return NULL;
6681 }
6682
6683#ifdef CONFIG_SYSFS
6684 if (rxqs < 1) {
6685 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
6686 return NULL;
6687 }
6688#endif
6689
6690 alloc_size = sizeof(struct net_device);
6691 if (sizeof_priv) {
6692
6693 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
6694 alloc_size += sizeof_priv;
6695 }
6696
6697 alloc_size += NETDEV_ALIGN - 1;
6698
6699 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6700 if (!p)
6701 p = vzalloc(alloc_size);
6702 if (!p)
6703 return NULL;
6704
6705 dev = PTR_ALIGN(p, NETDEV_ALIGN);
6706 dev->padded = (char *)dev - (char *)p;
6707
6708 dev->pcpu_refcnt = alloc_percpu(int);
6709 if (!dev->pcpu_refcnt)
6710 goto free_dev;
6711
6712 if (dev_addr_init(dev))
6713 goto free_pcpu;
6714
6715 dev_mc_init(dev);
6716 dev_uc_init(dev);
6717
6718 dev_net_set(dev, &init_net);
6719
6720 dev->gso_max_size = GSO_MAX_SIZE;
6721 dev->gso_max_segs = GSO_MAX_SEGS;
6722 dev->gso_min_segs = 0;
6723
6724 INIT_LIST_HEAD(&dev->napi_list);
6725 INIT_LIST_HEAD(&dev->unreg_list);
6726 INIT_LIST_HEAD(&dev->close_list);
6727 INIT_LIST_HEAD(&dev->link_watch_list);
6728 INIT_LIST_HEAD(&dev->adj_list.upper);
6729 INIT_LIST_HEAD(&dev->adj_list.lower);
6730 INIT_LIST_HEAD(&dev->all_adj_list.upper);
6731 INIT_LIST_HEAD(&dev->all_adj_list.lower);
6732 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
6733 setup(dev);
6734
6735 dev->num_tx_queues = txqs;
6736 dev->real_num_tx_queues = txqs;
6737 if (netif_alloc_netdev_queues(dev))
6738 goto free_all;
6739
6740#ifdef CONFIG_SYSFS
6741 dev->num_rx_queues = rxqs;
6742 dev->real_num_rx_queues = rxqs;
6743 if (netif_alloc_rx_queues(dev))
6744 goto free_all;
6745#endif
6746
6747 strcpy(dev->name, name);
6748 dev->name_assign_type = name_assign_type;
6749 dev->group = INIT_NETDEV_GROUP;
6750 if (!dev->ethtool_ops)
6751 dev->ethtool_ops = &default_ethtool_ops;
6752 return dev;
6753
6754free_all:
6755 free_netdev(dev);
6756 return NULL;
6757
6758free_pcpu:
6759 free_percpu(dev->pcpu_refcnt);
6760free_dev:
6761 netdev_freemem(dev);
6762 return NULL;
6763}
6764EXPORT_SYMBOL(alloc_netdev_mqs);
6765
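/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released. If this
 * is the last reference then it will be freed.
 * Must be called in process context.
 */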
6774void free_netdev(struct net_device *dev)
6775{
6776 struct napi_struct *p, *n;
6777
6778 release_net(dev_net(dev));
6779
6780 netif_free_tx_queues(dev);
6781#ifdef CONFIG_SYSFS
6782 kfree(dev->_rx);
6783#endif
6784
6785 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
6786
6787
6788 dev_addr_flush(dev);
6789
6790 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
6791 netif_napi_del(p);
6792
6793 free_percpu(dev->pcpu_refcnt);
6794 dev->pcpu_refcnt = NULL;
6795
6796
6797 if (dev->reg_state == NETREG_UNINITIALIZED) {
6798 netdev_freemem(dev);
6799 return;
6800 }
6801
6802 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
6803 dev->reg_state = NETREG_RELEASED;
6804
6805
6806 put_device(&dev->dev);
6807}
6808EXPORT_SYMBOL(free_netdev);
6809
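/**
 * synchronize_net - Synchronize with packet receive processing
 *
 * Wait for packets currently being received to be done.
 * Does not block later packets from starting.
 */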
6816void synchronize_net(void)
6817{
6818 might_sleep();
6819 if (rtnl_is_locked())
6820 synchronize_rcu_expedited();
6821 else
6822 synchronize_rcu();
6823}
6824EXPORT_SYMBOL(synchronize_net);
6825
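/**
 * unregister_netdevice_queue - remove device from the kernel
 * @dev: device
 * @head: list
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 * If @head is not NULL, the device is queued to be unregistered later.
 *
 * Callers must hold the rtnl semaphore. You may want
 * unregister_netdev() instead of this.
 */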
6839void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
6840{
6841 ASSERT_RTNL();
6842
6843 if (head) {
6844 list_move_tail(&dev->unreg_list, head);
6845 } else {
6846 rollback_registered(dev);
6847
6848 net_set_todo(dev);
6849 }
6850}
6851EXPORT_SYMBOL(unregister_netdevice_queue);
6852
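/**
 * unregister_netdevice_many - unregister many devices
 * @head: list of devices
 *
 * Note: as most callers use a stack allocated list_head, the final
 * list_del() makes sure the stack won't be corrupted later.
 */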
6860void unregister_netdevice_many(struct list_head *head)
6861{
6862 struct net_device *dev;
6863
6864 if (!list_empty(head)) {
6865 rollback_registered_many(head);
6866 list_for_each_entry(dev, head, unreg_list)
6867 net_set_todo(dev);
6868 list_del(head);
6869 }
6870}
6871EXPORT_SYMBOL(unregister_netdevice_many);
6872
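/**
 * unregister_netdev - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * This is just a wrapper for unregister_netdevice that takes
 * the rtnl semaphore. In general you want to use this and not
 * unregister_netdevice.
 */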
6884void unregister_netdev(struct net_device *dev)
6885{
6886 rtnl_lock();
6887 unregister_netdevice(dev);
6888 rtnl_unlock();
6889}
6890EXPORT_SYMBOL(unregister_netdev);
6891
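/**
 * dev_change_net_namespace - move device to a different network namespace
 * @dev: device
 * @net: network namespace
 * @pat: if not NULL, a name pattern to try if the current device name
 *       is already taken in the destination network namespace
 *
 * This function shuts down a device interface and moves it to a new
 * network namespace. On success 0 is returned, on a failure a negative
 * errno code is returned.
 *
 * Callers must hold the rtnl semaphore.
 */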
6906int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
6907{
6908 int err;
6909
6910 ASSERT_RTNL();
6911
6912
6913 err = -EINVAL;
6914 if (dev->features & NETIF_F_NETNS_LOCAL)
6915 goto out;
6916
6917
6918 if (dev->reg_state != NETREG_REGISTERED)
6919 goto out;
6920
6921
6922 err = 0;
6923 if (net_eq(dev_net(dev), net))
6924 goto out;
6925
6926
6927
6928
6929 err = -EEXIST;
6930 if (__dev_get_by_name(net, dev->name)) {
6931
6932 if (!pat)
6933 goto out;
6934 if (dev_get_valid_name(net, dev, pat) < 0)
6935 goto out;
6936 }
6937
6938
6939
6940
6941
6942
6943 dev_close(dev);
6944
6945
6946 err = -ENODEV;
6947 unlist_netdevice(dev);
6948
6949 synchronize_net();
6950
6951
6952 dev_shutdown(dev);
6953
6954
6955
6956
6957
6958
6959
6960
6961 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6962 rcu_barrier();
6963 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
6964 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
6965
6966
6967
6968
6969 dev_uc_flush(dev);
6970 dev_mc_flush(dev);
6971
6972
6973 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
6974 netdev_adjacent_del_links(dev);
6975
6976
6977 dev_net_set(dev, net);
6978
6979
6980 if (__dev_get_by_index(net, dev->ifindex)) {
6981 int iflink = (dev->iflink == dev->ifindex);
6982 dev->ifindex = dev_new_index(net);
6983 if (iflink)
6984 dev->iflink = dev->ifindex;
6985 }
6986
6987
6988 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
6989 netdev_adjacent_add_links(dev);
6990
6991
6992 err = device_rename(&dev->dev, dev->name);
6993 WARN_ON(err);
6994
6995
6996 list_netdevice(dev);
6997
6998
6999 call_netdevice_notifiers(NETDEV_REGISTER, dev);
7000
7001
7002
7003
7004
7005 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7006
7007 synchronize_net();
7008 err = 0;
7009out:
7010 return err;
7011}
7012EXPORT_SYMBOL_GPL(dev_change_net_namespace);
7013
7014static int dev_cpu_callback(struct notifier_block *nfb,
7015 unsigned long action,
7016 void *ocpu)
7017{
7018 struct sk_buff **list_skb;
7019 struct sk_buff *skb;
7020 unsigned int cpu, oldcpu = (unsigned long)ocpu;
7021 struct softnet_data *sd, *oldsd;
7022
7023 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
7024 return NOTIFY_OK;
7025
7026 local_irq_disable();
7027 cpu = smp_processor_id();
7028 sd = &per_cpu(softnet_data, cpu);
7029 oldsd = &per_cpu(softnet_data, oldcpu);
7030
7031
7032 list_skb = &sd->completion_queue;
7033 while (*list_skb)
7034 list_skb = &(*list_skb)->next;
7035
7036 *list_skb = oldsd->completion_queue;
7037 oldsd->completion_queue = NULL;
7038
7039
7040 if (oldsd->output_queue) {
7041 *sd->output_queue_tailp = oldsd->output_queue;
7042 sd->output_queue_tailp = oldsd->output_queue_tailp;
7043 oldsd->output_queue = NULL;
7044 oldsd->output_queue_tailp = &oldsd->output_queue;
7045 }
7046
7047
7048
7049
7050 while (!list_empty(&oldsd->poll_list)) {
7051 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
7052 struct napi_struct,
7053 poll_list);
7054
7055 list_del_init(&napi->poll_list);
7056 if (napi->poll == process_backlog)
7057 napi->state = 0;
7058 else
7059 ____napi_schedule(sd, napi);
7060 }
7061
7062 raise_softirq_irqoff(NET_TX_SOFTIRQ);
7063 local_irq_enable();
7064
7065
7066 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
7067 netif_rx_internal(skb);
7068 input_queue_head_incr(oldsd);
7069 }
7070 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
7071 netif_rx_internal(skb);
7072 input_queue_head_incr(oldsd);
7073 }
7074
7075 return NOTIFY_OK;
7076}
7077
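/**
 * netdev_increment_features - increment feature set by one
 * @all: current feature set
 * @one: new feature set
 * @mask: mask feature set
 *
 * Computes a new feature set after adding a device with feature set
 * @one to the master device with current feature set @all. Will not
 * enable anything that is off in @mask. Returns the new feature set.
 */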
7089netdev_features_t netdev_increment_features(netdev_features_t all,
7090 netdev_features_t one, netdev_features_t mask)
7091{
7092 if (mask & NETIF_F_GEN_CSUM)
7093 mask |= NETIF_F_ALL_CSUM;
7094 mask |= NETIF_F_VLAN_CHALLENGED;
7095
7096 all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
7097 all &= one | ~NETIF_F_ALL_FOR_ALL;
7098
7099
7100 if (all & NETIF_F_GEN_CSUM)
7101 all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
7102
7103 return all;
7104}
7105EXPORT_SYMBOL(netdev_increment_features);
7106
7107static struct hlist_head * __net_init netdev_create_hash(void)
7108{
7109 int i;
7110 struct hlist_head *hash;
7111
7112 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
7113 if (hash != NULL)
7114 for (i = 0; i < NETDEV_HASHENTRIES; i++)
7115 INIT_HLIST_HEAD(&hash[i]);
7116
7117 return hash;
7118}
7119
7120
7121static int __net_init netdev_init(struct net *net)
7122{
7123 if (net != &init_net)
7124 INIT_LIST_HEAD(&net->dev_base_head);
7125
7126 net->dev_name_head = netdev_create_hash();
7127 if (net->dev_name_head == NULL)
7128 goto err_name;
7129
7130 net->dev_index_head = netdev_create_hash();
7131 if (net->dev_index_head == NULL)
7132 goto err_idx;
7133
7134 return 0;
7135
7136err_idx:
7137 kfree(net->dev_name_head);
7138err_name:
7139 return -ENOMEM;
7140}
7141
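/**
 * netdev_drivername - network driver for the device
 * @dev: network device
 *
 * Return the name of the driver bound to @dev's parent device, or an
 * empty string if there is none.
 */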
7148const char *netdev_drivername(const struct net_device *dev)
7149{
7150 const struct device_driver *driver;
7151 const struct device *parent;
7152 const char *empty = "";
7153
7154 parent = dev->dev.parent;
7155 if (!parent)
7156 return empty;
7157
7158 driver = parent->driver;
7159 if (driver && driver->name)
7160 return driver->name;
7161 return empty;
7162}
7163
7164static void __netdev_printk(const char *level, const struct net_device *dev,
7165 struct va_format *vaf)
7166{
7167 if (dev && dev->dev.parent) {
7168 dev_printk_emit(level[1] - '0',
7169 dev->dev.parent,
7170 "%s %s %s%s: %pV",
7171 dev_driver_string(dev->dev.parent),
7172 dev_name(dev->dev.parent),
7173 netdev_name(dev), netdev_reg_state(dev),
7174 vaf);
7175 } else if (dev) {
7176 printk("%s%s%s: %pV",
7177 level, netdev_name(dev), netdev_reg_state(dev), vaf);
7178 } else {
7179 printk("%s(NULL net_device): %pV", level, vaf);
7180 }
7181}
7182
7183void netdev_printk(const char *level, const struct net_device *dev,
7184 const char *format, ...)
7185{
7186 struct va_format vaf;
7187 va_list args;
7188
7189 va_start(args, format);
7190
7191 vaf.fmt = format;
7192 vaf.va = &args;
7193
7194 __netdev_printk(level, dev, &vaf);
7195
7196 va_end(args);
7197}
7198EXPORT_SYMBOL(netdev_printk);
7199
7200#define define_netdev_printk_level(func, level) \
7201void func(const struct net_device *dev, const char *fmt, ...) \
7202{ \
7203 struct va_format vaf; \
7204 va_list args; \
7205 \
7206 va_start(args, fmt); \
7207 \
7208 vaf.fmt = fmt; \
7209 vaf.va = &args; \
7210 \
7211 __netdev_printk(level, dev, &vaf); \
7212 \
7213 va_end(args); \
7214} \
7215EXPORT_SYMBOL(func);
7216
7217define_netdev_printk_level(netdev_emerg, KERN_EMERG);
7218define_netdev_printk_level(netdev_alert, KERN_ALERT);
7219define_netdev_printk_level(netdev_crit, KERN_CRIT);
7220define_netdev_printk_level(netdev_err, KERN_ERR);
7221define_netdev_printk_level(netdev_warn, KERN_WARNING);
7222define_netdev_printk_level(netdev_notice, KERN_NOTICE);
7223define_netdev_printk_level(netdev_info, KERN_INFO);
7224
7225static void __net_exit netdev_exit(struct net *net)
7226{
7227 kfree(net->dev_name_head);
7228 kfree(net->dev_index_head);
7229}
7230
7231static struct pernet_operations __net_initdata netdev_net_ops = {
7232 .init = netdev_init,
7233 .exit = netdev_exit,
7234};
7235
7236static void __net_exit default_device_exit(struct net *net)
7237{
7238 struct net_device *dev, *aux;
7239
7240
7241
7242
7243 rtnl_lock();
7244 for_each_netdev_safe(net, dev, aux) {
7245 int err;
7246 char fb_name[IFNAMSIZ];
7247
7248
7249 if (dev->features & NETIF_F_NETNS_LOCAL)
7250 continue;
7251
7252
7253 if (dev->rtnl_link_ops)
7254 continue;
7255
7256
7257 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
7258 err = dev_change_net_namespace(dev, &init_net, fb_name);
7259 if (err) {
7260 pr_emerg("%s: failed to move %s to init_net: %d\n",
7261 __func__, dev->name, err);
7262 BUG();
7263 }
7264 }
7265 rtnl_unlock();
7266}
7267
7268static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
7269{
7270
7271
7272
7273 struct net *net;
7274 bool unregistering;
7275 DEFINE_WAIT_FUNC(wait, woken_wake_function);
7276
7277 add_wait_queue(&netdev_unregistering_wq, &wait);
7278 for (;;) {
7279 unregistering = false;
7280 rtnl_lock();
7281 list_for_each_entry(net, net_list, exit_list) {
7282 if (net->dev_unreg_count > 0) {
7283 unregistering = true;
7284 break;
7285 }
7286 }
7287 if (!unregistering)
7288 break;
7289 __rtnl_unlock();
7290
7291 wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
7292 }
7293 remove_wait_queue(&netdev_unregistering_wq, &wait);
7294}
7295
7296static void __net_exit default_device_exit_batch(struct list_head *net_list)
7297{
7298
7299
7300
7301
7302
7303 struct net_device *dev;
7304 struct net *net;
7305 LIST_HEAD(dev_kill_list);
7306
7307
7308
7309
7310
7311
7312
7313
7314
7315
7316
7317
7318 rtnl_lock_unregistering(net_list);
7319 list_for_each_entry(net, net_list, exit_list) {
7320 for_each_netdev_reverse(net, dev) {
7321 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
7322 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
7323 else
7324 unregister_netdevice_queue(dev, &dev_kill_list);
7325 }
7326 }
7327 unregister_netdevice_many(&dev_kill_list);
7328 rtnl_unlock();
7329}
7330
7331static struct pernet_operations __net_initdata default_device_ops = {
7332 .exit = default_device_exit,
7333 .exit_batch = default_device_exit_batch,
7334};
7335
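/*
 * Initialize the core networking subsystem: per-cpu softnet data, the
 * packet type and offload lists, the netdev pernet operations and the
 * NET_TX/NET_RX softirqs. Runs once at boot via subsys_initcall().
 */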
7347static int __init net_dev_init(void)
7348{
7349 int i, rc = -ENOMEM;
7350
7351 BUG_ON(!dev_boot_phase);
7352
7353 if (dev_proc_init())
7354 goto out;
7355
7356 if (netdev_kobject_init())
7357 goto out;
7358
7359 INIT_LIST_HEAD(&ptype_all);
7360 for (i = 0; i < PTYPE_HASH_SIZE; i++)
7361 INIT_LIST_HEAD(&ptype_base[i]);
7362
7363 INIT_LIST_HEAD(&offload_base);
7364
7365 if (register_pernet_subsys(&netdev_net_ops))
7366 goto out;
7367
7368
7369
7370
7371
7372 for_each_possible_cpu(i) {
7373 struct softnet_data *sd = &per_cpu(softnet_data, i);
7374
7375 skb_queue_head_init(&sd->input_pkt_queue);
7376 skb_queue_head_init(&sd->process_queue);
7377 INIT_LIST_HEAD(&sd->poll_list);
7378 sd->output_queue_tailp = &sd->output_queue;
7379#ifdef CONFIG_RPS
7380 sd->csd.func = rps_trigger_softirq;
7381 sd->csd.info = sd;
7382 sd->cpu = i;
7383#endif
7384
7385 sd->backlog.poll = process_backlog;
7386 sd->backlog.weight = weight_p;
7387 }
7388
7389 dev_boot_phase = 0;
7390
7391
7392
7393
7394
7395
7396
7397
7398
7399
7400 if (register_pernet_device(&loopback_net_ops))
7401 goto out;
7402
7403 if (register_pernet_device(&default_device_ops))
7404 goto out;
7405
7406 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
7407 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
7408
7409 hotcpu_notifier(dev_cpu_callback, 0);
7410 dst_init();
7411 rc = 0;
7412out:
7413 return rc;
7414}
7415
7416subsys_initcall(net_dev_init);
7417