#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>

#include "net-sysfs.h"

#define MAX_GRO_SKBS 8

#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;
static struct list_head offload_base __read_mostly;

DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);

static seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

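/*
 * Device list insertion/removal. Writers hold the RTNL semaphore plus
 * dev_base_lock; RCU readers may walk the lists locklessly. The
 * per-namespace dev_base_seq generation counter is bumped on every
 * change so that dump operations can detect a concurrent modification.
 */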
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);
}

static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

static RAW_NOTIFIER_HEAD(netdev_chain);

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;

	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}

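/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed
 *	&packet_type is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists. This call does not
 *	sleep, so it cannot guarantee that CPUs in the middle of receiving
 *	packets will see the new handler before the next received packet.
 *
 *	A minimal usage sketch (my_pt and my_rcv are hypothetical names):
 *
 *		static struct packet_type my_pt __read_mostly = {
 *			.type	= cpu_to_be16(ETH_P_IP),
 *			.func	= my_rcv,
 *		};
 *		dev_add_pack(&my_pt);
 */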
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

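/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler previously added with dev_add_pack().
 *	The handler is unlinked but RCU readers may still be traversing
 *	it; use dev_remove_pack() if the handler must be safe to free on
 *	return.
 */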
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

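/**
 *	dev_remove_pack - remove packet handler and wait for readers
 *	@pt: packet type declaration
 *
 *	Like __dev_remove_pack(), but also waits for an RCU grace period
 *	via synchronize_net(), so the handler may be freed once this
 *	returns. May sleep; do not call from interrupt context.
 */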
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

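/*
 * Offload handlers (struct packet_offload, carrying the per-protocol
 * GSO/GRO callbacks) live on the single offload_base list and follow
 * the same add/remove/synchronize pattern as the packet handlers above.
 */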
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);

void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(__dev_remove_offload);

void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);

static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);

unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

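/*
 * Parse a "netdev=irq,base_addr,mem_start,mem_end,name" kernel boot
 * option and stash the settings for netdev_boot_setup_check() to apply
 * when the named device registers.
 */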
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

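/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Returns NULL if the name is not found.
 *	No reference is taken; the caller must hold either the RTNL
 *	semaphore or dev_base_lock to keep the returned pointer valid.
 */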
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

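/**
 *	dev_get_by_name - find a device by its name, taking a reference
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. The returned device has its reference
 *	count bumped; the caller must release it with dev_put() when done.
 *	Returns NULL if the name is not found.
 */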
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

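/**
 *	netdev_get_name - copy an interface name given its ifindex
 *	@net: network namespace
 *	@name: destination buffer of at least IFNAMSIZ bytes
 *	@ifindex: the ifindex of the interface to look up
 *
 *	The devnet_rename_seq seqcount guards against a concurrent
 *	dev_change_name(): if a rename raced with the copy, the read is
 *	simply retried.
 */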
int netdev_get_name(struct net *net, char *name, int ifindex)
{
	struct net_device *dev;
	unsigned int seq;

retry:
	seq = raw_seqcount_begin(&devnet_rename_seq);
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(name, dev->name);
	rcu_read_unlock();
	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
		cond_resched();
		goto retry;
	}

	return 0;
}

struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);

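/**
 *	dev_valid_name - check that a string is a valid interface name
 *	@name: name string
 *
 *	A valid name is non-empty, shorter than IFNAMSIZ, is not "." or
 *	"..", and contains no '/' or whitespace characters (which would
 *	break the procfs and sysfs representations of the device).
 */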
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);

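/*
 * __dev_alloc_name - allocate a unit number for a "name%d" format
 * string. A page-sized bitmap records which unit numbers are already
 * in use in this namespace; the first free bit becomes the unit.
 * For example, "eth%d" yields "eth0", "eth1", ... as names are taken.
 * Returns the unit number on success, -EINVAL for a malformed format,
 * -ENOMEM if the bitmap cannot be allocated, or -ENFILE if no free
 * slot exists.
 */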
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	return -ENFILE;
}

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);

static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

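/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) to use
 *
 *	Rename the device; the device must not be up. Errors are returned
 *	as negative errnos. The rename is propagated to sysfs and to the
 *	NETDEV_CHANGENAME notifier chain; if a notifier refuses the new
 *	name, the rename is rolled back.
 */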
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}

int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}

void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);

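/*
 * __dev_open - bring a device up; called with the RTNL semaphore held.
 * Order matters: the NETDEV_PRE_UP notifiers may veto the open, the
 * hardware address is validated, and ndo_open() runs before the device
 * is marked IFF_UP and its transmit queues are activated.
 */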
static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	netpoll_rx_disable(dev);

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	netpoll_rx_enable(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}

int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);

static int __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, unreg_list) {
		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		smp_mb__after_clear_bit();
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		net_dmaengine_put();
	}

	return 0;
}

static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

	netpoll_rx_disable(dev);

	list_add(&dev->unreg_list, &single);
	retval = __dev_close_many(&single);
	list_del(&single);

	netpoll_rx_enable(dev);
	return retval;
}

static int dev_close_many(struct list_head *head)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(dev, tmp, head, unreg_list)
		if (!(dev->flags & IFF_UP))
			list_move(&dev->unreg_list, &tmp_list);

	__dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	list_splice(&tmp_list, head);
	return 0;
}

int dev_close(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		netpoll_rx_disable(dev);

		list_add(&dev->unreg_list, &single);
		dev_close_many(&single);
		list_del(&single);

		netpoll_rx_enable(dev);
	}
	return 0;
}
EXPORT_SYMBOL(dev_close);

void dev_disable_lro(struct net_device *dev)
{
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);

	dev->wanted_features &= ~NETIF_F_LRO;
	netdev_update_features(dev);

	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);

static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
				   struct net_device *dev)
{
	struct netdev_notifier_info info;

	netdev_notifier_info_init(&info, dev);
	return nb->notifier_call(nb, val, &info);
}

static int dev_boot_phase = 1;

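/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called on network device events. For
 *	devices that already exist, NETDEV_REGISTER (and NETDEV_UP for
 *	running devices) events are replayed so the new subscriber sees a
 *	consistent view. If a replayed event fails, previously notified
 *	devices are unwound with matching DOWN/UNREGISTER events.
 */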
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			call_netdevice_notifier(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	if (err)
		goto unlock;

	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
				  struct netdev_notifier_info *info)
{
	ASSERT_RTNL();
	netdev_notifier_info_init(info, dev);
	return raw_notifier_call_chain(&netdev_chain, val, info);
}
EXPORT_SYMBOL(call_netdevice_notifiers_info);

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	struct netdev_notifier_info info;

	return call_netdevice_notifiers_info(val, dev, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);

static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
#endif

void net_enable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);

	if (deferred) {
		while (--deferred)
			static_key_slow_dec(&netstamp_needed);
		return;
	}
#endif
	static_key_slow_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	if (in_interrupt()) {
		atomic_inc(&netstamp_needed_deferred);
		return;
	}
#endif
	static_key_slow_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp_set(struct sk_buff *skb)
{
	skb->tstamp.tv64 = 0;
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

#define net_timestamp_check(COND, SKB)			\
	if (static_key_false(&netstamp_needed)) {	\
		if ((COND) && !(SKB)->tstamp.tv64)	\
			__net_timestamp(SKB);		\
	}						\

static inline bool is_skb_forwardable(struct net_device *dev,
				      struct sk_buff *skb)
{
	unsigned int len;

	if (!(dev->flags & IFF_UP))
		return false;

	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
	if (skb->len <= len)
		return true;

	if (skb_is_gso(skb))
		return true;

	return false;
}

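/**
 *	dev_forward_skb - loopback an skb to another netif
 *	@dev: destination network device
 *	@skb: buffer to forward
 *
 *	Forward an skb to another device on the same machine, as if it had
 *	just been received on @dev. The packet is scrubbed of state that
 *	must not leak between namespaces before being fed to netif_rx().
 *	Returns NET_RX_SUCCESS (no congestion) or NET_RX_DROP.
 */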
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			atomic_long_inc(&dev->rx_dropped);
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	if (unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb->protocol = eth_type_trans(skb, dev);

	skb_scrub_packet(skb, true);

	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
		return -ENOMEM;
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
	if (!ptype->af_packet_priv || !skb->sk)
		return false;

	if (ptype->id_match)
		return ptype->id_match(ptype, skb->sk);
	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
		return true;

	return false;
}

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;
	struct sk_buff *skb2 = NULL;
	struct packet_type *pt_prev = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if ((ptype->dev == dev || !ptype->dev) &&
		    (!skb_loop_sk(ptype, skb))) {
			if (pt_prev) {
				deliver_skb(skb2, pt_prev, skb->dev);
				pt_prev = ptype;
				continue;
			}

			skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			net_timestamp_set(skb2);

			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb_network_header(skb2) > skb_tail_pointer(skb2)) {
				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
						     ntohs(skb2->protocol),
						     dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			pt_prev = ptype;
		}
	}
	if (pt_prev)
		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
	rcu_read_unlock();
}

static void netif_setup_tc(struct net_device *dev, unsigned int txq)
{
	int i;
	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];

	if (tc->offset + tc->count > txq) {
		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
		dev->num_tc = 0;
		return;
	}

	for (i = 1; i < TC_BITMASK + 1; i++) {
		int q = netdev_get_prio_tc_map(dev, i);

		tc = &dev->tc_to_txq[q];
		if (tc->offset + tc->count > txq) {
			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
				i, q);
			netdev_set_prio_tc_map(dev, i, 0);
		}
	}
}

#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P)		\
	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))

static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
					int cpu, u16 index)
{
	struct xps_map *map = NULL;
	int pos;

	if (dev_maps)
		map = xmap_dereference(dev_maps->cpu_map[cpu]);

	for (pos = 0; map && pos < map->len; pos++) {
		if (map->queues[pos] == index) {
			if (map->len > 1) {
				map->queues[pos] = map->queues[--map->len];
			} else {
				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
				kfree_rcu(map, rcu);
				map = NULL;
			}
			break;
		}
	}

	return map;
}

static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
{
	struct xps_dev_maps *dev_maps;
	int cpu, i;
	bool active = false;

	mutex_lock(&xps_map_mutex);
	dev_maps = xmap_dereference(dev->xps_maps);

	if (!dev_maps)
		goto out_no_maps;

	for_each_possible_cpu(cpu) {
		for (i = index; i < dev->num_tx_queues; i++) {
			if (!remove_xps_queue(dev_maps, cpu, i))
				break;
		}
		if (i == dev->num_tx_queues)
			active = true;
	}

	if (!active) {
		RCU_INIT_POINTER(dev->xps_maps, NULL);
		kfree_rcu(dev_maps, rcu);
	}

	for (i = index; i < dev->num_tx_queues; i++)
		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
					     NUMA_NO_NODE);

out_no_maps:
	mutex_unlock(&xps_map_mutex);
}

static struct xps_map *expand_xps_map(struct xps_map *map,
				      int cpu, u16 index)
{
	struct xps_map *new_map;
	int alloc_len = XPS_MIN_MAP_ALLOC;
	int i, pos;

	for (pos = 0; map && pos < map->len; pos++) {
		if (map->queues[pos] != index)
			continue;
		return map;
	}

	if (map) {
		if (pos < map->alloc_len)
			return map;

		alloc_len = map->alloc_len * 2;
	}

	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
			       cpu_to_node(cpu));
	if (!new_map)
		return NULL;

	for (i = 0; i < pos; i++)
		new_map->queues[i] = map->queues[i];
	new_map->alloc_len = alloc_len;
	new_map->len = pos;

	return new_map;
}

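/*
 * netif_set_xps_queue - set the CPU affinity map for a transmit queue.
 * Builds a new xps_dev_maps that adds @index to the per-CPU queue lists
 * for every online CPU in @mask, publishes it with RCU, then prunes the
 * queue from CPUs that fell out of the mask. Old maps are freed after a
 * grace period via kfree_rcu().
 */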
int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
			u16 index)
{
	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
	struct xps_map *map, *new_map;
	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
	int cpu, numa_node_id = -2;
	bool active = false;

	mutex_lock(&xps_map_mutex);

	dev_maps = xmap_dereference(dev->xps_maps);

	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, mask))
			continue;

		if (!new_dev_maps)
			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
		if (!new_dev_maps) {
			mutex_unlock(&xps_map_mutex);
			return -ENOMEM;
		}

		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
				 NULL;

		map = expand_xps_map(map, cpu, index);
		if (!map)
			goto error;

		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
	}

	if (!new_dev_maps)
		goto out_no_new_maps;

	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
			int pos = 0;

			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
			while ((pos < map->len) && (map->queues[pos] != index))
				pos++;

			if (pos == map->len)
				map->queues[map->len++] = index;
#ifdef CONFIG_NUMA
			if (numa_node_id == -2)
				numa_node_id = cpu_to_node(cpu);
			else if (numa_node_id != cpu_to_node(cpu))
				numa_node_id = -1;
#endif
		} else if (dev_maps) {
			map = xmap_dereference(dev_maps->cpu_map[cpu]);
			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
		}
	}

	rcu_assign_pointer(dev->xps_maps, new_dev_maps);

	if (dev_maps) {
		for_each_possible_cpu(cpu) {
			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
			map = xmap_dereference(dev_maps->cpu_map[cpu]);
			if (map && map != new_map)
				kfree_rcu(map, rcu);
		}

		kfree_rcu(dev_maps, rcu);
	}

	dev_maps = new_dev_maps;
	active = true;

out_no_new_maps:
	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
				     (numa_node_id >= 0) ? numa_node_id :
				     NUMA_NO_NODE);

	if (!dev_maps)
		goto out_no_maps;

	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
			continue;

		if (remove_xps_queue(dev_maps, cpu, index))
			active = true;
	}

	if (!active) {
		RCU_INIT_POINTER(dev->xps_maps, NULL);
		kfree_rcu(dev_maps, rcu);
	}

out_no_maps:
	mutex_unlock(&xps_map_mutex);

	return 0;
error:
	for_each_possible_cpu(cpu) {
		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
				 NULL;
		if (new_map && new_map != map)
			kfree(new_map);
	}

	mutex_unlock(&xps_map_mutex);

	kfree(new_dev_maps);
	return -ENOMEM;
}
EXPORT_SYMBOL(netif_set_xps_queue);

#endif

int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
	int rc;

	if (txq < 1 || txq > dev->num_tx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED ||
	    dev->reg_state == NETREG_UNREGISTERING) {
		ASSERT_RTNL();

		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
						  txq);
		if (rc)
			return rc;

		if (dev->num_tc)
			netif_setup_tc(dev, txq);

		if (txq < dev->real_num_tx_queues) {
			qdisc_reset_all_tx_gt(dev, txq);
#ifdef CONFIG_XPS
			netif_reset_xps_queues_gt(dev, txq);
#endif
		}
	}

	dev->real_num_tx_queues = txq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);

#ifdef CONFIG_RPS
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif

int netif_get_num_default_rss_queues(void)
{
	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);

static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

static void skb_warn_bad_offload(const struct sk_buff *skb)
{
	static const netdev_features_t null_features = 0;
	struct net_device *dev = skb->dev;
	const char *driver = "";

	if (!net_ratelimit())
		return;

	if (dev && dev->dev.parent)
		driver = dev_driver_string(dev->dev.parent);

	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
	     "gso_type=%d ip_summed=%d\n",
	     driver, dev ? &dev->features : &null_features,
	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
	     skb_shinfo(skb)->gso_type, skb->ip_summed);
}

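/**
 *	skb_checksum_help - compute a checksum in software
 *	@skb: packet to process
 *
 *	Fill in the missing checksum for a CHECKSUM_PARTIAL packet when
 *	the device lacks the needed hardware offload. The checksum is
 *	computed over [csum_start, end of packet] and stored at
 *	csum_start + csum_offset, after which ip_summed is set to
 *	CHECKSUM_NONE.
 */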
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	if (skb_has_shared_frag(skb)) {
		ret = __skb_linearize(skb);
		if (ret)
			goto out;
	}

	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);

__be16 skb_network_protocol(struct sk_buff *skb)
{
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;

	if (type == htons(ETH_P_TEB)) {
		struct ethhdr *eth;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
			return 0;

		eth = (struct ethhdr *)skb_mac_header(skb);
		type = eth->h_proto;
	}

	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return 0;

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	return type;
}

struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
				    netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_offload *ptype;
	__be16 type = skb_network_protocol(skb);

	if (unlikely(!type))
		return ERR_PTR(-EINVAL);

	__skb_pull(skb, skb->mac_len);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &offload_base, list) {
		if (ptype->type == type && ptype->callbacks.gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				int err;

				err = ptype->callbacks.gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->callbacks.gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_mac_gso_segment);

static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
	if (tx_path)
		return skb->ip_summed != CHECKSUM_PARTIAL;
	else
		return skb->ip_summed == CHECKSUM_NONE;
}

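/**
 *	__skb_gso_segment - perform segmentation on an skb
 *	@skb: buffer to segment
 *	@features: features of the target device
 *	@tx_path: whether this is called from the transmit path
 *
 *	Split a GSO skb into a list of segments. Returns the segment list
 *	on success or an ERR_PTR on failure; @skb itself is left intact.
 */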
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
				  netdev_features_t features, bool tx_path)
{
	if (unlikely(skb_needs_check(skb, tx_path))) {
		int err;

		skb_warn_bad_offload(skb);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	return skb_mac_gso_segment(skb, features);
}
EXPORT_SYMBOL(__skb_gso_segment);

#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, features);

	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

static netdev_features_t harmonize_features(struct sk_buff *skb,
					    netdev_features_t features)
{
	if (skb->ip_summed != CHECKSUM_NONE &&
	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
		features &= ~NETIF_F_ALL_CSUM;
	} else if (illegal_highdma(skb->dev, skb)) {
		features &= ~NETIF_F_SG;
	}

	return features;
}

netdev_features_t netif_skb_features(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;
	netdev_features_t features = skb->dev->features;

	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
		features &= ~NETIF_F_GSO_MASK;

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, features);
	}

	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
				NETIF_F_HW_VLAN_STAG_TX;

	return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);

static inline int skb_needs_linearize(struct sk_buff *skb,
				      netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
			((skb_has_frag_list(skb) &&
				!(features & NETIF_F_FRAGLIST)) ||
			(skb_shinfo(skb)->nr_frags &&
				!(features & NETIF_F_SG)));
}

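/*
 * dev_hard_start_xmit - hand an skb (or a chain of GSO segments) to the
 * driver's ndo_start_xmit(). Performs last-chance fixups the device
 * cannot do in hardware: VLAN tag insertion, GSO segmentation,
 * linearization and software checksumming. Called with the tx queue
 * lock held (or locklessly for NETIF_F_LLTX drivers).
 */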
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		netdev_features_t features;

		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		features = netif_skb_features(skb);

		if (vlan_tx_tag_present(skb) &&
		    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
			skb = __vlan_put_tag(skb, skb->vlan_proto,
					     vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		if (skb->encapsulation)
			features &= dev->hw_enc_features;

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				if (skb->encapsulation)
					skb_set_inner_transport_header(skb,
						skb_checksum_start_offset(skb));
				else
					skb_set_transport_header(skb,
						skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				    skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		skb_len = skb->len;
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(nskb, dev);

		skb_len = nskb->len;
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_xmit_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	if (likely(skb->next == NULL)) {
		skb->destructor = DEV_GSO_CB(skb)->destructor;
		consume_skb(skb);
		return rc;
	}
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}

static void qdisc_pkt_len_init(struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	qdisc_skb_cb(skb)->pkt_len = skb->len;

	if (shinfo->gso_size) {
		unsigned int hdr_len;
		u16 gso_segs = shinfo->gso_segs;

		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
			hdr_len += tcp_hdrlen(skb);
		else
			hdr_len += sizeof(struct udphdr);

		if (shinfo->gso_type & SKB_GSO_DODGY)
			gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
						shinfo->gso_size);

		qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
	}
}

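/*
 * __dev_xmit_skb - enqueue an skb on a qdisc and kick transmission.
 * Uses q->busylock to keep other CPUs from spinning on the qdisc root
 * lock while one CPU drains the queue, and takes the TCQ_F_CAN_BYPASS
 * fast path (transmit directly, skipping the queue) when the qdisc is
 * empty and not already running.
 */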
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_pkt_len_init(skb);
	qdisc_calculate_pkt_len(skb, q);

	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
static void skb_update_prio(struct sk_buff *skb)
{
	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);

	if (!skb->priority && skb->sk && map) {
		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;

		if (prioidx < map->priomap_len)
			skb->priority = map->priomap[prioidx];
	}
}
#else
#define skb_update_prio(skb)
#endif

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10

int dev_loopback_xmit(struct sk_buff *skb)
{
	skb_reset_mac_header(skb);
	__skb_pull(skb, skb_network_offset(skb));
	skb->pkt_type = PACKET_LOOPBACK;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(skb));
	skb_dst_force(skb);
	netif_rx_ni(skb);
	return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);

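/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller
 *	must have set the device and priority and built the buffer before
 *	calling this function; it can be called from interrupt context.
 *	On error a negative errno is returned and the buffer has been
 *	freed, so this is not a drop-in replacement for a raw
 *	ndo_start_xmit() call. A positive NET_XMIT_* return only means
 *	the queueing discipline accepted the packet; it may still be
 *	dropped later, e.g. under congestion.
 */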
2804int dev_queue_xmit(struct sk_buff *skb)
2805{
2806 struct net_device *dev = skb->dev;
2807 struct netdev_queue *txq;
2808 struct Qdisc *q;
2809 int rc = -ENOMEM;
2810
2811 skb_reset_mac_header(skb);
2812
2813
2814
2815
2816 rcu_read_lock_bh();
2817
2818 skb_update_prio(skb);
2819
2820 txq = netdev_pick_tx(dev, skb);
2821 q = rcu_dereference_bh(txq->qdisc);
2822
2823#ifdef CONFIG_NET_CLS_ACT
2824 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2825#endif
2826 trace_net_dev_queue(skb);
2827 if (q->enqueue) {
2828 rc = __dev_xmit_skb(skb, q, dev, txq);
2829 goto out;
2830 }
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844 if (dev->flags & IFF_UP) {
2845 int cpu = smp_processor_id();
2846
2847 if (txq->xmit_lock_owner != cpu) {
2848
2849 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2850 goto recursion_alert;
2851
2852 HARD_TX_LOCK(dev, txq, cpu);
2853
2854 if (!netif_xmit_stopped(txq)) {
2855 __this_cpu_inc(xmit_recursion);
2856 rc = dev_hard_start_xmit(skb, dev, txq);
2857 __this_cpu_dec(xmit_recursion);
2858 if (dev_xmit_complete(rc)) {
2859 HARD_TX_UNLOCK(dev, txq);
2860 goto out;
2861 }
2862 }
2863 HARD_TX_UNLOCK(dev, txq);
2864 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2865 dev->name);
2866 } else {
2867
2868
2869
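			/* Recursion detected: this CPU already holds the
			 * xmit lock of this queue further up the call stack.
			 */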
2870recursion_alert:
2871 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2872 dev->name);
2873 }
2874 }
2875
2876 rc = -ENETDOWN;
2877 rcu_read_unlock_bh();
2878
2879 kfree_skb(skb);
2880 return rc;
2881out:
2882 rcu_read_unlock_bh();
2883 return rc;
2884}
2885EXPORT_SYMBOL(dev_queue_xmit);
2886
2887
2888
2889
2890
2891
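/*=======================================================================
			Receiver routines
  =======================================================================*/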
2892int netdev_max_backlog __read_mostly = 1000;
2893EXPORT_SYMBOL(netdev_max_backlog);
2894
2895int netdev_tstamp_prequeue __read_mostly = 1;
2896int netdev_budget __read_mostly = 300;
2897int weight_p __read_mostly = 64;
2898
2899
2900static inline void ____napi_schedule(struct softnet_data *sd,
2901 struct napi_struct *napi)
2902{
2903 list_add_tail(&napi->poll_list, &sd->poll_list);
2904 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2905}
2906
2907#ifdef CONFIG_RPS
2908
2909
2910struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2911EXPORT_SYMBOL(rps_sock_flow_table);
2912
2913struct static_key rps_needed __read_mostly;
2914
2915static struct rps_dev_flow *
2916set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2917 struct rps_dev_flow *rflow, u16 next_cpu)
2918{
2919 if (next_cpu != RPS_NO_CPU) {
2920#ifdef CONFIG_RFS_ACCEL
2921 struct netdev_rx_queue *rxqueue;
2922 struct rps_dev_flow_table *flow_table;
2923 struct rps_dev_flow *old_rflow;
2924 u32 flow_id;
2925 u16 rxq_index;
2926 int rc;
2927
2928
2929 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2930 !(dev->features & NETIF_F_NTUPLE))
2931 goto out;
2932 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2933 if (rxq_index == skb_get_rx_queue(skb))
2934 goto out;
2935
2936 rxqueue = dev->_rx + rxq_index;
2937 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2938 if (!flow_table)
2939 goto out;
2940 flow_id = skb->rxhash & flow_table->mask;
2941 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2942 rxq_index, flow_id);
2943 if (rc < 0)
2944 goto out;
2945 old_rflow = rflow;
2946 rflow = &flow_table->flows[flow_id];
2947 rflow->filter = rc;
2948 if (old_rflow->filter == rflow->filter)
2949 old_rflow->filter = RPS_NO_FILTER;
2950 out:
2951#endif
2952 rflow->last_qtail =
2953 per_cpu(softnet_data, next_cpu).input_queue_head;
2954 }
2955
2956 rflow->cpu = next_cpu;
2957 return rflow;
2958}
2959
2960
2961
2962
2963
2964
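/*
 * get_rps_cpu is called from netif_rx()/netif_receive_skb() and returns
 * the target CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */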
2965static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2966 struct rps_dev_flow **rflowp)
2967{
2968 struct netdev_rx_queue *rxqueue;
2969 struct rps_map *map;
2970 struct rps_dev_flow_table *flow_table;
2971 struct rps_sock_flow_table *sock_flow_table;
2972 int cpu = -1;
2973 u16 tcpu;
2974
2975 if (skb_rx_queue_recorded(skb)) {
2976 u16 index = skb_get_rx_queue(skb);
2977 if (unlikely(index >= dev->real_num_rx_queues)) {
2978 WARN_ONCE(dev->real_num_rx_queues > 1,
2979 "%s received packet on queue %u, but number "
2980 "of RX queues is %u\n",
2981 dev->name, index, dev->real_num_rx_queues);
2982 goto done;
2983 }
2984 rxqueue = dev->_rx + index;
2985 } else
2986 rxqueue = dev->_rx;
2987
2988 map = rcu_dereference(rxqueue->rps_map);
2989 if (map) {
2990 if (map->len == 1 &&
2991 !rcu_access_pointer(rxqueue->rps_flow_table)) {
2992 tcpu = map->cpus[0];
2993 if (cpu_online(tcpu))
2994 cpu = tcpu;
2995 goto done;
2996 }
2997 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2998 goto done;
2999 }
3000
3001 skb_reset_network_header(skb);
3002 if (!skb_get_rxhash(skb))
3003 goto done;
3004
3005 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3006 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3007 if (flow_table && sock_flow_table) {
3008 u16 next_cpu;
3009 struct rps_dev_flow *rflow;
3010
3011 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3012 tcpu = rflow->cpu;
3013
3014 next_cpu = sock_flow_table->ents[skb->rxhash &
3015 sock_flow_table->mask];
3016
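		/*
		 * If the desired CPU (where the last recvmsg was done) is
		 * different from the current CPU (the one in the rx-queue
		 * flow table entry), switch if one of the following holds:
		 *   - the current CPU is unset (RPS_NO_CPU)
		 *   - the current CPU is offline
		 *   - the current CPU's queue tail has advanced beyond the
		 *     last packet that was enqueued using this table entry,
		 *     so packets already in flight cannot be reordered.
		 */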
3028 if (unlikely(tcpu != next_cpu) &&
3029 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3030 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3031 rflow->last_qtail)) >= 0)) {
3032 tcpu = next_cpu;
3033 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3034 }
3035
3036 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3037 *rflowp = rflow;
3038 cpu = tcpu;
3039 goto done;
3040 }
3041 }
3042
3043 if (map) {
3044 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3045
3046 if (cpu_online(tcpu)) {
3047 cpu = tcpu;
3048 goto done;
3049 }
3050 }
3051
3052done:
3053 return cpu;
3054}
3055
3056#ifdef CONFIG_RFS_ACCEL
3057
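/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: flow ID passed to ndo_rx_flow_steer()
 * @filter_id: filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call this
 * function for each installed filter and remove the filters for which it
 * returns %true.
 */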
3069bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3070 u32 flow_id, u16 filter_id)
3071{
3072 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3073 struct rps_dev_flow_table *flow_table;
3074 struct rps_dev_flow *rflow;
3075 bool expire = true;
3076 int cpu;
3077
3078 rcu_read_lock();
3079 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3080 if (flow_table && flow_id <= flow_table->mask) {
3081 rflow = &flow_table->flows[flow_id];
3082 cpu = ACCESS_ONCE(rflow->cpu);
3083 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3084 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3085 rflow->last_qtail) <
3086 (int)(10 * flow_table->mask)))
3087 expire = false;
3088 }
3089 rcu_read_unlock();
3090 return expire;
3091}
3092EXPORT_SYMBOL(rps_may_expire_flow);
3093
3094#endif
3095
3096
3097static void rps_trigger_softirq(void *data)
3098{
3099 struct softnet_data *sd = data;
3100
3101 ____napi_schedule(sd, &sd->backlog);
3102 sd->received_rps++;
3103}
3104
3105#endif
3106
3107
3108
3109
3110
3111
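/*
 * Check if this softnet_data structure belongs to another cpu.
 * If yes, queue it to our IPI list and return 1.
 * If no, return 0.
 */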
3112static int rps_ipi_queued(struct softnet_data *sd)
3113{
3114#ifdef CONFIG_RPS
3115 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3116
3117 if (sd != mysd) {
3118 sd->rps_ipi_next = mysd->rps_ipi_list;
3119 mysd->rps_ipi_list = sd;
3120
3121 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3122 return 1;
3123 }
3124#endif
3125 return 0;
3126}
3127
3128#ifdef CONFIG_NET_FLOW_LIMIT
3129int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3130#endif
3131
3132static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3133{
3134#ifdef CONFIG_NET_FLOW_LIMIT
3135 struct sd_flow_limit *fl;
3136 struct softnet_data *sd;
3137 unsigned int old_flow, new_flow;
3138
3139 if (qlen < (netdev_max_backlog >> 1))
3140 return false;
3141
3142 sd = &__get_cpu_var(softnet_data);
3143
3144 rcu_read_lock();
3145 fl = rcu_dereference(sd->flow_limit);
3146 if (fl) {
3147 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
3148 old_flow = fl->history[fl->history_head];
3149 fl->history[fl->history_head] = new_flow;
3150
3151 fl->history_head++;
3152 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3153
3154 if (likely(fl->buckets[old_flow]))
3155 fl->buckets[old_flow]--;
3156
3157 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3158 fl->count++;
3159 rcu_read_unlock();
3160 return true;
3161 }
3162 }
3163 rcu_read_unlock();
3164#endif
3165 return false;
3166}
3167
3168
3169
3170
3171
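/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */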
3172static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3173 unsigned int *qtail)
3174{
3175 struct softnet_data *sd;
3176 unsigned long flags;
3177 unsigned int qlen;
3178
3179 sd = &per_cpu(softnet_data, cpu);
3180
3181 local_irq_save(flags);
3182
3183 rps_lock(sd);
3184 qlen = skb_queue_len(&sd->input_pkt_queue);
3185 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3186 if (skb_queue_len(&sd->input_pkt_queue)) {
3187enqueue:
3188 __skb_queue_tail(&sd->input_pkt_queue, skb);
3189 input_queue_tail_incr_save(sd, qtail);
3190 rps_unlock(sd);
3191 local_irq_restore(flags);
3192 return NET_RX_SUCCESS;
3193 }
3194
3195
3196
3197
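		/* Schedule NAPI for the backlog device. We can use a
		 * non-atomic operation here since we own the queue lock.
		 */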
3198 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3199 if (!rps_ipi_queued(sd))
3200 ____napi_schedule(sd, &sd->backlog);
3201 }
3202 goto enqueue;
3203 }
3204
3205 sd->dropped++;
3206 rps_unlock(sd);
3207
3208 local_irq_restore(flags);
3209
3210 atomic_long_inc(&skb->dev->rx_dropped);
3211 kfree_skb(skb);
3212 return NET_RX_DROP;
3213}
3214
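/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it
 *	for the upper (protocol) levels to process. It always succeeds.
 *	The buffer may be dropped during processing for congestion control
 *	or by the protocol layers.
 *
 *	Return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */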
3230int netif_rx(struct sk_buff *skb)
3231{
3232 int ret;
3233
3234
3235 if (netpoll_rx(skb))
3236 return NET_RX_DROP;
3237
3238 net_timestamp_check(netdev_tstamp_prequeue, skb);
3239
3240 trace_netif_rx(skb);
3241#ifdef CONFIG_RPS
3242 if (static_key_false(&rps_needed)) {
3243 struct rps_dev_flow voidflow, *rflow = &voidflow;
3244 int cpu;
3245
3246 preempt_disable();
3247 rcu_read_lock();
3248
3249 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3250 if (cpu < 0)
3251 cpu = smp_processor_id();
3252
3253 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3254
3255 rcu_read_unlock();
3256 preempt_enable();
3257 } else
3258#endif
3259 {
3260 unsigned int qtail;
3261 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3262 put_cpu();
3263 }
3264 return ret;
3265}
3266EXPORT_SYMBOL(netif_rx);
3267
3268int netif_rx_ni(struct sk_buff *skb)
3269{
3270 int err;
3271
3272 preempt_disable();
3273 err = netif_rx(skb);
3274 if (local_softirq_pending())
3275 do_softirq();
3276 preempt_enable();
3277
3278 return err;
3279}
3280EXPORT_SYMBOL(netif_rx_ni);
3281
3282static void net_tx_action(struct softirq_action *h)
3283{
3284 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3285
3286 if (sd->completion_queue) {
3287 struct sk_buff *clist;
3288
3289 local_irq_disable();
3290 clist = sd->completion_queue;
3291 sd->completion_queue = NULL;
3292 local_irq_enable();
3293
3294 while (clist) {
3295 struct sk_buff *skb = clist;
3296 clist = clist->next;
3297
3298 WARN_ON(atomic_read(&skb->users));
3299 trace_kfree_skb(skb, net_tx_action);
3300 __kfree_skb(skb);
3301 }
3302 }
3303
3304 if (sd->output_queue) {
3305 struct Qdisc *head;
3306
3307 local_irq_disable();
3308 head = sd->output_queue;
3309 sd->output_queue = NULL;
3310 sd->output_queue_tailp = &sd->output_queue;
3311 local_irq_enable();
3312
3313 while (head) {
3314 struct Qdisc *q = head;
3315 spinlock_t *root_lock;
3316
3317 head = head->next_sched;
3318
3319 root_lock = qdisc_lock(q);
3320 if (spin_trylock(root_lock)) {
3321 smp_mb__before_clear_bit();
3322 clear_bit(__QDISC_STATE_SCHED,
3323 &q->state);
3324 qdisc_run(q);
3325 spin_unlock(root_lock);
3326 } else {
3327 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3328 &q->state)) {
3329 __netif_reschedule(q);
3330 } else {
3331 smp_mb__before_clear_bit();
3332 clear_bit(__QDISC_STATE_SCHED,
3333 &q->state);
3334 }
3335 }
3336 }
3337 }
3338}
3339
3340#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3341 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3342
3343int (*br_fdb_test_addr_hook)(struct net_device *dev,
3344 unsigned char *addr) __read_mostly;
3345EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3346#endif
3347
3348#ifdef CONFIG_NET_CLS_ACT
3349
3350
3351
3352
3353
3354
3355
3356
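/*
 * Run the skb through the device's ingress qdisc so configured ingress
 * policies can drop or reshape it before normal delivery. The redirect
 * TTL carried in tc_verd guards against redirect loops.
 */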
3357static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3358{
3359 struct net_device *dev = skb->dev;
3360 u32 ttl = G_TC_RTTL(skb->tc_verd);
3361 int result = TC_ACT_OK;
3362 struct Qdisc *q;
3363
3364 if (unlikely(MAX_RED_LOOP < ttl++)) {
		net_warn_ratelimited("Redir loop detected, dropping packet (%d->%d)\n",
				     skb->skb_iif, dev->ifindex);
3367 return TC_ACT_SHOT;
3368 }
3369
3370 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3371 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3372
3373 q = rxq->qdisc;
3374 if (q != &noop_qdisc) {
3375 spin_lock(qdisc_lock(q));
3376 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3377 result = qdisc_enqueue_root(skb, q);
3378 spin_unlock(qdisc_lock(q));
3379 }
3380
3381 return result;
3382}
3383
3384static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3385 struct packet_type **pt_prev,
3386 int *ret, struct net_device *orig_dev)
3387{
3388 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3389
3390 if (!rxq || rxq->qdisc == &noop_qdisc)
3391 goto out;
3392
3393 if (*pt_prev) {
3394 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3395 *pt_prev = NULL;
3396 }
3397
3398 switch (ing_filter(skb, rxq)) {
3399 case TC_ACT_SHOT:
3400 case TC_ACT_STOLEN:
3401 kfree_skb(skb);
3402 return NULL;
3403 }
3404
3405out:
3406 skb->tc_verd = 0;
3407 return skb;
3408}
3409#endif
3410
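/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device. This handler will then be
 *	called from __netif_receive_skb. A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 */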
3425int netdev_rx_handler_register(struct net_device *dev,
3426 rx_handler_func_t *rx_handler,
3427 void *rx_handler_data)
3428{
3429 ASSERT_RTNL();
3430
3431 if (dev->rx_handler)
3432 return -EBUSY;
3433
3434
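	/* Note: rx_handler_data must be set before rx_handler */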
3435 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3436 rcu_assign_pointer(dev->rx_handler, rx_handler);
3437
3438 return 0;
3439}
3440EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3441
3442
3443
3444
3445
3446
3447
3448
3449
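/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */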
3450void netdev_rx_handler_unregister(struct net_device *dev)
3451{
3452
3453 ASSERT_RTNL();
3454 RCU_INIT_POINTER(dev->rx_handler, NULL);
3455
3456
3457
3458
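	/* A reader seeing a non NULL rx_handler in an rcu_read_lock()
	 * section is guaranteed to see a non NULL rx_handler_data as well;
	 * hence the synchronize_net() before clearing rx_handler_data.
	 */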
3459 synchronize_net();
3460 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3461}
3462EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3463
3464
3465
3466
3467
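/*
 * Limit the use of PFMEMALLOC reserves to these protocols: everything
 * else is dropped under memory pressure rather than consuming emergency
 * reserves it cannot make progress with.
 */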
3468static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3469{
3470 switch (skb->protocol) {
3471 case __constant_htons(ETH_P_ARP):
3472 case __constant_htons(ETH_P_IP):
3473 case __constant_htons(ETH_P_IPV6):
3474 case __constant_htons(ETH_P_8021Q):
3475 case __constant_htons(ETH_P_8021AD):
3476 return true;
3477 default:
3478 return false;
3479 }
3480}
3481
3482static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3483{
3484 struct packet_type *ptype, *pt_prev;
3485 rx_handler_func_t *rx_handler;
3486 struct net_device *orig_dev;
3487 struct net_device *null_or_dev;
3488 bool deliver_exact = false;
3489 int ret = NET_RX_DROP;
3490 __be16 type;
3491
3492 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3493
3494 trace_netif_receive_skb(skb);
3495
3496
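	/* if we've gotten here through NAPI, check netpoll */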
3497 if (netpoll_receive_skb(skb))
3498 goto out;
3499
3500 orig_dev = skb->dev;
3501
3502 skb_reset_network_header(skb);
3503 if (!skb_transport_header_was_set(skb))
3504 skb_reset_transport_header(skb);
3505 skb_reset_mac_len(skb);
3506
3507 pt_prev = NULL;
3508
3509 rcu_read_lock();
3510
3511another_round:
3512 skb->skb_iif = skb->dev->ifindex;
3513
3514 __this_cpu_inc(softnet_data.processed);
3515
3516 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3517 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3518 skb = vlan_untag(skb);
3519 if (unlikely(!skb))
3520 goto unlock;
3521 }
3522
3523#ifdef CONFIG_NET_CLS_ACT
3524 if (skb->tc_verd & TC_NCLS) {
3525 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3526 goto ncls;
3527 }
3528#endif
3529
3530 if (pfmemalloc)
3531 goto skip_taps;
3532
3533 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3534 if (!ptype->dev || ptype->dev == skb->dev) {
3535 if (pt_prev)
3536 ret = deliver_skb(skb, pt_prev, orig_dev);
3537 pt_prev = ptype;
3538 }
3539 }
3540
3541skip_taps:
3542#ifdef CONFIG_NET_CLS_ACT
3543 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3544 if (!skb)
3545 goto unlock;
3546ncls:
3547#endif
3548
3549 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3550 goto drop;
3551
3552 if (vlan_tx_tag_present(skb)) {
3553 if (pt_prev) {
3554 ret = deliver_skb(skb, pt_prev, orig_dev);
3555 pt_prev = NULL;
3556 }
3557 if (vlan_do_receive(&skb))
3558 goto another_round;
3559 else if (unlikely(!skb))
3560 goto unlock;
3561 }
3562
3563 rx_handler = rcu_dereference(skb->dev->rx_handler);
3564 if (rx_handler) {
3565 if (pt_prev) {
3566 ret = deliver_skb(skb, pt_prev, orig_dev);
3567 pt_prev = NULL;
3568 }
3569 switch (rx_handler(&skb)) {
3570 case RX_HANDLER_CONSUMED:
3571 ret = NET_RX_SUCCESS;
3572 goto unlock;
3573 case RX_HANDLER_ANOTHER:
3574 goto another_round;
3575 case RX_HANDLER_EXACT:
3576 deliver_exact = true;
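			/* fall through */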
3577 case RX_HANDLER_PASS:
3578 break;
3579 default:
3580 BUG();
3581 }
3582 }
3583
3584 if (unlikely(vlan_tx_tag_present(skb))) {
3585 if (vlan_tx_tag_get_id(skb))
3586 skb->pkt_type = PACKET_OTHERHOST;
3587
3588
3589
3590
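		/* Note: we might in the future use prio bits
		 * and set skb->priority like in vlan_do_receive().
		 * For the time being, just ignore Priority Code Point.
		 */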
3591 skb->vlan_tci = 0;
3592 }
3593
3594
3595 null_or_dev = deliver_exact ? skb->dev : NULL;
3596
3597 type = skb->protocol;
3598 list_for_each_entry_rcu(ptype,
3599 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3600 if (ptype->type == type &&
3601 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3602 ptype->dev == orig_dev)) {
3603 if (pt_prev)
3604 ret = deliver_skb(skb, pt_prev, orig_dev);
3605 pt_prev = ptype;
3606 }
3607 }
3608
3609 if (pt_prev) {
3610 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3611 goto drop;
3612 else
3613 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3614 } else {
3615drop:
3616 atomic_long_inc(&skb->dev->rx_dropped);
3617 kfree_skb(skb);
3618
3619
3620
3621 ret = NET_RX_DROP;
3622 }
3623
3624unlock:
3625 rcu_read_unlock();
3626out:
3627 return ret;
3628}
3629
3630static int __netif_receive_skb(struct sk_buff *skb)
3631{
3632 int ret;
3633
3634 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3635 unsigned long pflags = current->flags;
3636
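		/*
		 * PFMEMALLOC skbs are special, they should
		 * - be delivered to SOCK_MEMALLOC sockets only
		 * - stay away from userspace
		 * - have bounded memory usage
		 *
		 * Use PF_MEMALLOC as this saves us from propagating the
		 * allocation context down to all allocation sites.
		 */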
3646 current->flags |= PF_MEMALLOC;
3647 ret = __netif_receive_skb_core(skb, true);
3648 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3649 } else
3650 ret = __netif_receive_skb_core(skb, false);
3651
3652 return ret;
3653}
3654
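/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values:
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */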
3670int netif_receive_skb(struct sk_buff *skb)
3671{
3672 net_timestamp_check(netdev_tstamp_prequeue, skb);
3673
3674 if (skb_defer_rx_timestamp(skb))
3675 return NET_RX_SUCCESS;
3676
3677#ifdef CONFIG_RPS
3678 if (static_key_false(&rps_needed)) {
3679 struct rps_dev_flow voidflow, *rflow = &voidflow;
3680 int cpu, ret;
3681
3682 rcu_read_lock();
3683
3684 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3685
3686 if (cpu >= 0) {
3687 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3688 rcu_read_unlock();
3689 return ret;
3690 }
3691 rcu_read_unlock();
3692 }
3693#endif
3694 return __netif_receive_skb(skb);
3695}
3696EXPORT_SYMBOL(netif_receive_skb);
3697
3698
3699
3700
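/* The network device is going away: flush any packets still pending.
 * Called with irqs disabled.
 */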
3701static void flush_backlog(void *arg)
3702{
3703 struct net_device *dev = arg;
3704 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3705 struct sk_buff *skb, *tmp;
3706
3707 rps_lock(sd);
3708 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3709 if (skb->dev == dev) {
3710 __skb_unlink(skb, &sd->input_pkt_queue);
3711 kfree_skb(skb);
3712 input_queue_head_incr(sd);
3713 }
3714 }
3715 rps_unlock(sd);
3716
3717 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3718 if (skb->dev == dev) {
3719 __skb_unlink(skb, &sd->process_queue);
3720 kfree_skb(skb);
3721 input_queue_head_incr(sd);
3722 }
3723 }
3724}
3725
3726static int napi_gro_complete(struct sk_buff *skb)
3727{
3728 struct packet_offload *ptype;
3729 __be16 type = skb->protocol;
3730 struct list_head *head = &offload_base;
3731 int err = -ENOENT;
3732
3733 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3734
3735 if (NAPI_GRO_CB(skb)->count == 1) {
3736 skb_shinfo(skb)->gso_size = 0;
3737 goto out;
3738 }
3739
3740 rcu_read_lock();
3741 list_for_each_entry_rcu(ptype, head, list) {
3742 if (ptype->type != type || !ptype->callbacks.gro_complete)
3743 continue;
3744
3745 err = ptype->callbacks.gro_complete(skb);
3746 break;
3747 }
3748 rcu_read_unlock();
3749
3750 if (err) {
3751 WARN_ON(&ptype->list == head);
3752 kfree_skb(skb);
3753 return NET_RX_SUCCESS;
3754 }
3755
3756out:
3757 return netif_receive_skb(skb);
3758}
3759
3760
3761
3762
3763
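/* napi->gro_list contains packets ordered by age, youngest packets
 * at the head of it. Complete skbs in reverse order to reduce latencies.
 */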
3764void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3765{
3766 struct sk_buff *skb, *prev = NULL;
3767
3768
3769 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3770 skb->prev = prev;
3771 prev = skb;
3772 }
3773
3774 for (skb = prev; skb; skb = prev) {
3775 skb->next = NULL;
3776
3777 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3778 return;
3779
3780 prev = skb->prev;
3781 napi_gro_complete(skb);
3782 napi->gro_count--;
3783 }
3784
3785 napi->gro_list = NULL;
3786}
3787EXPORT_SYMBOL(napi_gro_flush);
3788
3789static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3790{
3791 struct sk_buff *p;
3792 unsigned int maclen = skb->dev->hard_header_len;
3793
3794 for (p = napi->gro_list; p; p = p->next) {
3795 unsigned long diffs;
3796
3797 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3798 diffs |= p->vlan_tci ^ skb->vlan_tci;
3799 if (maclen == ETH_HLEN)
3800 diffs |= compare_ether_header(skb_mac_header(p),
3801 skb_gro_mac_header(skb));
3802 else if (!diffs)
3803 diffs = memcmp(skb_mac_header(p),
3804 skb_gro_mac_header(skb),
3805 maclen);
3806 NAPI_GRO_CB(p)->same_flow = !diffs;
3807 NAPI_GRO_CB(p)->flush = 0;
3808 }
3809}
3810
3811static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3812{
3813 struct sk_buff **pp = NULL;
3814 struct packet_offload *ptype;
3815 __be16 type = skb->protocol;
3816 struct list_head *head = &offload_base;
3817 int same_flow;
3818 enum gro_result ret;
3819
3820 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3821 goto normal;
3822
3823 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3824 goto normal;
3825
3826 gro_list_prepare(napi, skb);
3827
3828 rcu_read_lock();
3829 list_for_each_entry_rcu(ptype, head, list) {
3830 if (ptype->type != type || !ptype->callbacks.gro_receive)
3831 continue;
3832
3833 skb_set_network_header(skb, skb_gro_offset(skb));
3834 skb_reset_mac_len(skb);
3835 NAPI_GRO_CB(skb)->same_flow = 0;
3836 NAPI_GRO_CB(skb)->flush = 0;
3837 NAPI_GRO_CB(skb)->free = 0;
3838
3839 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3840 break;
3841 }
3842 rcu_read_unlock();
3843
3844 if (&ptype->list == head)
3845 goto normal;
3846
3847 same_flow = NAPI_GRO_CB(skb)->same_flow;
3848 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3849
3850 if (pp) {
3851 struct sk_buff *nskb = *pp;
3852
3853 *pp = nskb->next;
3854 nskb->next = NULL;
3855 napi_gro_complete(nskb);
3856 napi->gro_count--;
3857 }
3858
3859 if (same_flow)
3860 goto ok;
3861
3862 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3863 goto normal;
3864
3865 napi->gro_count++;
3866 NAPI_GRO_CB(skb)->count = 1;
3867 NAPI_GRO_CB(skb)->age = jiffies;
3868 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3869 skb->next = napi->gro_list;
3870 napi->gro_list = skb;
3871 ret = GRO_HELD;
3872
3873pull:
3874 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3875 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3876
3877 BUG_ON(skb->end - skb->tail < grow);
3878
3879 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3880
3881 skb->tail += grow;
3882 skb->data_len -= grow;
3883
3884 skb_shinfo(skb)->frags[0].page_offset += grow;
3885 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3886
3887 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3888 skb_frag_unref(skb, 0);
3889 memmove(skb_shinfo(skb)->frags,
3890 skb_shinfo(skb)->frags + 1,
3891 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3892 }
3893 }
3894
3895ok:
3896 return ret;
3897
3898normal:
3899 ret = GRO_NORMAL;
3900 goto pull;
3901}
3902
3903
3904static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3905{
3906 switch (ret) {
3907 case GRO_NORMAL:
3908 if (netif_receive_skb(skb))
3909 ret = GRO_DROP;
3910 break;
3911
3912 case GRO_DROP:
3913 kfree_skb(skb);
3914 break;
3915
3916 case GRO_MERGED_FREE:
3917 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3918 kmem_cache_free(skbuff_head_cache, skb);
3919 else
3920 __kfree_skb(skb);
3921 break;
3922
3923 case GRO_HELD:
3924 case GRO_MERGED:
3925 break;
3926 }
3927
3928 return ret;
3929}
3930
3931static void skb_gro_reset_offset(struct sk_buff *skb)
3932{
3933 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3934 const skb_frag_t *frag0 = &pinfo->frags[0];
3935
3936 NAPI_GRO_CB(skb)->data_offset = 0;
3937 NAPI_GRO_CB(skb)->frag0 = NULL;
3938 NAPI_GRO_CB(skb)->frag0_len = 0;
3939
3940 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3941 pinfo->nr_frags &&
3942 !PageHighMem(skb_frag_page(frag0))) {
3943 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3944 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3945 }
3946}
3947
3948gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3949{
3950 skb_gro_reset_offset(skb);
3951
3952 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3953}
3954EXPORT_SYMBOL(napi_gro_receive);
3955
3956static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3957{
3958 __skb_pull(skb, skb_headlen(skb));
3959
3960 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3961 skb->vlan_tci = 0;
3962 skb->dev = napi->dev;
3963 skb->skb_iif = 0;
3964
3965 napi->skb = skb;
3966}
3967
3968struct sk_buff *napi_get_frags(struct napi_struct *napi)
3969{
3970 struct sk_buff *skb = napi->skb;
3971
3972 if (!skb) {
3973 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3974 if (skb)
3975 napi->skb = skb;
3976 }
3977 return skb;
3978}
3979EXPORT_SYMBOL(napi_get_frags);
3980
3981static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3982 gro_result_t ret)
3983{
3984 switch (ret) {
3985 case GRO_NORMAL:
3986 case GRO_HELD:
3987 skb->protocol = eth_type_trans(skb, skb->dev);
3988
3989 if (ret == GRO_HELD)
3990 skb_gro_pull(skb, -ETH_HLEN);
3991 else if (netif_receive_skb(skb))
3992 ret = GRO_DROP;
3993 break;
3994
3995 case GRO_DROP:
3996 case GRO_MERGED_FREE:
3997 napi_reuse_skb(napi, skb);
3998 break;
3999
4000 case GRO_MERGED:
4001 break;
4002 }
4003
4004 return ret;
4005}
4006
4007static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4008{
4009 struct sk_buff *skb = napi->skb;
4010 struct ethhdr *eth;
4011 unsigned int hlen;
4012 unsigned int off;
4013
4014 napi->skb = NULL;
4015
4016 skb_reset_mac_header(skb);
4017 skb_gro_reset_offset(skb);
4018
4019 off = skb_gro_offset(skb);
4020 hlen = off + sizeof(*eth);
4021 eth = skb_gro_header_fast(skb, off);
4022 if (skb_gro_header_hard(skb, hlen)) {
4023 eth = skb_gro_header_slow(skb, hlen, off);
4024 if (unlikely(!eth)) {
4025 napi_reuse_skb(napi, skb);
4026 skb = NULL;
4027 goto out;
4028 }
4029 }
4030
4031 skb_gro_pull(skb, sizeof(*eth));
4032
4033
4034
4035
4036
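	/*
	 * This works because the only protocols we care about don't require
	 * special handling; we'll fix the protocol up properly in
	 * napi_frags_finish().
	 */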
4037 skb->protocol = eth->h_proto;
4038
4039out:
4040 return skb;
4041}
4042
4043gro_result_t napi_gro_frags(struct napi_struct *napi)
4044{
4045 struct sk_buff *skb = napi_frags_skb(napi);
4046
4047 if (!skb)
4048 return GRO_DROP;
4049
4050 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4051}
4052EXPORT_SYMBOL(napi_gro_frags);
4053
4054
4055
4056
4057
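/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */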
4058static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4059{
4060#ifdef CONFIG_RPS
4061 struct softnet_data *remsd = sd->rps_ipi_list;
4062
4063 if (remsd) {
4064 sd->rps_ipi_list = NULL;
4065
4066 local_irq_enable();
4067
4068
4069 while (remsd) {
4070 struct softnet_data *next = remsd->rps_ipi_next;
4071
4072 if (cpu_online(remsd->cpu))
4073 __smp_call_function_single(remsd->cpu,
4074 &remsd->csd, 0);
4075 remsd = next;
4076 }
4077 } else
4078#endif
4079 local_irq_enable();
4080}
4081
4082static int process_backlog(struct napi_struct *napi, int quota)
4083{
4084 int work = 0;
4085 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4086
4087#ifdef CONFIG_RPS
4088
4089
4090
4091 if (sd->rps_ipi_list) {
4092 local_irq_disable();
4093 net_rps_action_and_irq_enable(sd);
4094 }
4095#endif
4096 napi->weight = weight_p;
4097 local_irq_disable();
4098 while (work < quota) {
4099 struct sk_buff *skb;
4100 unsigned int qlen;
4101
4102 while ((skb = __skb_dequeue(&sd->process_queue))) {
4103 local_irq_enable();
4104 __netif_receive_skb(skb);
4105 local_irq_disable();
4106 input_queue_head_incr(sd);
4107 if (++work >= quota) {
4108 local_irq_enable();
4109 return work;
4110 }
4111 }
4112
4113 rps_lock(sd);
4114 qlen = skb_queue_len(&sd->input_pkt_queue);
4115 if (qlen)
4116 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4117 &sd->process_queue);
4118
4119 if (qlen < quota - work) {
4120
4121
4122
4123
4124
4125
4126
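			/*
			 * Inline a custom version of __napi_complete().
			 * Only the current cpu owns and manipulates this
			 * napi, and NAPI_STATE_SCHED is the only possible
			 * flag set on backlog, so we can use a plain write
			 * instead of clear_bit() and do not need an
			 * smp_mb() memory barrier.
			 */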
4127 list_del(&napi->poll_list);
4128 napi->state = 0;
4129
4130 quota = work + qlen;
4131 }
4132 rps_unlock(sd);
4133 }
4134 local_irq_enable();
4135
4136 return work;
4137}
4138
4139
4140
4141
4142
4143
4144
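/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 */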
4145void __napi_schedule(struct napi_struct *n)
4146{
4147 unsigned long flags;
4148
4149 local_irq_save(flags);
4150 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4151 local_irq_restore(flags);
4152}
4153EXPORT_SYMBOL(__napi_schedule);
4154
4155void __napi_complete(struct napi_struct *n)
4156{
4157 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4158 BUG_ON(n->gro_list);
4159
4160 list_del(&n->poll_list);
4161 smp_mb__before_clear_bit();
4162 clear_bit(NAPI_STATE_SCHED, &n->state);
4163}
4164EXPORT_SYMBOL(__napi_complete);
4165
4166void napi_complete(struct napi_struct *n)
4167{
4168 unsigned long flags;
4169
4170
4171
4172
4173
4174 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4175 return;
4176
4177 napi_gro_flush(n, false);
4178 local_irq_save(flags);
4179 __napi_complete(n);
4180 local_irq_restore(flags);
4181}
4182EXPORT_SYMBOL(napi_complete);
4183
4184
4185struct napi_struct *napi_by_id(unsigned int napi_id)
4186{
4187 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4188 struct napi_struct *napi;
4189
4190 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4191 if (napi->napi_id == napi_id)
4192 return napi;
4193
4194 return NULL;
4195}
4196EXPORT_SYMBOL_GPL(napi_by_id);
4197
4198void napi_hash_add(struct napi_struct *napi)
4199{
4200 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4201
4202 spin_lock(&napi_hash_lock);
4203
4204
4205
4206
4207 napi->napi_id = 0;
4208 while (!napi->napi_id) {
4209 napi->napi_id = ++napi_gen_id;
4210 if (napi_by_id(napi->napi_id))
4211 napi->napi_id = 0;
4212 }
4213
4214 hlist_add_head_rcu(&napi->napi_hash_node,
4215 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4216
4217 spin_unlock(&napi_hash_lock);
4218 }
4219}
4220EXPORT_SYMBOL_GPL(napi_hash_add);
4221
4222
4223
4224
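/* Warning: the caller must observe an RCU grace period before freeing
 * the memory containing @napi.
 */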
4225void napi_hash_del(struct napi_struct *napi)
4226{
4227 spin_lock(&napi_hash_lock);
4228
4229 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4230 hlist_del_rcu(&napi->napi_hash_node);
4231
4232 spin_unlock(&napi_hash_lock);
4233}
4234EXPORT_SYMBOL_GPL(napi_hash_del);
4235
4236void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4237 int (*poll)(struct napi_struct *, int), int weight)
4238{
4239 INIT_LIST_HEAD(&napi->poll_list);
4240 napi->gro_count = 0;
4241 napi->gro_list = NULL;
4242 napi->skb = NULL;
4243 napi->poll = poll;
4244 if (weight > NAPI_POLL_WEIGHT)
4245 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4246 weight, dev->name);
4247 napi->weight = weight;
4248 list_add(&napi->dev_list, &dev->napi_list);
4249 napi->dev = dev;
4250#ifdef CONFIG_NETPOLL
4251 spin_lock_init(&napi->poll_lock);
4252 napi->poll_owner = -1;
4253#endif
4254 set_bit(NAPI_STATE_SCHED, &napi->state);
4255}
4256EXPORT_SYMBOL(netif_napi_add);
4257
4258void netif_napi_del(struct napi_struct *napi)
4259{
4260 struct sk_buff *skb, *next;
4261
4262 list_del_init(&napi->dev_list);
4263 napi_free_frags(napi);
4264
4265 for (skb = napi->gro_list; skb; skb = next) {
4266 next = skb->next;
4267 skb->next = NULL;
4268 kfree_skb(skb);
4269 }
4270
4271 napi->gro_list = NULL;
4272 napi->gro_count = 0;
4273}
4274EXPORT_SYMBOL(netif_napi_del);
4275
4276static void net_rx_action(struct softirq_action *h)
4277{
4278 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4279 unsigned long time_limit = jiffies + 2;
4280 int budget = netdev_budget;
4281 void *have;
4282
4283 local_irq_disable();
4284
4285 while (!list_empty(&sd->poll_list)) {
4286 struct napi_struct *n;
4287 int work, weight;
4288
4289
4290
4291
4292
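		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which gives an
		 * average latency of 1.5/HZ.
		 */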
4293 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
4294 goto softnet_break;
4295
4296 local_irq_enable();
4297
4298
4299
4300
4301
4302
4303 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4304
4305 have = netpoll_poll_lock(n);
4306
4307 weight = n->weight;
4308
4309
4310
4311
4312
4313
4314
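		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi(). Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call. Therefore we avoid
		 * accepting a frame here.
		 */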
4315 work = 0;
4316 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4317 work = n->poll(n, weight);
4318 trace_napi_poll(n);
4319 }
4320
4321 WARN_ON_ONCE(work > weight);
4322
4323 budget -= work;
4324
4325 local_irq_disable();
4326
4327
4328
4329
4330
4331
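		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight. In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */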
4332 if (unlikely(work == weight)) {
4333 if (unlikely(napi_disable_pending(n))) {
4334 local_irq_enable();
4335 napi_complete(n);
4336 local_irq_disable();
4337 } else {
4338 if (n->gro_list) {
4339
4340
4341
4342 local_irq_enable();
4343 napi_gro_flush(n, HZ >= 1000);
4344 local_irq_disable();
4345 }
4346 list_move_tail(&n->poll_list, &sd->poll_list);
4347 }
4348 }
4349
4350 netpoll_poll_unlock(have);
4351 }
4352out:
4353 net_rps_action_and_irq_enable(sd);
4354
4355#ifdef CONFIG_NET_DMA
4356
4357
4358
4359
4360 dma_issue_pending_all();
4361#endif
4362
4363 return;
4364
4365softnet_break:
4366 sd->time_squeeze++;
4367 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4368 goto out;
4369}
4370
struct netdev_adjacent {
	struct net_device *dev;

	/* upper master flag: there can only be one master device per list */
	bool master;

	/* indicates that this dev is our first-level lower/upper device */
	bool neighbour;

	/* counter for the number of times this device was added to us */
	u16 ref_nr;

	struct list_head list;
	struct rcu_head rcu;
};
4386
4387static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
4388 struct net_device *adj_dev,
4389 bool upper)
4390{
4391 struct netdev_adjacent *adj;
4392 struct list_head *dev_list;
4393
4394 dev_list = upper ? &dev->upper_dev_list : &dev->lower_dev_list;
4395
4396 list_for_each_entry(adj, dev_list, list) {
4397 if (adj->dev == adj_dev)
4398 return adj;
4399 }
4400 return NULL;
4401}
4402
4403static inline struct netdev_adjacent *__netdev_find_upper(struct net_device *dev,
4404 struct net_device *udev)
4405{
4406 return __netdev_find_adj(dev, udev, true);
4407}
4408
4409static inline struct netdev_adjacent *__netdev_find_lower(struct net_device *dev,
4410 struct net_device *ldev)
4411{
4412 return __netdev_find_adj(dev, ldev, false);
4413}
4414
4415
4416
4417
4418
4419
4420
4421
4422
4423
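/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is. The caller must hold the RTNL lock.
 */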
4424bool netdev_has_upper_dev(struct net_device *dev,
4425 struct net_device *upper_dev)
4426{
4427 ASSERT_RTNL();
4428
4429 return __netdev_find_upper(dev, upper_dev);
4430}
4431EXPORT_SYMBOL(netdev_has_upper_dev);
4432
4433
4434
4435
4436
4437
4438
4439
4440bool netdev_has_any_upper_dev(struct net_device *dev)
4441{
4442 ASSERT_RTNL();
4443
4444 return !list_empty(&dev->upper_dev_list);
4445}
4446EXPORT_SYMBOL(netdev_has_any_upper_dev);
4447
4448
4449
4450
4451
4452
4453
4454
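/**
 * netdev_master_upper_dev_get - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return a pointer to it, or NULL in case
 * it's not there. The caller must hold the RTNL lock.
 */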
4455struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4456{
4457 struct netdev_adjacent *upper;
4458
4459 ASSERT_RTNL();
4460
4461 if (list_empty(&dev->upper_dev_list))
4462 return NULL;
4463
4464 upper = list_first_entry(&dev->upper_dev_list,
4465 struct netdev_adjacent, list);
4466 if (likely(upper->master))
4467 return upper->dev;
4468 return NULL;
4469}
4470EXPORT_SYMBOL(netdev_master_upper_dev_get);
4471
4472
4473
4474
4475
4476
4477
4478
4479struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
4480 struct list_head **iter)
4481{
4482 struct netdev_adjacent *upper;
4483
4484 WARN_ON_ONCE(!rcu_read_lock_held());
4485
4486 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
4487
4488 if (&upper->list == &dev->upper_dev_list)
4489 return NULL;
4490
4491 *iter = &upper->list;
4492
4493 return upper->dev;
4494}
4495EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
4496
4497
4498
4499
4500
4501
4502
4503
4504struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4505{
4506 struct netdev_adjacent *upper;
4507
4508 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4509 struct netdev_adjacent, list);
4510 if (upper && likely(upper->master))
4511 return upper->dev;
4512 return NULL;
4513}
4514EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4515
4516static int __netdev_adjacent_dev_insert(struct net_device *dev,
4517 struct net_device *adj_dev,
4518 bool neighbour, bool master,
4519 bool upper)
4520{
4521 struct netdev_adjacent *adj;
4522
4523 adj = __netdev_find_adj(dev, adj_dev, upper);
4524
4525 if (adj) {
4526 BUG_ON(neighbour);
4527 adj->ref_nr++;
4528 return 0;
4529 }
4530
4531 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
4532 if (!adj)
4533 return -ENOMEM;
4534
4535 adj->dev = adj_dev;
4536 adj->master = master;
4537 adj->neighbour = neighbour;
4538 adj->ref_nr = 1;
4539
4540 dev_hold(adj_dev);
4541 pr_debug("dev_hold for %s, because of %s link added from %s to %s\n",
4542 adj_dev->name, upper ? "upper" : "lower", dev->name,
4543 adj_dev->name);
4544
4545 if (!upper) {
4546 list_add_tail_rcu(&adj->list, &dev->lower_dev_list);
4547 return 0;
4548 }
4549
4550
4551 if (master)
4552 list_add_rcu(&adj->list, &dev->upper_dev_list);
4553 else
4554 list_add_tail_rcu(&adj->list, &dev->upper_dev_list);
4555
4556 return 0;
4557}
4558
4559static inline int __netdev_upper_dev_insert(struct net_device *dev,
4560 struct net_device *udev,
4561 bool master, bool neighbour)
4562{
4563 return __netdev_adjacent_dev_insert(dev, udev, neighbour, master,
4564 true);
4565}
4566
4567static inline int __netdev_lower_dev_insert(struct net_device *dev,
4568 struct net_device *ldev,
4569 bool neighbour)
4570{
4571 return __netdev_adjacent_dev_insert(dev, ldev, neighbour, false,
4572 false);
4573}
4574
4575void __netdev_adjacent_dev_remove(struct net_device *dev,
4576 struct net_device *adj_dev, bool upper)
4577{
4578 struct netdev_adjacent *adj;
4579
4580 if (upper)
4581 adj = __netdev_find_upper(dev, adj_dev);
4582 else
4583 adj = __netdev_find_lower(dev, adj_dev);
4584
	BUG_ON(!adj);
4587
4588 if (adj->ref_nr > 1) {
4589 adj->ref_nr--;
4590 return;
4591 }
4592
4593 list_del_rcu(&adj->list);
4594 pr_debug("dev_put for %s, because of %s link removed from %s to %s\n",
4595 adj_dev->name, upper ? "upper" : "lower", dev->name,
4596 adj_dev->name);
4597 dev_put(adj_dev);
4598 kfree_rcu(adj, rcu);
4599}
4600
4601static inline void __netdev_upper_dev_remove(struct net_device *dev,
4602 struct net_device *udev)
4603{
4604 return __netdev_adjacent_dev_remove(dev, udev, true);
4605}
4606
4607static inline void __netdev_lower_dev_remove(struct net_device *dev,
4608 struct net_device *ldev)
4609{
4610 return __netdev_adjacent_dev_remove(dev, ldev, false);
4611}
4612
4613int __netdev_adjacent_dev_insert_link(struct net_device *dev,
4614 struct net_device *upper_dev,
4615 bool master, bool neighbour)
4616{
4617 int ret;
4618
4619 ret = __netdev_upper_dev_insert(dev, upper_dev, master, neighbour);
4620 if (ret)
4621 return ret;
4622
4623 ret = __netdev_lower_dev_insert(upper_dev, dev, neighbour);
4624 if (ret) {
4625 __netdev_upper_dev_remove(dev, upper_dev);
4626 return ret;
4627 }
4628
4629 return 0;
4630}
4631
4632static inline int __netdev_adjacent_dev_link(struct net_device *dev,
4633 struct net_device *udev)
4634{
4635 return __netdev_adjacent_dev_insert_link(dev, udev, false, false);
4636}
4637
4638static inline int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
4639 struct net_device *udev,
4640 bool master)
4641{
4642 return __netdev_adjacent_dev_insert_link(dev, udev, master, true);
4643}
4644
4645void __netdev_adjacent_dev_unlink(struct net_device *dev,
4646 struct net_device *upper_dev)
4647{
4648 __netdev_upper_dev_remove(dev, upper_dev);
4649 __netdev_lower_dev_remove(upper_dev, dev);
4650}
4651
4652
4653static int __netdev_upper_dev_link(struct net_device *dev,
4654 struct net_device *upper_dev, bool master)
4655{
4656 struct netdev_adjacent *i, *j, *to_i, *to_j;
4657 int ret = 0;
4658
4659 ASSERT_RTNL();
4660
4661 if (dev == upper_dev)
4662 return -EBUSY;
4663
4664
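	/* To prevent loops, check if dev is not an upper device of
	 * upper_dev.
	 */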
4665 if (__netdev_find_upper(upper_dev, dev))
4666 return -EBUSY;
4667
4668 if (__netdev_find_upper(dev, upper_dev))
4669 return -EEXIST;
4670
4671 if (master && netdev_master_upper_dev_get(dev))
4672 return -EBUSY;
4673
4674 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, master);
4675 if (ret)
4676 return ret;
4677
4678
4679
4680
4681
4682
4683 list_for_each_entry(i, &dev->lower_dev_list, list) {
4684 list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
4685 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
4686 if (ret)
4687 goto rollback_mesh;
4688 }
4689 }
4690
4691
4692 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4693 ret = __netdev_adjacent_dev_link(dev, i->dev);
4694 if (ret)
4695 goto rollback_upper_mesh;
4696 }
4697
4698
4699 list_for_each_entry(i, &dev->lower_dev_list, list) {
4700 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
4701 if (ret)
4702 goto rollback_lower_mesh;
4703 }
4704
4705 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4706 return 0;
4707
4708rollback_lower_mesh:
4709 to_i = i;
4710 list_for_each_entry(i, &dev->lower_dev_list, list) {
4711 if (i == to_i)
4712 break;
4713 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4714 }
4715
4716 i = NULL;
4717
4718rollback_upper_mesh:
4719 to_i = i;
4720 list_for_each_entry(i, &upper_dev->upper_dev_list, list) {
4721 if (i == to_i)
4722 break;
4723 __netdev_adjacent_dev_unlink(dev, i->dev);
4724 }
4725
4726 i = j = NULL;
4727
4728rollback_mesh:
4729 to_i = i;
4730 to_j = j;
4731 list_for_each_entry(i, &dev->lower_dev_list, list) {
4732 list_for_each_entry(j, &upper_dev->upper_dev_list, list) {
4733 if (i == to_i && j == to_j)
4734 break;
4735 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4736 }
4737 if (i == to_i)
4738 break;
4739 }
4740
4741 __netdev_adjacent_dev_unlink(dev, upper_dev);
4742
4743 return ret;
4744}
4745
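/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to a device which is upper to this one. The caller must hold
 * the RTNL lock. On a failure a negative errno code is returned.
 * On success the reference counts are adjusted and the function
 * returns zero.
 */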
4756int netdev_upper_dev_link(struct net_device *dev,
4757 struct net_device *upper_dev)
4758{
4759 return __netdev_upper_dev_link(dev, upper_dev, false);
4760}
4761EXPORT_SYMBOL(netdev_upper_dev_link);
4762
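/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to a device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */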
4774int netdev_master_upper_dev_link(struct net_device *dev,
4775 struct net_device *upper_dev)
4776{
4777 return __netdev_upper_dev_link(dev, upper_dev, true);
4778}
4779EXPORT_SYMBOL(netdev_master_upper_dev_link);
4780
4781
4782
4783
4784
4785
4786
4787
4788
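/**
 * netdev_upper_dev_unlink - Removes a link to an upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to a device which is upper to this one. The caller must
 * hold the RTNL lock.
 */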
4789void netdev_upper_dev_unlink(struct net_device *dev,
4790 struct net_device *upper_dev)
4791{
4792 struct netdev_adjacent *i, *j;
4793 ASSERT_RTNL();
4794
4795 __netdev_adjacent_dev_unlink(dev, upper_dev);
4796
4797
4798
4799
4800
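	/* Here is the tricky part. We must remove all dev's lower
	 * devices from all upper_dev's upper devices and vice
	 * versa, to maintain the graph relationship.
	 */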
4801 list_for_each_entry(i, &dev->lower_dev_list, list)
4802 list_for_each_entry(j, &upper_dev->upper_dev_list, list)
4803 __netdev_adjacent_dev_unlink(i->dev, j->dev);
4804
4805
4806
4807
4808 list_for_each_entry(i, &dev->lower_dev_list, list)
4809 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
4810
4811 list_for_each_entry(i, &upper_dev->upper_dev_list, list)
4812 __netdev_adjacent_dev_unlink(dev, i->dev);
4813
4814 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4815}
4816EXPORT_SYMBOL(netdev_upper_dev_unlink);
4817
4818static void dev_change_rx_flags(struct net_device *dev, int flags)
4819{
4820 const struct net_device_ops *ops = dev->netdev_ops;
4821
4822 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4823 ops->ndo_change_rx_flags(dev, flags);
4824}
4825
4826static int __dev_set_promiscuity(struct net_device *dev, int inc)
4827{
4828 unsigned int old_flags = dev->flags;
4829 kuid_t uid;
4830 kgid_t gid;
4831
4832 ASSERT_RTNL();
4833
4834 dev->flags |= IFF_PROMISC;
4835 dev->promiscuity += inc;
4836 if (dev->promiscuity == 0) {
4837
4838
4839
4840
4841 if (inc < 0)
4842 dev->flags &= ~IFF_PROMISC;
4843 else {
4844 dev->promiscuity -= inc;
			pr_warn("%s: promiscuity counter overflowed; promiscuous mode was not set. The promiscuity feature of this device may be broken.\n",
				dev->name);
4847 return -EOVERFLOW;
4848 }
4849 }
4850 if (dev->flags != old_flags) {
4851 pr_info("device %s %s promiscuous mode\n",
4852 dev->name,
4853 dev->flags & IFF_PROMISC ? "entered" : "left");
4854 if (audit_enabled) {
4855 current_uid_gid(&uid, &gid);
4856 audit_log(current->audit_context, GFP_ATOMIC,
4857 AUDIT_ANOM_PROMISCUOUS,
4858 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4859 dev->name, (dev->flags & IFF_PROMISC),
4860 (old_flags & IFF_PROMISC),
4861 from_kuid(&init_user_ns, audit_get_loginuid(current)),
4862 from_kuid(&init_user_ns, uid),
4863 from_kgid(&init_user_ns, gid),
4864 audit_get_sessionid(current));
4865 }
4866
4867 dev_change_rx_flags(dev, IFF_PROMISC);
4868 }
4869 return 0;
4870}
4871
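/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the
 *	device remains above zero the interface remains promiscuous. Once it
 *	hits zero the device reverts back to normal filtering operation. A
 *	negative inc value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */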
4883int dev_set_promiscuity(struct net_device *dev, int inc)
4884{
4885 unsigned int old_flags = dev->flags;
4886 int err;
4887
4888 err = __dev_set_promiscuity(dev, inc);
4889 if (err < 0)
4890 return err;
4891 if (dev->flags != old_flags)
4892 dev_set_rx_mode(dev);
4893 return err;
4894}
4895EXPORT_SYMBOL(dev_set_promiscuity);
4896
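/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While
 *	the count in the device remains above zero the interface remains
 *	listening to all interfaces. Once it hits zero the device reverts
 *	back to normal filtering operation. A negative @inc value is used to
 *	drop the counter when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */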
4910int dev_set_allmulti(struct net_device *dev, int inc)
4911{
4912 unsigned int old_flags = dev->flags;
4913
4914 ASSERT_RTNL();
4915
4916 dev->flags |= IFF_ALLMULTI;
4917 dev->allmulti += inc;
4918 if (dev->allmulti == 0) {
4919
4920
4921
4922
4923 if (inc < 0)
4924 dev->flags &= ~IFF_ALLMULTI;
4925 else {
4926 dev->allmulti -= inc;
			pr_warn("%s: allmulti counter overflowed; allmulti mode was not set. The allmulti feature of this device may be broken.\n",
				dev->name);
4929 return -EOVERFLOW;
4930 }
4931 }
4932 if (dev->flags ^ old_flags) {
4933 dev_change_rx_flags(dev, IFF_ALLMULTI);
4934 dev_set_rx_mode(dev);
4935 }
4936 return 0;
4937}
4938EXPORT_SYMBOL(dev_set_allmulti);
4939
4940
4941
4942
4943
4944
4945
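/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */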
4946void __dev_set_rx_mode(struct net_device *dev)
4947{
4948 const struct net_device_ops *ops = dev->netdev_ops;
4949
4950
4951 if (!(dev->flags&IFF_UP))
4952 return;
4953
4954 if (!netif_device_present(dev))
4955 return;
4956
4957 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4958
4959
4960
4961 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4962 __dev_set_promiscuity(dev, 1);
4963 dev->uc_promisc = true;
4964 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4965 __dev_set_promiscuity(dev, -1);
4966 dev->uc_promisc = false;
4967 }
4968 }
4969
4970 if (ops->ndo_set_rx_mode)
4971 ops->ndo_set_rx_mode(dev);
4972}
4973
4974void dev_set_rx_mode(struct net_device *dev)
4975{
4976 netif_addr_lock_bh(dev);
4977 __dev_set_rx_mode(dev);
4978 netif_addr_unlock_bh(dev);
4979}
4980
4981
4982
4983
4984
4985
4986
4987unsigned int dev_get_flags(const struct net_device *dev)
4988{
4989 unsigned int flags;
4990
4991 flags = (dev->flags & ~(IFF_PROMISC |
4992 IFF_ALLMULTI |
4993 IFF_RUNNING |
4994 IFF_LOWER_UP |
4995 IFF_DORMANT)) |
4996 (dev->gflags & (IFF_PROMISC |
4997 IFF_ALLMULTI));
4998
4999 if (netif_running(dev)) {
5000 if (netif_oper_up(dev))
5001 flags |= IFF_RUNNING;
5002 if (netif_carrier_ok(dev))
5003 flags |= IFF_LOWER_UP;
5004 if (netif_dormant(dev))
5005 flags |= IFF_DORMANT;
5006 }
5007
5008 return flags;
5009}
5010EXPORT_SYMBOL(dev_get_flags);
5011
5012int __dev_change_flags(struct net_device *dev, unsigned int flags)
5013{
5014 unsigned int old_flags = dev->flags;
5015 int ret;
5016
5017 ASSERT_RTNL();
5018
5019
5020
5021
5022
5023 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
5024 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
5025 IFF_AUTOMEDIA)) |
5026 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
5027 IFF_ALLMULTI));
5028
5029
5030
5031
5032
5033 if ((old_flags ^ flags) & IFF_MULTICAST)
5034 dev_change_rx_flags(dev, IFF_MULTICAST);
5035
5036 dev_set_rx_mode(dev);
5037
5038
5039
5040
5041
5042
5043
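	/*
	 *	Have we downed the interface? We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */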
5044 ret = 0;
5045 if ((old_flags ^ flags) & IFF_UP) {
5046 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
5047
5048 if (!ret)
5049 dev_set_rx_mode(dev);
5050 }
5051
5052 if ((flags ^ dev->gflags) & IFF_PROMISC) {
5053 int inc = (flags & IFF_PROMISC) ? 1 : -1;
5054
5055 dev->gflags ^= IFF_PROMISC;
5056 dev_set_promiscuity(dev, inc);
5057 }
5058
5059
5060
5061
5062
5063 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
5064 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
5065
5066 dev->gflags ^= IFF_ALLMULTI;
5067 dev_set_allmulti(dev, inc);
5068 }
5069
5070 return ret;
5071}
5072
5073void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
5074{
5075 unsigned int changes = dev->flags ^ old_flags;
5076
5077 if (changes & IFF_UP) {
5078 if (dev->flags & IFF_UP)
5079 call_netdevice_notifiers(NETDEV_UP, dev);
5080 else
5081 call_netdevice_notifiers(NETDEV_DOWN, dev);
5082 }
5083
5084 if (dev->flags & IFF_UP &&
5085 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
5086 struct netdev_notifier_change_info change_info;
5087
5088 change_info.flags_changed = changes;
5089 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
5090 &change_info.info);
5091 }
5092}
5093
5094
5095
5096
5097
5098
5099
5100
5101
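/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on a device based on state flags. The flags are
 *	in the userspace exported format.
 */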
5102int dev_change_flags(struct net_device *dev, unsigned int flags)
5103{
5104 int ret;
5105 unsigned int changes, old_flags = dev->flags;
5106
5107 ret = __dev_change_flags(dev, flags);
5108 if (ret < 0)
5109 return ret;
5110
5111 changes = old_flags ^ dev->flags;
5112 if (changes)
5113 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
5114
5115 __dev_notify_flags(dev, old_flags);
5116 return ret;
5117}
5118EXPORT_SYMBOL(dev_change_flags);
5119
5120
5121
5122
5123
5124
5125
5126
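/**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *
 *	Change the maximum transfer size of the network device.
 */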
5127int dev_set_mtu(struct net_device *dev, int new_mtu)
5128{
5129 const struct net_device_ops *ops = dev->netdev_ops;
5130 int err;
5131
5132 if (new_mtu == dev->mtu)
5133 return 0;
5134
5135
5136 if (new_mtu < 0)
5137 return -EINVAL;
5138
5139 if (!netif_device_present(dev))
5140 return -ENODEV;
5141
5142 err = 0;
5143 if (ops->ndo_change_mtu)
5144 err = ops->ndo_change_mtu(dev, new_mtu);
5145 else
5146 dev->mtu = new_mtu;
5147
5148 if (!err)
5149 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
5150 return err;
5151}
5152EXPORT_SYMBOL(dev_set_mtu);
5153
5154
5155
5156
5157
5158
5159void dev_set_group(struct net_device *dev, int new_group)
5160{
5161 dev->group = new_group;
5162}
5163EXPORT_SYMBOL(dev_set_group);
5164
5165
5166
5167
5168
5169
5170
5171
5172int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
5173{
5174 const struct net_device_ops *ops = dev->netdev_ops;
5175 int err;
5176
5177 if (!ops->ndo_set_mac_address)
5178 return -EOPNOTSUPP;
5179 if (sa->sa_family != dev->type)
5180 return -EINVAL;
5181 if (!netif_device_present(dev))
5182 return -ENODEV;
5183 err = ops->ndo_set_mac_address(dev, sa);
5184 if (err)
5185 return err;
5186 dev->addr_assign_type = NET_ADDR_SET;
5187 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5188 add_device_randomness(dev->dev_addr, dev->addr_len);
5189 return 0;
5190}
5191EXPORT_SYMBOL(dev_set_mac_address);
5192
5193
5194
5195
5196
5197
5198
5199
5200int dev_change_carrier(struct net_device *dev, bool new_carrier)
5201{
5202 const struct net_device_ops *ops = dev->netdev_ops;
5203
5204 if (!ops->ndo_change_carrier)
5205 return -EOPNOTSUPP;
5206 if (!netif_device_present(dev))
5207 return -ENODEV;
5208 return ops->ndo_change_carrier(dev, new_carrier);
5209}
5210EXPORT_SYMBOL(dev_change_carrier);
5211
5212
5213
5214
5215
5216
5217
5218
5219int dev_get_phys_port_id(struct net_device *dev,
5220 struct netdev_phys_port_id *ppid)
5221{
5222 const struct net_device_ops *ops = dev->netdev_ops;
5223
5224 if (!ops->ndo_get_phys_port_id)
5225 return -EOPNOTSUPP;
5226 return ops->ndo_get_phys_port_id(dev, ppid);
5227}
5228EXPORT_SYMBOL(dev_get_phys_port_id);
5229
5230
5231
5232
5233
5234
5235
5236
5237
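/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number. The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */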
5238static int dev_new_index(struct net *net)
5239{
5240 int ifindex = net->ifindex;
5241 for (;;) {
5242 if (++ifindex <= 0)
5243 ifindex = 1;
5244 if (!__dev_get_by_index(net, ifindex))
5245 return net->ifindex = ifindex;
5246 }
5247}
5248
5249
5250static LIST_HEAD(net_todo_list);
5251static DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
5252
5253static void net_set_todo(struct net_device *dev)
5254{
5255 list_add_tail(&dev->todo_list, &net_todo_list);
5256 dev_net(dev)->dev_unreg_count++;
5257}
5258
5259static void rollback_registered_many(struct list_head *head)
5260{
5261 struct net_device *dev, *tmp;
5262
5263 BUG_ON(dev_boot_phase);
5264 ASSERT_RTNL();
5265
5266 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5267
5268
5269
5270
5271 if (dev->reg_state == NETREG_UNINITIALIZED) {
5272 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5273 dev->name, dev);
5274
5275 WARN_ON(1);
5276 list_del(&dev->unreg_list);
5277 continue;
5278 }
5279 dev->dismantle = true;
5280 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5281 }
5282
5283
5284 dev_close_many(head);
5285
5286 list_for_each_entry(dev, head, unreg_list) {
5287
5288 unlist_netdevice(dev);
5289
5290 dev->reg_state = NETREG_UNREGISTERING;
5291 }
5292
5293 synchronize_net();
5294
5295 list_for_each_entry(dev, head, unreg_list) {
5296
5297 dev_shutdown(dev);
5298
5299
5300
5301
5302
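		/* Notify protocols that we are about to destroy
		 * this device. They should clean all the things.
		 */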
5303 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5304
5305 if (!dev->rtnl_link_ops ||
5306 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5307 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5308
5309
5310
5311
5312 dev_uc_flush(dev);
5313 dev_mc_flush(dev);
5314
5315 if (dev->netdev_ops->ndo_uninit)
5316 dev->netdev_ops->ndo_uninit(dev);
5317
5318
5319 WARN_ON(netdev_has_any_upper_dev(dev));
5320
5321
5322 netdev_unregister_kobject(dev);
5323#ifdef CONFIG_XPS
5324
5325 netif_reset_xps_queues_gt(dev, 0);
5326#endif
5327 }
5328
5329 synchronize_net();
5330
5331 list_for_each_entry(dev, head, unreg_list)
5332 dev_put(dev);
5333}
5334
5335static void rollback_registered(struct net_device *dev)
5336{
5337 LIST_HEAD(single);
5338
5339 list_add(&dev->unreg_list, &single);
5340 rollback_registered_many(&single);
5341 list_del(&single);
5342}
5343
5344static netdev_features_t netdev_fix_features(struct net_device *dev,
5345 netdev_features_t features)
5346{
5347
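	/* Fix illegal checksum combinations */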
5348 if ((features & NETIF_F_HW_CSUM) &&
5349 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5350 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5351 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5352 }
5353
5354
5355 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5356 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5357 features &= ~NETIF_F_ALL_TSO;
5358 }
5359
5360 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5361 !(features & NETIF_F_IP_CSUM)) {
5362 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5363 features &= ~NETIF_F_TSO;
5364 features &= ~NETIF_F_TSO_ECN;
5365 }
5366
5367 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
5368 !(features & NETIF_F_IPV6_CSUM)) {
5369 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
5370 features &= ~NETIF_F_TSO6;
5371 }
5372
5373
5374 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5375 features &= ~NETIF_F_TSO_ECN;
5376
5377
5378 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5379 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5380 features &= ~NETIF_F_GSO;
5381 }
5382
5383
5384 if (features & NETIF_F_UFO) {
5385
5386 if (!((features & NETIF_F_GEN_CSUM) ||
5387 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5388 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5389 netdev_dbg(dev,
5390 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5391 features &= ~NETIF_F_UFO;
5392 }
5393
5394 if (!(features & NETIF_F_SG)) {
5395 netdev_dbg(dev,
5396 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5397 features &= ~NETIF_F_UFO;
5398 }
5399 }
5400
5401 return features;
5402}
5403
5404int __netdev_update_features(struct net_device *dev)
5405{
5406 netdev_features_t features;
5407 int err = 0;
5408
5409 ASSERT_RTNL();
5410
5411 features = netdev_get_wanted_features(dev);
5412
5413 if (dev->netdev_ops->ndo_fix_features)
5414 features = dev->netdev_ops->ndo_fix_features(dev, features);
5415
5416
5417 features = netdev_fix_features(dev, features);
5418
5419 if (dev->features == features)
5420 return 0;
5421
5422 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5423 &dev->features, &features);
5424
5425 if (dev->netdev_ops->ndo_set_features)
5426 err = dev->netdev_ops->ndo_set_features(dev, features);
5427
5428 if (unlikely(err < 0)) {
5429 netdev_err(dev,
5430 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5431 err, &features, &dev->features);
5432 return -1;
5433 }
5434
5435 if (!err)
5436 dev->features = features;
5437
5438 return 1;
5439}
5440
5441
5442
5443
5444
5445
5446
5447
5448
5449void netdev_update_features(struct net_device *dev)
5450{
5451 if (__netdev_update_features(dev))
5452 netdev_features_change(dev);
5453}
5454EXPORT_SYMBOL(netdev_update_features);
5455
5456
5457
5458
5459
5460
5461
5462
5463
5464
5465
void netdev_change_features(struct net_device *dev)
{
	__netdev_update_features(dev);
	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_change_features);

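/**
 *	netif_stacked_transfer_operstate -	transfer operstate
 *	@rootdev: the root or lower level device to transfer state from
 *	@dev: the device to transfer operstate to
 *
 *	Transfer operational state from root to device. This is normally
 *	called when a stacking relationship exists between the root
 *	device and the device (a leaf device).
 */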
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
				      struct net_device *dev)
{
	if (rootdev->operstate == IF_OPER_DORMANT)
		netif_dormant_on(dev);
	else
		netif_dormant_off(dev);

	if (netif_carrier_ok(rootdev)) {
		if (!netif_carrier_ok(dev))
			netif_carrier_on(dev);
	} else {
		if (netif_carrier_ok(dev))
			netif_carrier_off(dev);
	}
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);

#ifdef CONFIG_RPS
static int netif_alloc_rx_queues(struct net_device *dev)
{
	unsigned int i, count = dev->num_rx_queues;
	struct netdev_rx_queue *rx;

	BUG_ON(count < 1);

	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
	if (!rx)
		return -ENOMEM;

	dev->_rx = rx;

	for (i = 0; i < count; i++)
		rx[i].dev = dev;
	return 0;
}
#endif

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue, void *_unused)
{
	/* Initialize queue lock */
	spin_lock_init(&queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
	queue->xmit_lock_owner = -1;
	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
	queue->dev = dev;
#ifdef CONFIG_BQL
	dql_init(&queue->dql, HZ);
#endif
}

static void netif_free_tx_queues(struct net_device *dev)
{
	if (is_vmalloc_addr(dev->_tx))
		vfree(dev->_tx);
	else
		kfree(dev->_tx);
}

static int netif_alloc_netdev_queues(struct net_device *dev)
{
	unsigned int count = dev->num_tx_queues;
	struct netdev_queue *tx;
	size_t sz = count * sizeof(*tx);

	BUG_ON(count < 1 || count > 0xffff);

	/* Try kmalloc first, fall back to vmalloc for large queue counts */
	tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
	if (!tx) {
		tx = vzalloc(sz);
		if (!tx)
			return -ENOMEM;
	}
	dev->_tx = tx;

	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);

	return 0;
}

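/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 */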
int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	dev->iflink = -1;

	ret = dev_get_valid_name(net, dev, dev->name);
	if (ret < 0)
		goto out;

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (((dev->hw_features | dev->features) &
	     NETIF_F_HW_VLAN_CTAG_FILTER) &&
	    (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
	     !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
		netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
		ret = -EINVAL;
		goto err_uninit;
	}

	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;

	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;
	dev->wanted_features = dev->features & dev->hw_features;

	/* Turn on no cache copy if HW is doing checksum */
	if (!(dev->flags & IFF_LOOPBACK)) {
		dev->hw_features |= NETIF_F_NOCACHE_COPY;
		if (dev->features & NETIF_F_ALL_CSUM) {
			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
			dev->features |= NETIF_F_NOCACHE_COPY;
		}
	}

	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
	 */
	dev->vlan_features |= NETIF_F_HIGHDMA;

	/* Make NETIF_F_SG inheritable to tunnel devices.
	 */
	dev->hw_enc_features |= NETIF_F_SG;

	/* Make NETIF_F_SG inheritable to MPLS.
	 */
	dev->mpls_features |= NETIF_F_SG;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */
	set_bit(__LINK_STATE_PRESENT, &dev->state);

	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);

	/* If the device has permanent device address, driver should
	 * set dev_addr and also addr_assign_type should be set to
	 * NET_ADDR_PERM (default value).
	 */
	if (dev->addr_assign_type == NET_ADDR_PERM)
		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

out:
	return ret;

err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
EXPORT_SYMBOL(register_netdevice);

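/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */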
int init_dummy_netdev(struct net_device *dev)
{
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls
	 */
	memset(dev, 0, sizeof(struct net_device));

	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
	dev->reg_state = NETREG_DUMMY;

	/* NAPI wants this */
	INIT_LIST_HEAD(&dev->napi_list);

	/* a dummy interface is started by default */
	set_bit(__LINK_STATE_PRESENT, &dev->state);
	set_bit(__LINK_STATE_START, &dev->state);

	/* Note : We dont allocate pcpu_refcnt for dummy devices,
	 * because users of this 'device' dont need to change
	 * its refcount.
	 */
	return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);

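/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice() that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */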
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

int netdev_refcnt_read(const struct net_device *dev)
{
	int i, refcnt = 0;

	/* Sum the per-cpu reference counters */
	for_each_possible_cpu(i)
		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
	return refcnt;
}
EXPORT_SYMBOL(netdev_refcnt_read);

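/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */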
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;
	int refcnt;

	linkwatch_forget_dev(dev);

	rebroadcast_time = warning_time = jiffies;
	refcnt = netdev_refcnt_read(dev);

	while (refcnt != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			__rtnl_unlock();
			rcu_barrier();
			rtnl_lock();

			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		refcnt = netdev_refcnt_read(dev);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
				 dev->name, refcnt);
			warning_time = jiffies;
		}
	}
}

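/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */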
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	/* Wait for rcu callbacks to finish before next phase */
	if (!list_empty(&list))
		rcu_barrier();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_first_entry(&list, struct net_device, todo_list);
		list_del(&dev->todo_list);

		rtnl_lock();
		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
		__rtnl_unlock();

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			pr_err("network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(netdev_refcnt_read(dev));
		WARN_ON(rcu_access_pointer(dev->ip_ptr));
		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Report a network device has been unregistered */
		rtnl_lock();
		dev_net(dev)->dev_unreg_count--;
		__rtnl_unlock();
		wake_up(&netdev_unregistering_wq);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

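/* Convert net_device_stats to rtnl_link_stats64.  They have the same
 * fields in the same order, with only the type differing.
 */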
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
			     const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*stats64));
#else
	size_t i, n = sizeof(*stats64) / sizeof(u64);
	const unsigned long *src = (const unsigned long *)netdev_stats;
	u64 *dst = (u64 *)stats64;

	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
		     sizeof(*stats64) / sizeof(u64));
	for (i = 0; i < n; i++)
		dst[i] = src[i];
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);

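/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */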
struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
					struct rtnl_link_stats64 *storage)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats64) {
		memset(storage, 0, sizeof(*storage));
		ops->ndo_get_stats64(dev, storage);
	} else if (ops->ndo_get_stats) {
		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
	} else {
		netdev_stats_to_stats64(storage, &dev->stats);
	}
	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
	return storage;
}
EXPORT_SYMBOL(dev_get_stats);

struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
	struct netdev_queue *queue = dev_ingress_queue(dev);

#ifdef CONFIG_NET_CLS_ACT
	if (queue)
		return queue;
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;
	netdev_init_one_queue(dev, queue, NULL);
	queue->qdisc = &noop_qdisc;
	queue->qdisc_sleeping = &noop_qdisc;
	rcu_assign_pointer(dev->ingress_queue, queue);
#endif
	return queue;
}

static const struct ethtool_ops default_ethtool_ops;

void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops)
{
	if (dev->ethtool_ops == &default_ethtool_ops)
		dev->ethtool_ops = ops;
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);

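/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@txqs:		the number of TX subqueues to allocate
 *	@rxqs:		the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device.
 *
 *	Illustrative usage sketch only; "struct my_priv" and the "my%d"
 *	name are hypothetical placeholders, not part of this API:
 *
 *		dev = alloc_netdev_mqs(sizeof(struct my_priv), "my%d",
 *				       ether_setup, 4, 4);
 *		if (!dev)
 *			return -ENOMEM;
 */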
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_RPS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN - 1;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p)
		return NULL;

	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_p;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->upper_dev_list);
	INIT_LIST_HEAD(&dev->lower_dev_list);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_RPS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	strcpy(dev->name, name);
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

free_p:
	kfree(p);
	return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);

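/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */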
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

	kfree(rcu_dereference_protected(dev->ingress_queue, 1));

	/* Flush device addresses */
	dev_addr_flush(dev);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	free_percpu(dev->pcpu_refcnt);
	dev->pcpu_refcnt = NULL;

	/*  Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}
EXPORT_SYMBOL(free_netdev);

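/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */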
void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}
EXPORT_SYMBOL(synchronize_net);

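/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */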
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
	ASSERT_RTNL();

	if (head) {
		list_move_tail(&dev->unreg_list, head);
	} else {
		rollback_registered(dev);
		/* Finish processing unregister after unlock */
		net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_queue);

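/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 */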
void unregister_netdevice_many(struct list_head *head)
{
	struct net_device *dev;

	if (!list_empty(head)) {
		rollback_registered_many(head);
		list_for_each_entry(dev, head, unreg_list)
			net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_many);

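/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */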
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);

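/**
 *	dev_change_net_namespace - move device to different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */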
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing todo */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	if (__dev_get_by_name(net, dev->name)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (dev_get_valid_name(net, dev, pat) < 0)
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.

	   Note that dev->reg_state stays at NETREG_REGISTERED.
	   This is wanted because this way 8021q and macvlan know
	   the device is just moving and can keep their slaves up.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
	rcu_barrier();
	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_uc_flush(dev);
	dev_mc_flush(dev);

	/* Send a netdev-removed uevent to the old namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Send a netdev-add uevent to the new namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

	synchronize_net();
	err = 0;
out:
	return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Append output queue from offline CPU. */
	if (oldsd->output_queue) {
		*sd->output_queue_tailp = oldsd->output_queue;
		sd->output_queue_tailp = oldsd->output_queue_tailp;
		oldsd->output_queue = NULL;
		oldsd->output_queue_tailp = &oldsd->output_queue;
	}
	/* Append NAPI poll list from offline CPU. */
	if (!list_empty(&oldsd->poll_list)) {
		list_splice_init(&oldsd->poll_list, &sd->poll_list);
		raise_softirq_irqoff(NET_RX_SOFTIRQ);
	}

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}

	return NOTIFY_OK;
}

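/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */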
netdev_features_t netdev_increment_features(netdev_features_t all,
	netdev_features_t one, netdev_features_t mask)
{
	if (mask & NETIF_F_GEN_CSUM)
		mask |= NETIF_F_ALL_CSUM;
	mask |= NETIF_F_VLAN_CHALLENGED;

	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
	all &= one | ~NETIF_F_ALL_FOR_ALL;

	/* If one device supports hw checksumming, set for all. */
	if (all & NETIF_F_GEN_CSUM)
		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);

static struct hlist_head * __net_init netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	if (net != &init_net)
		INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

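/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine network driver for device.
 */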
const char *netdev_drivername(const struct net_device *dev)
{
	const struct device_driver *driver;
	const struct device *parent;
	const char *empty = "";

	parent = dev->dev.parent;
	if (!parent)
		return empty;

	driver = parent->driver;
	if (driver && driver->name)
		return driver->name;
	return empty;
}

static int __netdev_printk(const char *level, const struct net_device *dev,
			   struct va_format *vaf)
{
	int r;

	if (dev && dev->dev.parent) {
		r = dev_printk_emit(level[1] - '0',
				    dev->dev.parent,
				    "%s %s %s: %pV",
				    dev_driver_string(dev->dev.parent),
				    dev_name(dev->dev.parent),
				    netdev_name(dev), vaf);
	} else if (dev) {
		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
	} else {
		r = printk("%s(NULL net_device): %pV", level, vaf);
	}

	return r;
}

int netdev_printk(const char *level, const struct net_device *dev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;
	int r;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	r = __netdev_printk(level, dev, &vaf);

	va_end(args);

	return r;
}
EXPORT_SYMBOL(netdev_printk);

#define define_netdev_printk_level(func, level)			\
int func(const struct net_device *dev, const char *fmt, ...)		\
{									\
	int r;								\
	struct va_format vaf;						\
	va_list args;							\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	r = __netdev_printk(level, dev, &vaf);				\
									\
	va_end(args);							\
									\
	return r;							\
}									\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Leave virtual devices for the generic cleanup */
		if (dev->rtnl_link_ops)
			continue;

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			pr_emerg("%s: failed to move %s to init_net: %d\n",
				 __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
	/* Return with the rtnl lock held when there are no network
	 * devices unregistering in any network namespace in net_list.
	 */
	struct net *net;
	bool unregistering;
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&netdev_unregistering_wq, &wait,
				TASK_UNINTERRUPTIBLE);
		unregistering = false;
		rtnl_lock();
		list_for_each_entry(net, net_list, exit_list) {
			if (net->dev_unreg_count > 0) {
				unregistering = true;
				break;
			}
		}
		if (!unregistering)
			break;
		__rtnl_unlock();
		schedule();
	}
	finish_wait(&netdev_unregistering_wq, &wait);
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed,
	 * wait here for all pending unregistrations to complete
	 * before starting a new round of unregistration.
	 */
	rtnl_lock_unregistering(net_list);
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	list_del(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

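/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */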
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		memset(sd, 0, sizeof(*sd));
		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		sd->completion_queue = NULL;
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->csd.flags = 0;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
		sd->backlog.gro_list = NULL;
		sd->backlog.gro_count = 0;

#ifdef CONFIG_NET_FLOW_LIMIT
		sd->flow_limit = NULL;
#endif
	}

	dev_boot_phase = 0;

	/* The loopback device is special. If any other network device
	 * is present in a network namespace the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the
	 * list of network devices.  Additionally the loopback device
	 * is the first device that appears and the last network device
	 * that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);