/*
 * NET3: core network device support - device list management, protocol
 * handler (packet_type/packet_offload) registration, device state
 * changes (open/close/rename) and the transmit path entry points.
 */
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>

#include "net-sysfs.h"

#define MAX_GRO_SKBS 8

#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;
static struct list_head offload_base __read_mostly;
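
/*
 * The lists above hold the packet types we will receive (as opposed to
 * discard) and the routines to invoke.  Writers of ptype_base/ptype_all
 * and offload_base must hold ptype_lock or offload_lock respectively;
 * readers on the hot path walk the lists under rcu_read_lock().
 *
 * The device list (dev_base_head and the name/index hashes) is protected
 * by dev_base_lock and the RTNL semaphore: pure readers hold
 * dev_base_lock for reading or use RCU; writers must hold RTNL while
 * they walk the list and take dev_base_lock for writing when they do
 * the actual updates.
 */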
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);

seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}
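
/* Device list insertion: the caller must hold the rtnl semaphore. */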
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}
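
/*
 * Our notifier list: device events (register, up, down, rename, ...)
 * are multicast to subscribers through this chain under RTNL.
 */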
static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;

	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
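
/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 * The add/remove pairs below (dev_add_pack/dev_remove_pack and the
 * packet_offload equivalents) publish and retire handlers with RCU:
 * an added handler becomes visible to readers immediately, while a
 * removed one must not be freed until a grace period has elapsed
 * (synchronize_net).  That is why each remove comes in a __ variant
 * without the wait and a plain variant with it.
 */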
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);

/* __dev_remove_pack: unlink a handler without waiting for an RCU grace
 * period, so packets may still be in flight to it; callers that free
 * the packet_type must call synchronize_net() first (or use
 * dev_remove_pack(), which does).
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);

/* dev_remove_pack: as above, plus the RCU grace period, so the handler
 * is guaranteed quiescent on return.  May sleep.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);

void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(__dev_remove_offload);

void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);
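
/*******************************************************************************

		Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table, filled from the "netdev=" kernel
 * parameter and consumed by drivers via netdev_boot_setup_check().
 */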
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/* netdev_boot_setup_add: store new boot-time settings in the first free
 * (or space-padded) slot.  Returns 0 on failure, 1 on success.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.  The found settings are
 *	already filled in the device structure on a match; 1 is returned
 *	if a match is found, 0 otherwise.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);

/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}
/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
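
/*******************************************************************************

		Device Interface Subroutines

*******************************************************************************/

/*
 * The lookup helpers below come in three locking flavors:
 *
 *	__dev_get_by_*()	caller holds RTNL or dev_base_lock; no
 *				reference is taken on the result.
 *	dev_get_by_*_rcu()	caller holds rcu_read_lock(); the result
 *				is only valid inside the RCU section.
 *	dev_get_by_*()		takes a reference on the device; the
 *				caller must dev_put() it when finished.
 */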
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);
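
/**
 *	netdev_get_name - get a netdevice name, knowing its ifindex.
 *	@net: network namespace
 *	@name: a pointer to the buffer where the name will be stored.
 *	@ifindex: the ifindex of the interface to get the name from.
 *
 *	The name is copied without a lock; the seqcount read/retry pair
 *	around devnet_rename_seq detects a concurrent dev_change_name()
 *	and restarts the copy if a rename raced with it.
 */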
int netdev_get_name(struct net *net, char *name, int ifindex)
{
	struct net_device *dev;
	unsigned int seq;

retry:
	seq = raw_seqcount_begin(&devnet_rename_seq);
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(name, dev->name);
	rcu_read_unlock();
	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
		cond_resched();
		goto retry;
	}

	return 0;
}
/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address.  Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.  The returned device has not had
 *	its ref count increased and the caller must therefore be careful
 *	about locking.
 */
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags_rcu - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags.  Returns NULL if a
 *	device is not found or a pointer to the device.  Must be called
 *	inside rcu_read_lock(), and result refcount is unchanged.
 */
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);
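
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */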
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);
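
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string, which may contain at most one "%d"
 *	@buf:  scratch buffer of at least IFNAMSIZ bytes
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id.  It scans the list of devices to build up a free map, then
 *	chooses the first empty slot.  Returns the number of the unit
 *	assigned or a negative errno code.  The caller must hold the
 *	dev_base or rtnl lock while allocating the name and adding the
 *	device in order to avoid duplicates.
 */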
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be exactly one "%d" and no other
		 * "%" characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);

static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}
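
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d".
 *	for wildcarding.  The device must be down (-EBUSY otherwise) and
 *	the caller must hold RTNL.  If the CHANGENAME notifier refuses the
 *	rename, it is rolled back once.
 */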
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}
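
/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.  A zero @len clears the alias.
 */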
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}
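
/*
 * The three helpers below broadcast device events to interested
 * subsystems: netdev_features_change after a feature update,
 * netdev_state_change when an interface that is up changes state, and
 * netdev_notify_peers when the device wants the rest of the network to
 * refresh its view of it (e.g. after a migration).
 */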
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	/* Block netpoll from trying to do any rx path servicing.
	 * If we don't do this there is a chance ndo_poll_controller
	 * or ndo_poll may be running while we open the device
	 */
	netpoll_rx_disable(dev);

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	netpoll_rx_enable(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}
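
/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state.  The device's private open
 *	function is invoked and then the multicast lists are loaded.  Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop.  On a failure
 *	a negative errno code is returned.
 */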
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);

static int __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, unreg_list) {
		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/* Synchronize to scheduled poll. We cannot touch poll list, it
		 * can be even on different cpu. So just clear netif_running().
		 *
		 * dev->stop() will invoke napi_disable() on all of it's
		 * napi_struct instances on this device.
		 */
		smp_mb__after_clear_bit(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 *	Call the device specific close. This cannot fail.
		 *	Only if device is UP
		 *
		 *	We allow it to be called even after a DETACH hot-plug
		 *	event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		net_dmaengine_put();
	}

	return 0;
}

static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

	/* Temporarily disable netpoll until the interface is down */
	netpoll_rx_disable(dev);

	list_add(&dev->unreg_list, &single);
	retval = __dev_close_many(&single);
	list_del(&single);

	netpoll_rx_enable(dev);
	return retval;
}

static int dev_close_many(struct list_head *head)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(dev, tmp, head, unreg_list)
		if (!(dev->flags & IFF_UP))
			list_move(&dev->unreg_list, &tmp_list);

	__dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	/* rollback_registered_many needs the complete original list */
	list_splice(&tmp_list, head);
	return 0;
}
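
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state.  A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain.  The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the
 *	notifier chain.
 */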
int dev_close(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		/* Block netpoll rx while the interface is going down */
		netpoll_rx_disable(dev);

		list_add(&dev->unreg_list, &single);
		dev_close_many(&single);
		list_del(&single);

		netpoll_rx_enable(dev);
	}
	return 0;
}
EXPORT_SYMBOL(dev_close);
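
/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */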
void dev_disable_lro(struct net_device *dev)
{
	/*
	 * If we're trying to disable lro on a vlan device
	 * use the underlying physical device instead
	 */
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);

	dev->wanted_features &= ~NETIF_F_LRO;
	netdev_update_features(dev);

	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);

static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
				   struct net_device *dev)
{
	struct netdev_notifier_info info;

	netdev_notifier_info_init(&info, dev);
	return nb->notifier_call(nb, val, &info);
}

static int dev_boot_phase = 1;
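
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered.  A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */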
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			call_netdevice_notifier(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier().  A negative errno code is returned
 *	on a failure.
 *
 *	After unregistering, unregister and down device events are
 *	synthesized for all devices on the device list to the removed
 *	notifier to remove the need for special case cleanup code.
 */
int unregister_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	if (err)
		goto unlock;

	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev->flags & IFF_UP) {
				call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
							dev);
				call_netdevice_notifier(nb, NETDEV_DOWN, dev);
			}
			call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
		}
	}
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 *	call_netdevice_notifiers_info - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *	@info: notifier information data
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */
int call_netdevice_notifiers_info(unsigned long val, struct net_device *dev,
				  struct netdev_notifier_info *info)
{
	ASSERT_RTNL();
	netdev_notifier_info_init(info, dev);
	return raw_notifier_call_chain(&netdev_chain, val, info);
}
EXPORT_SYMBOL(call_netdevice_notifiers_info);
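
/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */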
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	struct netdev_notifier_info info;

	return call_netdevice_notifiers_info(val, dev, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);

static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
 * If net_disable_timestamp() is called from irq context, defer the
 * static_key_slow_dec() calls.
 */
static atomic_t netstamp_needed_deferred;
#endif

void net_enable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);

	if (deferred) {
		while (--deferred)
			static_key_slow_dec(&netstamp_needed);
		return;
	}
#endif
	static_key_slow_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	if (in_interrupt()) {
		atomic_inc(&netstamp_needed_deferred);
		return;
	}
#endif
	static_key_slow_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp_set(struct sk_buff *skb)
{
	skb->tstamp.tv64 = 0;
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

#define net_timestamp_check(COND, SKB)			\
	if (static_key_false(&netstamp_needed)) {	\
		if ((COND) && !(SKB)->tstamp.tv64)	\
			__net_timestamp(SKB);		\
	}

static inline bool is_skb_forwardable(struct net_device *dev,
				      struct sk_buff *skb)
{
	unsigned int len;

	if (!(dev->flags & IFF_UP))
		return false;

	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
	if (skb->len <= len)
		return true;

	/* if TSO is enabled, we don't care about the length as the packet
	 * could be forwarded without being segmented before
	 */
	if (skb_is_gso(skb))
		return true;

	return false;
}
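
/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */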
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			atomic_long_inc(&dev->rx_dropped);
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	if (unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb_scrub_packet(skb);
	skb->protocol = eth_type_trans(skb, dev);

	/* eth_type_trans() can set pkt_type based on the destination MAC;
	 * the frame is being injected locally, so force PACKET_HOST.
	 */
	skb->pkt_type = PACKET_HOST;

	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
		return -ENOMEM;
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
	if (!ptype->af_packet_priv || !skb->sk)
		return false;

	if (ptype->id_match)
		return ptype->id_match(ptype, skb->sk);
	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
		return true;

	return false;
}
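
/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */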
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;
	struct sk_buff *skb2 = NULL;
	struct packet_type *pt_prev = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (!skb_loop_sk(ptype, skb))) {
			if (pt_prev) {
				deliver_skb(skb2, pt_prev, skb->dev);
				pt_prev = ptype;
				continue;
			}

			skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			net_timestamp_set(skb2);

			/* skb->nh should be correctly
			 * set by sender, so that the second statement is
			 * just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb_network_header(skb2) > skb_tail_pointer(skb2)) {
				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
						     ntohs(skb2->protocol),
						     dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			pt_prev = ptype;
		}
	}
	if (pt_prev)
		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
	rcu_read_unlock();
}
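
/*
 * netif_setup_tc: reconcile the device's prio-to-traffic-class map with
 * a new real Tx queue count; any tc or priority mapping that now points
 * past @txq is invalidated.
 */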
static void netif_setup_tc(struct net_device *dev, unsigned int txq)
{
	int i;
	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];

	/* If TC0 is invalidated disable TC mapping */
	if (tc->offset + tc->count > txq) {
		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
		dev->num_tc = 0;
		return;
	}

	/* Invalidated prio to tc mappings set to TC0 */
	for (i = 1; i < TC_BITMASK + 1; i++) {
		int q = netdev_get_prio_tc_map(dev, i);

		tc = &dev->tc_to_txq[q];
		if (tc->offset + tc->count > txq) {
			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
				i, q);
			netdev_set_prio_tc_map(dev, i, 0);
		}
	}
}

#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
#define xmap_dereference(P)		\
	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))

static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
					int cpu, u16 index)
{
	struct xps_map *map = NULL;
	int pos;

	if (dev_maps)
		map = xmap_dereference(dev_maps->cpu_map[cpu]);

	for (pos = 0; map && pos < map->len; pos++) {
		if (map->queues[pos] == index) {
			if (map->len > 1) {
				map->queues[pos] = map->queues[--map->len];
			} else {
				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
				kfree_rcu(map, rcu);
				map = NULL;
			}
			break;
		}
	}

	return map;
}

static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
{
	struct xps_dev_maps *dev_maps;
	int cpu, i;
	bool active = false;

	mutex_lock(&xps_map_mutex);
	dev_maps = xmap_dereference(dev->xps_maps);

	if (!dev_maps)
		goto out_no_maps;

	for_each_possible_cpu(cpu) {
		for (i = index; i < dev->num_tx_queues; i++) {
			if (!remove_xps_queue(dev_maps, cpu, i))
				break;
		}
		if (i == dev->num_tx_queues)
			active = true;
	}

	if (!active) {
		RCU_INIT_POINTER(dev->xps_maps, NULL);
		kfree_rcu(dev_maps, rcu);
	}

	for (i = index; i < dev->num_tx_queues; i++)
		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
					     NUMA_NO_NODE);

out_no_maps:
	mutex_unlock(&xps_map_mutex);
}
static struct xps_map *expand_xps_map(struct xps_map *map,
				      int cpu, u16 index)
{
	struct xps_map *new_map;
	int alloc_len = XPS_MIN_MAP_ALLOC;
	int i, pos;

	for (pos = 0; map && pos < map->len; pos++) {
		if (map->queues[pos] != index)
			continue;
		return map;
	}

	/* Need to add queue to this CPU's existing map */
	if (map) {
		if (pos < map->alloc_len)
			return map;

		alloc_len = map->alloc_len * 2;
	}

	/* Need to allocate new map to store queue on this CPU's map */
	new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
			       cpu_to_node(cpu));
	if (!new_map)
		return NULL;

	for (i = 0; i < pos; i++)
		new_map->queues[i] = map->queues[i];
	new_map->alloc_len = alloc_len;
	new_map->len = pos;

	return new_map;
}
int netif_set_xps_queue(struct net_device *dev, struct cpumask *mask, u16 index)
{
	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
	struct xps_map *map, *new_map;
	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
	int cpu, numa_node_id = -2;
	bool active = false;

	mutex_lock(&xps_map_mutex);

	dev_maps = xmap_dereference(dev->xps_maps);

	/* allocate memory for queue storage */
	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, mask))
			continue;

		if (!new_dev_maps)
			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
		if (!new_dev_maps) {
			mutex_unlock(&xps_map_mutex);
			return -ENOMEM;
		}

		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
				 NULL;

		map = expand_xps_map(map, cpu, index);
		if (!map)
			goto error;

		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
	}

	if (!new_dev_maps)
		goto out_no_new_maps;

	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
			/* add queue to CPU maps */
			int pos = 0;

			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
			while ((pos < map->len) && (map->queues[pos] != index))
				pos++;

			if (pos == map->len)
				map->queues[map->len++] = index;
#ifdef CONFIG_NUMA
			if (numa_node_id == -2)
				numa_node_id = cpu_to_node(cpu);
			else if (numa_node_id != cpu_to_node(cpu))
				numa_node_id = -1;
#endif
		} else if (dev_maps) {
			/* fill in the new device map from the old device map */
			map = xmap_dereference(dev_maps->cpu_map[cpu]);
			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
		}

	}

	rcu_assign_pointer(dev->xps_maps, new_dev_maps);

	/* Cleanup old maps */
	if (dev_maps) {
		for_each_possible_cpu(cpu) {
			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
			map = xmap_dereference(dev_maps->cpu_map[cpu]);
			if (map && map != new_map)
				kfree_rcu(map, rcu);
		}

		kfree_rcu(dev_maps, rcu);
	}

	dev_maps = new_dev_maps;
	active = true;

out_no_new_maps:
	/* update Tx queue numa node */
	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
				     (numa_node_id >= 0) ? numa_node_id :
				     NUMA_NO_NODE);

	if (!dev_maps)
		goto out_no_maps;

	/* removes queue from unused CPUs */
	for_each_possible_cpu(cpu) {
		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
			continue;

		if (remove_xps_queue(dev_maps, cpu, index))
			active = true;
	}

	/* free map if not active */
	if (!active) {
		RCU_INIT_POINTER(dev->xps_maps, NULL);
		kfree_rcu(dev_maps, rcu);
	}

out_no_maps:
	mutex_unlock(&xps_map_mutex);

	return 0;
error:
	/* remove any maps that we added */
	for_each_possible_cpu(cpu) {
		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
				 NULL;
		if (new_map && new_map != map)
			kfree(new_map);
	}

	mutex_unlock(&xps_map_mutex);

	kfree(new_dev_maps);
	return -ENOMEM;
}
EXPORT_SYMBOL(netif_set_xps_queue);

#endif
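
/*
 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
 */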
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
	int rc;

	if (txq < 1 || txq > dev->num_tx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED ||
	    dev->reg_state == NETREG_UNREGISTERING) {
		ASSERT_RTNL();

		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
						  txq);
		if (rc)
			return rc;

		if (dev->num_tc)
			netif_setup_tc(dev, txq);

		if (txq < dev->real_num_tx_queues) {
			qdisc_reset_all_tx_gt(dev, txq);
#ifdef CONFIG_XPS
			netif_reset_xps_queues_gt(dev, txq);
#endif
		}
	}

	dev->real_num_tx_queues = txq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);

#ifdef CONFIG_RPS
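/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */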
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
/**
 * netif_get_num_default_rss_queues - default number of RSS queues
 *
 * This routine should set an upper limit on the number of RSS queues
 * used by default by multiqueue devices.
 */
int netif_get_num_default_rss_queues(void)
{
	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);

static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached from system and restart if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

static void skb_warn_bad_offload(const struct sk_buff *skb)
{
	static const netdev_features_t null_features = 0;
	struct net_device *dev = skb->dev;
	const char *driver = "";

	if (!net_ratelimit())
		return;

	if (dev && dev->dev.parent)
		driver = dev_driver_string(dev->dev.parent);

	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
	     "gso_type=%d ip_summed=%d\n",
	     driver, dev ? &dev->features : &null_features,
	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
	     skb_shinfo(skb)->gso_type, skb->ip_summed);
}
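
/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */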
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	/* Before computing a checksum, we should make sure no frag could
	 * be modified by an external entity : checksum could be wrong.
	 */
	if (skb_has_shared_frag(skb)) {
		ret = __skb_linearize(skb);
		if (ret)
			goto out;
	}

	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);

__be16 skb_network_protocol(struct sk_buff *skb)
{
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;

	/* Tunnel gso handlers can set protocol to ethernet. */
	if (type == htons(ETH_P_TEB)) {
		struct ethhdr *eth;

		if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
			return 0;

		eth = (struct ethhdr *)skb_mac_header(skb);
		type = eth->h_proto;
	}

	while (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return 0;

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	return type;
}
/**
 *	skb_mac_gso_segment - mac layer segmentation handler
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 */
struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
				    netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_offload *ptype;
	__be16 type = skb_network_protocol(skb);

	if (unlikely(!type))
		return ERR_PTR(-EINVAL);

	__skb_pull(skb, skb->mac_len);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &offload_base, list) {
		if (ptype->type == type && ptype->callbacks.gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				int err;

				err = ptype->callbacks.gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->callbacks.gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_mac_gso_segment);

/* openvswitch calls this on rx path, so we need a different check.
 */
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
	if (tx_path)
		return skb->ip_summed != CHECKSUM_PARTIAL;
	else
		return skb->ip_summed == CHECKSUM_NONE;
}
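
/**
 *	__skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *	@tx_path: whether it is called in TX path
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */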
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
				  netdev_features_t features, bool tx_path)
{
	if (unlikely(skb_needs_check(skb, tx_path))) {
		int err;

		skb_warn_bad_offload(skb);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	return skb_mac_gso_segment(skb, features);
}
EXPORT_SYMBOL(__skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif
/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}
/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *	@features: device features as applicable to this skb
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

static netdev_features_t harmonize_features(struct sk_buff *skb,
					    netdev_features_t features)
{
	if (skb->ip_summed != CHECKSUM_NONE &&
	    !can_checksum_protocol(features, skb_network_protocol(skb))) {
		features &= ~NETIF_F_ALL_CSUM;
	} else if (illegal_highdma(skb->dev, skb)) {
		features &= ~NETIF_F_SG;
	}

	return features;
}

netdev_features_t netif_skb_features(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;
	netdev_features_t features = skb->dev->features;

	if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
		features &= ~NETIF_F_GSO_MASK;

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, features);
	}

	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
					       NETIF_F_HW_VLAN_STAG_TX);

	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
				NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
				NETIF_F_HW_VLAN_STAG_TX;

	return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
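
/*
 * Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG.
 */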
static inline int skb_needs_linearize(struct sk_buff *skb,
				      netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
			((skb_has_frag_list(skb) &&
				!(features & NETIF_F_FRAGLIST)) ||
			(skb_shinfo(skb)->nr_frags &&
				!(features & NETIF_F_SG)));
}
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		netdev_features_t features;

		/*
		 * If device doesn't need skb->dst, release it right now while
		 * its hot in this cpu cache
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		features = netif_skb_features(skb);

		if (vlan_tx_tag_present(skb) &&
		    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
			skb = __vlan_put_tag(skb, skb->vlan_proto,
					     vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		/* If encapsulation offload request, verify we are testing
		 * hardware encapsulation features instead of standard
		 * features for the netdev
		 */
		if (skb->encapsulation)
			features &= dev->hw_enc_features;

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			/* If packet is not checksummed and device does not
			 * support checksumming for this protocol, complete
			 * checksumming here.
			 */
			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				if (skb->encapsulation)
					skb_set_inner_transport_header(skb,
						skb_checksum_start_offset(skb));
				else
					skb_set_transport_header(skb,
						skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				    skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		skb_len = skb->len;
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(nskb, dev);

		skb_len = nskb->len;
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_xmit_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	if (likely(skb->next == NULL)) {
		skb->destructor = DEV_GSO_CB(skb)->destructor;
		consume_skb(skb);
		return rc;
	}
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}
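
/*
 * qdisc_pkt_len_init: set qdisc_skb_cb(skb)->pkt_len to the wire-level
 * length the skb will occupy after segmentation, so that byte-based
 * qdiscs account GSO packets accurately (header bytes are counted once
 * per resulting segment).
 */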
static void qdisc_pkt_len_init(struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	qdisc_skb_cb(skb)->pkt_len = skb->len;

	/* To get more precise estimation of bytes sent on wire,
	 * we add to pkt_len the headers size of all segments
	 */
	if (shinfo->gso_size) {
		unsigned int hdr_len;
		u16 gso_segs = shinfo->gso_segs;

		/* mac layer + network layer */
		hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

		/* + transport layer */
		if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
			hdr_len += tcp_hdrlen(skb);
		else
			hdr_len += sizeof(struct udphdr);

		if (shinfo->gso_type & SKB_GSO_DODGY)
			gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
						shinfo->gso_size);

		qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
	}
}
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_pkt_len_init(skb);
	qdisc_calculate_pkt_len(skb, q);
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get qdisc main lock.
	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
	 * and dequeue packets faster.
	 */
	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}
#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
static void skb_update_prio(struct sk_buff *skb)
{
	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);

	if (!skb->priority && skb->sk && map) {
		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;

		if (prioidx < map->priomap_len)
			skb->priority = map->priomap[prioidx];
	}
}
#else
#define skb_update_prio(skb)
#endif

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
/**
 * dev_loopback_xmit - loop back @skb
 * @skb: buffer to transmit
 */
int dev_loopback_xmit(struct sk_buff *skb)
{
	skb_reset_mac_header(skb);
	__skb_pull(skb, skb_network_offset(skb));
	skb->pkt_type = PACKET_LOOPBACK;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(skb));
	skb_dst_force(skb);
	netif_rx_ni(skb);
	return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);
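
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device.  The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function.  The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure.  A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.  Note the method can also return
 *	errors from the queue disciplines, including NET_XMIT_DROP, which
 *	is a positive value.
 *
 *	Regardless of the return value, the skb is consumed, so it is
 *	currently difficult to retry a send to this method.
 *
 *	When calling this method, interrupts MUST be enabled.  This is
 *	because the BH enable code must have IRQs enabled so that it will
 *	not deadlock.
 */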
2803int dev_queue_xmit(struct sk_buff *skb)
2804{
2805 struct net_device *dev = skb->dev;
2806 struct netdev_queue *txq;
2807 struct Qdisc *q;
2808 int rc = -ENOMEM;
2809
2810 skb_reset_mac_header(skb);
2811
2812
2813
2814
2815 rcu_read_lock_bh();
2816
2817 skb_update_prio(skb);
2818
2819 txq = netdev_pick_tx(dev, skb);
2820 q = rcu_dereference_bh(txq->qdisc);
2821
2822#ifdef CONFIG_NET_CLS_ACT
2823 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2824#endif
2825 trace_net_dev_queue(skb);
2826 if (q->enqueue) {
2827 rc = __dev_xmit_skb(skb, q, dev, txq);
2828 goto out;
2829 }
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843 if (dev->flags & IFF_UP) {
2844 int cpu = smp_processor_id();
2845
2846 if (txq->xmit_lock_owner != cpu) {
2847
2848 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
2849 goto recursion_alert;
2850
2851 HARD_TX_LOCK(dev, txq, cpu);
2852
2853 if (!netif_xmit_stopped(txq)) {
2854 __this_cpu_inc(xmit_recursion);
2855 rc = dev_hard_start_xmit(skb, dev, txq);
2856 __this_cpu_dec(xmit_recursion);
2857 if (dev_xmit_complete(rc)) {
2858 HARD_TX_UNLOCK(dev, txq);
2859 goto out;
2860 }
2861 }
2862 HARD_TX_UNLOCK(dev, txq);
2863 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
2864 dev->name);
2865 } else {
2868
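/* Recursion is detected! It is possible,
 * unfortunately
 */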
2869recursion_alert:
2870 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
2871 dev->name);
2872 }
2873 }
2874
2875 rc = -ENETDOWN;
2876 rcu_read_unlock_bh();
2877
2878 kfree_skb(skb);
2879 return rc;
2880out:
2881 rcu_read_unlock_bh();
2882 return rc;
2883}
2884EXPORT_SYMBOL(dev_queue_xmit);
2890
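/*************************************************************************
 *                      Receiver routines
 *************************************************************************/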
2891int netdev_max_backlog __read_mostly = 1000;
2892EXPORT_SYMBOL(netdev_max_backlog);
2893
2894int netdev_tstamp_prequeue __read_mostly = 1;
2895int netdev_budget __read_mostly = 300;
2896int weight_p __read_mostly = 64;
2897
2898
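/* Called with irq disabled */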
2899static inline void ____napi_schedule(struct softnet_data *sd,
2900 struct napi_struct *napi)
2901{
2902 list_add_tail(&napi->poll_list, &sd->poll_list);
2903 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2904}
2905
2906#ifdef CONFIG_RPS
2907
2908
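/* One global table that all flow-based protocols share. */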
2909struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
2910EXPORT_SYMBOL(rps_sock_flow_table);
2911
2912struct static_key rps_needed __read_mostly;
2913
2914static struct rps_dev_flow *
2915set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2916 struct rps_dev_flow *rflow, u16 next_cpu)
2917{
2918 if (next_cpu != RPS_NO_CPU) {
2919#ifdef CONFIG_RFS_ACCEL
2920 struct netdev_rx_queue *rxqueue;
2921 struct rps_dev_flow_table *flow_table;
2922 struct rps_dev_flow *old_rflow;
2923 u32 flow_id;
2924 u16 rxq_index;
2925 int rc;
2926
2927
2928 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
2929 !(dev->features & NETIF_F_NTUPLE))
2930 goto out;
2931 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
2932 if (rxq_index == skb_get_rx_queue(skb))
2933 goto out;
2934
2935 rxqueue = dev->_rx + rxq_index;
2936 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2937 if (!flow_table)
2938 goto out;
2939 flow_id = skb->rxhash & flow_table->mask;
2940 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
2941 rxq_index, flow_id);
2942 if (rc < 0)
2943 goto out;
2944 old_rflow = rflow;
2945 rflow = &flow_table->flows[flow_id];
2946 rflow->filter = rc;
2947 if (old_rflow->filter == rflow->filter)
2948 old_rflow->filter = RPS_NO_FILTER;
2949 out:
2950#endif
2951 rflow->last_qtail =
2952 per_cpu(softnet_data, next_cpu).input_queue_head;
2953 }
2954
2955 rflow->cpu = next_cpu;
2956 return rflow;
2957}
2963
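/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */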
2964static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2965 struct rps_dev_flow **rflowp)
2966{
2967 struct netdev_rx_queue *rxqueue;
2968 struct rps_map *map;
2969 struct rps_dev_flow_table *flow_table;
2970 struct rps_sock_flow_table *sock_flow_table;
2971 int cpu = -1;
2972 u16 tcpu;
2973
2974 if (skb_rx_queue_recorded(skb)) {
2975 u16 index = skb_get_rx_queue(skb);
2976 if (unlikely(index >= dev->real_num_rx_queues)) {
2977 WARN_ONCE(dev->real_num_rx_queues > 1,
2978 "%s received packet on queue %u, but number "
2979 "of RX queues is %u\n",
2980 dev->name, index, dev->real_num_rx_queues);
2981 goto done;
2982 }
2983 rxqueue = dev->_rx + index;
2984 } else
2985 rxqueue = dev->_rx;
2986
2987 map = rcu_dereference(rxqueue->rps_map);
2988 if (map) {
2989 if (map->len == 1 &&
2990 !rcu_access_pointer(rxqueue->rps_flow_table)) {
2991 tcpu = map->cpus[0];
2992 if (cpu_online(tcpu))
2993 cpu = tcpu;
2994 goto done;
2995 }
2996 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2997 goto done;
2998 }
2999
3000 skb_reset_network_header(skb);
3001 if (!skb_get_rxhash(skb))
3002 goto done;
3003
3004 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3005 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3006 if (flow_table && sock_flow_table) {
3007 u16 next_cpu;
3008 struct rps_dev_flow *rflow;
3009
3010 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
3011 tcpu = rflow->cpu;
3012
3013 next_cpu = sock_flow_table->ents[skb->rxhash &
3014 sock_flow_table->mask];
3026
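/*
 * If the desired CPU (where last recvmsg was done) is
 * different from current CPU (one in the rx-queue flow
 * table entry), switch if one of the following holds:
 *   - Current CPU is unset (equal to RPS_NO_CPU).
 *   - Current CPU is offline.
 *   - The current CPU's queue tail has advanced beyond the
 *     last packet that was enqueued using this table entry.
 *     This guarantees that all previous packets for the flow
 *     have been dequeued, thus preserving in order delivery.
 */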
3027 if (unlikely(tcpu != next_cpu) &&
3028 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
3029 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3030 rflow->last_qtail)) >= 0)) {
3031 tcpu = next_cpu;
3032 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3033 }
3034
3035 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
3036 *rflowp = rflow;
3037 cpu = tcpu;
3038 goto done;
3039 }
3040 }
3041
3042 if (map) {
3043 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
3044
3045 if (cpu_online(tcpu)) {
3046 cpu = tcpu;
3047 goto done;
3048 }
3049 }
3050
3051done:
3052 return cpu;
3053}
3054
3055#ifdef CONFIG_RFS_ACCEL
3067
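/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */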
3068bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3069 u32 flow_id, u16 filter_id)
3070{
3071 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3072 struct rps_dev_flow_table *flow_table;
3073 struct rps_dev_flow *rflow;
3074 bool expire = true;
3075 int cpu;
3076
3077 rcu_read_lock();
3078 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3079 if (flow_table && flow_id <= flow_table->mask) {
3080 rflow = &flow_table->flows[flow_id];
3081 cpu = ACCESS_ONCE(rflow->cpu);
3082 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
3083 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3084 rflow->last_qtail) <
3085 (int)(10 * flow_table->mask)))
3086 expire = false;
3087 }
3088 rcu_read_unlock();
3089 return expire;
3090}
3091EXPORT_SYMBOL(rps_may_expire_flow);
3092
3093#endif
3094
3095
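/* Called from hardirq (IPI) context */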
3096static void rps_trigger_softirq(void *data)
3097{
3098 struct softnet_data *sd = data;
3099
3100 ____napi_schedule(sd, &sd->backlog);
3101 sd->received_rps++;
3102}
3103
3104#endif
3110
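/*
 * Check if this softnet_data structure is another cpu one
 * If yes, queue it to our IPI list and return 1
 * If no, return 0
 */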
3111static int rps_ipi_queued(struct softnet_data *sd)
3112{
3113#ifdef CONFIG_RPS
3114 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
3115
3116 if (sd != mysd) {
3117 sd->rps_ipi_next = mysd->rps_ipi_list;
3118 mysd->rps_ipi_list = sd;
3119
3120 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3121 return 1;
3122 }
3123#endif
3124 return 0;
3125}
3126
3127#ifdef CONFIG_NET_FLOW_LIMIT
3128int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3129#endif
3130
3131static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3132{
3133#ifdef CONFIG_NET_FLOW_LIMIT
3134 struct sd_flow_limit *fl;
3135 struct softnet_data *sd;
3136 unsigned int old_flow, new_flow;
3137
3138 if (qlen < (netdev_max_backlog >> 1))
3139 return false;
3140
3141 sd = &__get_cpu_var(softnet_data);
3142
3143 rcu_read_lock();
3144 fl = rcu_dereference(sd->flow_limit);
3145 if (fl) {
3146 new_flow = skb_get_rxhash(skb) & (fl->num_buckets - 1);
3147 old_flow = fl->history[fl->history_head];
3148 fl->history[fl->history_head] = new_flow;
3149
3150 fl->history_head++;
3151 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3152
3153 if (likely(fl->buckets[old_flow]))
3154 fl->buckets[old_flow]--;
3155
3156 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3157 fl->count++;
3158 rcu_read_unlock();
3159 return true;
3160 }
3161 }
3162 rcu_read_unlock();
3163#endif
3164 return false;
3165}
3170
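/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */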
3171static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3172 unsigned int *qtail)
3173{
3174 struct softnet_data *sd;
3175 unsigned long flags;
3176 unsigned int qlen;
3177
3178 sd = &per_cpu(softnet_data, cpu);
3179
3180 local_irq_save(flags);
3181
3182 rps_lock(sd);
3183 qlen = skb_queue_len(&sd->input_pkt_queue);
3184 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3185 if (skb_queue_len(&sd->input_pkt_queue)) {
3186enqueue:
3187 __skb_queue_tail(&sd->input_pkt_queue, skb);
3188 input_queue_tail_incr_save(sd, qtail);
3189 rps_unlock(sd);
3190 local_irq_restore(flags);
3191 return NET_RX_SUCCESS;
3192 }
3196
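/* Schedule NAPI for backlog device
 * We can use non atomic operation since we own the queue lock
 */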
3197 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3198 if (!rps_ipi_queued(sd))
3199 ____napi_schedule(sd, &sd->backlog);
3200 }
3201 goto enqueue;
3202 }
3203
3204 sd->dropped++;
3205 rps_unlock(sd);
3206
3207 local_irq_restore(flags);
3208
3209 atomic_long_inc(&skb->dev->rx_dropped);
3210 kfree_skb(skb);
3211 return NET_RX_DROP;
3212}
3228
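/**
 * netif_rx - post buffer to the network code
 * @skb: buffer to post
 *
 * This function receives a packet from a device driver and queues it for
 * the upper (protocol) levels to process.  It always succeeds. The buffer
 * may be dropped during processing for congestion control or by the
 * protocol layers.
 *
 * return values:
 * NET_RX_SUCCESS (no congestion)
 * NET_RX_DROP    (packet was dropped)
 */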
3229int netif_rx(struct sk_buff *skb)
3230{
3231 int ret;
3232
3233
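/* if netpoll wants it, pretend we never saw it */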
3234 if (netpoll_rx(skb))
3235 return NET_RX_DROP;
3236
3237 net_timestamp_check(netdev_tstamp_prequeue, skb);
3238
3239 trace_netif_rx(skb);
3240#ifdef CONFIG_RPS
3241 if (static_key_false(&rps_needed)) {
3242 struct rps_dev_flow voidflow, *rflow = &voidflow;
3243 int cpu;
3244
3245 preempt_disable();
3246 rcu_read_lock();
3247
3248 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3249 if (cpu < 0)
3250 cpu = smp_processor_id();
3251
3252 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3253
3254 rcu_read_unlock();
3255 preempt_enable();
3256 } else
3257#endif
3258 {
3259 unsigned int qtail;
3260 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3261 put_cpu();
3262 }
3263 return ret;
3264}
3265EXPORT_SYMBOL(netif_rx);
3266
3267int netif_rx_ni(struct sk_buff *skb)
3268{
3269 int err;
3270
3271 preempt_disable();
3272 err = netif_rx(skb);
3273 if (local_softirq_pending())
3274 do_softirq();
3275 preempt_enable();
3276
3277 return err;
3278}
3279EXPORT_SYMBOL(netif_rx_ni);
3280
3281static void net_tx_action(struct softirq_action *h)
3282{
3283 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3284
3285 if (sd->completion_queue) {
3286 struct sk_buff *clist;
3287
3288 local_irq_disable();
3289 clist = sd->completion_queue;
3290 sd->completion_queue = NULL;
3291 local_irq_enable();
3292
3293 while (clist) {
3294 struct sk_buff *skb = clist;
3295 clist = clist->next;
3296
3297 WARN_ON(atomic_read(&skb->users));
3298 trace_kfree_skb(skb, net_tx_action);
3299 __kfree_skb(skb);
3300 }
3301 }
3302
3303 if (sd->output_queue) {
3304 struct Qdisc *head;
3305
3306 local_irq_disable();
3307 head = sd->output_queue;
3308 sd->output_queue = NULL;
3309 sd->output_queue_tailp = &sd->output_queue;
3310 local_irq_enable();
3311
3312 while (head) {
3313 struct Qdisc *q = head;
3314 spinlock_t *root_lock;
3315
3316 head = head->next_sched;
3317
3318 root_lock = qdisc_lock(q);
3319 if (spin_trylock(root_lock)) {
3320 smp_mb__before_clear_bit();
3321 clear_bit(__QDISC_STATE_SCHED,
3322 &q->state);
3323 qdisc_run(q);
3324 spin_unlock(root_lock);
3325 } else {
3326 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3327 &q->state)) {
3328 __netif_reschedule(q);
3329 } else {
3330 smp_mb__before_clear_bit();
3331 clear_bit(__QDISC_STATE_SCHED,
3332 &q->state);
3333 }
3334 }
3335 }
3336 }
3337}
3338
3339#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3340 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3341
3342int (*br_fdb_test_addr_hook)(struct net_device *dev,
3343 unsigned char *addr) __read_mostly;
3344EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3345#endif
3346
3347#ifdef CONFIG_NET_CLS_ACT
3355
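/* ing_filter - run the skb through the ingress qdisc attached to
 * skb->dev, if one is configured, and return the resulting
 * traffic-control verdict (TC_ACT_*).
 */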
3356static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3357{
3358 struct net_device *dev = skb->dev;
3359 u32 ttl = G_TC_RTTL(skb->tc_verd);
3360 int result = TC_ACT_OK;
3361 struct Qdisc *q;
3362
3363 if (unlikely(MAX_RED_LOOP < ttl++)) {
3364 net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
3365 skb->skb_iif, dev->ifindex);
3366 return TC_ACT_SHOT;
3367 }
3368
3369 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3370 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3371
3372 q = rxq->qdisc;
3373 if (q != &noop_qdisc) {
3374 spin_lock(qdisc_lock(q));
3375 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3376 result = qdisc_enqueue_root(skb, q);
3377 spin_unlock(qdisc_lock(q));
3378 }
3379
3380 return result;
3381}
3382
3383static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3384 struct packet_type **pt_prev,
3385 int *ret, struct net_device *orig_dev)
3386{
3387 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3388
3389 if (!rxq || rxq->qdisc == &noop_qdisc)
3390 goto out;
3391
3392 if (*pt_prev) {
3393 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3394 *pt_prev = NULL;
3395 }
3396
3397 switch (ing_filter(skb, rxq)) {
3398 case TC_ACT_SHOT:
3399 case TC_ACT_STOLEN:
3400 kfree_skb(skb);
3401 return NULL;
3402 }
3403
3404out:
3405 skb->tc_verd = 0;
3406 return skb;
3407}
3408#endif
3423
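/**
 * netdev_rx_handler_register - register receive handler
 * @dev: device to register a handler for
 * @rx_handler: receive handler to register
 * @rx_handler_data: data pointer that is used by rx handler
 *
 * Register a receive handler for a device. This handler will then be
 * called from __netif_receive_skb. A negative errno code is returned
 * on a failure.
 *
 * The caller must hold the rtnl_mutex.
 *
 * For a general description of rx_handler, see enum rx_handler_result.
 */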
3424int netdev_rx_handler_register(struct net_device *dev,
3425 rx_handler_func_t *rx_handler,
3426 void *rx_handler_data)
3427{
3428 ASSERT_RTNL();
3429
3430 if (dev->rx_handler)
3431 return -EBUSY;
3432
3433
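/* Note: rx_handler_data must be set before rx_handler */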
3434 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3435 rcu_assign_pointer(dev->rx_handler, rx_handler);
3436
3437 return 0;
3438}
3439EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3448
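/**
 * netdev_rx_handler_unregister - unregister receive handler
 * @dev: device to unregister a handler from
 *
 * Unregister a receive handler from a device.
 *
 * The caller must hold the rtnl_mutex.
 */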
3449void netdev_rx_handler_unregister(struct net_device *dev)
3450{
3451
3452 ASSERT_RTNL();
3453 RCU_INIT_POINTER(dev->rx_handler, NULL);
3457
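/* a reader seeing a non NULL rx_handler in a rcu_read_lock()
 * section has a guarantee to see a non NULL rx_handler_data
 * as well.
 */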
3458 synchronize_net();
3459 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3460}
3461EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3466
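/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special handling of PFMEMALLOC skbs.
 */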
3467static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
3468{
3469 switch (skb->protocol) {
3470 case __constant_htons(ETH_P_ARP):
3471 case __constant_htons(ETH_P_IP):
3472 case __constant_htons(ETH_P_IPV6):
3473 case __constant_htons(ETH_P_8021Q):
3474 case __constant_htons(ETH_P_8021AD):
3475 return true;
3476 default:
3477 return false;
3478 }
3479}
3480
3481static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
3482{
3483 struct packet_type *ptype, *pt_prev;
3484 rx_handler_func_t *rx_handler;
3485 struct net_device *orig_dev;
3486 struct net_device *null_or_dev;
3487 bool deliver_exact = false;
3488 int ret = NET_RX_DROP;
3489 __be16 type;
3490
3491 net_timestamp_check(!netdev_tstamp_prequeue, skb);
3492
3493 trace_netif_receive_skb(skb);
3494
3495
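/* if we've gotten here through NAPI, check netpoll */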
3496 if (netpoll_receive_skb(skb))
3497 goto out;
3498
3499 orig_dev = skb->dev;
3500
3501 skb_reset_network_header(skb);
3502 if (!skb_transport_header_was_set(skb))
3503 skb_reset_transport_header(skb);
3504 skb_reset_mac_len(skb);
3505
3506 pt_prev = NULL;
3507
3508 rcu_read_lock();
3509
3510another_round:
3511 skb->skb_iif = skb->dev->ifindex;
3512
3513 __this_cpu_inc(softnet_data.processed);
3514
3515 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
3516 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
3517 skb = vlan_untag(skb);
3518 if (unlikely(!skb))
3519 goto unlock;
3520 }
3521
3522#ifdef CONFIG_NET_CLS_ACT
3523 if (skb->tc_verd & TC_NCLS) {
3524 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3525 goto ncls;
3526 }
3527#endif
3528
3529 if (pfmemalloc)
3530 goto skip_taps;
3531
3532 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3533 if (!ptype->dev || ptype->dev == skb->dev) {
3534 if (pt_prev)
3535 ret = deliver_skb(skb, pt_prev, orig_dev);
3536 pt_prev = ptype;
3537 }
3538 }
3539
3540skip_taps:
3541#ifdef CONFIG_NET_CLS_ACT
3542 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3543 if (!skb)
3544 goto unlock;
3545ncls:
3546#endif
3547
3548 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
3549 goto drop;
3550
3551 if (vlan_tx_tag_present(skb)) {
3552 if (pt_prev) {
3553 ret = deliver_skb(skb, pt_prev, orig_dev);
3554 pt_prev = NULL;
3555 }
3556 if (vlan_do_receive(&skb))
3557 goto another_round;
3558 else if (unlikely(!skb))
3559 goto unlock;
3560 }
3561
3562 rx_handler = rcu_dereference(skb->dev->rx_handler);
3563 if (rx_handler) {
3564 if (pt_prev) {
3565 ret = deliver_skb(skb, pt_prev, orig_dev);
3566 pt_prev = NULL;
3567 }
3568 switch (rx_handler(&skb)) {
3569 case RX_HANDLER_CONSUMED:
3570 ret = NET_RX_SUCCESS;
3571 goto unlock;
3572 case RX_HANDLER_ANOTHER:
3573 goto another_round;
3574 case RX_HANDLER_EXACT:
3575 deliver_exact = true;
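/* fall through */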
3576 case RX_HANDLER_PASS:
3577 break;
3578 default:
3579 BUG();
3580 }
3581 }
3582
3583 if (unlikely(vlan_tx_tag_present(skb))) {
3584 if (vlan_tx_tag_get_id(skb))
3585 skb->pkt_type = PACKET_OTHERHOST;
3589
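/* Note: we might in the future use prio bits
 * and set skb->priority like in vlan_do_receive()
 * For the time being, just ignore Priority Code Point
 */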
3590 skb->vlan_tci = 0;
3591 }
3592
3593
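/* deliver only exact match when indicated */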
3594 null_or_dev = deliver_exact ? skb->dev : NULL;
3595
3596 type = skb->protocol;
3597 list_for_each_entry_rcu(ptype,
3598 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3599 if (ptype->type == type &&
3600 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3601 ptype->dev == orig_dev)) {
3602 if (pt_prev)
3603 ret = deliver_skb(skb, pt_prev, orig_dev);
3604 pt_prev = ptype;
3605 }
3606 }
3607
3608 if (pt_prev) {
3609 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
3610 goto drop;
3611 else
3612 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3613 } else {
3614drop:
3615 atomic_long_inc(&skb->dev->rx_dropped);
3616 kfree_skb(skb);
3619
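/* Jamal, now you will not able to escape explaining
 * me how you were going to use this. :-)
 */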
3620 ret = NET_RX_DROP;
3621 }
3622
3623unlock:
3624 rcu_read_unlock();
3625out:
3626 return ret;
3627}
3628
3629static int __netif_receive_skb(struct sk_buff *skb)
3630{
3631 int ret;
3632
3633 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
3634 unsigned long pflags = current->flags;
3644
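/*
 * PFMEMALLOC skbs are special, they should
 * - be delivered to SOCK_MEMALLOC sockets only
 * - stay away from userspace
 * - have bounded memory usage
 *
 * Use PF_MEMALLOC as this saves us from propagating the allocation
 * context down to all allocation sites.
 */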
3645 current->flags |= PF_MEMALLOC;
3646 ret = __netif_receive_skb_core(skb, true);
3647 tsk_restore_flags(current, pflags, PF_MEMALLOC);
3648 } else
3649 ret = __netif_receive_skb_core(skb, false);
3650
3651 return ret;
3652}
3668
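/**
 * netif_receive_skb - process receive buffer from network
 * @skb: buffer to process
 *
 * netif_receive_skb() is the main receive data processing function.
 * It always succeeds. The buffer may be dropped during processing
 * for congestion control or by the protocol layers.
 *
 * This function may only be called from softirq context and interrupts
 * should be enabled.
 *
 * Return values (usually ignored):
 * NET_RX_SUCCESS: no congestion
 * NET_RX_DROP: packet was dropped
 */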
3669int netif_receive_skb(struct sk_buff *skb)
3670{
3671 net_timestamp_check(netdev_tstamp_prequeue, skb);
3672
3673 if (skb_defer_rx_timestamp(skb))
3674 return NET_RX_SUCCESS;
3675
3676#ifdef CONFIG_RPS
3677 if (static_key_false(&rps_needed)) {
3678 struct rps_dev_flow voidflow, *rflow = &voidflow;
3679 int cpu, ret;
3680
3681 rcu_read_lock();
3682
3683 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3684
3685 if (cpu >= 0) {
3686 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3687 rcu_read_unlock();
3688 return ret;
3689 }
3690 rcu_read_unlock();
3691 }
3692#endif
3693 return __netif_receive_skb(skb);
3694}
3695EXPORT_SYMBOL(netif_receive_skb);
3699
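/* Network device is going away, flush any packets still pending
 * Called with irqs disabled.
 */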
3700static void flush_backlog(void *arg)
3701{
3702 struct net_device *dev = arg;
3703 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3704 struct sk_buff *skb, *tmp;
3705
3706 rps_lock(sd);
3707 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3708 if (skb->dev == dev) {
3709 __skb_unlink(skb, &sd->input_pkt_queue);
3710 kfree_skb(skb);
3711 input_queue_head_incr(sd);
3712 }
3713 }
3714 rps_unlock(sd);
3715
3716 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3717 if (skb->dev == dev) {
3718 __skb_unlink(skb, &sd->process_queue);
3719 kfree_skb(skb);
3720 input_queue_head_incr(sd);
3721 }
3722 }
3723}
3724
3725static int napi_gro_complete(struct sk_buff *skb)
3726{
3727 struct packet_offload *ptype;
3728 __be16 type = skb->protocol;
3729 struct list_head *head = &offload_base;
3730 int err = -ENOENT;
3731
3732 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
3733
3734 if (NAPI_GRO_CB(skb)->count == 1) {
3735 skb_shinfo(skb)->gso_size = 0;
3736 goto out;
3737 }
3738
3739 rcu_read_lock();
3740 list_for_each_entry_rcu(ptype, head, list) {
3741 if (ptype->type != type || !ptype->callbacks.gro_complete)
3742 continue;
3743
3744 err = ptype->callbacks.gro_complete(skb);
3745 break;
3746 }
3747 rcu_read_unlock();
3748
3749 if (err) {
3750 WARN_ON(&ptype->list == head);
3751 kfree_skb(skb);
3752 return NET_RX_SUCCESS;
3753 }
3754
3755out:
3756 return netif_receive_skb(skb);
3757}
3762
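/* napi->gro_list contains packets ordered by age.
 * youngest packets at the head of it.
 * Complete skbs in reverse order to reduce latencies.
 */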
3763void napi_gro_flush(struct napi_struct *napi, bool flush_old)
3764{
3765 struct sk_buff *skb, *prev = NULL;
3766
3767
3768 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
3769 skb->prev = prev;
3770 prev = skb;
3771 }
3772
3773 for (skb = prev; skb; skb = prev) {
3774 skb->next = NULL;
3775
3776 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
3777 return;
3778
3779 prev = skb->prev;
3780 napi_gro_complete(skb);
3781 napi->gro_count--;
3782 }
3783
3784 napi->gro_list = NULL;
3785}
3786EXPORT_SYMBOL(napi_gro_flush);
3787
3788static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
3789{
3790 struct sk_buff *p;
3791 unsigned int maclen = skb->dev->hard_header_len;
3792
3793 for (p = napi->gro_list; p; p = p->next) {
3794 unsigned long diffs;
3795
3796 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3797 diffs |= p->vlan_tci ^ skb->vlan_tci;
3798 if (maclen == ETH_HLEN)
3799 diffs |= compare_ether_header(skb_mac_header(p),
3800 skb_gro_mac_header(skb));
3801 else if (!diffs)
3802 diffs = memcmp(skb_mac_header(p),
3803 skb_gro_mac_header(skb),
3804 maclen);
3805 NAPI_GRO_CB(p)->same_flow = !diffs;
3806 NAPI_GRO_CB(p)->flush = 0;
3807 }
3808}
3809
3810static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3811{
3812 struct sk_buff **pp = NULL;
3813 struct packet_offload *ptype;
3814 __be16 type = skb->protocol;
3815 struct list_head *head = &offload_base;
3816 int same_flow;
3817 enum gro_result ret;
3818
3819 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3820 goto normal;
3821
3822 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3823 goto normal;
3824
3825 gro_list_prepare(napi, skb);
3826
3827 rcu_read_lock();
3828 list_for_each_entry_rcu(ptype, head, list) {
3829 if (ptype->type != type || !ptype->callbacks.gro_receive)
3830 continue;
3831
3832 skb_set_network_header(skb, skb_gro_offset(skb));
3833 skb_reset_mac_len(skb);
3834 NAPI_GRO_CB(skb)->same_flow = 0;
3835 NAPI_GRO_CB(skb)->flush = 0;
3836 NAPI_GRO_CB(skb)->free = 0;
3837
3838 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
3839 break;
3840 }
3841 rcu_read_unlock();
3842
3843 if (&ptype->list == head)
3844 goto normal;
3845
3846 same_flow = NAPI_GRO_CB(skb)->same_flow;
3847 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3848
3849 if (pp) {
3850 struct sk_buff *nskb = *pp;
3851
3852 *pp = nskb->next;
3853 nskb->next = NULL;
3854 napi_gro_complete(nskb);
3855 napi->gro_count--;
3856 }
3857
3858 if (same_flow)
3859 goto ok;
3860
3861 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3862 goto normal;
3863
3864 napi->gro_count++;
3865 NAPI_GRO_CB(skb)->count = 1;
3866 NAPI_GRO_CB(skb)->age = jiffies;
3867 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3868 skb->next = napi->gro_list;
3869 napi->gro_list = skb;
3870 ret = GRO_HELD;
3871
3872pull:
3873 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3874 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3875
3876 BUG_ON(skb->end - skb->tail < grow);
3877
3878 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3879
3880 skb->tail += grow;
3881 skb->data_len -= grow;
3882
3883 skb_shinfo(skb)->frags[0].page_offset += grow;
3884 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3885
3886 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3887 skb_frag_unref(skb, 0);
3888 memmove(skb_shinfo(skb)->frags,
3889 skb_shinfo(skb)->frags + 1,
3890 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3891 }
3892 }
3893
3894ok:
3895 return ret;
3896
3897normal:
3898 ret = GRO_NORMAL;
3899 goto pull;
3900}
3901
3902
3903static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3904{
3905 switch (ret) {
3906 case GRO_NORMAL:
3907 if (netif_receive_skb(skb))
3908 ret = GRO_DROP;
3909 break;
3910
3911 case GRO_DROP:
3912 kfree_skb(skb);
3913 break;
3914
3915 case GRO_MERGED_FREE:
3916 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
3917 kmem_cache_free(skbuff_head_cache, skb);
3918 else
3919 __kfree_skb(skb);
3920 break;
3921
3922 case GRO_HELD:
3923 case GRO_MERGED:
3924 break;
3925 }
3926
3927 return ret;
3928}
3929
3930static void skb_gro_reset_offset(struct sk_buff *skb)
3931{
3932 const struct skb_shared_info *pinfo = skb_shinfo(skb);
3933 const skb_frag_t *frag0 = &pinfo->frags[0];
3934
3935 NAPI_GRO_CB(skb)->data_offset = 0;
3936 NAPI_GRO_CB(skb)->frag0 = NULL;
3937 NAPI_GRO_CB(skb)->frag0_len = 0;
3938
3939 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
3940 pinfo->nr_frags &&
3941 !PageHighMem(skb_frag_page(frag0))) {
3942 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
3943 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
3944 }
3945}
3946
3947gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3948{
3949 skb_gro_reset_offset(skb);
3950
3951 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
3952}
3953EXPORT_SYMBOL(napi_gro_receive);
3954
3955static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3956{
3957 __skb_pull(skb, skb_headlen(skb));
3958
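/* restore the reserve we had after netdev_alloc_skb_ip_align() */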
3959 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
3960 skb->vlan_tci = 0;
3961 skb->dev = napi->dev;
3962 skb->skb_iif = 0;
3963
3964 napi->skb = skb;
3965}
3966
3967struct sk_buff *napi_get_frags(struct napi_struct *napi)
3968{
3969 struct sk_buff *skb = napi->skb;
3970
3971 if (!skb) {
3972 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3973 if (skb)
3974 napi->skb = skb;
3975 }
3976 return skb;
3977}
3978EXPORT_SYMBOL(napi_get_frags);
3979
3980static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3981 gro_result_t ret)
3982{
3983 switch (ret) {
3984 case GRO_NORMAL:
3985 case GRO_HELD:
3986 skb->protocol = eth_type_trans(skb, skb->dev);
3987
3988 if (ret == GRO_HELD)
3989 skb_gro_pull(skb, -ETH_HLEN);
3990 else if (netif_receive_skb(skb))
3991 ret = GRO_DROP;
3992 break;
3993
3994 case GRO_DROP:
3995 case GRO_MERGED_FREE:
3996 napi_reuse_skb(napi, skb);
3997 break;
3998
3999 case GRO_MERGED:
4000 break;
4001 }
4002
4003 return ret;
4004}
4005
4006static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4007{
4008 struct sk_buff *skb = napi->skb;
4009 struct ethhdr *eth;
4010 unsigned int hlen;
4011 unsigned int off;
4012
4013 napi->skb = NULL;
4014
4015 skb_reset_mac_header(skb);
4016 skb_gro_reset_offset(skb);
4017
4018 off = skb_gro_offset(skb);
4019 hlen = off + sizeof(*eth);
4020 eth = skb_gro_header_fast(skb, off);
4021 if (skb_gro_header_hard(skb, hlen)) {
4022 eth = skb_gro_header_slow(skb, hlen, off);
4023 if (unlikely(!eth)) {
4024 napi_reuse_skb(napi, skb);
4025 skb = NULL;
4026 goto out;
4027 }
4028 }
4029
4030 skb_gro_pull(skb, sizeof(*eth));
4035
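/*
 * This works because the only protocols we care about don't require
 * special handling.  We'll fix it up properly at the end.
 */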
4036 skb->protocol = eth->h_proto;
4037
4038out:
4039 return skb;
4040}
4041
4042gro_result_t napi_gro_frags(struct napi_struct *napi)
4043{
4044 struct sk_buff *skb = napi_frags_skb(napi);
4045
4046 if (!skb)
4047 return GRO_DROP;
4048
4049 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4050}
4051EXPORT_SYMBOL(napi_gro_frags);
4056
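/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */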
4057static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4058{
4059#ifdef CONFIG_RPS
4060 struct softnet_data *remsd = sd->rps_ipi_list;
4061
4062 if (remsd) {
4063 sd->rps_ipi_list = NULL;
4064
4065 local_irq_enable();
4066
4067
4068 while (remsd) {
4069 struct softnet_data *next = remsd->rps_ipi_next;
4070
4071 if (cpu_online(remsd->cpu))
4072 __smp_call_function_single(remsd->cpu,
4073 &remsd->csd, 0);
4074 remsd = next;
4075 }
4076 } else
4077#endif
4078 local_irq_enable();
4079}
4080
4081static int process_backlog(struct napi_struct *napi, int quota)
4082{
4083 int work = 0;
4084 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4085
4086#ifdef CONFIG_RPS
4089
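/* Check if we have pending ipi, its better to send them now,
 * not waiting net_rx_action() end.
 */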
4090 if (sd->rps_ipi_list) {
4091 local_irq_disable();
4092 net_rps_action_and_irq_enable(sd);
4093 }
4094#endif
4095 napi->weight = weight_p;
4096 local_irq_disable();
4097 while (work < quota) {
4098 struct sk_buff *skb;
4099 unsigned int qlen;
4100
4101 while ((skb = __skb_dequeue(&sd->process_queue))) {
4102 local_irq_enable();
4103 __netif_receive_skb(skb);
4104 local_irq_disable();
4105 input_queue_head_incr(sd);
4106 if (++work >= quota) {
4107 local_irq_enable();
4108 return work;
4109 }
4110 }
4111
4112 rps_lock(sd);
4113 qlen = skb_queue_len(&sd->input_pkt_queue);
4114 if (qlen)
4115 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4116 &sd->process_queue);
4117
4118 if (qlen < quota - work) {
4125
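/*
 * Inline a custom version of __napi_complete().
 * only current cpu owns and manipulates this napi,
 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
 * we can use a plain write instead of clear_bit(),
 * and we dont need an smp_mb() memory barrier.
 */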
4126 list_del(&napi->poll_list);
4127 napi->state = 0;
4128
4129 quota = work + qlen;
4130 }
4131 rps_unlock(sd);
4132 }
4133 local_irq_enable();
4134
4135 return work;
4136}
4143
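/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 */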
4144void __napi_schedule(struct napi_struct *n)
4145{
4146 unsigned long flags;
4147
4148 local_irq_save(flags);
4149 ____napi_schedule(&__get_cpu_var(softnet_data), n);
4150 local_irq_restore(flags);
4151}
4152EXPORT_SYMBOL(__napi_schedule);
4153
4154void __napi_complete(struct napi_struct *n)
4155{
4156 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4157 BUG_ON(n->gro_list);
4158
4159 list_del(&n->poll_list);
4160 smp_mb__before_clear_bit();
4161 clear_bit(NAPI_STATE_SCHED, &n->state);
4162}
4163EXPORT_SYMBOL(__napi_complete);
4164
4165void napi_complete(struct napi_struct *n)
4166{
4167 unsigned long flags;
4172
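/*
 * don't let napi dequeue from the cpu poll list
 * just in case its running on a different cpu
 */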
4173 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4174 return;
4175
4176 napi_gro_flush(n, false);
4177 local_irq_save(flags);
4178 __napi_complete(n);
4179 local_irq_restore(flags);
4180}
4181EXPORT_SYMBOL(napi_complete);
4182
4183
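/* must be called under rcu_read_lock(), as we dont take a reference */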
4184struct napi_struct *napi_by_id(unsigned int napi_id)
4185{
4186 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4187 struct napi_struct *napi;
4188
4189 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4190 if (napi->napi_id == napi_id)
4191 return napi;
4192
4193 return NULL;
4194}
4195EXPORT_SYMBOL_GPL(napi_by_id);
4196
4197void napi_hash_add(struct napi_struct *napi)
4198{
4199 if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
4200
4201 spin_lock(&napi_hash_lock);
4205
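/* 0 is not a valid id, we also skip an id that is taken
 * we expect both events to be extremely rare
 */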
4206 napi->napi_id = 0;
4207 while (!napi->napi_id) {
4208 napi->napi_id = ++napi_gen_id;
4209 if (napi_by_id(napi->napi_id))
4210 napi->napi_id = 0;
4211 }
4212
4213 hlist_add_head_rcu(&napi->napi_hash_node,
4214 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
4215
4216 spin_unlock(&napi_hash_lock);
4217 }
4218}
4219EXPORT_SYMBOL_GPL(napi_hash_add);
4223
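/* Warning : caller is responsible to make sure rcu grace period
 * is respected before freeing memory containing @napi
 */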
4224void napi_hash_del(struct napi_struct *napi)
4225{
4226 spin_lock(&napi_hash_lock);
4227
4228 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
4229 hlist_del_rcu(&napi->napi_hash_node);
4230
4231 spin_unlock(&napi_hash_lock);
4232}
4233EXPORT_SYMBOL_GPL(napi_hash_del);
4234
4235void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
4236 int (*poll)(struct napi_struct *, int), int weight)
4237{
4238 INIT_LIST_HEAD(&napi->poll_list);
4239 napi->gro_count = 0;
4240 napi->gro_list = NULL;
4241 napi->skb = NULL;
4242 napi->poll = poll;
4243 if (weight > NAPI_POLL_WEIGHT)
4244 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
4245 weight, dev->name);
4246 napi->weight = weight;
4247 list_add(&napi->dev_list, &dev->napi_list);
4248 napi->dev = dev;
4249#ifdef CONFIG_NETPOLL
4250 spin_lock_init(&napi->poll_lock);
4251 napi->poll_owner = -1;
4252#endif
4253 set_bit(NAPI_STATE_SCHED, &napi->state);
4254}
4255EXPORT_SYMBOL(netif_napi_add);
4256
4257void netif_napi_del(struct napi_struct *napi)
4258{
4259 struct sk_buff *skb, *next;
4260
4261 list_del_init(&napi->dev_list);
4262 napi_free_frags(napi);
4263
4264 for (skb = napi->gro_list; skb; skb = next) {
4265 next = skb->next;
4266 skb->next = NULL;
4267 kfree_skb(skb);
4268 }
4269
4270 napi->gro_list = NULL;
4271 napi->gro_count = 0;
4272}
4273EXPORT_SYMBOL(netif_napi_del);
4274
4275static void net_rx_action(struct softirq_action *h)
4276{
4277 struct softnet_data *sd = &__get_cpu_var(softnet_data);
4278 unsigned long time_limit = jiffies + 2;
4279 int budget = netdev_budget;
4280 void *have;
4281
4282 local_irq_disable();
4283
4284 while (!list_empty(&sd->poll_list)) {
4285 struct napi_struct *n;
4286 int work, weight;
4291
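/* If softirq window is exhausted then punt.
 * Allow this to run for 2 jiffies since which will allow
 * an average latency of 1.5/HZ.
 */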
4292 if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
4293 goto softnet_break;
4294
4295 local_irq_enable();
4301
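/* Even though interrupts have been re-enabled, this
 * access is safe because interrupts can only be raised
 * on the current cpu by a NAPI instance, and the poll
 * list is only manipulated by this cpu.
 */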
4302 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
4303
4304 have = netpoll_poll_lock(n);
4305
4306 weight = n->weight;
4313
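/* This NAPI_STATE_SCHED test is for avoiding a race
 * with netpoll's poll_napi().  Only the entity which
 * obtains the lock and sees NAPI_STATE_SCHED set will
 * actually make the ->poll() call.  Therefore we avoid
 * accidentally calling ->poll() when NAPI is not scheduled.
 */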
4314 work = 0;
4315 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
4316 work = n->poll(n, weight);
4317 trace_napi_poll(n);
4318 }
4319
4320 WARN_ON_ONCE(work > weight);
4321
4322 budget -= work;
4323
4324 local_irq_disable();
4330
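/* Drivers must not modify the NAPI state if they
 * consume the entire weight.  In such cases this code
 * still "owns" the NAPI instance and therefore can
 * move the instance around on the list at-will.
 */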
4331 if (unlikely(work == weight)) {
4332 if (unlikely(napi_disable_pending(n))) {
4333 local_irq_enable();
4334 napi_complete(n);
4335 local_irq_disable();
4336 } else {
4337 if (n->gro_list) {
4340
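/* flush too old packets
 * If HZ < 1000, flush all packets.
 */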
4341 local_irq_enable();
4342 napi_gro_flush(n, HZ >= 1000);
4343 local_irq_disable();
4344 }
4345 list_move_tail(&n->poll_list, &sd->poll_list);
4346 }
4347 }
4348
4349 netpoll_poll_unlock(have);
4350 }
4351out:
4352 net_rps_action_and_irq_enable(sd);
4353
4354#ifdef CONFIG_NET_DMA
4358
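/*
 * There may not be any more sk_buffs coming right now, so push
 * any pending DMA copies to hardware
 */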
4359 dma_issue_pending_all();
4360#endif
4361
4362 return;
4363
4364softnet_break:
4365 sd->time_squeeze++;
4366 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
4367 goto out;
4368}
4369
4370struct netdev_upper {
4371 struct net_device *dev;
4372 bool master;
4373 struct list_head list;
4374 struct rcu_head rcu;
4375 struct list_head search_list;
4376};
4377
4378static void __append_search_uppers(struct list_head *search_list,
4379 struct net_device *dev)
4380{
4381 struct netdev_upper *upper;
4382
4383 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4384
4385 if (list_empty(&upper->search_list))
4386 list_add_tail(&upper->search_list, search_list);
4387 }
4388}
4389
4390static bool __netdev_search_upper_dev(struct net_device *dev,
4391 struct net_device *upper_dev)
4392{
4393 LIST_HEAD(search_list);
4394 struct netdev_upper *upper;
4395 struct netdev_upper *tmp;
4396 bool ret = false;
4397
4398 __append_search_uppers(&search_list, dev);
4399 list_for_each_entry(upper, &search_list, search_list) {
4400 if (upper->dev == upper_dev) {
4401 ret = true;
4402 break;
4403 }
4404 __append_search_uppers(&search_list, upper->dev);
4405 }
4406 list_for_each_entry_safe(upper, tmp, &search_list, search_list)
4407 INIT_LIST_HEAD(&upper->search_list);
4408 return ret;
4409}
4410
4411static struct netdev_upper *__netdev_find_upper(struct net_device *dev,
4412 struct net_device *upper_dev)
4413{
4414 struct netdev_upper *upper;
4415
4416 list_for_each_entry(upper, &dev->upper_dev_list, list) {
4417 if (upper->dev == upper_dev)
4418 return upper;
4419 }
4420 return NULL;
4421}
4431
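/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to specified upper device and return true
 * in case it is. Note that this checks only immediate upper device,
 * not through a complete stack of devices. The caller must hold the RTNL lock.
 */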
4432bool netdev_has_upper_dev(struct net_device *dev,
4433 struct net_device *upper_dev)
4434{
4435 ASSERT_RTNL();
4436
4437 return __netdev_find_upper(dev, upper_dev);
4438}
4439EXPORT_SYMBOL(netdev_has_upper_dev);
4447
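/**
 * netdev_has_any_upper_dev - Check if device is linked to some device
 * @dev: device
 *
 * Find out if a device is linked to an upper device and return true in case
 * it is. The caller must hold the RTNL lock.
 */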
4448bool netdev_has_any_upper_dev(struct net_device *dev)
4449{
4450 ASSERT_RTNL();
4451
4452 return !list_empty(&dev->upper_dev_list);
4453}
4454EXPORT_SYMBOL(netdev_has_any_upper_dev);
4462
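/**
 * netdev_master_upper_dev_get - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return pointer to it or NULL in case
 * it's not there. The caller must hold the RTNL lock.
 */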
4463struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
4464{
4465 struct netdev_upper *upper;
4466
4467 ASSERT_RTNL();
4468
4469 if (list_empty(&dev->upper_dev_list))
4470 return NULL;
4471
4472 upper = list_first_entry(&dev->upper_dev_list,
4473 struct netdev_upper, list);
4474 if (likely(upper->master))
4475 return upper->dev;
4476 return NULL;
4477}
4478EXPORT_SYMBOL(netdev_master_upper_dev_get);
4486
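/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return pointer to it or NULL in case
 * it's not there. The caller must hold the RCU read lock.
 */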
4487struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
4488{
4489 struct netdev_upper *upper;
4490
4491 upper = list_first_or_null_rcu(&dev->upper_dev_list,
4492 struct netdev_upper, list);
4493 if (upper && likely(upper->master))
4494 return upper->dev;
4495 return NULL;
4496}
4497EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
4498
4499static int __netdev_upper_dev_link(struct net_device *dev,
4500 struct net_device *upper_dev, bool master)
4501{
4502 struct netdev_upper *upper;
4503
4504 ASSERT_RTNL();
4505
4506 if (dev == upper_dev)
4507 return -EBUSY;
4508
4509
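/* To prevent loops, check if dev is not upper device to upper_dev. */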
4510 if (__netdev_search_upper_dev(upper_dev, dev))
4511 return -EBUSY;
4512
4513 if (__netdev_find_upper(dev, upper_dev))
4514 return -EEXIST;
4515
4516 if (master && netdev_master_upper_dev_get(dev))
4517 return -EBUSY;
4518
4519 upper = kmalloc(sizeof(*upper), GFP_KERNEL);
4520 if (!upper)
4521 return -ENOMEM;
4522
4523 upper->dev = upper_dev;
4524 upper->master = master;
4525 INIT_LIST_HEAD(&upper->search_list);
4526
4527
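/* Ensure that master upper link is always the first item in list. */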
4528 if (master)
4529 list_add_rcu(&upper->list, &dev->upper_dev_list);
4530 else
4531 list_add_tail_rcu(&upper->list, &dev->upper_dev_list);
4532 dev_hold(upper_dev);
4533 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4534 return 0;
4535}
4546
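/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to device which is upper to this one. The caller must hold
 * the RTNL lock. On a failure a negative errno code is returned.
 * On success the reference counts are adjusted and the function
 * returns zero.
 */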
4547int netdev_upper_dev_link(struct net_device *dev,
4548 struct net_device *upper_dev)
4549{
4550 return __netdev_upper_dev_link(dev, upper_dev, false);
4551}
4552EXPORT_SYMBOL(netdev_upper_dev_link);
4564
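/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */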
4565int netdev_master_upper_dev_link(struct net_device *dev,
4566 struct net_device *upper_dev)
4567{
4568 return __netdev_upper_dev_link(dev, upper_dev, true);
4569}
4570EXPORT_SYMBOL(netdev_master_upper_dev_link);
4579
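/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to device which is upper to this one. The caller must hold
 * the RTNL lock.
 */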
4580void netdev_upper_dev_unlink(struct net_device *dev,
4581 struct net_device *upper_dev)
4582{
4583 struct netdev_upper *upper;
4584
4585 ASSERT_RTNL();
4586
4587 upper = __netdev_find_upper(dev, upper_dev);
4588 if (!upper)
4589 return;
4590 list_del_rcu(&upper->list);
4591 dev_put(upper_dev);
4592 kfree_rcu(upper, rcu);
4593 call_netdevice_notifiers(NETDEV_CHANGEUPPER, dev);
4594}
4595EXPORT_SYMBOL(netdev_upper_dev_unlink);
4596
4597static void dev_change_rx_flags(struct net_device *dev, int flags)
4598{
4599 const struct net_device_ops *ops = dev->netdev_ops;
4600
4601 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4602 ops->ndo_change_rx_flags(dev, flags);
4603}
4604
4605static int __dev_set_promiscuity(struct net_device *dev, int inc)
4606{
4607 unsigned int old_flags = dev->flags;
4608 kuid_t uid;
4609 kgid_t gid;
4610
4611 ASSERT_RTNL();
4612
4613 dev->flags |= IFF_PROMISC;
4614 dev->promiscuity += inc;
4615 if (dev->promiscuity == 0) {
4619
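/*
 * Avoid overflow.
 * If inc causes overflow, untouch promisc and return error.
 */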
4620 if (inc < 0)
4621 dev->flags &= ~IFF_PROMISC;
4622 else {
4623 dev->promiscuity -= inc;
4624 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
4625 dev->name);
4626 return -EOVERFLOW;
4627 }
4628 }
4629 if (dev->flags != old_flags) {
4630 pr_info("device %s %s promiscuous mode\n",
4631 dev->name,
4632 dev->flags & IFF_PROMISC ? "entered" : "left");
4633 if (audit_enabled) {
4634 current_uid_gid(&uid, &gid);
4635 audit_log(current->audit_context, GFP_ATOMIC,
4636 AUDIT_ANOM_PROMISCUOUS,
4637 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4638 dev->name, (dev->flags & IFF_PROMISC),
4639 (old_flags & IFF_PROMISC),
4640 from_kuid(&init_user_ns, audit_get_loginuid(current)),
4641 from_kuid(&init_user_ns, uid),
4642 from_kgid(&init_user_ns, gid),
4643 audit_get_sessionid(current));
4644 }
4645
4646 dev_change_rx_flags(dev, IFF_PROMISC);
4647 }
4648 return 0;
4649}
4661
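/**
 * dev_set_promiscuity - update promiscuity count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove promiscuity from a device. While the count in the device
 * remains above zero the interface remains promiscuous. Once it hits zero
 * the device reverts back to normal filtering operation. A negative inc
 * value is used to drop promiscuity on the device.
 * Return 0 if successful or a negative errno code on error.
 */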
4662int dev_set_promiscuity(struct net_device *dev, int inc)
4663{
4664 unsigned int old_flags = dev->flags;
4665 int err;
4666
4667 err = __dev_set_promiscuity(dev, inc);
4668 if (err < 0)
4669 return err;
4670 if (dev->flags != old_flags)
4671 dev_set_rx_mode(dev);
4672 return err;
4673}
4674EXPORT_SYMBOL(dev_set_promiscuity);
4688
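/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface remains listening
 * to all interfaces. Once it hits zero the device reverts back to normal
 * filtering operation. A negative @inc value is used to drop the counter
 * when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 */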
4689int dev_set_allmulti(struct net_device *dev, int inc)
4690{
4691 unsigned int old_flags = dev->flags;
4692
4693 ASSERT_RTNL();
4694
4695 dev->flags |= IFF_ALLMULTI;
4696 dev->allmulti += inc;
4697 if (dev->allmulti == 0) {
4701
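/*
 * Avoid overflow.
 * If inc causes overflow, untouch allmulti and return error.
 */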
4702 if (inc < 0)
4703 dev->flags &= ~IFF_ALLMULTI;
4704 else {
4705 dev->allmulti -= inc;
4706 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
4707 dev->name);
4708 return -EOVERFLOW;
4709 }
4710 }
4711 if (dev->flags ^ old_flags) {
4712 dev_change_rx_flags(dev, IFF_ALLMULTI);
4713 dev_set_rx_mode(dev);
4714 }
4715 return 0;
4716}
4717EXPORT_SYMBOL(dev_set_allmulti);
4724
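/*
 * Upload unicast and multicast address lists to device and
 * configure RX filtering. When the device doesn't support unicast
 * filtering it is put in promiscuous mode while unicast addresses
 * are enabled.
 */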
4725void __dev_set_rx_mode(struct net_device *dev)
4726{
4727 const struct net_device_ops *ops = dev->netdev_ops;
4728
4729
4730 if (!(dev->flags&IFF_UP))
4731 return;
4732
4733 if (!netif_device_present(dev))
4734 return;
4735
4736 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
4739
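/* Unicast addresses change may need master's help */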
4740 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4741 __dev_set_promiscuity(dev, 1);
4742 dev->uc_promisc = true;
4743 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4744 __dev_set_promiscuity(dev, -1);
4745 dev->uc_promisc = false;
4746 }
4747 }
4748
4749 if (ops->ndo_set_rx_mode)
4750 ops->ndo_set_rx_mode(dev);
4751}
4752
4753void dev_set_rx_mode(struct net_device *dev)
4754{
4755 netif_addr_lock_bh(dev);
4756 __dev_set_rx_mode(dev);
4757 netif_addr_unlock_bh(dev);
4758}
4765
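/**
 * dev_get_flags - get flags reported to userspace
 * @dev: device
 *
 * Get the combination of flag bits exported through APIs to userspace.
 */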
4766unsigned int dev_get_flags(const struct net_device *dev)
4767{
4768 unsigned int flags;
4769
4770 flags = (dev->flags & ~(IFF_PROMISC |
4771 IFF_ALLMULTI |
4772 IFF_RUNNING |
4773 IFF_LOWER_UP |
4774 IFF_DORMANT)) |
4775 (dev->gflags & (IFF_PROMISC |
4776 IFF_ALLMULTI));
4777
4778 if (netif_running(dev)) {
4779 if (netif_oper_up(dev))
4780 flags |= IFF_RUNNING;
4781 if (netif_carrier_ok(dev))
4782 flags |= IFF_LOWER_UP;
4783 if (netif_dormant(dev))
4784 flags |= IFF_DORMANT;
4785 }
4786
4787 return flags;
4788}
4789EXPORT_SYMBOL(dev_get_flags);
4790
4791int __dev_change_flags(struct net_device *dev, unsigned int flags)
4792{
4793 unsigned int old_flags = dev->flags;
4794 int ret;
4795
4796 ASSERT_RTNL();
4801
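/* Set the flags on our device. */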
4802 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4803 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4804 IFF_AUTOMEDIA)) |
4805 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4806 IFF_ALLMULTI));
4811
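/* Load in the correct multicast list now the flags have changed. */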
4812 if ((old_flags ^ flags) & IFF_MULTICAST)
4813 dev_change_rx_flags(dev, IFF_MULTICAST);
4814
4815 dev_set_rx_mode(dev);
4822
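/*
 * Have we downed the interface. We handle IFF_UP ourselves
 * according to user attempts to set it, rather than blindly
 * setting it.
 */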
4823 ret = 0;
4824 if ((old_flags ^ flags) & IFF_UP) {
4825 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4826
4827 if (!ret)
4828 dev_set_rx_mode(dev);
4829 }
4830
4831 if ((flags ^ dev->gflags) & IFF_PROMISC) {
4832 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4833
4834 dev->gflags ^= IFF_PROMISC;
4835 dev_set_promiscuity(dev, inc);
4836 }
4841
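/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
 * is important. Some (broken) drivers set IFF_PROMISC, when
 * IFF_ALLMULTI is requested not asking us and not reporting.
 */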
4842 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4843 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4844
4845 dev->gflags ^= IFF_ALLMULTI;
4846 dev_set_allmulti(dev, inc);
4847 }
4848
4849 return ret;
4850}
4851
4852void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4853{
4854 unsigned int changes = dev->flags ^ old_flags;
4855
4856 if (changes & IFF_UP) {
4857 if (dev->flags & IFF_UP)
4858 call_netdevice_notifiers(NETDEV_UP, dev);
4859 else
4860 call_netdevice_notifiers(NETDEV_DOWN, dev);
4861 }
4862
4863 if (dev->flags & IFF_UP &&
4864 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
4865 struct netdev_notifier_change_info change_info;
4866
4867 change_info.flags_changed = changes;
4868 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
4869 &change_info.info);
4870 }
4871}
4880
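/**
 * dev_change_flags - change device settings
 * @dev: device
 * @flags: device state flags
 *
 * Change settings on device based state flags. The flags are
 * in the userspace exported format.
 */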
4881int dev_change_flags(struct net_device *dev, unsigned int flags)
4882{
4883 int ret;
4884 unsigned int changes, old_flags = dev->flags;
4885
4886 ret = __dev_change_flags(dev, flags);
4887 if (ret < 0)
4888 return ret;
4889
4890 changes = old_flags ^ dev->flags;
4891 if (changes)
4892 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4893
4894 __dev_notify_flags(dev, old_flags);
4895 return ret;
4896}
4897EXPORT_SYMBOL(dev_change_flags);
4905
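/**
 * dev_set_mtu - Change maximum transfer unit
 * @dev: device
 * @new_mtu: new transfer unit
 *
 * Change the maximum transfer size of the network device.
 */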
4906int dev_set_mtu(struct net_device *dev, int new_mtu)
4907{
4908 const struct net_device_ops *ops = dev->netdev_ops;
4909 int err;
4910
4911 if (new_mtu == dev->mtu)
4912 return 0;
4913
4914
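/* MTU must be positive. */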
4915 if (new_mtu < 0)
4916 return -EINVAL;
4917
4918 if (!netif_device_present(dev))
4919 return -ENODEV;
4920
4921 err = 0;
4922 if (ops->ndo_change_mtu)
4923 err = ops->ndo_change_mtu(dev, new_mtu);
4924 else
4925 dev->mtu = new_mtu;
4926
4927 if (!err)
4928 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4929 return err;
4930}
4931EXPORT_SYMBOL(dev_set_mtu);
4937
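/**
 * dev_set_group - Change group this device belongs to
 * @dev: device
 * @new_group: group this device should belong to
 */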
4938void dev_set_group(struct net_device *dev, int new_group)
4939{
4940 dev->group = new_group;
4941}
4942EXPORT_SYMBOL(dev_set_group);
4950
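/**
 * dev_set_mac_address - Change Media Access Control Address
 * @dev: device
 * @sa: new address
 *
 * Change the hardware (MAC) address of the device
 */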
4951int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4952{
4953 const struct net_device_ops *ops = dev->netdev_ops;
4954 int err;
4955
4956 if (!ops->ndo_set_mac_address)
4957 return -EOPNOTSUPP;
4958 if (sa->sa_family != dev->type)
4959 return -EINVAL;
4960 if (!netif_device_present(dev))
4961 return -ENODEV;
4962 err = ops->ndo_set_mac_address(dev, sa);
4963 if (err)
4964 return err;
4965 dev->addr_assign_type = NET_ADDR_SET;
4966 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4967 add_device_randomness(dev->dev_addr, dev->addr_len);
4968 return 0;
4969}
4970EXPORT_SYMBOL(dev_set_mac_address);
4978
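/**
 * dev_change_carrier - Change device carrier
 * @dev: device
 * @new_carrier: new value
 *
 * Change device carrier
 */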
4979int dev_change_carrier(struct net_device *dev, bool new_carrier)
4980{
4981 const struct net_device_ops *ops = dev->netdev_ops;
4982
4983 if (!ops->ndo_change_carrier)
4984 return -EOPNOTSUPP;
4985 if (!netif_device_present(dev))
4986 return -ENODEV;
4987 return ops->ndo_change_carrier(dev, new_carrier);
4988}
4989EXPORT_SYMBOL(dev_change_carrier);
4998
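/**
 * dev_new_index - allocate an ifindex
 * @net: the applicable net namespace
 *
 * Returns a suitable unique value for a new device interface
 * number.  The caller must hold the rtnl semaphore or the
 * dev_base_lock to be sure it remains unique.
 */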
4999static int dev_new_index(struct net *net)
5000{
5001 int ifindex = net->ifindex;
5002 for (;;) {
5003 if (++ifindex <= 0)
5004 ifindex = 1;
5005 if (!__dev_get_by_index(net, ifindex))
5006 return net->ifindex = ifindex;
5007 }
5008}
5009
5010
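/* Delayed registration/unregisteration */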
5011static LIST_HEAD(net_todo_list);
5012
5013static void net_set_todo(struct net_device *dev)
5014{
5015 list_add_tail(&dev->todo_list, &net_todo_list);
5016}
5017
5018static void rollback_registered_many(struct list_head *head)
5019{
5020 struct net_device *dev, *tmp;
5021
5022 BUG_ON(dev_boot_phase);
5023 ASSERT_RTNL();
5024
5025 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
5029
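/* Some devices call without registering
 * for initialization unwind. Remove those
 * devices and proceed with the remaining.
 */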
5030 if (dev->reg_state == NETREG_UNINITIALIZED) {
5031 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
5032 dev->name, dev);
5033
5034 WARN_ON(1);
5035 list_del(&dev->unreg_list);
5036 continue;
5037 }
5038 dev->dismantle = true;
5039 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5040 }
5041
5042
5043 dev_close_many(head);
5044
5045 list_for_each_entry(dev, head, unreg_list) {
5046
5047 unlist_netdevice(dev);
5048
5049 dev->reg_state = NETREG_UNREGISTERING;
5050 }
5051
5052 synchronize_net();
5053
5054 list_for_each_entry(dev, head, unreg_list) {
5055
5056 dev_shutdown(dev);
5061
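/* Notify protocols, that we are about to destroy
 * this device. They should clean all the things.
 */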
5062 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5063
5064 if (!dev->rtnl_link_ops ||
5065 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5066 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5070
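/*
 * Flush the unicast and multicast chains
 */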
5071 dev_uc_flush(dev);
5072 dev_mc_flush(dev);
5073
5074 if (dev->netdev_ops->ndo_uninit)
5075 dev->netdev_ops->ndo_uninit(dev);
5076
5077
5078 WARN_ON(netdev_has_any_upper_dev(dev));
5079
5080
5081 netdev_unregister_kobject(dev);
5082#ifdef CONFIG_XPS
5083
5084 netif_reset_xps_queues_gt(dev, 0);
5085#endif
5086 }
5087
5088 synchronize_net();
5089
5090 list_for_each_entry(dev, head, unreg_list)
5091 dev_put(dev);
5092}
5093
5094static void rollback_registered(struct net_device *dev)
5095{
5096 LIST_HEAD(single);
5097
5098 list_add(&dev->unreg_list, &single);
5099 rollback_registered_many(&single);
5100 list_del(&single);
5101}
5102
5103static netdev_features_t netdev_fix_features(struct net_device *dev,
5104 netdev_features_t features)
5105{
5106
5107 if ((features & NETIF_F_HW_CSUM) &&
5108 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5109 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5110 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5111 }
5112
5113
5114 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5115 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5116 features &= ~NETIF_F_ALL_TSO;
5117 }
5118
5119 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
5120 !(features & NETIF_F_IP_CSUM)) {
5121 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
5122 features &= ~NETIF_F_TSO;
5123 features &= ~NETIF_F_TSO_ECN;
5124 }
5125
5126 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
5127 !(features & NETIF_F_IPV6_CSUM)) {
5128 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
5129 features &= ~NETIF_F_TSO6;
5130 }
5131
5132
5133 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5134 features &= ~NETIF_F_TSO_ECN;
5135
5136
5137 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5138 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5139 features &= ~NETIF_F_GSO;
5140 }
5141
5142
5143 if (features & NETIF_F_UFO) {
5144
5145 if (!((features & NETIF_F_GEN_CSUM) ||
5146 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5147 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5148 netdev_dbg(dev,
5149 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5150 features &= ~NETIF_F_UFO;
5151 }
5152
5153 if (!(features & NETIF_F_SG)) {
5154 netdev_dbg(dev,
5155 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5156 features &= ~NETIF_F_UFO;
5157 }
5158 }
5159
5160 return features;
5161}
5162
5163int __netdev_update_features(struct net_device *dev)
5164{
5165 netdev_features_t features;
5166 int err = 0;
5167
5168 ASSERT_RTNL();
5169
5170 features = netdev_get_wanted_features(dev);
5171
5172 if (dev->netdev_ops->ndo_fix_features)
5173 features = dev->netdev_ops->ndo_fix_features(dev, features);
5174
5175
5176 features = netdev_fix_features(dev, features);
5177
5178 if (dev->features == features)
5179 return 0;
5180
5181 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
5182 &dev->features, &features);
5183
5184 if (dev->netdev_ops->ndo_set_features)
5185 err = dev->netdev_ops->ndo_set_features(dev, features);
5186
5187 if (unlikely(err < 0)) {
5188 netdev_err(dev,
5189 "set_features() failed (%d); wanted %pNF, left %pNF\n",
5190 err, &features, &dev->features);
5191 return -1;
5192 }
5193
5194 if (!err)
5195 dev->features = features;
5196
5197 return 1;
5198}
5207
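/**
 * netdev_update_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculate dev->features set and send notifications if it
 * has changed. Should be called after driver or hardware dependent
 * conditions might have changed that influence the features.
 */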
5208void netdev_update_features(struct net_device *dev)
5209{
5210 if (__netdev_update_features(dev))
5211 netdev_features_change(dev);
5212}
5213EXPORT_SYMBOL(netdev_update_features);
5224
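/**
 * netdev_change_features - recalculate device features
 * @dev: the device to check
 *
 * Recalculate dev->features set and send notifications even
 * if they have not changed. Should be called instead of
 * netdev_update_features() if also dev->vlan_features might
 * have changed to allow the changes to be propagated to stacked
 * VLAN devices.
 */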
5225void netdev_change_features(struct net_device *dev)
5226{
5227 __netdev_update_features(dev);
5228 netdev_features_change(dev);
5229}
5230EXPORT_SYMBOL(netdev_change_features);
5240
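/**
 * netif_stacked_transfer_operstate - transfer operstate
 * @rootdev: the root or lower level device to transfer state from
 * @dev: the device to transfer operstate to
 *
 * Transfer operational state from root to device. This is normally
 * called when a stacking relationship exists between the root
 * device and the device (a leaf device).
 */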
5241void netif_stacked_transfer_operstate(const struct net_device *rootdev,
5242 struct net_device *dev)
5243{
5244 if (rootdev->operstate == IF_OPER_DORMANT)
5245 netif_dormant_on(dev);
5246 else
5247 netif_dormant_off(dev);
5248
5249 if (netif_carrier_ok(rootdev)) {
5250 if (!netif_carrier_ok(dev))
5251 netif_carrier_on(dev);
5252 } else {
5253 if (netif_carrier_ok(dev))
5254 netif_carrier_off(dev);
5255 }
5256}
5257EXPORT_SYMBOL(netif_stacked_transfer_operstate);
5258
5259#ifdef CONFIG_RPS
5260static int netif_alloc_rx_queues(struct net_device *dev)
5261{
5262 unsigned int i, count = dev->num_rx_queues;
5263 struct netdev_rx_queue *rx;
5264
5265 BUG_ON(count < 1);
5266
5267 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5268 if (!rx)
5269 return -ENOMEM;
5270
5271 dev->_rx = rx;
5272
5273 for (i = 0; i < count; i++)
5274 rx[i].dev = dev;
5275 return 0;
5276}
5277#endif
5278
5279static void netdev_init_one_queue(struct net_device *dev,
5280 struct netdev_queue *queue, void *_unused)
5281{
5282
5283 spin_lock_init(&queue->_xmit_lock);
5284 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5285 queue->xmit_lock_owner = -1;
5286 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
5287 queue->dev = dev;
5288#ifdef CONFIG_BQL
5289 dql_init(&queue->dql, HZ);
5290#endif
5291}
5292
5293static void netif_free_tx_queues(struct net_device *dev)
5294{
5295 if (is_vmalloc_addr(dev->_tx))
5296 vfree(dev->_tx);
5297 else
5298 kfree(dev->_tx);
5299}
5300
5301static int netif_alloc_netdev_queues(struct net_device *dev)
5302{
5303 unsigned int count = dev->num_tx_queues;
5304 struct netdev_queue *tx;
5305 size_t sz = count * sizeof(*tx);
5306
5307 BUG_ON(count < 1 || count > 0xffff);
5308
5309 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
5310 if (!tx) {
5311 tx = vzalloc(sz);
5312 if (!tx)
5313 return -ENOMEM;
5314 }
5315 dev->_tx = tx;
5316
5317 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5318 spin_lock_init(&dev->tx_global_lock);
5319
5320 return 0;
5321}
5339
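/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * Callers must hold the rtnl semaphore. You may want
 * register_netdev() instead of this.
 *
 * BUGS:
 * The locking appears insufficient to guarantee two parallel registers
 * will not get the same name.
 */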
5340int register_netdevice(struct net_device *dev)
5341{
5342 int ret;
5343 struct net *net = dev_net(dev);
5344
5345 BUG_ON(dev_boot_phase);
5346 ASSERT_RTNL();
5347
5348 might_sleep();
5349
5350
5351 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5352 BUG_ON(!net);
5353
5354 spin_lock_init(&dev->addr_list_lock);
5355 netdev_set_addr_lockdep_class(dev);
5356
5357 dev->iflink = -1;
5358
5359 ret = dev_get_valid_name(net, dev, dev->name);
5360 if (ret < 0)
5361 goto out;
5362
5363
5364 if (dev->netdev_ops->ndo_init) {
5365 ret = dev->netdev_ops->ndo_init(dev);
5366 if (ret) {
5367 if (ret > 0)
5368 ret = -EIO;
5369 goto out;
5370 }
5371 }
5372
5373 if (((dev->hw_features | dev->features) &
5374 NETIF_F_HW_VLAN_CTAG_FILTER) &&
5375 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
5376 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
5377 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
5378 ret = -EINVAL;
5379 goto err_uninit;
5380 }
5381
5382 ret = -EBUSY;
5383 if (!dev->ifindex)
5384 dev->ifindex = dev_new_index(net);
5385 else if (__dev_get_by_index(net, dev->ifindex))
5386 goto err_uninit;
5387
5388 if (dev->iflink == -1)
5389 dev->iflink = dev->ifindex;
5393
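/* Transfer changeable features to wanted_features and enable
 * software offloads (GSO and GRO).
 */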
5394 dev->hw_features |= NETIF_F_SOFT_FEATURES;
5395 dev->features |= NETIF_F_SOFT_FEATURES;
5396 dev->wanted_features = dev->features & dev->hw_features;
5397
5398
5399 if (!(dev->flags & IFF_LOOPBACK)) {
5400 dev->hw_features |= NETIF_F_NOCACHE_COPY;
5401 if (dev->features & NETIF_F_ALL_CSUM) {
5402 dev->wanted_features |= NETIF_F_NOCACHE_COPY;
5403 dev->features |= NETIF_F_NOCACHE_COPY;
5404 }
5405 }
5406
5407
5408
5409 dev->vlan_features |= NETIF_F_HIGHDMA;
5410
5411
5412
5413 dev->hw_enc_features |= NETIF_F_SG;
5414
5415
5416
5417 dev->mpls_features |= NETIF_F_SG;
5418
5419 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5420 ret = notifier_to_errno(ret);
5421 if (ret)
5422 goto err_uninit;
5423
5424 ret = netdev_register_kobject(dev);
5425 if (ret)
5426 goto err_uninit;
5427 dev->reg_state = NETREG_REGISTERED;
5428
5429 __netdev_update_features(dev);
5435
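/*
 * Default initial state at registry is that the
 * device is present.
 */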
5436 set_bit(__LINK_STATE_PRESENT, &dev->state);
5437
5438 linkwatch_init_dev(dev);
5439
5440 dev_init_scheduler(dev);
5441 dev_hold(dev);
5442 list_netdevice(dev);
5443 add_device_randomness(dev->dev_addr, dev->addr_len);
5448
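/* If the device has permanent device address, driver should
 * set dev_addr and also addr_assign_type should be set to
 * NET_ADDR_PERM (default value).
 */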
5449 if (dev->addr_assign_type == NET_ADDR_PERM)
5450 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
5451
5452
5453 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5454 ret = notifier_to_errno(ret);
5455 if (ret) {
5456 rollback_registered(dev);
5457 dev->reg_state = NETREG_UNREGISTERED;
5458 }
5462
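/*
 * Prevent userspace races by waiting until the network
 * device is fully setup before sending notifications.
 */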
5463 if (!dev->rtnl_link_ops ||
5464 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5465 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);
5466
5467out:
5468 return ret;
5469
5470err_uninit:
5471 if (dev->netdev_ops->ndo_uninit)
5472 dev->netdev_ops->ndo_uninit(dev);
5473 goto out;
5474}
5475EXPORT_SYMBOL(register_netdevice);
5486
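/**
 * init_dummy_netdev - init a dummy network device for NAPI
 * @dev: device to init
 *
 * This takes a network device structure and initialize the minimum
 * amount of fields so it can be used to schedule NAPI polls without
 * registering a full blown interface. This is to be used by drivers
 * that need to tie several hardware interfaces to a single NAPI
 * poll scheduler due to HW limitations.
 */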
5487int init_dummy_netdev(struct net_device *dev)
5488{
5493
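/* Clear everything. Note we don't initialize spinlocks
 * as they aren't supposed to be taken by any of the
 * NAPI code and this dummy netdev is supposed to be
 * only ever used for NAPI polls
 */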
5494 memset(dev, 0, sizeof(struct net_device));
5498
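/* make sure we BUG if trying to hit standard
 * register/unregister code path
 */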
5499 dev->reg_state = NETREG_DUMMY;
5500
5501
5502 INIT_LIST_HEAD(&dev->napi_list);
5503
5504
5505 set_bit(__LINK_STATE_PRESENT, &dev->state);
5506 set_bit(__LINK_STATE_START, &dev->state);
5507
5508
5509
5510
5511
5512
5513 return 0;
5514}
5515EXPORT_SYMBOL_GPL(init_dummy_netdev);
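
/*
 * Illustrative sketch (not part of this file): a driver with several
 * hardware channels but a single NAPI context can embed a dummy netdev
 * purely as a NAPI anchor.  The names "example_hw" and "example_poll"
 * below are hypothetical.
 *
 *	struct example_hw {
 *		struct net_device napi_dev;
 *		struct napi_struct napi;
 *	};
 *
 *	static int example_hw_init(struct example_hw *hw)
 *	{
 *		init_dummy_netdev(&hw->napi_dev);
 *		netif_napi_add(&hw->napi_dev, &hw->napi,
 *			       example_poll, 64);
 *		napi_enable(&hw->napi);
 *		return 0;
 *	}
 *
 * The dummy device must never be passed to register_netdevice() or
 * free_netdev(); it exists only so the NAPI core has a valid backing
 * net_device.
 */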

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
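
/*
 * Illustrative sketch (not part of this file): the usual driver probe
 * sequence around register_netdev().  "example_priv",
 * "example_netdev_ops" and "example_probe" are hypothetical names.
 *
 *	static int example_probe(struct device *parent)
 *	{
 *		struct net_device *dev;
 *		int err;
 *
 *		dev = alloc_etherdev(sizeof(struct example_priv));
 *		if (!dev)
 *			return -ENOMEM;
 *
 *		SET_NETDEV_DEV(dev, parent);
 *		dev->netdev_ops = &example_netdev_ops;
 *
 *		err = register_netdev(dev);
 *		if (err) {
 *			free_netdev(dev);
 *			return err;
 *		}
 *		return 0;
 *	}
 *
 * On failure the device was never visible, so free_netdev() may be
 * called directly; once registered, it must be torn down with
 * unregister_netdev() first.
 */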

int netdev_refcnt_read(const struct net_device *dev)
{
	int i, refcnt = 0;

	for_each_possible_cpu(i)
		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
	return refcnt;
}
EXPORT_SYMBOL(netdev_refcnt_read);

/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;
	int refcnt;

	linkwatch_forget_dev(dev);

	rebroadcast_time = warning_time = jiffies;
	refcnt = netdev_refcnt_read(dev);

	while (refcnt != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			__rtnl_unlock();
			rcu_barrier();
			rtnl_lock();

			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		refcnt = netdev_refcnt_read(dev);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
				 dev->name, refcnt);
			warning_time = jiffies;
		}
	}
}

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	/* Wait for rcu callbacks to finish before next phase */
	if (!list_empty(&list))
		rcu_barrier();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_first_entry(&list, struct net_device, todo_list);
		list_del(&dev->todo_list);

		rtnl_lock();
		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
		__rtnl_unlock();

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			pr_err("network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(netdev_refcnt_read(dev));
		WARN_ON(rcu_access_pointer(dev->ip_ptr));
		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

/* Convert net_device_stats to rtnl_link_stats64.  They have the same
 * fields in the same order, with only the type differing.
 */
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
			     const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*stats64));
#else
	size_t i, n = sizeof(*stats64) / sizeof(u64);
	const unsigned long *src = (const unsigned long *)netdev_stats;
	u64 *dst = (u64 *)stats64;

	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
		     sizeof(*stats64) / sizeof(u64));
	for (i = 0; i < n; i++)
		dst[i] = src[i];
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);

/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */
struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
					struct rtnl_link_stats64 *storage)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats64) {
		memset(storage, 0, sizeof(*storage));
		ops->ndo_get_stats64(dev, storage);
	} else if (ops->ndo_get_stats) {
		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
	} else {
		netdev_stats_to_stats64(storage, &dev->stats);
	}
	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
	return storage;
}
EXPORT_SYMBOL(dev_get_stats);
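
/*
 * Illustrative sketch (not part of this file): a driver can supply
 * 64-bit statistics via ndo_get_stats64; dev_get_stats() zeroes
 * @storage before calling it, so only nonzero fields need filling.
 * "example_priv" and its counter fields are hypothetical.
 *
 *	static struct rtnl_link_stats64 *
 *	example_get_stats64(struct net_device *dev,
 *			    struct rtnl_link_stats64 *storage)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *
 *		storage->rx_packets = priv->rx_packets;
 *		storage->rx_bytes   = priv->rx_bytes;
 *		storage->tx_packets = priv->tx_packets;
 *		storage->tx_bytes   = priv->tx_bytes;
 *		return storage;
 *	}
 *
 * Drivers without this hook may simply update dev->stats and rely on
 * the netdev_stats_to_stats64() fallback above.
 */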

struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
	struct netdev_queue *queue = dev_ingress_queue(dev);

#ifdef CONFIG_NET_CLS_ACT
	if (queue)
		return queue;
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;
	netdev_init_one_queue(dev, queue, NULL);
	queue->qdisc = &noop_qdisc;
	queue->qdisc_sleeping = &noop_qdisc;
	rcu_assign_pointer(dev->ingress_queue, queue);
#endif
	return queue;
}

static const struct ethtool_ops default_ethtool_ops;

void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops)
{
	if (dev->ethtool_ops == &default_ethtool_ops)
		dev->ethtool_ops = ops;
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
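
/*
 * Illustrative sketch (not part of this file): a mid-layer can install
 * fallback ethtool_ops without overriding ops a driver already set.
 * "example_default_ethtool_ops" is a hypothetical name;
 * ethtool_op_get_link() is the stock helper.
 *
 *	static const struct ethtool_ops example_default_ethtool_ops = {
 *		.get_link = ethtool_op_get_link,
 *	};
 *
 *	netdev_set_default_ethtool_ops(dev, &example_default_ethtool_ops);
 *
 * The assignment only takes effect while dev->ethtool_ops still points
 * at the core's default_ethtool_ops placeholder.
 */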

/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@txqs:		the number of TX subqueues to allocate
 *	@rxqs:		the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device.
 */
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_RPS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN - 1;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p)
		return NULL;

	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_p;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	INIT_LIST_HEAD(&dev->upper_dev_list);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_RPS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	strcpy(dev->name, name);
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

free_p:
	kfree(p);
	return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);
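
/*
 * Illustrative sketch (not part of this file): allocating a multiqueue
 * device and reaching the private area.  "example_setup" and
 * "example_priv" are hypothetical; the "%d" in the name is expanded
 * later, by dev_get_valid_name() during registration.
 *
 *	struct net_device *dev;
 *	struct example_priv *priv;
 *
 *	dev = alloc_netdev_mqs(sizeof(struct example_priv), "example%d",
 *			       example_setup, 8, 8);
 *	if (!dev)
 *		return -ENOMEM;
 *	priv = netdev_priv(dev);
 *
 * The single-queue alloc_netdev() and Ethernet alloc_etherdev()
 * helpers are thin wrappers around this function.
 */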

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	netif_free_tx_queues(dev);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

	kfree(rcu_dereference_protected(dev->ingress_queue, 1));

	/* Flush device addresses */
	dev_addr_flush(dev);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	free_percpu(dev->pcpu_refcnt);
	dev->pcpu_refcnt = NULL;

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}
EXPORT_SYMBOL(free_netdev);

/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}
EXPORT_SYMBOL(synchronize_net);
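
/*
 * Illustrative sketch (not part of this file): the usual pattern is to
 * unpublish an RCU-protected object, call synchronize_net(), and only
 * then free it.  "example_hook" is a hypothetical RCU-managed pointer
 * updated under RTNL.
 *
 *	old = rtnl_dereference(example_hook);
 *	RCU_INIT_POINTER(example_hook, NULL);
 *	synchronize_net();
 *	kfree(old);
 *
 * After synchronize_net() returns, no packet-processing path can still
 * be dereferencing the old pointer.
 */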

/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
	ASSERT_RTNL();

	if (head) {
		list_move_tail(&dev->unreg_list, head);
	} else {
		rollback_registered(dev);
		/* Finish processing unregister after unlock */
		net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_queue);

/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 */
void unregister_netdevice_many(struct list_head *head)
{
	struct net_device *dev;

	if (!list_empty(head)) {
		rollback_registered_many(head);
		list_for_each_entry(dev, head, unreg_list)
			net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_many);
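
/*
 * Illustrative sketch (not part of this file): batching several
 * unregisters under one RTNL hold amortizes the expensive RCU grace
 * periods.  "example_owns" is a hypothetical predicate.
 *
 *	struct net_device *dev, *aux;
 *	LIST_HEAD(kill_list);
 *
 *	rtnl_lock();
 *	for_each_netdev_safe(net, dev, aux)
 *		if (example_owns(dev))
 *			unregister_netdevice_queue(dev, &kill_list);
 *	unregister_netdevice_many(&kill_list);
 *	rtnl_unlock();
 *
 * default_device_exit_batch() below uses the same pattern when a
 * network namespace is torn down.
 */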

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);
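
/*
 * Illustrative sketch (not part of this file): the teardown mirror of
 * the probe sequence shown earlier.  free_netdev() must only run after
 * unregister_netdev() has returned, since the core may hold references
 * until the unregister todo list has been processed.
 *
 *	static void example_remove(struct net_device *dev)
 *	{
 *		unregister_netdev(dev);
 *		free_netdev(dev);
 *	}
 */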

/**
 *	dev_change_net_namespace - move device to different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing todo */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	if (__dev_get_by_name(net, dev->name)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (dev_get_valid_name(net, dev, pat) < 0)
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice()
	 * and unregister_netdevice().
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols that we are about to destroy this device.
	 * They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
	rcu_barrier();
	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_uc_flush(dev);
	dev_mc_flush(dev);

	/* Send a netdev-removed uevent to the old namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Send a netdev-add uevent to the new namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

	synchronize_net();
	err = 0;
out:
	return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Append output queue from offline CPU. */
	if (oldsd->output_queue) {
		*sd->output_queue_tailp = oldsd->output_queue;
		sd->output_queue_tailp = oldsd->output_queue_tailp;
		oldsd->output_queue = NULL;
		oldsd->output_queue_tailp = &oldsd->output_queue;
	}

	if (!list_empty(&oldsd->poll_list)) {
		list_splice_init(&oldsd->poll_list, &sd->poll_list);
		raise_softirq_irqoff(NET_RX_SOFTIRQ);
	}

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}

	return NOTIFY_OK;
}

/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
netdev_features_t netdev_increment_features(netdev_features_t all,
	netdev_features_t one, netdev_features_t mask)
{
	if (mask & NETIF_F_GEN_CSUM)
		mask |= NETIF_F_ALL_CSUM;
	mask |= NETIF_F_VLAN_CHALLENGED;

	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
	all &= one | ~NETIF_F_ALL_FOR_ALL;

	/* If one device supports hw checksumming, set for all. */
	if (all & NETIF_F_GEN_CSUM)
		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);
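
/*
 * Illustrative sketch (not part of this file): an aggregating driver
 * (bonding, bridge, team) folds each slave's feature set into the
 * master's by iterating with netdev_increment_features().
 * "EXAMPLE_MASTER_FEATURES" and the slave list layout are hypothetical.
 *
 *	netdev_features_t features = EXAMPLE_MASTER_FEATURES;
 *	struct example_slave *slave;
 *
 *	list_for_each_entry(slave, &master->slave_list, list)
 *		features = netdev_increment_features(features,
 *						     slave->dev->features,
 *						     EXAMPLE_MASTER_FEATURES);
 *
 * Seeding @all with the NETIF_F_ALL_FOR_ALL bits set gives those bits
 * AND semantics across slaves: if one device lacks a bit, all do.
 */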

static struct hlist_head * __net_init netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	if (net != &init_net)
		INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine network driver for device.
 */
const char *netdev_drivername(const struct net_device *dev)
{
	const struct device_driver *driver;
	const struct device *parent;
	const char *empty = "";

	parent = dev->dev.parent;
	if (!parent)
		return empty;

	driver = parent->driver;
	if (driver && driver->name)
		return driver->name;
	return empty;
}

static int __netdev_printk(const char *level, const struct net_device *dev,
			   struct va_format *vaf)
{
	int r;

	if (dev && dev->dev.parent) {
		r = dev_printk_emit(level[1] - '0',
				    dev->dev.parent,
				    "%s %s %s: %pV",
				    dev_driver_string(dev->dev.parent),
				    dev_name(dev->dev.parent),
				    netdev_name(dev), vaf);
	} else if (dev) {
		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
	} else {
		r = printk("%s(NULL net_device): %pV", level, vaf);
	}

	return r;
}

int netdev_printk(const char *level, const struct net_device *dev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;
	int r;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	r = __netdev_printk(level, dev, &vaf);

	va_end(args);

	return r;
}
EXPORT_SYMBOL(netdev_printk);

#define define_netdev_printk_level(func, level)			\
int func(const struct net_device *dev, const char *fmt, ...)		\
{									\
	int r;								\
	struct va_format vaf;						\
	va_list args;							\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	r = __netdev_printk(level, dev, &vaf);				\
									\
	va_end(args);							\
									\
	return r;							\
}									\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);
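
/*
 * Illustrative sketch (not part of this file): drivers use these
 * helpers instead of raw printk() so messages are prefixed with the
 * driver, bus and interface names.  The message texts are made up.
 *
 *	netdev_info(dev, "link up, %u Mbps\n", speed);
 *	netdev_warn(dev, "TX ring %d nearly full\n", ring);
 *	netdev_err(dev, "DMA mapping failed\n");
 */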

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Leave virtual devices for the generic cleanup */
		if (dev->rtnl_link_ops)
			continue;

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			pr_emerg("%s: failed to move %s to init_net: %d\n",
				 __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	list_del(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		memset(sd, 0, sizeof(*sd));
		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		sd->completion_queue = NULL;
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->csd.flags = 0;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
		sd->backlog.gro_list = NULL;
		sd->backlog.gro_count = 0;

#ifdef CONFIG_NET_FLOW_LIMIT
		sd->flow_limit = NULL;
#endif
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the list
	 * of network devices, so that it is the first device that
	 * appears and the last network device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);