/*
 *	NET3	Protocol independent device support routines.
 */
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/static_key.h>
#include <linux/hashtable.h>
#include <linux/vmalloc.h>
#include <linux/if_macvlan.h>
#include <linux/errqueue.h>
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;
static struct list_head offload_base __read_mostly;

static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
					 struct net_device *dev,
					 struct netdev_notifier_info *info);
161

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock().
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);

/* Guards lock-free reads of dev->name against concurrent renames. */
static seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	/* Bump the change sequence; skip zero so it can be used as "unset". */
	while (++net->dev_base_seq == 0);
}
196
197static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
198{
199 unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
200
201 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
202}
203
204static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
205{
206 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
207}
208
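/*
 * rps_lock()/rps_unlock() serialize access to the per-CPU softnet_data
 * input_pkt_queue; they compile away to nothing when CONFIG_RPS is not
 * enabled.
 */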
209static inline void rps_lock(struct softnet_data *sd)
210{
211#ifdef CONFIG_RPS
212 spin_lock(&sd->input_pkt_queue.lock);
213#endif
214}
215
216static inline void rps_unlock(struct softnet_data *sd)
217{
218#ifdef CONFIG_RPS
219 spin_unlock(&sd->input_pkt_queue.lock);
220#endif
221}
222
223
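/*
 * Device list insertion: link the device into the global device list,
 * the per-namespace name hash and the ifindex hash, then bump the
 * change sequence.  Caller must hold the RTNL semaphore.
 */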
224static void list_netdevice(struct net_device *dev)
225{
226 struct net *net = dev_net(dev);
227
228 ASSERT_RTNL();
229
230 write_lock_bh(&dev_base_lock);
231 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
232 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
233 hlist_add_head_rcu(&dev->index_hlist,
234 dev_index_hash(net, dev->ifindex));
235 write_unlock_bh(&dev_base_lock);
236
237 dev_base_seq_inc(net);
238}
239
240
241
242
243static void unlist_netdevice(struct net_device *dev)
244{
245 ASSERT_RTNL();
246
247
248 write_lock_bh(&dev_base_lock);
249 list_del_rcu(&dev->dev_list);
250 hlist_del_rcu(&dev->name_hlist);
251 hlist_del_rcu(&dev->index_hlist);
252 write_unlock_bh(&dev_base_lock);
253
254 dev_base_seq_inc(dev_net(dev));
255}
256
257
258
259
260
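/*
 * Our notifier chain: drivers and subsystems register here to learn of
 * network device events (register/unregister, up/down, renames, ...).
 * Updates to the chain are serialized by the RTNL semaphore.
 */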
261static RAW_NOTIFIER_HEAD(netdev_chain);
262
263
264
265
266
267
268DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
269EXPORT_PER_CPU_SYMBOL(softnet_data);
270
271#ifdef CONFIG_LOCKDEP
272
273
274
275
276static const unsigned short netdev_lock_type[] =
277 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
278 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
279 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
280 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
281 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
282 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
283 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
284 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
285 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
286 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
287 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
288 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
289 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
290 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
291 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
292
293static const char *const netdev_lock_name[] =
294 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
295 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
296 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
297 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
298 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
299 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
300 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
301 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
302 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
303 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
304 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
305 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
306 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
307 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
308 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
309
310static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
311static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
312
313static inline unsigned short netdev_lock_pos(unsigned short dev_type)
314{
315 int i;
316
317 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
318 if (netdev_lock_type[i] == dev_type)
319 return i;
320
321 return ARRAY_SIZE(netdev_lock_type) - 1;
322}
323
324static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
325 unsigned short dev_type)
326{
327 int i;
328
329 i = netdev_lock_pos(dev_type);
330 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
331 netdev_lock_name[i]);
332}
333
334static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
335{
336 int i;
337
338 i = netdev_lock_pos(dev->type);
339 lockdep_set_class_and_name(&dev->addr_list_lock,
340 &netdev_addr_lock_key[i],
341 netdev_lock_name[i]);
342}
343#else
344static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
345 unsigned short dev_type)
346{
347}
348static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
349{
350}
351#endif
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
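/*
 * Protocol handler list management.  ETH_P_ALL taps live on ptype_all
 * (or on the device's own ptype_all list when bound to a device);
 * everything else is hashed by protocol number into ptype_base, or put
 * on the device's ptype_specific list.  Readers walk these lists under
 * RCU, writers take ptype_lock.
 */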
375static inline struct list_head *ptype_head(const struct packet_type *pt)
376{
377 if (pt->type == htons(ETH_P_ALL))
378 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
379 else
380 return pt->dev ? &pt->dev->ptype_specific :
381 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
382}
383

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */
397void dev_add_pack(struct packet_type *pt)
398{
399 struct list_head *head = ptype_head(pt);
400
401 spin_lock(&ptype_lock);
402 list_add_rcu(&pt->list, head);
403 spin_unlock(&ptype_lock);
404}
405EXPORT_SYMBOL(dev_add_pack);

/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers and must not be
 *	freed until after all the CPU's have gone through a quiescent state;
 *	see dev_remove_pack() for the variant that waits for that.
 */
420void __dev_remove_pack(struct packet_type *pt)
421{
422 struct list_head *head = ptype_head(pt);
423 struct packet_type *pt1;
424
425 spin_lock(&ptype_lock);
426
427 list_for_each_entry(pt1, head, list) {
428 if (pt == pt1) {
429 list_del_rcu(&pt->list);
430 goto out;
431 }
432 }
433
434 pr_warn("dev_remove_pack: %p not found\n", pt);
435out:
436 spin_unlock(&ptype_lock);
437}
438EXPORT_SYMBOL(__dev_remove_pack);
439
440
441
442
443
444
445
446
447
448
449
450
451
452void dev_remove_pack(struct packet_type *pt)
453{
454 __dev_remove_pack(pt);
455
456 synchronize_net();
457}
458EXPORT_SYMBOL(dev_remove_pack);
459
/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&packet_offload is linked into kernel lists, ordered by priority,
 *	and may not be freed until it has been removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new offload handlers (until the next received packet).
 */
473void dev_add_offload(struct packet_offload *po)
474{
475 struct packet_offload *elem;
476
477 spin_lock(&offload_lock);
478 list_for_each_entry(elem, &offload_base, list) {
479 if (po->priority < elem->priority)
480 break;
481 }
482 list_add_rcu(&po->list, elem->list.prev);
483 spin_unlock(&offload_lock);
484}
485EXPORT_SYMBOL(dev_add_offload);
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500static void __dev_remove_offload(struct packet_offload *po)
501{
502 struct list_head *head = &offload_base;
503 struct packet_offload *po1;
504
505 spin_lock(&offload_lock);
506
507 list_for_each_entry(po1, head, list) {
508 if (po == po1) {
509 list_del_rcu(&po->list);
510 goto out;
511 }
512 }
513
514 pr_warn("dev_remove_offload: %p not found\n", po);
515out:
516 spin_unlock(&offload_lock);
517}
518
519
520
521
522
523
524
525
526
527
528
529
530
531void dev_remove_offload(struct packet_offload *po)
532{
533 __dev_remove_offload(po);
534
535 synchronize_net();
536}
537EXPORT_SYMBOL(dev_remove_offload);
538
539
/******************************************************************************
 *
 *		      Device Boot-time Settings Routines
 *
 ******************************************************************************/

/* Boot time configuration table */
546static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
547
548
549
550
551
552
553
554
555
556
557static int netdev_boot_setup_add(char *name, struct ifmap *map)
558{
559 struct netdev_boot_setup *s;
560 int i;
561
562 s = dev_boot_setup;
563 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
564 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
565 memset(s[i].name, 0, sizeof(s[i].name));
566 strlcpy(s[i].name, name, IFNAMSIZ);
567 memcpy(&s[i].map, map, sizeof(s[i].map));
568 break;
569 }
570 }
571
572 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
573}
574
575
576
577
578
579
580
581
582
583
584int netdev_boot_setup_check(struct net_device *dev)
585{
586 struct netdev_boot_setup *s = dev_boot_setup;
587 int i;
588
589 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
590 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
591 !strcmp(dev->name, s[i].name)) {
592 dev->irq = s[i].map.irq;
593 dev->base_addr = s[i].map.base_addr;
594 dev->mem_start = s[i].map.mem_start;
595 dev->mem_end = s[i].map.mem_end;
596 return 1;
597 }
598 }
599 return 0;
600}
601EXPORT_SYMBOL(netdev_boot_setup_check);
602
603
604
605
606
607
608
609
610
611
612
613
614unsigned long netdev_boot_base(const char *prefix, int unit)
615{
616 const struct netdev_boot_setup *s = dev_boot_setup;
617 char name[IFNAMSIZ];
618 int i;
619
620 sprintf(name, "%s%d", prefix, unit);
621
622
623
624
625
626 if (__dev_get_by_name(&init_net, name))
627 return 1;
628
629 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
630 if (!strcmp(name, s[i].name))
631 return s[i].map.base_addr;
632 return 0;
633}
634
635
636
637
638int __init netdev_boot_setup(char *str)
639{
640 int ints[5];
641 struct ifmap map;
642
643 str = get_options(str, ARRAY_SIZE(ints), ints);
644 if (!str || !*str)
645 return 0;
646
647
648 memset(&map, 0, sizeof(map));
649 if (ints[0] > 0)
650 map.irq = ints[1];
651 if (ints[0] > 1)
652 map.base_addr = ints[2];
653 if (ints[0] > 2)
654 map.mem_start = ints[3];
655 if (ints[0] > 3)
656 map.mem_end = ints[4];
657
658
659 return netdev_boot_setup_add(str, &map);
660}
661
662__setup("netdev=", netdev_boot_setup);
663
664

/**
 *	dev_get_iflink	- get 'iflink' value of an interface
 *	@dev: targeted interface
 *
 *	Indicates the ifindex the interface is linked to.
 *	Physical interfaces have the same 'ifindex' and 'iflink' values.
 */
678int dev_get_iflink(const struct net_device *dev)
679{
680 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
681 return dev->netdev_ops->ndo_get_iflink(dev);
682
683 return dev->ifindex;
684}
685EXPORT_SYMBOL(dev_get_iflink);
686
687
688
689
690
691
692
693
694
695
696int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
697{
698 struct ip_tunnel_info *info;
699
700 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
701 return -EINVAL;
702
703 info = skb_tunnel_info_unclone(skb);
704 if (!info)
705 return -ENOMEM;
706 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
707 return -EINVAL;
708
709 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
710}
711EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
712
713
/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */
725struct net_device *__dev_get_by_name(struct net *net, const char *name)
726{
727 struct net_device *dev;
728 struct hlist_head *head = dev_name_hash(net, name);
729
730 hlist_for_each_entry(dev, head, name_hlist)
731 if (!strncmp(dev->name, name, IFNAMSIZ))
732 return dev;
733
734 return NULL;
735}
736EXPORT_SYMBOL(__dev_get_by_name);
737
738
/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold the RCU read lock.
 */
750struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
751{
752 struct net_device *dev;
753 struct hlist_head *head = dev_name_hash(net, name);
754
755 hlist_for_each_entry_rcu(dev, head, name_hlist)
756 if (!strncmp(dev->name, name, IFNAMSIZ))
757 return dev;
758
759 return NULL;
760}
761EXPORT_SYMBOL(dev_get_by_name_rcu);
762
763
764
765
766
767
768
769
770
771
772
773
774
775struct net_device *dev_get_by_name(struct net *net, const char *name)
776{
777 struct net_device *dev;
778
779 rcu_read_lock();
780 dev = dev_get_by_name_rcu(net, name);
781 if (dev)
782 dev_hold(dev);
783 rcu_read_unlock();
784 return dev;
785}
786EXPORT_SYMBOL(dev_get_by_name);
787
788
789
790
791
792
793
794
795
796
797
798
799
800struct net_device *__dev_get_by_index(struct net *net, int ifindex)
801{
802 struct net_device *dev;
803 struct hlist_head *head = dev_index_hash(net, ifindex);
804
805 hlist_for_each_entry(dev, head, index_hlist)
806 if (dev->ifindex == ifindex)
807 return dev;
808
809 return NULL;
810}
811EXPORT_SYMBOL(__dev_get_by_index);
812
813
814
815
816
817
818
819
820
821
822
823
824struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
825{
826 struct net_device *dev;
827 struct hlist_head *head = dev_index_hash(net, ifindex);
828
829 hlist_for_each_entry_rcu(dev, head, index_hlist)
830 if (dev->ifindex == ifindex)
831 return dev;
832
833 return NULL;
834}
835EXPORT_SYMBOL(dev_get_by_index_rcu);
836
837
/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */
849struct net_device *dev_get_by_index(struct net *net, int ifindex)
850{
851 struct net_device *dev;
852
853 rcu_read_lock();
854 dev = dev_get_by_index_rcu(net, ifindex);
855 if (dev)
856 dev_hold(dev);
857 rcu_read_unlock();
858 return dev;
859}
860EXPORT_SYMBOL(dev_get_by_index);
861
862
863
864
865
866
867
868
869
870
871
872int netdev_get_name(struct net *net, char *name, int ifindex)
873{
874 struct net_device *dev;
875 unsigned int seq;
876
877retry:
878 seq = raw_seqcount_begin(&devnet_rename_seq);
879 rcu_read_lock();
880 dev = dev_get_by_index_rcu(net, ifindex);
881 if (!dev) {
882 rcu_read_unlock();
883 return -ENODEV;
884 }
885
886 strcpy(name, dev->name);
887 rcu_read_unlock();
888 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
889 cond_resched();
890 goto retry;
891 }
892
893 return 0;
894}
895
896

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 */
910struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
911 const char *ha)
912{
913 struct net_device *dev;
914
915 for_each_netdev_rcu(net, dev)
916 if (dev->type == type &&
917 !memcmp(dev->dev_addr, ha, dev->addr_len))
918 return dev;
919
920 return NULL;
921}
922EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
923
924struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
925{
926 struct net_device *dev;
927
928 ASSERT_RTNL();
929 for_each_netdev(net, dev)
930 if (dev->type == type)
931 return dev;
932
933 return NULL;
934}
935EXPORT_SYMBOL(__dev_getfirstbyhwtype);
936
937struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
938{
939 struct net_device *dev, *ret = NULL;
940
941 rcu_read_lock();
942 for_each_netdev_rcu(net, dev)
943 if (dev->type == type) {
944 dev_hold(dev);
945 ret = dev;
946 break;
947 }
948 rcu_read_unlock();
949 return ret;
950}
951EXPORT_SYMBOL(dev_getfirstbyhwtype);
952
953
954
955
956
957
958
959
960
961
962
963
964struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
965 unsigned short mask)
966{
967 struct net_device *dev, *ret;
968
969 ASSERT_RTNL();
970
971 ret = NULL;
972 for_each_netdev(net, dev) {
973 if (((dev->flags ^ if_flags) & mask) == 0) {
974 ret = dev;
975 break;
976 }
977 }
978 return ret;
979}
980EXPORT_SYMBOL(__dev_get_by_flags);
981
982
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
990bool dev_valid_name(const char *name)
991{
992 if (*name == '\0')
993 return false;
994 if (strlen(name) >= IFNAMSIZ)
995 return false;
996 if (!strcmp(name, ".") || !strcmp(name, ".."))
997 return false;
998
999 while (*name) {
1000 if (*name == '/' || *name == ':' || isspace(*name))
1001 return false;
1002 name++;
1003 }
1004 return true;
1005}
1006EXPORT_SYMBOL(dev_valid_name);
1007
1008

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans the list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */
1023static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1024{
1025 int i = 0;
1026 const char *p;
1027 const int max_netdevices = 8*PAGE_SIZE;
1028 unsigned long *inuse;
1029 struct net_device *d;
1030
1031 p = strnchr(name, IFNAMSIZ-1, '%');
1032 if (p) {
1033
1034
1035
1036
1037
1038 if (p[1] != 'd' || strchr(p + 2, '%'))
1039 return -EINVAL;
1040
1041
1042 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1043 if (!inuse)
1044 return -ENOMEM;
1045
1046 for_each_netdev(net, d) {
1047 if (!sscanf(d->name, name, &i))
1048 continue;
1049 if (i < 0 || i >= max_netdevices)
1050 continue;
1051
1052
1053 snprintf(buf, IFNAMSIZ, name, i);
1054 if (!strncmp(buf, d->name, IFNAMSIZ))
1055 set_bit(i, inuse);
1056 }
1057
1058 i = find_first_zero_bit(inuse, max_netdevices);
1059 free_page((unsigned long) inuse);
1060 }
1061
1062 if (buf != name)
1063 snprintf(buf, IFNAMSIZ, name, i);
1064 if (!__dev_get_by_name(net, buf))
1065 return i;
1066
1067
1068
1069
1070
1071 return -ENFILE;
1072}
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088int dev_alloc_name(struct net_device *dev, const char *name)
1089{
1090 char buf[IFNAMSIZ];
1091 struct net *net;
1092 int ret;
1093
1094 BUG_ON(!dev_net(dev));
1095 net = dev_net(dev);
1096 ret = __dev_alloc_name(net, name, buf);
1097 if (ret >= 0)
1098 strlcpy(dev->name, buf, IFNAMSIZ);
1099 return ret;
1100}
1101EXPORT_SYMBOL(dev_alloc_name);
1102
1103static int dev_alloc_name_ns(struct net *net,
1104 struct net_device *dev,
1105 const char *name)
1106{
1107 char buf[IFNAMSIZ];
1108 int ret;
1109
1110 ret = __dev_alloc_name(net, name, buf);
1111 if (ret >= 0)
1112 strlcpy(dev->name, buf, IFNAMSIZ);
1113 return ret;
1114}
1115
1116static int dev_get_valid_name(struct net *net,
1117 struct net_device *dev,
1118 const char *name)
1119{
1120 BUG_ON(!net);
1121
1122 if (!dev_valid_name(name))
1123 return -EINVAL;
1124
1125 if (strchr(name, '%'))
1126 return dev_alloc_name_ns(net, dev, name);
1127 else if (__dev_get_by_name(net, name))
1128 return -EEXIST;
1129 else if (dev->name != name)
1130 strlcpy(dev->name, name, IFNAMSIZ);
1131
1132 return 0;
1133}
1134
1135
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d" for
 *	wildcarding.  The device must be down; returns -EBUSY otherwise.
 */
1143int dev_change_name(struct net_device *dev, const char *newname)
1144{
1145 unsigned char old_assign_type;
1146 char oldname[IFNAMSIZ];
1147 int err = 0;
1148 int ret;
1149 struct net *net;
1150
1151 ASSERT_RTNL();
1152 BUG_ON(!dev_net(dev));
1153
1154 net = dev_net(dev);
1155 if (dev->flags & IFF_UP)
1156 return -EBUSY;
1157
1158 write_seqcount_begin(&devnet_rename_seq);
1159
1160 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1161 write_seqcount_end(&devnet_rename_seq);
1162 return 0;
1163 }
1164
1165 memcpy(oldname, dev->name, IFNAMSIZ);
1166
1167 err = dev_get_valid_name(net, dev, newname);
1168 if (err < 0) {
1169 write_seqcount_end(&devnet_rename_seq);
1170 return err;
1171 }
1172
1173 if (oldname[0] && !strchr(oldname, '%'))
1174 netdev_info(dev, "renamed from %s\n", oldname);
1175
1176 old_assign_type = dev->name_assign_type;
1177 dev->name_assign_type = NET_NAME_RENAMED;
1178
1179rollback:
1180 ret = device_rename(&dev->dev, dev->name);
1181 if (ret) {
1182 memcpy(dev->name, oldname, IFNAMSIZ);
1183 dev->name_assign_type = old_assign_type;
1184 write_seqcount_end(&devnet_rename_seq);
1185 return ret;
1186 }
1187
1188 write_seqcount_end(&devnet_rename_seq);
1189
1190 netdev_adjacent_rename_links(dev, oldname);
1191
1192 write_lock_bh(&dev_base_lock);
1193 hlist_del_rcu(&dev->name_hlist);
1194 write_unlock_bh(&dev_base_lock);
1195
1196 synchronize_rcu();
1197
1198 write_lock_bh(&dev_base_lock);
1199 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1200 write_unlock_bh(&dev_base_lock);
1201
1202 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1203 ret = notifier_to_errno(ret);
1204
1205 if (ret) {
1206
1207 if (err >= 0) {
1208 err = ret;
1209 write_seqcount_begin(&devnet_rename_seq);
1210 memcpy(dev->name, oldname, IFNAMSIZ);
1211 memcpy(oldname, newname, IFNAMSIZ);
1212 dev->name_assign_type = old_assign_type;
1213 old_assign_type = NET_NAME_RENAMED;
1214 goto rollback;
1215 } else {
1216 pr_err("%s: name change rollback failed: %d\n",
1217 dev->name, ret);
1218 }
1219 }
1220
1221 return err;
1222}
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1233{
1234 char *new_ifalias;
1235
1236 ASSERT_RTNL();
1237
1238 if (len >= IFALIASZ)
1239 return -EINVAL;
1240
1241 if (!len) {
1242 kfree(dev->ifalias);
1243 dev->ifalias = NULL;
1244 return 0;
1245 }
1246
1247 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1248 if (!new_ifalias)
1249 return -ENOMEM;
1250 dev->ifalias = new_ifalias;
1251
1252 strlcpy(dev->ifalias, alias, len+1);
1253 return len;
1254}
1255
1256
1257
1258
1259
1260
1261
1262
1263void netdev_features_change(struct net_device *dev)
1264{
1265 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1266}
1267EXPORT_SYMBOL(netdev_features_change);
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277void netdev_state_change(struct net_device *dev)
1278{
1279 if (dev->flags & IFF_UP) {
1280 struct netdev_notifier_change_info change_info;
1281
1282 change_info.flags_changed = 0;
1283 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1284 &change_info.info);
1285 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1286 }
1287}
1288EXPORT_SYMBOL(netdev_state_change);
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300void netdev_notify_peers(struct net_device *dev)
1301{
1302 rtnl_lock();
1303 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1304 rtnl_unlock();
1305}
1306EXPORT_SYMBOL(netdev_notify_peers);
1307
1308static int __dev_open(struct net_device *dev)
1309{
1310 const struct net_device_ops *ops = dev->netdev_ops;
1311 int ret;
1312
1313 ASSERT_RTNL();
1314
1315 if (!netif_device_present(dev))
1316 return -ENODEV;
1317
1318
1319
1320
1321
1322 netpoll_poll_disable(dev);
1323
1324 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1325 ret = notifier_to_errno(ret);
1326 if (ret)
1327 return ret;
1328
1329 set_bit(__LINK_STATE_START, &dev->state);
1330
1331 if (ops->ndo_validate_addr)
1332 ret = ops->ndo_validate_addr(dev);
1333
1334 if (!ret && ops->ndo_open)
1335 ret = ops->ndo_open(dev);
1336
1337 netpoll_poll_enable(dev);
1338
1339 if (ret)
1340 clear_bit(__LINK_STATE_START, &dev->state);
1341 else {
1342 dev->flags |= IFF_UP;
1343 dev_set_rx_mode(dev);
1344 dev_activate(dev);
1345 add_device_randomness(dev->dev_addr, dev->addr_len);
1346 }
1347
1348 return ret;
1349}
1350
1351

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
1363int dev_open(struct net_device *dev)
1364{
1365 int ret;
1366
1367 if (dev->flags & IFF_UP)
1368 return 0;
1369
1370 ret = __dev_open(dev);
1371 if (ret < 0)
1372 return ret;
1373
1374 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1375 call_netdevice_notifiers(NETDEV_UP, dev);
1376
1377 return ret;
1378}
1379EXPORT_SYMBOL(dev_open);
1380
1381static int __dev_close_many(struct list_head *head)
1382{
1383 struct net_device *dev;
1384
1385 ASSERT_RTNL();
1386 might_sleep();
1387
1388 list_for_each_entry(dev, head, close_list) {
1389
1390 netpoll_poll_disable(dev);
1391
1392 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1393
1394 clear_bit(__LINK_STATE_START, &dev->state);
1395
1396
1397
1398
1399
1400
1401
1402 smp_mb__after_atomic();
1403 }
1404
1405 dev_deactivate_many(head);
1406
1407 list_for_each_entry(dev, head, close_list) {
1408 const struct net_device_ops *ops = dev->netdev_ops;
1409
1410
1411
1412
1413
1414
1415
1416
1417 if (ops->ndo_stop)
1418 ops->ndo_stop(dev);
1419
1420 dev->flags &= ~IFF_UP;
1421 netpoll_poll_enable(dev);
1422 }
1423
1424 return 0;
1425}
1426
1427static int __dev_close(struct net_device *dev)
1428{
1429 int retval;
1430 LIST_HEAD(single);
1431
1432 list_add(&dev->close_list, &single);
1433 retval = __dev_close_many(&single);
1434 list_del(&single);
1435
1436 return retval;
1437}
1438
1439int dev_close_many(struct list_head *head, bool unlink)
1440{
1441 struct net_device *dev, *tmp;
1442
1443
1444 list_for_each_entry_safe(dev, tmp, head, close_list)
1445 if (!(dev->flags & IFF_UP))
1446 list_del_init(&dev->close_list);
1447
1448 __dev_close_many(head);
1449
1450 list_for_each_entry_safe(dev, tmp, head, close_list) {
1451 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1452 call_netdevice_notifiers(NETDEV_DOWN, dev);
1453 if (unlink)
1454 list_del_init(&dev->close_list);
1455 }
1456
1457 return 0;
1458}
1459EXPORT_SYMBOL(dev_close_many);
1460
1461

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
1470int dev_close(struct net_device *dev)
1471{
1472 if (dev->flags & IFF_UP) {
1473 LIST_HEAD(single);
1474
1475 list_add(&dev->close_list, &single);
1476 dev_close_many(&single, true);
1477 list_del(&single);
1478 }
1479 return 0;
1480}
1481EXPORT_SYMBOL(dev_close);
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492void dev_disable_lro(struct net_device *dev)
1493{
1494 struct net_device *lower_dev;
1495 struct list_head *iter;
1496
1497 dev->wanted_features &= ~NETIF_F_LRO;
1498 netdev_update_features(dev);
1499
1500 if (unlikely(dev->features & NETIF_F_LRO))
1501 netdev_WARN(dev, "failed to disable LRO!\n");
1502
1503 netdev_for_each_lower_dev(dev, lower_dev, iter)
1504 dev_disable_lro(lower_dev);
1505}
1506EXPORT_SYMBOL(dev_disable_lro);
1507
1508static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1509 struct net_device *dev)
1510{
1511 struct netdev_notifier_info info;
1512
1513 netdev_notifier_info_init(&info, dev);
1514 return nb->notifier_call(nb, val, &info);
1515}
1516
1517static int dev_boot_phase = 1;
1518
1519

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to allow the caller to have a race free
 *	view of the network device list.
 */
1533int register_netdevice_notifier(struct notifier_block *nb)
1534{
1535 struct net_device *dev;
1536 struct net_device *last;
1537 struct net *net;
1538 int err;
1539
1540 rtnl_lock();
1541 err = raw_notifier_chain_register(&netdev_chain, nb);
1542 if (err)
1543 goto unlock;
1544 if (dev_boot_phase)
1545 goto unlock;
1546 for_each_net(net) {
1547 for_each_netdev(net, dev) {
1548 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1549 err = notifier_to_errno(err);
1550 if (err)
1551 goto rollback;
1552
1553 if (!(dev->flags & IFF_UP))
1554 continue;
1555
1556 call_netdevice_notifier(nb, NETDEV_UP, dev);
1557 }
1558 }
1559
1560unlock:
1561 rtnl_unlock();
1562 return err;
1563
1564rollback:
1565 last = dev;
1566 for_each_net(net) {
1567 for_each_netdev(net, dev) {
1568 if (dev == last)
1569 goto outroll;
1570
1571 if (dev->flags & IFF_UP) {
1572 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1573 dev);
1574 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1575 }
1576 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1577 }
1578 }
1579
1580outroll:
1581 raw_notifier_chain_unregister(&netdev_chain, nb);
1582 goto unlock;
1583}
1584EXPORT_SYMBOL(register_netdevice_notifier);
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600int unregister_netdevice_notifier(struct notifier_block *nb)
1601{
1602 struct net_device *dev;
1603 struct net *net;
1604 int err;
1605
1606 rtnl_lock();
1607 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1608 if (err)
1609 goto unlock;
1610
1611 for_each_net(net) {
1612 for_each_netdev(net, dev) {
1613 if (dev->flags & IFF_UP) {
1614 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1615 dev);
1616 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1617 }
1618 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1619 }
1620 }
1621unlock:
1622 rtnl_unlock();
1623 return err;
1624}
1625EXPORT_SYMBOL(unregister_netdevice_notifier);
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637static int call_netdevice_notifiers_info(unsigned long val,
1638 struct net_device *dev,
1639 struct netdev_notifier_info *info)
1640{
1641 ASSERT_RTNL();
1642 netdev_notifier_info_init(info, dev);
1643 return raw_notifier_call_chain(&netdev_chain, val, info);
1644}
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1656{
1657 struct netdev_notifier_info info;
1658
1659 return call_netdevice_notifiers_info(val, dev, &info);
1660}
1661EXPORT_SYMBOL(call_netdevice_notifiers);
1662
1663#ifdef CONFIG_NET_INGRESS
1664static struct static_key ingress_needed __read_mostly;
1665
1666void net_inc_ingress_queue(void)
1667{
1668 static_key_slow_inc(&ingress_needed);
1669}
1670EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
1671
1672void net_dec_ingress_queue(void)
1673{
1674 static_key_slow_dec(&ingress_needed);
1675}
1676EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
1677#endif
1678
1679#ifdef CONFIG_NET_EGRESS
1680static struct static_key egress_needed __read_mostly;
1681
1682void net_inc_egress_queue(void)
1683{
1684 static_key_slow_inc(&egress_needed);
1685}
1686EXPORT_SYMBOL_GPL(net_inc_egress_queue);
1687
1688void net_dec_egress_queue(void)
1689{
1690 static_key_slow_dec(&egress_needed);
1691}
1692EXPORT_SYMBOL_GPL(net_dec_egress_queue);
1693#endif
1694
1695static struct static_key netstamp_needed __read_mostly;
1696#ifdef HAVE_JUMP_LABEL
1697
1698
1699
1700
1701static atomic_t netstamp_needed_deferred;
1702#endif
1703
1704void net_enable_timestamp(void)
1705{
1706#ifdef HAVE_JUMP_LABEL
1707 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1708
1709 if (deferred) {
1710 while (--deferred)
1711 static_key_slow_dec(&netstamp_needed);
1712 return;
1713 }
1714#endif
1715 static_key_slow_inc(&netstamp_needed);
1716}
1717EXPORT_SYMBOL(net_enable_timestamp);
1718
1719void net_disable_timestamp(void)
1720{
1721#ifdef HAVE_JUMP_LABEL
1722 if (in_interrupt()) {
1723 atomic_inc(&netstamp_needed_deferred);
1724 return;
1725 }
1726#endif
1727 static_key_slow_dec(&netstamp_needed);
1728}
1729EXPORT_SYMBOL(net_disable_timestamp);
1730
1731static inline void net_timestamp_set(struct sk_buff *skb)
1732{
1733 skb->tstamp.tv64 = 0;
1734 if (static_key_false(&netstamp_needed))
1735 __net_timestamp(skb);
1736}
1737
1738#define net_timestamp_check(COND, SKB) \
1739 if (static_key_false(&netstamp_needed)) { \
1740 if ((COND) && !(SKB)->tstamp.tv64) \
1741 __net_timestamp(SKB); \
1742 } \
1743
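/*
 * Can this skb be sent out via @dev?  The device must be up and the
 * frame must fit the device MTU (plus room for a VLAN header), unless
 * the skb is GSO and will be segmented to fit later.
 */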
1744bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
1745{
1746 unsigned int len;
1747
1748 if (!(dev->flags & IFF_UP))
1749 return false;
1750
1751 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1752 if (skb->len <= len)
1753 return true;
1754
1755
1756
1757
1758 if (skb_is_gso(skb))
1759 return true;
1760
1761 return false;
1762}
1763EXPORT_SYMBOL_GPL(is_skb_forwardable);
1764
1765int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1766{
1767 if (skb_orphan_frags(skb, GFP_ATOMIC) ||
1768 unlikely(!is_skb_forwardable(dev, skb))) {
1769 atomic_long_inc(&dev->rx_dropped);
1770 kfree_skb(skb);
1771 return NET_RX_DROP;
1772 }
1773
1774 skb_scrub_packet(skb, true);
1775 skb->priority = 0;
1776 skb->protocol = eth_type_trans(skb, dev);
1777 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1778
1779 return 0;
1780}
1781EXPORT_SYMBOL_GPL(__dev_forward_skb);
1782
1783

/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */
1801int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1802{
1803 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
1804}
1805EXPORT_SYMBOL_GPL(dev_forward_skb);
1806
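/*
 * Hand one skb to a single packet handler.  A reference on skb->users
 * is taken here and consumed by the handler's ->func() callback.
 */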
1807static inline int deliver_skb(struct sk_buff *skb,
1808 struct packet_type *pt_prev,
1809 struct net_device *orig_dev)
1810{
1811 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1812 return -ENOMEM;
1813 atomic_inc(&skb->users);
1814 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1815}
1816
1817static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1818 struct packet_type **pt,
1819 struct net_device *orig_dev,
1820 __be16 type,
1821 struct list_head *ptype_list)
1822{
1823 struct packet_type *ptype, *pt_prev = *pt;
1824
1825 list_for_each_entry_rcu(ptype, ptype_list, list) {
1826 if (ptype->type != type)
1827 continue;
1828 if (pt_prev)
1829 deliver_skb(skb, pt_prev, orig_dev);
1830 pt_prev = ptype;
1831 }
1832 *pt = pt_prev;
1833}
1834
1835static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1836{
1837 if (!ptype->af_packet_priv || !skb->sk)
1838 return false;
1839
1840 if (ptype->id_match)
1841 return ptype->id_match(ptype, skb->sk);
1842 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1843 return true;
1844
1845 return false;
1846}
1847
1848
1849
1850
1851
1852
1853void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1854{
1855 struct packet_type *ptype;
1856 struct sk_buff *skb2 = NULL;
1857 struct packet_type *pt_prev = NULL;
1858 struct list_head *ptype_list = &ptype_all;
1859
1860 rcu_read_lock();
1861again:
1862 list_for_each_entry_rcu(ptype, ptype_list, list) {
1863
1864
1865
1866 if (skb_loop_sk(ptype, skb))
1867 continue;
1868
1869 if (pt_prev) {
1870 deliver_skb(skb2, pt_prev, skb->dev);
1871 pt_prev = ptype;
1872 continue;
1873 }
1874
1875
1876 skb2 = skb_clone(skb, GFP_ATOMIC);
1877 if (!skb2)
1878 goto out_unlock;
1879
1880 net_timestamp_set(skb2);
1881
1882
1883
1884
1885
1886 skb_reset_mac_header(skb2);
1887
1888 if (skb_network_header(skb2) < skb2->data ||
1889 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1890 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1891 ntohs(skb2->protocol),
1892 dev->name);
1893 skb_reset_network_header(skb2);
1894 }
1895
1896 skb2->transport_header = skb2->network_header;
1897 skb2->pkt_type = PACKET_OUTGOING;
1898 pt_prev = ptype;
1899 }
1900
1901 if (ptype_list == &ptype_all) {
1902 ptype_list = &dev->ptype_all;
1903 goto again;
1904 }
1905out_unlock:
1906 if (pt_prev)
1907 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1908 rcu_read_unlock();
1909}
1910EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
1911
1912

/**
 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
 * @dev: Network device
 * @txq: number of queues available
 *
 * If real_num_tx_queues is changed the tc mappings may no longer be
 * valid. To resolve this verify the tc mapping remains valid and if
 * not NULL the mapping. With no priorities mapping to this
 * offset/count pair it will no longer be used. In the worst case TC0
 * is invalid nothing can be done so disable priority mappings. It is
 * expected that drivers will fix this mapping if they can before
 * calling netif_set_real_num_tx_queues.
 */
1925static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1926{
1927 int i;
1928 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1929
1930
1931 if (tc->offset + tc->count > txq) {
1932 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1933 dev->num_tc = 0;
1934 return;
1935 }
1936
1937
1938 for (i = 1; i < TC_BITMASK + 1; i++) {
1939 int q = netdev_get_prio_tc_map(dev, i);
1940
1941 tc = &dev->tc_to_txq[q];
1942 if (tc->offset + tc->count > txq) {
1943 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1944 i, q);
1945 netdev_set_prio_tc_map(dev, i, 0);
1946 }
1947 }
1948}
1949
1950#ifdef CONFIG_XPS
1951static DEFINE_MUTEX(xps_map_mutex);
1952#define xmap_dereference(P) \
1953 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1954
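/*
 * Remove transmit queue @index from the XPS map of @cpu.  When the map
 * becomes empty it is freed and the per-CPU pointer cleared; the
 * (possibly NULL) map is returned so callers can tell whether any
 * entries remain for this CPU.
 */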
1955static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1956 int cpu, u16 index)
1957{
1958 struct xps_map *map = NULL;
1959 int pos;
1960
1961 if (dev_maps)
1962 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1963
1964 for (pos = 0; map && pos < map->len; pos++) {
1965 if (map->queues[pos] == index) {
1966 if (map->len > 1) {
1967 map->queues[pos] = map->queues[--map->len];
1968 } else {
1969 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1970 kfree_rcu(map, rcu);
1971 map = NULL;
1972 }
1973 break;
1974 }
1975 }
1976
1977 return map;
1978}
1979
1980static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1981{
1982 struct xps_dev_maps *dev_maps;
1983 int cpu, i;
1984 bool active = false;
1985
1986 mutex_lock(&xps_map_mutex);
1987 dev_maps = xmap_dereference(dev->xps_maps);
1988
1989 if (!dev_maps)
1990 goto out_no_maps;
1991
1992 for_each_possible_cpu(cpu) {
1993 for (i = index; i < dev->num_tx_queues; i++) {
1994 if (!remove_xps_queue(dev_maps, cpu, i))
1995 break;
1996 }
1997 if (i == dev->num_tx_queues)
1998 active = true;
1999 }
2000
2001 if (!active) {
2002 RCU_INIT_POINTER(dev->xps_maps, NULL);
2003 kfree_rcu(dev_maps, rcu);
2004 }
2005
2006 for (i = index; i < dev->num_tx_queues; i++)
2007 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
2008 NUMA_NO_NODE);
2009
2010out_no_maps:
2011 mutex_unlock(&xps_map_mutex);
2012}
2013
2014static struct xps_map *expand_xps_map(struct xps_map *map,
2015 int cpu, u16 index)
2016{
2017 struct xps_map *new_map;
2018 int alloc_len = XPS_MIN_MAP_ALLOC;
2019 int i, pos;
2020
2021 for (pos = 0; map && pos < map->len; pos++) {
2022 if (map->queues[pos] != index)
2023 continue;
2024 return map;
2025 }
2026
2027
2028 if (map) {
2029 if (pos < map->alloc_len)
2030 return map;
2031
2032 alloc_len = map->alloc_len * 2;
2033 }
2034
2035
2036 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2037 cpu_to_node(cpu));
2038 if (!new_map)
2039 return NULL;
2040
2041 for (i = 0; i < pos; i++)
2042 new_map->queues[i] = map->queues[i];
2043 new_map->alloc_len = alloc_len;
2044 new_map->len = pos;
2045
2046 return new_map;
2047}
2048
2049int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2050 u16 index)
2051{
2052 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2053 struct xps_map *map, *new_map;
2054 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
2055 int cpu, numa_node_id = -2;
2056 bool active = false;
2057
2058 mutex_lock(&xps_map_mutex);
2059
2060 dev_maps = xmap_dereference(dev->xps_maps);
2061
2062
2063 for_each_online_cpu(cpu) {
2064 if (!cpumask_test_cpu(cpu, mask))
2065 continue;
2066
2067 if (!new_dev_maps)
2068 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2069 if (!new_dev_maps) {
2070 mutex_unlock(&xps_map_mutex);
2071 return -ENOMEM;
2072 }
2073
2074 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2075 NULL;
2076
2077 map = expand_xps_map(map, cpu, index);
2078 if (!map)
2079 goto error;
2080
2081 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2082 }
2083
2084 if (!new_dev_maps)
2085 goto out_no_new_maps;
2086
2087 for_each_possible_cpu(cpu) {
2088 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2089
2090 int pos = 0;
2091
2092 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2093 while ((pos < map->len) && (map->queues[pos] != index))
2094 pos++;
2095
2096 if (pos == map->len)
2097 map->queues[map->len++] = index;
2098#ifdef CONFIG_NUMA
2099 if (numa_node_id == -2)
2100 numa_node_id = cpu_to_node(cpu);
2101 else if (numa_node_id != cpu_to_node(cpu))
2102 numa_node_id = -1;
2103#endif
2104 } else if (dev_maps) {
2105
2106 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2107 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2108 }
2109
2110 }
2111
2112 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2113
2114
2115 if (dev_maps) {
2116 for_each_possible_cpu(cpu) {
2117 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2118 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2119 if (map && map != new_map)
2120 kfree_rcu(map, rcu);
2121 }
2122
2123 kfree_rcu(dev_maps, rcu);
2124 }
2125
2126 dev_maps = new_dev_maps;
2127 active = true;
2128
2129out_no_new_maps:
2130
2131 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2132 (numa_node_id >= 0) ? numa_node_id :
2133 NUMA_NO_NODE);
2134
2135 if (!dev_maps)
2136 goto out_no_maps;
2137
2138
2139 for_each_possible_cpu(cpu) {
2140 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2141 continue;
2142
2143 if (remove_xps_queue(dev_maps, cpu, index))
2144 active = true;
2145 }
2146
2147
2148 if (!active) {
2149 RCU_INIT_POINTER(dev->xps_maps, NULL);
2150 kfree_rcu(dev_maps, rcu);
2151 }
2152
2153out_no_maps:
2154 mutex_unlock(&xps_map_mutex);
2155
2156 return 0;
2157error:
2158
2159 for_each_possible_cpu(cpu) {
2160 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2161 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2162 NULL;
2163 if (new_map && new_map != map)
2164 kfree(new_map);
2165 }
2166
2167 mutex_unlock(&xps_map_mutex);
2168
2169 kfree(new_dev_maps);
2170 return -ENOMEM;
2171}
2172EXPORT_SYMBOL(netif_set_xps_queue);
2173
2174#endif
2175
2176
2177
2178
2179int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2180{
2181 int rc;
2182
2183 if (txq < 1 || txq > dev->num_tx_queues)
2184 return -EINVAL;
2185
2186 if (dev->reg_state == NETREG_REGISTERED ||
2187 dev->reg_state == NETREG_UNREGISTERING) {
2188 ASSERT_RTNL();
2189
2190 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2191 txq);
2192 if (rc)
2193 return rc;
2194
2195 if (dev->num_tc)
2196 netif_setup_tc(dev, txq);
2197
2198 if (txq < dev->real_num_tx_queues) {
2199 qdisc_reset_all_tx_gt(dev, txq);
2200#ifdef CONFIG_XPS
2201 netif_reset_xps_queues_gt(dev, txq);
2202#endif
2203 }
2204 }
2205
2206 dev->real_num_tx_queues = txq;
2207 return 0;
2208}
2209EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2210
2211#ifdef CONFIG_SYSFS
2212
/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */
2222int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2223{
2224 int rc;
2225
2226 if (rxq < 1 || rxq > dev->num_rx_queues)
2227 return -EINVAL;
2228
2229 if (dev->reg_state == NETREG_REGISTERED) {
2230 ASSERT_RTNL();
2231
2232 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2233 rxq);
2234 if (rc)
2235 return rc;
2236 }
2237
2238 dev->real_num_rx_queues = rxq;
2239 return 0;
2240}
2241EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2242#endif
2243
2244
2245
2246
2247
2248
2249
2250int netif_get_num_default_rss_queues(void)
2251{
2252 return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2253}
2254EXPORT_SYMBOL(netif_get_num_default_rss_queues);
2255
2256static inline void __netif_reschedule(struct Qdisc *q)
2257{
2258 struct softnet_data *sd;
2259 unsigned long flags;
2260
2261 local_irq_save(flags);
2262 sd = this_cpu_ptr(&softnet_data);
2263 q->next_sched = NULL;
2264 *sd->output_queue_tailp = q;
2265 sd->output_queue_tailp = &q->next_sched;
2266 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2267 local_irq_restore(flags);
2268}
2269
2270void __netif_schedule(struct Qdisc *q)
2271{
2272 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2273 __netif_reschedule(q);
2274}
2275EXPORT_SYMBOL(__netif_schedule);
2276
2277struct dev_kfree_skb_cb {
2278 enum skb_free_reason reason;
2279};
2280
2281static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2282{
2283 return (struct dev_kfree_skb_cb *)skb->cb;
2284}
2285
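/*
 * Kick the qdisc attached to @txq unless the queue has been stopped
 * by the driver or by the byte queue limits layer.
 */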
2286void netif_schedule_queue(struct netdev_queue *txq)
2287{
2288 rcu_read_lock();
2289 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2290 struct Qdisc *q = rcu_dereference(txq->qdisc);
2291
2292 __netif_schedule(q);
2293 }
2294 rcu_read_unlock();
2295}
2296EXPORT_SYMBOL(netif_schedule_queue);
2297
2298
2299
2300
2301
2302
2303
2304
2305void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2306{
2307 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2308
2309 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
2310 struct Qdisc *q;
2311
2312 rcu_read_lock();
2313 q = rcu_dereference(txq->qdisc);
2314 __netif_schedule(q);
2315 rcu_read_unlock();
2316 }
2317}
2318EXPORT_SYMBOL(netif_wake_subqueue);
2319
2320void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2321{
2322 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2323 struct Qdisc *q;
2324
2325 rcu_read_lock();
2326 q = rcu_dereference(dev_queue->qdisc);
2327 __netif_schedule(q);
2328 rcu_read_unlock();
2329 }
2330}
2331EXPORT_SYMBOL(netif_tx_wake_queue);
2332
2333void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2334{
2335 unsigned long flags;
2336
2337 if (likely(atomic_read(&skb->users) == 1)) {
2338 smp_rmb();
2339 atomic_set(&skb->users, 0);
2340 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2341 return;
2342 }
2343 get_kfree_skb_cb(skb)->reason = reason;
2344 local_irq_save(flags);
2345 skb->next = __this_cpu_read(softnet_data.completion_queue);
2346 __this_cpu_write(softnet_data.completion_queue, skb);
2347 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2348 local_irq_restore(flags);
2349}
2350EXPORT_SYMBOL(__dev_kfree_skb_irq);
2351
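/*
 * Free an skb from any context: defer to the per-CPU completion queue
 * (processed by the TX softirq) when in hard interrupt context or with
 * IRQs disabled, otherwise free it directly.
 */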
2352void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2353{
2354 if (in_irq() || irqs_disabled())
2355 __dev_kfree_skb_irq(skb, reason);
2356 else
2357 dev_kfree_skb(skb);
2358}
2359EXPORT_SYMBOL(__dev_kfree_skb_any);
2360
2361
2362
2363
2364
2365
2366
2367
2368void netif_device_detach(struct net_device *dev)
2369{
2370 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2371 netif_running(dev)) {
2372 netif_tx_stop_all_queues(dev);
2373 }
2374}
2375EXPORT_SYMBOL(netif_device_detach);
2376
2377
2378
2379
2380
2381
2382
2383void netif_device_attach(struct net_device *dev)
2384{
2385 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2386 netif_running(dev)) {
2387 netif_tx_wake_all_queues(dev);
2388 __netdev_watchdog_up(dev);
2389 }
2390}
2391EXPORT_SYMBOL(netif_device_attach);
2392
2393
2394
2395
2396
2397u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
2398 unsigned int num_tx_queues)
2399{
2400 u32 hash;
2401 u16 qoffset = 0;
2402 u16 qcount = num_tx_queues;
2403
2404 if (skb_rx_queue_recorded(skb)) {
2405 hash = skb_get_rx_queue(skb);
2406 while (unlikely(hash >= num_tx_queues))
2407 hash -= num_tx_queues;
2408 return hash;
2409 }
2410
2411 if (dev->num_tc) {
2412 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2413 qoffset = dev->tc_to_txq[tc].offset;
2414 qcount = dev->tc_to_txq[tc].count;
2415 }
2416
2417 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
2418}
2419EXPORT_SYMBOL(__skb_tx_hash);
2420
2421static void skb_warn_bad_offload(const struct sk_buff *skb)
2422{
2423 static const netdev_features_t null_features = 0;
2424 struct net_device *dev = skb->dev;
2425 const char *name = "";
2426
2427 if (!net_ratelimit())
2428 return;
2429
2430 if (dev) {
2431 if (dev->dev.parent)
2432 name = dev_driver_string(dev->dev.parent);
2433 else
2434 name = netdev_name(dev);
2435 }
2436 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2437 "gso_type=%d ip_summed=%d\n",
2438 name, dev ? &dev->features : &null_features,
2439 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2440 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2441 skb_shinfo(skb)->gso_type, skb->ip_summed);
2442}
2443

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
2448int skb_checksum_help(struct sk_buff *skb)
2449{
2450 __wsum csum;
2451 int ret = 0, offset;
2452
2453 if (skb->ip_summed == CHECKSUM_COMPLETE)
2454 goto out_set_summed;
2455
2456 if (unlikely(skb_shinfo(skb)->gso_size)) {
2457 skb_warn_bad_offload(skb);
2458 return -EINVAL;
2459 }
2460
2461
2462
2463
2464 if (skb_has_shared_frag(skb)) {
2465 ret = __skb_linearize(skb);
2466 if (ret)
2467 goto out;
2468 }
2469
2470 offset = skb_checksum_start_offset(skb);
2471 BUG_ON(offset >= skb_headlen(skb));
2472 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2473
2474 offset += skb->csum_offset;
2475 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2476
2477 if (skb_cloned(skb) &&
2478 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2479 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2480 if (ret)
2481 goto out;
2482 }
2483
2484 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2485out_set_summed:
2486 skb->ip_summed = CHECKSUM_NONE;
2487out:
2488 return ret;
2489}
2490EXPORT_SYMBOL(skb_checksum_help);
2491
2492

/**
 * __skb_csum_offload_chk - Driver helper function to determine if a device
 *   with limited checksum offload capabilities is able to offload the checksum
 *   for a given packet.
 *
 * Arguments:
 *   skb - sk_buff for the packet in question
 *   spec - contains the set of checksum offload capabilities of the device
 *   csum_encapped - returns true if the checksum being offloaded is
 *	      encapsulated, i.e. it is the checksum for the transport header
 *	      in the inner headers.
 *   csum_help - when set indicates that the helper should call
 *	      skb_checksum_help if the offload checks fail
 *
 * Returns:
 *   true: Packet has passed the checksum checks and should be offloadable to
 *	   the device (a driver may still need to check for additional
 *	   restrictions of its device).
 *   false: Checksum is not offloadable. If csum_help was set then
 *	   skb_checksum_help was called to resolve the checksum for non-GSO
 *	   packets.
 */
2513bool __skb_csum_offload_chk(struct sk_buff *skb,
2514 const struct skb_csum_offl_spec *spec,
2515 bool *csum_encapped,
2516 bool csum_help)
2517{
2518 struct iphdr *iph;
2519 struct ipv6hdr *ipv6;
2520 void *nhdr;
2521 int protocol;
2522 u8 ip_proto;
2523
2524 if (skb->protocol == htons(ETH_P_8021Q) ||
2525 skb->protocol == htons(ETH_P_8021AD)) {
2526 if (!spec->vlan_okay)
2527 goto need_help;
2528 }
2529
2530
2531
2532
2533
2534
2535 if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
2536
2537 protocol = eproto_to_ipproto(vlan_get_protocol(skb));
2538 nhdr = skb_network_header(skb);
2539 *csum_encapped = false;
2540 if (spec->no_not_encapped)
2541 goto need_help;
2542 } else if (skb->encapsulation && spec->encap_okay &&
2543 skb_checksum_start_offset(skb) ==
2544 skb_inner_transport_offset(skb)) {
2545
2546 *csum_encapped = true;
2547 switch (skb->inner_protocol_type) {
2548 case ENCAP_TYPE_ETHER:
2549 protocol = eproto_to_ipproto(skb->inner_protocol);
2550 break;
2551 case ENCAP_TYPE_IPPROTO:
2552 protocol = skb->inner_protocol;
2553 break;
2554 }
2555 nhdr = skb_inner_network_header(skb);
2556 } else {
2557 goto need_help;
2558 }
2559
2560 switch (protocol) {
2561 case IPPROTO_IP:
2562 if (!spec->ipv4_okay)
2563 goto need_help;
2564 iph = nhdr;
2565 ip_proto = iph->protocol;
2566 if (iph->ihl != 5 && !spec->ip_options_okay)
2567 goto need_help;
2568 break;
2569 case IPPROTO_IPV6:
2570 if (!spec->ipv6_okay)
2571 goto need_help;
2572 if (spec->no_encapped_ipv6 && *csum_encapped)
2573 goto need_help;
2574 ipv6 = nhdr;
2575 nhdr += sizeof(*ipv6);
2576 ip_proto = ipv6->nexthdr;
2577 break;
2578 default:
2579 goto need_help;
2580 }
2581
2582ip_proto_again:
2583 switch (ip_proto) {
2584 case IPPROTO_TCP:
2585 if (!spec->tcp_okay ||
2586 skb->csum_offset != offsetof(struct tcphdr, check))
2587 goto need_help;
2588 break;
2589 case IPPROTO_UDP:
2590 if (!spec->udp_okay ||
2591 skb->csum_offset != offsetof(struct udphdr, check))
2592 goto need_help;
2593 break;
2594 case IPPROTO_SCTP:
2595 if (!spec->sctp_okay ||
2596 skb->csum_offset != offsetof(struct sctphdr, checksum))
2597 goto cant_help;
2598 break;
2599 case NEXTHDR_HOP:
2600 case NEXTHDR_ROUTING:
2601 case NEXTHDR_DEST: {
2602 u8 *opthdr = nhdr;
2603
2604 if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
2605 goto need_help;
2606
2607 ip_proto = opthdr[0];
2608 nhdr += (opthdr[1] + 1) << 3;
2609
2610 goto ip_proto_again;
2611 }
2612 default:
2613 goto need_help;
2614 }
2615
2616
2617 return true;
2618
2619need_help:
2620 if (csum_help && !skb_shinfo(skb)->gso_size)
2621 skb_checksum_help(skb);
2622cant_help:
2623 return false;
2624}
2625EXPORT_SYMBOL(__skb_csum_offload_chk);
2626
2627__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2628{
2629 __be16 type = skb->protocol;
2630
2631
2632 if (type == htons(ETH_P_TEB)) {
2633 struct ethhdr *eth;
2634
2635 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2636 return 0;
2637
2638 eth = (struct ethhdr *)skb_mac_header(skb);
2639 type = eth->h_proto;
2640 }
2641
2642 return __vlan_get_protocol(skb, type, depth);
2643}
2644
2645
2646
2647
2648
2649
2650struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2651 netdev_features_t features)
2652{
2653 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2654 struct packet_offload *ptype;
2655 int vlan_depth = skb->mac_len;
2656 __be16 type = skb_network_protocol(skb, &vlan_depth);
2657
2658 if (unlikely(!type))
2659 return ERR_PTR(-EINVAL);
2660
2661 __skb_pull(skb, vlan_depth);
2662
2663 rcu_read_lock();
2664 list_for_each_entry_rcu(ptype, &offload_base, list) {
2665 if (ptype->type == type && ptype->callbacks.gso_segment) {
2666 segs = ptype->callbacks.gso_segment(skb, features);
2667 break;
2668 }
2669 }
2670 rcu_read_unlock();
2671
2672 __skb_push(skb, skb->data - skb_mac_header(skb));
2673
2674 return segs;
2675}
2676EXPORT_SYMBOL(skb_mac_gso_segment);
2677
2678
2679
2680
2681static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2682{
2683 if (tx_path)
2684 return skb->ip_summed != CHECKSUM_PARTIAL;
2685 else
2686 return skb->ip_summed == CHECKSUM_NONE;
2687}
2688
2689

/**
 *	__skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *	@tx_path: whether it is called in TX path
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 *
 *	Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb.
 */
2702struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2703 netdev_features_t features, bool tx_path)
2704{
2705 if (unlikely(skb_needs_check(skb, tx_path))) {
2706 int err;
2707
2708 skb_warn_bad_offload(skb);
2709
2710 err = skb_cow_head(skb, 0);
2711 if (err < 0)
2712 return ERR_PTR(err);
2713 }
2714
2715
2716
2717
2718
2719 if (features & NETIF_F_GSO_PARTIAL) {
2720 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
2721 struct net_device *dev = skb->dev;
2722
2723 partial_features |= dev->features & dev->gso_partial_features;
2724 if (!skb_gso_ok(skb, features | partial_features))
2725 features &= ~NETIF_F_GSO_PARTIAL;
2726 }
2727
2728 BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
2729 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
2730
2731 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2732 SKB_GSO_CB(skb)->encap_level = 0;
2733
2734 skb_reset_mac_header(skb);
2735 skb_reset_mac_len(skb);
2736
2737 return skb_mac_gso_segment(skb, features);
2738}
2739EXPORT_SYMBOL(__skb_gso_segment);
2740
2741
2742#ifdef CONFIG_BUG
2743void netdev_rx_csum_fault(struct net_device *dev)
2744{
2745 if (net_ratelimit()) {
2746 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2747 dump_stack();
2748 }
2749}
2750EXPORT_SYMBOL(netdev_rx_csum_fault);
2751#endif
2752
2753
2754
2755
2756
2757
2758static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2759{
2760#ifdef CONFIG_HIGHMEM
2761 int i;
2762 if (!(dev->features & NETIF_F_HIGHDMA)) {
2763 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2764 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2765 if (PageHighMem(skb_frag_page(frag)))
2766 return 1;
2767 }
2768 }
2769
2770 if (PCI_DMA_BUS_IS_PHYS) {
2771 struct device *pdev = dev->dev.parent;
2772
2773 if (!pdev)
2774 return 0;
2775 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2776 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2777 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2778 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2779 return 1;
2780 }
2781 }
2782#endif
2783 return 0;
2784}
2785
2786
2787
2788
2789#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
2790static netdev_features_t net_mpls_features(struct sk_buff *skb,
2791 netdev_features_t features,
2792 __be16 type)
2793{
2794 if (eth_p_mpls(type))
2795 features &= skb->dev->mpls_features;
2796
2797 return features;
2798}
2799#else
2800static netdev_features_t net_mpls_features(struct sk_buff *skb,
2801 netdev_features_t features,
2802 __be16 type)
2803{
2804 return features;
2805}
2806#endif
2807
2808static netdev_features_t harmonize_features(struct sk_buff *skb,
2809 netdev_features_t features)
2810{
2811 int tmp;
2812 __be16 type;
2813
2814 type = skb_network_protocol(skb, &tmp);
2815 features = net_mpls_features(skb, features, type);
2816
2817 if (skb->ip_summed != CHECKSUM_NONE &&
2818 !can_checksum_protocol(features, type)) {
2819 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2820 } else if (illegal_highdma(skb->dev, skb)) {
2821 features &= ~NETIF_F_SG;
2822 }
2823
2824 return features;
2825}
2826
2827netdev_features_t passthru_features_check(struct sk_buff *skb,
2828 struct net_device *dev,
2829 netdev_features_t features)
2830{
2831 return features;
2832}
2833EXPORT_SYMBOL(passthru_features_check);
2834
2835static netdev_features_t dflt_features_check(const struct sk_buff *skb,
2836 struct net_device *dev,
2837 netdev_features_t features)
2838{
2839 return vlan_features_check(skb, features);
2840}
2841
2842static netdev_features_t gso_features_check(const struct sk_buff *skb,
2843 struct net_device *dev,
2844 netdev_features_t features)
2845{
2846 u16 gso_segs = skb_shinfo(skb)->gso_segs;
2847
2848 if (gso_segs > dev->gso_max_segs)
2849 return features & ~NETIF_F_GSO_MASK;
2850
2851
2852
2853
2854
2855
2856
2857 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
2858 features &= ~dev->gso_partial_features;
2859
2860
2861
2862
2863 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
2864 struct iphdr *iph = skb->encapsulation ?
2865 inner_ip_hdr(skb) : ip_hdr(skb);
2866
2867 if (!(iph->frag_off & htons(IP_DF)))
2868 features &= ~NETIF_F_TSO_MANGLEID;
2869 }
2870
2871 return features;
2872}
2873
2874netdev_features_t netif_skb_features(struct sk_buff *skb)
2875{
2876 struct net_device *dev = skb->dev;
2877 netdev_features_t features = dev->features;
2878
2879 if (skb_is_gso(skb))
2880 features = gso_features_check(skb, dev, features);
2881
2882
2883
2884
2885
2886 if (skb->encapsulation)
2887 features &= dev->hw_enc_features;
2888
2889 if (skb_vlan_tagged(skb))
2890 features = netdev_intersect_features(features,
2891 dev->vlan_features |
2892 NETIF_F_HW_VLAN_CTAG_TX |
2893 NETIF_F_HW_VLAN_STAG_TX);
2894
2895 if (dev->netdev_ops->ndo_features_check)
2896 features &= dev->netdev_ops->ndo_features_check(skb, dev,
2897 features);
2898 else
2899 features &= dflt_features_check(skb, dev, features);
2900
2901 return harmonize_features(skb, features);
2902}
2903EXPORT_SYMBOL(netif_skb_features);
2904
2905static int xmit_one(struct sk_buff *skb, struct net_device *dev,
2906 struct netdev_queue *txq, bool more)
2907{
2908 unsigned int len;
2909 int rc;
2910
2911 if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
2912 dev_queue_xmit_nit(skb, dev);
2913
2914 len = skb->len;
2915 trace_net_dev_start_xmit(skb, dev);
2916 rc = netdev_start_xmit(skb, dev, txq, more);
2917 trace_net_dev_xmit(skb, rc, dev, len);
2918
2919 return rc;
2920}
2921
2922struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
2923 struct netdev_queue *txq, int *ret)
2924{
2925 struct sk_buff *skb = first;
2926 int rc = NETDEV_TX_OK;
2927
2928 while (skb) {
2929 struct sk_buff *next = skb->next;
2930
2931 skb->next = NULL;
2932 rc = xmit_one(skb, dev, txq, next != NULL);
2933 if (unlikely(!dev_xmit_complete(rc))) {
2934 skb->next = next;
2935 goto out;
2936 }
2937
2938 skb = next;
2939 if (netif_xmit_stopped(txq) && skb) {
2940 rc = NETDEV_TX_BUSY;
2941 break;
2942 }
2943 }
2944
2945out:
2946 *ret = rc;
2947 return skb;
2948}
2949
2950static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2951 netdev_features_t features)
2952{
2953 if (skb_vlan_tag_present(skb) &&
2954 !vlan_hw_offload_capable(features, skb->vlan_proto))
2955 skb = __vlan_hwaccel_push_inside(skb);
2956 return skb;
2957}
2958
2959static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
2960{
2961 netdev_features_t features;
2962
2963 features = netif_skb_features(skb);
2964 skb = validate_xmit_vlan(skb, features);
2965 if (unlikely(!skb))
2966 goto out_null;
2967
2968 if (netif_needs_gso(skb, features)) {
2969 struct sk_buff *segs;
2970
2971 segs = skb_gso_segment(skb, features);
2972 if (IS_ERR(segs)) {
2973 goto out_kfree_skb;
2974 } else if (segs) {
2975 consume_skb(skb);
2976 skb = segs;
2977 }
2978 } else {
2979 if (skb_needs_linearize(skb, features) &&
2980 __skb_linearize(skb))
2981 goto out_kfree_skb;
2982
2983
2984
2985
2986
2987 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2988 if (skb->encapsulation)
2989 skb_set_inner_transport_header(skb,
2990 skb_checksum_start_offset(skb));
2991 else
2992 skb_set_transport_header(skb,
2993 skb_checksum_start_offset(skb));
2994 if (!(features & NETIF_F_CSUM_MASK) &&
2995 skb_checksum_help(skb))
2996 goto out_kfree_skb;
2997 }
2998 }
2999
3000 return skb;
3001
3002out_kfree_skb:
3003 kfree_skb(skb);
3004out_null:
3005 atomic_long_inc(&dev->tx_dropped);
3006 return NULL;
3007}
3008
3009struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
3010{
3011 struct sk_buff *next, *head = NULL, *tail;
3012
3013 for (; skb != NULL; skb = next) {
3014 next = skb->next;
3015 skb->next = NULL;
3016
3017
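		/* Point prev back at the skb itself so that, if validation
		 * does not segment it, skb->prev below still gives the tail.
		 */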
3018 skb->prev = skb;
3019
3020 skb = validate_xmit_skb(skb, dev);
3021 if (!skb)
3022 continue;
3023
3024 if (!head)
3025 head = skb;
3026 else
3027 tail->next = skb;
3028
3029
3030
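		/* If the skb was segmented, skb->prev points at the last
		 * segment; otherwise it still points at skb itself.
		 */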
3031 tail = skb->prev;
3032 }
3033 return head;
3034}
3035
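/* Initialize qdisc_skb_cb(skb)->pkt_len.  For GSO packets the protocol
 * headers are repeated in every segment, so add (gso_segs - 1) * hdr_len
 * to better approximate the number of bytes that will hit the wire.
 */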
3036static void qdisc_pkt_len_init(struct sk_buff *skb)
3037{
3038 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3039
3040 qdisc_skb_cb(skb)->pkt_len = skb->len;
3041
3042
3043
3044
3045 if (shinfo->gso_size) {
3046 unsigned int hdr_len;
3047 u16 gso_segs = shinfo->gso_segs;
3048
3049
3050 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3051
3052
3053 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
3054 hdr_len += tcp_hdrlen(skb);
3055 else
3056 hdr_len += sizeof(struct udphdr);
3057
3058 if (shinfo->gso_type & SKB_GSO_DODGY)
3059 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3060 shinfo->gso_size);
3061
3062 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3063 }
3064}
3065
3066static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3067 struct net_device *dev,
3068 struct netdev_queue *txq)
3069{
3070 spinlock_t *root_lock = qdisc_lock(q);
3071 bool contended;
3072 int rc;
3073
3074 qdisc_calculate_pkt_len(skb, q);
3075
3076
3077
3078
3079
3080
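	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get the qdisc main lock.
	 * This permits the qdisc->running owner to get the lock more
	 * often and dequeue packets faster.
	 */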
3081 contended = qdisc_is_running(q);
3082 if (unlikely(contended))
3083 spin_lock(&q->busylock);
3084
3085 spin_lock(root_lock);
3086 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3087 kfree_skb(skb);
3088 rc = NET_XMIT_DROP;
3089 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3090 qdisc_run_begin(q)) {
3091
3092
3093
3094
3095
3096
3097 qdisc_bstats_update(q, skb);
3098
3099 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3100 if (unlikely(contended)) {
3101 spin_unlock(&q->busylock);
3102 contended = false;
3103 }
3104 __qdisc_run(q);
3105 } else
3106 qdisc_run_end(q);
3107
3108 rc = NET_XMIT_SUCCESS;
3109 } else {
3110 rc = q->enqueue(skb, q) & NET_XMIT_MASK;
3111 if (qdisc_run_begin(q)) {
3112 if (unlikely(contended)) {
3113 spin_unlock(&q->busylock);
3114 contended = false;
3115 }
3116 __qdisc_run(q);
3117 }
3118 }
3119 spin_unlock(root_lock);
3120 if (unlikely(contended))
3121 spin_unlock(&q->busylock);
3122 return rc;
3123}
3124
3125#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3126static void skb_update_prio(struct sk_buff *skb)
3127{
3128 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
3129
3130 if (!skb->priority && skb->sk && map) {
3131 unsigned int prioidx =
3132 sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
3133
3134 if (prioidx < map->priomap_len)
3135 skb->priority = map->priomap[prioidx];
3136 }
3137}
3138#else
3139#define skb_update_prio(skb)
3140#endif
3141
3142DEFINE_PER_CPU(int, xmit_recursion);
3143EXPORT_SYMBOL(xmit_recursion);
3144
3145#define RECURSION_LIMIT 10
3146
3147
3148
3149
3150
3151
3152
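/**
 *	dev_loopback_xmit - loop back @skb
 *	@net: network namespace this loopback is happening in
 *	@sk: socket; present so the function matches the netfilter okfn
 *	     signature (unused here)
 *	@skb: buffer to transmit
 */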
3153int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3154{
3155 skb_reset_mac_header(skb);
3156 __skb_pull(skb, skb_network_offset(skb));
3157 skb->pkt_type = PACKET_LOOPBACK;
3158 skb->ip_summed = CHECKSUM_UNNECESSARY;
3159 WARN_ON(!skb_dst(skb));
3160 skb_dst_force(skb);
3161 netif_rx_ni(skb);
3162 return 0;
3163}
3164EXPORT_SYMBOL(dev_loopback_xmit);
3165
3166#ifdef CONFIG_NET_EGRESS
3167static struct sk_buff *
3168sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3169{
3170 struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
3171 struct tcf_result cl_res;
3172
3173 if (!cl)
3174 return skb;
3175
3176
3177
3178
3179 qdisc_bstats_cpu_update(cl->q, skb);
3180
3181 switch (tc_classify(skb, cl, &cl_res, false)) {
3182 case TC_ACT_OK:
3183 case TC_ACT_RECLASSIFY:
3184 skb->tc_index = TC_H_MIN(cl_res.classid);
3185 break;
3186 case TC_ACT_SHOT:
3187 qdisc_qstats_cpu_drop(cl->q);
3188 *ret = NET_XMIT_DROP;
3189 kfree_skb(skb);
3190 return NULL;
3191 case TC_ACT_STOLEN:
3192 case TC_ACT_QUEUED:
3193 *ret = NET_XMIT_SUCCESS;
3194 consume_skb(skb);
3195 return NULL;
3196 case TC_ACT_REDIRECT:
3197
3198 skb_do_redirect(skb);
3199 *ret = NET_XMIT_SUCCESS;
3200 return NULL;
3201 default:
3202 break;
3203 }
3204
3205 return skb;
3206}
3207#endif
3208
3209static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
3210{
3211#ifdef CONFIG_XPS
3212 struct xps_dev_maps *dev_maps;
3213 struct xps_map *map;
3214 int queue_index = -1;
3215
3216 rcu_read_lock();
3217 dev_maps = rcu_dereference(dev->xps_maps);
3218 if (dev_maps) {
3219 map = rcu_dereference(
3220 dev_maps->cpu_map[skb->sender_cpu - 1]);
3221 if (map) {
3222 if (map->len == 1)
3223 queue_index = map->queues[0];
3224 else
3225 queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
3226 map->len)];
3227 if (unlikely(queue_index >= dev->real_num_tx_queues))
3228 queue_index = -1;
3229 }
3230 }
3231 rcu_read_unlock();
3232
3233 return queue_index;
3234#else
3235 return -1;
3236#endif
3237}
3238
3239static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
3240{
3241 struct sock *sk = skb->sk;
3242 int queue_index = sk_tx_queue_get(sk);
3243
3244 if (queue_index < 0 || skb->ooo_okay ||
3245 queue_index >= dev->real_num_tx_queues) {
3246 int new_index = get_xps_queue(dev, skb);
3247 if (new_index < 0)
3248 new_index = skb_tx_hash(dev, skb);
3249
3250 if (queue_index != new_index && sk &&
3251 sk_fullsock(sk) &&
3252 rcu_access_pointer(sk->sk_dst_cache))
3253 sk_tx_queue_set(sk, new_index);
3254
3255 queue_index = new_index;
3256 }
3257
3258 return queue_index;
3259}
3260
3261struct netdev_queue *netdev_pick_tx(struct net_device *dev,
3262 struct sk_buff *skb,
3263 void *accel_priv)
3264{
3265 int queue_index = 0;
3266
3267#ifdef CONFIG_XPS
3268 u32 sender_cpu = skb->sender_cpu - 1;
3269
3270 if (sender_cpu >= (u32)NR_CPUS)
3271 skb->sender_cpu = raw_smp_processor_id() + 1;
3272#endif
3273
3274 if (dev->real_num_tx_queues != 1) {
3275 const struct net_device_ops *ops = dev->netdev_ops;
3276 if (ops->ndo_select_queue)
3277 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
3278 __netdev_pick_tx);
3279 else
3280 queue_index = __netdev_pick_tx(dev, skb);
3281
3282 if (!accel_priv)
3283 queue_index = netdev_cap_txqueue(dev, queue_index);
3284 }
3285
3286 skb_set_queue_mapping(skb, queue_index);
3287 return netdev_get_tx_queue(dev, queue_index);
3288}
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
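/**
 *	__dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *	@accel_priv: private data used for L2 forwarding offload
 *
 *	Queue a buffer for transmission to a network device.  The caller
 *	must have set the device and priority and built the buffer before
 *	calling this function.
 *
 *	A negative errno code is returned on a failure.  Success does not
 *	guarantee the frame will be transmitted, as it may be dropped due
 *	to congestion or traffic shaping; positive qdisc codes such as
 *	NET_XMIT_DROP can also be returned.
 */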
3316static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
3317{
3318 struct net_device *dev = skb->dev;
3319 struct netdev_queue *txq;
3320 struct Qdisc *q;
3321 int rc = -ENOMEM;
3322
3323 skb_reset_mac_header(skb);
3324
3325 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
3326 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
3327
3328
3329
3330
3331 rcu_read_lock_bh();
3332
3333 skb_update_prio(skb);
3334
3335 qdisc_pkt_len_init(skb);
3336#ifdef CONFIG_NET_CLS_ACT
3337 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
3338# ifdef CONFIG_NET_EGRESS
3339 if (static_key_false(&egress_needed)) {
3340 skb = sch_handle_egress(skb, &rc, dev);
3341 if (!skb)
3342 goto out;
3343 }
3344# endif
3345#endif
3346
3347
3348
3349 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
3350 skb_dst_drop(skb);
3351 else
3352 skb_dst_force(skb);
3353
3354#ifdef CONFIG_NET_SWITCHDEV
3355
3356 if (skb->offload_fwd_mark &&
3357 skb->offload_fwd_mark == dev->offload_fwd_mark) {
3358 consume_skb(skb);
3359 rc = NET_XMIT_SUCCESS;
3360 goto out;
3361 }
3362#endif
3363
3364 txq = netdev_pick_tx(dev, skb, accel_priv);
3365 q = rcu_dereference_bh(txq->qdisc);
3366
3367 trace_net_dev_queue(skb);
3368 if (q->enqueue) {
3369 rc = __dev_xmit_skb(skb, q, dev, txq);
3370 goto out;
3371 }
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
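	/* The device has no queue (common for software devices such as
	 * loopback and tunnels): transmit the skb directly while holding
	 * the tx queue lock.  The xmit_lock_owner and xmit_recursion
	 * checks below catch a device that loops packets back into its
	 * own transmit path instead of deadlocking or recursing forever.
	 */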
3385 if (dev->flags & IFF_UP) {
3386 int cpu = smp_processor_id();
3387
3388 if (txq->xmit_lock_owner != cpu) {
3389
3390 if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
3391 goto recursion_alert;
3392
3393 skb = validate_xmit_skb(skb, dev);
3394 if (!skb)
3395 goto out;
3396
3397 HARD_TX_LOCK(dev, txq, cpu);
3398
3399 if (!netif_xmit_stopped(txq)) {
3400 __this_cpu_inc(xmit_recursion);
3401 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
3402 __this_cpu_dec(xmit_recursion);
3403 if (dev_xmit_complete(rc)) {
3404 HARD_TX_UNLOCK(dev, txq);
3405 goto out;
3406 }
3407 }
3408 HARD_TX_UNLOCK(dev, txq);
3409 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
3410 dev->name);
3411 } else {
3412
3413
3414
3415recursion_alert:
3416 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
3417 dev->name);
3418 }
3419 }
3420
3421 rc = -ENETDOWN;
3422 rcu_read_unlock_bh();
3423
3424 atomic_long_inc(&dev->tx_dropped);
3425 kfree_skb_list(skb);
3426 return rc;
3427out:
3428 rcu_read_unlock_bh();
3429 return rc;
3430}
3431
3432int dev_queue_xmit(struct sk_buff *skb)
3433{
3434 return __dev_queue_xmit(skb, NULL);
3435}
3436EXPORT_SYMBOL(dev_queue_xmit);
3437
3438int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
3439{
3440 return __dev_queue_xmit(skb, accel_priv);
3441}
3442EXPORT_SYMBOL(dev_queue_xmit_accel);
3443
3444
3445
3446
3447
3448
3449int netdev_max_backlog __read_mostly = 1000;
3450EXPORT_SYMBOL(netdev_max_backlog);
3451
3452int netdev_tstamp_prequeue __read_mostly = 1;
3453int netdev_budget __read_mostly = 300;
3454int weight_p __read_mostly = 64;
3455
3456
3457static inline void ____napi_schedule(struct softnet_data *sd,
3458 struct napi_struct *napi)
3459{
3460 list_add_tail(&napi->poll_list, &sd->poll_list);
3461 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3462}
3463
3464#ifdef CONFIG_RPS
3465
3466
3467struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3468EXPORT_SYMBOL(rps_sock_flow_table);
3469u32 rps_cpu_mask __read_mostly;
3470EXPORT_SYMBOL(rps_cpu_mask);
3471
3472struct static_key rps_needed __read_mostly;
3473EXPORT_SYMBOL(rps_needed);
3474
3475static struct rps_dev_flow *
3476set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3477 struct rps_dev_flow *rflow, u16 next_cpu)
3478{
3479 if (next_cpu < nr_cpu_ids) {
3480#ifdef CONFIG_RFS_ACCEL
3481 struct netdev_rx_queue *rxqueue;
3482 struct rps_dev_flow_table *flow_table;
3483 struct rps_dev_flow *old_rflow;
3484 u32 flow_id;
3485 u16 rxq_index;
3486 int rc;
3487
3488
3489 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3490 !(dev->features & NETIF_F_NTUPLE))
3491 goto out;
3492 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3493 if (rxq_index == skb_get_rx_queue(skb))
3494 goto out;
3495
3496 rxqueue = dev->_rx + rxq_index;
3497 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3498 if (!flow_table)
3499 goto out;
3500 flow_id = skb_get_hash(skb) & flow_table->mask;
3501 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3502 rxq_index, flow_id);
3503 if (rc < 0)
3504 goto out;
3505 old_rflow = rflow;
3506 rflow = &flow_table->flows[flow_id];
3507 rflow->filter = rc;
3508 if (old_rflow->filter == rflow->filter)
3509 old_rflow->filter = RPS_NO_FILTER;
3510 out:
3511#endif
3512 rflow->last_qtail =
3513 per_cpu(softnet_data, next_cpu).input_queue_head;
3514 }
3515
3516 rflow->cpu = next_cpu;
3517 return rflow;
3518}
3519
3520
3521
3522
3523
3524
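/*
 * get_rps_cpu is called from netif_rx_internal/netif_receive_skb_internal
 * and returns the target CPU for the skb, consulting the rx queue's RPS
 * map and the RFS socket flow table.  Returns -1 if no suitable CPU is
 * found.  Must be called with rcu_read_lock held.
 */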
3525static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3526 struct rps_dev_flow **rflowp)
3527{
3528 const struct rps_sock_flow_table *sock_flow_table;
3529 struct netdev_rx_queue *rxqueue = dev->_rx;
3530 struct rps_dev_flow_table *flow_table;
3531 struct rps_map *map;
3532 int cpu = -1;
3533 u32 tcpu;
3534 u32 hash;
3535
3536 if (skb_rx_queue_recorded(skb)) {
3537 u16 index = skb_get_rx_queue(skb);
3538
3539 if (unlikely(index >= dev->real_num_rx_queues)) {
3540 WARN_ONCE(dev->real_num_rx_queues > 1,
3541 "%s received packet on queue %u, but number "
3542 "of RX queues is %u\n",
3543 dev->name, index, dev->real_num_rx_queues);
3544 goto done;
3545 }
3546 rxqueue += index;
3547 }
3548
3549
3550
3551 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3552 map = rcu_dereference(rxqueue->rps_map);
3553 if (!flow_table && !map)
3554 goto done;
3555
3556 skb_reset_network_header(skb);
3557 hash = skb_get_hash(skb);
3558 if (!hash)
3559 goto done;
3560
3561 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3562 if (flow_table && sock_flow_table) {
3563 struct rps_dev_flow *rflow;
3564 u32 next_cpu;
3565 u32 ident;
3566
3567
3568 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
3569 if ((ident ^ hash) & ~rps_cpu_mask)
3570 goto try_rps;
3571
3572 next_cpu = ident & rps_cpu_mask;
3573
3574
3575
3576
3577 rflow = &flow_table->flows[hash & flow_table->mask];
3578 tcpu = rflow->cpu;
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
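		/*
		 * If the desired CPU (where the last recvmsg was done) is
		 * different from the current CPU (the one in the rx-queue
		 * flow table entry), switch if one of the following holds:
		 *   - the current CPU is unset (>= nr_cpu_ids)
		 *   - the current CPU is offline
		 *   - the current CPU's queue tail has advanced beyond the
		 *     last packet enqueued using this table entry, which
		 *     guarantees the flow's earlier packets have been
		 *     dequeued and in-order delivery is preserved.
		 */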
3591 if (unlikely(tcpu != next_cpu) &&
3592 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
3593 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3594 rflow->last_qtail)) >= 0)) {
3595 tcpu = next_cpu;
3596 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3597 }
3598
3599 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
3600 *rflowp = rflow;
3601 cpu = tcpu;
3602 goto done;
3603 }
3604 }
3605
3606try_rps:
3607
3608 if (map) {
3609 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3610 if (cpu_online(tcpu)) {
3611 cpu = tcpu;
3612 goto done;
3613 }
3614 }
3615
3616done:
3617 return cpu;
3618}
3619
3620#ifdef CONFIG_RFS_ACCEL
3621
3622
3623
3624
3625
3626
3627
3628
3629
3630
3631
3632
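/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */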
3633bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3634 u32 flow_id, u16 filter_id)
3635{
3636 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3637 struct rps_dev_flow_table *flow_table;
3638 struct rps_dev_flow *rflow;
3639 bool expire = true;
3640 unsigned int cpu;
3641
3642 rcu_read_lock();
3643 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3644 if (flow_table && flow_id <= flow_table->mask) {
3645 rflow = &flow_table->flows[flow_id];
3646 cpu = ACCESS_ONCE(rflow->cpu);
3647 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
3648 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3649 rflow->last_qtail) <
3650 (int)(10 * flow_table->mask)))
3651 expire = false;
3652 }
3653 rcu_read_unlock();
3654 return expire;
3655}
3656EXPORT_SYMBOL(rps_may_expire_flow);
3657
3658#endif
3659
3660
3661static void rps_trigger_softirq(void *data)
3662{
3663 struct softnet_data *sd = data;
3664
3665 ____napi_schedule(sd, &sd->backlog);
3666 sd->received_rps++;
3667}
3668
3669#endif
3670
3671
3672
3673
3674
3675
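/*
 * Check whether this softnet_data belongs to another CPU.  If so, chain it
 * onto our rps_ipi_list, raise NET_RX_SOFTIRQ and return 1 so the caller
 * knows an IPI will flush the remote backlog later; otherwise return 0.
 */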
3676static int rps_ipi_queued(struct softnet_data *sd)
3677{
3678#ifdef CONFIG_RPS
3679 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
3680
3681 if (sd != mysd) {
3682 sd->rps_ipi_next = mysd->rps_ipi_list;
3683 mysd->rps_ipi_list = sd;
3684
3685 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3686 return 1;
3687 }
3688#endif
3689 return 0;
3690}
3691
3692#ifdef CONFIG_NET_FLOW_LIMIT
3693int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3694#endif
3695
3696static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3697{
3698#ifdef CONFIG_NET_FLOW_LIMIT
3699 struct sd_flow_limit *fl;
3700 struct softnet_data *sd;
3701 unsigned int old_flow, new_flow;
3702
3703 if (qlen < (netdev_max_backlog >> 1))
3704 return false;
3705
3706 sd = this_cpu_ptr(&softnet_data);
3707
3708 rcu_read_lock();
3709 fl = rcu_dereference(sd->flow_limit);
3710 if (fl) {
3711 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3712 old_flow = fl->history[fl->history_head];
3713 fl->history[fl->history_head] = new_flow;
3714
3715 fl->history_head++;
3716 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3717
3718 if (likely(fl->buckets[old_flow]))
3719 fl->buckets[old_flow]--;
3720
3721 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3722 fl->count++;
3723 rcu_read_unlock();
3724 return true;
3725 }
3726 }
3727 rcu_read_unlock();
3728#endif
3729 return false;
3730}
3731
3732
3733
3734
3735
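/*
 * enqueue_to_backlog is called to queue an skb to a per-CPU backlog
 * queue (which may belong to a remote CPU).
 */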
3736static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3737 unsigned int *qtail)
3738{
3739 struct softnet_data *sd;
3740 unsigned long flags;
3741 unsigned int qlen;
3742
3743 sd = &per_cpu(softnet_data, cpu);
3744
3745 local_irq_save(flags);
3746
3747 rps_lock(sd);
3748 if (!netif_running(skb->dev))
3749 goto drop;
3750 qlen = skb_queue_len(&sd->input_pkt_queue);
3751 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3752 if (qlen) {
3753enqueue:
3754 __skb_queue_tail(&sd->input_pkt_queue, skb);
3755 input_queue_tail_incr_save(sd, qtail);
3756 rps_unlock(sd);
3757 local_irq_restore(flags);
3758 return NET_RX_SUCCESS;
3759 }
3760
3761
3762
3763
3764 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3765 if (!rps_ipi_queued(sd))
3766 ____napi_schedule(sd, &sd->backlog);
3767 }
3768 goto enqueue;
3769 }
3770
3771drop:
3772 sd->dropped++;
3773 rps_unlock(sd);
3774
3775 local_irq_restore(flags);
3776
3777 atomic_long_inc(&skb->dev->rx_dropped);
3778 kfree_skb(skb);
3779 return NET_RX_DROP;
3780}
3781
3782static int netif_rx_internal(struct sk_buff *skb)
3783{
3784 int ret;
3785
3786 net_timestamp_check(netdev_tstamp_prequeue, skb);
3787
3788 trace_netif_rx(skb);
3789#ifdef CONFIG_RPS
3790 if (static_key_false(&rps_needed)) {
3791 struct rps_dev_flow voidflow, *rflow = &voidflow;
3792 int cpu;
3793
3794 preempt_disable();
3795 rcu_read_lock();
3796
3797 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3798 if (cpu < 0)
3799 cpu = smp_processor_id();
3800
3801 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3802
3803 rcu_read_unlock();
3804 preempt_enable();
3805 } else
3806#endif
3807 {
3808 unsigned int qtail;
3809 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3810 put_cpu();
3811 }
3812 return ret;
3813}
3814
3815
3816
3817
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
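/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it
 *	for the upper (protocol) levels to process.  It always succeeds.
 *	The buffer may be dropped during processing for congestion control
 *	or by the protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */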
3830int netif_rx(struct sk_buff *skb)
3831{
3832 trace_netif_rx_entry(skb);
3833
3834 return netif_rx_internal(skb);
3835}
3836EXPORT_SYMBOL(netif_rx);
3837
3838int netif_rx_ni(struct sk_buff *skb)
3839{
3840 int err;
3841
3842 trace_netif_rx_ni_entry(skb);
3843
3844 preempt_disable();
3845 err = netif_rx_internal(skb);
3846 if (local_softirq_pending())
3847 do_softirq();
3848 preempt_enable();
3849
3850 return err;
3851}
3852EXPORT_SYMBOL(netif_rx_ni);
3853
3854static void net_tx_action(struct softirq_action *h)
3855{
3856 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
3857
3858 if (sd->completion_queue) {
3859 struct sk_buff *clist;
3860
3861 local_irq_disable();
3862 clist = sd->completion_queue;
3863 sd->completion_queue = NULL;
3864 local_irq_enable();
3865
3866 while (clist) {
3867 struct sk_buff *skb = clist;
3868 clist = clist->next;
3869
3870 WARN_ON(atomic_read(&skb->users));
3871 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3872 trace_consume_skb(skb);
3873 else
3874 trace_kfree_skb(skb, net_tx_action);
3875
3876 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
3877 __kfree_skb(skb);
3878 else
3879 __kfree_skb_defer(skb);
3880 }
3881
3882 __kfree_skb_flush();
3883 }
3884
3885 if (sd->output_queue) {
3886 struct Qdisc *head;
3887
3888 local_irq_disable();
3889 head = sd->output_queue;
3890 sd->output_queue = NULL;
3891 sd->output_queue_tailp = &sd->output_queue;
3892 local_irq_enable();
3893
3894 while (head) {
3895 struct Qdisc *q = head;
3896 spinlock_t *root_lock;
3897
3898 head = head->next_sched;
3899
3900 root_lock = qdisc_lock(q);
3901 if (spin_trylock(root_lock)) {
3902 smp_mb__before_atomic();
3903 clear_bit(__QDISC_STATE_SCHED,
3904 &q->state);
3905 qdisc_run(q);
3906 spin_unlock(root_lock);
3907 } else {
3908 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3909 &q->state)) {
3910 __netif_reschedule(q);
3911 } else {
3912 smp_mb__before_atomic();
3913 clear_bit(__QDISC_STATE_SCHED,
3914 &q->state);
3915 }
3916 }
3917 }
3918 }
3919}
3920
3921#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3922 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3923
3924int (*br_fdb_test_addr_hook)(struct net_device *dev,
3925 unsigned char *addr) __read_mostly;
3926EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3927#endif
3928
3929static inline struct sk_buff *
3930sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
3931 struct net_device *orig_dev)
3932{
3933#ifdef CONFIG_NET_CLS_ACT
3934 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
3935 struct tcf_result cl_res;
3936
3937
3938
3939
3940
3941
3942 if (!cl)
3943 return skb;
3944 if (*pt_prev) {
3945 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3946 *pt_prev = NULL;
3947 }
3948
3949 qdisc_skb_cb(skb)->pkt_len = skb->len;
3950 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3951 qdisc_bstats_cpu_update(cl->q, skb);
3952
3953 switch (tc_classify(skb, cl, &cl_res, false)) {
3954 case TC_ACT_OK:
3955 case TC_ACT_RECLASSIFY:
3956 skb->tc_index = TC_H_MIN(cl_res.classid);
3957 break;
3958 case TC_ACT_SHOT:
3959 qdisc_qstats_cpu_drop(cl->q);
3960 kfree_skb(skb);
3961 return NULL;
3962 case TC_ACT_STOLEN:
3963 case TC_ACT_QUEUED:
3964 consume_skb(skb);
3965 return NULL;
3966 case TC_ACT_REDIRECT:
3967
3968
3969
3970
3971 __skb_push(skb, skb->mac_len);
3972 skb_do_redirect(skb);
3973 return NULL;
3974 default:
3975 break;
3976 }
3977#endif
3978 return skb;
3979}
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
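/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device.  This handler will then be
 *	called from __netif_receive_skb.  Returns -EBUSY if a handler is
 *	already registered, 0 on success.
 *
 *	The caller must hold the rtnl_mutex.
 */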
3995int netdev_rx_handler_register(struct net_device *dev,
3996 rx_handler_func_t *rx_handler,
3997 void *rx_handler_data)
3998{
3999 ASSERT_RTNL();
4000
4001 if (dev->rx_handler)
4002 return -EBUSY;
4003
4004
4005 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
4006 rcu_assign_pointer(dev->rx_handler, rx_handler);
4007
4008 return 0;
4009}
4010EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020void netdev_rx_handler_unregister(struct net_device *dev)
4021{
4022
4023 ASSERT_RTNL();
4024 RCU_INIT_POINTER(dev->rx_handler, NULL);
4025
4026
4027
4028
4029 synchronize_net();
4030 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
4031}
4032EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
4033
4034
4035
4036
4037
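/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special handling of PFMEMALLOC skbs.
 */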
4038static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
4039{
4040 switch (skb->protocol) {
4041 case htons(ETH_P_ARP):
4042 case htons(ETH_P_IP):
4043 case htons(ETH_P_IPV6):
4044 case htons(ETH_P_8021Q):
4045 case htons(ETH_P_8021AD):
4046 return true;
4047 default:
4048 return false;
4049 }
4050}
4051
4052static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
4053 int *ret, struct net_device *orig_dev)
4054{
4055#ifdef CONFIG_NETFILTER_INGRESS
4056 if (nf_hook_ingress_active(skb)) {
4057 if (*pt_prev) {
4058 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4059 *pt_prev = NULL;
4060 }
4061
4062 return nf_hook_ingress(skb);
4063 }
4064#endif
4065 return 0;
4066}
4067
4068static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
4069{
4070 struct packet_type *ptype, *pt_prev;
4071 rx_handler_func_t *rx_handler;
4072 struct net_device *orig_dev;
4073 bool deliver_exact = false;
4074 int ret = NET_RX_DROP;
4075 __be16 type;
4076
4077 net_timestamp_check(!netdev_tstamp_prequeue, skb);
4078
4079 trace_netif_receive_skb(skb);
4080
4081 orig_dev = skb->dev;
4082
4083 skb_reset_network_header(skb);
4084 if (!skb_transport_header_was_set(skb))
4085 skb_reset_transport_header(skb);
4086 skb_reset_mac_len(skb);
4087
4088 pt_prev = NULL;
4089
4090another_round:
4091 skb->skb_iif = skb->dev->ifindex;
4092
4093 __this_cpu_inc(softnet_data.processed);
4094
4095 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
4096 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
4097 skb = skb_vlan_untag(skb);
4098 if (unlikely(!skb))
4099 goto out;
4100 }
4101
4102#ifdef CONFIG_NET_CLS_ACT
4103 if (skb->tc_verd & TC_NCLS) {
4104 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
4105 goto ncls;
4106 }
4107#endif
4108
4109 if (pfmemalloc)
4110 goto skip_taps;
4111
4112 list_for_each_entry_rcu(ptype, &ptype_all, list) {
4113 if (pt_prev)
4114 ret = deliver_skb(skb, pt_prev, orig_dev);
4115 pt_prev = ptype;
4116 }
4117
4118 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
4119 if (pt_prev)
4120 ret = deliver_skb(skb, pt_prev, orig_dev);
4121 pt_prev = ptype;
4122 }
4123
4124skip_taps:
4125#ifdef CONFIG_NET_INGRESS
4126 if (static_key_false(&ingress_needed)) {
4127 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
4128 if (!skb)
4129 goto out;
4130
4131 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
4132 goto out;
4133 }
4134#endif
4135#ifdef CONFIG_NET_CLS_ACT
4136 skb->tc_verd = 0;
4137ncls:
4138#endif
4139 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
4140 goto drop;
4141
4142 if (skb_vlan_tag_present(skb)) {
4143 if (pt_prev) {
4144 ret = deliver_skb(skb, pt_prev, orig_dev);
4145 pt_prev = NULL;
4146 }
4147 if (vlan_do_receive(&skb))
4148 goto another_round;
4149 else if (unlikely(!skb))
4150 goto out;
4151 }
4152
4153 rx_handler = rcu_dereference(skb->dev->rx_handler);
4154 if (rx_handler) {
4155 if (pt_prev) {
4156 ret = deliver_skb(skb, pt_prev, orig_dev);
4157 pt_prev = NULL;
4158 }
4159 switch (rx_handler(&skb)) {
4160 case RX_HANDLER_CONSUMED:
4161 ret = NET_RX_SUCCESS;
4162 goto out;
4163 case RX_HANDLER_ANOTHER:
4164 goto another_round;
4165 case RX_HANDLER_EXACT:
4166 deliver_exact = true;
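			/* fall through */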
4167 case RX_HANDLER_PASS:
4168 break;
4169 default:
4170 BUG();
4171 }
4172 }
4173
4174 if (unlikely(skb_vlan_tag_present(skb))) {
4175 if (skb_vlan_tag_get_id(skb))
4176 skb->pkt_type = PACKET_OTHERHOST;
4177
4178
4179
4180
4181 skb->vlan_tci = 0;
4182 }
4183
4184 type = skb->protocol;
4185
4186
4187 if (likely(!deliver_exact)) {
4188 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4189 &ptype_base[ntohs(type) &
4190 PTYPE_HASH_MASK]);
4191 }
4192
4193 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4194 &orig_dev->ptype_specific);
4195
4196 if (unlikely(skb->dev != orig_dev)) {
4197 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4198 &skb->dev->ptype_specific);
4199 }
4200
4201 if (pt_prev) {
4202 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
4203 goto drop;
4204 else
4205 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4206 } else {
4207drop:
4208 if (!deliver_exact)
4209 atomic_long_inc(&skb->dev->rx_dropped);
4210 else
4211 atomic_long_inc(&skb->dev->rx_nohandler);
4212 kfree_skb(skb);
4213
4214
4215
4216 ret = NET_RX_DROP;
4217 }
4218
4219out:
4220 return ret;
4221}
4222
4223static int __netif_receive_skb(struct sk_buff *skb)
4224{
4225 int ret;
4226
4227 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
4228 unsigned long pflags = current->flags;
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
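		/*
		 * PFMEMALLOC skbs are special: they should be delivered to
		 * SOCK_MEMALLOC sockets only and stay away from userspace.
		 * Setting PF_MEMALLOC here saves propagating the allocation
		 * context down to all allocation sites.
		 */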
4239 current->flags |= PF_MEMALLOC;
4240 ret = __netif_receive_skb_core(skb, true);
4241 tsk_restore_flags(current, pflags, PF_MEMALLOC);
4242 } else
4243 ret = __netif_receive_skb_core(skb, false);
4244
4245 return ret;
4246}
4247
4248static int netif_receive_skb_internal(struct sk_buff *skb)
4249{
4250 int ret;
4251
4252 net_timestamp_check(netdev_tstamp_prequeue, skb);
4253
4254 if (skb_defer_rx_timestamp(skb))
4255 return NET_RX_SUCCESS;
4256
4257 rcu_read_lock();
4258
4259#ifdef CONFIG_RPS
4260 if (static_key_false(&rps_needed)) {
4261 struct rps_dev_flow voidflow, *rflow = &voidflow;
4262 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
4263
4264 if (cpu >= 0) {
4265 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4266 rcu_read_unlock();
4267 return ret;
4268 }
4269 }
4270#endif
4271 ret = __netif_receive_skb(skb);
4272 rcu_read_unlock();
4273 return ret;
4274}
4275
4276
4277
4278
4279
4280
4281
4282
4283
4284
4285
4286
4287
4288
4289
4290
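/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds.  The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and
 *	interrupts should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */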
4291int netif_receive_skb(struct sk_buff *skb)
4292{
4293 trace_netif_receive_skb_entry(skb);
4294
4295 return netif_receive_skb_internal(skb);
4296}
4297EXPORT_SYMBOL(netif_receive_skb);
4298
4299
4300
4301
4302static void flush_backlog(void *arg)
4303{
4304 struct net_device *dev = arg;
4305 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
4306 struct sk_buff *skb, *tmp;
4307
4308 rps_lock(sd);
4309 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
4310 if (skb->dev == dev) {
4311 __skb_unlink(skb, &sd->input_pkt_queue);
4312 kfree_skb(skb);
4313 input_queue_head_incr(sd);
4314 }
4315 }
4316 rps_unlock(sd);
4317
4318 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
4319 if (skb->dev == dev) {
4320 __skb_unlink(skb, &sd->process_queue);
4321 kfree_skb(skb);
4322 input_queue_head_incr(sd);
4323 }
4324 }
4325}
4326
4327static int napi_gro_complete(struct sk_buff *skb)
4328{
4329 struct packet_offload *ptype;
4330 __be16 type = skb->protocol;
4331 struct list_head *head = &offload_base;
4332 int err = -ENOENT;
4333
4334 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
4335
4336 if (NAPI_GRO_CB(skb)->count == 1) {
4337 skb_shinfo(skb)->gso_size = 0;
4338 goto out;
4339 }
4340
4341 rcu_read_lock();
4342 list_for_each_entry_rcu(ptype, head, list) {
4343 if (ptype->type != type || !ptype->callbacks.gro_complete)
4344 continue;
4345
4346 err = ptype->callbacks.gro_complete(skb, 0);
4347 break;
4348 }
4349 rcu_read_unlock();
4350
4351 if (err) {
4352 WARN_ON(&ptype->list == head);
4353 kfree_skb(skb);
4354 return NET_RX_SUCCESS;
4355 }
4356
4357out:
4358 return netif_receive_skb_internal(skb);
4359}
4360
4361
4362
4363
4364
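/* napi->gro_list holds packets ordered by age, youngest at the head.
 * Complete them from oldest to newest; when @flush_old is true, stop at
 * the first packet that was merged during the current jiffy.
 */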
4365void napi_gro_flush(struct napi_struct *napi, bool flush_old)
4366{
4367 struct sk_buff *skb, *prev = NULL;
4368
4369
4370 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
4371 skb->prev = prev;
4372 prev = skb;
4373 }
4374
4375 for (skb = prev; skb; skb = prev) {
4376 skb->next = NULL;
4377
4378 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
4379 return;
4380
4381 prev = skb->prev;
4382 napi_gro_complete(skb);
4383 napi->gro_count--;
4384 }
4385
4386 napi->gro_list = NULL;
4387}
4388EXPORT_SYMBOL(napi_gro_flush);
4389
4390static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4391{
4392 struct sk_buff *p;
4393 unsigned int maclen = skb->dev->hard_header_len;
4394 u32 hash = skb_get_hash_raw(skb);
4395
4396 for (p = napi->gro_list; p; p = p->next) {
4397 unsigned long diffs;
4398
4399 NAPI_GRO_CB(p)->flush = 0;
4400
4401 if (hash != skb_get_hash_raw(p)) {
4402 NAPI_GRO_CB(p)->same_flow = 0;
4403 continue;
4404 }
4405
4406 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4407 diffs |= p->vlan_tci ^ skb->vlan_tci;
4408 diffs |= skb_metadata_dst_cmp(p, skb);
4409 if (maclen == ETH_HLEN)
4410 diffs |= compare_ether_header(skb_mac_header(p),
4411 skb_mac_header(skb));
4412 else if (!diffs)
4413 diffs = memcmp(skb_mac_header(p),
4414 skb_mac_header(skb),
4415 maclen);
4416 NAPI_GRO_CB(p)->same_flow = !diffs;
4417 }
4418}
4419
4420static void skb_gro_reset_offset(struct sk_buff *skb)
4421{
4422 const struct skb_shared_info *pinfo = skb_shinfo(skb);
4423 const skb_frag_t *frag0 = &pinfo->frags[0];
4424
4425 NAPI_GRO_CB(skb)->data_offset = 0;
4426 NAPI_GRO_CB(skb)->frag0 = NULL;
4427 NAPI_GRO_CB(skb)->frag0_len = 0;
4428
4429 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
4430 pinfo->nr_frags &&
4431 !PageHighMem(skb_frag_page(frag0))) {
4432 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
4433 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
4434 }
4435}
4436
4437static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
4438{
4439 struct skb_shared_info *pinfo = skb_shinfo(skb);
4440
4441 BUG_ON(skb->end - skb->tail < grow);
4442
4443 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
4444
4445 skb->data_len -= grow;
4446 skb->tail += grow;
4447
4448 pinfo->frags[0].page_offset += grow;
4449 skb_frag_size_sub(&pinfo->frags[0], grow);
4450
4451 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
4452 skb_frag_unref(skb, 0);
4453 memmove(pinfo->frags, pinfo->frags + 1,
4454 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
4455 }
4456}
4457
4458static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4459{
4460 struct sk_buff **pp = NULL;
4461 struct packet_offload *ptype;
4462 __be16 type = skb->protocol;
4463 struct list_head *head = &offload_base;
4464 int same_flow;
4465 enum gro_result ret;
4466 int grow;
4467
4468 if (!(skb->dev->features & NETIF_F_GRO))
4469 goto normal;
4470
4471 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
4472 goto normal;
4473
4474 gro_list_prepare(napi, skb);
4475
4476 rcu_read_lock();
4477 list_for_each_entry_rcu(ptype, head, list) {
4478 if (ptype->type != type || !ptype->callbacks.gro_receive)
4479 continue;
4480
4481 skb_set_network_header(skb, skb_gro_offset(skb));
4482 skb_reset_mac_len(skb);
4483 NAPI_GRO_CB(skb)->same_flow = 0;
4484 NAPI_GRO_CB(skb)->flush = 0;
4485 NAPI_GRO_CB(skb)->free = 0;
4486 NAPI_GRO_CB(skb)->encap_mark = 0;
4487 NAPI_GRO_CB(skb)->is_fou = 0;
4488 NAPI_GRO_CB(skb)->is_atomic = 1;
4489 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
4490
4491
4492 switch (skb->ip_summed) {
4493 case CHECKSUM_COMPLETE:
4494 NAPI_GRO_CB(skb)->csum = skb->csum;
4495 NAPI_GRO_CB(skb)->csum_valid = 1;
4496 NAPI_GRO_CB(skb)->csum_cnt = 0;
4497 break;
4498 case CHECKSUM_UNNECESSARY:
4499 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
4500 NAPI_GRO_CB(skb)->csum_valid = 0;
4501 break;
4502 default:
4503 NAPI_GRO_CB(skb)->csum_cnt = 0;
4504 NAPI_GRO_CB(skb)->csum_valid = 0;
4505 }
4506
4507 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
4508 break;
4509 }
4510 rcu_read_unlock();
4511
4512 if (&ptype->list == head)
4513 goto normal;
4514
4515 same_flow = NAPI_GRO_CB(skb)->same_flow;
4516 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
4517
4518 if (pp) {
4519 struct sk_buff *nskb = *pp;
4520
4521 *pp = nskb->next;
4522 nskb->next = NULL;
4523 napi_gro_complete(nskb);
4524 napi->gro_count--;
4525 }
4526
4527 if (same_flow)
4528 goto ok;
4529
4530 if (NAPI_GRO_CB(skb)->flush)
4531 goto normal;
4532
4533 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
4534 struct sk_buff *nskb = napi->gro_list;
4535
4536
4537 while (nskb->next) {
4538 pp = &nskb->next;
4539 nskb = *pp;
4540 }
4541 *pp = NULL;
4542 nskb->next = NULL;
4543 napi_gro_complete(nskb);
4544 } else {
4545 napi->gro_count++;
4546 }
4547 NAPI_GRO_CB(skb)->count = 1;
4548 NAPI_GRO_CB(skb)->age = jiffies;
4549 NAPI_GRO_CB(skb)->last = skb;
4550 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
4551 skb->next = napi->gro_list;
4552 napi->gro_list = skb;
4553 ret = GRO_HELD;
4554
4555pull:
4556 grow = skb_gro_offset(skb) - skb_headlen(skb);
4557 if (grow > 0)
4558 gro_pull_from_frag0(skb, grow);
4559ok:
4560 return ret;
4561
4562normal:
4563 ret = GRO_NORMAL;
4564 goto pull;
4565}
4566
4567struct packet_offload *gro_find_receive_by_type(__be16 type)
4568{
4569 struct list_head *offload_head = &offload_base;
4570 struct packet_offload *ptype;
4571
4572 list_for_each_entry_rcu(ptype, offload_head, list) {
4573 if (ptype->type != type || !ptype->callbacks.gro_receive)
4574 continue;
4575 return ptype;
4576 }
4577 return NULL;
4578}
4579EXPORT_SYMBOL(gro_find_receive_by_type);
4580
4581struct packet_offload *gro_find_complete_by_type(__be16 type)
4582{
4583 struct list_head *offload_head = &offload_base;
4584 struct packet_offload *ptype;
4585
4586 list_for_each_entry_rcu(ptype, offload_head, list) {
4587 if (ptype->type != type || !ptype->callbacks.gro_complete)
4588 continue;
4589 return ptype;
4590 }
4591 return NULL;
4592}
4593EXPORT_SYMBOL(gro_find_complete_by_type);
4594
4595static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4596{
4597 switch (ret) {
4598 case GRO_NORMAL:
4599 if (netif_receive_skb_internal(skb))
4600 ret = GRO_DROP;
4601 break;
4602
4603 case GRO_DROP:
4604 kfree_skb(skb);
4605 break;
4606
4607 case GRO_MERGED_FREE:
4608 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
4609 skb_dst_drop(skb);
4610 kmem_cache_free(skbuff_head_cache, skb);
4611 } else {
4612 __kfree_skb(skb);
4613 }
4614 break;
4615
4616 case GRO_HELD:
4617 case GRO_MERGED:
4618 break;
4619 }
4620
4621 return ret;
4622}
4623
4624gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4625{
4626 skb_mark_napi_id(skb, napi);
4627 trace_napi_gro_receive_entry(skb);
4628
4629 skb_gro_reset_offset(skb);
4630
4631 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
4632}
4633EXPORT_SYMBOL(napi_gro_receive);
4634
4635static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
4636{
4637 if (unlikely(skb->pfmemalloc)) {
4638 consume_skb(skb);
4639 return;
4640 }
4641 __skb_pull(skb, skb_headlen(skb));
4642
4643 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
4644 skb->vlan_tci = 0;
4645 skb->dev = napi->dev;
4646 skb->skb_iif = 0;
4647 skb->encapsulation = 0;
4648 skb_shinfo(skb)->gso_type = 0;
4649 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
4650
4651 napi->skb = skb;
4652}
4653
4654struct sk_buff *napi_get_frags(struct napi_struct *napi)
4655{
4656 struct sk_buff *skb = napi->skb;
4657
4658 if (!skb) {
4659 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
4660 if (skb) {
4661 napi->skb = skb;
4662 skb_mark_napi_id(skb, napi);
4663 }
4664 }
4665 return skb;
4666}
4667EXPORT_SYMBOL(napi_get_frags);
4668
4669static gro_result_t napi_frags_finish(struct napi_struct *napi,
4670 struct sk_buff *skb,
4671 gro_result_t ret)
4672{
4673 switch (ret) {
4674 case GRO_NORMAL:
4675 case GRO_HELD:
4676 __skb_push(skb, ETH_HLEN);
4677 skb->protocol = eth_type_trans(skb, skb->dev);
4678 if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
4679 ret = GRO_DROP;
4680 break;
4681
4682 case GRO_DROP:
4683 case GRO_MERGED_FREE:
4684 napi_reuse_skb(napi, skb);
4685 break;
4686
4687 case GRO_MERGED:
4688 break;
4689 }
4690
4691 return ret;
4692}
4693
4694
4695
4696
4697
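/* The upper GRO stack assumes the network header starts at gro_offset 0.
 * Drivers may call both napi_gro_frags() and napi_gro_receive(), so the
 * Ethernet header is copied (or pulled from frag0) into skb->data to get
 * a common layout before handing the skb to dev_gro_receive().
 */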
4698static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4699{
4700 struct sk_buff *skb = napi->skb;
4701 const struct ethhdr *eth;
4702 unsigned int hlen = sizeof(*eth);
4703
4704 napi->skb = NULL;
4705
4706 skb_reset_mac_header(skb);
4707 skb_gro_reset_offset(skb);
4708
4709 eth = skb_gro_header_fast(skb, 0);
4710 if (unlikely(skb_gro_header_hard(skb, hlen))) {
4711 eth = skb_gro_header_slow(skb, hlen, 0);
4712 if (unlikely(!eth)) {
4713 net_warn_ratelimited("%s: dropping impossible skb from %s\n",
4714 __func__, napi->dev->name);
4715 napi_reuse_skb(napi, skb);
4716 return NULL;
4717 }
4718 } else {
4719 gro_pull_from_frag0(skb, hlen);
4720 NAPI_GRO_CB(skb)->frag0 += hlen;
4721 NAPI_GRO_CB(skb)->frag0_len -= hlen;
4722 }
4723 __skb_pull(skb, hlen);
4724
4725
4726
4727
4728
4729
4730 skb->protocol = eth->h_proto;
4731
4732 return skb;
4733}
4734
4735gro_result_t napi_gro_frags(struct napi_struct *napi)
4736{
4737 struct sk_buff *skb = napi_frags_skb(napi);
4738
4739 if (!skb)
4740 return GRO_DROP;
4741
4742 trace_napi_gro_frags_entry(skb);
4743
4744 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4745}
4746EXPORT_SYMBOL(napi_gro_frags);
4747
4748
4749
4750
4751__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
4752{
4753 __wsum wsum;
4754 __sum16 sum;
4755
4756 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
4757
4758
4759 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
4760 if (likely(!sum)) {
4761 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
4762 !skb->csum_complete_sw)
4763 netdev_rx_csum_fault(skb->dev);
4764 }
4765
4766 NAPI_GRO_CB(skb)->csum = wsum;
4767 NAPI_GRO_CB(skb)->csum_valid = 1;
4768
4769 return sum;
4770}
4771EXPORT_SYMBOL(__skb_gro_checksum_complete);
4772
4773
4774
4775
4776
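/*
 * net_rps_action_and_irq_enable sends any pending RPS IPIs.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */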
4777static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4778{
4779#ifdef CONFIG_RPS
4780 struct softnet_data *remsd = sd->rps_ipi_list;
4781
4782 if (remsd) {
4783 sd->rps_ipi_list = NULL;
4784
4785 local_irq_enable();
4786
4787
4788 while (remsd) {
4789 struct softnet_data *next = remsd->rps_ipi_next;
4790
4791 if (cpu_online(remsd->cpu))
4792 smp_call_function_single_async(remsd->cpu,
4793 &remsd->csd);
4794 remsd = next;
4795 }
4796 } else
4797#endif
4798 local_irq_enable();
4799}
4800
4801static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
4802{
4803#ifdef CONFIG_RPS
4804 return sd->rps_ipi_list != NULL;
4805#else
4806 return false;
4807#endif
4808}
4809
4810static int process_backlog(struct napi_struct *napi, int quota)
4811{
4812 int work = 0;
4813 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4814
4815
4816
4817
4818 if (sd_has_rps_ipi_waiting(sd)) {
4819 local_irq_disable();
4820 net_rps_action_and_irq_enable(sd);
4821 }
4822
4823 napi->weight = weight_p;
4824 local_irq_disable();
4825 while (1) {
4826 struct sk_buff *skb;
4827
4828 while ((skb = __skb_dequeue(&sd->process_queue))) {
4829 rcu_read_lock();
4830 local_irq_enable();
4831 __netif_receive_skb(skb);
4832 rcu_read_unlock();
4833 local_irq_disable();
4834 input_queue_head_incr(sd);
4835 if (++work >= quota) {
4836 local_irq_enable();
4837 return work;
4838 }
4839 }
4840
4841 rps_lock(sd);
4842 if (skb_queue_empty(&sd->input_pkt_queue)) {
4843
4844
4845
4846
4847
4848
4849
4850
4851 napi->state = 0;
4852 rps_unlock(sd);
4853
4854 break;
4855 }
4856
4857 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4858 &sd->process_queue);
4859 rps_unlock(sd);
4860 }
4861 local_irq_enable();
4862
4863 return work;
4864}
4865
4866
4867
4868
4869
4870
4871
4872
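/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run from softirq
 * context.  Consider __napi_schedule_irqoff() if hard irqs are already
 * masked.
 */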
4873void __napi_schedule(struct napi_struct *n)
4874{
4875 unsigned long flags;
4876
4877 local_irq_save(flags);
4878 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4879 local_irq_restore(flags);
4880}
4881EXPORT_SYMBOL(__napi_schedule);
4882
4883
4884
4885
4886
4887
4888
4889void __napi_schedule_irqoff(struct napi_struct *n)
4890{
4891 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4892}
4893EXPORT_SYMBOL(__napi_schedule_irqoff);
4894
4895void __napi_complete(struct napi_struct *n)
4896{
4897 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4898
4899 list_del_init(&n->poll_list);
4900 smp_mb__before_atomic();
4901 clear_bit(NAPI_STATE_SCHED, &n->state);
4902}
4903EXPORT_SYMBOL(__napi_complete);
4904
4905void napi_complete_done(struct napi_struct *n, int work_done)
4906{
4907 unsigned long flags;
4908
4909
4910
4911
4912
4913 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4914 return;
4915
4916 if (n->gro_list) {
4917 unsigned long timeout = 0;
4918
4919 if (work_done)
4920 timeout = n->dev->gro_flush_timeout;
4921
4922 if (timeout)
4923 hrtimer_start(&n->timer, ns_to_ktime(timeout),
4924 HRTIMER_MODE_REL_PINNED);
4925 else
4926 napi_gro_flush(n, false);
4927 }
4928 if (likely(list_empty(&n->poll_list))) {
4929 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
4930 } else {
4931
4932 local_irq_save(flags);
4933 __napi_complete(n);
4934 local_irq_restore(flags);
4935 }
4936}
4937EXPORT_SYMBOL(napi_complete_done);
4938
4939
4940static struct napi_struct *napi_by_id(unsigned int napi_id)
4941{
4942 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4943 struct napi_struct *napi;
4944
4945 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4946 if (napi->napi_id == napi_id)
4947 return napi;
4948
4949 return NULL;
4950}
4951
4952#if defined(CONFIG_NET_RX_BUSY_POLL)
4953#define BUSY_POLL_BUDGET 8
4954bool sk_busy_loop(struct sock *sk, int nonblock)
4955{
4956 unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
4957 int (*busy_poll)(struct napi_struct *dev);
4958 struct napi_struct *napi;
4959 int rc = false;
4960
4961 rcu_read_lock();
4962
4963 napi = napi_by_id(sk->sk_napi_id);
4964 if (!napi)
4965 goto out;
4966
4967
4968 busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
4969
4970 do {
4971 rc = 0;
4972 local_bh_disable();
4973 if (busy_poll) {
4974 rc = busy_poll(napi);
4975 } else if (napi_schedule_prep(napi)) {
4976 void *have = netpoll_poll_lock(napi);
4977
4978 if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
4979 rc = napi->poll(napi, BUSY_POLL_BUDGET);
4980 trace_napi_poll(napi);
4981 if (rc == BUSY_POLL_BUDGET) {
4982 napi_complete_done(napi, rc);
4983 napi_schedule(napi);
4984 }
4985 }
4986 netpoll_poll_unlock(have);
4987 }
4988 if (rc > 0)
4989 __NET_ADD_STATS(sock_net(sk),
4990 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
4991 local_bh_enable();
4992
4993 if (rc == LL_FLUSH_FAILED)
4994 break;
4995
4996 cpu_relax();
4997 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
4998 !need_resched() && !busy_loop_timeout(end_time));
4999
5000 rc = !skb_queue_empty(&sk->sk_receive_queue);
5001out:
5002 rcu_read_unlock();
5003 return rc;
5004}
5005EXPORT_SYMBOL(sk_busy_loop);
5006
5007#endif
5008
5009void napi_hash_add(struct napi_struct *napi)
5010{
5011 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
5012 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
5013 return;
5014
5015 spin_lock(&napi_hash_lock);
5016
5017
5018 do {
5019 if (unlikely(++napi_gen_id < NR_CPUS + 1))
5020 napi_gen_id = NR_CPUS + 1;
5021 } while (napi_by_id(napi_gen_id));
5022 napi->napi_id = napi_gen_id;
5023
5024 hlist_add_head_rcu(&napi->napi_hash_node,
5025 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
5026
5027 spin_unlock(&napi_hash_lock);
5028}
5029EXPORT_SYMBOL_GPL(napi_hash_add);
5030
5031
5032
5033
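/* Warning: the caller is responsible for observing an RCU grace period
 * before freeing the memory containing @napi when this returns true
 * (netif_napi_del() below does this via synchronize_net()).
 */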
5034bool napi_hash_del(struct napi_struct *napi)
5035{
5036 bool rcu_sync_needed = false;
5037
5038 spin_lock(&napi_hash_lock);
5039
5040 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
5041 rcu_sync_needed = true;
5042 hlist_del_rcu(&napi->napi_hash_node);
5043 }
5044 spin_unlock(&napi_hash_lock);
5045 return rcu_sync_needed;
5046}
5047EXPORT_SYMBOL_GPL(napi_hash_del);
5048
5049static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
5050{
5051 struct napi_struct *napi;
5052
5053 napi = container_of(timer, struct napi_struct, timer);
5054 if (napi->gro_list)
5055 napi_schedule(napi);
5056
5057 return HRTIMER_NORESTART;
5058}
5059
5060void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
5061 int (*poll)(struct napi_struct *, int), int weight)
5062{
5063 INIT_LIST_HEAD(&napi->poll_list);
5064 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
5065 napi->timer.function = napi_watchdog;
5066 napi->gro_count = 0;
5067 napi->gro_list = NULL;
5068 napi->skb = NULL;
5069 napi->poll = poll;
5070 if (weight > NAPI_POLL_WEIGHT)
5071 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
5072 weight, dev->name);
5073 napi->weight = weight;
5074 list_add(&napi->dev_list, &dev->napi_list);
5075 napi->dev = dev;
5076#ifdef CONFIG_NETPOLL
5077 spin_lock_init(&napi->poll_lock);
5078 napi->poll_owner = -1;
5079#endif
5080 set_bit(NAPI_STATE_SCHED, &napi->state);
5081 napi_hash_add(napi);
5082}
5083EXPORT_SYMBOL(netif_napi_add);
5084
5085void napi_disable(struct napi_struct *n)
5086{
5087 might_sleep();
5088 set_bit(NAPI_STATE_DISABLE, &n->state);
5089
5090 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
5091 msleep(1);
5092 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
5093 msleep(1);
5094
5095 hrtimer_cancel(&n->timer);
5096
5097 clear_bit(NAPI_STATE_DISABLE, &n->state);
5098}
5099EXPORT_SYMBOL(napi_disable);
5100
5101
5102void netif_napi_del(struct napi_struct *napi)
5103{
5104 might_sleep();
5105 if (napi_hash_del(napi))
5106 synchronize_net();
5107 list_del_init(&napi->dev_list);
5108 napi_free_frags(napi);
5109
5110 kfree_skb_list(napi->gro_list);
5111 napi->gro_list = NULL;
5112 napi->gro_count = 0;
5113}
5114EXPORT_SYMBOL(netif_napi_del);
5115
5116static int napi_poll(struct napi_struct *n, struct list_head *repoll)
5117{
5118 void *have;
5119 int work, weight;
5120
5121 list_del_init(&n->poll_list);
5122
5123 have = netpoll_poll_lock(n);
5124
5125 weight = n->weight;
5126
5127
5128
5129
5130
5131
5132
5133 work = 0;
5134 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
5135 work = n->poll(n, weight);
5136 trace_napi_poll(n);
5137 }
5138
5139 WARN_ON_ONCE(work > weight);
5140
5141 if (likely(work < weight))
5142 goto out_unlock;
5143
5144
5145
5146
5147
5148
5149 if (unlikely(napi_disable_pending(n))) {
5150 napi_complete(n);
5151 goto out_unlock;
5152 }
5153
5154 if (n->gro_list) {
5155
5156
5157
5158 napi_gro_flush(n, HZ >= 1000);
5159 }
5160
5161
5162
5163
5164 if (unlikely(!list_empty(&n->poll_list))) {
5165 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
5166 n->dev ? n->dev->name : "backlog");
5167 goto out_unlock;
5168 }
5169
5170 list_add_tail(&n->poll_list, repoll);
5171
5172out_unlock:
5173 netpoll_poll_unlock(have);
5174
5175 return work;
5176}
5177
5178static void net_rx_action(struct softirq_action *h)
5179{
5180 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
5181 unsigned long time_limit = jiffies + 2;
5182 int budget = netdev_budget;
5183 LIST_HEAD(list);
5184 LIST_HEAD(repoll);
5185
5186 local_irq_disable();
5187 list_splice_init(&sd->poll_list, &list);
5188 local_irq_enable();
5189
5190 for (;;) {
5191 struct napi_struct *n;
5192
5193 if (list_empty(&list)) {
5194 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
5195 return;
5196 break;
5197 }
5198
5199 n = list_first_entry(&list, struct napi_struct, poll_list);
5200 budget -= napi_poll(n, &repoll);
5201
5202
5203
5204
5205
5206 if (unlikely(budget <= 0 ||
5207 time_after_eq(jiffies, time_limit))) {
5208 sd->time_squeeze++;
5209 break;
5210 }
5211 }
5212
5213 __kfree_skb_flush();
5214 local_irq_disable();
5215
5216 list_splice_tail_init(&sd->poll_list, &list);
5217 list_splice_tail(&repoll, &list);
5218 list_splice(&list, &sd->poll_list);
5219 if (!list_empty(&sd->poll_list))
5220 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
5221
5222 net_rps_action_and_irq_enable(sd);
5223}
5224
5225struct netdev_adjacent {
5226 struct net_device *dev;
5227
5228
5229 bool master;
5230
5231
5232 u16 ref_nr;
5233
5234
5235 void *private;
5236
5237 struct list_head list;
5238 struct rcu_head rcu;
5239};
5240
5241static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
5242 struct list_head *adj_list)
5243{
5244 struct netdev_adjacent *adj;
5245
5246 list_for_each_entry(adj, adj_list, list) {
5247 if (adj->dev == adj_dev)
5248 return adj;
5249 }
5250 return NULL;
5251}
5252
5253
5254
5255
5256
5257
5258
5259
5260
5261
5262bool netdev_has_upper_dev(struct net_device *dev,
5263 struct net_device *upper_dev)
5264{
5265 ASSERT_RTNL();
5266
5267 return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
5268}
5269EXPORT_SYMBOL(netdev_has_upper_dev);
5270
5271
5272
5273
5274
5275
5276
5277
5278static bool netdev_has_any_upper_dev(struct net_device *dev)
5279{
5280 ASSERT_RTNL();
5281
5282 return !list_empty(&dev->all_adj_list.upper);
5283}
5284
5285
5286
5287
5288
5289
5290
5291
5292struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
5293{
5294 struct netdev_adjacent *upper;
5295
5296 ASSERT_RTNL();
5297
5298 if (list_empty(&dev->adj_list.upper))
5299 return NULL;
5300
5301 upper = list_first_entry(&dev->adj_list.upper,
5302 struct netdev_adjacent, list);
5303 if (likely(upper->master))
5304 return upper->dev;
5305 return NULL;
5306}
5307EXPORT_SYMBOL(netdev_master_upper_dev_get);
5308
5309void *netdev_adjacent_get_private(struct list_head *adj_list)
5310{
5311 struct netdev_adjacent *adj;
5312
5313 adj = list_entry(adj_list, struct netdev_adjacent, list);
5314
5315 return adj->private;
5316}
5317EXPORT_SYMBOL(netdev_adjacent_get_private);
5318
5319
5320
5321
5322
5323
5324
5325
5326
5327struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
5328 struct list_head **iter)
5329{
5330 struct netdev_adjacent *upper;
5331
5332 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5333
5334 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5335
5336 if (&upper->list == &dev->adj_list.upper)
5337 return NULL;
5338
5339 *iter = &upper->list;
5340
5341 return upper->dev;
5342}
5343EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
5344
5345
5346
5347
5348
5349
5350
5351
5352
5353struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
5354 struct list_head **iter)
5355{
5356 struct netdev_adjacent *upper;
5357
5358 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5359
5360 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5361
5362 if (&upper->list == &dev->all_adj_list.upper)
5363 return NULL;
5364
5365 *iter = &upper->list;
5366
5367 return upper->dev;
5368}
5369EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
5370
5371
5372
5373
5374
5375
5376
5377
5378
5379
5380
5381
5382void *netdev_lower_get_next_private(struct net_device *dev,
5383 struct list_head **iter)
5384{
5385 struct netdev_adjacent *lower;
5386
5387 lower = list_entry(*iter, struct netdev_adjacent, list);
5388
5389 if (&lower->list == &dev->adj_list.lower)
5390 return NULL;
5391
5392 *iter = lower->list.next;
5393
5394 return lower->private;
5395}
5396EXPORT_SYMBOL(netdev_lower_get_next_private);
5397
/**
 * netdev_lower_get_next_private_rcu - Get the next ->private from the
 *				       lower neighbour list, RCU variant
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent->private from the dev's lower neighbour
 * list, starting from iter position. The caller must hold RCU read lock.
 */
5408void *netdev_lower_get_next_private_rcu(struct net_device *dev,
5409 struct list_head **iter)
5410{
5411 struct netdev_adjacent *lower;
5412
5413 WARN_ON_ONCE(!rcu_read_lock_held());
5414
5415 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5416
5417 if (&lower->list == &dev->adj_list.lower)
5418 return NULL;
5419
5420 *iter = &lower->list;
5421
5422 return lower->private;
5423}
5424EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
5425
/**
 * netdev_lower_get_next - Get the next device from the lower neighbour list
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent from the dev's lower neighbour list,
 * starting from iter position. The caller must hold RTNL lock or its own
 * locking that guarantees that the neighbour lower list will remain
 * unchanged.
 */
5437void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5438{
5439 struct netdev_adjacent *lower;
5440
5441 lower = list_entry(*iter, struct netdev_adjacent, list);
5442
5443 if (&lower->list == &dev->adj_list.lower)
5444 return NULL;
5445
5446 *iter = lower->list.next;
5447
5448 return lower->dev;
5449}
5450EXPORT_SYMBOL(netdev_lower_get_next);
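/*
 * Illustrative sketch (not from the original file): iterating the immediate
 * lower devices with netdev_for_each_lower_dev(), the wrapper around this
 * iterator used by dev_get_nest_level() below. RTNL (or equivalent locking)
 * must be held so the list cannot change underneath us.
 *
 *	struct net_device *lower;
 *	struct list_head *iter;
 *
 *	ASSERT_RTNL();
 *	netdev_for_each_lower_dev(dev, lower, iter)
 *		pr_info("%s has lower device %s\n", dev->name, lower->name);
 */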
5451
/**
 * netdev_lower_get_first_private_rcu - Get the first ->private from the
 *					lower neighbour list, RCU variant
 * @dev: device
 *
 * Gets the first netdev_adjacent->private from the dev's lower neighbour
 * list. The caller must hold RCU read lock.
 */
5461void *netdev_lower_get_first_private_rcu(struct net_device *dev)
5462{
5463 struct netdev_adjacent *lower;
5464
5465 lower = list_first_or_null_rcu(&dev->adj_list.lower,
5466 struct netdev_adjacent, list);
5467 if (lower)
5468 return lower->private;
5469 return NULL;
5470}
5471EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
5472
/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return pointer to it or NULL in case
 * it's not there. The caller must hold the RCU read lock.
 */
5480struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
5481{
5482 struct netdev_adjacent *upper;
5483
5484 upper = list_first_or_null_rcu(&dev->adj_list.upper,
5485 struct netdev_adjacent, list);
5486 if (upper && likely(upper->master))
5487 return upper->dev;
5488 return NULL;
5489}
5490EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
5491
5492static int netdev_adjacent_sysfs_add(struct net_device *dev,
5493 struct net_device *adj_dev,
5494 struct list_head *dev_list)
5495{
5496 char linkname[IFNAMSIZ+7];
5497 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5498 "upper_%s" : "lower_%s", adj_dev->name);
5499 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
5500 linkname);
5501}
5502static void netdev_adjacent_sysfs_del(struct net_device *dev,
5503 char *name,
5504 struct list_head *dev_list)
5505{
5506 char linkname[IFNAMSIZ+7];
5507 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5508 "upper_%s" : "lower_%s", name);
5509 sysfs_remove_link(&(dev->dev.kobj), linkname);
5510}
5511
5512static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
5513 struct net_device *adj_dev,
5514 struct list_head *dev_list)
5515{
5516 return (dev_list == &dev->adj_list.upper ||
5517 dev_list == &dev->adj_list.lower) &&
5518 net_eq(dev_net(dev), dev_net(adj_dev));
5519}
5520
5521static int __netdev_adjacent_dev_insert(struct net_device *dev,
5522 struct net_device *adj_dev,
5523 struct list_head *dev_list,
5524 void *private, bool master)
5525{
5526 struct netdev_adjacent *adj;
5527 int ret;
5528
5529 adj = __netdev_find_adj(adj_dev, dev_list);
5530
5531 if (adj) {
5532 adj->ref_nr++;
5533 return 0;
5534 }
5535
5536 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
5537 if (!adj)
5538 return -ENOMEM;
5539
5540 adj->dev = adj_dev;
5541 adj->master = master;
5542 adj->ref_nr = 1;
5543 adj->private = private;
5544 dev_hold(adj_dev);
5545
5546 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
5547 adj_dev->name, dev->name, adj_dev->name);
5548
5549 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
5550 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
5551 if (ret)
5552 goto free_adj;
5553 }
5554
5555
5556 if (master) {
5557 ret = sysfs_create_link(&(dev->dev.kobj),
5558 &(adj_dev->dev.kobj), "master");
5559 if (ret)
5560 goto remove_symlinks;
5561
5562 list_add_rcu(&adj->list, dev_list);
5563 } else {
5564 list_add_tail_rcu(&adj->list, dev_list);
5565 }
5566
5567 return 0;
5568
5569remove_symlinks:
5570 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5571 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5572free_adj:
5573 kfree(adj);
5574 dev_put(adj_dev);
5575
5576 return ret;
5577}
5578
5579static void __netdev_adjacent_dev_remove(struct net_device *dev,
5580 struct net_device *adj_dev,
5581 struct list_head *dev_list)
5582{
5583 struct netdev_adjacent *adj;
5584
5585 adj = __netdev_find_adj(adj_dev, dev_list);
5586
5587 if (!adj) {
5588 pr_err("tried to remove device %s from %s\n",
5589 dev->name, adj_dev->name);
5590 BUG();
5591 }
5592
5593 if (adj->ref_nr > 1) {
5594 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
5595 adj->ref_nr-1);
5596 adj->ref_nr--;
5597 return;
5598 }
5599
5600 if (adj->master)
5601 sysfs_remove_link(&(dev->dev.kobj), "master");
5602
5603 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5604 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5605
5606 list_del_rcu(&adj->list);
5607 pr_debug("dev_put for %s, because link removed from %s to %s\n",
5608 adj_dev->name, dev->name, adj_dev->name);
5609 dev_put(adj_dev);
5610 kfree_rcu(adj, rcu);
5611}
5612
5613static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5614 struct net_device *upper_dev,
5615 struct list_head *up_list,
5616 struct list_head *down_list,
5617 void *private, bool master)
5618{
5619 int ret;
5620
5621 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
5622 master);
5623 if (ret)
5624 return ret;
5625
5626 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
5627 false);
5628 if (ret) {
5629 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5630 return ret;
5631 }
5632
5633 return 0;
5634}
5635
5636static int __netdev_adjacent_dev_link(struct net_device *dev,
5637 struct net_device *upper_dev)
5638{
5639 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
5640 &dev->all_adj_list.upper,
5641 &upper_dev->all_adj_list.lower,
5642 NULL, false);
5643}
5644
5645static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5646 struct net_device *upper_dev,
5647 struct list_head *up_list,
5648 struct list_head *down_list)
5649{
5650 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5651 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
5652}
5653
5654static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5655 struct net_device *upper_dev)
5656{
5657 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5658 &dev->all_adj_list.upper,
5659 &upper_dev->all_adj_list.lower);
5660}
5661
5662static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5663 struct net_device *upper_dev,
5664 void *private, bool master)
5665{
5666 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
5667
5668 if (ret)
5669 return ret;
5670
5671 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
5672 &dev->adj_list.upper,
5673 &upper_dev->adj_list.lower,
5674 private, master);
5675 if (ret) {
5676 __netdev_adjacent_dev_unlink(dev, upper_dev);
5677 return ret;
5678 }
5679
5680 return 0;
5681}
5682
5683static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5684 struct net_device *upper_dev)
5685{
5686 __netdev_adjacent_dev_unlink(dev, upper_dev);
5687 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5688 &dev->adj_list.upper,
5689 &upper_dev->adj_list.lower);
5690}
5691
5692static int __netdev_upper_dev_link(struct net_device *dev,
5693 struct net_device *upper_dev, bool master,
5694 void *upper_priv, void *upper_info)
5695{
5696 struct netdev_notifier_changeupper_info changeupper_info;
5697 struct netdev_adjacent *i, *j, *to_i, *to_j;
5698 int ret = 0;
5699
5700 ASSERT_RTNL();
5701
5702 if (dev == upper_dev)
5703 return -EBUSY;
5704
5705
5706 if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
5707 return -EBUSY;
5708
5709 if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
5710 return -EEXIST;
5711
5712 if (master && netdev_master_upper_dev_get(dev))
5713 return -EBUSY;
5714
5715 changeupper_info.upper_dev = upper_dev;
5716 changeupper_info.master = master;
5717 changeupper_info.linking = true;
5718 changeupper_info.upper_info = upper_info;
5719
5720 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5721 &changeupper_info.info);
5722 ret = notifier_to_errno(ret);
5723 if (ret)
5724 return ret;
5725
5726 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
5727 master);
5728 if (ret)
5729 return ret;
5730
5731
5732
5733
5734
5735
5736 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5737 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5738 pr_debug("Interlinking %s with %s, non-neighbour\n",
5739 i->dev->name, j->dev->name);
5740 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
5741 if (ret)
5742 goto rollback_mesh;
5743 }
5744 }
5745
5746
5747 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5748 pr_debug("linking %s's upper device %s with %s\n",
5749 upper_dev->name, i->dev->name, dev->name);
5750 ret = __netdev_adjacent_dev_link(dev, i->dev);
5751 if (ret)
5752 goto rollback_upper_mesh;
5753 }
5754
5755
5756 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5757 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5758 i->dev->name, upper_dev->name);
5759 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
5760 if (ret)
5761 goto rollback_lower_mesh;
5762 }
5763
5764 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5765 &changeupper_info.info);
5766 ret = notifier_to_errno(ret);
5767 if (ret)
5768 goto rollback_lower_mesh;
5769
5770 return 0;
5771
5772rollback_lower_mesh:
5773 to_i = i;
5774 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5775 if (i == to_i)
5776 break;
5777 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5778 }
5779
5780 i = NULL;
5781
5782rollback_upper_mesh:
5783 to_i = i;
5784 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5785 if (i == to_i)
5786 break;
5787 __netdev_adjacent_dev_unlink(dev, i->dev);
5788 }
5789
5790 i = j = NULL;
5791
5792rollback_mesh:
5793 to_i = i;
5794 to_j = j;
5795 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5796 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5797 if (i == to_i && j == to_j)
5798 break;
5799 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5800 }
5801 if (i == to_i)
5802 break;
5803 }
5804
5805 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5806
5807 return ret;
5808}
5809
/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to device which is upper to this one. The caller must hold
 * the RTNL lock. On a failure a negative errno code is returned.
 * On success the reference counts are adjusted and the function
 * returns zero.
 */
5820int netdev_upper_dev_link(struct net_device *dev,
5821 struct net_device *upper_dev)
5822{
5823 return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
5824}
5825EXPORT_SYMBOL(netdev_upper_dev_link);
5826
/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @upper_priv: upper device private
 * @upper_info: upper info to be passed down via notifier
 *
 * Adds a link to device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */
5840int netdev_master_upper_dev_link(struct net_device *dev,
5841 struct net_device *upper_dev,
5842 void *upper_priv, void *upper_info)
5843{
5844 return __netdev_upper_dev_link(dev, upper_dev, true,
5845 upper_priv, upper_info);
5846}
5847EXPORT_SYMBOL(netdev_master_upper_dev_link);
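/*
 * Illustrative sketch (not from the original file): an enslave path in a
 * bonding/team style driver. slave_dev, master_dev and slave_priv are
 * hypothetical names; the private pointer ends up in the adjacency entry
 * and can later be fetched with netdev_lower_dev_get_private().
 *
 *	err = netdev_master_upper_dev_link(slave_dev, master_dev,
 *					   slave_priv, NULL);
 *	if (err)
 *		return err;
 *	...
 *	netdev_upper_dev_unlink(slave_dev, master_dev);	// release path
 */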
5848
/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to device which is upper to this one. The caller must hold
 * the RTNL lock.
 */
5857void netdev_upper_dev_unlink(struct net_device *dev,
5858 struct net_device *upper_dev)
5859{
5860 struct netdev_notifier_changeupper_info changeupper_info;
5861 struct netdev_adjacent *i, *j;
5862 ASSERT_RTNL();
5863
5864 changeupper_info.upper_dev = upper_dev;
5865 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
5866 changeupper_info.linking = false;
5867
5868 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5869 &changeupper_info.info);
5870
5871 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5872
5873
5874
5875
5876
5877 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5878 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5879 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5880
5881
5882
5883
5884 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5885 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5886
5887 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5888 __netdev_adjacent_dev_unlink(dev, i->dev);
5889
5890 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5891 &changeupper_info.info);
5892}
5893EXPORT_SYMBOL(netdev_upper_dev_unlink);
5894
/**
 * netdev_bonding_info_change - Dispatch event about slave change
 * @dev: device
 * @bonding_info: info to dispatch
 *
 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */
5903void netdev_bonding_info_change(struct net_device *dev,
5904 struct netdev_bonding_info *bonding_info)
5905{
5906 struct netdev_notifier_bonding_info info;
5907
5908 memcpy(&info.bonding_info, bonding_info,
5909 sizeof(struct netdev_bonding_info));
5910 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
5911 &info.info);
5912}
5913EXPORT_SYMBOL(netdev_bonding_info_change);
5914
5915static void netdev_adjacent_add_links(struct net_device *dev)
5916{
5917 struct netdev_adjacent *iter;
5918
5919 struct net *net = dev_net(dev);
5920
5921 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5922 if (!net_eq(net,dev_net(iter->dev)))
5923 continue;
5924 netdev_adjacent_sysfs_add(iter->dev, dev,
5925 &iter->dev->adj_list.lower);
5926 netdev_adjacent_sysfs_add(dev, iter->dev,
5927 &dev->adj_list.upper);
5928 }
5929
5930 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5931 if (!net_eq(net,dev_net(iter->dev)))
5932 continue;
5933 netdev_adjacent_sysfs_add(iter->dev, dev,
5934 &iter->dev->adj_list.upper);
5935 netdev_adjacent_sysfs_add(dev, iter->dev,
5936 &dev->adj_list.lower);
5937 }
5938}
5939
5940static void netdev_adjacent_del_links(struct net_device *dev)
5941{
5942 struct netdev_adjacent *iter;
5943
5944 struct net *net = dev_net(dev);
5945
5946 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5947 if (!net_eq(net,dev_net(iter->dev)))
5948 continue;
5949 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5950 &iter->dev->adj_list.lower);
5951 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5952 &dev->adj_list.upper);
5953 }
5954
5955 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5956 if (!net_eq(net,dev_net(iter->dev)))
5957 continue;
5958 netdev_adjacent_sysfs_del(iter->dev, dev->name,
5959 &iter->dev->adj_list.upper);
5960 netdev_adjacent_sysfs_del(dev, iter->dev->name,
5961 &dev->adj_list.lower);
5962 }
5963}
5964
5965void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
5966{
5967 struct netdev_adjacent *iter;
5968
5969 struct net *net = dev_net(dev);
5970
5971 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5972 if (!net_eq(net,dev_net(iter->dev)))
5973 continue;
5974 netdev_adjacent_sysfs_del(iter->dev, oldname,
5975 &iter->dev->adj_list.lower);
5976 netdev_adjacent_sysfs_add(iter->dev, dev,
5977 &iter->dev->adj_list.lower);
5978 }
5979
5980 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5981 if (!net_eq(net,dev_net(iter->dev)))
5982 continue;
5983 netdev_adjacent_sysfs_del(iter->dev, oldname,
5984 &iter->dev->adj_list.upper);
5985 netdev_adjacent_sysfs_add(iter->dev, dev,
5986 &iter->dev->adj_list.upper);
5987 }
5988}
5989
5990void *netdev_lower_dev_get_private(struct net_device *dev,
5991 struct net_device *lower_dev)
5992{
5993 struct netdev_adjacent *lower;
5994
5995 if (!lower_dev)
5996 return NULL;
5997 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
5998 if (!lower)
5999 return NULL;
6000
6001 return lower->private;
6002}
6003EXPORT_SYMBOL(netdev_lower_dev_get_private);
6004
6005
6006int dev_get_nest_level(struct net_device *dev,
6007 bool (*type_check)(const struct net_device *dev))
6008{
6009 struct net_device *lower = NULL;
6010 struct list_head *iter;
6011 int max_nest = -1;
6012 int nest;
6013
6014 ASSERT_RTNL();
6015
6016 netdev_for_each_lower_dev(dev, lower, iter) {
6017 nest = dev_get_nest_level(lower, type_check);
6018 if (max_nest < nest)
6019 max_nest = nest;
6020 }
6021
6022 if (type_check(dev))
6023 max_nest++;
6024
6025 return max_nest;
6026}
6027EXPORT_SYMBOL(dev_get_nest_level);
6028
/**
 * netdev_lower_state_changed - Dispatch event about lower device state change
 * @lower_dev: device
 * @lower_state_info: state to dispatch
 *
 * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */
6037void netdev_lower_state_changed(struct net_device *lower_dev,
6038 void *lower_state_info)
6039{
6040 struct netdev_notifier_changelowerstate_info changelowerstate_info;
6041
6042 ASSERT_RTNL();
6043 changelowerstate_info.lower_state_info = lower_state_info;
6044 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
6045 &changelowerstate_info.info);
6046}
6047EXPORT_SYMBOL(netdev_lower_state_changed);
6048
6049static void dev_change_rx_flags(struct net_device *dev, int flags)
6050{
6051 const struct net_device_ops *ops = dev->netdev_ops;
6052
6053 if (ops->ndo_change_rx_flags)
6054 ops->ndo_change_rx_flags(dev, flags);
6055}
6056
6057static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
6058{
6059 unsigned int old_flags = dev->flags;
6060 kuid_t uid;
6061 kgid_t gid;
6062
6063 ASSERT_RTNL();
6064
6065 dev->flags |= IFF_PROMISC;
6066 dev->promiscuity += inc;
6067 if (dev->promiscuity == 0) {
		/* Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
6072 if (inc < 0)
6073 dev->flags &= ~IFF_PROMISC;
6074 else {
6075 dev->promiscuity -= inc;
6076 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
6077 dev->name);
6078 return -EOVERFLOW;
6079 }
6080 }
6081 if (dev->flags != old_flags) {
6082 pr_info("device %s %s promiscuous mode\n",
6083 dev->name,
6084 dev->flags & IFF_PROMISC ? "entered" : "left");
6085 if (audit_enabled) {
6086 current_uid_gid(&uid, &gid);
6087 audit_log(current->audit_context, GFP_ATOMIC,
6088 AUDIT_ANOM_PROMISCUOUS,
6089 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
6090 dev->name, (dev->flags & IFF_PROMISC),
6091 (old_flags & IFF_PROMISC),
6092 from_kuid(&init_user_ns, audit_get_loginuid(current)),
6093 from_kuid(&init_user_ns, uid),
6094 from_kgid(&init_user_ns, gid),
6095 audit_get_sessionid(current));
6096 }
6097
6098 dev_change_rx_flags(dev, IFF_PROMISC);
6099 }
6100 if (notify)
6101 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
6102 return 0;
6103}
6104
/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */
6116int dev_set_promiscuity(struct net_device *dev, int inc)
6117{
6118 unsigned int old_flags = dev->flags;
6119 int err;
6120
6121 err = __dev_set_promiscuity(dev, inc, true);
6122 if (err < 0)
6123 return err;
6124 if (dev->flags != old_flags)
6125 dev_set_rx_mode(dev);
6126 return err;
6127}
6128EXPORT_SYMBOL(dev_set_promiscuity);
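/*
 * Illustrative sketch (not from the original file): a capture-style user
 * takes one promiscuity reference for the duration of its session; the
 * counter lets several such users nest without clearing IFF_PROMISC early.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	// start sniffing
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		// done, drop our reference
 *	rtnl_unlock();
 */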
6129
6130static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
6131{
6132 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
6133
6134 ASSERT_RTNL();
6135
6136 dev->flags |= IFF_ALLMULTI;
6137 dev->allmulti += inc;
6138 if (dev->allmulti == 0) {
		/* Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
6143 if (inc < 0)
6144 dev->flags &= ~IFF_ALLMULTI;
6145 else {
6146 dev->allmulti -= inc;
6147 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
6148 dev->name);
6149 return -EOVERFLOW;
6150 }
6151 }
6152 if (dev->flags ^ old_flags) {
6153 dev_change_rx_flags(dev, IFF_ALLMULTI);
6154 dev_set_rx_mode(dev);
6155 if (notify)
6156 __dev_notify_flags(dev, old_flags,
6157 dev->gflags ^ old_gflags);
6158 }
6159 return 0;
6160}
6161
/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */
6175int dev_set_allmulti(struct net_device *dev, int inc)
6176{
6177 return __dev_set_allmulti(dev, inc, true);
6178}
6179EXPORT_SYMBOL(dev_set_allmulti);
6180
/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */
6187void __dev_set_rx_mode(struct net_device *dev)
6188{
6189 const struct net_device_ops *ops = dev->netdev_ops;
6190
6191
6192 if (!(dev->flags&IFF_UP))
6193 return;
6194
6195 if (!netif_device_present(dev))
6196 return;
6197
6198 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
6199
6200
6201
6202 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
6203 __dev_set_promiscuity(dev, 1, false);
6204 dev->uc_promisc = true;
6205 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
6206 __dev_set_promiscuity(dev, -1, false);
6207 dev->uc_promisc = false;
6208 }
6209 }
6210
6211 if (ops->ndo_set_rx_mode)
6212 ops->ndo_set_rx_mode(dev);
6213}
6214
6215void dev_set_rx_mode(struct net_device *dev)
6216{
6217 netif_addr_lock_bh(dev);
6218 __dev_set_rx_mode(dev);
6219 netif_addr_unlock_bh(dev);
6220}
6221
/**
 *	dev_get_flags - get flags reported to userspace
 *	@dev: device
 *
 *	Get the combination of flag bits exported through APIs to userspace.
 */
6228unsigned int dev_get_flags(const struct net_device *dev)
6229{
6230 unsigned int flags;
6231
6232 flags = (dev->flags & ~(IFF_PROMISC |
6233 IFF_ALLMULTI |
6234 IFF_RUNNING |
6235 IFF_LOWER_UP |
6236 IFF_DORMANT)) |
6237 (dev->gflags & (IFF_PROMISC |
6238 IFF_ALLMULTI));
6239
6240 if (netif_running(dev)) {
6241 if (netif_oper_up(dev))
6242 flags |= IFF_RUNNING;
6243 if (netif_carrier_ok(dev))
6244 flags |= IFF_LOWER_UP;
6245 if (netif_dormant(dev))
6246 flags |= IFF_DORMANT;
6247 }
6248
6249 return flags;
6250}
6251EXPORT_SYMBOL(dev_get_flags);
6252
6253int __dev_change_flags(struct net_device *dev, unsigned int flags)
6254{
6255 unsigned int old_flags = dev->flags;
6256 int ret;
6257
6258 ASSERT_RTNL();
6259
6260
6261
6262
6263
6264 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
6265 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
6266 IFF_AUTOMEDIA)) |
6267 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
6268 IFF_ALLMULTI));
6269
6270
6271
6272
6273
6274 if ((old_flags ^ flags) & IFF_MULTICAST)
6275 dev_change_rx_flags(dev, IFF_MULTICAST);
6276
6277 dev_set_rx_mode(dev);
6278
6279
6280
6281
6282
6283
6284
6285 ret = 0;
6286 if ((old_flags ^ flags) & IFF_UP)
6287 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
6288
6289 if ((flags ^ dev->gflags) & IFF_PROMISC) {
6290 int inc = (flags & IFF_PROMISC) ? 1 : -1;
6291 unsigned int old_flags = dev->flags;
6292
6293 dev->gflags ^= IFF_PROMISC;
6294
6295 if (__dev_set_promiscuity(dev, inc, false) >= 0)
6296 if (dev->flags != old_flags)
6297 dev_set_rx_mode(dev);
6298 }
6299
6300
6301
6302
6303
6304 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
6305 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
6306
6307 dev->gflags ^= IFF_ALLMULTI;
6308 __dev_set_allmulti(dev, inc, false);
6309 }
6310
6311 return ret;
6312}
6313
6314void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
6315 unsigned int gchanges)
6316{
6317 unsigned int changes = dev->flags ^ old_flags;
6318
6319 if (gchanges)
6320 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
6321
6322 if (changes & IFF_UP) {
6323 if (dev->flags & IFF_UP)
6324 call_netdevice_notifiers(NETDEV_UP, dev);
6325 else
6326 call_netdevice_notifiers(NETDEV_DOWN, dev);
6327 }
6328
6329 if (dev->flags & IFF_UP &&
6330 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
6331 struct netdev_notifier_change_info change_info;
6332
6333 change_info.flags_changed = changes;
6334 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
6335 &change_info.info);
6336 }
6337}
6338
/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on device based state flags. The flags are
 *	in the userspace exported format.
 */
6347int dev_change_flags(struct net_device *dev, unsigned int flags)
6348{
6349 int ret;
6350 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
6351
6352 ret = __dev_change_flags(dev, flags);
6353 if (ret < 0)
6354 return ret;
6355
6356 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
6357 __dev_notify_flags(dev, old_flags, changes);
6358 return ret;
6359}
6360EXPORT_SYMBOL(dev_change_flags);
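/*
 * Illustrative sketch (not from the original file): bringing an interface
 * administratively up the way an ioctl(SIOCSIFFLAGS) handler would, by
 * OR-ing IFF_UP into the flags currently reported to userspace.
 *
 *	unsigned int flags;
 *	int err;
 *
 *	rtnl_lock();
 *	flags = dev_get_flags(dev) | IFF_UP;
 *	err = dev_change_flags(dev, flags);
 *	rtnl_unlock();
 */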
6361
6362static int __dev_set_mtu(struct net_device *dev, int new_mtu)
6363{
6364 const struct net_device_ops *ops = dev->netdev_ops;
6365
6366 if (ops->ndo_change_mtu)
6367 return ops->ndo_change_mtu(dev, new_mtu);
6368
6369 dev->mtu = new_mtu;
6370 return 0;
6371}
6372
/**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *
 *	Change the maximum transfer size of the network device.
 */
6380int dev_set_mtu(struct net_device *dev, int new_mtu)
6381{
6382 int err, orig_mtu;
6383
6384 if (new_mtu == dev->mtu)
6385 return 0;
6386
	/* MTU must be positive. */
6388 if (new_mtu < 0)
6389 return -EINVAL;
6390
6391 if (!netif_device_present(dev))
6392 return -ENODEV;
6393
6394 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
6395 err = notifier_to_errno(err);
6396 if (err)
6397 return err;
6398
6399 orig_mtu = dev->mtu;
6400 err = __dev_set_mtu(dev, new_mtu);
6401
6402 if (!err) {
6403 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6404 err = notifier_to_errno(err);
6405 if (err) {
			/* setting mtu back and notifying everyone again,
			 * so that they have a chance to revert changes.
			 */
6409 __dev_set_mtu(dev, orig_mtu);
6410 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6411 }
6412 }
6413 return err;
6414}
6415EXPORT_SYMBOL(dev_set_mtu);
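/*
 * Illustrative sketch (not from the original file): requesting a jumbo MTU
 * from a control path under RTNL. The driver may still reject the value in
 * its ndo_change_mtu() callback, in which case the error is returned and
 * the old MTU is kept.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	if (err)
 *		netdev_err(dev, "cannot set MTU to 9000: %d\n", err);
 *	rtnl_unlock();
 */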
6416
/**
 *	dev_set_group - Change group this device belongs to
 *	@dev: device
 *	@new_group: group this device should belong to
 */
6422void dev_set_group(struct net_device *dev, int new_group)
6423{
6424 dev->group = new_group;
6425}
6426EXPORT_SYMBOL(dev_set_group);
6427
/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *
 *	Change the hardware (MAC) address of the device
 */
6435int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
6436{
6437 const struct net_device_ops *ops = dev->netdev_ops;
6438 int err;
6439
6440 if (!ops->ndo_set_mac_address)
6441 return -EOPNOTSUPP;
6442 if (sa->sa_family != dev->type)
6443 return -EINVAL;
6444 if (!netif_device_present(dev))
6445 return -ENODEV;
6446 err = ops->ndo_set_mac_address(dev, sa);
6447 if (err)
6448 return err;
6449 dev->addr_assign_type = NET_ADDR_SET;
6450 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
6451 add_device_randomness(dev->dev_addr, dev->addr_len);
6452 return 0;
6453}
6454EXPORT_SYMBOL(dev_set_mac_address);
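/*
 * Illustrative sketch (not from the original file): setting a locally
 * administered MAC address on an Ethernet device. sa_family must match
 * dev->type (ARPHRD_ETHER here) or -EINVAL is returned.
 *
 *	const u8 new_mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *	struct sockaddr sa;
 *	int err;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, ETH_ALEN);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */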
6455
/**
 *	dev_change_carrier - Change device carrier
 *	@dev: device
 *	@new_carrier: new value
 *
 *	Change device carrier
 */
6463int dev_change_carrier(struct net_device *dev, bool new_carrier)
6464{
6465 const struct net_device_ops *ops = dev->netdev_ops;
6466
6467 if (!ops->ndo_change_carrier)
6468 return -EOPNOTSUPP;
6469 if (!netif_device_present(dev))
6470 return -ENODEV;
6471 return ops->ndo_change_carrier(dev, new_carrier);
6472}
6473EXPORT_SYMBOL(dev_change_carrier);
6474
/**
 *	dev_get_phys_port_id - Get device physical port ID
 *	@dev: device
 *	@ppid: port ID
 *
 *	Get device physical port ID
 */
6482int dev_get_phys_port_id(struct net_device *dev,
6483 struct netdev_phys_item_id *ppid)
6484{
6485 const struct net_device_ops *ops = dev->netdev_ops;
6486
6487 if (!ops->ndo_get_phys_port_id)
6488 return -EOPNOTSUPP;
6489 return ops->ndo_get_phys_port_id(dev, ppid);
6490}
6491EXPORT_SYMBOL(dev_get_phys_port_id);
6492
/**
 *	dev_get_phys_port_name - Get device physical port name
 *	@dev: device
 *	@name: port name
 *	@len: limit of bytes to copy to name
 *
 *	Get device physical port name
 */
6501int dev_get_phys_port_name(struct net_device *dev,
6502 char *name, size_t len)
6503{
6504 const struct net_device_ops *ops = dev->netdev_ops;
6505
6506 if (!ops->ndo_get_phys_port_name)
6507 return -EOPNOTSUPP;
6508 return ops->ndo_get_phys_port_name(dev, name, len);
6509}
6510EXPORT_SYMBOL(dev_get_phys_port_name);
6511
/**
 *	dev_change_proto_down - update protocol port state information
 *	@dev: device
 *	@proto_down: new value
 *
 *	This info can be used by switch drivers to set the phys state of the
 *	port.
 */
6520int dev_change_proto_down(struct net_device *dev, bool proto_down)
6521{
6522 const struct net_device_ops *ops = dev->netdev_ops;
6523
6524 if (!ops->ndo_change_proto_down)
6525 return -EOPNOTSUPP;
6526 if (!netif_device_present(dev))
6527 return -ENODEV;
6528 return ops->ndo_change_proto_down(dev, proto_down);
6529}
6530EXPORT_SYMBOL(dev_change_proto_down);
6531
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
6540static int dev_new_index(struct net *net)
6541{
6542 int ifindex = net->ifindex;
6543 for (;;) {
6544 if (++ifindex <= 0)
6545 ifindex = 1;
6546 if (!__dev_get_by_index(net, ifindex))
6547 return net->ifindex = ifindex;
6548 }
6549}
6550
6551
6552static LIST_HEAD(net_todo_list);
6553DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
6554
6555static void net_set_todo(struct net_device *dev)
6556{
6557 list_add_tail(&dev->todo_list, &net_todo_list);
6558 dev_net(dev)->dev_unreg_count++;
6559}
6560
6561static void rollback_registered_many(struct list_head *head)
6562{
6563 struct net_device *dev, *tmp;
6564 LIST_HEAD(close_head);
6565
6566 BUG_ON(dev_boot_phase);
6567 ASSERT_RTNL();
6568
6569 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
6570
6571
6572
6573
6574 if (dev->reg_state == NETREG_UNINITIALIZED) {
6575 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
6576 dev->name, dev);
6577
6578 WARN_ON(1);
6579 list_del(&dev->unreg_list);
6580 continue;
6581 }
6582 dev->dismantle = true;
6583 BUG_ON(dev->reg_state != NETREG_REGISTERED);
6584 }
6585
6586
6587 list_for_each_entry(dev, head, unreg_list)
6588 list_add_tail(&dev->close_list, &close_head);
6589 dev_close_many(&close_head, true);
6590
6591 list_for_each_entry(dev, head, unreg_list) {
6592
6593 unlist_netdevice(dev);
6594
6595 dev->reg_state = NETREG_UNREGISTERING;
6596 on_each_cpu(flush_backlog, dev, 1);
6597 }
6598
6599 synchronize_net();
6600
6601 list_for_each_entry(dev, head, unreg_list) {
6602 struct sk_buff *skb = NULL;
6603
6604
6605 dev_shutdown(dev);
6606
6607
6608
6609
6610
6611 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6612
6613 if (!dev->rtnl_link_ops ||
6614 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6615 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
6616 GFP_KERNEL);
6617
6618
6619
6620
6621 dev_uc_flush(dev);
6622 dev_mc_flush(dev);
6623
6624 if (dev->netdev_ops->ndo_uninit)
6625 dev->netdev_ops->ndo_uninit(dev);
6626
6627 if (skb)
6628 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
6629
6630
6631 WARN_ON(netdev_has_any_upper_dev(dev));
6632
6633
6634 netdev_unregister_kobject(dev);
6635#ifdef CONFIG_XPS
6636
6637 netif_reset_xps_queues_gt(dev, 0);
6638#endif
6639 }
6640
6641 synchronize_net();
6642
6643 list_for_each_entry(dev, head, unreg_list)
6644 dev_put(dev);
6645}
6646
6647static void rollback_registered(struct net_device *dev)
6648{
6649 LIST_HEAD(single);
6650
6651 list_add(&dev->unreg_list, &single);
6652 rollback_registered_many(&single);
6653 list_del(&single);
6654}
6655
6656static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
6657 struct net_device *upper, netdev_features_t features)
6658{
6659 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6660 netdev_features_t feature;
6661 int feature_bit;
6662
6663 for_each_netdev_feature(&upper_disables, feature_bit) {
6664 feature = __NETIF_F_BIT(feature_bit);
6665 if (!(upper->wanted_features & feature)
6666 && (features & feature)) {
6667 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
6668 &feature, upper->name);
6669 features &= ~feature;
6670 }
6671 }
6672
6673 return features;
6674}
6675
6676static void netdev_sync_lower_features(struct net_device *upper,
6677 struct net_device *lower, netdev_features_t features)
6678{
6679 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6680 netdev_features_t feature;
6681 int feature_bit;
6682
6683 for_each_netdev_feature(&upper_disables, feature_bit) {
6684 feature = __NETIF_F_BIT(feature_bit);
6685 if (!(features & feature) && (lower->features & feature)) {
6686 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
6687 &feature, lower->name);
6688 lower->wanted_features &= ~feature;
6689 netdev_update_features(lower);
6690
6691 if (unlikely(lower->features & feature))
6692 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
6693 &feature, lower->name);
6694 }
6695 }
6696}
6697
6698static netdev_features_t netdev_fix_features(struct net_device *dev,
6699 netdev_features_t features)
6700{
6701
6702 if ((features & NETIF_F_HW_CSUM) &&
6703 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
6704 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
6705 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
6706 }
6707
6708
6709 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
6710 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
6711 features &= ~NETIF_F_ALL_TSO;
6712 }
6713
6714 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
6715 !(features & NETIF_F_IP_CSUM)) {
6716 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
6717 features &= ~NETIF_F_TSO;
6718 features &= ~NETIF_F_TSO_ECN;
6719 }
6720
6721 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
6722 !(features & NETIF_F_IPV6_CSUM)) {
6723 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
6724 features &= ~NETIF_F_TSO6;
6725 }
6726
6727
6728 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
6729 features &= ~NETIF_F_TSO_MANGLEID;
6730
6731
6732 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6733 features &= ~NETIF_F_TSO_ECN;
6734
6735
6736 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
6737 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
6738 features &= ~NETIF_F_GSO;
6739 }
6740
6741
6742 if (features & NETIF_F_UFO) {
6743
6744 if (!(features & NETIF_F_HW_CSUM) &&
6745 ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
6746 (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
6747 netdev_dbg(dev,
6748 "Dropping NETIF_F_UFO since no checksum offload features.\n");
6749 features &= ~NETIF_F_UFO;
6750 }
6751
6752 if (!(features & NETIF_F_SG)) {
6753 netdev_dbg(dev,
6754 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
6755 features &= ~NETIF_F_UFO;
6756 }
6757 }
6758
6759
6760 if ((features & dev->gso_partial_features) &&
6761 !(features & NETIF_F_GSO_PARTIAL)) {
6762 netdev_dbg(dev,
6763 "Dropping partially supported GSO features since no GSO partial.\n");
6764 features &= ~dev->gso_partial_features;
6765 }
6766
6767#ifdef CONFIG_NET_RX_BUSY_POLL
6768 if (dev->netdev_ops->ndo_busy_poll)
6769 features |= NETIF_F_BUSY_POLL;
6770 else
6771#endif
6772 features &= ~NETIF_F_BUSY_POLL;
6773
6774 return features;
6775}
6776
6777int __netdev_update_features(struct net_device *dev)
6778{
6779 struct net_device *upper, *lower;
6780 netdev_features_t features;
6781 struct list_head *iter;
6782 int err = -1;
6783
6784 ASSERT_RTNL();
6785
6786 features = netdev_get_wanted_features(dev);
6787
6788 if (dev->netdev_ops->ndo_fix_features)
6789 features = dev->netdev_ops->ndo_fix_features(dev, features);
6790
6791
6792 features = netdev_fix_features(dev, features);
6793
6794
6795 netdev_for_each_upper_dev_rcu(dev, upper, iter)
6796 features = netdev_sync_upper_features(dev, upper, features);
6797
6798 if (dev->features == features)
6799 goto sync_lower;
6800
6801 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6802 &dev->features, &features);
6803
6804 if (dev->netdev_ops->ndo_set_features)
6805 err = dev->netdev_ops->ndo_set_features(dev, features);
6806 else
6807 err = 0;
6808
6809 if (unlikely(err < 0)) {
6810 netdev_err(dev,
6811 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6812 err, &features, &dev->features);
6813
6814
6815
6816 return -1;
6817 }
6818
6819sync_lower:
6820
6821
6822
6823 netdev_for_each_lower_dev(dev, lower, iter)
6824 netdev_sync_lower_features(dev, lower, features);
6825
6826 if (!err)
6827 dev->features = features;
6828
6829 return err < 0 ? 0 : 1;
6830}
6831
/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications if it
 *	has changed. Should be called after driver or hardware dependent
 *	conditions might have changed that influence the features.
 */
6840void netdev_update_features(struct net_device *dev)
6841{
6842 if (__netdev_update_features(dev))
6843 netdev_features_change(dev);
6844}
6845EXPORT_SYMBOL(netdev_update_features);
6846
/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications even
 *	if they have not changed. Should be called instead of
 *	netdev_update_features() if also dev->vlan_features might
 *	have changed to allow the changes to be propagated to stacked
 *	VLAN devices.
 */
6857void netdev_change_features(struct net_device *dev)
6858{
6859 __netdev_update_features(dev);
6860 netdev_features_change(dev);
6861}
6862EXPORT_SYMBOL(netdev_change_features);
6863
/**
 *	netif_stacked_transfer_operstate -	transfer operstate
 *	@rootdev: the root or lower level device to transfer state from
 *	@dev: the device to transfer operstate to
 *
 *	Transfer operational state from root to device. This is normally
 *	called when a stacking relationship exists between the root
 *	device and the device (a leaf device).
 */
6873void netif_stacked_transfer_operstate(const struct net_device *rootdev,
6874 struct net_device *dev)
6875{
6876 if (rootdev->operstate == IF_OPER_DORMANT)
6877 netif_dormant_on(dev);
6878 else
6879 netif_dormant_off(dev);
6880
6881 if (netif_carrier_ok(rootdev)) {
6882 if (!netif_carrier_ok(dev))
6883 netif_carrier_on(dev);
6884 } else {
6885 if (netif_carrier_ok(dev))
6886 netif_carrier_off(dev);
6887 }
6888}
6889EXPORT_SYMBOL(netif_stacked_transfer_operstate);
6890
6891#ifdef CONFIG_SYSFS
6892static int netif_alloc_rx_queues(struct net_device *dev)
6893{
6894 unsigned int i, count = dev->num_rx_queues;
6895 struct netdev_rx_queue *rx;
6896 size_t sz = count * sizeof(*rx);
6897
6898 BUG_ON(count < 1);
6899
6900 rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6901 if (!rx) {
6902 rx = vzalloc(sz);
6903 if (!rx)
6904 return -ENOMEM;
6905 }
6906 dev->_rx = rx;
6907
6908 for (i = 0; i < count; i++)
6909 rx[i].dev = dev;
6910 return 0;
6911}
6912#endif
6913
6914static void netdev_init_one_queue(struct net_device *dev,
6915 struct netdev_queue *queue, void *_unused)
6916{
6917
6918 spin_lock_init(&queue->_xmit_lock);
6919 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
6920 queue->xmit_lock_owner = -1;
6921 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
6922 queue->dev = dev;
6923#ifdef CONFIG_BQL
6924 dql_init(&queue->dql, HZ);
6925#endif
6926}
6927
6928static void netif_free_tx_queues(struct net_device *dev)
6929{
6930 kvfree(dev->_tx);
6931}
6932
6933static int netif_alloc_netdev_queues(struct net_device *dev)
6934{
6935 unsigned int count = dev->num_tx_queues;
6936 struct netdev_queue *tx;
6937 size_t sz = count * sizeof(*tx);
6938
6939 if (count < 1 || count > 0xffff)
6940 return -EINVAL;
6941
6942 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
6943 if (!tx) {
6944 tx = vzalloc(sz);
6945 if (!tx)
6946 return -ENOMEM;
6947 }
6948 dev->_tx = tx;
6949
6950 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
6951 spin_lock_init(&dev->tx_global_lock);
6952
6953 return 0;
6954}
6955
6956void netif_tx_stop_all_queues(struct net_device *dev)
6957{
6958 unsigned int i;
6959
6960 for (i = 0; i < dev->num_tx_queues; i++) {
6961 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
6962 netif_tx_stop_queue(txq);
6963 }
6964}
6965EXPORT_SYMBOL(netif_tx_stop_all_queues);
6966
/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */
6984int register_netdevice(struct net_device *dev)
6985{
6986 int ret;
6987 struct net *net = dev_net(dev);
6988
6989 BUG_ON(dev_boot_phase);
6990 ASSERT_RTNL();
6991
6992 might_sleep();
6993
6994
6995 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
6996 BUG_ON(!net);
6997
6998 spin_lock_init(&dev->addr_list_lock);
6999 netdev_set_addr_lockdep_class(dev);
7000
7001 ret = dev_get_valid_name(net, dev, dev->name);
7002 if (ret < 0)
7003 goto out;
7004
7005
7006 if (dev->netdev_ops->ndo_init) {
7007 ret = dev->netdev_ops->ndo_init(dev);
7008 if (ret) {
7009 if (ret > 0)
7010 ret = -EIO;
7011 goto out;
7012 }
7013 }
7014
7015 if (((dev->hw_features | dev->features) &
7016 NETIF_F_HW_VLAN_CTAG_FILTER) &&
7017 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
7018 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
7019 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
7020 ret = -EINVAL;
7021 goto err_uninit;
7022 }
7023
7024 ret = -EBUSY;
7025 if (!dev->ifindex)
7026 dev->ifindex = dev_new_index(net);
7027 else if (__dev_get_by_index(net, dev->ifindex))
7028 goto err_uninit;
7029
7030
7031
7032
7033 dev->hw_features |= NETIF_F_SOFT_FEATURES;
7034 dev->features |= NETIF_F_SOFT_FEATURES;
7035 dev->wanted_features = dev->features & dev->hw_features;
7036
7037 if (!(dev->flags & IFF_LOOPBACK))
7038 dev->hw_features |= NETIF_F_NOCACHE_COPY;
7039
7040
7041
7042
7043
7044
7045 if (dev->hw_features & NETIF_F_TSO)
7046 dev->hw_features |= NETIF_F_TSO_MANGLEID;
7047 if (dev->vlan_features & NETIF_F_TSO)
7048 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
7049 if (dev->mpls_features & NETIF_F_TSO)
7050 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
7051 if (dev->hw_enc_features & NETIF_F_TSO)
7052 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
7053
7054
7055
7056 dev->vlan_features |= NETIF_F_HIGHDMA;
7057
7058
7059
7060 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
7061
7062
7063
7064 dev->mpls_features |= NETIF_F_SG;
7065
7066 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
7067 ret = notifier_to_errno(ret);
7068 if (ret)
7069 goto err_uninit;
7070
7071 ret = netdev_register_kobject(dev);
7072 if (ret)
7073 goto err_uninit;
7074 dev->reg_state = NETREG_REGISTERED;
7075
7076 __netdev_update_features(dev);
7077
7078
7079
7080
7081
7082
7083 set_bit(__LINK_STATE_PRESENT, &dev->state);
7084
7085 linkwatch_init_dev(dev);
7086
7087 dev_init_scheduler(dev);
7088 dev_hold(dev);
7089 list_netdevice(dev);
7090 add_device_randomness(dev->dev_addr, dev->addr_len);
7091
7092
7093
7094
7095
7096 if (dev->addr_assign_type == NET_ADDR_PERM)
7097 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
7098
7099
7100 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
7101 ret = notifier_to_errno(ret);
7102 if (ret) {
7103 rollback_registered(dev);
7104 dev->reg_state = NETREG_UNREGISTERED;
7105 }
7106
7107
7108
7109
7110 if (!dev->rtnl_link_ops ||
7111 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
7112 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7113
7114out:
7115 return ret;
7116
7117err_uninit:
7118 if (dev->netdev_ops->ndo_uninit)
7119 dev->netdev_ops->ndo_uninit(dev);
7120 goto out;
7121}
7122EXPORT_SYMBOL(register_netdevice);
7123
/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */
7134int init_dummy_netdev(struct net_device *dev)
7135{
7136
7137
7138
7139
7140
7141 memset(dev, 0, sizeof(struct net_device));
7142
7143
7144
7145
7146 dev->reg_state = NETREG_DUMMY;
7147
7148
7149 INIT_LIST_HEAD(&dev->napi_list);
7150
7151
7152 set_bit(__LINK_STATE_PRESENT, &dev->state);
7153 set_bit(__LINK_STATE_START, &dev->state);
7154
7155
7156
7157
7158
7159
7160 return 0;
7161}
7162EXPORT_SYMBOL_GPL(init_dummy_netdev);
7163
/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */
7178int register_netdev(struct net_device *dev)
7179{
7180 int err;
7181
7182 rtnl_lock();
7183 err = register_netdevice(dev);
7184 rtnl_unlock();
7185 return err;
7186}
7187EXPORT_SYMBOL(register_netdev);
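/*
 * Illustrative sketch (not from the original file): the usual driver probe
 * sequence built on these helpers. alloc_etherdev() is the Ethernet wrapper
 * around alloc_netdev_mqs(); my_priv and my_netdev_ops are hypothetical
 * driver symbols.
 *
 *	struct net_device *dev = alloc_etherdev(sizeof(struct my_priv));
 *	if (!dev)
 *		return -ENOMEM;
 *	dev->netdev_ops = &my_netdev_ops;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */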
7188
7189int netdev_refcnt_read(const struct net_device *dev)
7190{
7191 int i, refcnt = 0;
7192
7193 for_each_possible_cpu(i)
7194 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
7195 return refcnt;
7196}
7197EXPORT_SYMBOL(netdev_refcnt_read);
7198
/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
7211static void netdev_wait_allrefs(struct net_device *dev)
7212{
7213 unsigned long rebroadcast_time, warning_time;
7214 int refcnt;
7215
7216 linkwatch_forget_dev(dev);
7217
7218 rebroadcast_time = warning_time = jiffies;
7219 refcnt = netdev_refcnt_read(dev);
7220
7221 while (refcnt != 0) {
7222 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
7223 rtnl_lock();
7224
7225
7226 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
7227
7228 __rtnl_unlock();
7229 rcu_barrier();
7230 rtnl_lock();
7231
7232 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7233 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
7234 &dev->state)) {
7235
7236
7237
7238
7239
7240
7241 linkwatch_run_queue();
7242 }
7243
7244 __rtnl_unlock();
7245
7246 rebroadcast_time = jiffies;
7247 }
7248
7249 msleep(250);
7250
7251 refcnt = netdev_refcnt_read(dev);
7252
7253 if (time_after(jiffies, warning_time + 10 * HZ)) {
7254 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
7255 dev->name, refcnt);
7256 warning_time = jiffies;
7257 }
7258 }
7259}
7260
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
7285void netdev_run_todo(void)
7286{
7287 struct list_head list;
7288
7289
7290 list_replace_init(&net_todo_list, &list);
7291
7292 __rtnl_unlock();
7293
7294
7295
7296 if (!list_empty(&list))
7297 rcu_barrier();
7298
7299 while (!list_empty(&list)) {
7300 struct net_device *dev
7301 = list_first_entry(&list, struct net_device, todo_list);
7302 list_del(&dev->todo_list);
7303
7304 rtnl_lock();
7305 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7306 __rtnl_unlock();
7307
7308 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
7309 pr_err("network todo '%s' but state %d\n",
7310 dev->name, dev->reg_state);
7311 dump_stack();
7312 continue;
7313 }
7314
7315 dev->reg_state = NETREG_UNREGISTERED;
7316
7317 netdev_wait_allrefs(dev);
7318
7319
7320 BUG_ON(netdev_refcnt_read(dev));
7321 BUG_ON(!list_empty(&dev->ptype_all));
7322 BUG_ON(!list_empty(&dev->ptype_specific));
7323 WARN_ON(rcu_access_pointer(dev->ip_ptr));
7324 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
7325 WARN_ON(dev->dn_ptr);
7326
7327 if (dev->destructor)
7328 dev->destructor(dev);
7329
7330
7331 rtnl_lock();
7332 dev_net(dev)->dev_unreg_count--;
7333 __rtnl_unlock();
7334 wake_up(&netdev_unregistering_wq);
7335
7336
7337 kobject_put(&dev->dev.kobj);
7338 }
7339}
7340
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
 * all the same fields in the same order as net_device_stats, with only
 * the type differing, but rtnl_link_stats64 may have additional fields
 * at the end for newer counters.
 */
7346void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
7347 const struct net_device_stats *netdev_stats)
7348{
7349#if BITS_PER_LONG == 64
7350 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
7351 memcpy(stats64, netdev_stats, sizeof(*stats64));
7352
7353 memset((char *)stats64 + sizeof(*netdev_stats), 0,
7354 sizeof(*stats64) - sizeof(*netdev_stats));
7355#else
7356 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
7357 const unsigned long *src = (const unsigned long *)netdev_stats;
7358 u64 *dst = (u64 *)stats64;
7359
7360 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
7361 for (i = 0; i < n; i++)
7362 dst[i] = src[i];
7363
7364 memset((char *)stats64 + n * sizeof(u64), 0,
7365 sizeof(*stats64) - n * sizeof(u64));
7366#endif
7367}
7368EXPORT_SYMBOL(netdev_stats_to_stats64);
7369
/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */
7380struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
7381 struct rtnl_link_stats64 *storage)
7382{
7383 const struct net_device_ops *ops = dev->netdev_ops;
7384
7385 if (ops->ndo_get_stats64) {
7386 memset(storage, 0, sizeof(*storage));
7387 ops->ndo_get_stats64(dev, storage);
7388 } else if (ops->ndo_get_stats) {
7389 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
7390 } else {
7391 netdev_stats_to_stats64(storage, &dev->stats);
7392 }
7393 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
7394 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
7395 storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
7396 return storage;
7397}
7398EXPORT_SYMBOL(dev_get_stats);
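/*
 * Illustrative sketch (not from the original file): snapshotting the 64-bit
 * counters of a device. dev_get_stats() fills @storage and returns it, so
 * the local structure can be used directly afterwards.
 *
 *	struct rtnl_link_stats64 stats;
 *
 *	dev_get_stats(dev, &stats);
 *	pr_info("%s: rx %llu packets, tx %llu packets\n", dev->name,
 *		(unsigned long long)stats.rx_packets,
 *		(unsigned long long)stats.tx_packets);
 */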
7399
7400struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
7401{
7402 struct netdev_queue *queue = dev_ingress_queue(dev);
7403
7404#ifdef CONFIG_NET_CLS_ACT
7405 if (queue)
7406 return queue;
7407 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
7408 if (!queue)
7409 return NULL;
7410 netdev_init_one_queue(dev, queue, NULL);
7411 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
7412 queue->qdisc_sleeping = &noop_qdisc;
7413 rcu_assign_pointer(dev->ingress_queue, queue);
7414#endif
7415 return queue;
7416}
7417
7418static const struct ethtool_ops default_ethtool_ops;
7419
7420void netdev_set_default_ethtool_ops(struct net_device *dev,
7421 const struct ethtool_ops *ops)
7422{
7423 if (dev->ethtool_ops == &default_ethtool_ops)
7424 dev->ethtool_ops = ops;
7425}
7426EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
7427
7428void netdev_freemem(struct net_device *dev)
7429{
7430 char *addr = (char *)dev - dev->padded;
7431
7432 kvfree(addr);
7433}
7434
/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@name_assign_type: origin of device name
 *	@setup:		callback to initialize device
 *	@txqs:		the number of TX subqueues to allocate
 *	@rxqs:		the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device.
 */
7448struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7449 unsigned char name_assign_type,
7450 void (*setup)(struct net_device *),
7451 unsigned int txqs, unsigned int rxqs)
7452{
7453 struct net_device *dev;
7454 size_t alloc_size;
7455 struct net_device *p;
7456
7457 BUG_ON(strlen(name) >= sizeof(dev->name));
7458
7459 if (txqs < 1) {
7460 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
7461 return NULL;
7462 }
7463
7464#ifdef CONFIG_SYSFS
7465 if (rxqs < 1) {
7466 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
7467 return NULL;
7468 }
7469#endif
7470
7471 alloc_size = sizeof(struct net_device);
7472 if (sizeof_priv) {
7473
7474 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
7475 alloc_size += sizeof_priv;
7476 }
7477
7478 alloc_size += NETDEV_ALIGN - 1;
7479
7480 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7481 if (!p)
7482 p = vzalloc(alloc_size);
7483 if (!p)
7484 return NULL;
7485
7486 dev = PTR_ALIGN(p, NETDEV_ALIGN);
7487 dev->padded = (char *)dev - (char *)p;
7488
7489 dev->pcpu_refcnt = alloc_percpu(int);
7490 if (!dev->pcpu_refcnt)
7491 goto free_dev;
7492
7493 if (dev_addr_init(dev))
7494 goto free_pcpu;
7495
7496 dev_mc_init(dev);
7497 dev_uc_init(dev);
7498
7499 dev_net_set(dev, &init_net);
7500
7501 dev->gso_max_size = GSO_MAX_SIZE;
7502 dev->gso_max_segs = GSO_MAX_SEGS;
7503
7504 INIT_LIST_HEAD(&dev->napi_list);
7505 INIT_LIST_HEAD(&dev->unreg_list);
7506 INIT_LIST_HEAD(&dev->close_list);
7507 INIT_LIST_HEAD(&dev->link_watch_list);
7508 INIT_LIST_HEAD(&dev->adj_list.upper);
7509 INIT_LIST_HEAD(&dev->adj_list.lower);
7510 INIT_LIST_HEAD(&dev->all_adj_list.upper);
7511 INIT_LIST_HEAD(&dev->all_adj_list.lower);
7512 INIT_LIST_HEAD(&dev->ptype_all);
7513 INIT_LIST_HEAD(&dev->ptype_specific);
7514 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
7515 setup(dev);
7516
7517 if (!dev->tx_queue_len) {
7518 dev->priv_flags |= IFF_NO_QUEUE;
7519 dev->tx_queue_len = 1;
7520 }
7521
7522 dev->num_tx_queues = txqs;
7523 dev->real_num_tx_queues = txqs;
7524 if (netif_alloc_netdev_queues(dev))
7525 goto free_all;
7526
7527#ifdef CONFIG_SYSFS
7528 dev->num_rx_queues = rxqs;
7529 dev->real_num_rx_queues = rxqs;
7530 if (netif_alloc_rx_queues(dev))
7531 goto free_all;
7532#endif
7533
7534 strcpy(dev->name, name);
7535 dev->name_assign_type = name_assign_type;
7536 dev->group = INIT_NETDEV_GROUP;
7537 if (!dev->ethtool_ops)
7538 dev->ethtool_ops = &default_ethtool_ops;
7539
7540 nf_hook_ingress_init(dev);
7541
7542 return dev;
7543
7544free_all:
7545 free_netdev(dev);
7546 return NULL;
7547
7548free_pcpu:
7549 free_percpu(dev->pcpu_refcnt);
7550free_dev:
7551 netdev_freemem(dev);
7552 return NULL;
7553}
7554EXPORT_SYMBOL(alloc_netdev_mqs);
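/*
 * Illustrative sketch (not from the original file): allocating a multi-queue
 * Ethernet-style device directly with alloc_netdev_mqs(). ether_setup() is
 * the common setup callback, "eth%d" lets the core pick the unit number and
 * struct my_priv is a hypothetical driver private area.
 *
 *	struct net_device *dev;
 *
 *	dev = alloc_netdev_mqs(sizeof(struct my_priv), "eth%d",
 *			       NET_NAME_UNKNOWN, ether_setup, 8, 8);
 *	if (!dev)
 *		return -ENOMEM;
 */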
7555
/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released. If this
 *	is the last reference then it will be freed.
 *	Must be called in process context.
 */
7565void free_netdev(struct net_device *dev)
7566{
7567 struct napi_struct *p, *n;
7568
7569 might_sleep();
7570 netif_free_tx_queues(dev);
7571#ifdef CONFIG_SYSFS
7572 kvfree(dev->_rx);
7573#endif
7574
7575 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
7576
7577
7578 dev_addr_flush(dev);
7579
7580 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
7581 netif_napi_del(p);
7582
7583 free_percpu(dev->pcpu_refcnt);
7584 dev->pcpu_refcnt = NULL;
7585
7586
7587 if (dev->reg_state == NETREG_UNINITIALIZED) {
7588 netdev_freemem(dev);
7589 return;
7590 }
7591
7592 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
7593 dev->reg_state = NETREG_RELEASED;
7594
7595
7596 put_device(&dev->dev);
7597}
7598EXPORT_SYMBOL(free_netdev);
7599
/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
7606void synchronize_net(void)
7607{
7608 might_sleep();
7609 if (rtnl_is_locked())
7610 synchronize_rcu_expedited();
7611 else
7612 synchronize_rcu();
7613}
7614EXPORT_SYMBOL(synchronize_net);
7615
/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */
7629void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
7630{
7631 ASSERT_RTNL();
7632
7633 if (head) {
7634 list_move_tail(&dev->unreg_list, head);
7635 } else {
7636 rollback_registered(dev);
7637
7638 net_set_todo(dev);
7639 }
7640}
7641EXPORT_SYMBOL(unregister_netdevice_queue);
7642
/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 *
 *	Note: As most callers use a stack allocated list_head,
 *	we force a list_del() to make sure stack won't be corrupted later.
 */
7650void unregister_netdevice_many(struct list_head *head)
7651{
7652 struct net_device *dev;
7653
7654 if (!list_empty(head)) {
7655 rollback_registered_many(head);
7656 list_for_each_entry(dev, head, unreg_list)
7657 net_set_todo(dev);
7658 list_del(head);
7659 }
7660}
7661EXPORT_SYMBOL(unregister_netdevice_many);
7662
/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
7674void unregister_netdev(struct net_device *dev)
7675{
7676 rtnl_lock();
7677 unregister_netdevice(dev);
7678 rtnl_unlock();
7679}
7680EXPORT_SYMBOL(unregister_netdev);
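/*
 * Illustrative sketch (not from the original file): the matching remove path
 * for a device registered with register_netdev(). unregister_netdev() takes
 * RTNL itself; free_netdev() must only run afterwards, from process context.
 *
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */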
7681
/**
 *	dev_change_net_namespace - move device to different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */
7696int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
7697{
7698 int err;
7699
7700 ASSERT_RTNL();
7701
7702
7703 err = -EINVAL;
7704 if (dev->features & NETIF_F_NETNS_LOCAL)
7705 goto out;
7706
7707
7708 if (dev->reg_state != NETREG_REGISTERED)
7709 goto out;
7710
7711
7712 err = 0;
7713 if (net_eq(dev_net(dev), net))
7714 goto out;
7715
7716
7717
7718
7719 err = -EEXIST;
7720 if (__dev_get_by_name(net, dev->name)) {
7721
7722 if (!pat)
7723 goto out;
7724 if (dev_get_valid_name(net, dev, pat) < 0)
7725 goto out;
7726 }
7727
7728
7729
7730
7731
7732
7733 dev_close(dev);
7734
7735
7736 err = -ENODEV;
7737 unlist_netdevice(dev);
7738
7739 synchronize_net();
7740
7741
7742 dev_shutdown(dev);
7743
7744
7745
7746
7747
7748
7749
7750
7751 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
7752 rcu_barrier();
7753 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7754 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
7755
7756
7757
7758
7759 dev_uc_flush(dev);
7760 dev_mc_flush(dev);
7761
7762
7763 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
7764 netdev_adjacent_del_links(dev);
7765
7766
7767 dev_net_set(dev, net);
7768
7769
7770 if (__dev_get_by_index(net, dev->ifindex))
7771 dev->ifindex = dev_new_index(net);
7772
7773
7774 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
7775 netdev_adjacent_add_links(dev);
7776
7777
7778 err = device_rename(&dev->dev, dev->name);
7779 WARN_ON(err);
7780
7781
7782 list_netdevice(dev);
7783
7784
7785 call_netdevice_notifiers(NETDEV_REGISTER, dev);
7786
7787
7788
7789
7790
7791 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7792
7793 synchronize_net();
7794 err = 0;
7795out:
7796 return err;
7797}
7798EXPORT_SYMBOL_GPL(dev_change_net_namespace);
7799
7800static int dev_cpu_callback(struct notifier_block *nfb,
7801 unsigned long action,
7802 void *ocpu)
7803{
7804 struct sk_buff **list_skb;
7805 struct sk_buff *skb;
7806 unsigned int cpu, oldcpu = (unsigned long)ocpu;
7807 struct softnet_data *sd, *oldsd;
7808
7809 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
7810 return NOTIFY_OK;
7811
7812 local_irq_disable();
7813 cpu = smp_processor_id();
7814 sd = &per_cpu(softnet_data, cpu);
7815 oldsd = &per_cpu(softnet_data, oldcpu);
7816
7817
7818 list_skb = &sd->completion_queue;
7819 while (*list_skb)
7820 list_skb = &(*list_skb)->next;
7821
7822 *list_skb = oldsd->completion_queue;
7823 oldsd->completion_queue = NULL;
7824
7825
7826 if (oldsd->output_queue) {
7827 *sd->output_queue_tailp = oldsd->output_queue;
7828 sd->output_queue_tailp = oldsd->output_queue_tailp;
7829 oldsd->output_queue = NULL;
7830 oldsd->output_queue_tailp = &oldsd->output_queue;
7831 }
7832
7833
7834
7835
7836 while (!list_empty(&oldsd->poll_list)) {
7837 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
7838 struct napi_struct,
7839 poll_list);
7840
7841 list_del_init(&napi->poll_list);
7842 if (napi->poll == process_backlog)
7843 napi->state = 0;
7844 else
7845 ____napi_schedule(sd, napi);
7846 }
7847
7848 raise_softirq_irqoff(NET_TX_SOFTIRQ);
7849 local_irq_enable();
7850
7851
7852 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
7853 netif_rx_ni(skb);
7854 input_queue_head_incr(oldsd);
7855 }
7856 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
7857 netif_rx_ni(skb);
7858 input_queue_head_incr(oldsd);
7859 }
7860
7861 return NOTIFY_OK;
7862}
7863
/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
7875netdev_features_t netdev_increment_features(netdev_features_t all,
7876 netdev_features_t one, netdev_features_t mask)
7877{
7878 if (mask & NETIF_F_HW_CSUM)
7879 mask |= NETIF_F_CSUM_MASK;
7880 mask |= NETIF_F_VLAN_CHALLENGED;
7881
7882 all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
7883 all &= one | ~NETIF_F_ALL_FOR_ALL;
7884
7885
7886 if (all & NETIF_F_HW_CSUM)
7887 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
7888
7889 return all;
7890}
7891EXPORT_SYMBOL(netdev_increment_features);
7892
7893static struct hlist_head * __net_init netdev_create_hash(void)
7894{
7895 int i;
7896 struct hlist_head *hash;
7897
7898 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
7899 if (hash != NULL)
7900 for (i = 0; i < NETDEV_HASHENTRIES; i++)
7901 INIT_HLIST_HEAD(&hash[i]);
7902
7903 return hash;
7904}
7905
7906
7907static int __net_init netdev_init(struct net *net)
7908{
7909 if (net != &init_net)
7910 INIT_LIST_HEAD(&net->dev_base_head);
7911
7912 net->dev_name_head = netdev_create_hash();
7913 if (net->dev_name_head == NULL)
7914 goto err_name;
7915
7916 net->dev_index_head = netdev_create_hash();
7917 if (net->dev_index_head == NULL)
7918 goto err_idx;
7919
7920 return 0;
7921
7922err_idx:
7923 kfree(net->dev_name_head);
7924err_name:
7925 return -ENOMEM;
7926}
7927
7928
7929
7930
7931
7932
7933
7934const char *netdev_drivername(const struct net_device *dev)
7935{
7936 const struct device_driver *driver;
7937 const struct device *parent;
7938 const char *empty = "";
7939
7940 parent = dev->dev.parent;
7941 if (!parent)
7942 return empty;
7943
7944 driver = parent->driver;
7945 if (driver && driver->name)
7946 return driver->name;
7947 return empty;
7948}
7949
static void __netdev_printk(const char *level, const struct net_device *dev,
			    struct va_format *vaf)
{
	if (dev && dev->dev.parent) {
		dev_printk_emit(level[1] - '0',
				dev->dev.parent,
				"%s %s %s%s: %pV",
				dev_driver_string(dev->dev.parent),
				dev_name(dev->dev.parent),
				netdev_name(dev), netdev_reg_state(dev),
				vaf);
	} else if (dev) {
		printk("%s%s%s: %pV",
		       level, netdev_name(dev), netdev_reg_state(dev), vaf);
	} else {
		printk("%s(NULL net_device): %pV", level, vaf);
	}
}

void netdev_printk(const char *level, const struct net_device *dev,
		   const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	__netdev_printk(level, dev, &vaf);

	va_end(args);
}
EXPORT_SYMBOL(netdev_printk);

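/* Generate netdev_emerg() through netdev_info(): one exported wrapper per
 * log level, each forwarding its varargs to __netdev_printk().
 */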
#define define_netdev_printk_level(func, level)			\
void func(const struct net_device *dev, const char *fmt, ...)		\
{									\
	struct va_format vaf;						\
	va_list args;							\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	__netdev_printk(level, dev, &vaf);				\
									\
	va_end(args);							\
}									\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

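/* Called for each network namespace being torn down: push every migratable
 * device (neither NETNS_LOCAL nor managed by rtnl_link_ops) back into
 * init_net so it is not lost when the namespace goes away.
 */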
static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;

	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Leave virtual devices for the generic cleanup */
		if (dev->rtnl_link_ops)
			continue;

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			pr_emerg("%s: failed to move %s to init_net: %d\n",
				 __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
	/* Return with the rtnl_lock held when there are no network
	 * devices unregistering in any network namespace in net_list.
	 */
	struct net *net;
	bool unregistering;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	add_wait_queue(&netdev_unregistering_wq, &wait);
	for (;;) {
		unregistering = false;
		rtnl_lock();
		list_for_each_entry(net, net_list, exit_list) {
			if (net->dev_unreg_count > 0) {
				unregistering = true;
				break;
			}
		}
		if (!unregistering)
			break;
		__rtnl_unlock();

		wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
	}
	remove_wait_queue(&netdev_unregistering_wq, &wait);
}

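/* Batched counterpart of default_device_exit(): wait for in-flight
 * unregistrations to finish, then queue every remaining device in the
 * exiting namespaces for a single unregister_netdevice_many() call.
 */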
static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed,
	 * wait here for all pending unregistrations to complete
	 * before unregistering the loopback device and allowing the
	 * network namespace to be freed.
	 *
	 * The netdev todo list containing all network device
	 * unregistrations that happen in default_device_exit_batch
	 * will run in the rtnl_unlock() at the end of
	 * default_device_exit_batch.
	 */
	rtnl_lock_unregistering(net_list);
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

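/*
 * Initialize the DEV module.  This runs single threaded during boot, so
 * there is no need to take the rtnl semaphore.
 */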
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too.  Since we now dynamically allocate and free
	 * the loopback device, keep this invariant by making loopback
	 * the first device on the list of network devices: it is the
	 * first device that appears and the last one that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_subsys_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);