/*
 *	NET3:	Protocol-independent network device support routines.
 */

75#include <asm/uaccess.h>
76#include <linux/bitops.h>
77#include <linux/capability.h>
78#include <linux/cpu.h>
79#include <linux/types.h>
80#include <linux/kernel.h>
81#include <linux/hash.h>
82#include <linux/slab.h>
83#include <linux/sched.h>
84#include <linux/mutex.h>
85#include <linux/string.h>
86#include <linux/mm.h>
87#include <linux/socket.h>
88#include <linux/sockios.h>
89#include <linux/errno.h>
90#include <linux/interrupt.h>
91#include <linux/if_ether.h>
92#include <linux/netdevice.h>
93#include <linux/etherdevice.h>
94#include <linux/ethtool.h>
95#include <linux/notifier.h>
96#include <linux/skbuff.h>
97#include <linux/bpf.h>
98#include <net/net_namespace.h>
99#include <net/sock.h>
100#include <net/busy_poll.h>
101#include <linux/rtnetlink.h>
102#include <linux/stat.h>
103#include <net/dst.h>
104#include <net/dst_metadata.h>
105#include <net/pkt_sched.h>
106#include <net/checksum.h>
107#include <net/xfrm.h>
108#include <linux/highmem.h>
109#include <linux/init.h>
110#include <linux/module.h>
111#include <linux/netpoll.h>
112#include <linux/rcupdate.h>
113#include <linux/delay.h>
114#include <net/iw_handler.h>
115#include <asm/current.h>
116#include <linux/audit.h>
117#include <linux/dmaengine.h>
118#include <linux/err.h>
119#include <linux/ctype.h>
120#include <linux/if_arp.h>
121#include <linux/if_vlan.h>
122#include <linux/ip.h>
123#include <net/ip.h>
124#include <net/mpls.h>
125#include <linux/ipv6.h>
126#include <linux/in.h>
127#include <linux/jhash.h>
128#include <linux/random.h>
129#include <trace/events/napi.h>
130#include <trace/events/net.h>
131#include <trace/events/skb.h>
132#include <linux/pci.h>
133#include <linux/inetdevice.h>
134#include <linux/cpu_rmap.h>
135#include <linux/static_key.h>
136#include <linux/hashtable.h>
137#include <linux/vmalloc.h>
138#include <linux/if_macvlan.h>
139#include <linux/errqueue.h>
140#include <linux/hrtimer.h>
141#include <linux/netfilter_ingress.h>
142#include <linux/sctp.h>
143#include <linux/crash_dump.h>
144
145#include "net-sysfs.h"
146
147
148#define MAX_GRO_SKBS 8
149
150
151#define GRO_MAX_HEAD (MAX_HEADER + 128)
152
153static DEFINE_SPINLOCK(ptype_lock);
154static DEFINE_SPINLOCK(offload_lock);
155struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
156struct list_head ptype_all __read_mostly;
157static struct list_head offload_base __read_mostly;
158
159static int netif_rx_internal(struct sk_buff *skb);
160static int call_netdevice_notifiers_info(unsigned long val,
161 struct net_device *dev,
162 struct netdev_notifier_info *info);
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183DEFINE_RWLOCK(dev_base_lock);
184EXPORT_SYMBOL(dev_base_lock);
185
186
187static DEFINE_SPINLOCK(napi_hash_lock);
188
189static unsigned int napi_gen_id = NR_CPUS;
190static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
191
192static seqcount_t devnet_rename_seq;
193
194static inline void dev_base_seq_inc(struct net *net)
195{
196 while (++net->dev_base_seq == 0);
197}
198
199static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
200{
201 unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
202
203 return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
204}
205
206static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
207{
208 return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
209}
210
211static inline void rps_lock(struct softnet_data *sd)
212{
213#ifdef CONFIG_RPS
214 spin_lock(&sd->input_pkt_queue.lock);
215#endif
216}
217
218static inline void rps_unlock(struct softnet_data *sd)
219{
220#ifdef CONFIG_RPS
221 spin_unlock(&sd->input_pkt_queue.lock);
222#endif
223}
224
225
226static void list_netdevice(struct net_device *dev)
227{
228 struct net *net = dev_net(dev);
229
230 ASSERT_RTNL();
231
232 write_lock_bh(&dev_base_lock);
233 list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
234 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
235 hlist_add_head_rcu(&dev->index_hlist,
236 dev_index_hash(net, dev->ifindex));
237 write_unlock_bh(&dev_base_lock);
238
239 dev_base_seq_inc(net);
240}
241
242
243
244
245static void unlist_netdevice(struct net_device *dev)
246{
247 ASSERT_RTNL();
248
249
250 write_lock_bh(&dev_base_lock);
251 list_del_rcu(&dev->dev_list);
252 hlist_del_rcu(&dev->name_hlist);
253 hlist_del_rcu(&dev->index_hlist);
254 write_unlock_bh(&dev_base_lock);
255
256 dev_base_seq_inc(dev_net(dev));
257}
258
259
260
261
262
263static RAW_NOTIFIER_HEAD(netdev_chain);
264
265
266
267
268
269
270DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
271EXPORT_PER_CPU_SYMBOL(softnet_data);
272
273#ifdef CONFIG_LOCKDEP
274
275
276
277
278static const unsigned short netdev_lock_type[] =
279 {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
280 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
281 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
282 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
283 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
284 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
285 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
286 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
287 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
288 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
289 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
290 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
291 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
292 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
293 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
294
295static const char *const netdev_lock_name[] =
296 {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
297 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
298 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
299 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
300 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
301 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
302 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
303 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
304 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
305 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
306 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
307 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
308 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
309 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
310 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
311
312static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
313static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
314
315static inline unsigned short netdev_lock_pos(unsigned short dev_type)
316{
317 int i;
318
319 for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
320 if (netdev_lock_type[i] == dev_type)
321 return i;
322
323 return ARRAY_SIZE(netdev_lock_type) - 1;
324}
325
326static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
327 unsigned short dev_type)
328{
329 int i;
330
331 i = netdev_lock_pos(dev_type);
332 lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
333 netdev_lock_name[i]);
334}
335
336static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
337{
338 int i;
339
340 i = netdev_lock_pos(dev->type);
341 lockdep_set_class_and_name(&dev->addr_list_lock,
342 &netdev_addr_lock_key[i],
343 netdev_lock_name[i]);
344}
345#else
346static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
347 unsigned short dev_type)
348{
349}
350static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
351{
352}
353#endif
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377static inline struct list_head *ptype_head(const struct packet_type *pt)
378{
379 if (pt->type == htons(ETH_P_ALL))
380 return pt->dev ? &pt->dev->ptype_all : &ptype_all;
381 else
382 return pt->dev ? &pt->dev->ptype_specific :
383 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
384}
385
/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all CPUs
 *	currently in the middle of receiving packets will see the new packet
 *	type (until the next received packet).
 */
399void dev_add_pack(struct packet_type *pt)
400{
401 struct list_head *head = ptype_head(pt);
402
403 spin_lock(&ptype_lock);
404 list_add_rcu(&pt->list, head);
405 spin_unlock(&ptype_lock);
406}
407EXPORT_SYMBOL(dev_add_pack);
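/*
 * Illustrative sketch (not part of the original file): how a protocol
 * module typically pairs dev_add_pack()/dev_remove_pack(). The names
 * "example_rcv" and "example_pt" are hypothetical.
 *
 *	static int example_rcv(struct sk_buff *skb, struct net_device *dev,
 *			       struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);			// a real handler would parse skb here
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type example_pt __read_mostly = {
 *		.type	= cpu_to_be16(ETH_P_IP),
 *		.func	= example_rcv,
 *	};
 *
 *	// module init:  dev_add_pack(&example_pt);
 *	// module exit:  dev_remove_pack(&example_pt);   (may sleep)
 */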
408
/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists.
 *
 *	The packet type might still be in use by receivers and must not be
 *	freed until all CPUs have gone through a quiescent state; callers who
 *	need that guarantee should use dev_remove_pack() instead.
 */
422void __dev_remove_pack(struct packet_type *pt)
423{
424 struct list_head *head = ptype_head(pt);
425 struct packet_type *pt1;
426
427 spin_lock(&ptype_lock);
428
429 list_for_each_entry(pt1, head, list) {
430 if (pt == pt1) {
431 list_del_rcu(&pt->list);
432 goto out;
433 }
434 }
435
436 pr_warn("dev_remove_pack: %p not found\n", pt);
437out:
438 spin_unlock(&ptype_lock);
439}
440EXPORT_SYMBOL(__dev_remove_pack);
441
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps (synchronize_net) to guarantee that no CPU is still
 *	looking at the packet type after return.
 */
454void dev_remove_pack(struct packet_type *pt)
455{
456 __dev_remove_pack(pt);
457
458 synchronize_net();
459}
460EXPORT_SYMBOL(dev_remove_pack);
461
/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&packet_offload is linked into kernel lists, ordered by priority, and
 *	may not be freed until it has been removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that all CPUs
 *	currently in the middle of receiving packets will see the new offload
 *	handlers (until the next received packet).
 */
475void dev_add_offload(struct packet_offload *po)
476{
477 struct packet_offload *elem;
478
479 spin_lock(&offload_lock);
480 list_for_each_entry(elem, &offload_base, list) {
481 if (po->priority < elem->priority)
482 break;
483 }
484 list_add_rcu(&po->list, elem->list.prev);
485 spin_unlock(&offload_lock);
486}
487EXPORT_SYMBOL(dev_add_offload);
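/*
 * Illustrative sketch (assumption, not from the original file): GRO/GSO
 * protocol code registers a &packet_offload much like a &packet_type.
 * "example_offload" and its callbacks are hypothetical names.
 *
 *	static struct packet_offload example_offload __read_mostly = {
 *		.type = cpu_to_be16(ETH_P_IP),
 *		.callbacks = {
 *			.gso_segment	= example_gso_segment,
 *			.gro_receive	= example_gro_receive,
 *			.gro_complete	= example_gro_complete,
 *		},
 *	};
 *
 *	dev_add_offload(&example_offload);
 */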
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502static void __dev_remove_offload(struct packet_offload *po)
503{
504 struct list_head *head = &offload_base;
505 struct packet_offload *po1;
506
507 spin_lock(&offload_lock);
508
509 list_for_each_entry(po1, head, list) {
510 if (po == po1) {
511 list_del_rcu(&po->list);
512 goto out;
513 }
514 }
515
516 pr_warn("dev_remove_offload: %p not found\n", po);
517out:
518 spin_unlock(&offload_lock);
519}
520
521
522
523
524
525
526
527
528
529
530
531
532
533void dev_remove_offload(struct packet_offload *po)
534{
535 __dev_remove_offload(po);
536
537 synchronize_net();
538}
539EXPORT_SYMBOL(dev_remove_offload);
540
541
542
543
544
545
546
547
548static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
549
550
551
552
553
554
555
556
557
558
559static int netdev_boot_setup_add(char *name, struct ifmap *map)
560{
561 struct netdev_boot_setup *s;
562 int i;
563
564 s = dev_boot_setup;
565 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
566 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
567 memset(s[i].name, 0, sizeof(s[i].name));
568 strlcpy(s[i].name, name, IFNAMSIZ);
569 memcpy(&s[i].map, map, sizeof(s[i].map));
570 break;
571 }
572 }
573
574 return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
575}
576
577
578
579
580
581
582
583
584
585
586int netdev_boot_setup_check(struct net_device *dev)
587{
588 struct netdev_boot_setup *s = dev_boot_setup;
589 int i;
590
591 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
592 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
593 !strcmp(dev->name, s[i].name)) {
594 dev->irq = s[i].map.irq;
595 dev->base_addr = s[i].map.base_addr;
596 dev->mem_start = s[i].map.mem_start;
597 dev->mem_end = s[i].map.mem_end;
598 return 1;
599 }
600 }
601 return 0;
602}
603EXPORT_SYMBOL(netdev_boot_setup_check);
604
605
606
607
608
609
610
611
612
613
614
615
616unsigned long netdev_boot_base(const char *prefix, int unit)
617{
618 const struct netdev_boot_setup *s = dev_boot_setup;
619 char name[IFNAMSIZ];
620 int i;
621
622 sprintf(name, "%s%d", prefix, unit);
623
624
625
626
627
628 if (__dev_get_by_name(&init_net, name))
629 return 1;
630
631 for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
632 if (!strcmp(name, s[i].name))
633 return s[i].map.base_addr;
634 return 0;
635}
636
637
638
639
640int __init netdev_boot_setup(char *str)
641{
642 int ints[5];
643 struct ifmap map;
644
645 str = get_options(str, ARRAY_SIZE(ints), ints);
646 if (!str || !*str)
647 return 0;
648
649
650 memset(&map, 0, sizeof(map));
651 if (ints[0] > 0)
652 map.irq = ints[1];
653 if (ints[0] > 1)
654 map.base_addr = ints[2];
655 if (ints[0] > 2)
656 map.mem_start = ints[3];
657 if (ints[0] > 3)
658 map.mem_end = ints[4];
659
660
661 return netdev_boot_setup_add(str, &map);
662}
663
664__setup("netdev=", netdev_boot_setup);
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680int dev_get_iflink(const struct net_device *dev)
681{
682 if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
683 return dev->netdev_ops->ndo_get_iflink(dev);
684
685 return dev->ifindex;
686}
687EXPORT_SYMBOL(dev_get_iflink);
688
689
690
691
692
693
694
695
696
697
698int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
699{
700 struct ip_tunnel_info *info;
701
702 if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
703 return -EINVAL;
704
705 info = skb_tunnel_info_unclone(skb);
706 if (!info)
707 return -ENOMEM;
708 if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
709 return -EINVAL;
710
711 return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
712}
713EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
714
715
716
717
718
719
720
721
722
723
724
725
726
727struct net_device *__dev_get_by_name(struct net *net, const char *name)
728{
729 struct net_device *dev;
730 struct hlist_head *head = dev_name_hash(net, name);
731
732 hlist_for_each_entry(dev, head, name_hlist)
733 if (!strncmp(dev->name, name, IFNAMSIZ))
734 return dev;
735
736 return NULL;
737}
738EXPORT_SYMBOL(__dev_get_by_name);
739
740
741
742
743
744
745
746
747
748
749
750
751
752struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
753{
754 struct net_device *dev;
755 struct hlist_head *head = dev_name_hash(net, name);
756
757 hlist_for_each_entry_rcu(dev, head, name_hlist)
758 if (!strncmp(dev->name, name, IFNAMSIZ))
759 return dev;
760
761 return NULL;
762}
763EXPORT_SYMBOL(dev_get_by_name_rcu);
764
/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any context and
 *	does its own locking. The returned handle has its usage count
 *	incremented and the caller must use dev_put() to release it when it
 *	is no longer needed. %NULL is returned if no matching device is found.
 */
777struct net_device *dev_get_by_name(struct net *net, const char *name)
778{
779 struct net_device *dev;
780
781 rcu_read_lock();
782 dev = dev_get_by_name_rcu(net, name);
783 if (dev)
784 dev_hold(dev);
785 rcu_read_unlock();
786 return dev;
787}
788EXPORT_SYMBOL(dev_get_by_name);
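/*
 * Usage note with a minimal sketch (illustrative, not from the original
 * file): dev_get_by_name() takes a reference that the caller must drop
 * with dev_put(); the _rcu variant takes no reference and is only valid
 * inside the rcu_read_lock() section.
 *
 *	struct net_device *dev = dev_get_by_name(net, "eth0");
 *	if (dev) {
 *		// ... use dev ...
 *		dev_put(dev);
 *	}
 */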
789
790
791
792
793
794
795
796
797
798
799
800
801
802struct net_device *__dev_get_by_index(struct net *net, int ifindex)
803{
804 struct net_device *dev;
805 struct hlist_head *head = dev_index_hash(net, ifindex);
806
807 hlist_for_each_entry(dev, head, index_hlist)
808 if (dev->ifindex == ifindex)
809 return dev;
810
811 return NULL;
812}
813EXPORT_SYMBOL(__dev_get_by_index);
814
815
816
817
818
819
820
821
822
823
824
825
826struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
827{
828 struct net_device *dev;
829 struct hlist_head *head = dev_index_hash(net, ifindex);
830
831 hlist_for_each_entry_rcu(dev, head, index_hlist)
832 if (dev->ifindex == ifindex)
833 return dev;
834
835 return NULL;
836}
837EXPORT_SYMBOL(dev_get_by_index_rcu);
838
839
840
841
842
843
844
845
846
847
848
849
850
851struct net_device *dev_get_by_index(struct net *net, int ifindex)
852{
853 struct net_device *dev;
854
855 rcu_read_lock();
856 dev = dev_get_by_index_rcu(net, ifindex);
857 if (dev)
858 dev_hold(dev);
859 rcu_read_unlock();
860 return dev;
861}
862EXPORT_SYMBOL(dev_get_by_index);
863
864
865
866
867
868
869
870
871
872
873
874int netdev_get_name(struct net *net, char *name, int ifindex)
875{
876 struct net_device *dev;
877 unsigned int seq;
878
879retry:
880 seq = raw_seqcount_begin(&devnet_rename_seq);
881 rcu_read_lock();
882 dev = dev_get_by_index_rcu(net, ifindex);
883 if (!dev) {
884 rcu_read_unlock();
885 return -ENODEV;
886 }
887
888 strcpy(name, dev->name);
889 rcu_read_unlock();
890 if (read_seqcount_retry(&devnet_rename_seq, seq)) {
891 cond_resched();
892 goto retry;
893 }
894
895 return 0;
896}
897
/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns %NULL if the device
 *	is not found, otherwise a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its refcount increased, so the caller
 *	must be careful about locking and lifetime.
 */
912struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
913 const char *ha)
914{
915 struct net_device *dev;
916
917 for_each_netdev_rcu(net, dev)
918 if (dev->type == type &&
919 !memcmp(dev->dev_addr, ha, dev->addr_len))
920 return dev;
921
922 return NULL;
923}
924EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
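/*
 * Minimal sketch of the required locking (illustrative, not from the
 * original file): the _rcu lookup returns an unreferenced pointer, so it
 * must only be used inside the read-side critical section.
 *
 *	rcu_read_lock();
 *	dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
 *	if (dev)
 *		;	// use dev here only; do not stash the pointer
 *	rcu_read_unlock();
 */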
925
926struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
927{
928 struct net_device *dev;
929
930 ASSERT_RTNL();
931 for_each_netdev(net, dev)
932 if (dev->type == type)
933 return dev;
934
935 return NULL;
936}
937EXPORT_SYMBOL(__dev_getfirstbyhwtype);
938
939struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
940{
941 struct net_device *dev, *ret = NULL;
942
943 rcu_read_lock();
944 for_each_netdev_rcu(net, dev)
945 if (dev->type == type) {
946 dev_hold(dev);
947 ret = dev;
948 break;
949 }
950 rcu_read_unlock();
951 return ret;
952}
953EXPORT_SYMBOL(dev_getfirstbyhwtype);
954
955
956
957
958
959
960
961
962
963
964
965
966struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
967 unsigned short mask)
968{
969 struct net_device *dev, *ret;
970
971 ASSERT_RTNL();
972
973 ret = NULL;
974 for_each_netdev(net, dev) {
975 if (((dev->flags ^ if_flags) & mask) == 0) {
976 ret = dev;
977 break;
978 }
979 }
980 return ret;
981}
982EXPORT_SYMBOL(__dev_get_by_flags);
983
/**
 *	dev_valid_name - check if a name is okay for a network device
 *	@name: name string
 *
 *	Network device names need to be valid file names so that sysfs works;
 *	any kind of whitespace, '/' and ':' are also disallowed.
 */
992bool dev_valid_name(const char *name)
993{
994 if (*name == '\0')
995 return false;
996 if (strlen(name) >= IFNAMSIZ)
997 return false;
998 if (!strcmp(name, ".") || !strcmp(name, ".."))
999 return false;
1000
1001 while (*name) {
1002 if (*name == '/' || *name == ':' || isspace(*name))
1003 return false;
1004 name++;
1005 }
1006 return true;
1007}
1008EXPORT_SYMBOL(dev_valid_name);
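/*
 * Examples (illustrative): dev_valid_name() accepts "eth0" or "veth%d",
 * but rejects "", names of IFNAMSIZ characters or more, ".", "..", and
 * anything containing '/', ':' or whitespace.
 */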
1009
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "eth%d" - it will try to find a suitable
 *	unit number. It scans the list of devices to build a free map, then
 *	chooses the first empty slot. The caller must hold the dev_base or
 *	rtnl lock while allocating the name and adding the device in order
 *	to avoid duplicates. Limited to 8 * PAGE_SIZE devices.
 *	Returns the unit number assigned or a negative errno code.
 */
1025static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1026{
1027 int i = 0;
1028 const char *p;
1029 const int max_netdevices = 8*PAGE_SIZE;
1030 unsigned long *inuse;
1031 struct net_device *d;
1032
1033 p = strnchr(name, IFNAMSIZ-1, '%');
1034 if (p) {
1035
1036
1037
1038
1039
1040 if (p[1] != 'd' || strchr(p + 2, '%'))
1041 return -EINVAL;
1042
1043
1044 inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1045 if (!inuse)
1046 return -ENOMEM;
1047
1048 for_each_netdev(net, d) {
1049 if (!sscanf(d->name, name, &i))
1050 continue;
1051 if (i < 0 || i >= max_netdevices)
1052 continue;
1053
1054
1055 snprintf(buf, IFNAMSIZ, name, i);
1056 if (!strncmp(buf, d->name, IFNAMSIZ))
1057 set_bit(i, inuse);
1058 }
1059
1060 i = find_first_zero_bit(inuse, max_netdevices);
1061 free_page((unsigned long) inuse);
1062 }
1063
1064 if (buf != name)
1065 snprintf(buf, IFNAMSIZ, name, i);
1066 if (!__dev_get_by_name(net, buf))
1067 return i;
1068
1069
1070
1071
1072
1073 return -ENFILE;
1074}
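/*
 * Worked example (illustrative): with name == "eth%d" and existing
 * devices eth0 and eth2, the in-use bitmap gets bits 0 and 2 set, so
 * find_first_zero_bit() picks unit 1 and buf becomes "eth1".
 */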
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090int dev_alloc_name(struct net_device *dev, const char *name)
1091{
1092 char buf[IFNAMSIZ];
1093 struct net *net;
1094 int ret;
1095
1096 BUG_ON(!dev_net(dev));
1097 net = dev_net(dev);
1098 ret = __dev_alloc_name(net, name, buf);
1099 if (ret >= 0)
1100 strlcpy(dev->name, buf, IFNAMSIZ);
1101 return ret;
1102}
1103EXPORT_SYMBOL(dev_alloc_name);
1104
1105static int dev_alloc_name_ns(struct net *net,
1106 struct net_device *dev,
1107 const char *name)
1108{
1109 char buf[IFNAMSIZ];
1110 int ret;
1111
1112 ret = __dev_alloc_name(net, name, buf);
1113 if (ret >= 0)
1114 strlcpy(dev->name, buf, IFNAMSIZ);
1115 return ret;
1116}
1117
1118static int dev_get_valid_name(struct net *net,
1119 struct net_device *dev,
1120 const char *name)
1121{
1122 BUG_ON(!net);
1123
1124 if (!dev_valid_name(name))
1125 return -EINVAL;
1126
1127 if (strchr(name, '%'))
1128 return dev_alloc_name_ns(net, dev, name);
1129 else if (__dev_get_by_name(net, name))
1130 return -EEXIST;
1131 else if (dev->name != name)
1132 strlcpy(dev->name, name, IFNAMSIZ);
1133
1134 return 0;
1135}
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145int dev_change_name(struct net_device *dev, const char *newname)
1146{
1147 unsigned char old_assign_type;
1148 char oldname[IFNAMSIZ];
1149 int err = 0;
1150 int ret;
1151 struct net *net;
1152
1153 ASSERT_RTNL();
1154 BUG_ON(!dev_net(dev));
1155
1156 net = dev_net(dev);
1157 if (dev->flags & IFF_UP)
1158 return -EBUSY;
1159
1160 write_seqcount_begin(&devnet_rename_seq);
1161
1162 if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1163 write_seqcount_end(&devnet_rename_seq);
1164 return 0;
1165 }
1166
1167 memcpy(oldname, dev->name, IFNAMSIZ);
1168
1169 err = dev_get_valid_name(net, dev, newname);
1170 if (err < 0) {
1171 write_seqcount_end(&devnet_rename_seq);
1172 return err;
1173 }
1174
1175 if (oldname[0] && !strchr(oldname, '%'))
1176 netdev_info(dev, "renamed from %s\n", oldname);
1177
1178 old_assign_type = dev->name_assign_type;
1179 dev->name_assign_type = NET_NAME_RENAMED;
1180
1181rollback:
1182 ret = device_rename(&dev->dev, dev->name);
1183 if (ret) {
1184 memcpy(dev->name, oldname, IFNAMSIZ);
1185 dev->name_assign_type = old_assign_type;
1186 write_seqcount_end(&devnet_rename_seq);
1187 return ret;
1188 }
1189
1190 write_seqcount_end(&devnet_rename_seq);
1191
1192 netdev_adjacent_rename_links(dev, oldname);
1193
1194 write_lock_bh(&dev_base_lock);
1195 hlist_del_rcu(&dev->name_hlist);
1196 write_unlock_bh(&dev_base_lock);
1197
1198 synchronize_rcu();
1199
1200 write_lock_bh(&dev_base_lock);
1201 hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
1202 write_unlock_bh(&dev_base_lock);
1203
1204 ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1205 ret = notifier_to_errno(ret);
1206
1207 if (ret) {
1208
1209 if (err >= 0) {
1210 err = ret;
1211 write_seqcount_begin(&devnet_rename_seq);
1212 memcpy(dev->name, oldname, IFNAMSIZ);
1213 memcpy(oldname, newname, IFNAMSIZ);
1214 dev->name_assign_type = old_assign_type;
1215 old_assign_type = NET_NAME_RENAMED;
1216 goto rollback;
1217 } else {
1218 pr_err("%s: name change rollback failed: %d\n",
1219 dev->name, ret);
1220 }
1221 }
1222
1223 return err;
1224}
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1235{
1236 char *new_ifalias;
1237
1238 ASSERT_RTNL();
1239
1240 if (len >= IFALIASZ)
1241 return -EINVAL;
1242
1243 if (!len) {
1244 kfree(dev->ifalias);
1245 dev->ifalias = NULL;
1246 return 0;
1247 }
1248
1249 new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
1250 if (!new_ifalias)
1251 return -ENOMEM;
1252 dev->ifalias = new_ifalias;
1253
1254 strlcpy(dev->ifalias, alias, len+1);
1255 return len;
1256}
1257
1258
1259
1260
1261
1262
1263
1264
1265void netdev_features_change(struct net_device *dev)
1266{
1267 call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1268}
1269EXPORT_SYMBOL(netdev_features_change);
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279void netdev_state_change(struct net_device *dev)
1280{
1281 if (dev->flags & IFF_UP) {
1282 struct netdev_notifier_change_info change_info;
1283
1284 change_info.flags_changed = 0;
1285 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
1286 &change_info.info);
1287 rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1288 }
1289}
1290EXPORT_SYMBOL(netdev_state_change);
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302void netdev_notify_peers(struct net_device *dev)
1303{
1304 rtnl_lock();
1305 call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1306 rtnl_unlock();
1307}
1308EXPORT_SYMBOL(netdev_notify_peers);
1309
1310static int __dev_open(struct net_device *dev)
1311{
1312 const struct net_device_ops *ops = dev->netdev_ops;
1313 int ret;
1314
1315 ASSERT_RTNL();
1316
1317 if (!netif_device_present(dev))
1318 return -ENODEV;
1319
1320
1321
1322
1323
1324 netpoll_poll_disable(dev);
1325
1326 ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
1327 ret = notifier_to_errno(ret);
1328 if (ret)
1329 return ret;
1330
1331 set_bit(__LINK_STATE_START, &dev->state);
1332
1333 if (ops->ndo_validate_addr)
1334 ret = ops->ndo_validate_addr(dev);
1335
1336 if (!ret && ops->ndo_open)
1337 ret = ops->ndo_open(dev);
1338
1339 netpoll_poll_enable(dev);
1340
1341 if (ret)
1342 clear_bit(__LINK_STATE_START, &dev->state);
1343 else {
1344 dev->flags |= IFF_UP;
1345 dev_set_rx_mode(dev);
1346 dev_activate(dev);
1347 add_device_randomness(dev->dev_addr, dev->addr_len);
1348 }
1349
1350 return ret;
1351}
1352
/**
 *	dev_open - prepare an interface for use
 *	@dev: device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On failure a
 *	negative errno code is returned.
 */
1365int dev_open(struct net_device *dev)
1366{
1367 int ret;
1368
1369 if (dev->flags & IFF_UP)
1370 return 0;
1371
1372 ret = __dev_open(dev);
1373 if (ret < 0)
1374 return ret;
1375
1376 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1377 call_netdevice_notifiers(NETDEV_UP, dev);
1378
1379 return ret;
1380}
1381EXPORT_SYMBOL(dev_open);
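/*
 * Minimal sketch (illustrative, not from the original file): dev_open()
 * must be called with RTNL held, as __dev_open() asserts.
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */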
1382
1383static int __dev_close_many(struct list_head *head)
1384{
1385 struct net_device *dev;
1386
1387 ASSERT_RTNL();
1388 might_sleep();
1389
1390 list_for_each_entry(dev, head, close_list) {
1391
1392 netpoll_poll_disable(dev);
1393
1394 call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1395
1396 clear_bit(__LINK_STATE_START, &dev->state);
1397
1398
1399
1400
1401
1402
1403
1404 smp_mb__after_atomic();
1405 }
1406
1407 dev_deactivate_many(head);
1408
1409 list_for_each_entry(dev, head, close_list) {
1410 const struct net_device_ops *ops = dev->netdev_ops;
1411
1412
1413
1414
1415
1416
1417
1418
1419 if (ops->ndo_stop)
1420 ops->ndo_stop(dev);
1421
1422 dev->flags &= ~IFF_UP;
1423 netpoll_poll_enable(dev);
1424 }
1425
1426 return 0;
1427}
1428
1429static int __dev_close(struct net_device *dev)
1430{
1431 int retval;
1432 LIST_HEAD(single);
1433
1434 list_add(&dev->close_list, &single);
1435 retval = __dev_close_many(&single);
1436 list_del(&single);
1437
1438 return retval;
1439}
1440
1441int dev_close_many(struct list_head *head, bool unlink)
1442{
1443 struct net_device *dev, *tmp;
1444
1445
1446 list_for_each_entry_safe(dev, tmp, head, close_list)
1447 if (!(dev->flags & IFF_UP))
1448 list_del_init(&dev->close_list);
1449
1450 __dev_close_many(head);
1451
1452 list_for_each_entry_safe(dev, tmp, head, close_list) {
1453 rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1454 call_netdevice_notifiers(NETDEV_DOWN, dev);
1455 if (unlink)
1456 list_del_init(&dev->close_list);
1457 }
1458
1459 return 0;
1460}
1461EXPORT_SYMBOL(dev_close_many);
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472int dev_close(struct net_device *dev)
1473{
1474 if (dev->flags & IFF_UP) {
1475 LIST_HEAD(single);
1476
1477 list_add(&dev->close_list, &single);
1478 dev_close_many(&single, true);
1479 list_del(&single);
1480 }
1481 return 0;
1482}
1483EXPORT_SYMBOL(dev_close);
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494void dev_disable_lro(struct net_device *dev)
1495{
1496 struct net_device *lower_dev;
1497 struct list_head *iter;
1498
1499 dev->wanted_features &= ~NETIF_F_LRO;
1500 netdev_update_features(dev);
1501
1502 if (unlikely(dev->features & NETIF_F_LRO))
1503 netdev_WARN(dev, "failed to disable LRO!\n");
1504
1505 netdev_for_each_lower_dev(dev, lower_dev, iter)
1506 dev_disable_lro(lower_dev);
1507}
1508EXPORT_SYMBOL(dev_disable_lro);
1509
1510static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
1511 struct net_device *dev)
1512{
1513 struct netdev_notifier_info info;
1514
1515 netdev_notifier_info_init(&info, dev);
1516 return nb->notifier_call(nb, val, &info);
1517}
1518
1519static int dev_boot_phase = 1;
1520
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on failure.
 *
 *	When registered, all existing devices are replayed to the new
 *	notifier as %NETDEV_REGISTER (and %NETDEV_UP where applicable) so it
 *	gets a race-free view of the network device list.
 */
1535int register_netdevice_notifier(struct notifier_block *nb)
1536{
1537 struct net_device *dev;
1538 struct net_device *last;
1539 struct net *net;
1540 int err;
1541
1542 rtnl_lock();
1543 err = raw_notifier_chain_register(&netdev_chain, nb);
1544 if (err)
1545 goto unlock;
1546 if (dev_boot_phase)
1547 goto unlock;
1548 for_each_net(net) {
1549 for_each_netdev(net, dev) {
1550 err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1551 err = notifier_to_errno(err);
1552 if (err)
1553 goto rollback;
1554
1555 if (!(dev->flags & IFF_UP))
1556 continue;
1557
1558 call_netdevice_notifier(nb, NETDEV_UP, dev);
1559 }
1560 }
1561
1562unlock:
1563 rtnl_unlock();
1564 return err;
1565
1566rollback:
1567 last = dev;
1568 for_each_net(net) {
1569 for_each_netdev(net, dev) {
1570 if (dev == last)
1571 goto outroll;
1572
1573 if (dev->flags & IFF_UP) {
1574 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1575 dev);
1576 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1577 }
1578 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1579 }
1580 }
1581
1582outroll:
1583 raw_notifier_chain_unregister(&netdev_chain, nb);
1584 goto unlock;
1585}
1586EXPORT_SYMBOL(register_netdevice_notifier);
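/*
 * Illustrative sketch (hypothetical names, not from the original file):
 * a subsystem watching device events registers a notifier_block; existing
 * devices are replayed as NETDEV_REGISTER/NETDEV_UP on registration.
 *
 *	static int example_netdev_event(struct notifier_block *nb,
 *					unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 *
 *		if (event == NETDEV_UP)
 *			pr_debug("%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&example_nb);
 */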
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602int unregister_netdevice_notifier(struct notifier_block *nb)
1603{
1604 struct net_device *dev;
1605 struct net *net;
1606 int err;
1607
1608 rtnl_lock();
1609 err = raw_notifier_chain_unregister(&netdev_chain, nb);
1610 if (err)
1611 goto unlock;
1612
1613 for_each_net(net) {
1614 for_each_netdev(net, dev) {
1615 if (dev->flags & IFF_UP) {
1616 call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1617 dev);
1618 call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1619 }
1620 call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1621 }
1622 }
1623unlock:
1624 rtnl_unlock();
1625 return err;
1626}
1627EXPORT_SYMBOL(unregister_netdevice_notifier);
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639static int call_netdevice_notifiers_info(unsigned long val,
1640 struct net_device *dev,
1641 struct netdev_notifier_info *info)
1642{
1643 ASSERT_RTNL();
1644 netdev_notifier_info_init(info, dev);
1645 return raw_notifier_call_chain(&netdev_chain, val, info);
1646}
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
1658{
1659 struct netdev_notifier_info info;
1660
1661 return call_netdevice_notifiers_info(val, dev, &info);
1662}
1663EXPORT_SYMBOL(call_netdevice_notifiers);
1664
1665#ifdef CONFIG_NET_INGRESS
1666static struct static_key ingress_needed __read_mostly;
1667
1668void net_inc_ingress_queue(void)
1669{
1670 static_key_slow_inc(&ingress_needed);
1671}
1672EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
1673
1674void net_dec_ingress_queue(void)
1675{
1676 static_key_slow_dec(&ingress_needed);
1677}
1678EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
1679#endif
1680
1681#ifdef CONFIG_NET_EGRESS
1682static struct static_key egress_needed __read_mostly;
1683
1684void net_inc_egress_queue(void)
1685{
1686 static_key_slow_inc(&egress_needed);
1687}
1688EXPORT_SYMBOL_GPL(net_inc_egress_queue);
1689
1690void net_dec_egress_queue(void)
1691{
1692 static_key_slow_dec(&egress_needed);
1693}
1694EXPORT_SYMBOL_GPL(net_dec_egress_queue);
1695#endif
1696
1697static struct static_key netstamp_needed __read_mostly;
1698#ifdef HAVE_JUMP_LABEL
1699
1700
1701
1702
1703static atomic_t netstamp_needed_deferred;
1704#endif
1705
1706void net_enable_timestamp(void)
1707{
1708#ifdef HAVE_JUMP_LABEL
1709 int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
1710
1711 if (deferred) {
1712 while (--deferred)
1713 static_key_slow_dec(&netstamp_needed);
1714 return;
1715 }
1716#endif
1717 static_key_slow_inc(&netstamp_needed);
1718}
1719EXPORT_SYMBOL(net_enable_timestamp);
1720
1721void net_disable_timestamp(void)
1722{
1723#ifdef HAVE_JUMP_LABEL
1724 if (in_interrupt()) {
1725 atomic_inc(&netstamp_needed_deferred);
1726 return;
1727 }
1728#endif
1729 static_key_slow_dec(&netstamp_needed);
1730}
1731EXPORT_SYMBOL(net_disable_timestamp);
1732
1733static inline void net_timestamp_set(struct sk_buff *skb)
1734{
1735 skb->tstamp.tv64 = 0;
1736 if (static_key_false(&netstamp_needed))
1737 __net_timestamp(skb);
1738}
1739
1740#define net_timestamp_check(COND, SKB) \
1741 if (static_key_false(&netstamp_needed)) { \
1742 if ((COND) && !(SKB)->tstamp.tv64) \
1743 __net_timestamp(SKB); \
1744 } \
1745
1746bool is_skb_forwardable(const struct net_device *dev, const struct sk_buff *skb)
1747{
1748 unsigned int len;
1749
1750 if (!(dev->flags & IFF_UP))
1751 return false;
1752
1753 len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
1754 if (skb->len <= len)
1755 return true;
1756
1757
1758
1759
1760 if (skb_is_gso(skb))
1761 return true;
1762
1763 return false;
1764}
1765EXPORT_SYMBOL_GPL(is_skb_forwardable);
1766
1767int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1768{
1769 if (skb_orphan_frags(skb, GFP_ATOMIC) ||
1770 unlikely(!is_skb_forwardable(dev, skb))) {
1771 atomic_long_inc(&dev->rx_dropped);
1772 kfree_skb(skb);
1773 return NET_RX_DROP;
1774 }
1775
1776 skb_scrub_packet(skb, true);
1777 skb->priority = 0;
1778 skb->protocol = eth_type_trans(skb, dev);
1779 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
1780
1781 return 0;
1782}
1783EXPORT_SYMBOL_GPL(__dev_forward_skb);
1784
1785
/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the start_xmit
 * function of one device into the receive queue of another device.
 *
 * The receiving device may be in another namespace, so the skb is
 * scrubbed of any state that could impact namespace isolation.
 */
1803int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1804{
1805 return __dev_forward_skb(dev, skb) ?: netif_rx_internal(skb);
1806}
1807EXPORT_SYMBOL_GPL(dev_forward_skb);
1808
1809static inline int deliver_skb(struct sk_buff *skb,
1810 struct packet_type *pt_prev,
1811 struct net_device *orig_dev)
1812{
1813 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
1814 return -ENOMEM;
1815 atomic_inc(&skb->users);
1816 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1817}
1818
1819static inline void deliver_ptype_list_skb(struct sk_buff *skb,
1820 struct packet_type **pt,
1821 struct net_device *orig_dev,
1822 __be16 type,
1823 struct list_head *ptype_list)
1824{
1825 struct packet_type *ptype, *pt_prev = *pt;
1826
1827 list_for_each_entry_rcu(ptype, ptype_list, list) {
1828 if (ptype->type != type)
1829 continue;
1830 if (pt_prev)
1831 deliver_skb(skb, pt_prev, orig_dev);
1832 pt_prev = ptype;
1833 }
1834 *pt = pt_prev;
1835}
1836
1837static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
1838{
1839 if (!ptype->af_packet_priv || !skb->sk)
1840 return false;
1841
1842 if (ptype->id_match)
1843 return ptype->id_match(ptype, skb->sk);
1844 else if ((struct sock *)ptype->af_packet_priv == skb->sk)
1845 return true;
1846
1847 return false;
1848}
1849
1850
1851
1852
1853
1854
1855void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1856{
1857 struct packet_type *ptype;
1858 struct sk_buff *skb2 = NULL;
1859 struct packet_type *pt_prev = NULL;
1860 struct list_head *ptype_list = &ptype_all;
1861
1862 rcu_read_lock();
1863again:
1864 list_for_each_entry_rcu(ptype, ptype_list, list) {
1865
1866
1867
1868 if (skb_loop_sk(ptype, skb))
1869 continue;
1870
1871 if (pt_prev) {
1872 deliver_skb(skb2, pt_prev, skb->dev);
1873 pt_prev = ptype;
1874 continue;
1875 }
1876
1877
1878 skb2 = skb_clone(skb, GFP_ATOMIC);
1879 if (!skb2)
1880 goto out_unlock;
1881
1882 net_timestamp_set(skb2);
1883
1884
1885
1886
1887
1888 skb_reset_mac_header(skb2);
1889
1890 if (skb_network_header(skb2) < skb2->data ||
1891 skb_network_header(skb2) > skb_tail_pointer(skb2)) {
1892 net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
1893 ntohs(skb2->protocol),
1894 dev->name);
1895 skb_reset_network_header(skb2);
1896 }
1897
1898 skb2->transport_header = skb2->network_header;
1899 skb2->pkt_type = PACKET_OUTGOING;
1900 pt_prev = ptype;
1901 }
1902
1903 if (ptype_list == &ptype_all) {
1904 ptype_list = &dev->ptype_all;
1905 goto again;
1906 }
1907out_unlock:
1908 if (pt_prev)
1909 pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
1910 rcu_read_unlock();
1911}
1912EXPORT_SYMBOL_GPL(dev_queue_xmit_nit);
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927static void netif_setup_tc(struct net_device *dev, unsigned int txq)
1928{
1929 int i;
1930 struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
1931
1932
1933 if (tc->offset + tc->count > txq) {
1934 pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
1935 dev->num_tc = 0;
1936 return;
1937 }
1938
1939
1940 for (i = 1; i < TC_BITMASK + 1; i++) {
1941 int q = netdev_get_prio_tc_map(dev, i);
1942
1943 tc = &dev->tc_to_txq[q];
1944 if (tc->offset + tc->count > txq) {
1945 pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
1946 i, q);
1947 netdev_set_prio_tc_map(dev, i, 0);
1948 }
1949 }
1950}
1951
1952#ifdef CONFIG_XPS
1953static DEFINE_MUTEX(xps_map_mutex);
1954#define xmap_dereference(P) \
1955 rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
1956
1957static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
1958 int cpu, u16 index)
1959{
1960 struct xps_map *map = NULL;
1961 int pos;
1962
1963 if (dev_maps)
1964 map = xmap_dereference(dev_maps->cpu_map[cpu]);
1965
1966 for (pos = 0; map && pos < map->len; pos++) {
1967 if (map->queues[pos] == index) {
1968 if (map->len > 1) {
1969 map->queues[pos] = map->queues[--map->len];
1970 } else {
1971 RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
1972 kfree_rcu(map, rcu);
1973 map = NULL;
1974 }
1975 break;
1976 }
1977 }
1978
1979 return map;
1980}
1981
1982static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
1983{
1984 struct xps_dev_maps *dev_maps;
1985 int cpu, i;
1986 bool active = false;
1987
1988 mutex_lock(&xps_map_mutex);
1989 dev_maps = xmap_dereference(dev->xps_maps);
1990
1991 if (!dev_maps)
1992 goto out_no_maps;
1993
1994 for_each_possible_cpu(cpu) {
1995 for (i = index; i < dev->num_tx_queues; i++) {
1996 if (!remove_xps_queue(dev_maps, cpu, i))
1997 break;
1998 }
1999 if (i == dev->num_tx_queues)
2000 active = true;
2001 }
2002
2003 if (!active) {
2004 RCU_INIT_POINTER(dev->xps_maps, NULL);
2005 kfree_rcu(dev_maps, rcu);
2006 }
2007
2008 for (i = index; i < dev->num_tx_queues; i++)
2009 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
2010 NUMA_NO_NODE);
2011
2012out_no_maps:
2013 mutex_unlock(&xps_map_mutex);
2014}
2015
2016static struct xps_map *expand_xps_map(struct xps_map *map,
2017 int cpu, u16 index)
2018{
2019 struct xps_map *new_map;
2020 int alloc_len = XPS_MIN_MAP_ALLOC;
2021 int i, pos;
2022
2023 for (pos = 0; map && pos < map->len; pos++) {
2024 if (map->queues[pos] != index)
2025 continue;
2026 return map;
2027 }
2028
2029
2030 if (map) {
2031 if (pos < map->alloc_len)
2032 return map;
2033
2034 alloc_len = map->alloc_len * 2;
2035 }
2036
2037
2038 new_map = kzalloc_node(XPS_MAP_SIZE(alloc_len), GFP_KERNEL,
2039 cpu_to_node(cpu));
2040 if (!new_map)
2041 return NULL;
2042
2043 for (i = 0; i < pos; i++)
2044 new_map->queues[i] = map->queues[i];
2045 new_map->alloc_len = alloc_len;
2046 new_map->len = pos;
2047
2048 return new_map;
2049}
2050
2051int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
2052 u16 index)
2053{
2054 struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
2055 struct xps_map *map, *new_map;
2056 int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
2057 int cpu, numa_node_id = -2;
2058 bool active = false;
2059
2060 mutex_lock(&xps_map_mutex);
2061
2062 dev_maps = xmap_dereference(dev->xps_maps);
2063
2064
2065 for_each_online_cpu(cpu) {
2066 if (!cpumask_test_cpu(cpu, mask))
2067 continue;
2068
2069 if (!new_dev_maps)
2070 new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
2071 if (!new_dev_maps) {
2072 mutex_unlock(&xps_map_mutex);
2073 return -ENOMEM;
2074 }
2075
2076 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2077 NULL;
2078
2079 map = expand_xps_map(map, cpu, index);
2080 if (!map)
2081 goto error;
2082
2083 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2084 }
2085
2086 if (!new_dev_maps)
2087 goto out_no_new_maps;
2088
2089 for_each_possible_cpu(cpu) {
2090 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
2091
2092 int pos = 0;
2093
2094 map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2095 while ((pos < map->len) && (map->queues[pos] != index))
2096 pos++;
2097
2098 if (pos == map->len)
2099 map->queues[map->len++] = index;
2100#ifdef CONFIG_NUMA
2101 if (numa_node_id == -2)
2102 numa_node_id = cpu_to_node(cpu);
2103 else if (numa_node_id != cpu_to_node(cpu))
2104 numa_node_id = -1;
2105#endif
2106 } else if (dev_maps) {
2107
2108 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2109 RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
2110 }
2111
2112 }
2113
2114 rcu_assign_pointer(dev->xps_maps, new_dev_maps);
2115
2116
2117 if (dev_maps) {
2118 for_each_possible_cpu(cpu) {
2119 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2120 map = xmap_dereference(dev_maps->cpu_map[cpu]);
2121 if (map && map != new_map)
2122 kfree_rcu(map, rcu);
2123 }
2124
2125 kfree_rcu(dev_maps, rcu);
2126 }
2127
2128 dev_maps = new_dev_maps;
2129 active = true;
2130
2131out_no_new_maps:
2132
2133 netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
2134 (numa_node_id >= 0) ? numa_node_id :
2135 NUMA_NO_NODE);
2136
2137 if (!dev_maps)
2138 goto out_no_maps;
2139
2140
2141 for_each_possible_cpu(cpu) {
2142 if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
2143 continue;
2144
2145 if (remove_xps_queue(dev_maps, cpu, index))
2146 active = true;
2147 }
2148
2149
2150 if (!active) {
2151 RCU_INIT_POINTER(dev->xps_maps, NULL);
2152 kfree_rcu(dev_maps, rcu);
2153 }
2154
2155out_no_maps:
2156 mutex_unlock(&xps_map_mutex);
2157
2158 return 0;
2159error:
2160
2161 for_each_possible_cpu(cpu) {
2162 new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
2163 map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
2164 NULL;
2165 if (new_map && new_map != map)
2166 kfree(new_map);
2167 }
2168
2169 mutex_unlock(&xps_map_mutex);
2170
2171 kfree(new_dev_maps);
2172 return -ENOMEM;
2173}
2174EXPORT_SYMBOL(netif_set_xps_queue);
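/*
 * Minimal sketch (illustrative, not from the original file): a multiqueue
 * driver can pin each tx queue to one CPU via XPS. Error handling is
 * omitted and the variable names are hypothetical.
 *
 *	for (i = 0; i < dev->real_num_tx_queues; i++) {
 *		cpumask_clear(&mask);
 *		cpumask_set_cpu(i % num_online_cpus(), &mask);
 *		netif_set_xps_queue(dev, &mask, i);
 *	}
 */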
2175
2176#endif
2177
2178
2179
2180
2181int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
2182{
2183 int rc;
2184
2185 if (txq < 1 || txq > dev->num_tx_queues)
2186 return -EINVAL;
2187
2188 if (dev->reg_state == NETREG_REGISTERED ||
2189 dev->reg_state == NETREG_UNREGISTERING) {
2190 ASSERT_RTNL();
2191
2192 rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
2193 txq);
2194 if (rc)
2195 return rc;
2196
2197 if (dev->num_tc)
2198 netif_setup_tc(dev, txq);
2199
2200 if (txq < dev->real_num_tx_queues) {
2201 qdisc_reset_all_tx_gt(dev, txq);
2202#ifdef CONFIG_XPS
2203 netif_reset_xps_queues_gt(dev, txq);
2204#endif
2205 }
2206 }
2207
2208 dev->real_num_tx_queues = txq;
2209 return 0;
2210}
2211EXPORT_SYMBOL(netif_set_real_num_tx_queues);
2212
2213#ifdef CONFIG_SYSFS
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
2225{
2226 int rc;
2227
2228 if (rxq < 1 || rxq > dev->num_rx_queues)
2229 return -EINVAL;
2230
2231 if (dev->reg_state == NETREG_REGISTERED) {
2232 ASSERT_RTNL();
2233
2234 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
2235 rxq);
2236 if (rc)
2237 return rc;
2238 }
2239
2240 dev->real_num_rx_queues = rxq;
2241 return 0;
2242}
2243EXPORT_SYMBOL(netif_set_real_num_rx_queues);
2244#endif
2245
2246
2247
2248
2249
2250
2251
2252int netif_get_num_default_rss_queues(void)
2253{
2254 return is_kdump_kernel() ?
2255 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
2256}
2257EXPORT_SYMBOL(netif_get_num_default_rss_queues);
2258
2259static void __netif_reschedule(struct Qdisc *q)
2260{
2261 struct softnet_data *sd;
2262 unsigned long flags;
2263
2264 local_irq_save(flags);
2265 sd = this_cpu_ptr(&softnet_data);
2266 q->next_sched = NULL;
2267 *sd->output_queue_tailp = q;
2268 sd->output_queue_tailp = &q->next_sched;
2269 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2270 local_irq_restore(flags);
2271}
2272
2273void __netif_schedule(struct Qdisc *q)
2274{
2275 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2276 __netif_reschedule(q);
2277}
2278EXPORT_SYMBOL(__netif_schedule);
2279
2280struct dev_kfree_skb_cb {
2281 enum skb_free_reason reason;
2282};
2283
2284static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb)
2285{
2286 return (struct dev_kfree_skb_cb *)skb->cb;
2287}
2288
2289void netif_schedule_queue(struct netdev_queue *txq)
2290{
2291 rcu_read_lock();
2292 if (!(txq->state & QUEUE_STATE_ANY_XOFF)) {
2293 struct Qdisc *q = rcu_dereference(txq->qdisc);
2294
2295 __netif_schedule(q);
2296 }
2297 rcu_read_unlock();
2298}
2299EXPORT_SYMBOL(netif_schedule_queue);
2300
2301
2302
2303
2304
2305
2306
2307
2308void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
2309{
2310 struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index);
2311
2312 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) {
2313 struct Qdisc *q;
2314
2315 rcu_read_lock();
2316 q = rcu_dereference(txq->qdisc);
2317 __netif_schedule(q);
2318 rcu_read_unlock();
2319 }
2320}
2321EXPORT_SYMBOL(netif_wake_subqueue);
2322
2323void netif_tx_wake_queue(struct netdev_queue *dev_queue)
2324{
2325 if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) {
2326 struct Qdisc *q;
2327
2328 rcu_read_lock();
2329 q = rcu_dereference(dev_queue->qdisc);
2330 __netif_schedule(q);
2331 rcu_read_unlock();
2332 }
2333}
2334EXPORT_SYMBOL(netif_tx_wake_queue);
2335
2336void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
2337{
2338 unsigned long flags;
2339
2340 if (likely(atomic_read(&skb->users) == 1)) {
2341 smp_rmb();
2342 atomic_set(&skb->users, 0);
2343 } else if (likely(!atomic_dec_and_test(&skb->users))) {
2344 return;
2345 }
2346 get_kfree_skb_cb(skb)->reason = reason;
2347 local_irq_save(flags);
2348 skb->next = __this_cpu_read(softnet_data.completion_queue);
2349 __this_cpu_write(softnet_data.completion_queue, skb);
2350 raise_softirq_irqoff(NET_TX_SOFTIRQ);
2351 local_irq_restore(flags);
2352}
2353EXPORT_SYMBOL(__dev_kfree_skb_irq);
2354
2355void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
2356{
2357 if (in_irq() || irqs_disabled())
2358 __dev_kfree_skb_irq(skb, reason);
2359 else
2360 dev_kfree_skb(skb);
2361}
2362EXPORT_SYMBOL(__dev_kfree_skb_any);
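/*
 * Usage note (sketch, based on the wrappers in netdevice.h): drivers
 * normally call dev_kfree_skb_any(skb) for drops and
 * dev_consume_skb_any(skb) for successfully transmitted buffers; both
 * funnel into __dev_kfree_skb_any() with the matching skb_free_reason.
 */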
2363
2364
2365
2366
2367
2368
2369
2370
2371void netif_device_detach(struct net_device *dev)
2372{
2373 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2374 netif_running(dev)) {
2375 netif_tx_stop_all_queues(dev);
2376 }
2377}
2378EXPORT_SYMBOL(netif_device_detach);
2379
2380
2381
2382
2383
2384
2385
2386void netif_device_attach(struct net_device *dev)
2387{
2388 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2389 netif_running(dev)) {
2390 netif_tx_wake_all_queues(dev);
2391 __netdev_watchdog_up(dev);
2392 }
2393}
2394EXPORT_SYMBOL(netif_device_attach);
2395
2396
2397
2398
2399
2400u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
2401 unsigned int num_tx_queues)
2402{
2403 u32 hash;
2404 u16 qoffset = 0;
2405 u16 qcount = num_tx_queues;
2406
2407 if (skb_rx_queue_recorded(skb)) {
2408 hash = skb_get_rx_queue(skb);
2409 while (unlikely(hash >= num_tx_queues))
2410 hash -= num_tx_queues;
2411 return hash;
2412 }
2413
2414 if (dev->num_tc) {
2415 u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
2416 qoffset = dev->tc_to_txq[tc].offset;
2417 qcount = dev->tc_to_txq[tc].count;
2418 }
2419
2420 return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
2421}
2422EXPORT_SYMBOL(__skb_tx_hash);
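/*
 * Worked example (illustrative): with no recorded rx queue and no traffic
 * classes, a flow hash of 0x80000000 on a 4-queue device maps to
 * reciprocal_scale(0x80000000, 4) = (0x80000000ULL * 4) >> 32 = 2.
 */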
2423
2424static void skb_warn_bad_offload(const struct sk_buff *skb)
2425{
2426 static const netdev_features_t null_features;
2427 struct net_device *dev = skb->dev;
2428 const char *name = "";
2429
2430 if (!net_ratelimit())
2431 return;
2432
2433 if (dev) {
2434 if (dev->dev.parent)
2435 name = dev_driver_string(dev->dev.parent);
2436 else
2437 name = netdev_name(dev);
2438 }
2439 WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
2440 "gso_type=%d ip_summed=%d\n",
2441 name, dev ? &dev->features : &null_features,
2442 skb->sk ? &skb->sk->sk_route_caps : &null_features,
2443 skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
2444 skb_shinfo(skb)->gso_type, skb->ip_summed);
2445}
2446
/*
 * Invalidate the hardware checksum when the packet is to be mangled, and
 * complete the checksum manually on the outgoing path.
 */
2451int skb_checksum_help(struct sk_buff *skb)
2452{
2453 __wsum csum;
2454 int ret = 0, offset;
2455
2456 if (skb->ip_summed == CHECKSUM_COMPLETE)
2457 goto out_set_summed;
2458
2459 if (unlikely(skb_shinfo(skb)->gso_size)) {
2460 skb_warn_bad_offload(skb);
2461 return -EINVAL;
2462 }
2463
2464
2465
2466
2467 if (skb_has_shared_frag(skb)) {
2468 ret = __skb_linearize(skb);
2469 if (ret)
2470 goto out;
2471 }
2472
2473 offset = skb_checksum_start_offset(skb);
2474 BUG_ON(offset >= skb_headlen(skb));
2475 csum = skb_checksum(skb, offset, skb->len - offset, 0);
2476
2477 offset += skb->csum_offset;
2478 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2479
2480 if (skb_cloned(skb) &&
2481 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2482 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2483 if (ret)
2484 goto out;
2485 }
2486
2487 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2488out_set_summed:
2489 skb->ip_summed = CHECKSUM_NONE;
2490out:
2491 return ret;
2492}
2493EXPORT_SYMBOL(skb_checksum_help);
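/*
 * Worked example (illustrative): for a TCP packet with CHECKSUM_PARTIAL,
 * csum_start points at the TCP header and csum_offset is
 * offsetof(struct tcphdr, check) == 16, so the folded checksum of
 * everything from the transport header onward is written at
 * skb->data + offset + 16, and ip_summed drops to CHECKSUM_NONE.
 */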
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516bool __skb_csum_offload_chk(struct sk_buff *skb,
2517 const struct skb_csum_offl_spec *spec,
2518 bool *csum_encapped,
2519 bool csum_help)
2520{
2521 struct iphdr *iph;
2522 struct ipv6hdr *ipv6;
2523 void *nhdr;
2524 int protocol;
2525 u8 ip_proto;
2526
2527 if (skb->protocol == htons(ETH_P_8021Q) ||
2528 skb->protocol == htons(ETH_P_8021AD)) {
2529 if (!spec->vlan_okay)
2530 goto need_help;
2531 }
2532
2533
2534
2535
2536
2537
2538 if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
2539
2540 protocol = eproto_to_ipproto(vlan_get_protocol(skb));
2541 nhdr = skb_network_header(skb);
2542 *csum_encapped = false;
2543 if (spec->no_not_encapped)
2544 goto need_help;
2545 } else if (skb->encapsulation && spec->encap_okay &&
2546 skb_checksum_start_offset(skb) ==
2547 skb_inner_transport_offset(skb)) {
2548
2549 *csum_encapped = true;
2550 switch (skb->inner_protocol_type) {
2551 case ENCAP_TYPE_ETHER:
2552 protocol = eproto_to_ipproto(skb->inner_protocol);
2553 break;
2554 case ENCAP_TYPE_IPPROTO:
2555 protocol = skb->inner_protocol;
2556 break;
2557 }
2558 nhdr = skb_inner_network_header(skb);
2559 } else {
2560 goto need_help;
2561 }
2562
2563 switch (protocol) {
2564 case IPPROTO_IP:
2565 if (!spec->ipv4_okay)
2566 goto need_help;
2567 iph = nhdr;
2568 ip_proto = iph->protocol;
2569 if (iph->ihl != 5 && !spec->ip_options_okay)
2570 goto need_help;
2571 break;
2572 case IPPROTO_IPV6:
2573 if (!spec->ipv6_okay)
2574 goto need_help;
2575 if (spec->no_encapped_ipv6 && *csum_encapped)
2576 goto need_help;
2577 ipv6 = nhdr;
2578 nhdr += sizeof(*ipv6);
2579 ip_proto = ipv6->nexthdr;
2580 break;
2581 default:
2582 goto need_help;
2583 }
2584
2585ip_proto_again:
2586 switch (ip_proto) {
2587 case IPPROTO_TCP:
2588 if (!spec->tcp_okay ||
2589 skb->csum_offset != offsetof(struct tcphdr, check))
2590 goto need_help;
2591 break;
2592 case IPPROTO_UDP:
2593 if (!spec->udp_okay ||
2594 skb->csum_offset != offsetof(struct udphdr, check))
2595 goto need_help;
2596 break;
2597 case IPPROTO_SCTP:
2598 if (!spec->sctp_okay ||
2599 skb->csum_offset != offsetof(struct sctphdr, checksum))
2600 goto cant_help;
2601 break;
2602 case NEXTHDR_HOP:
2603 case NEXTHDR_ROUTING:
2604 case NEXTHDR_DEST: {
2605 u8 *opthdr = nhdr;
2606
2607 if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
2608 goto need_help;
2609
2610 ip_proto = opthdr[0];
2611 nhdr += (opthdr[1] + 1) << 3;
2612
2613 goto ip_proto_again;
2614 }
2615 default:
2616 goto need_help;
2617 }
2618
2619
2620 return true;
2621
2622need_help:
2623 if (csum_help && !skb_shinfo(skb)->gso_size)
2624 skb_checksum_help(skb);
2625cant_help:
2626 return false;
2627}
2628EXPORT_SYMBOL(__skb_csum_offload_chk);
2629
2630__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
2631{
2632 __be16 type = skb->protocol;
2633
2634
2635 if (type == htons(ETH_P_TEB)) {
2636 struct ethhdr *eth;
2637
2638 if (unlikely(!pskb_may_pull(skb, sizeof(struct ethhdr))))
2639 return 0;
2640
2641 eth = (struct ethhdr *)skb_mac_header(skb);
2642 type = eth->h_proto;
2643 }
2644
2645 return __vlan_get_protocol(skb, type, depth);
2646}
2647
2648
2649
2650
2651
2652
2653struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
2654 netdev_features_t features)
2655{
2656 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2657 struct packet_offload *ptype;
2658 int vlan_depth = skb->mac_len;
2659 __be16 type = skb_network_protocol(skb, &vlan_depth);
2660
2661 if (unlikely(!type))
2662 return ERR_PTR(-EINVAL);
2663
2664 __skb_pull(skb, vlan_depth);
2665
2666 rcu_read_lock();
2667 list_for_each_entry_rcu(ptype, &offload_base, list) {
2668 if (ptype->type == type && ptype->callbacks.gso_segment) {
2669 segs = ptype->callbacks.gso_segment(skb, features);
2670 break;
2671 }
2672 }
2673 rcu_read_unlock();
2674
2675 __skb_push(skb, skb->data - skb_mac_header(skb));
2676
2677 return segs;
2678}
2679EXPORT_SYMBOL(skb_mac_gso_segment);
2680
2681
2682
2683
2684static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
2685{
2686 if (tx_path)
2687 return skb->ip_summed != CHECKSUM_PARTIAL;
2688 else
2689 return skb->ip_summed == CHECKSUM_NONE;
2690}
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
2706 netdev_features_t features, bool tx_path)
2707{
2708 if (unlikely(skb_needs_check(skb, tx_path))) {
2709 int err;
2710
2711 skb_warn_bad_offload(skb);
2712
2713 err = skb_cow_head(skb, 0);
2714 if (err < 0)
2715 return ERR_PTR(err);
2716 }
2717
2718
2719
2720
2721
2722 if (features & NETIF_F_GSO_PARTIAL) {
2723 netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
2724 struct net_device *dev = skb->dev;
2725
2726 partial_features |= dev->features & dev->gso_partial_features;
2727 if (!skb_gso_ok(skb, features | partial_features))
2728 features &= ~NETIF_F_GSO_PARTIAL;
2729 }
2730
2731 BUILD_BUG_ON(SKB_SGO_CB_OFFSET +
2732 sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
2733
2734 SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
2735 SKB_GSO_CB(skb)->encap_level = 0;
2736
2737 skb_reset_mac_header(skb);
2738 skb_reset_mac_len(skb);
2739
2740 return skb_mac_gso_segment(skb, features);
2741}
2742EXPORT_SYMBOL(__skb_gso_segment);
2743
2744
2745#ifdef CONFIG_BUG
2746void netdev_rx_csum_fault(struct net_device *dev)
2747{
2748 if (net_ratelimit()) {
2749 pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
2750 dump_stack();
2751 }
2752}
2753EXPORT_SYMBOL(netdev_rx_csum_fault);
2754#endif
2755
2756
2757
2758
2759
2760
2761static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2762{
2763#ifdef CONFIG_HIGHMEM
2764 int i;
2765 if (!(dev->features & NETIF_F_HIGHDMA)) {
2766 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2767 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2768 if (PageHighMem(skb_frag_page(frag)))
2769 return 1;
2770 }
2771 }
2772
2773 if (PCI_DMA_BUS_IS_PHYS) {
2774 struct device *pdev = dev->dev.parent;
2775
2776 if (!pdev)
2777 return 0;
2778 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2779 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2780 dma_addr_t addr = page_to_phys(skb_frag_page(frag));
2781 if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
2782 return 1;
2783 }
2784 }
2785#endif
2786 return 0;
2787}
2788
2789
2790
2791
2792#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
2793static netdev_features_t net_mpls_features(struct sk_buff *skb,
2794 netdev_features_t features,
2795 __be16 type)
2796{
2797 if (eth_p_mpls(type))
2798 features &= skb->dev->mpls_features;
2799
2800 return features;
2801}
2802#else
2803static netdev_features_t net_mpls_features(struct sk_buff *skb,
2804 netdev_features_t features,
2805 __be16 type)
2806{
2807 return features;
2808}
2809#endif
2810
2811static netdev_features_t harmonize_features(struct sk_buff *skb,
2812 netdev_features_t features)
2813{
2814 int tmp;
2815 __be16 type;
2816
2817 type = skb_network_protocol(skb, &tmp);
2818 features = net_mpls_features(skb, features, type);
2819
2820 if (skb->ip_summed != CHECKSUM_NONE &&
2821 !can_checksum_protocol(features, type)) {
2822 features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
2823 } else if (illegal_highdma(skb->dev, skb)) {
2824 features &= ~NETIF_F_SG;
2825 }
2826
2827 return features;
2828}
2829
2830netdev_features_t passthru_features_check(struct sk_buff *skb,
2831 struct net_device *dev,
2832 netdev_features_t features)
2833{
2834 return features;
2835}
2836EXPORT_SYMBOL(passthru_features_check);
2837
2838static netdev_features_t dflt_features_check(const struct sk_buff *skb,
2839 struct net_device *dev,
2840 netdev_features_t features)
2841{
2842 return vlan_features_check(skb, features);
2843}
2844
2845static netdev_features_t gso_features_check(const struct sk_buff *skb,
2846 struct net_device *dev,
2847 netdev_features_t features)
2848{
2849 u16 gso_segs = skb_shinfo(skb)->gso_segs;
2850
2851 if (gso_segs > dev->gso_max_segs)
2852 return features & ~NETIF_F_GSO_MASK;
2853
2854
2855
2856
2857
2858
2859
2860 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL))
2861 features &= ~dev->gso_partial_features;
2862
2863
2864
2865
2866 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
2867 struct iphdr *iph = skb->encapsulation ?
2868 inner_ip_hdr(skb) : ip_hdr(skb);
2869
2870 if (!(iph->frag_off & htons(IP_DF)))
2871 features &= ~NETIF_F_TSO_MANGLEID;
2872 }
2873
2874 return features;
2875}
2876
2877netdev_features_t netif_skb_features(struct sk_buff *skb)
2878{
2879 struct net_device *dev = skb->dev;
2880 netdev_features_t features = dev->features;
2881
2882 if (skb_is_gso(skb))
2883 features = gso_features_check(skb, dev, features);
2884
2885
2886
2887
2888
2889 if (skb->encapsulation)
2890 features &= dev->hw_enc_features;
2891
2892 if (skb_vlan_tagged(skb))
2893 features = netdev_intersect_features(features,
2894 dev->vlan_features |
2895 NETIF_F_HW_VLAN_CTAG_TX |
2896 NETIF_F_HW_VLAN_STAG_TX);
2897
2898 if (dev->netdev_ops->ndo_features_check)
2899 features &= dev->netdev_ops->ndo_features_check(skb, dev,
2900 features);
2901 else
2902 features &= dflt_features_check(skb, dev, features);
2903
2904 return harmonize_features(skb, features);
2905}
2906EXPORT_SYMBOL(netif_skb_features);
2907
2908static int xmit_one(struct sk_buff *skb, struct net_device *dev,
2909 struct netdev_queue *txq, bool more)
2910{
2911 unsigned int len;
2912 int rc;
2913
2914 if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
2915 dev_queue_xmit_nit(skb, dev);
2916
2917 len = skb->len;
2918 trace_net_dev_start_xmit(skb, dev);
2919 rc = netdev_start_xmit(skb, dev, txq, more);
2920 trace_net_dev_xmit(skb, rc, dev, len);
2921
2922 return rc;
2923}
2924
2925struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *dev,
2926 struct netdev_queue *txq, int *ret)
2927{
2928 struct sk_buff *skb = first;
2929 int rc = NETDEV_TX_OK;
2930
2931 while (skb) {
2932 struct sk_buff *next = skb->next;
2933
2934 skb->next = NULL;
2935 rc = xmit_one(skb, dev, txq, next != NULL);
2936 if (unlikely(!dev_xmit_complete(rc))) {
2937 skb->next = next;
2938 goto out;
2939 }
2940
2941 skb = next;
2942 if (netif_xmit_stopped(txq) && skb) {
2943 rc = NETDEV_TX_BUSY;
2944 break;
2945 }
2946 }
2947
2948out:
2949 *ret = rc;
2950 return skb;
2951}
2952
2953static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
2954 netdev_features_t features)
2955{
2956 if (skb_vlan_tag_present(skb) &&
2957 !vlan_hw_offload_capable(features, skb->vlan_proto))
2958 skb = __vlan_hwaccel_push_inside(skb);
2959 return skb;
2960}
2961
2962static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev)
2963{
2964 netdev_features_t features;
2965
2966 features = netif_skb_features(skb);
2967 skb = validate_xmit_vlan(skb, features);
2968 if (unlikely(!skb))
2969 goto out_null;
2970
2971 if (netif_needs_gso(skb, features)) {
2972 struct sk_buff *segs;
2973
2974 segs = skb_gso_segment(skb, features);
2975 if (IS_ERR(segs)) {
2976 goto out_kfree_skb;
2977 } else if (segs) {
2978 consume_skb(skb);
2979 skb = segs;
2980 }
2981 } else {
2982 if (skb_needs_linearize(skb, features) &&
2983 __skb_linearize(skb))
2984 goto out_kfree_skb;
2985
2986
2987
2988
2989
2990 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2991 if (skb->encapsulation)
2992 skb_set_inner_transport_header(skb,
2993 skb_checksum_start_offset(skb));
2994 else
2995 skb_set_transport_header(skb,
2996 skb_checksum_start_offset(skb));
2997 if (!(features & NETIF_F_CSUM_MASK) &&
2998 skb_checksum_help(skb))
2999 goto out_kfree_skb;
3000 }
3001 }
3002
3003 return skb;
3004
3005out_kfree_skb:
3006 kfree_skb(skb);
3007out_null:
3008 atomic_long_inc(&dev->tx_dropped);
3009 return NULL;
3010}
3011
3012struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev)
3013{
3014 struct sk_buff *next, *head = NULL, *tail;
3015
3016 for (; skb != NULL; skb = next) {
3017 next = skb->next;
3018 skb->next = NULL;
3019
3020
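		/* in case skb will not be segmented, point to itself */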
3021 skb->prev = skb;
3022
3023 skb = validate_xmit_skb(skb, dev);
3024 if (!skb)
3025 continue;
3026
3027 if (!head)
3028 head = skb;
3029 else
3030 tail->next = skb;
3031
3032
3033
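		/* If the skb was segmented, skb->prev now points at the last
		 * segment; otherwise it still points at skb itself (set above),
		 * so it can be used as the new tail of the list.
		 */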
3034 tail = skb->prev;
3035 }
3036 return head;
3037}
3038
3039static void qdisc_pkt_len_init(struct sk_buff *skb)
3040{
3041 const struct skb_shared_info *shinfo = skb_shinfo(skb);
3042
3043 qdisc_skb_cb(skb)->pkt_len = skb->len;
3044
3045
3046
3047
3048 if (shinfo->gso_size) {
3049 unsigned int hdr_len;
3050 u16 gso_segs = shinfo->gso_segs;
3051
3052
3053 hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
3054
3055
3056 if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
3057 hdr_len += tcp_hdrlen(skb);
3058 else
3059 hdr_len += sizeof(struct udphdr);
3060
3061 if (shinfo->gso_type & SKB_GSO_DODGY)
3062 gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
3063 shinfo->gso_size);
3064
3065 qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len;
3066 }
3067}
3068
3069static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
3070 struct net_device *dev,
3071 struct netdev_queue *txq)
3072{
3073 spinlock_t *root_lock = qdisc_lock(q);
3074 struct sk_buff *to_free = NULL;
3075 bool contended;
3076 int rc;
3077
3078 qdisc_calculate_pkt_len(skb, q);
3079
3080
3081
3082
3083
3084
3085 contended = qdisc_is_running(q);
3086 if (unlikely(contended))
3087 spin_lock(&q->busylock);
3088
3089 spin_lock(root_lock);
3090 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
3091 __qdisc_drop(skb, &to_free);
3092 rc = NET_XMIT_DROP;
3093 } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
3094 qdisc_run_begin(q)) {
3095
3096
3097
3098
3099
3100
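		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */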
3101 qdisc_bstats_update(q, skb);
3102
3103 if (sch_direct_xmit(skb, q, dev, txq, root_lock, true)) {
3104 if (unlikely(contended)) {
3105 spin_unlock(&q->busylock);
3106 contended = false;
3107 }
3108 __qdisc_run(q);
3109 } else
3110 qdisc_run_end(q);
3111
3112 rc = NET_XMIT_SUCCESS;
3113 } else {
3114 rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
3115 if (qdisc_run_begin(q)) {
3116 if (unlikely(contended)) {
3117 spin_unlock(&q->busylock);
3118 contended = false;
3119 }
3120 __qdisc_run(q);
3121 }
3122 }
3123 spin_unlock(root_lock);
3124 if (unlikely(to_free))
3125 kfree_skb_list(to_free);
3126 if (unlikely(contended))
3127 spin_unlock(&q->busylock);
3128 return rc;
3129}
3130
3131#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
3132static void skb_update_prio(struct sk_buff *skb)
3133{
3134 struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
3135
3136 if (!skb->priority && skb->sk && map) {
3137 unsigned int prioidx =
3138 sock_cgroup_prioidx(&skb->sk->sk_cgrp_data);
3139
3140 if (prioidx < map->priomap_len)
3141 skb->priority = map->priomap[prioidx];
3142 }
3143}
3144#else
3145#define skb_update_prio(skb)
3146#endif
3147
3148DEFINE_PER_CPU(int, xmit_recursion);
3149EXPORT_SYMBOL(xmit_recursion);
3150
3151
3152
3153
3154
3155
3156
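/**
 *	dev_loopback_xmit - loop back @skb
 *	@net: network namespace this loopback is happening in
 *	@sk:  sk needed to match the netfilter okfn signature (unused here)
 *	@skb: buffer to transmit
 */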
3157int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
3158{
3159 skb_reset_mac_header(skb);
3160 __skb_pull(skb, skb_network_offset(skb));
3161 skb->pkt_type = PACKET_LOOPBACK;
3162 skb->ip_summed = CHECKSUM_UNNECESSARY;
3163 WARN_ON(!skb_dst(skb));
3164 skb_dst_force(skb);
3165 netif_rx_ni(skb);
3166 return 0;
3167}
3168EXPORT_SYMBOL(dev_loopback_xmit);
3169
3170#ifdef CONFIG_NET_EGRESS
3171static struct sk_buff *
3172sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
3173{
3174 struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list);
3175 struct tcf_result cl_res;
3176
3177 if (!cl)
3178 return skb;
3179
3180
3181
3182
3183 qdisc_bstats_cpu_update(cl->q, skb);
3184
3185 switch (tc_classify(skb, cl, &cl_res, false)) {
3186 case TC_ACT_OK:
3187 case TC_ACT_RECLASSIFY:
3188 skb->tc_index = TC_H_MIN(cl_res.classid);
3189 break;
3190 case TC_ACT_SHOT:
3191 qdisc_qstats_cpu_drop(cl->q);
3192 *ret = NET_XMIT_DROP;
3193 kfree_skb(skb);
3194 return NULL;
3195 case TC_ACT_STOLEN:
3196 case TC_ACT_QUEUED:
3197 *ret = NET_XMIT_SUCCESS;
3198 consume_skb(skb);
3199 return NULL;
3200 case TC_ACT_REDIRECT:
3201
3202 skb_do_redirect(skb);
3203 *ret = NET_XMIT_SUCCESS;
3204 return NULL;
3205 default:
3206 break;
3207 }
3208
3209 return skb;
3210}
3211#endif
3212
3213static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
3214{
3215#ifdef CONFIG_XPS
3216 struct xps_dev_maps *dev_maps;
3217 struct xps_map *map;
3218 int queue_index = -1;
3219
3220 rcu_read_lock();
3221 dev_maps = rcu_dereference(dev->xps_maps);
3222 if (dev_maps) {
3223 map = rcu_dereference(
3224 dev_maps->cpu_map[skb->sender_cpu - 1]);
3225 if (map) {
3226 if (map->len == 1)
3227 queue_index = map->queues[0];
3228 else
3229 queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
3230 map->len)];
3231 if (unlikely(queue_index >= dev->real_num_tx_queues))
3232 queue_index = -1;
3233 }
3234 }
3235 rcu_read_unlock();
3236
3237 return queue_index;
3238#else
3239 return -1;
3240#endif
3241}
3242
3243static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
3244{
3245 struct sock *sk = skb->sk;
3246 int queue_index = sk_tx_queue_get(sk);
3247
3248 if (queue_index < 0 || skb->ooo_okay ||
3249 queue_index >= dev->real_num_tx_queues) {
3250 int new_index = get_xps_queue(dev, skb);
3251 if (new_index < 0)
3252 new_index = skb_tx_hash(dev, skb);
3253
3254 if (queue_index != new_index && sk &&
3255 sk_fullsock(sk) &&
3256 rcu_access_pointer(sk->sk_dst_cache))
3257 sk_tx_queue_set(sk, new_index);
3258
3259 queue_index = new_index;
3260 }
3261
3262 return queue_index;
3263}
3264
3265struct netdev_queue *netdev_pick_tx(struct net_device *dev,
3266 struct sk_buff *skb,
3267 void *accel_priv)
3268{
3269 int queue_index = 0;
3270
3271#ifdef CONFIG_XPS
3272 u32 sender_cpu = skb->sender_cpu - 1;
3273
3274 if (sender_cpu >= (u32)NR_CPUS)
3275 skb->sender_cpu = raw_smp_processor_id() + 1;
3276#endif
3277
3278 if (dev->real_num_tx_queues != 1) {
3279 const struct net_device_ops *ops = dev->netdev_ops;
3280 if (ops->ndo_select_queue)
3281 queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
3282 __netdev_pick_tx);
3283 else
3284 queue_index = __netdev_pick_tx(dev, skb);
3285
3286 if (!accel_priv)
3287 queue_index = netdev_cap_txqueue(dev, queue_index);
3288 }
3289
3290 skb_set_queue_mapping(skb, queue_index);
3291 return netdev_get_tx_queue(dev, queue_index);
3292}
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
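/**
 *	__dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *	@accel_priv: private data used for L2 forwarding offload
 *
 *	Queue a buffer for transmission to a network device.  The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function.  The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure.  A success does not
 *	guarantee the frame will be transmitted, as it may be dropped due to
 *	congestion or traffic shaping.  Queue disciplines may also return
 *	positive NET_XMIT_* codes.
 *
 *	Regardless of the return value, the skb is consumed, so retrying a
 *	send requires holding an extra reference beforehand.
 *
 *	When calling this method, interrupts MUST be enabled, because the
 *	BH enable code must run with IRQs enabled to avoid deadlock.
 */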
3320static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
3321{
3322 struct net_device *dev = skb->dev;
3323 struct netdev_queue *txq;
3324 struct Qdisc *q;
3325 int rc = -ENOMEM;
3326
3327 skb_reset_mac_header(skb);
3328
3329 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
3330 __skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
3331
3332
3333
3334
3335 rcu_read_lock_bh();
3336
3337 skb_update_prio(skb);
3338
3339 qdisc_pkt_len_init(skb);
3340#ifdef CONFIG_NET_CLS_ACT
3341 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
3342# ifdef CONFIG_NET_EGRESS
3343 if (static_key_false(&egress_needed)) {
3344 skb = sch_handle_egress(skb, &rc, dev);
3345 if (!skb)
3346 goto out;
3347 }
3348# endif
3349#endif
3350
3351
3352
3353 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
3354 skb_dst_drop(skb);
3355 else
3356 skb_dst_force(skb);
3357
3358#ifdef CONFIG_NET_SWITCHDEV
3359
3360 if (skb->offload_fwd_mark &&
3361 skb->offload_fwd_mark == dev->offload_fwd_mark) {
3362 consume_skb(skb);
3363 rc = NET_XMIT_SUCCESS;
3364 goto out;
3365 }
3366#endif
3367
3368 txq = netdev_pick_tx(dev, skb, accel_priv);
3369 q = rcu_dereference_bh(txq->qdisc);
3370
3371 trace_net_dev_queue(skb);
3372 if (q->enqueue) {
3373 rc = __dev_xmit_skb(skb, q, dev, txq);
3374 goto out;
3375 }
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389 if (dev->flags & IFF_UP) {
3390 int cpu = smp_processor_id();
3391
3392 if (txq->xmit_lock_owner != cpu) {
3393 if (unlikely(__this_cpu_read(xmit_recursion) >
3394 XMIT_RECURSION_LIMIT))
3395 goto recursion_alert;
3396
3397 skb = validate_xmit_skb(skb, dev);
3398 if (!skb)
3399 goto out;
3400
3401 HARD_TX_LOCK(dev, txq, cpu);
3402
3403 if (!netif_xmit_stopped(txq)) {
3404 __this_cpu_inc(xmit_recursion);
3405 skb = dev_hard_start_xmit(skb, dev, txq, &rc);
3406 __this_cpu_dec(xmit_recursion);
3407 if (dev_xmit_complete(rc)) {
3408 HARD_TX_UNLOCK(dev, txq);
3409 goto out;
3410 }
3411 }
3412 HARD_TX_UNLOCK(dev, txq);
3413 net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
3414 dev->name);
3415 } else {
3416
3417
3418
3419recursion_alert:
3420 net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
3421 dev->name);
3422 }
3423 }
3424
3425 rc = -ENETDOWN;
3426 rcu_read_unlock_bh();
3427
3428 atomic_long_inc(&dev->tx_dropped);
3429 kfree_skb_list(skb);
3430 return rc;
3431out:
3432 rcu_read_unlock_bh();
3433 return rc;
3434}
3435
3436int dev_queue_xmit(struct sk_buff *skb)
3437{
3438 return __dev_queue_xmit(skb, NULL);
3439}
3440EXPORT_SYMBOL(dev_queue_xmit);
3441
3442int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
3443{
3444 return __dev_queue_xmit(skb, accel_priv);
3445}
3446EXPORT_SYMBOL(dev_queue_xmit_accel);
3447
3448
3449
3450
3451
3452
3453int netdev_max_backlog __read_mostly = 1000;
3454EXPORT_SYMBOL(netdev_max_backlog);
3455
3456int netdev_tstamp_prequeue __read_mostly = 1;
3457int netdev_budget __read_mostly = 300;
3458int weight_p __read_mostly = 64;
3459
3460
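/* Called with irq disabled */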
3461static inline void ____napi_schedule(struct softnet_data *sd,
3462 struct napi_struct *napi)
3463{
3464 list_add_tail(&napi->poll_list, &sd->poll_list);
3465 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3466}
3467
3468#ifdef CONFIG_RPS
3469
3470
3471struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
3472EXPORT_SYMBOL(rps_sock_flow_table);
3473u32 rps_cpu_mask __read_mostly;
3474EXPORT_SYMBOL(rps_cpu_mask);
3475
3476struct static_key rps_needed __read_mostly;
3477EXPORT_SYMBOL(rps_needed);
3478
3479static struct rps_dev_flow *
3480set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3481 struct rps_dev_flow *rflow, u16 next_cpu)
3482{
3483 if (next_cpu < nr_cpu_ids) {
3484#ifdef CONFIG_RFS_ACCEL
3485 struct netdev_rx_queue *rxqueue;
3486 struct rps_dev_flow_table *flow_table;
3487 struct rps_dev_flow *old_rflow;
3488 u32 flow_id;
3489 u16 rxq_index;
3490 int rc;
3491
3492
3493 if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
3494 !(dev->features & NETIF_F_NTUPLE))
3495 goto out;
3496 rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
3497 if (rxq_index == skb_get_rx_queue(skb))
3498 goto out;
3499
3500 rxqueue = dev->_rx + rxq_index;
3501 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3502 if (!flow_table)
3503 goto out;
3504 flow_id = skb_get_hash(skb) & flow_table->mask;
3505 rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
3506 rxq_index, flow_id);
3507 if (rc < 0)
3508 goto out;
3509 old_rflow = rflow;
3510 rflow = &flow_table->flows[flow_id];
3511 rflow->filter = rc;
3512 if (old_rflow->filter == rflow->filter)
3513 old_rflow->filter = RPS_NO_FILTER;
3514 out:
3515#endif
3516 rflow->last_qtail =
3517 per_cpu(softnet_data, next_cpu).input_queue_head;
3518 }
3519
3520 rflow->cpu = next_cpu;
3521 return rflow;
3522}
3523
3524
3525
3526
3527
3528
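/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */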
3529static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
3530 struct rps_dev_flow **rflowp)
3531{
3532 const struct rps_sock_flow_table *sock_flow_table;
3533 struct netdev_rx_queue *rxqueue = dev->_rx;
3534 struct rps_dev_flow_table *flow_table;
3535 struct rps_map *map;
3536 int cpu = -1;
3537 u32 tcpu;
3538 u32 hash;
3539
3540 if (skb_rx_queue_recorded(skb)) {
3541 u16 index = skb_get_rx_queue(skb);
3542
3543 if (unlikely(index >= dev->real_num_rx_queues)) {
3544 WARN_ONCE(dev->real_num_rx_queues > 1,
3545 "%s received packet on queue %u, but number "
3546 "of RX queues is %u\n",
3547 dev->name, index, dev->real_num_rx_queues);
3548 goto done;
3549 }
3550 rxqueue += index;
3551 }
3552
3553
3554
3555 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3556 map = rcu_dereference(rxqueue->rps_map);
3557 if (!flow_table && !map)
3558 goto done;
3559
3560 skb_reset_network_header(skb);
3561 hash = skb_get_hash(skb);
3562 if (!hash)
3563 goto done;
3564
3565 sock_flow_table = rcu_dereference(rps_sock_flow_table);
3566 if (flow_table && sock_flow_table) {
3567 struct rps_dev_flow *rflow;
3568 u32 next_cpu;
3569 u32 ident;
3570
3571
3572 ident = sock_flow_table->ents[hash & sock_flow_table->mask];
3573 if ((ident ^ hash) & ~rps_cpu_mask)
3574 goto try_rps;
3575
3576 next_cpu = ident & rps_cpu_mask;
3577
3578
3579
3580
3581 rflow = &flow_table->flows[hash & flow_table->mask];
3582 tcpu = rflow->cpu;
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595 if (unlikely(tcpu != next_cpu) &&
3596 (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
3597 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
3598 rflow->last_qtail)) >= 0)) {
3599 tcpu = next_cpu;
3600 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
3601 }
3602
3603 if (tcpu < nr_cpu_ids && cpu_online(tcpu)) {
3604 *rflowp = rflow;
3605 cpu = tcpu;
3606 goto done;
3607 }
3608 }
3609
3610try_rps:
3611
3612 if (map) {
3613 tcpu = map->cpus[reciprocal_scale(hash, map->len)];
3614 if (cpu_online(tcpu)) {
3615 cpu = tcpu;
3616 goto done;
3617 }
3618 }
3619
3620done:
3621 return cpu;
3622}
3623
3624#ifdef CONFIG_RFS_ACCEL
3625
3626
3627
3628
3629
3630
3631
3632
3633
3634
3635
3636
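/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should check this function
 * before removing a filter replaced by another filter.  Filter IDs may be
 * reused, so the filter should not be removed if the new filter is still
 * in use.
 */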
3637bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
3638 u32 flow_id, u16 filter_id)
3639{
3640 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
3641 struct rps_dev_flow_table *flow_table;
3642 struct rps_dev_flow *rflow;
3643 bool expire = true;
3644 unsigned int cpu;
3645
3646 rcu_read_lock();
3647 flow_table = rcu_dereference(rxqueue->rps_flow_table);
3648 if (flow_table && flow_id <= flow_table->mask) {
3649 rflow = &flow_table->flows[flow_id];
3650 cpu = ACCESS_ONCE(rflow->cpu);
3651 if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
3652 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
3653 rflow->last_qtail) <
3654 (int)(10 * flow_table->mask)))
3655 expire = false;
3656 }
3657 rcu_read_unlock();
3658 return expire;
3659}
3660EXPORT_SYMBOL(rps_may_expire_flow);
3661
3662#endif
3663
3664
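/* Called from hardirq (IPI) context */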
3665static void rps_trigger_softirq(void *data)
3666{
3667 struct softnet_data *sd = data;
3668
3669 ____napi_schedule(sd, &sd->backlog);
3670 sd->received_rps++;
3671}
3672
3673#endif
3674
3675
3676
3677
3678
3679
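/*
 * Check if this softnet_data structure is another cpu one.
 * If yes, queue it to our IPI list and return 1,
 * otherwise return 0.
 */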
3680static int rps_ipi_queued(struct softnet_data *sd)
3681{
3682#ifdef CONFIG_RPS
3683 struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
3684
3685 if (sd != mysd) {
3686 sd->rps_ipi_next = mysd->rps_ipi_list;
3687 mysd->rps_ipi_list = sd;
3688
3689 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3690 return 1;
3691 }
3692#endif
3693 return 0;
3694}
3695
3696#ifdef CONFIG_NET_FLOW_LIMIT
3697int netdev_flow_limit_table_len __read_mostly = (1 << 12);
3698#endif
3699
3700static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
3701{
3702#ifdef CONFIG_NET_FLOW_LIMIT
3703 struct sd_flow_limit *fl;
3704 struct softnet_data *sd;
3705 unsigned int old_flow, new_flow;
3706
3707 if (qlen < (netdev_max_backlog >> 1))
3708 return false;
3709
3710 sd = this_cpu_ptr(&softnet_data);
3711
3712 rcu_read_lock();
3713 fl = rcu_dereference(sd->flow_limit);
3714 if (fl) {
3715 new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
3716 old_flow = fl->history[fl->history_head];
3717 fl->history[fl->history_head] = new_flow;
3718
3719 fl->history_head++;
3720 fl->history_head &= FLOW_LIMIT_HISTORY - 1;
3721
3722 if (likely(fl->buckets[old_flow]))
3723 fl->buckets[old_flow]--;
3724
3725 if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
3726 fl->count++;
3727 rcu_read_unlock();
3728 return true;
3729 }
3730 }
3731 rcu_read_unlock();
3732#endif
3733 return false;
3734}
3735
3736
3737
3738
3739
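/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */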
3740static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
3741 unsigned int *qtail)
3742{
3743 struct softnet_data *sd;
3744 unsigned long flags;
3745 unsigned int qlen;
3746
3747 sd = &per_cpu(softnet_data, cpu);
3748
3749 local_irq_save(flags);
3750
3751 rps_lock(sd);
3752 if (!netif_running(skb->dev))
3753 goto drop;
3754 qlen = skb_queue_len(&sd->input_pkt_queue);
3755 if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
3756 if (qlen) {
3757enqueue:
3758 __skb_queue_tail(&sd->input_pkt_queue, skb);
3759 input_queue_tail_incr_save(sd, qtail);
3760 rps_unlock(sd);
3761 local_irq_restore(flags);
3762 return NET_RX_SUCCESS;
3763 }
3764
3765
3766
3767
3768 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
3769 if (!rps_ipi_queued(sd))
3770 ____napi_schedule(sd, &sd->backlog);
3771 }
3772 goto enqueue;
3773 }
3774
3775drop:
3776 sd->dropped++;
3777 rps_unlock(sd);
3778
3779 local_irq_restore(flags);
3780
3781 atomic_long_inc(&skb->dev->rx_dropped);
3782 kfree_skb(skb);
3783 return NET_RX_DROP;
3784}
3785
3786static int netif_rx_internal(struct sk_buff *skb)
3787{
3788 int ret;
3789
3790 net_timestamp_check(netdev_tstamp_prequeue, skb);
3791
3792 trace_netif_rx(skb);
3793#ifdef CONFIG_RPS
3794 if (static_key_false(&rps_needed)) {
3795 struct rps_dev_flow voidflow, *rflow = &voidflow;
3796 int cpu;
3797
3798 preempt_disable();
3799 rcu_read_lock();
3800
3801 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3802 if (cpu < 0)
3803 cpu = smp_processor_id();
3804
3805 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3806
3807 rcu_read_unlock();
3808 preempt_enable();
3809 } else
3810#endif
3811 {
3812 unsigned int qtail;
3813 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3814 put_cpu();
3815 }
3816 return ret;
3817}
3818
3819
3820
3821
3822
3823
3824
3825
3826
3827
3828
3829
3830
3831
3832
3833
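/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds.  The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */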
3834int netif_rx(struct sk_buff *skb)
3835{
3836 trace_netif_rx_entry(skb);
3837
3838 return netif_rx_internal(skb);
3839}
3840EXPORT_SYMBOL(netif_rx);
3841
3842int netif_rx_ni(struct sk_buff *skb)
3843{
3844 int err;
3845
3846 trace_netif_rx_ni_entry(skb);
3847
3848 preempt_disable();
3849 err = netif_rx_internal(skb);
3850 if (local_softirq_pending())
3851 do_softirq();
3852 preempt_enable();
3853
3854 return err;
3855}
3856EXPORT_SYMBOL(netif_rx_ni);
3857
3858static void net_tx_action(struct softirq_action *h)
3859{
3860 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
3861
3862 if (sd->completion_queue) {
3863 struct sk_buff *clist;
3864
3865 local_irq_disable();
3866 clist = sd->completion_queue;
3867 sd->completion_queue = NULL;
3868 local_irq_enable();
3869
3870 while (clist) {
3871 struct sk_buff *skb = clist;
3872 clist = clist->next;
3873
3874 WARN_ON(atomic_read(&skb->users));
3875 if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
3876 trace_consume_skb(skb);
3877 else
3878 trace_kfree_skb(skb, net_tx_action);
3879
3880 if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
3881 __kfree_skb(skb);
3882 else
3883 __kfree_skb_defer(skb);
3884 }
3885
3886 __kfree_skb_flush();
3887 }
3888
3889 if (sd->output_queue) {
3890 struct Qdisc *head;
3891
3892 local_irq_disable();
3893 head = sd->output_queue;
3894 sd->output_queue = NULL;
3895 sd->output_queue_tailp = &sd->output_queue;
3896 local_irq_enable();
3897
3898 while (head) {
3899 struct Qdisc *q = head;
3900 spinlock_t *root_lock;
3901
3902 head = head->next_sched;
3903
3904 root_lock = qdisc_lock(q);
3905 spin_lock(root_lock);
3906
3907
3908
3909 smp_mb__before_atomic();
3910 clear_bit(__QDISC_STATE_SCHED, &q->state);
3911 qdisc_run(q);
3912 spin_unlock(root_lock);
3913 }
3914 }
3915}
3916
3917#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3918 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3919
3920int (*br_fdb_test_addr_hook)(struct net_device *dev,
3921 unsigned char *addr) __read_mostly;
3922EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3923#endif
3924
3925static inline struct sk_buff *
3926sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
3927 struct net_device *orig_dev)
3928{
3929#ifdef CONFIG_NET_CLS_ACT
3930 struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
3931 struct tcf_result cl_res;
3932
3933
3934
3935
3936
3937
3938 if (!cl)
3939 return skb;
3940 if (*pt_prev) {
3941 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3942 *pt_prev = NULL;
3943 }
3944
3945 qdisc_skb_cb(skb)->pkt_len = skb->len;
3946 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3947 qdisc_bstats_cpu_update(cl->q, skb);
3948
3949 switch (tc_classify(skb, cl, &cl_res, false)) {
3950 case TC_ACT_OK:
3951 case TC_ACT_RECLASSIFY:
3952 skb->tc_index = TC_H_MIN(cl_res.classid);
3953 break;
3954 case TC_ACT_SHOT:
3955 qdisc_qstats_cpu_drop(cl->q);
3956 kfree_skb(skb);
3957 return NULL;
3958 case TC_ACT_STOLEN:
3959 case TC_ACT_QUEUED:
3960 consume_skb(skb);
3961 return NULL;
3962 case TC_ACT_REDIRECT:
3963
3964
3965
3966
3967 __skb_push(skb, skb->mac_len);
3968 skb_do_redirect(skb);
3969 return NULL;
3970 default:
3971 break;
3972 }
3973#endif
3974 return skb;
3975}
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
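/**
 *	netdev_is_rx_handler_busy - check if receive handler is registered
 *	@dev: device to check
 *
 *	Check if a receive handler is already registered for a given device.
 *	Return true if there is one.
 *
 *	The caller must hold the rtnl_mutex.
 */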
3986bool netdev_is_rx_handler_busy(struct net_device *dev)
3987{
3988 ASSERT_RTNL();
3989 return dev && rtnl_dereference(dev->rx_handler);
3990}
3991EXPORT_SYMBOL_GPL(netdev_is_rx_handler_busy);
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
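/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device.  This handler will then be
 *	called from __netif_receive_skb.  A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	A minimal, hypothetical usage sketch (my_rx_handler and my_priv are
 *	illustrative names only, not defined in this file):
 *
 *		rtnl_lock();
 *		if (!netdev_is_rx_handler_busy(dev))
 *			err = netdev_rx_handler_register(dev, my_rx_handler,
 *							 my_priv);
 *		rtnl_unlock();
 */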
4007int netdev_rx_handler_register(struct net_device *dev,
4008 rx_handler_func_t *rx_handler,
4009 void *rx_handler_data)
4010{
4011 ASSERT_RTNL();
4012
4013 if (dev->rx_handler)
4014 return -EBUSY;
4015
4016
4017 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
4018 rcu_assign_pointer(dev->rx_handler, rx_handler);
4019
4020 return 0;
4021}
4022EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
4023
4024
4025
4026
4027
4028
4029
4030
4031
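/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */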
4032void netdev_rx_handler_unregister(struct net_device *dev)
4033{
4034
4035 ASSERT_RTNL();
4036 RCU_INIT_POINTER(dev->rx_handler, NULL);
4037
4038
4039
4040
4041 synchronize_net();
4042 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
4043}
4044EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
4045
4046
4047
4048
4049
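/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special handling of PFMEMALLOC skbs.
 */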
4050static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
4051{
4052 switch (skb->protocol) {
4053 case htons(ETH_P_ARP):
4054 case htons(ETH_P_IP):
4055 case htons(ETH_P_IPV6):
4056 case htons(ETH_P_8021Q):
4057 case htons(ETH_P_8021AD):
4058 return true;
4059 default:
4060 return false;
4061 }
4062}
4063
4064static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
4065 int *ret, struct net_device *orig_dev)
4066{
4067#ifdef CONFIG_NETFILTER_INGRESS
4068 if (nf_hook_ingress_active(skb)) {
4069 if (*pt_prev) {
4070 *ret = deliver_skb(skb, *pt_prev, orig_dev);
4071 *pt_prev = NULL;
4072 }
4073
4074 return nf_hook_ingress(skb);
4075 }
4076#endif
4077 return 0;
4078}
4079
4080static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
4081{
4082 struct packet_type *ptype, *pt_prev;
4083 rx_handler_func_t *rx_handler;
4084 struct net_device *orig_dev;
4085 bool deliver_exact = false;
4086 int ret = NET_RX_DROP;
4087 __be16 type;
4088
4089 net_timestamp_check(!netdev_tstamp_prequeue, skb);
4090
4091 trace_netif_receive_skb(skb);
4092
4093 orig_dev = skb->dev;
4094
4095 skb_reset_network_header(skb);
4096 if (!skb_transport_header_was_set(skb))
4097 skb_reset_transport_header(skb);
4098 skb_reset_mac_len(skb);
4099
4100 pt_prev = NULL;
4101
4102another_round:
4103 skb->skb_iif = skb->dev->ifindex;
4104
4105 __this_cpu_inc(softnet_data.processed);
4106
4107 if (skb->protocol == cpu_to_be16(ETH_P_8021Q) ||
4108 skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
4109 skb = skb_vlan_untag(skb);
4110 if (unlikely(!skb))
4111 goto out;
4112 }
4113
4114#ifdef CONFIG_NET_CLS_ACT
4115 if (skb->tc_verd & TC_NCLS) {
4116 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
4117 goto ncls;
4118 }
4119#endif
4120
4121 if (pfmemalloc)
4122 goto skip_taps;
4123
4124 list_for_each_entry_rcu(ptype, &ptype_all, list) {
4125 if (pt_prev)
4126 ret = deliver_skb(skb, pt_prev, orig_dev);
4127 pt_prev = ptype;
4128 }
4129
4130 list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) {
4131 if (pt_prev)
4132 ret = deliver_skb(skb, pt_prev, orig_dev);
4133 pt_prev = ptype;
4134 }
4135
4136skip_taps:
4137#ifdef CONFIG_NET_INGRESS
4138 if (static_key_false(&ingress_needed)) {
4139 skb = sch_handle_ingress(skb, &pt_prev, &ret, orig_dev);
4140 if (!skb)
4141 goto out;
4142
4143 if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
4144 goto out;
4145 }
4146#endif
4147#ifdef CONFIG_NET_CLS_ACT
4148 skb->tc_verd = 0;
4149ncls:
4150#endif
4151 if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
4152 goto drop;
4153
4154 if (skb_vlan_tag_present(skb)) {
4155 if (pt_prev) {
4156 ret = deliver_skb(skb, pt_prev, orig_dev);
4157 pt_prev = NULL;
4158 }
4159 if (vlan_do_receive(&skb))
4160 goto another_round;
4161 else if (unlikely(!skb))
4162 goto out;
4163 }
4164
4165 rx_handler = rcu_dereference(skb->dev->rx_handler);
4166 if (rx_handler) {
4167 if (pt_prev) {
4168 ret = deliver_skb(skb, pt_prev, orig_dev);
4169 pt_prev = NULL;
4170 }
4171 switch (rx_handler(&skb)) {
4172 case RX_HANDLER_CONSUMED:
4173 ret = NET_RX_SUCCESS;
4174 goto out;
4175 case RX_HANDLER_ANOTHER:
4176 goto another_round;
4177 case RX_HANDLER_EXACT:
4178 deliver_exact = true;
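			/* fall through */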
4179 case RX_HANDLER_PASS:
4180 break;
4181 default:
4182 BUG();
4183 }
4184 }
4185
4186 if (unlikely(skb_vlan_tag_present(skb))) {
4187 if (skb_vlan_tag_get_id(skb))
4188 skb->pkt_type = PACKET_OTHERHOST;
4189
4190
4191
4192
4193 skb->vlan_tci = 0;
4194 }
4195
4196 type = skb->protocol;
4197
4198
4199 if (likely(!deliver_exact)) {
4200 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4201 &ptype_base[ntohs(type) &
4202 PTYPE_HASH_MASK]);
4203 }
4204
4205 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4206 &orig_dev->ptype_specific);
4207
4208 if (unlikely(skb->dev != orig_dev)) {
4209 deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type,
4210 &skb->dev->ptype_specific);
4211 }
4212
4213 if (pt_prev) {
4214 if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
4215 goto drop;
4216 else
4217 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
4218 } else {
4219drop:
4220 if (!deliver_exact)
4221 atomic_long_inc(&skb->dev->rx_dropped);
4222 else
4223 atomic_long_inc(&skb->dev->rx_nohandler);
4224 kfree_skb(skb);
4225
4226
4227
4228 ret = NET_RX_DROP;
4229 }
4230
4231out:
4232 return ret;
4233}
4234
4235static int __netif_receive_skb(struct sk_buff *skb)
4236{
4237 int ret;
4238
4239 if (sk_memalloc_socks() && skb_pfmemalloc(skb)) {
4240 unsigned long pflags = current->flags;
4241
4242
4243
4244
4245
4246
4247
4248
4249
4250
4251 current->flags |= PF_MEMALLOC;
4252 ret = __netif_receive_skb_core(skb, true);
4253 tsk_restore_flags(current, pflags, PF_MEMALLOC);
4254 } else
4255 ret = __netif_receive_skb_core(skb, false);
4256
4257 return ret;
4258}
4259
4260static int netif_receive_skb_internal(struct sk_buff *skb)
4261{
4262 int ret;
4263
4264 net_timestamp_check(netdev_tstamp_prequeue, skb);
4265
4266 if (skb_defer_rx_timestamp(skb))
4267 return NET_RX_SUCCESS;
4268
4269 rcu_read_lock();
4270
4271#ifdef CONFIG_RPS
4272 if (static_key_false(&rps_needed)) {
4273 struct rps_dev_flow voidflow, *rflow = &voidflow;
4274 int cpu = get_rps_cpu(skb->dev, skb, &rflow);
4275
4276 if (cpu >= 0) {
4277 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
4278 rcu_read_unlock();
4279 return ret;
4280 }
4281 }
4282#endif
4283 ret = __netif_receive_skb(skb);
4284 rcu_read_unlock();
4285 return ret;
4286}
4287
4288
4289
4290
4291
4292
4293
4294
4295
4296
4297
4298
4299
4300
4301
4302
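/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds.  The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */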
4303int netif_receive_skb(struct sk_buff *skb)
4304{
4305 trace_netif_receive_skb_entry(skb);
4306
4307 return netif_receive_skb_internal(skb);
4308}
4309EXPORT_SYMBOL(netif_receive_skb);
4310
4311
4312
4313
4314static void flush_backlog(void *arg)
4315{
4316 struct net_device *dev = arg;
4317 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
4318 struct sk_buff *skb, *tmp;
4319
4320 rps_lock(sd);
4321 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
4322 if (skb->dev == dev) {
4323 __skb_unlink(skb, &sd->input_pkt_queue);
4324 kfree_skb(skb);
4325 input_queue_head_incr(sd);
4326 }
4327 }
4328 rps_unlock(sd);
4329
4330 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
4331 if (skb->dev == dev) {
4332 __skb_unlink(skb, &sd->process_queue);
4333 kfree_skb(skb);
4334 input_queue_head_incr(sd);
4335 }
4336 }
4337}
4338
4339static int napi_gro_complete(struct sk_buff *skb)
4340{
4341 struct packet_offload *ptype;
4342 __be16 type = skb->protocol;
4343 struct list_head *head = &offload_base;
4344 int err = -ENOENT;
4345
4346 BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
4347
4348 if (NAPI_GRO_CB(skb)->count == 1) {
4349 skb_shinfo(skb)->gso_size = 0;
4350 goto out;
4351 }
4352
4353 rcu_read_lock();
4354 list_for_each_entry_rcu(ptype, head, list) {
4355 if (ptype->type != type || !ptype->callbacks.gro_complete)
4356 continue;
4357
4358 err = ptype->callbacks.gro_complete(skb, 0);
4359 break;
4360 }
4361 rcu_read_unlock();
4362
4363 if (err) {
4364 WARN_ON(&ptype->list == head);
4365 kfree_skb(skb);
4366 return NET_RX_SUCCESS;
4367 }
4368
4369out:
4370 return netif_receive_skb_internal(skb);
4371}
4372
4373
4374
4375
4376
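/* napi->gro_list contains packets ordered by age:
 * youngest packets at the head of the list,
 * oldest packets at the tail of the list.
 */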
4377void napi_gro_flush(struct napi_struct *napi, bool flush_old)
4378{
4379 struct sk_buff *skb, *prev = NULL;
4380
4381
4382 for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
4383 skb->prev = prev;
4384 prev = skb;
4385 }
4386
4387 for (skb = prev; skb; skb = prev) {
4388 skb->next = NULL;
4389
4390 if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
4391 return;
4392
4393 prev = skb->prev;
4394 napi_gro_complete(skb);
4395 napi->gro_count--;
4396 }
4397
4398 napi->gro_list = NULL;
4399}
4400EXPORT_SYMBOL(napi_gro_flush);
4401
4402static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
4403{
4404 struct sk_buff *p;
4405 unsigned int maclen = skb->dev->hard_header_len;
4406 u32 hash = skb_get_hash_raw(skb);
4407
4408 for (p = napi->gro_list; p; p = p->next) {
4409 unsigned long diffs;
4410
4411 NAPI_GRO_CB(p)->flush = 0;
4412
4413 if (hash != skb_get_hash_raw(p)) {
4414 NAPI_GRO_CB(p)->same_flow = 0;
4415 continue;
4416 }
4417
4418 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
4419 diffs |= p->vlan_tci ^ skb->vlan_tci;
4420 diffs |= skb_metadata_dst_cmp(p, skb);
4421 if (maclen == ETH_HLEN)
4422 diffs |= compare_ether_header(skb_mac_header(p),
4423 skb_mac_header(skb));
4424 else if (!diffs)
4425 diffs = memcmp(skb_mac_header(p),
4426 skb_mac_header(skb),
4427 maclen);
4428 NAPI_GRO_CB(p)->same_flow = !diffs;
4429 }
4430}
4431
4432static void skb_gro_reset_offset(struct sk_buff *skb)
4433{
4434 const struct skb_shared_info *pinfo = skb_shinfo(skb);
4435 const skb_frag_t *frag0 = &pinfo->frags[0];
4436
4437 NAPI_GRO_CB(skb)->data_offset = 0;
4438 NAPI_GRO_CB(skb)->frag0 = NULL;
4439 NAPI_GRO_CB(skb)->frag0_len = 0;
4440
4441 if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
4442 pinfo->nr_frags &&
4443 !PageHighMem(skb_frag_page(frag0))) {
4444 NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
4445 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
4446 }
4447}
4448
4449static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
4450{
4451 struct skb_shared_info *pinfo = skb_shinfo(skb);
4452
4453 BUG_ON(skb->end - skb->tail < grow);
4454
4455 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
4456
4457 skb->data_len -= grow;
4458 skb->tail += grow;
4459
4460 pinfo->frags[0].page_offset += grow;
4461 skb_frag_size_sub(&pinfo->frags[0], grow);
4462
4463 if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
4464 skb_frag_unref(skb, 0);
4465 memmove(pinfo->frags, pinfo->frags + 1,
4466 --pinfo->nr_frags * sizeof(pinfo->frags[0]));
4467 }
4468}
4469
4470static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4471{
4472 struct sk_buff **pp = NULL;
4473 struct packet_offload *ptype;
4474 __be16 type = skb->protocol;
4475 struct list_head *head = &offload_base;
4476 int same_flow;
4477 enum gro_result ret;
4478 int grow;
4479
4480 if (!(skb->dev->features & NETIF_F_GRO))
4481 goto normal;
4482
4483 if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
4484 goto normal;
4485
4486 gro_list_prepare(napi, skb);
4487
4488 rcu_read_lock();
4489 list_for_each_entry_rcu(ptype, head, list) {
4490 if (ptype->type != type || !ptype->callbacks.gro_receive)
4491 continue;
4492
4493 skb_set_network_header(skb, skb_gro_offset(skb));
4494 skb_reset_mac_len(skb);
4495 NAPI_GRO_CB(skb)->same_flow = 0;
4496 NAPI_GRO_CB(skb)->flush = 0;
4497 NAPI_GRO_CB(skb)->free = 0;
4498 NAPI_GRO_CB(skb)->encap_mark = 0;
4499 NAPI_GRO_CB(skb)->is_fou = 0;
4500 NAPI_GRO_CB(skb)->is_atomic = 1;
4501 NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
4502
4503
4504 switch (skb->ip_summed) {
4505 case CHECKSUM_COMPLETE:
4506 NAPI_GRO_CB(skb)->csum = skb->csum;
4507 NAPI_GRO_CB(skb)->csum_valid = 1;
4508 NAPI_GRO_CB(skb)->csum_cnt = 0;
4509 break;
4510 case CHECKSUM_UNNECESSARY:
4511 NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
4512 NAPI_GRO_CB(skb)->csum_valid = 0;
4513 break;
4514 default:
4515 NAPI_GRO_CB(skb)->csum_cnt = 0;
4516 NAPI_GRO_CB(skb)->csum_valid = 0;
4517 }
4518
4519 pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
4520 break;
4521 }
4522 rcu_read_unlock();
4523
4524 if (&ptype->list == head)
4525 goto normal;
4526
4527 same_flow = NAPI_GRO_CB(skb)->same_flow;
4528 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
4529
4530 if (pp) {
4531 struct sk_buff *nskb = *pp;
4532
4533 *pp = nskb->next;
4534 nskb->next = NULL;
4535 napi_gro_complete(nskb);
4536 napi->gro_count--;
4537 }
4538
4539 if (same_flow)
4540 goto ok;
4541
4542 if (NAPI_GRO_CB(skb)->flush)
4543 goto normal;
4544
4545 if (unlikely(napi->gro_count >= MAX_GRO_SKBS)) {
4546 struct sk_buff *nskb = napi->gro_list;
4547
4548
4549 while (nskb->next) {
4550 pp = &nskb->next;
4551 nskb = *pp;
4552 }
4553 *pp = NULL;
4554 nskb->next = NULL;
4555 napi_gro_complete(nskb);
4556 } else {
4557 napi->gro_count++;
4558 }
4559 NAPI_GRO_CB(skb)->count = 1;
4560 NAPI_GRO_CB(skb)->age = jiffies;
4561 NAPI_GRO_CB(skb)->last = skb;
4562 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
4563 skb->next = napi->gro_list;
4564 napi->gro_list = skb;
4565 ret = GRO_HELD;
4566
4567pull:
4568 grow = skb_gro_offset(skb) - skb_headlen(skb);
4569 if (grow > 0)
4570 gro_pull_from_frag0(skb, grow);
4571ok:
4572 return ret;
4573
4574normal:
4575 ret = GRO_NORMAL;
4576 goto pull;
4577}
4578
4579struct packet_offload *gro_find_receive_by_type(__be16 type)
4580{
4581 struct list_head *offload_head = &offload_base;
4582 struct packet_offload *ptype;
4583
4584 list_for_each_entry_rcu(ptype, offload_head, list) {
4585 if (ptype->type != type || !ptype->callbacks.gro_receive)
4586 continue;
4587 return ptype;
4588 }
4589 return NULL;
4590}
4591EXPORT_SYMBOL(gro_find_receive_by_type);
4592
4593struct packet_offload *gro_find_complete_by_type(__be16 type)
4594{
4595 struct list_head *offload_head = &offload_base;
4596 struct packet_offload *ptype;
4597
4598 list_for_each_entry_rcu(ptype, offload_head, list) {
4599 if (ptype->type != type || !ptype->callbacks.gro_complete)
4600 continue;
4601 return ptype;
4602 }
4603 return NULL;
4604}
4605EXPORT_SYMBOL(gro_find_complete_by_type);
4606
4607static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
4608{
4609 switch (ret) {
4610 case GRO_NORMAL:
4611 if (netif_receive_skb_internal(skb))
4612 ret = GRO_DROP;
4613 break;
4614
4615 case GRO_DROP:
4616 kfree_skb(skb);
4617 break;
4618
4619 case GRO_MERGED_FREE:
4620 if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
4621 skb_dst_drop(skb);
4622 kmem_cache_free(skbuff_head_cache, skb);
4623 } else {
4624 __kfree_skb(skb);
4625 }
4626 break;
4627
4628 case GRO_HELD:
4629 case GRO_MERGED:
4630 break;
4631 }
4632
4633 return ret;
4634}
4635
4636gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
4637{
4638 skb_mark_napi_id(skb, napi);
4639 trace_napi_gro_receive_entry(skb);
4640
4641 skb_gro_reset_offset(skb);
4642
4643 return napi_skb_finish(dev_gro_receive(napi, skb), skb);
4644}
4645EXPORT_SYMBOL(napi_gro_receive);
4646
4647static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
4648{
4649 if (unlikely(skb->pfmemalloc)) {
4650 consume_skb(skb);
4651 return;
4652 }
4653 __skb_pull(skb, skb_headlen(skb));
4654
4655 skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
4656 skb->vlan_tci = 0;
4657 skb->dev = napi->dev;
4658 skb->skb_iif = 0;
4659 skb->encapsulation = 0;
4660 skb_shinfo(skb)->gso_type = 0;
4661 skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
4662
4663 napi->skb = skb;
4664}
4665
4666struct sk_buff *napi_get_frags(struct napi_struct *napi)
4667{
4668 struct sk_buff *skb = napi->skb;
4669
4670 if (!skb) {
4671 skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
4672 if (skb) {
4673 napi->skb = skb;
4674 skb_mark_napi_id(skb, napi);
4675 }
4676 }
4677 return skb;
4678}
4679EXPORT_SYMBOL(napi_get_frags);
4680
4681static gro_result_t napi_frags_finish(struct napi_struct *napi,
4682 struct sk_buff *skb,
4683 gro_result_t ret)
4684{
4685 switch (ret) {
4686 case GRO_NORMAL:
4687 case GRO_HELD:
4688 __skb_push(skb, ETH_HLEN);
4689 skb->protocol = eth_type_trans(skb, skb->dev);
4690 if (ret == GRO_NORMAL && netif_receive_skb_internal(skb))
4691 ret = GRO_DROP;
4692 break;
4693
4694 case GRO_DROP:
4695 case GRO_MERGED_FREE:
4696 napi_reuse_skb(napi, skb);
4697 break;
4698
4699 case GRO_MERGED:
4700 break;
4701 }
4702
4703 return ret;
4704}
4705
4706
4707
4708
4709
4710static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
4711{
4712 struct sk_buff *skb = napi->skb;
4713 const struct ethhdr *eth;
4714 unsigned int hlen = sizeof(*eth);
4715
4716 napi->skb = NULL;
4717
4718 skb_reset_mac_header(skb);
4719 skb_gro_reset_offset(skb);
4720
4721 eth = skb_gro_header_fast(skb, 0);
4722 if (unlikely(skb_gro_header_hard(skb, hlen))) {
4723 eth = skb_gro_header_slow(skb, hlen, 0);
4724 if (unlikely(!eth)) {
4725 net_warn_ratelimited("%s: dropping impossible skb from %s\n",
4726 __func__, napi->dev->name);
4727 napi_reuse_skb(napi, skb);
4728 return NULL;
4729 }
4730 } else {
4731 gro_pull_from_frag0(skb, hlen);
4732 NAPI_GRO_CB(skb)->frag0 += hlen;
4733 NAPI_GRO_CB(skb)->frag0_len -= hlen;
4734 }
4735 __skb_pull(skb, hlen);
4736
4737
4738
4739
4740
4741
4742 skb->protocol = eth->h_proto;
4743
4744 return skb;
4745}
4746
4747gro_result_t napi_gro_frags(struct napi_struct *napi)
4748{
4749 struct sk_buff *skb = napi_frags_skb(napi);
4750
4751 if (!skb)
4752 return GRO_DROP;
4753
4754 trace_napi_gro_frags_entry(skb);
4755
4756 return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
4757}
4758EXPORT_SYMBOL(napi_gro_frags);
4759
4760
4761
4762
4763__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
4764{
4765 __wsum wsum;
4766 __sum16 sum;
4767
4768 wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
4769
4770
4771 sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
4772 if (likely(!sum)) {
4773 if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
4774 !skb->csum_complete_sw)
4775 netdev_rx_csum_fault(skb->dev);
4776 }
4777
4778 NAPI_GRO_CB(skb)->csum = wsum;
4779 NAPI_GRO_CB(skb)->csum_valid = 1;
4780
4781 return sum;
4782}
4783EXPORT_SYMBOL(__skb_gro_checksum_complete);
4784
4785
4786
4787
4788
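/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */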
4789static void net_rps_action_and_irq_enable(struct softnet_data *sd)
4790{
4791#ifdef CONFIG_RPS
4792 struct softnet_data *remsd = sd->rps_ipi_list;
4793
4794 if (remsd) {
4795 sd->rps_ipi_list = NULL;
4796
4797 local_irq_enable();
4798
4799
4800 while (remsd) {
4801 struct softnet_data *next = remsd->rps_ipi_next;
4802
4803 if (cpu_online(remsd->cpu))
4804 smp_call_function_single_async(remsd->cpu,
4805 &remsd->csd);
4806 remsd = next;
4807 }
4808 } else
4809#endif
4810 local_irq_enable();
4811}
4812
4813static bool sd_has_rps_ipi_waiting(struct softnet_data *sd)
4814{
4815#ifdef CONFIG_RPS
4816 return sd->rps_ipi_list != NULL;
4817#else
4818 return false;
4819#endif
4820}
4821
4822static int process_backlog(struct napi_struct *napi, int quota)
4823{
4824 int work = 0;
4825 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
4826
4827
4828
4829
4830 if (sd_has_rps_ipi_waiting(sd)) {
4831 local_irq_disable();
4832 net_rps_action_and_irq_enable(sd);
4833 }
4834
4835 napi->weight = weight_p;
4836 local_irq_disable();
4837 while (1) {
4838 struct sk_buff *skb;
4839
4840 while ((skb = __skb_dequeue(&sd->process_queue))) {
4841 rcu_read_lock();
4842 local_irq_enable();
4843 __netif_receive_skb(skb);
4844 rcu_read_unlock();
4845 local_irq_disable();
4846 input_queue_head_incr(sd);
4847 if (++work >= quota) {
4848 local_irq_enable();
4849 return work;
4850 }
4851 }
4852
4853 rps_lock(sd);
4854 if (skb_queue_empty(&sd->input_pkt_queue)) {
4855
4856
4857
4858
4859
4860
4861
4862
4863 napi->state = 0;
4864 rps_unlock(sd);
4865
4866 break;
4867 }
4868
4869 skb_queue_splice_tail_init(&sd->input_pkt_queue,
4870 &sd->process_queue);
4871 rps_unlock(sd);
4872 }
4873 local_irq_enable();
4874
4875 return work;
4876}
4877
4878
4879
4880
4881
4882
4883
4884
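/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 * Consider using __napi_schedule_irqoff() if hard irqs are masked.
 */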
4885void __napi_schedule(struct napi_struct *n)
4886{
4887 unsigned long flags;
4888
4889 local_irq_save(flags);
4890 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4891 local_irq_restore(flags);
4892}
4893EXPORT_SYMBOL(__napi_schedule);
4894
4895
4896
4897
4898
4899
4900
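/**
 * __napi_schedule_irqoff - schedule for receive
 * @n: entry to schedule
 *
 * Variant of __napi_schedule() assuming hard irqs are masked.
 */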
4901void __napi_schedule_irqoff(struct napi_struct *n)
4902{
4903 ____napi_schedule(this_cpu_ptr(&softnet_data), n);
4904}
4905EXPORT_SYMBOL(__napi_schedule_irqoff);
4906
4907void __napi_complete(struct napi_struct *n)
4908{
4909 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
4910
4911 list_del_init(&n->poll_list);
4912 smp_mb__before_atomic();
4913 clear_bit(NAPI_STATE_SCHED, &n->state);
4914}
4915EXPORT_SYMBOL(__napi_complete);
4916
4917void napi_complete_done(struct napi_struct *n, int work_done)
4918{
4919 unsigned long flags;
4920
4921
4922
4923
4924
4925 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
4926 return;
4927
4928 if (n->gro_list) {
4929 unsigned long timeout = 0;
4930
4931 if (work_done)
4932 timeout = n->dev->gro_flush_timeout;
4933
4934 if (timeout)
4935 hrtimer_start(&n->timer, ns_to_ktime(timeout),
4936 HRTIMER_MODE_REL_PINNED);
4937 else
4938 napi_gro_flush(n, false);
4939 }
4940 if (likely(list_empty(&n->poll_list))) {
4941 WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
4942 } else {
4943
4944 local_irq_save(flags);
4945 __napi_complete(n);
4946 local_irq_restore(flags);
4947 }
4948}
4949EXPORT_SYMBOL(napi_complete_done);
4950
4951
4952static struct napi_struct *napi_by_id(unsigned int napi_id)
4953{
4954 unsigned int hash = napi_id % HASH_SIZE(napi_hash);
4955 struct napi_struct *napi;
4956
4957 hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
4958 if (napi->napi_id == napi_id)
4959 return napi;
4960
4961 return NULL;
4962}
4963
4964#if defined(CONFIG_NET_RX_BUSY_POLL)
4965#define BUSY_POLL_BUDGET 8
4966bool sk_busy_loop(struct sock *sk, int nonblock)
4967{
4968 unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
4969 int (*busy_poll)(struct napi_struct *dev);
4970 struct napi_struct *napi;
4971 int rc = false;
4972
4973 rcu_read_lock();
4974
4975 napi = napi_by_id(sk->sk_napi_id);
4976 if (!napi)
4977 goto out;
4978
4979
4980 busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
4981
4982 do {
4983 rc = 0;
4984 local_bh_disable();
4985 if (busy_poll) {
4986 rc = busy_poll(napi);
4987 } else if (napi_schedule_prep(napi)) {
4988 void *have = netpoll_poll_lock(napi);
4989
4990 if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
4991 rc = napi->poll(napi, BUSY_POLL_BUDGET);
4992 trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
4993 if (rc == BUSY_POLL_BUDGET) {
4994 napi_complete_done(napi, rc);
4995 napi_schedule(napi);
4996 }
4997 }
4998 netpoll_poll_unlock(have);
4999 }
5000 if (rc > 0)
5001 __NET_ADD_STATS(sock_net(sk),
5002 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
5003 local_bh_enable();
5004
5005 if (rc == LL_FLUSH_FAILED)
5006 break;
5007
5008 cpu_relax();
5009 } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
5010 !need_resched() && !busy_loop_timeout(end_time));
5011
5012 rc = !skb_queue_empty(&sk->sk_receive_queue);
5013out:
5014 rcu_read_unlock();
5015 return rc;
5016}
5017EXPORT_SYMBOL(sk_busy_loop);
5018
5019#endif
5020
5021void napi_hash_add(struct napi_struct *napi)
5022{
5023 if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
5024 test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
5025 return;
5026
5027 spin_lock(&napi_hash_lock);
5028
5029
5030 do {
5031 if (unlikely(++napi_gen_id < NR_CPUS + 1))
5032 napi_gen_id = NR_CPUS + 1;
5033 } while (napi_by_id(napi_gen_id));
5034 napi->napi_id = napi_gen_id;
5035
5036 hlist_add_head_rcu(&napi->napi_hash_node,
5037 &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
5038
5039 spin_unlock(&napi_hash_lock);
5040}
5041EXPORT_SYMBOL_GPL(napi_hash_add);
5042
5043
5044
5045
5046bool napi_hash_del(struct napi_struct *napi)
5047{
5048 bool rcu_sync_needed = false;
5049
5050 spin_lock(&napi_hash_lock);
5051
5052 if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
5053 rcu_sync_needed = true;
5054 hlist_del_rcu(&napi->napi_hash_node);
5055 }
5056 spin_unlock(&napi_hash_lock);
5057 return rcu_sync_needed;
5058}
5059EXPORT_SYMBOL_GPL(napi_hash_del);
5060
5061static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
5062{
5063 struct napi_struct *napi;
5064
5065 napi = container_of(timer, struct napi_struct, timer);
5066 if (napi->gro_list)
5067 napi_schedule(napi);
5068
5069 return HRTIMER_NORESTART;
5070}
5071
5072void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
5073 int (*poll)(struct napi_struct *, int), int weight)
5074{
5075 INIT_LIST_HEAD(&napi->poll_list);
5076 hrtimer_init(&napi->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED);
5077 napi->timer.function = napi_watchdog;
5078 napi->gro_count = 0;
5079 napi->gro_list = NULL;
5080 napi->skb = NULL;
5081 napi->poll = poll;
5082 if (weight > NAPI_POLL_WEIGHT)
5083 pr_err_once("netif_napi_add() called with weight %d on device %s\n",
5084 weight, dev->name);
5085 napi->weight = weight;
5086 list_add(&napi->dev_list, &dev->napi_list);
5087 napi->dev = dev;
5088#ifdef CONFIG_NETPOLL
5089 spin_lock_init(&napi->poll_lock);
5090 napi->poll_owner = -1;
5091#endif
5092 set_bit(NAPI_STATE_SCHED, &napi->state);
5093 napi_hash_add(napi);
5094}
5095EXPORT_SYMBOL(netif_napi_add);
5096
5097void napi_disable(struct napi_struct *n)
5098{
5099 might_sleep();
5100 set_bit(NAPI_STATE_DISABLE, &n->state);
5101
5102 while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
5103 msleep(1);
5104 while (test_and_set_bit(NAPI_STATE_NPSVC, &n->state))
5105 msleep(1);
5106
5107 hrtimer_cancel(&n->timer);
5108
5109 clear_bit(NAPI_STATE_DISABLE, &n->state);
5110}
5111EXPORT_SYMBOL(napi_disable);
5112
5113
5114void netif_napi_del(struct napi_struct *napi)
5115{
5116 might_sleep();
5117 if (napi_hash_del(napi))
5118 synchronize_net();
5119 list_del_init(&napi->dev_list);
5120 napi_free_frags(napi);
5121
5122 kfree_skb_list(napi->gro_list);
5123 napi->gro_list = NULL;
5124 napi->gro_count = 0;
5125}
5126EXPORT_SYMBOL(netif_napi_del);
5127
5128static int napi_poll(struct napi_struct *n, struct list_head *repoll)
5129{
5130 void *have;
5131 int work, weight;
5132
5133 list_del_init(&n->poll_list);
5134
5135 have = netpoll_poll_lock(n);
5136
5137 weight = n->weight;
5138
5139
5140
5141
5142
5143
5144
5145 work = 0;
5146 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
5147 work = n->poll(n, weight);
5148 trace_napi_poll(n, work, weight);
5149 }
5150
5151 WARN_ON_ONCE(work > weight);
5152
5153 if (likely(work < weight))
5154 goto out_unlock;
5155
5156
5157
5158
5159
5160
5161 if (unlikely(napi_disable_pending(n))) {
5162 napi_complete(n);
5163 goto out_unlock;
5164 }
5165
5166 if (n->gro_list) {
5167
5168
5169
5170 napi_gro_flush(n, HZ >= 1000);
5171 }
5172
5173
5174
5175
5176 if (unlikely(!list_empty(&n->poll_list))) {
5177 pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
5178 n->dev ? n->dev->name : "backlog");
5179 goto out_unlock;
5180 }
5181
5182 list_add_tail(&n->poll_list, repoll);
5183
5184out_unlock:
5185 netpoll_poll_unlock(have);
5186
5187 return work;
5188}
5189
5190static void net_rx_action(struct softirq_action *h)
5191{
5192 struct softnet_data *sd = this_cpu_ptr(&softnet_data);
5193 unsigned long time_limit = jiffies + 2;
5194 int budget = netdev_budget;
5195 LIST_HEAD(list);
5196 LIST_HEAD(repoll);
5197
5198 local_irq_disable();
5199 list_splice_init(&sd->poll_list, &list);
5200 local_irq_enable();
5201
5202 for (;;) {
5203 struct napi_struct *n;
5204
5205 if (list_empty(&list)) {
5206 if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
5207 return;
5208 break;
5209 }
5210
5211 n = list_first_entry(&list, struct napi_struct, poll_list);
5212 budget -= napi_poll(n, &repoll);
5213
5214
5215
5216
5217
5218 if (unlikely(budget <= 0 ||
5219 time_after_eq(jiffies, time_limit))) {
5220 sd->time_squeeze++;
5221 break;
5222 }
5223 }
5224
5225 __kfree_skb_flush();
5226 local_irq_disable();
5227
5228 list_splice_tail_init(&sd->poll_list, &list);
5229 list_splice_tail(&repoll, &list);
5230 list_splice(&list, &sd->poll_list);
5231 if (!list_empty(&sd->poll_list))
5232 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
5233
5234 net_rps_action_and_irq_enable(sd);
5235}
5236
5237struct netdev_adjacent {
5238 struct net_device *dev;
5239
5240
5241 bool master;
5242
5243
5244 u16 ref_nr;
5245
5246
5247 void *private;
5248
5249 struct list_head list;
5250 struct rcu_head rcu;
5251};
5252
5253static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
5254 struct list_head *adj_list)
5255{
5256 struct netdev_adjacent *adj;
5257
5258 list_for_each_entry(adj, adj_list, list) {
5259 if (adj->dev == adj_dev)
5260 return adj;
5261 }
5262 return NULL;
5263}
5264
5265
5266
5267
5268
5269
5270
5271
5272
5273
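/**
 * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
 * @upper_dev: upper device to check
 *
 * Find out if a device is linked to the specified upper device and return
 * true in case it is.  The caller must hold the RTNL lock.
 */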
5274bool netdev_has_upper_dev(struct net_device *dev,
5275 struct net_device *upper_dev)
5276{
5277 ASSERT_RTNL();
5278
5279 return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
5280}
5281EXPORT_SYMBOL(netdev_has_upper_dev);
5282
5283
5284
5285
5286
5287
5288
5289
5290static bool netdev_has_any_upper_dev(struct net_device *dev)
5291{
5292 ASSERT_RTNL();
5293
5294 return !list_empty(&dev->all_adj_list.upper);
5295}
5296
5297
5298
5299
5300
5301
5302
5303
5304struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
5305{
5306 struct netdev_adjacent *upper;
5307
5308 ASSERT_RTNL();
5309
5310 if (list_empty(&dev->adj_list.upper))
5311 return NULL;
5312
5313 upper = list_first_entry(&dev->adj_list.upper,
5314 struct netdev_adjacent, list);
5315 if (likely(upper->master))
5316 return upper->dev;
5317 return NULL;
5318}
5319EXPORT_SYMBOL(netdev_master_upper_dev_get);
5320
5321void *netdev_adjacent_get_private(struct list_head *adj_list)
5322{
5323 struct netdev_adjacent *adj;
5324
5325 adj = list_entry(adj_list, struct netdev_adjacent, list);
5326
5327 return adj->private;
5328}
5329EXPORT_SYMBOL(netdev_adjacent_get_private);
5330
5331
5332
5333
5334
5335
5336
5337
5338
5339struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
5340 struct list_head **iter)
5341{
5342 struct netdev_adjacent *upper;
5343
5344 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5345
5346 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5347
5348 if (&upper->list == &dev->adj_list.upper)
5349 return NULL;
5350
5351 *iter = &upper->list;
5352
5353 return upper->dev;
5354}
5355EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
5356
5357
5358
5359
5360
5361
5362
5363
5364
5365struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
5366 struct list_head **iter)
5367{
5368 struct netdev_adjacent *upper;
5369
5370 WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held());
5371
5372 upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5373
5374 if (&upper->list == &dev->all_adj_list.upper)
5375 return NULL;
5376
5377 *iter = &upper->list;
5378
5379 return upper->dev;
5380}
5381EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
5382
/**
 * netdev_lower_get_next_private - Get the next ->private from the
 *				   lower neighbour list
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent->private from the dev's lower neighbour
 * list, starting from iter position. The caller must hold either the RTNL
 * lock or its own locking that guarantees that the neighbour lower
 * list will remain unchanged.
 */
5394void *netdev_lower_get_next_private(struct net_device *dev,
5395 struct list_head **iter)
5396{
5397 struct netdev_adjacent *lower;
5398
5399 lower = list_entry(*iter, struct netdev_adjacent, list);
5400
5401 if (&lower->list == &dev->adj_list.lower)
5402 return NULL;
5403
5404 *iter = lower->list.next;
5405
5406 return lower->private;
5407}
5408EXPORT_SYMBOL(netdev_lower_get_next_private);
5409
/**
 * netdev_lower_get_next_private_rcu - Get the next ->private from the
 *				       lower neighbour list, RCU
 *				       variant
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent->private from the dev's lower neighbour
 * list, starting from iter position. The caller must hold RCU read lock.
 */
5420void *netdev_lower_get_next_private_rcu(struct net_device *dev,
5421 struct list_head **iter)
5422{
5423 struct netdev_adjacent *lower;
5424
5425 WARN_ON_ONCE(!rcu_read_lock_held());
5426
5427 lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
5428
5429 if (&lower->list == &dev->adj_list.lower)
5430 return NULL;
5431
5432 *iter = &lower->list;
5433
5434 return lower->private;
5435}
5436EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
5437
/**
 * netdev_lower_get_next - Get the next device from the lower neighbour
 *			   list
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent from the dev's lower neighbour
 * list, starting from iter position. The caller must hold RTNL lock or
 * its own locking that guarantees that the neighbour lower
 * list will remain unchanged.
 */
5449void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
5450{
5451 struct netdev_adjacent *lower;
5452
5453 lower = list_entry(*iter, struct netdev_adjacent, list);
5454
5455 if (&lower->list == &dev->adj_list.lower)
5456 return NULL;
5457
5458 *iter = lower->list.next;
5459
5460 return lower->dev;
5461}
5462EXPORT_SYMBOL(netdev_lower_get_next);
5463
/**
 * netdev_all_lower_get_next - Get the next device from all lower neighbour list
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent from the dev's all lower neighbour
 * list, starting from iter position. The caller must hold RTNL lock or
 * its own locking that guarantees that the neighbour all lower
 * list will remain unchanged.
 */
5474struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
5475{
5476 struct netdev_adjacent *lower;
5477
5478 lower = list_entry(*iter, struct netdev_adjacent, list);
5479
5480 if (&lower->list == &dev->all_adj_list.lower)
5481 return NULL;
5482
5483 *iter = lower->list.next;
5484
5485 return lower->dev;
5486}
5487EXPORT_SYMBOL(netdev_all_lower_get_next);
5488
/**
 * netdev_all_lower_get_next_rcu - Get the next device from all
 *				   lower neighbour list, RCU variant
 * @dev: device
 * @iter: list_head ** of the current position
 *
 * Gets the next netdev_adjacent from the dev's all lower neighbour
 * list, starting from iter position. The caller must hold RCU read lock.
 */
5498struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
5499 struct list_head **iter)
5500{
5501 struct netdev_adjacent *lower;
5502
	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);

	if (&lower->list == &dev->all_adj_list.lower)
		return NULL;

	*iter = &lower->list;

	return lower->dev;
5507}
5508EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
5509
/**
 * netdev_lower_get_first_private_rcu - Get the first ->private from the
 *					lower neighbour list, RCU variant
 * @dev: device
 *
 * Gets the first netdev_adjacent->private from the dev's lower neighbour
 * list. The caller must hold RCU read lock.
 */
5519void *netdev_lower_get_first_private_rcu(struct net_device *dev)
5520{
5521 struct netdev_adjacent *lower;
5522
5523 lower = list_first_or_null_rcu(&dev->adj_list.lower,
5524 struct netdev_adjacent, list);
5525 if (lower)
5526 return lower->private;
5527 return NULL;
5528}
5529EXPORT_SYMBOL(netdev_lower_get_first_private_rcu);
5530
/**
 * netdev_master_upper_dev_get_rcu - Get master upper device
 * @dev: device
 *
 * Find a master upper device and return pointer to it or NULL in case
 * it's not there. The caller must hold the RCU read lock.
 */
5538struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
5539{
5540 struct netdev_adjacent *upper;
5541
5542 upper = list_first_or_null_rcu(&dev->adj_list.upper,
5543 struct netdev_adjacent, list);
5544 if (upper && likely(upper->master))
5545 return upper->dev;
5546 return NULL;
5547}
5548EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
5549
5550static int netdev_adjacent_sysfs_add(struct net_device *dev,
5551 struct net_device *adj_dev,
5552 struct list_head *dev_list)
5553{
5554 char linkname[IFNAMSIZ+7];
5555 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5556 "upper_%s" : "lower_%s", adj_dev->name);
5557 return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
5558 linkname);
5559}
5560static void netdev_adjacent_sysfs_del(struct net_device *dev,
5561 char *name,
5562 struct list_head *dev_list)
5563{
5564 char linkname[IFNAMSIZ+7];
5565 sprintf(linkname, dev_list == &dev->adj_list.upper ?
5566 "upper_%s" : "lower_%s", name);
5567 sysfs_remove_link(&(dev->dev.kobj), linkname);
5568}
5569
5570static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
5571 struct net_device *adj_dev,
5572 struct list_head *dev_list)
5573{
5574 return (dev_list == &dev->adj_list.upper ||
5575 dev_list == &dev->adj_list.lower) &&
5576 net_eq(dev_net(dev), dev_net(adj_dev));
5577}
5578
5579static int __netdev_adjacent_dev_insert(struct net_device *dev,
5580 struct net_device *adj_dev,
5581 struct list_head *dev_list,
5582 void *private, bool master)
5583{
5584 struct netdev_adjacent *adj;
5585 int ret;
5586
5587 adj = __netdev_find_adj(adj_dev, dev_list);
5588
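	/* Already adjacent via this list: just take another reference. */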
5589 if (adj) {
5590 adj->ref_nr++;
5591 return 0;
5592 }
5593
5594 adj = kmalloc(sizeof(*adj), GFP_KERNEL);
5595 if (!adj)
5596 return -ENOMEM;
5597
5598 adj->dev = adj_dev;
5599 adj->master = master;
5600 adj->ref_nr = 1;
5601 adj->private = private;
5602 dev_hold(adj_dev);
5603
5604 pr_debug("dev_hold for %s, because of link added from %s to %s\n",
5605 adj_dev->name, dev->name, adj_dev->name);
5606
5607 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
5608 ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
5609 if (ret)
5610 goto free_adj;
5611 }
5612
	/* Ensure that master link is always the first item in list. */
5614 if (master) {
5615 ret = sysfs_create_link(&(dev->dev.kobj),
5616 &(adj_dev->dev.kobj), "master");
5617 if (ret)
5618 goto remove_symlinks;
5619
5620 list_add_rcu(&adj->list, dev_list);
5621 } else {
5622 list_add_tail_rcu(&adj->list, dev_list);
5623 }
5624
5625 return 0;
5626
5627remove_symlinks:
5628 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5629 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5630free_adj:
5631 kfree(adj);
5632 dev_put(adj_dev);
5633
5634 return ret;
5635}
5636
5637static void __netdev_adjacent_dev_remove(struct net_device *dev,
5638 struct net_device *adj_dev,
5639 struct list_head *dev_list)
5640{
5641 struct netdev_adjacent *adj;
5642
5643 adj = __netdev_find_adj(adj_dev, dev_list);
5644
5645 if (!adj) {
5646 pr_err("tried to remove device %s from %s\n",
5647 dev->name, adj_dev->name);
5648 BUG();
5649 }
5650
5651 if (adj->ref_nr > 1) {
5652 pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name,
5653 adj->ref_nr-1);
5654 adj->ref_nr--;
5655 return;
5656 }
5657
5658 if (adj->master)
5659 sysfs_remove_link(&(dev->dev.kobj), "master");
5660
5661 if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
5662 netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
5663
5664 list_del_rcu(&adj->list);
5665 pr_debug("dev_put for %s, because link removed from %s to %s\n",
5666 adj_dev->name, dev->name, adj_dev->name);
5667 dev_put(adj_dev);
5668 kfree_rcu(adj, rcu);
5669}
5670
5671static int __netdev_adjacent_dev_link_lists(struct net_device *dev,
5672 struct net_device *upper_dev,
5673 struct list_head *up_list,
5674 struct list_head *down_list,
5675 void *private, bool master)
5676{
5677 int ret;
5678
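	/* Link both directions; drop the forward link again if the
	 * reverse one cannot be created.
	 */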
5679 ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private,
5680 master);
5681 if (ret)
5682 return ret;
5683
5684 ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private,
5685 false);
5686 if (ret) {
5687 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5688 return ret;
5689 }
5690
5691 return 0;
5692}
5693
5694static int __netdev_adjacent_dev_link(struct net_device *dev,
5695 struct net_device *upper_dev)
5696{
5697 return __netdev_adjacent_dev_link_lists(dev, upper_dev,
5698 &dev->all_adj_list.upper,
5699 &upper_dev->all_adj_list.lower,
5700 NULL, false);
5701}
5702
5703static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev,
5704 struct net_device *upper_dev,
5705 struct list_head *up_list,
5706 struct list_head *down_list)
5707{
5708 __netdev_adjacent_dev_remove(dev, upper_dev, up_list);
5709 __netdev_adjacent_dev_remove(upper_dev, dev, down_list);
5710}
5711
5712static void __netdev_adjacent_dev_unlink(struct net_device *dev,
5713 struct net_device *upper_dev)
5714{
5715 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5716 &dev->all_adj_list.upper,
5717 &upper_dev->all_adj_list.lower);
5718}
5719
5720static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev,
5721 struct net_device *upper_dev,
5722 void *private, bool master)
5723{
5724 int ret = __netdev_adjacent_dev_link(dev, upper_dev);
5725
5726 if (ret)
5727 return ret;
5728
5729 ret = __netdev_adjacent_dev_link_lists(dev, upper_dev,
5730 &dev->adj_list.upper,
5731 &upper_dev->adj_list.lower,
5732 private, master);
5733 if (ret) {
5734 __netdev_adjacent_dev_unlink(dev, upper_dev);
5735 return ret;
5736 }
5737
5738 return 0;
5739}
5740
5741static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
5742 struct net_device *upper_dev)
5743{
5744 __netdev_adjacent_dev_unlink(dev, upper_dev);
5745 __netdev_adjacent_dev_unlink_lists(dev, upper_dev,
5746 &dev->adj_list.upper,
5747 &upper_dev->adj_list.lower);
5748}
5749
5750static int __netdev_upper_dev_link(struct net_device *dev,
5751 struct net_device *upper_dev, bool master,
5752 void *upper_priv, void *upper_info)
5753{
5754 struct netdev_notifier_changeupper_info changeupper_info;
5755 struct netdev_adjacent *i, *j, *to_i, *to_j;
5756 int ret = 0;
5757
5758 ASSERT_RTNL();
5759
5760 if (dev == upper_dev)
5761 return -EBUSY;
5762
	/* To prevent loops, check if dev is not upper device to upper_dev. */
5764 if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
5765 return -EBUSY;
5766
5767 if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
5768 return -EEXIST;
5769
5770 if (master && netdev_master_upper_dev_get(dev))
5771 return -EBUSY;
5772
5773 changeupper_info.upper_dev = upper_dev;
5774 changeupper_info.master = master;
5775 changeupper_info.linking = true;
5776 changeupper_info.upper_info = upper_info;
5777
5778 ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5779 &changeupper_info.info);
5780 ret = notifier_to_errno(ret);
5781 if (ret)
5782 return ret;
5783
5784 ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
5785 master);
5786 if (ret)
5787 return ret;
5788
	/* Now that we linked these devs, make all of dev's lower devices
	 * adjacent to all of upper_dev's upper devices and vice versa,
	 * keeping the full all_adj_list mesh consistent. All of these
	 * links are non-neighbours.
	 */
5794 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5795 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5796 pr_debug("Interlinking %s with %s, non-neighbour\n",
5797 i->dev->name, j->dev->name);
5798 ret = __netdev_adjacent_dev_link(i->dev, j->dev);
5799 if (ret)
5800 goto rollback_mesh;
5801 }
5802 }
5803
	/* add dev to every upper_dev's upper device */
5805 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5806 pr_debug("linking %s's upper device %s with %s\n",
5807 upper_dev->name, i->dev->name, dev->name);
5808 ret = __netdev_adjacent_dev_link(dev, i->dev);
5809 if (ret)
5810 goto rollback_upper_mesh;
5811 }
5812
	/* add upper_dev to every dev's lower device */
5814 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5815 pr_debug("linking %s's lower device %s with %s\n", dev->name,
5816 i->dev->name, upper_dev->name);
5817 ret = __netdev_adjacent_dev_link(i->dev, upper_dev);
5818 if (ret)
5819 goto rollback_lower_mesh;
5820 }
5821
5822 ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5823 &changeupper_info.info);
5824 ret = notifier_to_errno(ret);
5825 if (ret)
5826 goto rollback_lower_mesh;
5827
5828 return 0;
5829
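	/* Error paths: undo the partially built mesh in the reverse order
	 * of its construction.
	 */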
5830rollback_lower_mesh:
5831 to_i = i;
5832 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5833 if (i == to_i)
5834 break;
5835 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5836 }
5837
5838 i = NULL;
5839
5840rollback_upper_mesh:
5841 to_i = i;
5842 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
5843 if (i == to_i)
5844 break;
5845 __netdev_adjacent_dev_unlink(dev, i->dev);
5846 }
5847
5848 i = j = NULL;
5849
5850rollback_mesh:
5851 to_i = i;
5852 to_j = j;
5853 list_for_each_entry(i, &dev->all_adj_list.lower, list) {
5854 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
5855 if (i == to_i && j == to_j)
5856 break;
5857 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5858 }
5859 if (i == to_i)
5860 break;
5861 }
5862
5863 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5864
5865 return ret;
5866}
5867
/**
 * netdev_upper_dev_link - Add a link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 *
 * Adds a link to device which is upper to this one. The caller must hold
 * the RTNL lock. On a failure a negative errno code is returned.
 * On success the reference counts are adjusted and the function
 * returns zero.
 */
5878int netdev_upper_dev_link(struct net_device *dev,
5879 struct net_device *upper_dev)
5880{
5881 return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
5882}
5883EXPORT_SYMBOL(netdev_upper_dev_link);
5884
/**
 * netdev_master_upper_dev_link - Add a master link to the upper device
 * @dev: device
 * @upper_dev: new upper device
 * @upper_priv: upper device private
 * @upper_info: upper info to be passed down via notifier
 *
 * Adds a link to device which is upper to this one. In this case, only
 * one master upper device can be linked, although other non-master devices
 * might be linked as well. The caller must hold the RTNL lock.
 * On a failure a negative errno code is returned. On success the reference
 * counts are adjusted and the function returns zero.
 */
5898int netdev_master_upper_dev_link(struct net_device *dev,
5899 struct net_device *upper_dev,
5900 void *upper_priv, void *upper_info)
5901{
5902 return __netdev_upper_dev_link(dev, upper_dev, true,
5903 upper_priv, upper_info);
5904}
5905EXPORT_SYMBOL(netdev_master_upper_dev_link);
5906
/**
 * netdev_upper_dev_unlink - Removes a link to upper device
 * @dev: device
 * @upper_dev: upper device to unlink
 *
 * Removes a link to device which is upper to this one. The caller must hold
 * the RTNL lock.
 */
5915void netdev_upper_dev_unlink(struct net_device *dev,
5916 struct net_device *upper_dev)
5917{
5918 struct netdev_notifier_changeupper_info changeupper_info;
5919 struct netdev_adjacent *i, *j;
5920 ASSERT_RTNL();
5921
5922 changeupper_info.upper_dev = upper_dev;
5923 changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
5924 changeupper_info.linking = false;
5925
5926 call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
5927 &changeupper_info.info);
5928
5929 __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
5930
	/* Here is the tricky part. We must remove all dev's lower
	 * devices from all upper_dev's upper devices and vice
	 * versa, to maintain the graph relationship.
	 */
5935 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5936 list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
5937 __netdev_adjacent_dev_unlink(i->dev, j->dev);
5938
	/* remove dev's lower devices from upper_dev, and dev from
	 * upper_dev's upper devices
	 */
5942 list_for_each_entry(i, &dev->all_adj_list.lower, list)
5943 __netdev_adjacent_dev_unlink(i->dev, upper_dev);
5944
5945 list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
5946 __netdev_adjacent_dev_unlink(dev, i->dev);
5947
5948 call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
5949 &changeupper_info.info);
5950}
5951EXPORT_SYMBOL(netdev_upper_dev_unlink);
5952
/**
 * netdev_bonding_info_change - Dispatch event about slave change
 * @dev: device
 * @bonding_info: info to dispatch
 *
 * Send NETDEV_BONDING_INFO to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */
5961void netdev_bonding_info_change(struct net_device *dev,
5962 struct netdev_bonding_info *bonding_info)
5963{
5964 struct netdev_notifier_bonding_info info;
5965
5966 memcpy(&info.bonding_info, bonding_info,
5967 sizeof(struct netdev_bonding_info));
5968 call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
5969 &info.info);
5970}
5971EXPORT_SYMBOL(netdev_bonding_info_change);
5972
5973static void netdev_adjacent_add_links(struct net_device *dev)
5974{
5975 struct netdev_adjacent *iter;
5976
5977 struct net *net = dev_net(dev);
5978
5979 list_for_each_entry(iter, &dev->adj_list.upper, list) {
5980 if (!net_eq(net, dev_net(iter->dev)))
5981 continue;
5982 netdev_adjacent_sysfs_add(iter->dev, dev,
5983 &iter->dev->adj_list.lower);
5984 netdev_adjacent_sysfs_add(dev, iter->dev,
5985 &dev->adj_list.upper);
5986 }
5987
5988 list_for_each_entry(iter, &dev->adj_list.lower, list) {
5989 if (!net_eq(net, dev_net(iter->dev)))
5990 continue;
5991 netdev_adjacent_sysfs_add(iter->dev, dev,
5992 &iter->dev->adj_list.upper);
5993 netdev_adjacent_sysfs_add(dev, iter->dev,
5994 &dev->adj_list.lower);
5995 }
5996}
5997
5998static void netdev_adjacent_del_links(struct net_device *dev)
5999{
6000 struct netdev_adjacent *iter;
6001
6002 struct net *net = dev_net(dev);
6003
6004 list_for_each_entry(iter, &dev->adj_list.upper, list) {
6005 if (!net_eq(net, dev_net(iter->dev)))
6006 continue;
6007 netdev_adjacent_sysfs_del(iter->dev, dev->name,
6008 &iter->dev->adj_list.lower);
6009 netdev_adjacent_sysfs_del(dev, iter->dev->name,
6010 &dev->adj_list.upper);
6011 }
6012
6013 list_for_each_entry(iter, &dev->adj_list.lower, list) {
6014 if (!net_eq(net, dev_net(iter->dev)))
6015 continue;
6016 netdev_adjacent_sysfs_del(iter->dev, dev->name,
6017 &iter->dev->adj_list.upper);
6018 netdev_adjacent_sysfs_del(dev, iter->dev->name,
6019 &dev->adj_list.lower);
6020 }
6021}
6022
6023void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
6024{
6025 struct netdev_adjacent *iter;
6026
6027 struct net *net = dev_net(dev);
6028
6029 list_for_each_entry(iter, &dev->adj_list.upper, list) {
6030 if (!net_eq(net, dev_net(iter->dev)))
6031 continue;
6032 netdev_adjacent_sysfs_del(iter->dev, oldname,
6033 &iter->dev->adj_list.lower);
6034 netdev_adjacent_sysfs_add(iter->dev, dev,
6035 &iter->dev->adj_list.lower);
6036 }
6037
6038 list_for_each_entry(iter, &dev->adj_list.lower, list) {
6039 if (!net_eq(net, dev_net(iter->dev)))
6040 continue;
6041 netdev_adjacent_sysfs_del(iter->dev, oldname,
6042 &iter->dev->adj_list.upper);
6043 netdev_adjacent_sysfs_add(iter->dev, dev,
6044 &iter->dev->adj_list.upper);
6045 }
6046}
6047
6048void *netdev_lower_dev_get_private(struct net_device *dev,
6049 struct net_device *lower_dev)
6050{
6051 struct netdev_adjacent *lower;
6052
6053 if (!lower_dev)
6054 return NULL;
6055 lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
6056 if (!lower)
6057 return NULL;
6058
6059 return lower->private;
6060}
6061EXPORT_SYMBOL(netdev_lower_dev_get_private);
6062
6063
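/* Return the nesting depth of @dev in the lower-device graph: 0 for a
 * device without lower devices, otherwise one more than the deepest
 * lower device.
 */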
6064int dev_get_nest_level(struct net_device *dev)
6065{
6066 struct net_device *lower = NULL;
6067 struct list_head *iter;
6068 int max_nest = -1;
6069 int nest;
6070
6071 ASSERT_RTNL();
6072
6073 netdev_for_each_lower_dev(dev, lower, iter) {
6074 nest = dev_get_nest_level(lower);
6075 if (max_nest < nest)
6076 max_nest = nest;
6077 }
6078
6079 return max_nest + 1;
6080}
6081EXPORT_SYMBOL(dev_get_nest_level);
6082
/**
 * netdev_lower_state_changed - Dispatch event about lower device state change
 * @lower_dev: device
 * @lower_state_info: state to dispatch
 *
 * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
 * The caller must hold the RTNL lock.
 */
6091void netdev_lower_state_changed(struct net_device *lower_dev,
6092 void *lower_state_info)
6093{
6094 struct netdev_notifier_changelowerstate_info changelowerstate_info;
6095
6096 ASSERT_RTNL();
6097 changelowerstate_info.lower_state_info = lower_state_info;
6098 call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
6099 &changelowerstate_info.info);
6100}
6101EXPORT_SYMBOL(netdev_lower_state_changed);
6102
6103int netdev_default_l2upper_neigh_construct(struct net_device *dev,
6104 struct neighbour *n)
6105{
6106 struct net_device *lower_dev, *stop_dev;
6107 struct list_head *iter;
6108 int err;
6109
6110 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6111 if (!lower_dev->netdev_ops->ndo_neigh_construct)
6112 continue;
6113 err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
6114 if (err) {
6115 stop_dev = lower_dev;
6116 goto rollback;
6117 }
6118 }
6119 return 0;
6120
6121rollback:
6122 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6123 if (lower_dev == stop_dev)
6124 break;
6125 if (!lower_dev->netdev_ops->ndo_neigh_destroy)
6126 continue;
6127 lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
6128 }
6129 return err;
6130}
6131EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
6132
6133void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
6134 struct neighbour *n)
6135{
6136 struct net_device *lower_dev;
6137 struct list_head *iter;
6138
6139 netdev_for_each_lower_dev(dev, lower_dev, iter) {
6140 if (!lower_dev->netdev_ops->ndo_neigh_destroy)
6141 continue;
6142 lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
6143 }
6144}
6145EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
6146
6147static void dev_change_rx_flags(struct net_device *dev, int flags)
6148{
6149 const struct net_device_ops *ops = dev->netdev_ops;
6150
6151 if (ops->ndo_change_rx_flags)
6152 ops->ndo_change_rx_flags(dev, flags);
6153}
6154
6155static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
6156{
6157 unsigned int old_flags = dev->flags;
6158 kuid_t uid;
6159 kgid_t gid;
6160
6161 ASSERT_RTNL();
6162
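	/* IFF_PROMISC is set optimistically; it is cleared again below if
	 * the reference count drops back to zero.
	 */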
6163 dev->flags |= IFF_PROMISC;
6164 dev->promiscuity += inc;
6165 if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
6170 if (inc < 0)
6171 dev->flags &= ~IFF_PROMISC;
6172 else {
6173 dev->promiscuity -= inc;
6174 pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
6175 dev->name);
6176 return -EOVERFLOW;
6177 }
6178 }
6179 if (dev->flags != old_flags) {
6180 pr_info("device %s %s promiscuous mode\n",
6181 dev->name,
6182 dev->flags & IFF_PROMISC ? "entered" : "left");
6183 if (audit_enabled) {
6184 current_uid_gid(&uid, &gid);
6185 audit_log(current->audit_context, GFP_ATOMIC,
6186 AUDIT_ANOM_PROMISCUOUS,
6187 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
6188 dev->name, (dev->flags & IFF_PROMISC),
6189 (old_flags & IFF_PROMISC),
6190 from_kuid(&init_user_ns, audit_get_loginuid(current)),
6191 from_kuid(&init_user_ns, uid),
6192 from_kgid(&init_user_ns, gid),
6193 audit_get_sessionid(current));
6194 }
6195
6196 dev_change_rx_flags(dev, IFF_PROMISC);
6197 }
6198 if (notify)
6199 __dev_notify_flags(dev, old_flags, IFF_PROMISC);
6200 return 0;
6201}
6202
/**
 * dev_set_promiscuity - update promiscuity count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove promiscuity from a device. While the count in the device
 * remains above zero the interface remains promiscuous. Once it hits zero
 * the device reverts back to normal filtering operation. A negative inc
 * value is used to drop promiscuity on the device.
 * Return 0 if successful or a negative errno code on error.
 */
6214int dev_set_promiscuity(struct net_device *dev, int inc)
6215{
6216 unsigned int old_flags = dev->flags;
6217 int err;
6218
6219 err = __dev_set_promiscuity(dev, inc, true);
6220 if (err < 0)
6221 return err;
6222 if (dev->flags != old_flags)
6223 dev_set_rx_mode(dev);
6224 return err;
6225}
6226EXPORT_SYMBOL(dev_set_promiscuity);
6227
6228static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
6229{
6230 unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
6231
6232 ASSERT_RTNL();
6233
6234 dev->flags |= IFF_ALLMULTI;
6235 dev->allmulti += inc;
6236 if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
6241 if (inc < 0)
6242 dev->flags &= ~IFF_ALLMULTI;
6243 else {
6244 dev->allmulti -= inc;
6245 pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
6246 dev->name);
6247 return -EOVERFLOW;
6248 }
6249 }
6250 if (dev->flags ^ old_flags) {
6251 dev_change_rx_flags(dev, IFF_ALLMULTI);
6252 dev_set_rx_mode(dev);
6253 if (notify)
6254 __dev_notify_flags(dev, old_flags,
6255 dev->gflags ^ old_gflags);
6256 }
6257 return 0;
6258}
6259
/**
 * dev_set_allmulti - update allmulti count on a device
 * @dev: device
 * @inc: modifier
 *
 * Add or remove reception of all multicast frames to a device. While the
 * count in the device remains above zero the interface remains listening
 * to all interfaces. Once it hits zero the device reverts back to normal
 * filtering operation. A negative @inc value is used to drop the counter
 * when releasing a resource needing all multicasts.
 * Return 0 if successful or a negative errno code on error.
 */
6273int dev_set_allmulti(struct net_device *dev, int inc)
6274{
6275 return __dev_set_allmulti(dev, inc, true);
6276}
6277EXPORT_SYMBOL(dev_set_allmulti);
6278
/*
 * Upload unicast and multicast address lists to device and
 * configure RX filtering. When the device doesn't support unicast
 * filtering it is put in promiscuous mode while unicast addresses
 * are present.
 */
6285void __dev_set_rx_mode(struct net_device *dev)
6286{
6287 const struct net_device_ops *ops = dev->netdev_ops;
6288
	/* Nothing to do until the device is up; dev_open will call us again. */
6290 if (!(dev->flags&IFF_UP))
6291 return;
6292
6293 if (!netif_device_present(dev))
6294 return;
6295
6296 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
		/* The device has no unicast filtering: fall back to
		 * promiscuous mode while secondary unicast addresses are
		 * configured, and drop it again once they are gone.
		 */
6300 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
6301 __dev_set_promiscuity(dev, 1, false);
6302 dev->uc_promisc = true;
6303 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
6304 __dev_set_promiscuity(dev, -1, false);
6305 dev->uc_promisc = false;
6306 }
6307 }
6308
6309 if (ops->ndo_set_rx_mode)
6310 ops->ndo_set_rx_mode(dev);
6311}
6312
6313void dev_set_rx_mode(struct net_device *dev)
6314{
6315 netif_addr_lock_bh(dev);
6316 __dev_set_rx_mode(dev);
6317 netif_addr_unlock_bh(dev);
6318}
6319
/**
 * dev_get_flags - get flags reported to userspace
 * @dev: device
 *
 * Get the combination of flag bits exported through APIs to userspace.
 */
6326unsigned int dev_get_flags(const struct net_device *dev)
6327{
6328 unsigned int flags;
6329
6330 flags = (dev->flags & ~(IFF_PROMISC |
6331 IFF_ALLMULTI |
6332 IFF_RUNNING |
6333 IFF_LOWER_UP |
6334 IFF_DORMANT)) |
6335 (dev->gflags & (IFF_PROMISC |
6336 IFF_ALLMULTI));
6337
6338 if (netif_running(dev)) {
6339 if (netif_oper_up(dev))
6340 flags |= IFF_RUNNING;
6341 if (netif_carrier_ok(dev))
6342 flags |= IFF_LOWER_UP;
6343 if (netif_dormant(dev))
6344 flags |= IFF_DORMANT;
6345 }
6346
6347 return flags;
6348}
6349EXPORT_SYMBOL(dev_get_flags);
6350
6351int __dev_change_flags(struct net_device *dev, unsigned int flags)
6352{
6353 unsigned int old_flags = dev->flags;
6354 int ret;
6355
6356 ASSERT_RTNL();
6357
	/*
	 * Set the flags on our device.
	 */
6361
6362 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
6363 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
6364 IFF_AUTOMEDIA)) |
6365 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
6366 IFF_ALLMULTI));
6367
	/*
	 * Load in the correct multicast list now the flags have changed.
	 */
6371
6372 if ((old_flags ^ flags) & IFF_MULTICAST)
6373 dev_change_rx_flags(dev, IFF_MULTICAST);
6374
6375 dev_set_rx_mode(dev);
6376
	/*
	 * Have we downed the interface? We handle IFF_UP ourselves
	 * according to user attempts to set it, rather than blindly
	 * setting it.
	 */
6382
6383 ret = 0;
6384 if ((old_flags ^ flags) & IFF_UP)
6385 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
6386
6387 if ((flags ^ dev->gflags) & IFF_PROMISC) {
6388 int inc = (flags & IFF_PROMISC) ? 1 : -1;
6389 unsigned int old_flags = dev->flags;
6390
6391 dev->gflags ^= IFF_PROMISC;
6392
6393 if (__dev_set_promiscuity(dev, inc, false) >= 0)
6394 if (dev->flags != old_flags)
6395 dev_set_rx_mode(dev);
6396 }
6397
	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	 * is important. Some (broken) drivers set IFF_PROMISC, when
	 * IFF_ALLMULTI is requested, not asking us and not reporting.
	 */
6402 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
6403 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
6404
6405 dev->gflags ^= IFF_ALLMULTI;
6406 __dev_set_allmulti(dev, inc, false);
6407 }
6408
6409 return ret;
6410}
6411
6412void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
6413 unsigned int gchanges)
6414{
6415 unsigned int changes = dev->flags ^ old_flags;
6416
6417 if (gchanges)
6418 rtmsg_ifinfo(RTM_NEWLINK, dev, gchanges, GFP_ATOMIC);
6419
6420 if (changes & IFF_UP) {
6421 if (dev->flags & IFF_UP)
6422 call_netdevice_notifiers(NETDEV_UP, dev);
6423 else
6424 call_netdevice_notifiers(NETDEV_DOWN, dev);
6425 }
6426
6427 if (dev->flags & IFF_UP &&
6428 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
6429 struct netdev_notifier_change_info change_info;
6430
6431 change_info.flags_changed = changes;
6432 call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
6433 &change_info.info);
6434 }
6435}
6436
/**
 * dev_change_flags - change device settings
 * @dev: device
 * @flags: device state flags
 *
 * Change settings on device based on state flags. The flags are
 * in the userspace exported format.
 */
6445int dev_change_flags(struct net_device *dev, unsigned int flags)
6446{
6447 int ret;
6448 unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
6449
6450 ret = __dev_change_flags(dev, flags);
6451 if (ret < 0)
6452 return ret;
6453
6454 changes = (old_flags ^ dev->flags) | (old_gflags ^ dev->gflags);
6455 __dev_notify_flags(dev, old_flags, changes);
6456 return ret;
6457}
6458EXPORT_SYMBOL(dev_change_flags);
6459
6460static int __dev_set_mtu(struct net_device *dev, int new_mtu)
6461{
6462 const struct net_device_ops *ops = dev->netdev_ops;
6463
6464 if (ops->ndo_change_mtu)
6465 return ops->ndo_change_mtu(dev, new_mtu);
6466
6467 dev->mtu = new_mtu;
6468 return 0;
6469}
6470
/**
 * dev_set_mtu - Change maximum transfer unit
 * @dev: device
 * @new_mtu: new transfer unit
 *
 * Change the maximum transfer size of the network device.
 */
6478int dev_set_mtu(struct net_device *dev, int new_mtu)
6479{
6480 int err, orig_mtu;
6481
6482 if (new_mtu == dev->mtu)
6483 return 0;
6484
	/* MTU must be positive. */
6486 if (new_mtu < 0)
6487 return -EINVAL;
6488
6489 if (!netif_device_present(dev))
6490 return -ENODEV;
6491
6492 err = call_netdevice_notifiers(NETDEV_PRECHANGEMTU, dev);
6493 err = notifier_to_errno(err);
6494 if (err)
6495 return err;
6496
6497 orig_mtu = dev->mtu;
6498 err = __dev_set_mtu(dev, new_mtu);
6499
6500 if (!err) {
6501 err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6502 err = notifier_to_errno(err);
6503 if (err) {
			/* setting mtu back and notifying everyone again,
			 * so that they have a chance to revert changes.
			 */
6507 __dev_set_mtu(dev, orig_mtu);
6508 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
6509 }
6510 }
6511 return err;
6512}
6513EXPORT_SYMBOL(dev_set_mtu);
6514
/**
 * dev_set_group - Change group this device belongs to
 * @dev: device
 * @new_group: group this device should belong to
 */
6520void dev_set_group(struct net_device *dev, int new_group)
6521{
6522 dev->group = new_group;
6523}
6524EXPORT_SYMBOL(dev_set_group);
6525
/**
 * dev_set_mac_address - Change Media Access Control Address
 * @dev: device
 * @sa: new address
 *
 * Change the hardware (MAC) address of the device.
 */
6533int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
6534{
6535 const struct net_device_ops *ops = dev->netdev_ops;
6536 int err;
6537
6538 if (!ops->ndo_set_mac_address)
6539 return -EOPNOTSUPP;
6540 if (sa->sa_family != dev->type)
6541 return -EINVAL;
6542 if (!netif_device_present(dev))
6543 return -ENODEV;
6544 err = ops->ndo_set_mac_address(dev, sa);
6545 if (err)
6546 return err;
6547 dev->addr_assign_type = NET_ADDR_SET;
6548 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
6549 add_device_randomness(dev->dev_addr, dev->addr_len);
6550 return 0;
6551}
6552EXPORT_SYMBOL(dev_set_mac_address);
6553
/**
 * dev_change_carrier - Change device carrier
 * @dev: device
 * @new_carrier: new value
 *
 * Change device carrier through the driver's ndo_change_carrier hook.
 */
6561int dev_change_carrier(struct net_device *dev, bool new_carrier)
6562{
6563 const struct net_device_ops *ops = dev->netdev_ops;
6564
6565 if (!ops->ndo_change_carrier)
6566 return -EOPNOTSUPP;
6567 if (!netif_device_present(dev))
6568 return -ENODEV;
6569 return ops->ndo_change_carrier(dev, new_carrier);
6570}
6571EXPORT_SYMBOL(dev_change_carrier);
6572
/**
 * dev_get_phys_port_id - Get device physical port ID
 * @dev: device
 * @ppid: port ID
 *
 * Get device physical port ID.
 */
6580int dev_get_phys_port_id(struct net_device *dev,
6581 struct netdev_phys_item_id *ppid)
6582{
6583 const struct net_device_ops *ops = dev->netdev_ops;
6584
6585 if (!ops->ndo_get_phys_port_id)
6586 return -EOPNOTSUPP;
6587 return ops->ndo_get_phys_port_id(dev, ppid);
6588}
6589EXPORT_SYMBOL(dev_get_phys_port_id);
6590
/**
 * dev_get_phys_port_name - Get device physical port name
 * @dev: device
 * @name: port name
 * @len: limit of bytes to copy to name
 *
 * Get device physical port name.
 */
6599int dev_get_phys_port_name(struct net_device *dev,
6600 char *name, size_t len)
6601{
6602 const struct net_device_ops *ops = dev->netdev_ops;
6603
6604 if (!ops->ndo_get_phys_port_name)
6605 return -EOPNOTSUPP;
6606 return ops->ndo_get_phys_port_name(dev, name, len);
6607}
6608EXPORT_SYMBOL(dev_get_phys_port_name);
6609
/**
 * dev_change_proto_down - update protocol port state information
 * @dev: device
 * @proto_down: new value
 *
 * This info can be used by switch drivers to set the physical state of the
 * port.
 */
6618int dev_change_proto_down(struct net_device *dev, bool proto_down)
6619{
6620 const struct net_device_ops *ops = dev->netdev_ops;
6621
6622 if (!ops->ndo_change_proto_down)
6623 return -EOPNOTSUPP;
6624 if (!netif_device_present(dev))
6625 return -ENODEV;
6626 return ops->ndo_change_proto_down(dev, proto_down);
6627}
6628EXPORT_SYMBOL(dev_change_proto_down);
6629
/**
 * dev_change_xdp_fd - set or clear a bpf program for a device rx path
 * @dev: device
 * @fd: new program fd or negative value to clear
 *
 * Set or clear an XDP program for a device.
 */
6637int dev_change_xdp_fd(struct net_device *dev, int fd)
6638{
6639 const struct net_device_ops *ops = dev->netdev_ops;
6640 struct bpf_prog *prog = NULL;
6641 struct netdev_xdp xdp = {};
6642 int err;
6643
6644 if (!ops->ndo_xdp)
6645 return -EOPNOTSUPP;
6646 if (fd >= 0) {
6647 prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
6648 if (IS_ERR(prog))
6649 return PTR_ERR(prog);
6650 }
6651
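	/* Hand the program (or NULL, when clearing) to the driver through
	 * its ndo_xdp hook.
	 */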
6652 xdp.command = XDP_SETUP_PROG;
6653 xdp.prog = prog;
6654 err = ops->ndo_xdp(dev, &xdp);
6655 if (err < 0 && prog)
6656 bpf_prog_put(prog);
6657
6658 return err;
6659}
6660EXPORT_SYMBOL(dev_change_xdp_fd);
6661
/**
 * dev_new_index - allocate an ifindex
 * @net: the applicable net namespace
 *
 * Returns a suitable unique value for a new device interface
 * number. The caller must hold the rtnl semaphore or the
 * dev_base_lock to be sure it remains unique.
 */
6670static int dev_new_index(struct net *net)
6671{
6672 int ifindex = net->ifindex;
6673 for (;;) {
6674 if (++ifindex <= 0)
6675 ifindex = 1;
6676 if (!__dev_get_by_index(net, ifindex))
6677 return net->ifindex = ifindex;
6678 }
6679}
6680
/* Delayed registration/unregistration */
6682static LIST_HEAD(net_todo_list);
6683DECLARE_WAIT_QUEUE_HEAD(netdev_unregistering_wq);
6684
6685static void net_set_todo(struct net_device *dev)
6686{
6687 list_add_tail(&dev->todo_list, &net_todo_list);
6688 dev_net(dev)->dev_unreg_count++;
6689}
6690
6691static void rollback_registered_many(struct list_head *head)
6692{
6693 struct net_device *dev, *tmp;
6694 LIST_HEAD(close_head);
6695
6696 BUG_ON(dev_boot_phase);
6697 ASSERT_RTNL();
6698
6699 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/* Some devices call without registering
		 * for initialization unwind. Remove those
		 * devices and proceed with the remaining.
		 */
6704 if (dev->reg_state == NETREG_UNINITIALIZED) {
6705 pr_debug("unregister_netdevice: device %s/%p never was registered\n",
6706 dev->name, dev);
6707
6708 WARN_ON(1);
6709 list_del(&dev->unreg_list);
6710 continue;
6711 }
6712 dev->dismantle = true;
6713 BUG_ON(dev->reg_state != NETREG_REGISTERED);
6714 }
6715
	/* If device is running, close it first. */
6717 list_for_each_entry(dev, head, unreg_list)
6718 list_add_tail(&dev->close_list, &close_head);
6719 dev_close_many(&close_head, true);
6720
6721 list_for_each_entry(dev, head, unreg_list) {
		/* And unlink it from device chain. */
6723 unlist_netdevice(dev);
6724
6725 dev->reg_state = NETREG_UNREGISTERING;
6726 on_each_cpu(flush_backlog, dev, 1);
6727 }
6728
6729 synchronize_net();
6730
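	/* In-flight receivers have now drained; notify protocols and tear
	 * down the remaining per-device state.
	 */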
6731 list_for_each_entry(dev, head, unreg_list) {
6732 struct sk_buff *skb = NULL;
6733
		/* Shutdown queueing discipline. */
6735 dev_shutdown(dev);
6736
6737
		/* Notify protocols that we are about to destroy
		 * this device. They should clean all the things.
		 */
6741 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
6742
6743 if (!dev->rtnl_link_ops ||
6744 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
6745 skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U,
6746 GFP_KERNEL);
6747
		/*
		 * Flush the unicast and multicast chains.
		 */
6751 dev_uc_flush(dev);
6752 dev_mc_flush(dev);
6753
6754 if (dev->netdev_ops->ndo_uninit)
6755 dev->netdev_ops->ndo_uninit(dev);
6756
6757 if (skb)
6758 rtmsg_ifinfo_send(skb, dev, GFP_KERNEL);
6759
		/* The notifier chain must have detached all upper devices. */
6761 WARN_ON(netdev_has_any_upper_dev(dev));
6762
		/* Remove entries from kobject tree. */
6764 netdev_unregister_kobject(dev);
6765#ifdef CONFIG_XPS
		/* Remove XPS queueing entries. */
6767 netif_reset_xps_queues_gt(dev, 0);
6768#endif
6769 }
6770
6771 synchronize_net();
6772
6773 list_for_each_entry(dev, head, unreg_list)
6774 dev_put(dev);
6775}
6776
6777static void rollback_registered(struct net_device *dev)
6778{
6779 LIST_HEAD(single);
6780
6781 list_add(&dev->unreg_list, &single);
6782 rollback_registered_many(&single);
6783 list_del(&single);
6784}
6785
6786static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
6787 struct net_device *upper, netdev_features_t features)
6788{
6789 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6790 netdev_features_t feature;
6791 int feature_bit;
6792
6793 for_each_netdev_feature(&upper_disables, feature_bit) {
6794 feature = __NETIF_F_BIT(feature_bit);
6795 if (!(upper->wanted_features & feature)
6796 && (features & feature)) {
6797 netdev_dbg(lower, "Dropping feature %pNF, upper dev %s has it off.\n",
6798 &feature, upper->name);
6799 features &= ~feature;
6800 }
6801 }
6802
6803 return features;
6804}
6805
6806static void netdev_sync_lower_features(struct net_device *upper,
6807 struct net_device *lower, netdev_features_t features)
6808{
6809 netdev_features_t upper_disables = NETIF_F_UPPER_DISABLES;
6810 netdev_features_t feature;
6811 int feature_bit;
6812
6813 for_each_netdev_feature(&upper_disables, feature_bit) {
6814 feature = __NETIF_F_BIT(feature_bit);
6815 if (!(features & feature) && (lower->features & feature)) {
6816 netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
6817 &feature, lower->name);
6818 lower->wanted_features &= ~feature;
6819 netdev_update_features(lower);
6820
6821 if (unlikely(lower->features & feature))
6822 netdev_WARN(upper, "failed to disable %pNF on %s!\n",
6823 &feature, lower->name);
6824 }
6825 }
6826}
6827
6828static netdev_features_t netdev_fix_features(struct net_device *dev,
6829 netdev_features_t features)
{
	/* Fix illegal checksum combinations. */
6832 if ((features & NETIF_F_HW_CSUM) &&
6833 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
6834 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
6835 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
6836 }
6837
6838
6839 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
6840 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
6841 features &= ~NETIF_F_ALL_TSO;
6842 }
6843
6844 if ((features & NETIF_F_TSO) && !(features & NETIF_F_HW_CSUM) &&
6845 !(features & NETIF_F_IP_CSUM)) {
6846 netdev_dbg(dev, "Dropping TSO features since no CSUM feature.\n");
6847 features &= ~NETIF_F_TSO;
6848 features &= ~NETIF_F_TSO_ECN;
6849 }
6850
6851 if ((features & NETIF_F_TSO6) && !(features & NETIF_F_HW_CSUM) &&
6852 !(features & NETIF_F_IPV6_CSUM)) {
6853 netdev_dbg(dev, "Dropping TSO6 features since no CSUM feature.\n");
6854 features &= ~NETIF_F_TSO6;
6855 }
6856
6857
6858 if ((features & NETIF_F_TSO_MANGLEID) && !(features & NETIF_F_TSO))
6859 features &= ~NETIF_F_TSO_MANGLEID;
6860
6861
6862 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
6863 features &= ~NETIF_F_TSO_ECN;
6864
6865
6866 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
6867 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
6868 features &= ~NETIF_F_GSO;
6869 }
6870
6871
6872 if (features & NETIF_F_UFO) {
6873
6874 if (!(features & NETIF_F_HW_CSUM) &&
6875 ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
6876 (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
6877 netdev_dbg(dev,
6878 "Dropping NETIF_F_UFO since no checksum offload features.\n");
6879 features &= ~NETIF_F_UFO;
6880 }
6881
6882 if (!(features & NETIF_F_SG)) {
6883 netdev_dbg(dev,
6884 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
6885 features &= ~NETIF_F_UFO;
6886 }
6887 }
6888
6889
6890 if ((features & dev->gso_partial_features) &&
6891 !(features & NETIF_F_GSO_PARTIAL)) {
6892 netdev_dbg(dev,
6893 "Dropping partially supported GSO features since no GSO partial.\n");
6894 features &= ~dev->gso_partial_features;
6895 }
6896
6897#ifdef CONFIG_NET_RX_BUSY_POLL
6898 if (dev->netdev_ops->ndo_busy_poll)
6899 features |= NETIF_F_BUSY_POLL;
6900 else
6901#endif
6902 features &= ~NETIF_F_BUSY_POLL;
6903
6904 return features;
6905}
6906
6907int __netdev_update_features(struct net_device *dev)
6908{
6909 struct net_device *upper, *lower;
6910 netdev_features_t features;
6911 struct list_head *iter;
6912 int err = -1;
6913
6914 ASSERT_RTNL();
6915
6916 features = netdev_get_wanted_features(dev);
6917
6918 if (dev->netdev_ops->ndo_fix_features)
6919 features = dev->netdev_ops->ndo_fix_features(dev, features);
6920
6921
6922 features = netdev_fix_features(dev, features);
6923
6924
6925 netdev_for_each_upper_dev_rcu(dev, upper, iter)
6926 features = netdev_sync_upper_features(dev, upper, features);
6927
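	/* Nothing changed: skip the driver callback but still sync lower devices. */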
6928 if (dev->features == features)
6929 goto sync_lower;
6930
6931 netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
6932 &dev->features, &features);
6933
6934 if (dev->netdev_ops->ndo_set_features)
6935 err = dev->netdev_ops->ndo_set_features(dev, features);
6936 else
6937 err = 0;
6938
6939 if (unlikely(err < 0)) {
6940 netdev_err(dev,
6941 "set_features() failed (%d); wanted %pNF, left %pNF\n",
6942 err, &features, &dev->features);
6943
6944
6945
6946 return -1;
6947 }
6948
6949sync_lower:
6950
6951
6952
6953 netdev_for_each_lower_dev(dev, lower, iter)
6954 netdev_sync_lower_features(dev, lower, features);
6955
6956 if (!err)
6957 dev->features = features;
6958
6959 return err < 0 ? 0 : 1;
6960}
6961
6962
6963
6964
6965
6966
6967
6968
6969
6970void netdev_update_features(struct net_device *dev)
6971{
6972 if (__netdev_update_features(dev))
6973 netdev_features_change(dev);
6974}
6975EXPORT_SYMBOL(netdev_update_features);
6976
6977
6978
6979
6980
6981
6982
6983
6984
6985
6986
6987void netdev_change_features(struct net_device *dev)
6988{
6989 __netdev_update_features(dev);
6990 netdev_features_change(dev);
6991}
6992EXPORT_SYMBOL(netdev_change_features);
6993
6994
6995
6996
6997
6998
6999
7000
7001
7002
7003void netif_stacked_transfer_operstate(const struct net_device *rootdev,
7004 struct net_device *dev)
7005{
7006 if (rootdev->operstate == IF_OPER_DORMANT)
7007 netif_dormant_on(dev);
7008 else
7009 netif_dormant_off(dev);
7010
7011 if (netif_carrier_ok(rootdev)) {
7012 if (!netif_carrier_ok(dev))
7013 netif_carrier_on(dev);
7014 } else {
7015 if (netif_carrier_ok(dev))
7016 netif_carrier_off(dev);
7017 }
7018}
7019EXPORT_SYMBOL(netif_stacked_transfer_operstate);
7020
7021#ifdef CONFIG_SYSFS
7022static int netif_alloc_rx_queues(struct net_device *dev)
7023{
7024 unsigned int i, count = dev->num_rx_queues;
7025 struct netdev_rx_queue *rx;
7026 size_t sz = count * sizeof(*rx);
7027
7028 BUG_ON(count < 1);
7029
7030 rx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7031 if (!rx) {
7032 rx = vzalloc(sz);
7033 if (!rx)
7034 return -ENOMEM;
7035 }
7036 dev->_rx = rx;
7037
7038 for (i = 0; i < count; i++)
7039 rx[i].dev = dev;
7040 return 0;
7041}
7042#endif
7043
7044static void netdev_init_one_queue(struct net_device *dev,
7045 struct netdev_queue *queue, void *_unused)
7046{
	/* Initialize queue lock */
7048 spin_lock_init(&queue->_xmit_lock);
7049 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
7050 queue->xmit_lock_owner = -1;
7051 netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
7052 queue->dev = dev;
7053#ifdef CONFIG_BQL
7054 dql_init(&queue->dql, HZ);
7055#endif
7056}
7057
7058static void netif_free_tx_queues(struct net_device *dev)
7059{
7060 kvfree(dev->_tx);
7061}
7062
7063static int netif_alloc_netdev_queues(struct net_device *dev)
7064{
7065 unsigned int count = dev->num_tx_queues;
7066 struct netdev_queue *tx;
7067 size_t sz = count * sizeof(*tx);
7068
7069 if (count < 1 || count > 0xffff)
7070 return -EINVAL;
7071
7072 tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7073 if (!tx) {
7074 tx = vzalloc(sz);
7075 if (!tx)
7076 return -ENOMEM;
7077 }
7078 dev->_tx = tx;
7079
7080 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
7081 spin_lock_init(&dev->tx_global_lock);
7082
7083 return 0;
7084}
7085
7086void netif_tx_stop_all_queues(struct net_device *dev)
7087{
7088 unsigned int i;
7089
7090 for (i = 0; i < dev->num_tx_queues; i++) {
7091 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
7092 netif_tx_stop_queue(txq);
7093 }
7094}
7095EXPORT_SYMBOL(netif_tx_stop_all_queues);
7096
/**
 * register_netdevice - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * Callers must hold the rtnl semaphore. You may want
 * register_netdev() instead of this.
 */
7114int register_netdevice(struct net_device *dev)
7115{
7116 int ret;
7117 struct net *net = dev_net(dev);
7118
7119 BUG_ON(dev_boot_phase);
7120 ASSERT_RTNL();
7121
7122 might_sleep();
7123
7124
7125 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
7126 BUG_ON(!net);
7127
7128 spin_lock_init(&dev->addr_list_lock);
7129 netdev_set_addr_lockdep_class(dev);
7130
7131 ret = dev_get_valid_name(net, dev, dev->name);
7132 if (ret < 0)
7133 goto out;
7134
7135
7136 if (dev->netdev_ops->ndo_init) {
7137 ret = dev->netdev_ops->ndo_init(dev);
7138 if (ret) {
7139 if (ret > 0)
7140 ret = -EIO;
7141 goto out;
7142 }
7143 }
7144
7145 if (((dev->hw_features | dev->features) &
7146 NETIF_F_HW_VLAN_CTAG_FILTER) &&
7147 (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
7148 !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
7149 netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
7150 ret = -EINVAL;
7151 goto err_uninit;
7152 }
7153
7154 ret = -EBUSY;
7155 if (!dev->ifindex)
7156 dev->ifindex = dev_new_index(net);
7157 else if (__dev_get_by_index(net, dev->ifindex))
7158 goto err_uninit;
7159
	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
7163 dev->hw_features |= NETIF_F_SOFT_FEATURES;
7164 dev->features |= NETIF_F_SOFT_FEATURES;
7165 dev->wanted_features = dev->features & dev->hw_features;
7166
7167 if (!(dev->flags & IFF_LOOPBACK))
7168 dev->hw_features |= NETIF_F_NOCACHE_COPY;
7169
	/* If TCP segmentation offload is supported, also allow the device
	 * to segment frames while ignoring a static IP ID value
	 * (NETIF_F_TSO_MANGLEID). This doesn't enable the feature itself
	 * but allows the user to enable it later.
	 */
7175 if (dev->hw_features & NETIF_F_TSO)
7176 dev->hw_features |= NETIF_F_TSO_MANGLEID;
7177 if (dev->vlan_features & NETIF_F_TSO)
7178 dev->vlan_features |= NETIF_F_TSO_MANGLEID;
7179 if (dev->mpls_features & NETIF_F_TSO)
7180 dev->mpls_features |= NETIF_F_TSO_MANGLEID;
7181 if (dev->hw_enc_features & NETIF_F_TSO)
7182 dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;
7183
	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
	 */
7186 dev->vlan_features |= NETIF_F_HIGHDMA;
7187
	/* Make NETIF_F_SG inheritable to tunnel devices.
	 */
7190 dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;
7191
	/* Make NETIF_F_SG inheritable to MPLS.
	 */
7194 dev->mpls_features |= NETIF_F_SG;
7195
7196 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
7197 ret = notifier_to_errno(ret);
7198 if (ret)
7199 goto err_uninit;
7200
7201 ret = netdev_register_kobject(dev);
7202 if (ret)
7203 goto err_uninit;
7204 dev->reg_state = NETREG_REGISTERED;
7205
7206 __netdev_update_features(dev);
7207
	/*
	 * Default initial state at registry is that the
	 * device is present.
	 */
7212
7213 set_bit(__LINK_STATE_PRESENT, &dev->state);
7214
7215 linkwatch_init_dev(dev);
7216
7217 dev_init_scheduler(dev);
7218 dev_hold(dev);
7219 list_netdevice(dev);
7220 add_device_randomness(dev->dev_addr, dev->addr_len);
7221
	/* If the device has a permanent hardware address, the driver should
	 * have set dev_addr and left addr_assign_type at NET_ADDR_PERM
	 * (the default); record it as the permanent address here.
	 */
7226 if (dev->addr_assign_type == NET_ADDR_PERM)
7227 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
7228
	/* Notify protocols that a new device appeared. */
7230 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
7231 ret = notifier_to_errno(ret);
7232 if (ret) {
7233 rollback_registered(dev);
7234 dev->reg_state = NETREG_UNREGISTERED;
7235 }
7236
	/* Prevent userspace races by waiting until the network
	 * device is fully set up before sending notifications.
	 */
7240 if (!dev->rtnl_link_ops ||
7241 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
7242 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7243
7244out:
7245 return ret;
7246
7247err_uninit:
7248 if (dev->netdev_ops->ndo_uninit)
7249 dev->netdev_ops->ndo_uninit(dev);
7250 goto out;
7251}
7252EXPORT_SYMBOL(register_netdevice);
7253
/**
 * init_dummy_netdev - init a dummy network device for NAPI
 * @dev: device to init
 *
 * This takes a network device structure and initializes the minimum
 * amount of fields so it can be used to schedule NAPI polls without
 * registering a full blown interface. This is to be used by drivers
 * that need to tie several hardware interfaces to a single NAPI
 * poll scheduler due to HW limitations.
 */
7264int init_dummy_netdev(struct net_device *dev)
7265{
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls.
	 */
7271 memset(dev, 0, sizeof(struct net_device));
7272
	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
7276 dev->reg_state = NETREG_DUMMY;
7277
	/* NAPI polling needs an initialized napi_list */
7279 INIT_LIST_HEAD(&dev->napi_list);
7280
	/* a dummy interface is started by default */
7282 set_bit(__LINK_STATE_PRESENT, &dev->state);
7283 set_bit(__LINK_STATE_START, &dev->state);
7284
	/* Note: we don't allocate pcpu_refcnt for dummy devices,
	 * because users of this 'device' don't need to change
	 * its refcount.
	 */
7290 return 0;
7291}
7292EXPORT_SYMBOL_GPL(init_dummy_netdev);
7293
/**
 * register_netdev - register a network device
 * @dev: device to register
 *
 * Take a completed network device structure and add it to the kernel
 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 * chain. 0 is returned on success. A negative errno code is returned
 * on a failure to set up the device, or if the name is a duplicate.
 *
 * This is a wrapper around register_netdevice that takes the rtnl
 * semaphore and expands the device name if you passed a format string
 * to alloc_netdev.
 */
7308int register_netdev(struct net_device *dev)
7309{
7310 int err;
7311
7312 rtnl_lock();
7313 err = register_netdevice(dev);
7314 rtnl_unlock();
7315 return err;
7316}
7317EXPORT_SYMBOL(register_netdev);
7318
7319int netdev_refcnt_read(const struct net_device *dev)
7320{
7321 int i, refcnt = 0;
7322
7323 for_each_possible_cpu(i)
7324 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
7325 return refcnt;
7326}
7327EXPORT_SYMBOL(netdev_refcnt_read);
7328
/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and clean up and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
7341static void netdev_wait_allrefs(struct net_device *dev)
7342{
7343 unsigned long rebroadcast_time, warning_time;
7344 int refcnt;
7345
7346 linkwatch_forget_dev(dev);
7347
7348 rebroadcast_time = warning_time = jiffies;
7349 refcnt = netdev_refcnt_read(dev);
7350
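	/* While references remain, re-send NETDEV_UNREGISTER roughly once
	 * a second and complain every ten seconds.
	 */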
7351 while (refcnt != 0) {
7352 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
7353 rtnl_lock();
7354
			/* Rebroadcast unregister notification */
7356 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
7357
7358 __rtnl_unlock();
7359 rcu_barrier();
7360 rtnl_lock();
7361
7362 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7363 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
7364 &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
7371 linkwatch_run_queue();
7372 }
7373
7374 __rtnl_unlock();
7375
7376 rebroadcast_time = jiffies;
7377 }
7378
7379 msleep(250);
7380
7381 refcnt = netdev_refcnt_read(dev);
7382
7383 if (time_after(jiffies, warning_time + 10 * HZ)) {
7384 pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
7385 dev->name, refcnt);
7386 warning_time = jiffies;
7387 }
7388 }
7389}
7390
/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
7415void netdev_run_todo(void)
7416{
7417 struct list_head list;
7418
	/* Snapshot list, allow later requests */
7420 list_replace_init(&net_todo_list, &list);
7421
7422 __rtnl_unlock();
7423
7424
	/* Wait for rcu callbacks to finish before next phase */
7426 if (!list_empty(&list))
7427 rcu_barrier();
7428
7429 while (!list_empty(&list)) {
7430 struct net_device *dev
7431 = list_first_entry(&list, struct net_device, todo_list);
7432 list_del(&dev->todo_list);
7433
7434 rtnl_lock();
7435 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7436 __rtnl_unlock();
7437
7438 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
7439 pr_err("network todo '%s' but state %d\n",
7440 dev->name, dev->reg_state);
7441 dump_stack();
7442 continue;
7443 }
7444
7445 dev->reg_state = NETREG_UNREGISTERED;
7446
7447 netdev_wait_allrefs(dev);
7448
		/* Sanity checks: nothing may still reference this device. */
7450 BUG_ON(netdev_refcnt_read(dev));
7451 BUG_ON(!list_empty(&dev->ptype_all));
7452 BUG_ON(!list_empty(&dev->ptype_specific));
7453 WARN_ON(rcu_access_pointer(dev->ip_ptr));
7454 WARN_ON(rcu_access_pointer(dev->ip6_ptr));
7455 WARN_ON(dev->dn_ptr);
7456
7457 if (dev->destructor)
7458 dev->destructor(dev);
7459
		/* Report finished unregistration and wake up waiters. */
7461 rtnl_lock();
7462 dev_net(dev)->dev_unreg_count--;
7463 __rtnl_unlock();
7464 wake_up(&netdev_unregistering_wq);
7465
		/* Free network device */
7467 kobject_put(&dev->dev.kobj);
7468 }
7469}
7470
/* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has
 * all the same fields in the same order as net_device_stats, with only
 * the type differing, but rtnl_link_stats64 may have additional fields
 * at the end for newer counters.
 */
7476void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
			     const struct net_device_stats *netdev_stats)
{
7478{
7479#if BITS_PER_LONG == 64
7480 BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
7481 memcpy(stats64, netdev_stats, sizeof(*stats64));
	/* zero out counters that only exist in rtnl_link_stats64 */
7483 memset((char *)stats64 + sizeof(*netdev_stats), 0,
7484 sizeof(*stats64) - sizeof(*netdev_stats));
7485#else
7486 size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
7487 const unsigned long *src = (const unsigned long *)netdev_stats;
7488 u64 *dst = (u64 *)stats64;
7489
7490 BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
7491 for (i = 0; i < n; i++)
7492 dst[i] = src[i];
7493
7494 memset((char *)stats64 + n * sizeof(u64), 0,
7495 sizeof(*stats64) - n * sizeof(u64));
7496#endif
7497}
7498EXPORT_SYMBOL(netdev_stats_to_stats64);
7499
/**
 * dev_get_stats - get network device statistics
 * @dev: device to get statistics from
 * @storage: place to store stats
 *
 * Get network statistics from device. Return @storage.
 * The device driver may provide its own method by setting
 * dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 * otherwise the internal statistics structure is used.
 */
7510struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
7511 struct rtnl_link_stats64 *storage)
7512{
7513 const struct net_device_ops *ops = dev->netdev_ops;
7514
7515 if (ops->ndo_get_stats64) {
7516 memset(storage, 0, sizeof(*storage));
7517 ops->ndo_get_stats64(dev, storage);
7518 } else if (ops->ndo_get_stats) {
7519 netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
7520 } else {
7521 netdev_stats_to_stats64(storage, &dev->stats);
7522 }
7523 storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
7524 storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
7525 storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler);
7526 return storage;
7527}
7528EXPORT_SYMBOL(dev_get_stats);
7529
7530struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
7531{
7532 struct netdev_queue *queue = dev_ingress_queue(dev);
7533
7534#ifdef CONFIG_NET_CLS_ACT
7535 if (queue)
7536 return queue;
7537 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
7538 if (!queue)
7539 return NULL;
7540 netdev_init_one_queue(dev, queue, NULL);
7541 RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
7542 queue->qdisc_sleeping = &noop_qdisc;
7543 rcu_assign_pointer(dev->ingress_queue, queue);
7544#endif
7545 return queue;
7546}
7547
7548static const struct ethtool_ops default_ethtool_ops;
7549
7550void netdev_set_default_ethtool_ops(struct net_device *dev,
7551 const struct ethtool_ops *ops)
7552{
7553 if (dev->ethtool_ops == &default_ethtool_ops)
7554 dev->ethtool_ops = ops;
7555}
7556EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);
7557
7558void netdev_freemem(struct net_device *dev)
7559{
7560 char *addr = (char *)dev - dev->padded;
7561
7562 kvfree(addr);
7563}
7564
/**
 * alloc_netdev_mqs - allocate network device
 * @sizeof_priv: size of private data to allocate space for
 * @name: device name format string
 * @name_assign_type: origin of device name
 * @setup: callback to initialize device
 * @txqs: the number of TX subqueues to allocate
 * @rxqs: the number of RX subqueues to allocate
 *
 * Allocates a struct net_device with private data area for driver use
 * and performs basic initialization. Also allocates subqueue structs
 * for each queue on the device.
 */
7578struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
7579 unsigned char name_assign_type,
7580 void (*setup)(struct net_device *),
7581 unsigned int txqs, unsigned int rxqs)
7582{
7583 struct net_device *dev;
7584 size_t alloc_size;
7585 struct net_device *p;
7586
7587 BUG_ON(strlen(name) >= sizeof(dev->name));
7588
7589 if (txqs < 1) {
7590 pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
7591 return NULL;
7592 }
7593
7594#ifdef CONFIG_SYSFS
7595 if (rxqs < 1) {
7596 pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
7597 return NULL;
7598 }
7599#endif
7600
7601 alloc_size = sizeof(struct net_device);
7602 if (sizeof_priv) {
		/* ensure alignment of the private area */
7604 alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
7605 alloc_size += sizeof_priv;
7606 }
	/* ensure alignment of the whole construct */
7608 alloc_size += NETDEV_ALIGN - 1;
7609
7610 p = kzalloc(alloc_size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
7611 if (!p)
7612 p = vzalloc(alloc_size);
7613 if (!p)
7614 return NULL;
7615
7616 dev = PTR_ALIGN(p, NETDEV_ALIGN);
7617 dev->padded = (char *)dev - (char *)p;
7618
7619 dev->pcpu_refcnt = alloc_percpu(int);
7620 if (!dev->pcpu_refcnt)
7621 goto free_dev;
7622
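	/* Set up the hardware address lists before the driver's setup()
	 * callback runs.
	 */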
7623 if (dev_addr_init(dev))
7624 goto free_pcpu;
7625
7626 dev_mc_init(dev);
7627 dev_uc_init(dev);
7628
7629 dev_net_set(dev, &init_net);
7630
7631 dev->gso_max_size = GSO_MAX_SIZE;
7632 dev->gso_max_segs = GSO_MAX_SEGS;
7633
7634 INIT_LIST_HEAD(&dev->napi_list);
7635 INIT_LIST_HEAD(&dev->unreg_list);
7636 INIT_LIST_HEAD(&dev->close_list);
7637 INIT_LIST_HEAD(&dev->link_watch_list);
7638 INIT_LIST_HEAD(&dev->adj_list.upper);
7639 INIT_LIST_HEAD(&dev->adj_list.lower);
7640 INIT_LIST_HEAD(&dev->all_adj_list.upper);
7641 INIT_LIST_HEAD(&dev->all_adj_list.lower);
7642 INIT_LIST_HEAD(&dev->ptype_all);
7643 INIT_LIST_HEAD(&dev->ptype_specific);
7644 dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
7645 setup(dev);
7646
7647 if (!dev->tx_queue_len) {
7648 dev->priv_flags |= IFF_NO_QUEUE;
7649 dev->tx_queue_len = 1;
7650 }
7651
7652 dev->num_tx_queues = txqs;
7653 dev->real_num_tx_queues = txqs;
7654 if (netif_alloc_netdev_queues(dev))
7655 goto free_all;
7656
7657#ifdef CONFIG_SYSFS
7658 dev->num_rx_queues = rxqs;
7659 dev->real_num_rx_queues = rxqs;
7660 if (netif_alloc_rx_queues(dev))
7661 goto free_all;
7662#endif
7663
7664 strcpy(dev->name, name);
7665 dev->name_assign_type = name_assign_type;
7666 dev->group = INIT_NETDEV_GROUP;
7667 if (!dev->ethtool_ops)
7668 dev->ethtool_ops = &default_ethtool_ops;
7669
7670 nf_hook_ingress_init(dev);
7671
7672 return dev;
7673
7674free_all:
7675 free_netdev(dev);
7676 return NULL;
7677
7678free_pcpu:
7679 free_percpu(dev->pcpu_refcnt);
7680free_dev:
7681 netdev_freemem(dev);
7682 return NULL;
7683}
7684EXPORT_SYMBOL(alloc_netdev_mqs);
7685
/**
 * free_netdev - free network device
 * @dev: device
 *
 * This function does the last stage of destroying an allocated device
 * interface. The reference to the device object is released. If this
 * is the last reference then it will be freed.
 * Must be called in process context.
 */
7695void free_netdev(struct net_device *dev)
7696{
7697 struct napi_struct *p, *n;
7698
7699 might_sleep();
7700 netif_free_tx_queues(dev);
7701#ifdef CONFIG_SYSFS
7702 kvfree(dev->_rx);
7703#endif
7704
7705 kfree(rcu_dereference_protected(dev->ingress_queue, 1));
7706
7707
7708 dev_addr_flush(dev);
7709
7710 list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
7711 netif_napi_del(p);
7712
7713 free_percpu(dev->pcpu_refcnt);
7714 dev->pcpu_refcnt = NULL;
7715
	/* Compatibility with error handling in drivers */
7717 if (dev->reg_state == NETREG_UNINITIALIZED) {
7718 netdev_freemem(dev);
7719 return;
7720 }
7721
7722 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
7723 dev->reg_state = NETREG_RELEASED;
7724
	/* will free via device release */
7726 put_device(&dev->dev);
7727}
7728EXPORT_SYMBOL(free_netdev);
7729
/**
 * synchronize_net - Synchronize with packet receive processing
 *
 * Wait for packets currently being received to be done.
 * Does not block later packets from starting.
 */
7736void synchronize_net(void)
7737{
7738 might_sleep();
7739 if (rtnl_is_locked())
7740 synchronize_rcu_expedited();
7741 else
7742 synchronize_rcu();
7743}
7744EXPORT_SYMBOL(synchronize_net);
7745
/**
 * unregister_netdevice_queue - remove device from the kernel
 * @dev: device
 * @head: list
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 * If head is not NULL, the device is queued to be unregistered later.
 *
 * Callers must hold the rtnl semaphore. You may want
 * unregister_netdev() instead of this.
 */
7759void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
7760{
7761 ASSERT_RTNL();
7762
7763 if (head) {
7764 list_move_tail(&dev->unreg_list, head);
7765 } else {
7766 rollback_registered(dev);
7767
7768 net_set_todo(dev);
7769 }
7770}
7771EXPORT_SYMBOL(unregister_netdevice_queue);
7772
/**
 * unregister_netdevice_many - unregister many devices
 * @head: list of devices
 *
 * Note: As most callers use a stack allocated list_head,
 * we force a list_del() to make sure the stack won't be corrupted later.
 */
7780void unregister_netdevice_many(struct list_head *head)
7781{
7782 struct net_device *dev;
7783
7784 if (!list_empty(head)) {
7785 rollback_registered_many(head);
7786 list_for_each_entry(dev, head, unreg_list)
7787 net_set_todo(dev);
7788 list_del(head);
7789 }
7790}
7791EXPORT_SYMBOL(unregister_netdevice_many);
7792
/**
 * unregister_netdev - remove device from the kernel
 * @dev: device
 *
 * This function shuts down a device interface and removes it
 * from the kernel tables.
 *
 * This is just a wrapper for unregister_netdevice that takes
 * the rtnl semaphore. In general you want to use this and not
 * unregister_netdevice.
 */
7804void unregister_netdev(struct net_device *dev)
7805{
7806 rtnl_lock();
7807 unregister_netdevice(dev);
7808 rtnl_unlock();
7809}
7810EXPORT_SYMBOL(unregister_netdev);
7811
7812
/**
 * dev_change_net_namespace - move device to a different network namespace
 * @dev: device
 * @net: network namespace
 * @pat: if not NULL, name pattern to try if the current device name
 *	 is already taken in the destination network namespace
 *
 * This function shuts down a device interface and moves it
 * to a new network namespace. On success 0 is returned, on
 * a failure a negative errno code is returned.
 *
 * Callers must hold the rtnl semaphore.
 */
7826int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
7827{
7828 int err;
7829
7830 ASSERT_RTNL();
7831
	/* Don't allow namespace local devices to be moved. */
7833 err = -EINVAL;
7834 if (dev->features & NETIF_F_NETNS_LOCAL)
7835 goto out;
7836
	/* Ensure the device has been registered. */
7838 if (dev->reg_state != NETREG_REGISTERED)
7839 goto out;
7840
	/* Get out if there is nothing to do. */
7842 err = 0;
7843 if (net_eq(dev_net(dev), net))
7844 goto out;
7845
	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
7849 err = -EEXIST;
7850 if (__dev_get_by_name(net, dev->name)) {
		/* We get here if we can't use the current device name. */
7852 if (!pat)
7853 goto out;
7854 if (dev_get_valid_name(net, dev, pat) < 0)
7855 goto out;
7856 }
7857
	/*
	 * And now a mini version of register_netdevice and unregister_netdevice.
	 */

	/* If device is running, close it first. */
7863 dev_close(dev);
7864
	/* And unlink it from the device chain. */
7866 err = -ENODEV;
7867 unlist_netdevice(dev);
7868
7869 synchronize_net();
7870
	/* Shutdown queueing discipline. */
7872 dev_shutdown(dev);
7873
	/* Notify protocols that we are about to destroy
	 * this device. They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */
7881 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
7882 rcu_barrier();
7883 call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
7884 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
7885
	/*
	 * Flush the unicast and multicast chains.
	 */
7889 dev_uc_flush(dev);
7890 dev_mc_flush(dev);
7891
	/* Send a netdev-removed uevent to the old namespace. */
7893 kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);
7894 netdev_adjacent_del_links(dev);
7895
	/* Actually switch the network namespace. */
7897 dev_net_set(dev, net);
7898
	/* If there is an ifindex conflict, assign a new one. */
7900 if (__dev_get_by_index(net, dev->ifindex))
7901 dev->ifindex = dev_new_index(net);
7902
	/* Send a netdev-added uevent to the new namespace. */
7904 kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
7905 netdev_adjacent_add_links(dev);
7906
	/* Rename the sysfs entry to match the (possibly new) name. */
7908 err = device_rename(&dev->dev, dev->name);
7909 WARN_ON(err);
7910
	/* Add the device back in the hashes. */
7912 list_netdevice(dev);
7913
	/* Notify protocols that a new device appeared. */
7915 call_netdevice_notifiers(NETDEV_REGISTER, dev);
7916
	/*
	 * Prevent userspace races by waiting until the network
	 * device is fully set up before sending notifications.
	 */
7921 rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);
7922
7923 synchronize_net();
7924 err = 0;
7925out:
7926 return err;
7927}
7928EXPORT_SYMBOL_GPL(dev_change_net_namespace);
7929
7930static int dev_cpu_callback(struct notifier_block *nfb,
7931 unsigned long action,
7932 void *ocpu)
7933{
7934 struct sk_buff **list_skb;
7935 struct sk_buff *skb;
7936 unsigned int cpu, oldcpu = (unsigned long)ocpu;
7937 struct softnet_data *sd, *oldsd;
7938
7939 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
7940 return NOTIFY_OK;
7941
7942 local_irq_disable();
7943 cpu = smp_processor_id();
7944 sd = &per_cpu(softnet_data, cpu);
7945 oldsd = &per_cpu(softnet_data, oldcpu);
7946
7947
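	/* Append the offline CPU's completion queue to the end of ours. */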
7948 list_skb = &sd->completion_queue;
7949 while (*list_skb)
7950 list_skb = &(*list_skb)->next;
7951
7952 *list_skb = oldsd->completion_queue;
7953 oldsd->completion_queue = NULL;
7954
7955
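	/* Append the output queue from the offline CPU. */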
7956 if (oldsd->output_queue) {
7957 *sd->output_queue_tailp = oldsd->output_queue;
7958 sd->output_queue_tailp = oldsd->output_queue_tailp;
7959 oldsd->output_queue = NULL;
7960 oldsd->output_queue_tailp = &oldsd->output_queue;
7961 }
7962
7963
7964
7965
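	/* Append the NAPI poll list from the offline CPU, with one exception:
	 * process_backlog() must be run by the CPU owning the per-cpu
	 * backlog, so its state is simply cleared here; process_queue
	 * and input_pkt_queue are handled below.
	 */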
7966 while (!list_empty(&oldsd->poll_list)) {
7967 struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
7968 struct napi_struct,
7969 poll_list);
7970
7971 list_del_init(&napi->poll_list);
7972 if (napi->poll == process_backlog)
7973 napi->state = 0;
7974 else
7975 ____napi_schedule(sd, napi);
7976 }
7977
7978 raise_softirq_irqoff(NET_TX_SOFTIRQ);
7979 local_irq_enable();
7980
7981
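	/* Reinject the offline CPU's queued input packets on this CPU. */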
7982 while ((skb = __skb_dequeue(&oldsd->process_queue))) {
7983 netif_rx_ni(skb);
7984 input_queue_head_incr(oldsd);
7985 }
7986 while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
7987 netif_rx_ni(skb);
7988 input_queue_head_incr(oldsd);
7989 }
7990
7991 return NOTIFY_OK;
7992}
7993
/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
8005netdev_features_t netdev_increment_features(netdev_features_t all,
8006 netdev_features_t one, netdev_features_t mask)
8007{
8008 if (mask & NETIF_F_HW_CSUM)
8009 mask |= NETIF_F_CSUM_MASK;
8010 mask |= NETIF_F_VLAN_CHALLENGED;
8011
8012 all |= one & (NETIF_F_ONE_FOR_ALL | NETIF_F_CSUM_MASK) & mask;
8013 all &= one | ~NETIF_F_ALL_FOR_ALL;
8014
8015
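	/* If one device supports hw checksumming, set for all. */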
8016 if (all & NETIF_F_HW_CSUM)
8017 all &= ~(NETIF_F_CSUM_MASK & ~NETIF_F_HW_CSUM);
8018
8019 return all;
8020}
8021EXPORT_SYMBOL(netdev_increment_features);
8022
8023static struct hlist_head * __net_init netdev_create_hash(void)
8024{
8025 int i;
8026 struct hlist_head *hash;
8027
8028 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
8029 if (hash != NULL)
8030 for (i = 0; i < NETDEV_HASHENTRIES; i++)
8031 INIT_HLIST_HEAD(&hash[i]);
8032
8033 return hash;
8034}
8035
8036
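/* Initialize per network namespace state. */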
8037static int __net_init netdev_init(struct net *net)
8038{
8039 if (net != &init_net)
8040 INIT_LIST_HEAD(&net->dev_base_head);
8041
8042 net->dev_name_head = netdev_create_hash();
8043 if (net->dev_name_head == NULL)
8044 goto err_name;
8045
8046 net->dev_index_head = netdev_create_hash();
8047 if (net->dev_index_head == NULL)
8048 goto err_idx;
8049
8050 return 0;
8051
8052err_idx:
8053 kfree(net->dev_name_head);
8054err_name:
8055 return -ENOMEM;
8056}
8057

/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine the network driver for the device.
 */
8064const char *netdev_drivername(const struct net_device *dev)
8065{
8066 const struct device_driver *driver;
8067 const struct device *parent;
8068 const char *empty = "";
8069
8070 parent = dev->dev.parent;
8071 if (!parent)
8072 return empty;
8073
8074 driver = parent->driver;
8075 if (driver && driver->name)
8076 return driver->name;
8077 return empty;
8078}
8079
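/* Print a netdev message: when the device has a parent (a bus device),
 * route it through dev_printk_emit() so the driver and bus device names
 * are included; otherwise fall back to plain printk().
 */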
8080static void __netdev_printk(const char *level, const struct net_device *dev,
8081 struct va_format *vaf)
8082{
8083 if (dev && dev->dev.parent) {
8084 dev_printk_emit(level[1] - '0',
8085 dev->dev.parent,
8086 "%s %s %s%s: %pV",
8087 dev_driver_string(dev->dev.parent),
8088 dev_name(dev->dev.parent),
8089 netdev_name(dev), netdev_reg_state(dev),
8090 vaf);
8091 } else if (dev) {
8092 printk("%s%s%s: %pV",
8093 level, netdev_name(dev), netdev_reg_state(dev), vaf);
8094 } else {
8095 printk("%s(NULL net_device): %pV", level, vaf);
8096 }
8097}
8098
8099void netdev_printk(const char *level, const struct net_device *dev,
8100 const char *format, ...)
8101{
8102 struct va_format vaf;
8103 va_list args;
8104
8105 va_start(args, format);
8106
8107 vaf.fmt = format;
8108 vaf.va = &args;
8109
8110 __netdev_printk(level, dev, &vaf);
8111
8112 va_end(args);
8113}
8114EXPORT_SYMBOL(netdev_printk);
8115
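/* Generate the netdev_emerg() ... netdev_info() helpers, one per log level,
 * all funnelling into __netdev_printk().
 */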
8116#define define_netdev_printk_level(func, level) \
8117void func(const struct net_device *dev, const char *fmt, ...) \
8118{ \
8119 struct va_format vaf; \
8120 va_list args; \
8121 \
8122 va_start(args, fmt); \
8123 \
8124 vaf.fmt = fmt; \
8125 vaf.va = &args; \
8126 \
8127 __netdev_printk(level, dev, &vaf); \
8128 \
8129 va_end(args); \
8130} \
8131EXPORT_SYMBOL(func);
8132
8133define_netdev_printk_level(netdev_emerg, KERN_EMERG);
8134define_netdev_printk_level(netdev_alert, KERN_ALERT);
8135define_netdev_printk_level(netdev_crit, KERN_CRIT);
8136define_netdev_printk_level(netdev_err, KERN_ERR);
8137define_netdev_printk_level(netdev_warn, KERN_WARNING);
8138define_netdev_printk_level(netdev_notice, KERN_NOTICE);
8139define_netdev_printk_level(netdev_info, KERN_INFO);
8140
8141static void __net_exit netdev_exit(struct net *net)
8142{
8143 kfree(net->dev_name_head);
8144 kfree(net->dev_index_head);
8145}
8146
8147static struct pernet_operations __net_initdata netdev_net_ops = {
8148 .init = netdev_init,
8149 .exit = netdev_exit,
8150};
8151
8152static void __net_exit default_device_exit(struct net *net)
8153{
8154 struct net_device *dev, *aux;
8155
8156
8157
8158
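	/*
	 * Push all migratable network devices back to the
	 * initial network namespace.
	 */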
8159 rtnl_lock();
8160 for_each_netdev_safe(net, dev, aux) {
8161 int err;
8162 char fb_name[IFNAMSIZ];
8163
8164
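		/* Ignore unmoveable devices (i.e. loopback). */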
8165 if (dev->features & NETIF_F_NETNS_LOCAL)
8166 continue;
8167
8168
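		/* Leave virtual devices for the generic cleanup. */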
8169 if (dev->rtnl_link_ops)
8170 continue;
8171
8172
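		/* Push remaining network devices to init_net. */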
8173 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
8174 err = dev_change_net_namespace(dev, &init_net, fb_name);
8175 if (err) {
8176 pr_emerg("%s: failed to move %s to init_net: %d\n",
8177 __func__, dev->name, err);
8178 BUG();
8179 }
8180 }
8181 rtnl_unlock();
8182}
8183
8184static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
8185{
	/* Return with the rtnl_lock held when there are no network
	 * devices unregistering in any network namespace in net_list.
	 */
8189 struct net *net;
8190 bool unregistering;
8191 DEFINE_WAIT_FUNC(wait, woken_wake_function);
8192
8193 add_wait_queue(&netdev_unregistering_wq, &wait);
8194 for (;;) {
8195 unregistering = false;
8196 rtnl_lock();
8197 list_for_each_entry(net, net_list, exit_list) {
8198 if (net->dev_unreg_count > 0) {
8199 unregistering = true;
8200 break;
8201 }
8202 }
8203 if (!unregistering)
8204 break;
8205 __rtnl_unlock();
8206
8207 wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
8208 }
8209 remove_wait_queue(&netdev_unregistering_wq, &wait);
8210}
8211
8212static void __net_exit default_device_exit_batch(struct list_head *net_list)
8213{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
8219 struct net_device *dev;
8220 struct net *net;
8221 LIST_HEAD(dev_kill_list);
8222
	/* To prevent network device cleanup code from dereferencing
	 * loopback devices or network devices that have been freed,
	 * wait here for all pending unregistrations to complete
	 * before unregistering the loopback device and allowing the
	 * network namespace to be freed.
	 *
	 * The netdev todo list containing all network device
	 * unregistrations that happen in default_device_exit_batch
	 * will run in the rtnl_unlock() at the end of
	 * default_device_exit_batch.
	 */
8234 rtnl_lock_unregistering(net_list);
8235 list_for_each_entry(net, net_list, exit_list) {
8236 for_each_netdev_reverse(net, dev) {
8237 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
8238 dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
8239 else
8240 unregister_netdevice_queue(dev, &dev_kill_list);
8241 }
8242 }
8243 unregister_netdevice_many(&dev_kill_list);
8244 rtnl_unlock();
8245}
8246
8247static struct pernet_operations __net_initdata default_device_ops = {
8248 .exit = default_device_exit,
8249 .exit_batch = default_device_exit_batch,
8250};
8251

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
8263static int __init net_dev_init(void)
8264{
8265 int i, rc = -ENOMEM;
8266
8267 BUG_ON(!dev_boot_phase);
8268
8269 if (dev_proc_init())
8270 goto out;
8271
8272 if (netdev_kobject_init())
8273 goto out;
8274
8275 INIT_LIST_HEAD(&ptype_all);
8276 for (i = 0; i < PTYPE_HASH_SIZE; i++)
8277 INIT_LIST_HEAD(&ptype_base[i]);
8278
8279 INIT_LIST_HEAD(&offload_base);
8280
8281 if (register_pernet_subsys(&netdev_net_ops))
8282 goto out;
8283
8284
8285
8286
8287
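	/*
	 *	Initialise the per-cpu packet receive queues.
	 */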
8288 for_each_possible_cpu(i) {
8289 struct softnet_data *sd = &per_cpu(softnet_data, i);
8290
8291 skb_queue_head_init(&sd->input_pkt_queue);
8292 skb_queue_head_init(&sd->process_queue);
8293 INIT_LIST_HEAD(&sd->poll_list);
8294 sd->output_queue_tailp = &sd->output_queue;
8295#ifdef CONFIG_RPS
8296 sd->csd.func = rps_trigger_softirq;
8297 sd->csd.info = sd;
8298 sd->cpu = i;
8299#endif
8300
8301 sd->backlog.poll = process_backlog;
8302 sd->backlog.weight = weight_p;
8303 }
8304
8305 dev_boot_phase = 0;
8306
	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present as well. Since we now dynamically allocate and
	 * free the loopback device, maintain this invariant by keeping
	 * the loopback device first on the list of network devices,
	 * so it is the first device that appears and the last network
	 * device that disappears.
	 */
8316 if (register_pernet_device(&loopback_net_ops))
8317 goto out;
8318
8319 if (register_pernet_device(&default_device_ops))
8320 goto out;
8321
8322 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
8323 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
8324
8325 hotcpu_notifier(dev_cpu_callback, 0);
8326 dst_subsys_init();
8327 rc = 0;
8328out:
8329 return rc;
8330}
8331
8332subsys_initcall(net_dev_init);
8333