/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 */
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/net_tstamp.h>
#include <linux/static_key.h>
#include <net/flow_keys.h>

#include "net-sysfs.h"

/* Instead of increasing this, you should create a hash table. */
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head length exists */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/*
 *	The list of packet types we will receive (as opposed to all packets)
 *	Here are the data structures dealing with this aspect.
 *
 *	Handlers for specific protocols hang off a small hash, keyed on the
 *	low nibble of the protocol value; taps that want every packet
 *	(ETH_P_ALL) live on their own list, ptype_all.  Protocol offload
 *	handlers live on offload_base.  Additions and removals are
 *	serialized by ptype_lock (or offload_lock); readers traverse the
 *	lists under RCU.
 */
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;
static struct list_head offload_base __read_mostly;

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

seqcount_t devnet_rename_seq;

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0)
		;
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));

	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);

	return 0;
}

/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
	 "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
	 "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 */
static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
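
/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */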
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
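
/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */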
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	pr_warn("dev_remove_pack: %p not found\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);
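
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */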
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);
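
/**
 *	dev_add_offload - register offload handlers
 *	@po: protocol offload declaration
 *
 *	Add protocol offload handlers to the networking stack. The passed
 *	&packet_offload is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep therefore it can not
 *	guarantee all CPU's that are in middle of receiving packets
 *	will see the new offload handlers (until the next received packet).
 */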
void dev_add_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;

	spin_lock(&offload_lock);
	list_add_rcu(&po->list, head);
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(dev_add_offload);

/**
 *	__dev_remove_offload	 - remove offload handler
 *	@po: packet offload declaration
 *
 *	Remove a protocol offload handler that was previously added to the
 *	kernel offload handlers by dev_add_offload(). The passed &packet_offload
 *	is removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_offload(struct packet_offload *po)
{
	struct list_head *head = &offload_base;
	struct packet_offload *po1;

	spin_lock(&offload_lock);

	list_for_each_entry(po1, head, list) {
		if (po == po1) {
			list_del_rcu(&po->list);
			goto out;
		}
	}

	pr_warn("dev_remove_offload: %p not found\n", po);
out:
	spin_unlock(&offload_lock);
}
EXPORT_SYMBOL(__dev_remove_offload);

/**
 *	dev_remove_offload	 - remove packet offload handler
 *	@po: packet offload declaration
 *
 *	Remove a packet offload handler that was previously added to the kernel
 *	offload handlers by dev_add_offload(). The passed &packet_offload is
 *	removed from the kernel lists and can be freed or reused once this
 *	function returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_offload);

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine to
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);

/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/
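
/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */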
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.
 *	If the name is found a pointer to the device is returned.
 *	If the name is not found then %NULL is returned.
 *	The reference counters are not incremented so the caller must be
 *	careful with locks. The caller must hold RCU lock.
 */
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
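
/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */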
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold RCU lock.
 */
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);
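
/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */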
struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device.
 *	The caller must hold RCU or RTNL.
 *	The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking
 */
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags_rcu - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. Must be called inside
 *	rcu_read_lock(), and result refcount is unchanged.
 */
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);
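
/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */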
bool dev_valid_name(const char *name)
{
	if (*name == '\0')
		return false;
	if (strlen(name) >= IFNAMSIZ)
		return false;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return false;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return false;
		name++;
	}
	return true;
}
EXPORT_SYMBOL(dev_valid_name);

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
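
/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */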
int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);

static int dev_alloc_name_ns(struct net *net,
			     struct net_device *dev,
			     const char *name)
{
	char buf[IFNAMSIZ];
	int ret;

	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

static int dev_get_valid_name(struct net *net,
			      struct net_device *dev,
			      const char *name)
{
	BUG_ON(!net);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name_ns(net, dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}

/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d".
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device,
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	char *new_ifalias;

	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		kfree(dev->ifalias);
		dev->ifalias = NULL;
		return 0;
	}

	new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!new_ifalias)
		return -ENOMEM;
	dev->ifalias = new_ifalias;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

/**
 *	netdev_notify_peers - notify network peers about existence of @dev
 *	@dev: network device
 *
 *	Generate traffic such that interested network peers are aware of
 *	@dev, such as by generating a gratuitous ARP. This may be used when
 *	a device wants to inform the rest of the network about some sort of
 *	reconfiguration such as a failover event or virtual machine
 *	migration.
 */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);

/**
 *	dev_load 	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */
void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;
	int no_module;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	no_module = !dev;
	if (no_module && capable(CAP_NET_ADMIN))
		no_module = request_module("netdev-%s", name);
	if (no_module && capable(CAP_SYS_MODULE)) {
		if (!request_module("%s", name))
			pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
				name);
	}
}
EXPORT_SYMBOL(dev_load);

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}
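
/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */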
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);

static int __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, unreg_list) {
		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/* Synchronize to scheduled poll. We cannot touch poll list, it
		 * can be even on different cpu. So just clear netif_running().
		 *
		 * dev->stop() will invoke napi_disable() on all of it's
		 * napi_struct instances on this device.
		 */
		smp_mb__after_clear_bit(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 *	Call the device specific close. This cannot fail.
		 *	Only if device is UP
		 *
		 *	We allow it to be called even after a DETACH hot-plug
		 *	event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		net_dmaengine_put();
	}

	return 0;
}

static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

	list_add(&dev->unreg_list, &single);
	retval = __dev_close_many(&single);
	list_del(&single);
	return retval;
}

static int dev_close_many(struct list_head *head)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(tmp_list);

	list_for_each_entry_safe(dev, tmp, head, unreg_list)
		if (!(dev->flags & IFF_UP))
			list_move(&dev->unreg_list, &tmp_list);

	__dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	/* rollback_registered_many needs the complete original list */
	list_splice(&tmp_list, head);
	return 0;
}
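
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */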
int dev_close(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		list_add(&dev->unreg_list, &single);
		dev_close_many(&single);
		list_del(&single);
	}
	return 0;
}
EXPORT_SYMBOL(dev_close);

/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	/*
	 * If we're trying to disable lro on a vlan device
	 * use the underlying physical device instead
	 */
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);

	dev->wanted_features &= ~NETIF_F_LRO;
	netdev_update_features(dev);

	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);

static int dev_boot_phase = 1;
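
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow device to have a race free
 *	view of the network device list.
 */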
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 *
 *	After unregistering unregister and down device events are synthesized
 *	for all devices on the device list to the removed notifier to remove
 *	the need for special case cleanup code.
 */
int unregister_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	if (err)
		goto unlock;

	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	ASSERT_RTNL();
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
EXPORT_SYMBOL(call_netdevice_notifiers);

static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
 * If net_disable_timestamp() is called from irq context, defer the
 * static_key_slow_dec() calls.
 */
static atomic_t netstamp_needed_deferred;
#endif

void net_enable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);

	if (deferred) {
		while (--deferred)
			static_key_slow_dec(&netstamp_needed);
		return;
	}
#endif
	WARN_ON(in_interrupt());
	static_key_slow_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
	if (in_interrupt()) {
		atomic_inc(&netstamp_needed_deferred);
		return;
	}
#endif
	static_key_slow_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp_set(struct sk_buff *skb)
{
	skb->tstamp.tv64 = 0;
	if (static_key_false(&netstamp_needed))
		__net_timestamp(skb);
}

#define net_timestamp_check(COND, SKB)			\
	if (static_key_false(&netstamp_needed)) {	\
		if ((COND) && !(SKB)->tstamp.tv64)	\
			__net_timestamp(SKB);		\
	}

static int net_hwtstamp_validate(struct ifreq *ifr)
{
	struct hwtstamp_config cfg;
	enum hwtstamp_tx_types tx_type;
	enum hwtstamp_rx_filters rx_filter;
	int tx_type_valid = 0;
	int rx_filter_valid = 0;

	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
		return -EFAULT;

	if (cfg.flags) /* reserved for future extensions */
		return -EINVAL;

	tx_type = cfg.tx_type;
	rx_filter = cfg.rx_filter;

	switch (tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:
	case HWTSTAMP_TX_ONESTEP_SYNC:
		tx_type_valid = 1;
		break;
	}

	switch (rx_filter) {
	case HWTSTAMP_FILTER_NONE:
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		rx_filter_valid = 1;
		break;
	}

	if (!tx_type_valid || !rx_filter_valid)
		return -ERANGE;

	return 0;
}

static inline bool is_skb_forwardable(struct net_device *dev,
				      struct sk_buff *skb)
{
	unsigned int len;

	if (!(dev->flags & IFF_UP))
		return false;

	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
	if (skb->len <= len)
		return true;

	/* if TSO is enabled, we don't care about the length as the packet
	 * could be forwarded without being segmented before
	 */
	if (skb_is_gso(skb))
		return true;

	return false;
}
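
/**
 * dev_forward_skb - loopback an skb to another netif
 *
 * @dev: destination network device
 * @skb: buffer to forward
 *
 * return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped, but freed)
 *
 * dev_forward_skb can be used for injecting an skb from the
 * start_xmit function of one device into the receive queue
 * of another device.
 *
 * The receiving device may be in another namespace, so
 * we have to clear all information in the skb that could
 * impact namespace isolation.
 */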
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			atomic_long_inc(&dev->rx_dropped);
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	skb_orphan(skb);
	nf_reset(skb);

	if (unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb->skb_iif = 0;
	skb->dev = dev;
	skb_dst_drop(skb);
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, dev);
	skb->mark = 0;
	secpath_reset(skb);
	nf_reset(skb);
	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
		return -ENOMEM;
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
{
	if (!ptype->af_packet_priv || !skb->sk)
		return false;

	if (ptype->id_match)
		return ptype->id_match(ptype, skb->sk);
	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
		return true;

	return false;
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;
	struct sk_buff *skb2 = NULL;
	struct packet_type *pt_prev = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (!skb_loop_sk(ptype, skb))) {
			if (pt_prev) {
				deliver_skb(skb2, pt_prev, skb->dev);
				pt_prev = ptype;
				continue;
			}

			skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			net_timestamp_set(skb2);

			/* skb->nh should be correctly
			 * set by sender, so that the second statement is
			 * just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
						     ntohs(skb2->protocol),
						     dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			pt_prev = ptype;
		}
	}
	if (pt_prev)
		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
	rcu_read_unlock();
}

/**
 * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
 * @dev: Network device
 * @txq: number of queues available
 *
 * If real_num_tx_queues is changed the tc mappings may no longer be
 * valid. To resolve this verify the tc mapping remains valid and if
 * not NULL the mapping. With no priorities mapping to this
 * offset/count pair it will no longer be used. In the worst case TC0
 * is invalid nothing can be done so disable priority mappings. If is
 * expected that drivers will fix this mapping if they can before
 * calling netif_set_real_num_tx_queues.
 */
static void netif_setup_tc(struct net_device *dev, unsigned int txq)
{
	int i;
	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];

	/* If TC0 is invalidated disable TC mapping */
	if (tc->offset + tc->count > txq) {
		pr_warn("Number of in use tx queues changed invalidating tc mappings. Priority traffic classification disabled!\n");
		dev->num_tc = 0;
		return;
	}

	/* Invalidated prio to tc mappings set to TC0 */
	for (i = 1; i < TC_BITMASK + 1; i++) {
		int q = netdev_get_prio_tc_map(dev, i);

		tc = &dev->tc_to_txq[q];
		if (tc->offset + tc->count > txq) {
			pr_warn("Number of in use tx queues changed. Priority %i to tc mapping %i is no longer valid. Setting map to 0\n",
				i, q);
			netdev_set_prio_tc_map(dev, i, 0);
		}
	}
}

/*
 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
 * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
 */
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
	int rc;

	if (txq < 1 || txq > dev->num_tx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED ||
	    dev->reg_state == NETREG_UNREGISTERING) {
		ASSERT_RTNL();

		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
						  txq);
		if (rc)
			return rc;

		if (dev->num_tc)
			netif_setup_tc(dev, txq);

		if (txq < dev->real_num_tx_queues)
			qdisc_reset_all_tx_gt(dev, txq);
	}

	dev->real_num_tx_queues = txq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);

#ifdef CONFIG_RPS
/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif

/**
 * netif_get_num_default_rss_queues - default number of RSS queues
 *
 * This routine should set an upper limit on the number of RSS queues
 * used by default by multiqueue devices.
 */
int netif_get_num_default_rss_queues(void)
{
	return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);

static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached from system and restart if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

static void skb_warn_bad_offload(const struct sk_buff *skb)
{
	static const netdev_features_t null_features = 0;
	struct net_device *dev = skb->dev;
	const char *driver = "";

	if (dev && dev->dev.parent)
		driver = dev_driver_string(dev->dev.parent);

	WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d gso_size=%d "
	     "gso_type=%d ip_summed=%d\n",
	     driver, dev ? &dev->features : &null_features,
	     skb->sk ? &skb->sk->sk_route_caps : &null_features,
	     skb->len, skb->data_len, skb_shinfo(skb)->gso_size,
	     skb_shinfo(skb)->gso_type, skb->ip_summed);
}

/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		skb_warn_bad_offload(skb);
		return -EINVAL;
	}

	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb,
				netdev_features_t features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;
	int err;

	while (type == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return ERR_PTR(-EINVAL);

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		skb_warn_bad_offload(skb);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &offload_base, list) {
		if (ptype->type == type && ptype->callbacks.gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->callbacks.gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->callbacks.gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know, that:
 * 1. IOMMU is present and allows to map all the memory.
 * 2. No high memory really exists on this machine.
 */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/**
 *	dev_gso_segment - Perform emulated hardware segmentation on skb.
 *	@skb: buffer to segment
 *	@features: device features as applicable to this skb
 *
 *	This function segments the given skb and stores the list of segments
 *	in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}
2231
2232static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
2233{
2234 return ((features & NETIF_F_GEN_CSUM) ||
2235 ((features & NETIF_F_V4_CSUM) &&
2236 protocol == htons(ETH_P_IP)) ||
2237 ((features & NETIF_F_V6_CSUM) &&
2238 protocol == htons(ETH_P_IPV6)) ||
2239 ((features & NETIF_F_FCOE_CRC) &&
2240 protocol == htons(ETH_P_FCOE)));
2241}
2242
2243static netdev_features_t harmonize_features(struct sk_buff *skb,
2244 __be16 protocol, netdev_features_t features)
2245{
2246 if (skb->ip_summed != CHECKSUM_NONE &&
2247 !can_checksum_protocol(features, protocol)) {
2248 features &= ~NETIF_F_ALL_CSUM;
2249 features &= ~NETIF_F_SG;
2250 } else if (illegal_highdma(skb->dev, skb)) {
2251 features &= ~NETIF_F_SG;
2252 }
2253
2254 return features;
2255}
2256
2257netdev_features_t netif_skb_features(struct sk_buff *skb)
2258{
2259 __be16 protocol = skb->protocol;
2260 netdev_features_t features = skb->dev->features;
2261
2262 if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
2263 features &= ~NETIF_F_GSO_MASK;
2264
2265 if (protocol == htons(ETH_P_8021Q)) {
2266 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2267 protocol = veh->h_vlan_encapsulated_proto;
2268 } else if (!vlan_tx_tag_present(skb)) {
2269 return harmonize_features(skb, protocol, features);
2270 }
2271
2272 features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
2273
2274 if (protocol != htons(ETH_P_8021Q)) {
2275 return harmonize_features(skb, protocol, features);
2276 } else {
2277 features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
2278 NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
2279 return harmonize_features(skb, protocol, features);
2280 }
2281}
2282EXPORT_SYMBOL(netif_skb_features);

/*
 * Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAG_LIST, or
 *	2. skb is fragmented and the device does not support SG.
 */
static inline int skb_needs_linearize(struct sk_buff *skb,
				      netdev_features_t features)
{
	return skb_is_nonlinear(skb) &&
	       ((skb_has_frag_list(skb) &&
		 !(features & NETIF_F_FRAGLIST)) ||
		(skb_shinfo(skb)->nr_frags &&
		 !(features & NETIF_F_SG)));
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		netdev_features_t features;

		/*
		 * If device doesn't need skb->dst, release it right now while
		 * its hot in this cpu cache
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		features = netif_skb_features(skb);

		if (vlan_tx_tag_present(skb) &&
		    !(features & NETIF_F_HW_VLAN_TX)) {
			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		/* If encapsulation offload request, verify we are testing
		 * hardware encapsulation features instead of standard
		 * features for the netdev
		 */
		if (skb->encapsulation)
			features &= dev->hw_enc_features;

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			/* If packet is not checksummed and device does not
			 * support checksumming for this protocol, complete
			 * checksumming here.
			 */
			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				if (skb->encapsulation)
					skb_set_inner_transport_header(skb,
						skb_checksum_start_offset(skb));
				else
					skb_set_transport_header(skb,
						skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				    skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		skb_len = skb->len;
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;

		/*
		 * If device doesn't need nskb->dst, release it right now while
		 * its hot in this cpu cache
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(nskb);

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(nskb, dev);

		skb_len = nskb->len;
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_xmit_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	if (likely(skb->next == NULL))
		skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}

static u32 hashrnd __read_mostly;

/*
 * Returns a Tx hash based on the given packet descriptor a Tx queues' number
 * to be used as a distribution range.
 */
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	if (skb->sk && skb->sk->sk_hash)
		hash = skb->sk->sk_hash;
	else
		hash = (__force u16) skb->protocol;
	hash = jhash_1word(hash, hashrnd);

	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);

static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
		net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
				     dev->name, queue_index,
				     dev->real_num_tx_queues);
		return 0;
	}
	return queue_index;
}

static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else {
				u32 hash;
				if (skb->sk && skb->sk->sk_hash)
					hash = skb->sk->sk_hash;
				else
					hash = (__force u16) skb->protocol ^
					    skb->rxhash;
				hash = jhash_1word(hash, hashrnd);
				queue_index = map->queues[
				    ((u64)hash * map->len) >> 32];
			}
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}

struct netdev_queue *netdev_pick_tx(struct net_device *dev,
				    struct sk_buff *skb)
{
	int queue_index;
	const struct net_device_ops *ops = dev->netdev_ops;

	if (dev->real_num_tx_queues == 1)
		queue_index = 0;
	else if (ops->ndo_select_queue) {
		queue_index = ops->ndo_select_queue(dev, skb);
		queue_index = dev_cap_txqueue(dev, queue_index);
	} else {
		struct sock *sk = skb->sk;
		queue_index = sk_tx_queue_get(sk);

		if (queue_index < 0 || skb->ooo_okay ||
		    queue_index >= dev->real_num_tx_queues) {
			int old_index = queue_index;

			queue_index = get_xps_queue(dev, skb);
			if (queue_index < 0)
				queue_index = skb_tx_hash(dev, skb);

			if (queue_index != old_index && sk) {
				struct dst_entry *dst =
				    rcu_dereference_check(sk->sk_dst_cache, 1);

				if (dst && skb_dst(skb) == dst)
					sk_tx_queue_set(sk, queue_index);
			}
		}
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_skb_cb(skb)->pkt_len = skb->len;
	qdisc_calculate_pkt_len(skb, q);
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get qdisc main lock.
	 * This permits __QDISC_STATE_RUNNING owner to get the lock more often
	 * and dequeue packets faster.
	 */
	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}

#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
static void skb_update_prio(struct sk_buff *skb)
{
	struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);

	if (!skb->priority && skb->sk && map) {
		unsigned int prioidx = skb->sk->sk_cgrp_prioidx;

		if (prioidx < map->priomap_len)
			skb->priority = map->priomap[prioidx];
	}
}
#else
#define skb_update_prio(skb)
#endif

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10

/**
 * dev_loopback_xmit - loop back @skb
 * @skb: buffer to transmit
 */
int dev_loopback_xmit(struct sk_buff *skb)
{
	skb_reset_mac_header(skb);
	__skb_pull(skb, skb_network_offset(skb));
	skb->pkt_type = PACKET_LOOPBACK;
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	WARN_ON(!skb_dst(skb));
	skb_dst_force(skb);
	netif_rx_ni(skb);
	return 0;
}
EXPORT_SYMBOL(dev_loopback_xmit);
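
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device. The caller must
 *	have set the device and priority and built the buffer before calling
 *	this function. The function can be called from an interrupt.
 *
 *	A negative errno code is returned on a failure. A success does not
 *	guarantee the frame will be transmitted as it may be dropped due
 *	to congestion or traffic shaping.
 *
 *	Regardless of the return value, the skb is consumed, so it is currently
 *	difficult to retry a send to this method. (You can bump the ref count
 *	before sending to hold a reference for retry if you are careful.)
 *
 *	When calling this method, interrupts MUST be enabled. This is because
 *	the BH enable code must have IRQs enabled so that it will not deadlock.
 */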
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/* Disable soft irqs for various locks below. Also
	 * stops preemption for RCU.
	 */
	rcu_read_lock_bh();

	skb_update_prio(skb);

	txq = netdev_pick_tx(dev, skb);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	trace_net_dev_queue(skb);
	if (q->enqueue) {
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out;
	}

	/* The device has no queue. Common case for software devices:
	 * loopback, all the sorts of tunnels...
	 *
	 * Really, it is unlikely that netif_tx_lock protection is necessary
	 * here.  (f.e. loopback and IP tunnels are clean ignoring statistics
	 * counters.)
	 * However, it is possible, that they rely on protection
	 * made by us here.
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
				goto recursion_alert;

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_xmit_stopped(txq)) {
				__this_cpu_inc(xmit_recursion);
				rc = dev_hard_start_xmit(skb, dev, txq);
				__this_cpu_dec(xmit_recursion);
				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
					     dev->name);
		} else {
			/* Recursion is detected! It is possible,
			 * unfortunately
			 */
recursion_alert:
			net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
					     dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
EXPORT_SYMBOL(dev_queue_xmit);

/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);

int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;            /* old backlog weight */

/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}

/*
 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
 * and src/dst port numbers.  Sets rxhash in skb to non-zero hash value
 * on success, zero indicates no valid hash.  Also, sets l4_rxhash in skb
 * if hash is a canonical 4-tuple hash over transport ports.
 */
void __skb_get_rxhash(struct sk_buff *skb)
{
	struct flow_keys keys;
	u32 hash;

	if (!skb_flow_dissect(skb, &keys))
		return;

	if (keys.ports)
		skb->l4_rxhash = 1;

	/* get a consistent hash (same value on both flow directions) */
	if (((__force u32)keys.dst < (__force u32)keys.src) ||
	    (((__force u32)keys.dst == (__force u32)keys.src) &&
	     ((__force u16)keys.port16[1] < (__force u16)keys.port16[0]))) {
		swap(keys.dst, keys.src);
		swap(keys.port16[0], keys.port16[1]);
	}

	hash = jhash_3words((__force u32)keys.dst,
			    (__force u32)keys.src,
			    (__force u32)keys.ports, hashrnd);
	if (!hash)
		hash = 1;

	skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);

#ifdef CONFIG_RPS

/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);

struct static_key rps_needed __read_mostly;

static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
	    struct rps_dev_flow *rflow, u16 next_cpu)
{
	if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
		struct netdev_rx_queue *rxqueue;
		struct rps_dev_flow_table *flow_table;
		struct rps_dev_flow *old_rflow;
		u32 flow_id;
		u16 rxq_index;
		int rc;

		/* Should we steer this flow to a different hardware queue? */
		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
		    !(dev->features & NETIF_F_NTUPLE))
			goto out;
		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
		if (rxq_index == skb_get_rx_queue(skb))
			goto out;

		rxqueue = dev->_rx + rxq_index;
		flow_table = rcu_dereference(rxqueue->rps_flow_table);
		if (!flow_table)
			goto out;
		flow_id = skb->rxhash & flow_table->mask;
		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
							rxq_index, flow_id);
		if (rc < 0)
			goto out;
		old_rflow = rflow;
		rflow = &flow_table->flows[flow_id];
		rflow->filter = rc;
		if (old_rflow->filter == rflow->filter)
			old_rflow->filter = RPS_NO_FILTER;
	out:
#endif
		rflow->last_qtail =
			per_cpu(softnet_data, next_cpu).input_queue_head;
	}

	rflow->cpu = next_cpu;
	return rflow;
}
2847
2848
2849
2850
2851
2852
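/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */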
static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
		       struct rps_dev_flow **rflowp)
{
	struct netdev_rx_queue *rxqueue;
	struct rps_map *map;
	struct rps_dev_flow_table *flow_table;
	struct rps_sock_flow_table *sock_flow_table;
	int cpu = -1;
	u16 tcpu;

	if (skb_rx_queue_recorded(skb)) {
		u16 index = skb_get_rx_queue(skb);
		if (unlikely(index >= dev->real_num_rx_queues)) {
			WARN_ONCE(dev->real_num_rx_queues > 1,
				  "%s received packet on queue %u, but number "
				  "of RX queues is %u\n",
				  dev->name, index, dev->real_num_rx_queues);
			goto done;
		}
		rxqueue = dev->_rx + index;
	} else
		rxqueue = dev->_rx;

	map = rcu_dereference(rxqueue->rps_map);
	if (map) {
		if (map->len == 1 &&
		    !rcu_access_pointer(rxqueue->rps_flow_table)) {
			tcpu = map->cpus[0];
			if (cpu_online(tcpu))
				cpu = tcpu;
			goto done;
		}
	} else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
		goto done;
	}

	skb_reset_network_header(skb);
	if (!skb_get_rxhash(skb))
		goto done;

	flow_table = rcu_dereference(rxqueue->rps_flow_table);
	sock_flow_table = rcu_dereference(rps_sock_flow_table);
	if (flow_table && sock_flow_table) {
		u16 next_cpu;
		struct rps_dev_flow *rflow;

		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
		tcpu = rflow->cpu;

		next_cpu = sock_flow_table->ents[skb->rxhash &
						 sock_flow_table->mask];

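		/*
		 * If the desired CPU (where last recvmsg was done) is
		 * different from current CPU (one in the rx-queue flow
		 * table entry), switch if one of the following holds:
		 *   - Current CPU is unset (equal to RPS_NO_CPU).
		 *   - Current CPU is offline.
		 *   - The current CPU's queue tail has advanced beyond the
		 *     last packet that was enqueued using this table entry.
		 *     This guarantees that all previous packets for the flow
		 *     have been dequeued, thus preserving in order delivery.
		 */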
		if (unlikely(tcpu != next_cpu) &&
		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
		      rflow->last_qtail)) >= 0)) {
			tcpu = next_cpu;
			rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
		}

		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
			*rflowp = rflow;
			cpu = tcpu;
			goto done;
		}
	}

	if (map) {
		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];

		if (cpu_online(tcpu)) {
			cpu = tcpu;
			goto done;
		}
	}

done:
	return cpu;
}

#ifdef CONFIG_RFS_ACCEL

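/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */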
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
			 u32 flow_id, u16 filter_id)
{
	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
	struct rps_dev_flow_table *flow_table;
	struct rps_dev_flow *rflow;
	bool expire = true;
	int cpu;

	rcu_read_lock();
	flow_table = rcu_dereference(rxqueue->rps_flow_table);
	if (flow_table && flow_id <= flow_table->mask) {
		rflow = &flow_table->flows[flow_id];
		cpu = ACCESS_ONCE(rflow->cpu);
		if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
		    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
			   rflow->last_qtail) <
		     (int)(10 * flow_table->mask)))
			expire = false;
	}
	rcu_read_unlock();
	return expire;
}
EXPORT_SYMBOL(rps_may_expire_flow);

#endif /* CONFIG_RFS_ACCEL */

/* Called from hardirq (IPI) context */
static void rps_trigger_softirq(void *data)
{
	struct softnet_data *sd = data;

	____napi_schedule(sd, &sd->backlog);
	sd->received_rps++;
}

#endif /* CONFIG_RPS */

/*
 * Check if this softnet_data structure is another cpu one
 * If yes, queue it to our IPI list and return 1
 * If no, return 0
 */
static int rps_ipi_queued(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	struct softnet_data *mysd = &__get_cpu_var(softnet_data);

	if (sd != mysd) {
		sd->rps_ipi_next = mysd->rps_ipi_list;
		mysd->rps_ipi_list = sd;

		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
		return 1;
	}
#endif /* CONFIG_RPS */
	return 0;
}

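/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */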
static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
			      unsigned int *qtail)
{
	struct softnet_data *sd;
	unsigned long flags;

	sd = &per_cpu(softnet_data, cpu);

	local_irq_save(flags);

	rps_lock(sd);
	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
		if (skb_queue_len(&sd->input_pkt_queue)) {
enqueue:
			__skb_queue_tail(&sd->input_pkt_queue, skb);
			input_queue_tail_incr_save(sd, qtail);
			rps_unlock(sd);
			local_irq_restore(flags);
			return NET_RX_SUCCESS;
		}

		/* Schedule NAPI for backlog device.
		 * We can use non atomic operation since we own the queue lock.
		 */
		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
			if (!rps_ipi_queued(sd))
				____napi_schedule(sd, &sd->backlog);
		}
		goto enqueue;
	}

	sd->dropped++;
	rps_unlock(sd);

	local_irq_restore(flags);

	atomic_long_inc(&skb->dev->rx_dropped);
	kfree_skb(skb);
	return NET_RX_DROP;
}

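/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds. The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP     (packet was dropped)
 */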
int netif_rx(struct sk_buff *skb)
{
	int ret;

	/* if netpoll wants it, pretend we never saw it */
	if (netpoll_rx(skb))
		return NET_RX_DROP;

	net_timestamp_check(netdev_tstamp_prequeue, skb);

	trace_netif_rx(skb);
#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu;

		preempt_disable();
		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);
		if (cpu < 0)
			cpu = smp_processor_id();

		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);

		rcu_read_unlock();
		preempt_enable();
	} else
#endif
	{
		unsigned int qtail;
		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
		put_cpu();
	}
	return ret;
}
EXPORT_SYMBOL(netif_rx);

int netif_rx_ni(struct sk_buff *skb)
{
	int err;

	preempt_disable();
	err = netif_rx(skb);
	if (local_softirq_pending())
		do_softirq();
	preempt_enable();

	return err;
}
EXPORT_SYMBOL(netif_rx_ni);

static void net_tx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_disable();
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_enable();

		while (clist) {
			struct sk_buff *skb = clist;
			clist = clist->next;

			WARN_ON(atomic_read(&skb->users));
			trace_kfree_skb(skb, net_tx_action);
			__kfree_skb(skb);
		}
	}

	if (sd->output_queue) {
		struct Qdisc *head;

		local_irq_disable();
		head = sd->output_queue;
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
		local_irq_enable();

		while (head) {
			struct Qdisc *q = head;
			spinlock_t *root_lock;

			head = head->next_sched;

			root_lock = qdisc_lock(q);
			if (spin_trylock(root_lock)) {
				smp_mb__before_clear_bit();
				clear_bit(__QDISC_STATE_SCHED,
					  &q->state);
				qdisc_run(q);
				spin_unlock(root_lock);
			} else {
				if (!test_bit(__QDISC_STATE_DEACTIVATED,
					      &q->state)) {
					__netif_reschedule(q);
				} else {
					smp_mb__before_clear_bit();
					clear_bit(__QDISC_STATE_SCHED,
						  &q->state);
				}
			}
		}
	}
}

#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
    (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
/* This hook is defined here for ATM LANE */
int (*br_fdb_test_addr_hook)(struct net_device *dev,
			     unsigned char *addr) __read_mostly;
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif

#ifdef CONFIG_NET_CLS_ACT
/* Run the ingress filter attached to skb->dev, under qdisc_lock(q).
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
{
	struct net_device *dev = skb->dev;
	u32 ttl = G_TC_RTTL(skb->tc_verd);
	int result = TC_ACT_OK;
	struct Qdisc *q;

	if (unlikely(MAX_RED_LOOP < ttl++)) {
		net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
				     skb->skb_iif, dev->ifindex);
		return TC_ACT_SHOT;
	}

	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);

	q = rxq->qdisc;
	if (q != &noop_qdisc) {
		spin_lock(qdisc_lock(q));
		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
			result = qdisc_enqueue_root(skb, q);
		spin_unlock(qdisc_lock(q));
	}

	return result;
}

static inline struct sk_buff *handle_ing(struct sk_buff *skb,
					 struct packet_type **pt_prev,
					 int *ret, struct net_device *orig_dev)
{
	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);

	if (!rxq || rxq->qdisc == &noop_qdisc)
		goto out;

	if (*pt_prev) {
		*ret = deliver_skb(skb, *pt_prev, orig_dev);
		*pt_prev = NULL;
	}

	switch (ing_filter(skb, rxq)) {
	case TC_ACT_SHOT:
	case TC_ACT_STOLEN:
		kfree_skb(skb);
		return NULL;
	}

out:
	skb->tc_verd = 0;
	return skb;
}
#endif

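/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device. This handler will then be
 *	called from __netif_receive_skb. A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
 */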
int netdev_rx_handler_register(struct net_device *dev,
			       rx_handler_func_t *rx_handler,
			       void *rx_handler_data)
{
	ASSERT_RTNL();

	if (dev->rx_handler)
		return -EBUSY;

	rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
	rcu_assign_pointer(dev->rx_handler, rx_handler);

	return 0;
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_register);

/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */
void netdev_rx_handler_unregister(struct net_device *dev)
{
	ASSERT_RTNL();
	RCU_INIT_POINTER(dev->rx_handler, NULL);
	RCU_INIT_POINTER(dev->rx_handler_data, NULL);
}
EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);

/*
 * Limit the use of PFMEMALLOC reserves to those protocols that implement
 * the special handling of PFMEMALLOC skbs.
 */
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
	switch (skb->protocol) {
	case __constant_htons(ETH_P_ARP):
	case __constant_htons(ETH_P_IP):
	case __constant_htons(ETH_P_IPV6):
	case __constant_htons(ETH_P_8021Q):
		return true;
	default:
		return false;
	}
}

static int __netif_receive_skb(struct sk_buff *skb)
{
	struct packet_type *ptype, *pt_prev;
	rx_handler_func_t *rx_handler;
	struct net_device *orig_dev;
	struct net_device *null_or_dev;
	bool deliver_exact = false;
	int ret = NET_RX_DROP;
	__be16 type;
	unsigned long pflags = current->flags;

	net_timestamp_check(!netdev_tstamp_prequeue, skb);

	trace_netif_receive_skb(skb);

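	/*
	 * PFMEMALLOC skbs are special, they should
	 * - be delivered to SOCK_MEMALLOC sockets only
	 * - stay away from userspace
	 * - have bounded memory usage
	 *
	 * Use PF_MEMALLOC as this saves us from propagating the allocation
	 * context down to all allocation sites.
	 */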
	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
		current->flags |= PF_MEMALLOC;

	/* if we've gotten here through NAPI, check netpoll */
	if (netpoll_receive_skb(skb))
		goto out;

	orig_dev = skb->dev;

	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);
	skb_reset_mac_len(skb);

	pt_prev = NULL;

	rcu_read_lock();

another_round:
	skb->skb_iif = skb->dev->ifindex;

	__this_cpu_inc(softnet_data.processed);

	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto unlock;
	}

#ifdef CONFIG_NET_CLS_ACT
	if (skb->tc_verd & TC_NCLS) {
		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
		goto ncls;
	}
#endif

	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
		goto skip_taps;

	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		if (!ptype->dev || ptype->dev == skb->dev) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

skip_taps:
#ifdef CONFIG_NET_CLS_ACT
	skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
	if (!skb)
		goto unlock;
ncls:
#endif

	if (sk_memalloc_socks() && skb_pfmemalloc(skb) &&
	    !skb_pfmemalloc_protocol(skb))
		goto drop;

	if (vlan_tx_tag_present(skb)) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		if (vlan_do_receive(&skb))
			goto another_round;
		else if (unlikely(!skb))
			goto unlock;
	}

	rx_handler = rcu_dereference(skb->dev->rx_handler);
	if (rx_handler) {
		if (pt_prev) {
			ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = NULL;
		}
		switch (rx_handler(&skb)) {
		case RX_HANDLER_CONSUMED:
			goto unlock;
		case RX_HANDLER_ANOTHER:
			goto another_round;
		case RX_HANDLER_EXACT:
			deliver_exact = true;
			/* fall through */
		case RX_HANDLER_PASS:
			break;
		default:
			BUG();
		}
	}

	if (vlan_tx_nonzero_tag_present(skb))
		skb->pkt_type = PACKET_OTHERHOST;

	/* deliver only exact match when indicated */
	null_or_dev = deliver_exact ? skb->dev : NULL;

	type = skb->protocol;
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type &&
		    (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
		     ptype->dev == orig_dev)) {
			if (pt_prev)
				ret = deliver_skb(skb, pt_prev, orig_dev);
			pt_prev = ptype;
		}
	}

	if (pt_prev) {
		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
			goto drop;
		else
			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
	} else {
drop:
		/* No handler accepted the packet: account the drop. */
		atomic_long_inc(&skb->dev->rx_dropped);
		kfree_skb(skb);
		ret = NET_RX_DROP;
	}

unlock:
	rcu_read_unlock();
out:
	tsk_restore_flags(current, pflags, PF_MEMALLOC);
	return ret;
}

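/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds. The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */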
int netif_receive_skb(struct sk_buff *skb)
{
	net_timestamp_check(netdev_tstamp_prequeue, skb);

	if (skb_defer_rx_timestamp(skb))
		return NET_RX_SUCCESS;

#ifdef CONFIG_RPS
	if (static_key_false(&rps_needed)) {
		struct rps_dev_flow voidflow, *rflow = &voidflow;
		int cpu, ret;

		rcu_read_lock();

		cpu = get_rps_cpu(skb->dev, skb, &rflow);

		if (cpu >= 0) {
			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
			rcu_read_unlock();
			return ret;
		}
		rcu_read_unlock();
	}
#endif
	return __netif_receive_skb(skb);
}
EXPORT_SYMBOL(netif_receive_skb);

/* Network device is going away, flush any packets still pending.
 * Called with irqs disabled.
 */
static void flush_backlog(void *arg)
{
	struct net_device *dev = arg;
	struct softnet_data *sd = &__get_cpu_var(softnet_data);
	struct sk_buff *skb, *tmp;

	rps_lock(sd);
	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
		if (skb->dev == dev) {
			__skb_unlink(skb, &sd->input_pkt_queue);
			kfree_skb(skb);
			input_queue_head_incr(sd);
		}
	}
	rps_unlock(sd);

	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
		if (skb->dev == dev) {
			__skb_unlink(skb, &sd->process_queue);
			kfree_skb(skb);
			input_queue_head_incr(sd);
		}
	}
}

static int napi_gro_complete(struct sk_buff *skb)
{
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &offload_base;
	int err = -ENOENT;

	BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));

	if (NAPI_GRO_CB(skb)->count == 1) {
		skb_shinfo(skb)->gso_size = 0;
		goto out;
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || !ptype->callbacks.gro_complete)
			continue;

		err = ptype->callbacks.gro_complete(skb);
		break;
	}
	rcu_read_unlock();

	if (err) {
		WARN_ON(&ptype->list == head);
		kfree_skb(skb);
		return NET_RX_SUCCESS;
	}

out:
	return netif_receive_skb(skb);
}

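/* napi->gro_list contains packets ordered by age.
 * youngest packets at the head of the list.
 * Complete skbs in reverse order to reduce latencies.
 */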
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
	struct sk_buff *skb, *prev = NULL;

	/* scan list and build reverse chain */
	for (skb = napi->gro_list; skb != NULL; skb = skb->next) {
		skb->prev = prev;
		prev = skb;
	}

	for (skb = prev; skb; skb = prev) {
		skb->next = NULL;

		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
			return;

		prev = skb->prev;
		napi_gro_complete(skb);
		napi->gro_count--;
	}

	napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);

static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff *p;
	unsigned int maclen = skb->dev->hard_header_len;

	for (p = napi->gro_list; p; p = p->next) {
		unsigned long diffs;

		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
		diffs |= p->vlan_tci ^ skb->vlan_tci;
		if (maclen == ETH_HLEN)
			diffs |= compare_ether_header(skb_mac_header(p),
						      skb_gro_mac_header(skb));
		else if (!diffs)
			diffs = memcmp(skb_mac_header(p),
				       skb_gro_mac_header(skb),
				       maclen);
		NAPI_GRO_CB(p)->same_flow = !diffs;
		NAPI_GRO_CB(p)->flush = 0;
	}
}

static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	struct sk_buff **pp = NULL;
	struct packet_offload *ptype;
	__be16 type = skb->protocol;
	struct list_head *head = &offload_base;
	int same_flow;
	int mac_len;
	enum gro_result ret;

	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
		goto normal;

	if (skb_is_gso(skb) || skb_has_frag_list(skb))
		goto normal;

	gro_list_prepare(napi, skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, head, list) {
		if (ptype->type != type || !ptype->callbacks.gro_receive)
			continue;

		skb_set_network_header(skb, skb_gro_offset(skb));
		mac_len = skb->network_header - skb->mac_header;
		skb->mac_len = mac_len;
		NAPI_GRO_CB(skb)->same_flow = 0;
		NAPI_GRO_CB(skb)->flush = 0;
		NAPI_GRO_CB(skb)->free = 0;

		pp = ptype->callbacks.gro_receive(&napi->gro_list, skb);
		break;
	}
	rcu_read_unlock();

	if (&ptype->list == head)
		goto normal;

	same_flow = NAPI_GRO_CB(skb)->same_flow;
	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;

	if (pp) {
		struct sk_buff *nskb = *pp;

		*pp = nskb->next;
		nskb->next = NULL;
		napi_gro_complete(nskb);
		napi->gro_count--;
	}

	if (same_flow)
		goto ok;

	if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
		goto normal;

	napi->gro_count++;
	NAPI_GRO_CB(skb)->count = 1;
	NAPI_GRO_CB(skb)->age = jiffies;
	skb_shinfo(skb)->gso_size = skb_gro_len(skb);
	skb->next = napi->gro_list;
	napi->gro_list = skb;
	ret = GRO_HELD;

pull:
	if (skb_headlen(skb) < skb_gro_offset(skb)) {
		int grow = skb_gro_offset(skb) - skb_headlen(skb);

		BUG_ON(skb->end - skb->tail < grow);

		memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);

		skb->tail += grow;
		skb->data_len -= grow;

		skb_shinfo(skb)->frags[0].page_offset += grow;
		skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);

		if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
			skb_frag_unref(skb, 0);
			memmove(skb_shinfo(skb)->frags,
				skb_shinfo(skb)->frags + 1,
				--skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
		}
	}

ok:
	return ret;

normal:
	ret = GRO_NORMAL;
	goto pull;
}

static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
	switch (ret) {
	case GRO_NORMAL:
		if (netif_receive_skb(skb))
			ret = GRO_DROP;
		break;

	case GRO_DROP:
		kfree_skb(skb);
		break;

	case GRO_MERGED_FREE:
		if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
			kmem_cache_free(skbuff_head_cache, skb);
		else
			__kfree_skb(skb);
		break;

	case GRO_HELD:
	case GRO_MERGED:
		break;
	}

	return ret;
}

static void skb_gro_reset_offset(struct sk_buff *skb)
{
	const struct skb_shared_info *pinfo = skb_shinfo(skb);
	const skb_frag_t *frag0 = &pinfo->frags[0];

	NAPI_GRO_CB(skb)->data_offset = 0;
	NAPI_GRO_CB(skb)->frag0 = NULL;
	NAPI_GRO_CB(skb)->frag0_len = 0;

	if (skb->mac_header == skb->tail &&
	    pinfo->nr_frags &&
	    !PageHighMem(skb_frag_page(frag0))) {
		NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
		NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0);
	}
}

gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
	skb_gro_reset_offset(skb);

	return napi_skb_finish(dev_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);

static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
{
	__skb_pull(skb, skb_headlen(skb));
	/* restore the reserve we had after netdev_alloc_skb_ip_align() */
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
	skb->vlan_tci = 0;
	skb->dev = napi->dev;
	skb->skb_iif = 0;

	napi->skb = skb;
}

struct sk_buff *napi_get_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;

	if (!skb) {
		skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
		if (skb)
			napi->skb = skb;
	}
	return skb;
}
EXPORT_SYMBOL(napi_get_frags);

static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
				      gro_result_t ret)
{
	switch (ret) {
	case GRO_NORMAL:
	case GRO_HELD:
		skb->protocol = eth_type_trans(skb, skb->dev);

		if (ret == GRO_HELD)
			skb_gro_pull(skb, -ETH_HLEN);
		else if (netif_receive_skb(skb))
			ret = GRO_DROP;
		break;

	case GRO_DROP:
	case GRO_MERGED_FREE:
		napi_reuse_skb(napi, skb);
		break;

	case GRO_MERGED:
		break;
	}

	return ret;
}

static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
	struct sk_buff *skb = napi->skb;
	struct ethhdr *eth;
	unsigned int hlen;
	unsigned int off;

	napi->skb = NULL;

	skb_reset_mac_header(skb);
	skb_gro_reset_offset(skb);

	off = skb_gro_offset(skb);
	hlen = off + sizeof(*eth);
	eth = skb_gro_header_fast(skb, off);
	if (skb_gro_header_hard(skb, hlen)) {
		eth = skb_gro_header_slow(skb, hlen, off);
		if (unlikely(!eth)) {
			napi_reuse_skb(napi, skb);
			skb = NULL;
			goto out;
		}
	}

	skb_gro_pull(skb, sizeof(*eth));

	/*
	 * This works because the only protocols we care about don't require
	 * special handling.  We'll fix it up properly at the end.
	 */
	skb->protocol = eth->h_proto;

out:
	return skb;
}

gro_result_t napi_gro_frags(struct napi_struct *napi)
{
	struct sk_buff *skb = napi_frags_skb(napi);

	if (!skb)
		return GRO_DROP;

	return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
}
EXPORT_SYMBOL(napi_gro_frags);

/*
 * net_rps_action_and_irq_enable sends any pending IPI's for rps.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
static void net_rps_action_and_irq_enable(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	struct softnet_data *remsd = sd->rps_ipi_list;

	if (remsd) {
		sd->rps_ipi_list = NULL;

		local_irq_enable();

		/* Send pending IPI's to kick RPS processing on remote cpus. */
		while (remsd) {
			struct softnet_data *next = remsd->rps_ipi_next;

			if (cpu_online(remsd->cpu))
				__smp_call_function_single(remsd->cpu,
							   &remsd->csd, 0);
			remsd = next;
		}
	} else
#endif
		local_irq_enable();
}

static int process_backlog(struct napi_struct *napi, int quota)
{
	int work = 0;
	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);

#ifdef CONFIG_RPS
	/* Check if we have pending ipi, its better to send them now,
	 * not waiting net_rx_action() end.
	 */
	if (sd->rps_ipi_list) {
		local_irq_disable();
		net_rps_action_and_irq_enable(sd);
	}
#endif
	napi->weight = weight_p;
	local_irq_disable();
	while (work < quota) {
		struct sk_buff *skb;
		unsigned int qlen;

		while ((skb = __skb_dequeue(&sd->process_queue))) {
			local_irq_enable();
			__netif_receive_skb(skb);
			local_irq_disable();
			input_queue_head_incr(sd);
			if (++work >= quota) {
				local_irq_enable();
				return work;
			}
		}

		rps_lock(sd);
		qlen = skb_queue_len(&sd->input_pkt_queue);
		if (qlen)
			skb_queue_splice_tail_init(&sd->input_pkt_queue,
						   &sd->process_queue);

		if (qlen < quota - work) {
			/*
			 * Inline a custom version of __napi_complete().
			 * only current cpu owns and manipulates this napi,
			 * and NAPI_STATE_SCHED is the only possible flag set
			 * on backlog.  We can use a plain write instead of
			 * clear_bit(), and we dont need an smp_mb() memory
			 * barrier.
			 */
			list_del(&napi->poll_list);
			napi->state = 0;

			quota = work + qlen;
		}
		rps_unlock(sd);
	}
	local_irq_enable();

	return work;
}

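/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run
 */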
void __napi_schedule(struct napi_struct *n)
{
	unsigned long flags;

	local_irq_save(flags);
	____napi_schedule(&__get_cpu_var(softnet_data), n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(__napi_schedule);

void __napi_complete(struct napi_struct *n)
{
	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
	BUG_ON(n->gro_list);

	list_del(&n->poll_list);
	smp_mb__before_clear_bit();
	clear_bit(NAPI_STATE_SCHED, &n->state);
}
EXPORT_SYMBOL(__napi_complete);

void napi_complete(struct napi_struct *n)
{
	unsigned long flags;

	/*
	 * don't let napi dequeue from the cpu poll list
	 * just in case its running on a different cpu
	 */
	if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
		return;

	napi_gro_flush(n, false);
	local_irq_save(flags);
	__napi_complete(n);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(napi_complete);

void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
		    int (*poll)(struct napi_struct *, int), int weight)
{
	INIT_LIST_HEAD(&napi->poll_list);
	napi->gro_count = 0;
	napi->gro_list = NULL;
	napi->skb = NULL;
	napi->poll = poll;
	napi->weight = weight;
	list_add(&napi->dev_list, &dev->napi_list);
	napi->dev = dev;
#ifdef CONFIG_NETPOLL
	spin_lock_init(&napi->poll_lock);
	napi->poll_owner = -1;
#endif
	set_bit(NAPI_STATE_SCHED, &napi->state);
}
EXPORT_SYMBOL(netif_napi_add);

void netif_napi_del(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	list_del_init(&napi->dev_list);
	napi_free_frags(napi);

	for (skb = napi->gro_list; skb; skb = next) {
		next = skb->next;
		skb->next = NULL;
		kfree_skb(skb);
	}

	napi->gro_list = NULL;
	napi->gro_count = 0;
}
EXPORT_SYMBOL(netif_napi_del);

static void net_rx_action(struct softirq_action *h)
{
	struct softnet_data *sd = &__get_cpu_var(softnet_data);
	unsigned long time_limit = jiffies + 2;
	int budget = netdev_budget;
	void *have;

	local_irq_disable();

	while (!list_empty(&sd->poll_list)) {
		struct napi_struct *n;
		int work, weight;

		/* If softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies since which will allow
		 * an average latency of 1.5/HZ.
		 */
		if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
			goto softnet_break;

		local_irq_enable();

		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);

		have = netpoll_poll_lock(n);

		weight = n->weight;

		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accidentally calling ->poll() when NAPI is not scheduled.
		 */
		work = 0;
		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
			work = n->poll(n, weight);
			trace_napi_poll(n);
		}

		WARN_ON_ONCE(work > weight);

		budget -= work;

		local_irq_disable();

		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at-will.
		 */
		if (unlikely(work == weight)) {
			if (unlikely(napi_disable_pending(n))) {
				local_irq_enable();
				napi_complete(n);
				local_irq_disable();
			} else {
				if (n->gro_list) {
					/* flush too old packets
					 * If HZ < 1000, flush all packets.
					 */
					local_irq_enable();
					napi_gro_flush(n, HZ >= 1000);
					local_irq_disable();
				}
				list_move_tail(&n->poll_list, &sd->poll_list);
			}
		}

		netpoll_poll_unlock(have);
	}
out:
	net_rps_action_and_irq_enable(sd);

#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware
	 */
	dma_issue_pending_all();
#endif

	return;

softnet_break:
	sd->time_squeeze++;
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
	goto out;
}

static gifconf_func_t *gifconf_list[NPROTO];

/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines. The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
{
	if (family >= NPROTO)
		return -EINVAL;
	gifconf_list[family] = gifconf;
	return 0;
}
EXPORT_SYMBOL(register_gifconf);

/*
 *	Map an interface index to its name (SIOCGIFNAME)
 *
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.
 */
static int dev_ifname(struct net *net, struct ifreq __user *arg)
{
	struct net_device *dev;
	struct ifreq ifr;
	unsigned seq;

	/*
	 *	Fetch the caller's info block.
	 */
	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

retry:
	seq = read_seqcount_begin(&devnet_rename_seq);
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(ifr.ifr_name, dev->name);
	rcu_read_unlock();
	if (read_seqcount_retry(&devnet_rename_seq, seq))
		goto retry;

	if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
		return -EFAULT;
	return 0;
}

/*
 *	Perform a SIOCGIFCONF call. This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */
static int dev_ifconf(struct net *net, char __user *arg)
{
	struct ifconf ifc;
	struct net_device *dev;
	char __user *pos;
	int len;
	int total;
	int i;

	/*
	 *	Fetch the caller's info block.
	 */
	if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
		return -EFAULT;

	pos = ifc.ifc_buf;
	len = ifc.ifc_len;

	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */
	total = 0;
	for_each_netdev(net, dev) {
		for (i = 0; i < NPROTO; i++) {
			if (gifconf_list[i]) {
				int done;
				if (!pos)
					done = gifconf_list[i](dev, NULL, 0);
				else
					done = gifconf_list[i](dev, pos + total,
							       len - total);
				if (done < 0)
					return -EFAULT;
				total += done;
			}
		}
	}

	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
	ifc.ifc_len = total;

	/*
	 *	Both BSD and Solaris return 0 here, so we do too.
	 */
	return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
}

#ifdef CONFIG_PROC_FS

#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))

static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
{
	struct net *net = seq_file_net(seq);
	struct net_device *dev;
	struct hlist_node *p;
	struct hlist_head *h;
	unsigned int count = 0, offset = get_offset(*pos);

	h = &net->dev_name_head[get_bucket(*pos)];
	hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
		if (++count == offset)
			return dev;
	}

	return NULL;
}

static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
{
	struct net_device *dev;
	unsigned int bucket;

	do {
		dev = dev_from_same_bucket(seq, pos);
		if (dev)
			return dev;

		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < NETDEV_HASHENTRIES);

	return NULL;
}

/*
 *	This is invoked by the /proc filesystem handler to display a device
 *	in detail.
 */
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	if (!*pos)
		return SEQ_START_TOKEN;

	if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
		return NULL;

	return dev_from_bucket(seq, pos);
}

void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return dev_from_bucket(seq, pos);
}

void dev_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
{
	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

	seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
		   "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
		   dev->name, stats->rx_bytes, stats->rx_packets,
		   stats->rx_errors,
		   stats->rx_dropped + stats->rx_missed_errors,
		   stats->rx_fifo_errors,
		   stats->rx_length_errors + stats->rx_over_errors +
		    stats->rx_crc_errors + stats->rx_frame_errors,
		   stats->rx_compressed, stats->multicast,
		   stats->tx_bytes, stats->tx_packets,
		   stats->tx_errors, stats->tx_dropped,
		   stats->tx_fifo_errors, stats->collisions,
		   stats->tx_carrier_errors +
		    stats->tx_aborted_errors +
		    stats->tx_window_errors +
		    stats->tx_heartbeat_errors,
		   stats->tx_compressed);
}

/*
 *	Called from the PROCfs module. This now uses the new arbitrary sized
 *	/proc/net interface to create /proc/net/dev
 */
static int dev_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Inter-|   Receive                            "
			      "                    |  Transmit\n"
			      " face |bytes    packets errs drop fifo frame "
			      "compressed multicast|bytes    packets errs "
			      "drop fifo colls carrier compressed\n");
	else
		dev_seq_printf_stats(seq, v);
	return 0;
}

static struct softnet_data *softnet_get_online(loff_t *pos)
{
	struct softnet_data *sd = NULL;

	while (*pos < nr_cpu_ids)
		if (cpu_online(*pos)) {
			sd = &per_cpu(softnet_data, *pos);
			break;
		} else
			++*pos;
	return sd;
}

static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
{
	return softnet_get_online(pos);
}

static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return softnet_get_online(pos);
}

static void softnet_seq_stop(struct seq_file *seq, void *v)
{
}

static int softnet_seq_show(struct seq_file *seq, void *v)
{
	struct softnet_data *sd = v;

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
		   sd->processed, sd->dropped, sd->time_squeeze, 0,
		   0, 0, 0, 0, /* was fastroute */
		   sd->cpu_collision, sd->received_rps);
	return 0;
}

static const struct seq_operations dev_seq_ops = {
	.start = dev_seq_start,
	.next  = dev_seq_next,
	.stop  = dev_seq_stop,
	.show  = dev_seq_show,
};

static int dev_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &dev_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations dev_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = dev_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};

static const struct seq_operations softnet_seq_ops = {
	.start = softnet_seq_start,
	.next  = softnet_seq_next,
	.stop  = softnet_seq_stop,
	.show  = softnet_seq_show,
};

static int softnet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &softnet_seq_ops);
}

static const struct file_operations softnet_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = softnet_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release,
};

static void *ptype_get_idx(loff_t pos)
{
	struct packet_type *pt = NULL;
	loff_t i = 0;
	int t;

	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s %pF\n",
			   pt->dev ? pt->dev->name : "", pt->func);
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			    sizeof(struct seq_net_private));
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};


static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}

static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}

static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */

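/**
 *	netdev_set_master	-	set up master pointer
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the rtnl_mutex.
 */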
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	if (old)
		dev_put(old);
	return 0;
}
EXPORT_SYMBOL(netdev_set_master);

/**
 *	netdev_set_bond_master	-	set up bonding master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the rtnl_mutex. On a failure
 *	a negative errno code is returned. On success %RTM_NEWLINK is sent
 *	to the routing socket and the function returns zero.
 */
int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
{
	int err;

	ASSERT_RTNL();

	err = netdev_set_master(slave, master);
	if (err)
		return err;
	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
EXPORT_SYMBOL(netdev_set_bond_master);

static void dev_change_rx_flags(struct net_device *dev, int flags)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
		ops->ndo_change_rx_flags(dev, flags);
}

static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned int old_flags = dev->flags;
	kuid_t uid;
	kgid_t gid;

	ASSERT_RTNL();

	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch promisc and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			pr_warn("%s: promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n",
				dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		pr_info("device %s %s promiscuous mode\n",
			dev->name,
			dev->flags & IFF_PROMISC ? "entered" : "left");
		if (audit_enabled) {
			current_uid_gid(&uid, &gid);
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				from_kuid(&init_user_ns, audit_get_loginuid(current)),
				from_kuid(&init_user_ns, uid),
				from_kgid(&init_user_ns, gid),
				audit_get_sessionid(current));
		}

		dev_change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}

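/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */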
int dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned int old_flags = dev->flags;
	int err;

	err = __dev_set_promiscuity(dev, inc);
	if (err < 0)
		return err;
	if (dev->flags != old_flags)
		dev_set_rx_mode(dev);
	return err;
}
EXPORT_SYMBOL(dev_set_promiscuity);

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces. Once it hits zero the device reverts back to normal
 *	filtering operation. A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */
int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned int old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, untouch allmulti and return error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			pr_warn("%s: allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n",
				dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		dev_change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}
EXPORT_SYMBOL(dev_set_allmulti);

/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */
void __dev_set_rx_mode(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags&IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
		/* Unicast addresses changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
			__dev_set_promiscuity(dev, 1);
			dev->uc_promisc = true;
		} else if (netdev_uc_empty(dev) && dev->uc_promisc) {
			__dev_set_promiscuity(dev, -1);
			dev->uc_promisc = false;
		}
	}

	if (ops->ndo_set_rx_mode)
		ops->ndo_set_rx_mode(dev);
}

void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}

/**
 *	dev_get_flags - get flags reported to userspace
 *	@dev: device
 *
 *	Get the combination of flag bits exported through APIs to userspace.
 */
unsigned int dev_get_flags(const struct net_device *dev)
{
	unsigned int flags;

	flags = (dev->flags & ~(IFF_PROMISC |
				IFF_ALLMULTI |
				IFF_RUNNING |
				IFF_LOWER_UP |
				IFF_DORMANT)) |
		(dev->gflags & (IFF_PROMISC |
				IFF_ALLMULTI));

	if (netif_running(dev)) {
		if (netif_oper_up(dev))
			flags |= IFF_RUNNING;
		if (netif_carrier_ok(dev))
			flags |= IFF_LOWER_UP;
		if (netif_dormant(dev))
			flags |= IFF_DORMANT;
	}

	return flags;
}
EXPORT_SYMBOL(dev_get_flags);

int __dev_change_flags(struct net_device *dev, unsigned int flags)
{
	unsigned int old_flags = dev->flags;
	int ret;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.
	 */
	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */
	if ((old_flags ^ flags) & IFF_MULTICAST)
		dev_change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	Have we downed the interface. We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */
	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? 1 : -1;

		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	 * is important. Some (broken) drivers set IFF_PROMISC, when
	 * IFF_ALLMULTI is requested not asking us and not reporting.
	 */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? 1 : -1;

		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	return ret;
}

void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
{
	unsigned int changes = dev->flags ^ old_flags;

	if (changes & IFF_UP) {
		if (dev->flags & IFF_UP)
			call_netdevice_notifiers(NETDEV_UP, dev);
		else
			call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	if (dev->flags & IFF_UP &&
	    (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
}

/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on device based state flags. The flags are
 *	in the userspace exported format.
 */
int dev_change_flags(struct net_device *dev, unsigned int flags)
{
	int ret;
	unsigned int changes, old_flags = dev->flags;

	ret = __dev_change_flags(dev, flags);
	if (ret < 0)
		return ret;

	changes = old_flags ^ dev->flags;
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	__dev_notify_flags(dev, old_flags);
	return ret;
}
EXPORT_SYMBOL(dev_change_flags);

/**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *
 *	Change the maximum transfer size of the network device.
 */
int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/*	MTU must be positive.	 */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	if (ops->ndo_change_mtu)
		err = ops->ndo_change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;

	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
	return err;
}
EXPORT_SYMBOL(dev_set_mtu);

/**
 *	dev_set_group - Change group this device belongs to
 *	@dev: device
 *	@new_group: group this device should belong to
 */
void dev_set_group(struct net_device *dev, int new_group)
{
	dev->group = new_group;
}
EXPORT_SYMBOL(dev_set_group);

/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *
 *	Change the hardware (MAC) address of the device
 */
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int err;

	if (!ops->ndo_set_mac_address)
		return -EOPNOTSUPP;
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = ops->ndo_set_mac_address(dev, sa);
	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);
	return err;
}
EXPORT_SYMBOL(dev_set_mac_address);

/*
 *	Perform the SIOCxIFxxx calls, inside rcu_read_lock()
 */
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
	case SIOCGIFFLAGS:	/* Get interface flags */
		ifr->ifr_flags = (short) dev_get_flags(dev);
		return 0;

	case SIOCGIFMETRIC:	/* Get the metric on the interface
				   (currently unused) */
		ifr->ifr_metric = 0;
		return 0;

	case SIOCGIFMTU:	/* Get the MTU of a device */
		ifr->ifr_mtu = dev->mtu;
		return 0;

	case SIOCGIFHWADDR:
		if (!dev->addr_len)
			memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
		else
			memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		ifr->ifr_hwaddr.sa_family = dev->type;
		return 0;

	case SIOCGIFSLAVE:
		err = -EINVAL;
		break;

	case SIOCGIFMAP:
		ifr->ifr_map.mem_start = dev->mem_start;
		ifr->ifr_map.mem_end   = dev->mem_end;
		ifr->ifr_map.base_addr = dev->base_addr;
		ifr->ifr_map.irq       = dev->irq;
		ifr->ifr_map.dma       = dev->dma;
		ifr->ifr_map.port      = dev->if_port;
		return 0;

	case SIOCGIFINDEX:
		ifr->ifr_ifindex = dev->ifindex;
		return 0;

	case SIOCGIFTXQLEN:
		ifr->ifr_qlen = dev->tx_queue_len;
		return 0;

	default:
		/* dev_ioctl() should ensure this case
		 * is never reached
		 */
		WARN_ON(1);
		err = -ENOTTY;
		break;

	}
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
	const struct net_device_ops *ops;

	if (!dev)
		return -ENODEV;

	ops = dev->netdev_ops;

	switch (cmd) {
	case SIOCSIFFLAGS:	/* Set interface flags */
		return dev_change_flags(dev, ifr->ifr_flags);

	case SIOCSIFMETRIC:	/* Set the metric on the interface
				   (currently unused) */
		return -EOPNOTSUPP;

	case SIOCSIFMTU:	/* Set the MTU of a device */
		return dev_set_mtu(dev, ifr->ifr_mtu);

	case SIOCSIFHWADDR:
		return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

	case SIOCSIFHWBROADCAST:
		if (ifr->ifr_hwaddr.sa_family != dev->type)
			return -EINVAL;
		memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
		       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
		return 0;

	case SIOCSIFMAP:
		if (ops->ndo_set_config) {
			if (!netif_device_present(dev))
				return -ENODEV;
			return ops->ndo_set_config(dev, &ifr->ifr_map);
		}
		return -EOPNOTSUPP;

	case SIOCADDMULTI:
		if (!ops->ndo_set_rx_mode ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);

	case SIOCDELMULTI:
		if (!ops->ndo_set_rx_mode ||
		    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
			return -EINVAL;
		if (!netif_device_present(dev))
			return -ENODEV;
		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);

	case SIOCSIFTXQLEN:
		if (ifr->ifr_qlen < 0)
			return -EINVAL;
		dev->tx_queue_len = ifr->ifr_qlen;
		return 0;

	case SIOCSIFNAME:
		ifr->ifr_newname[IFNAMSIZ-1] = '\0';
		return dev_change_name(dev, ifr->ifr_newname);

	case SIOCSHWTSTAMP:
		err = net_hwtstamp_validate(ifr);
		if (err)
			return err;
		/* fall through */

	/*
	 *	Unknown or private ioctl
	 */
	default:
		if ((cmd >= SIOCDEVPRIVATE &&
		    cmd <= SIOCDEVPRIVATE + 15) ||
		    cmd == SIOCBONDENSLAVE ||
		    cmd == SIOCBONDRELEASE ||
		    cmd == SIOCBONDSETHWADDR ||
		    cmd == SIOCBONDSLAVEINFOQUERY ||
		    cmd == SIOCBONDINFOQUERY ||
		    cmd == SIOCBONDCHANGEACTIVE ||
		    cmd == SIOCGMIIPHY ||
		    cmd == SIOCGMIIREG ||
		    cmd == SIOCSMIIREG ||
		    cmd == SIOCBRADDIF ||
		    cmd == SIOCBRDELIF ||
		    cmd == SIOCSHWTSTAMP ||
		    cmd == SIOCWANDEV) {
			err = -EOPNOTSUPP;
			if (ops->ndo_do_ioctl) {
				if (netif_device_present(dev))
					err = ops->ndo_do_ioctl(dev, ifr, cmd);
				else
					err = -ENODEV;
			}
		} else
			err = -EINVAL;

	}
	return err;
}

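/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */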
int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	 * and requires shared lock, because it sleeps writing
	 * to user space.
	 */
	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */
	switch (cmd) {
	/*
	 *	These ioctl calls:
	 *	- can be done by all.
	 *	- atomic and do not require locking.
	 *	- return a value
	 */
	case SIOCGIFFLAGS:
	case SIOCGIFMETRIC:
	case SIOCGIFMTU:
	case SIOCGIFHWADDR:
	case SIOCGIFSLAVE:
	case SIOCGIFMAP:
	case SIOCGIFINDEX:
	case SIOCGIFTXQLEN:
		dev_load(net, ifr.ifr_name);
		rcu_read_lock();
		ret = dev_ifsioc_locked(net, &ifr, cmd);
		rcu_read_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	case SIOCETHTOOL:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ethtool(net, &ifr);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- return a value
	 */
	case SIOCGMIIPHY:
	case SIOCGMIIREG:
	case SIOCSIFNAME:
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		if (!ret) {
			if (colon)
				*colon = ':';
			if (copy_to_user(arg, &ifr,
					 sizeof(struct ifreq)))
				ret = -EFAULT;
		}
		return ret;

	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFMAP:
	case SIOCSIFTXQLEN:
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		/* fall through */
	/*
	 *	These ioctl calls:
	 *	- require local superuser power.
	 *	- require strict serialization.
	 *	- do not return a value
	 */
	case SIOCSIFFLAGS:
	case SIOCSIFMETRIC:
	case SIOCSIFMTU:
	case SIOCSIFHWADDR:
	case SIOCSIFSLAVE:
	case SIOCADDMULTI:
	case SIOCDELMULTI:
	case SIOCSIFHWBROADCAST:
	case SIOCSMIIREG:
	case SIOCBONDENSLAVE:
	case SIOCBONDRELEASE:
	case SIOCBONDSETHWADDR:
	case SIOCBONDCHANGEACTIVE:
	case SIOCBRADDIF:
	case SIOCBRDELIF:
	case SIOCSHWTSTAMP:
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		/* fall through */
	case SIOCBONDSLAVEINFOQUERY:
	case SIOCBONDINFOQUERY:
		dev_load(net, ifr.ifr_name);
		rtnl_lock();
		ret = dev_ifsioc(net, &ifr, cmd);
		rtnl_unlock();
		return ret;

	case SIOCGIFMEM:
		/* Get the per device memory space. We can add this but
		 * currently do not support it */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space.
		 * Not applicable in our case */
	case SIOCSIFLINK:
		return -ENOTTY;

	/*
	 *	Unknown or private ioctl.
	 */
	default:
		if (cmd == SIOCWANDEV ||
		    (cmd >= SIOCDEVPRIVATE &&
		     cmd <= SIOCDEVPRIVATE + 15)) {
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret && copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
				ret = -EFAULT;
			return ret;
		}
		/* Take care of Wireless Extensions */
		if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
			return wext_handle_ioctl(net, &ifr, cmd, arg);
		return -ENOTTY;
	}
}

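/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */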
5407static int dev_new_index(struct net *net)
5408{
5409 int ifindex = net->ifindex;
5410 for (;;) {
5411 if (++ifindex <= 0)
5412 ifindex = 1;
5413 if (!__dev_get_by_index(net, ifindex))
5414 return net->ifindex = ifindex;
5415 }
5416}
5417
5418
5419static LIST_HEAD(net_todo_list);
5420
5421static void net_set_todo(struct net_device *dev)
5422{
5423 list_add_tail(&dev->todo_list, &net_todo_list);
5424}
5425
static void rollback_registered_many(struct list_head *head)
{
	struct net_device *dev, *tmp;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/* Some devices call without registering
		 * for initialization unwind. Remove those
		 * devices and proceed with the remaining.
		 */
		if (dev->reg_state == NETREG_UNINITIALIZED) {
			pr_debug("unregister_netdevice: device %s/%p never was registered\n",
				 dev->name, dev);

			WARN_ON(1);
			list_del(&dev->unreg_list);
			continue;
		}
		dev->dismantle = true;
		BUG_ON(dev->reg_state != NETREG_REGISTERED);
	}

	/* If device is running, close it first. */
	dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		/* And unlink it from device chain. */
		unlist_netdevice(dev);

		dev->reg_state = NETREG_UNREGISTERING;
	}

	synchronize_net();

	list_for_each_entry(dev, head, unreg_list) {
		/* Shutdown queueing discipline. */
		dev_shutdown(dev);

		/* Notify protocols, that we are about to destroy
		   this device. They should clean all the things.
		*/
		call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

		if (!dev->rtnl_link_ops ||
		    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
			rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

		/*
		 *	Flush the unicast and multicast chains
		 */
		dev_uc_flush(dev);
		dev_mc_flush(dev);

		if (dev->netdev_ops->ndo_uninit)
			dev->netdev_ops->ndo_uninit(dev);

		/* Notifier chain MUST detach us from the master device. */
		WARN_ON(dev->master);

		/* Remove entries from kobject tree */
		netdev_unregister_kobject(dev);
	}

	synchronize_net();

	list_for_each_entry(dev, head, unreg_list)
		dev_put(dev);
}

static void rollback_registered(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->unreg_list, &single);
	rollback_registered_many(&single);
	list_del(&single);
}

static netdev_features_t netdev_fix_features(struct net_device *dev,
	netdev_features_t features)
{
	/* Fix illegal checksum combinations */
	if ((features & NETIF_F_HW_CSUM) &&
	    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		netdev_warn(dev, "mixed HW and IP checksum settings.\n");
		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	/* Fix illegal SG+CSUM combinations. */
	if ((features & NETIF_F_SG) &&
	    !(features & NETIF_F_ALL_CSUM)) {
		netdev_dbg(dev,
			"Dropping NETIF_F_SG since no checksum feature.\n");
		features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
		netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
		features &= ~NETIF_F_ALL_TSO;
	}

	/* TSO ECN requires that TSO is present as well. */
	if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
		features &= ~NETIF_F_TSO_ECN;

	/* Software GSO depends on SG. */
	if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
		netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
		features &= ~NETIF_F_GSO;
	}

	/* UFO needs SG and checksumming */
	if (features & NETIF_F_UFO) {
		if (!((features & NETIF_F_GEN_CSUM) ||
		    (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
			    == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
			netdev_dbg(dev,
				"Dropping NETIF_F_UFO since no checksum offload features.\n");
			features &= ~NETIF_F_UFO;
		}

		if (!(features & NETIF_F_SG)) {
			netdev_dbg(dev,
				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
			features &= ~NETIF_F_UFO;
		}
	}

	return features;
}

int __netdev_update_features(struct net_device *dev)
{
	netdev_features_t features;
	int err = 0;

	ASSERT_RTNL();

	features = netdev_get_wanted_features(dev);

	if (dev->netdev_ops->ndo_fix_features)
		features = dev->netdev_ops->ndo_fix_features(dev, features);

	/* driver might be less strict about feature dependencies */
	features = netdev_fix_features(dev, features);

	if (dev->features == features)
		return 0;

	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
		&dev->features, &features);

	if (dev->netdev_ops->ndo_set_features)
		err = dev->netdev_ops->ndo_set_features(dev, features);

	if (unlikely(err < 0)) {
		netdev_err(dev,
			"set_features() failed (%d); wanted %pNF, left %pNF\n",
			err, &features, &dev->features);
		return -1;
	}

	if (!err)
		dev->features = features;

	return 1;
}

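/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications if it
 *	has changed. Should be called after driver or hardware dependent
 *	conditions might have changed that influence the features.
 */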
void netdev_update_features(struct net_device *dev)
{
	if (__netdev_update_features(dev))
		netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_update_features);

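/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features set and send notifications even
 *	if they have not changed. Should be called instead of
 *	netdev_update_features() if also dev->vlan_features might
 *	have changed to allow the changes to be propagated to stacked
 *	VLAN devices.
 */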
void netdev_change_features(struct net_device *dev)
{
	__netdev_update_features(dev);
	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_change_features);

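/**
 *	netif_stacked_transfer_operstate -	transfer operstate
 *	@rootdev: the root or lower level device to transfer state from
 *	@dev: the device to transfer operstate to
 *
 *	Transfer operational state from root to device. This is normally
 *	called when a stacking relationship exists between the root
 *	device and the device (a leaf device).
 */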
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
				      struct net_device *dev)
{
	if (rootdev->operstate == IF_OPER_DORMANT)
		netif_dormant_on(dev);
	else
		netif_dormant_off(dev);

	if (netif_carrier_ok(rootdev)) {
		if (!netif_carrier_ok(dev))
			netif_carrier_on(dev);
	} else {
		if (netif_carrier_ok(dev))
			netif_carrier_off(dev);
	}
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);

#ifdef CONFIG_RPS
static int netif_alloc_rx_queues(struct net_device *dev)
{
	unsigned int i, count = dev->num_rx_queues;
	struct netdev_rx_queue *rx;

	BUG_ON(count < 1);

	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
	if (!rx) {
		pr_err("netdev: Unable to allocate %u rx queues\n", count);
		return -ENOMEM;
	}
	dev->_rx = rx;

	for (i = 0; i < count; i++)
		rx[i].dev = dev;
	return 0;
}
#endif

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue, void *_unused)
{
	/* Initialize queue lock */
	spin_lock_init(&queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
	queue->xmit_lock_owner = -1;
	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
	queue->dev = dev;
#ifdef CONFIG_BQL
	dql_init(&queue->dql, HZ);
#endif
}

static int netif_alloc_netdev_queues(struct net_device *dev)
{
	unsigned int count = dev->num_tx_queues;
	struct netdev_queue *tx;

	BUG_ON(count < 1);

	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		pr_err("netdev: Unable to allocate %u tx queues\n", count);
		return -ENOMEM;
	}
	dev->_tx = tx;

	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);

	return 0;
}

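/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 */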
int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	dev->iflink = -1;

	ret = dev_get_valid_name(net, dev, dev->name);
	if (ret < 0)
		goto out;

	/* Init, if this function is available */
	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	ret = -EBUSY;
	if (!dev->ifindex)
		dev->ifindex = dev_new_index(net);
	else if (__dev_get_by_index(net, dev->ifindex))
		goto err_uninit;

	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;
	dev->wanted_features = dev->features & dev->hw_features;

	/* Turn on no cache copy if HW is doing checksum */
	if (!(dev->flags & IFF_LOOPBACK)) {
		dev->hw_features |= NETIF_F_NOCACHE_COPY;
		if (dev->features & NETIF_F_ALL_CSUM) {
			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
			dev->features |= NETIF_F_NOCACHE_COPY;
		}
	}

	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
	 */
	dev->vlan_features |= NETIF_F_HIGHDMA;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */
	set_bit(__LINK_STATE_PRESENT, &dev->state);

	linkwatch_init_dev(dev);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);
	add_device_randomness(dev->dev_addr, dev->addr_len);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

out:
	return ret;

err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
EXPORT_SYMBOL(register_netdevice);

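/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */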
int init_dummy_netdev(struct net_device *dev)
{
	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls
	 */
	memset(dev, 0, sizeof(struct net_device));

	/* make sure we BUG if trying to hit standard
	 * register/unregister code path
	 */
	dev->reg_state = NETREG_DUMMY;

	/* NAPI wants this */
	INIT_LIST_HEAD(&dev->napi_list);

	/* a dummy interface is started by default */
	set_bit(__LINK_STATE_PRESENT, &dev->state);
	set_bit(__LINK_STATE_START, &dev->state);

	/* Note : We dont allocate pcpu_refcnt for dummy devices,
	 * because users of this 'device' dont need to change
	 * its refcount.
	 */

	return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);

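/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl
 *	semaphore and expands the device name if you passed a format string
 *	to alloc_netdev.
 */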
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

int netdev_refcnt_read(const struct net_device *dev)
{
	int i, refcnt = 0;

	for_each_possible_cpu(i)
		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
	return refcnt;
}
EXPORT_SYMBOL(netdev_refcnt_read);

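/**
 * netdev_wait_allrefs - wait until all references are gone.
 * @dev: target net_device
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup via the notifier afterwards;
 * then the last reference to the device will be dropped and unregister
 * will complete.
 */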
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;
	int refcnt;

	linkwatch_forget_dev(dev);

	rebroadcast_time = warning_time = jiffies;
	refcnt = netdev_refcnt_read(dev);

	while (refcnt != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			__rtnl_unlock();
			rcu_barrier();
			rtnl_lock();

			call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		refcnt = netdev_refcnt_read(dev);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
				 dev->name, refcnt);
			warning_time = jiffies;
		}
	}
}

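/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */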
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	/* Wait for rcu callbacks to finish before next phase */
	if (!list_empty(&list))
		rcu_barrier();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_first_entry(&list, struct net_device, todo_list);
		list_del(&dev->todo_list);

		rtnl_lock();
		call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
		__rtnl_unlock();

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			pr_err("network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(netdev_refcnt_read(dev));
		WARN_ON(rcu_access_pointer(dev->ip_ptr));
		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

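/* Convert net_device_stats to rtnl_link_stats64.  They have the same
 * fields in the same order, with only the type differing.
 */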
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
			     const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*stats64));
#else
	size_t i, n = sizeof(*stats64) / sizeof(u64);
	const unsigned long *src = (const unsigned long *)netdev_stats;
	u64 *dst = (u64 *)stats64;

	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
		     sizeof(*stats64) / sizeof(u64));
	for (i = 0; i < n; i++)
		dst[i] = src[i];
#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);

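/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */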
struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
					struct rtnl_link_stats64 *storage)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats64) {
		memset(storage, 0, sizeof(*storage));
		ops->ndo_get_stats64(dev, storage);
	} else if (ops->ndo_get_stats) {
		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
	} else {
		netdev_stats_to_stats64(storage, &dev->stats);
	}
	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
	return storage;
}
EXPORT_SYMBOL(dev_get_stats);

struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
	struct netdev_queue *queue = dev_ingress_queue(dev);

#ifdef CONFIG_NET_CLS_ACT
	if (queue)
		return queue;
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;
	netdev_init_one_queue(dev, queue, NULL);
	queue->qdisc = &noop_qdisc;
	queue->qdisc_sleeping = &noop_qdisc;
	rcu_assign_pointer(dev->ingress_queue, queue);
#endif
	return queue;
}

static const struct ethtool_ops default_ethtool_ops;

void netdev_set_default_ethtool_ops(struct net_device *dev,
				    const struct ethtool_ops *ops)
{
	if (dev->ethtool_ops == &default_ethtool_ops)
		dev->ethtool_ops = ops;
}
EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops);

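/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@txqs:		the number of TX subqueues to allocate
 *	@rxqs:		the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device.
 */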
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
		return NULL;
	}

#ifdef CONFIG_RPS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure alignment of private area */
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	/* ensure alignment of whole construct */
	alloc_size += NETDEV_ALIGN - 1;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		pr_err("alloc_netdev: Unable to allocate device\n");
		return NULL;
	}

	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_p;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;
	dev->gso_max_segs = GSO_MAX_SEGS;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_RPS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	strcpy(dev->name, name);
	dev->group = INIT_NETDEV_GROUP;
	if (!dev->ethtool_ops)
		dev->ethtool_ops = &default_ethtool_ops;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
	kfree(dev->_tx);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

free_p:
	kfree(p);
	return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);

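/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */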
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	kfree(dev->_tx);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

	kfree(rcu_dereference_protected(dev->ingress_queue, 1));

	/* Flush device addresses */
	dev_addr_flush(dev);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	free_percpu(dev->pcpu_refcnt);
	dev->pcpu_refcnt = NULL;

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}
EXPORT_SYMBOL(free_netdev);

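/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */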
void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}
EXPORT_SYMBOL(synchronize_net);

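/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head not NULL, device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore.  You may want
 *	unregister_netdev() instead of this.
 */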
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
	ASSERT_RTNL();

	if (head) {
		list_move_tail(&dev->unreg_list, head);
	} else {
		rollback_registered(dev);
		/* Finish processing unregister after unlock */
		net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_queue);

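/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 */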
void unregister_netdevice_many(struct list_head *head)
{
	struct net_device *dev;

	if (!list_empty(head)) {
		rollback_registered_many(head);
		list_for_each_entry(dev, head, unreg_list)
			net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_many);

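/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */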
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);

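/**
 *	dev_change_net_namespace - move device to different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */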
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing todo */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	if (__dev_get_by_name(net, dev->name)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (dev_get_valid_name(net, dev, pat) < 0)
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice unregister_netdevice.
	 */

	/* If device is running close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.

	   Note that dev->reg_state stays at NETREG_REGISTERED.
	   This is wanted because this way 8021q and macvlan know
	   the device is just moving and can keep their slaves up.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
	rcu_barrier();
	call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_uc_flush(dev);
	dev_mc_flush(dev);

	/* Send a netdev-removed uevent to the old namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Send a netdev-add uevent to the new namespace */
	kobject_uevent(&dev->dev.kobj, KOBJ_ADD);

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */
	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

	synchronize_net();
	err = 0;
out:
	return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue and append the
	 * completion queue from the offline CPU. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;

	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Append output queue from offline CPU. */
	if (oldsd->output_queue) {
		*sd->output_queue_tailp = oldsd->output_queue;
		sd->output_queue_tailp = oldsd->output_queue_tailp;
		oldsd->output_queue = NULL;
		oldsd->output_queue_tailp = &oldsd->output_queue;
	}

	if (!list_empty(&oldsd->poll_list)) {
		list_splice_init(&oldsd->poll_list, &sd->poll_list);
		raise_softirq_irqoff(NET_RX_SOFTIRQ);
	}

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}

	return NOTIFY_OK;
}

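/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */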
netdev_features_t netdev_increment_features(netdev_features_t all,
	netdev_features_t one, netdev_features_t mask)
{
	if (mask & NETIF_F_GEN_CSUM)
		mask |= NETIF_F_ALL_CSUM;
	mask |= NETIF_F_VLAN_CHALLENGED;

	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
	all &= one | ~NETIF_F_ALL_FOR_ALL;

	/* If one device supports hw checksumming, set for all. */
	if (all & NETIF_F_GEN_CSUM)
		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

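/* Initialize per network namespace state */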
static int __net_init netdev_init(struct net *net)
{
	if (net != &init_net)
		INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

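/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine network driver for device.
 */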
const char *netdev_drivername(const struct net_device *dev)
{
	const struct device_driver *driver;
	const struct device *parent;
	const char *empty = "";

	parent = dev->dev.parent;
	if (!parent)
		return empty;

	driver = parent->driver;
	if (driver && driver->name)
		return driver->name;
	return empty;
}

static int __netdev_printk(const char *level, const struct net_device *dev,
			   struct va_format *vaf)
{
	int r;

	if (dev && dev->dev.parent) {
		r = dev_printk_emit(level[1] - '0',
				    dev->dev.parent,
				    "%s %s %s: %pV",
				    dev_driver_string(dev->dev.parent),
				    dev_name(dev->dev.parent),
				    netdev_name(dev), vaf);
	} else if (dev) {
		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
	} else {
		r = printk("%s(NULL net_device): %pV", level, vaf);
	}

	return r;
}

int netdev_printk(const char *level, const struct net_device *dev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;
	int r;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	r = __netdev_printk(level, dev, &vaf);

	va_end(args);

	return r;
}
EXPORT_SYMBOL(netdev_printk);

#define define_netdev_printk_level(func, level)			\
int func(const struct net_device *dev, const char *fmt, ...)		\
{									\
	int r;								\
	struct va_format vaf;						\
	va_list args;							\
									\
	va_start(args, fmt);						\
									\
	vaf.fmt = fmt;							\
	vaf.va = &args;							\
									\
	r = __netdev_printk(level, dev, &vaf);				\
									\
	va_end(args);							\
									\
	return r;							\
}									\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmoveable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Leave virtual devices for the generic cleanup */
		if (dev->rtnl_link_ops)
			continue;

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			pr_emerg("%s: failed to move %s to init_net: %d\n",
				 __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
	/* At exit all network devices must be removed from a network
	 * namespace.  Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	list_del(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

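/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */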
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	INIT_LIST_HEAD(&offload_base);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		memset(sd, 0, sizeof(*sd));
		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		sd->completion_queue = NULL;
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->csd.flags = 0;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
		sd->backlog.gro_list = NULL;
		sd->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device the first device on the list of
	 * network devices: the first device that appears and the last
	 * network device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&hashrnd, sizeof(hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);