#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <trace/events/napi.h>
#include <trace/events/net.h>
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
#include <linux/cpu_rmap.h>
#include <linux/if_tunnel.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>
#include <linux/net_tstamp.h>

#include "net-sysfs.h"

#define MAX_GRO_SKBS 8

#define GRO_MAX_HEAD (MAX_HEADER + 128)
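
/*
 * The list of packet types we will receive (as opposed to all promiscuous
 * taps): ptype_all is consulted for every frame, ptype_base is hashed by
 * protocol number.  Writers take ptype_lock; readers walk the lists under
 * rcu_read_lock(), so updates use the _rcu list helpers.
 */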
#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;
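
/*
 * dev_base_lock protects the per-namespace device lists and hashes.
 * Writers hold both the RTNL semaphore and dev_base_lock; pure readers
 * may either take dev_base_lock or, on the fast path, rely on RCU.
 */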
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

static inline void dev_base_seq_inc(struct net *net)
{
	while (++net->dev_base_seq == 0);
}

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
}

static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}

static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}

/* Device list insertion: caller must hold the RTNL semaphore. */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(net);

	return 0;
}

/* Device list removal: caller must hold the RTNL semaphore. */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	hlist_del_rcu(&dev->name_hlist);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	dev_base_seq_inc(dev_net(dev));
}

/* Our notifier chain for netdevice events. */
static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 * Device drivers call our routines to queue packets here.  We empty the
 * queue in the local softnet handler.
 */
DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);

#ifdef CONFIG_LOCKDEP
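/*
 * register_netdevice() inits txq->_xmit_lock and sets a lockdep class
 * according to dev->type.  One lock class per ARPHRD type keeps lockdep
 * from reporting false deadlocks when devices of different types are
 * stacked (e.g. VLAN over Ethernet).
 */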
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
	 ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
	 ARPHRD_VOID, ARPHRD_NONE};

static const char *const netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
	 "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
	 "_xmit_VOID", "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif
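
/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/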

static inline struct list_head *ptype_head(const struct packet_type *pt)
{
	if (pt->type == htons(ETH_P_ALL))
		return &ptype_all;
	else
		return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
}
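
/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack.  The passed
 *	&packet_type is linked into kernel lists and may not be freed until
 *	it has been removed from the kernel lists.
 *
 *	This call does not sleep, therefore it can not guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */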
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(dev_add_pack);
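
/**
 *	__dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack().  The passed &packet_type is
 *	removed from the kernel lists, but the packet type might still be
 *	in use by receivers: it must not be freed until after all CPUs have
 *	gone through a quiescent state.
 */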
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);
	struct packet_type *pt1;

	spin_lock(&ptype_lock);

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock(&ptype_lock);
}
EXPORT_SYMBOL(__dev_remove_pack);
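
/**
 *	dev_remove_pack - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack().  The passed &packet_type can
 *	be freed or reused once this function returns: it sleeps to
 *	guarantee that no CPU is still looking at the packet type.
 */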
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}
EXPORT_SYMBOL(dev_remove_pack);

/*******************************************************************************

		Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add - add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds a new setup entry to the dev_boot_setup list.  Returns 0 on
 *	error (table full) and 1 on success.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check - check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.  The found settings are
 *	set for the device to be used later in the device probing.  Returns
 *	0 if no settings were found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq = s[i].map.irq;
			dev->base_addr = s[i].map.base_addr;
			dev->mem_start = s[i].map.mem_start;
			dev->mem_end = s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}
EXPORT_SYMBOL(netdev_boot_setup_check);

/**
 *	netdev_boot_base - get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of a device.
 *	Returns 0 if no settings were found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If the device is already registered, return a base of 1 so
	 * that the caller can skip this device.
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add the new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

/*******************************************************************************

		Device Interface Subroutines

*******************************************************************************/
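
/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.  Must be called under the RTNL semaphore
 *	or @dev_base_lock.  If the name is found a pointer to the device is
 *	returned; if not, %NULL.  The reference counter is not incremented,
 *	so the caller must be careful with locks.
 */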
struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_name);

/**
 *	dev_get_by_name_rcu - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.  Must be called under rcu_read_lock(),
 *	and the caller must hold the RCU lock for as long as it uses the
 *	result, since struct net_device can be freed from any cpu context.
 */
struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_name_hash(net, name);

	hlist_for_each_entry_rcu(dev, p, head, name_hlist)
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
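
/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name.  This can be called from any context and
 *	does its own locking.  The returned handle has the usage count
 *	incremented and the caller must use dev_put() to release it when it
 *	is no longer needed.  %NULL is returned if no matching device is
 *	found.
 */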
struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_name);

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index.  Returns %NULL if the device is
 *	not found, or a pointer to the device.  The device's reference
 *	counter is not increased, so the caller must be careful about
 *	locking: hold the RTNL semaphore or @dev_base_lock.
 */
struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_get_by_index);

/**
 *	dev_get_by_index_rcu - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Like __dev_get_by_index(), but the caller must hold RCU instead of
 *	RTNL or @dev_base_lock.
 */
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
{
	struct hlist_node *p;
	struct net_device *dev;
	struct hlist_head *head = dev_index_hash(net, ifindex);

	hlist_for_each_entry_rcu(dev, p, head, index_hlist)
		if (dev->ifindex == ifindex)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_get_by_index_rcu);

/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index.  The returned device has had a
 *	reference added and the pointer is safe until the user calls
 *	dev_put() to indicate they have finished with it.
 */
struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (dev)
		dev_hold(dev);
	rcu_read_unlock();
	return dev;
}
EXPORT_SYMBOL(dev_get_by_index);

/**
 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address.  Returns %NULL if the
 *	device is not found, or a pointer to the device.  The caller must
 *	hold RCU or RTNL; the returned device's ref count is not increased.
 */
struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
				       const char *ha)
{
	struct net_device *dev;

	for_each_netdev_rcu(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}
EXPORT_SYMBOL(dev_getbyhwaddr_rcu);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}
EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev, *ret = NULL;

	rcu_read_lock();
	for_each_netdev_rcu(net, dev)
		if (dev->type == type) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags_rcu - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags.  Returns %NULL if a
 *	device is not found, or a pointer to the device.  Must be called
 *	inside rcu_read_lock(); the result's refcount is unchanged.
 */
struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags,
					unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	for_each_netdev_rcu(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			ret = dev;
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL(dev_get_by_flags_rcu);

/**
 *	dev_valid_name - check if a name is okay for a network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to allow sysfs to
 *	work.  We also disallow any kind of whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}
EXPORT_SYMBOL(dev_valid_name);
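
/*
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer of size IFNAMSIZ
 *
 *	Passed a format string - eg "lt%d" - it will try to find a suitable
 *	id.  It scans the list of devices to build up a free map, then
 *	chooses the first empty slot.  The caller must hold the dev_base or
 *	RTNL lock while allocating the name and adding the device in order
 *	to avoid duplicates.  Returns the number of the unit assigned or a
 *	negative errno code.
 */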
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from the
		 * user.  There must be exactly one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	if (buf != name)
		snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/*
	 * It is possible to run out of possible slots when the name is
	 * long and there isn't enough space left for the digits, or if
	 * all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" - it will try to find a suitable
 *	id, writing the resolved name into dev->name on success.  Returns
 *	the number of the unit assigned or a negative errno code.
 */
int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
EXPORT_SYMBOL(dev_alloc_name);

static int dev_get_valid_name(struct net_device *dev, const char *name)
{
	struct net *net;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);

	if (!dev_valid_name(name))
		return -EINVAL;

	if (strchr(name, '%'))
		return dev_alloc_name(dev, name);
	else if (__dev_get_by_name(net, name))
		return -EEXIST;
	else if (dev->name != name)
		strlcpy(dev->name, name, IFNAMSIZ);

	return 0;
}
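
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) to use
 *
 *	Change the name of a device; a format string such as "eth%d" may be
 *	passed.  The device must be down and the caller must hold RTNL; on
 *	notifier failure the rename is rolled back once.  Returns 0 on
 *	success, a negative errno code on failure.
 */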
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	err = dev_get_valid_name(dev, newname);
	if (err < 0)
		return err;

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		return ret;
	}

	write_lock_bh(&dev_base_lock);
	hlist_del_rcu(&dev->name_hlist);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	hlist_add_head_rcu(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		} else {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from alias
 *
 *	Set ifalias for a device.  A zero @len clears the alias.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state.  This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message to
 *	the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}
EXPORT_SYMBOL(netdev_state_change);

int netdev_bonding_change(struct net_device *dev, unsigned long event)
{
	return call_netdevice_notifiers(event, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load - load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module.  If module loading is
 *	not available in this kernel then it becomes a nop.
 */
void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;
	int no_module;

	rcu_read_lock();
	dev = dev_get_by_name_rcu(net, name);
	rcu_read_unlock();

	no_module = !dev;
	if (no_module && capable(CAP_NET_ADMIN))
		no_module = request_module("netdev-%s", name);
	if (no_module && capable(CAP_SYS_MODULE)) {
		if (!request_module("%s", name))
			pr_err("Loading kernel module for a network device "
			       "with CAP_SYS_MODULE (deprecated). Use "
			       "CAP_NET_ADMIN and alias netdev-%s instead\n",
			       name);
	}
}
EXPORT_SYMBOL(dev_load);

static int __dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		net_dmaengine_get();
		dev_set_rx_mode(dev);
		dev_activate(dev);
	}

	return ret;
}
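
/**
 *	dev_open - prepare an interface for use
 *	@dev: device to open
 *
 *	Takes a device from down to up state.  The device's private open
 *	function is invoked and then the multicast lists are loaded.
 *	Finally the device is moved into the up state and a %NETDEV_UP
 *	message is sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop.  On a
 *	failure a negative errno code is returned.
 */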
int dev_open(struct net_device *dev)
{
	int ret;

	if (dev->flags & IFF_UP)
		return 0;

	ret = __dev_open(dev);
	if (ret < 0)
		return ret;

	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
	call_netdevice_notifiers(NETDEV_UP, dev);

	return ret;
}
EXPORT_SYMBOL(dev_open);

static int __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, unreg_list) {
		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/*
		 * Synchronize to scheduled poll.  We cannot touch the poll
		 * list, it can be even on a different cpu.  So just clear
		 * netif_running(); poll handlers will see it and hold off,
		 * with transmit queues stopped by netif_tx_disable.
		 */
		smp_mb__after_clear_bit(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 * Call the device-specific close.  This cannot fail.
		 * Only if the device is UP.
		 *
		 * We allow it to be called even after a DETACH hot-plug
		 * event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		net_dmaengine_put();
	}

	return 0;
}

static int __dev_close(struct net_device *dev)
{
	int retval;
	LIST_HEAD(single);

	list_add(&dev->unreg_list, &single);
	retval = __dev_close_many(&single);
	list_del(&single);
	return retval;
}

static int dev_close_many(struct list_head *head)
{
	struct net_device *dev, *tmp;
	LIST_HEAD(tmp_list);

	/* Remove the devices that don't need to be closed */
	list_for_each_entry_safe(dev, tmp, head, unreg_list)
		if (!(dev->flags & IFF_UP))
			list_move(&dev->unreg_list, &tmp_list);

	__dev_close_many(head);

	list_for_each_entry(dev, head, unreg_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
	}

	/* rollback_registered_many needs the complete original list */
	list_splice(&tmp_list, head);
	return 0;
}
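
/**
 *	dev_close - shutdown an interface
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state.  A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain.  The
 *	device is then deactivated and finally a %NETDEV_DOWN is sent to
 *	the notifier chain.
 */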
int dev_close(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		LIST_HEAD(single);

		list_add(&dev->unreg_list, &single);
		dev_close_many(&single);
		list_del(&single);
	}
	return 0;
}
EXPORT_SYMBOL(dev_close);

/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	u32 flags;

	/*
	 * If we're trying to disable lro on a vlan device, use the
	 * underlying physical device instead.
	 */
	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);

	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
		flags = dev->ethtool_ops->get_flags(dev);
	else
		flags = ethtool_op_get_flags(dev);

	if (!(flags & ETH_FLAG_LRO))
		return;

	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");
}
EXPORT_SYMBOL(dev_disable_lro);

static int dev_boot_phase = 1;
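
/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered.  A negative errno
 *	code is returned on a failure.
 *
 *	When registered, all registration and up events are replayed to the
 *	new notifier to allow the device to have a race-free view of the
 *	network device list.
 */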
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
			nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);

/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier().  The notifier is unlinked from the
 *	kernel structures and may then be reused.  A negative errno code is
 *	returned on a failure.
 */
int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value are
 *	as for raw_notifier_call_chain().
 */
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	ASSERT_RTNL();
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}
EXPORT_SYMBOL(call_netdevice_notifiers);

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}
EXPORT_SYMBOL(net_enable_timestamp);

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}
EXPORT_SYMBOL(net_disable_timestamp);

static inline void net_timestamp_set(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

static inline void net_timestamp_check(struct sk_buff *skb)
{
	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
		__net_timestamp(skb);
}

static int net_hwtstamp_validate(struct ifreq *ifr)
{
	struct hwtstamp_config cfg;
	enum hwtstamp_tx_types tx_type;
	enum hwtstamp_rx_filters rx_filter;
	int tx_type_valid = 0;
	int rx_filter_valid = 0;

	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
		return -EFAULT;

	if (cfg.flags) /* reserved for future extensions */
		return -EINVAL;

	tx_type = cfg.tx_type;
	rx_filter = cfg.rx_filter;

	switch (tx_type) {
	case HWTSTAMP_TX_OFF:
	case HWTSTAMP_TX_ON:
	case HWTSTAMP_TX_ONESTEP_SYNC:
		tx_type_valid = 1;
		break;
	}

	switch (rx_filter) {
	case HWTSTAMP_FILTER_NONE:
	case HWTSTAMP_FILTER_ALL:
	case HWTSTAMP_FILTER_SOME:
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		rx_filter_valid = 1;
		break;
	}

	if (!tx_type_valid || !rx_filter_valid)
		return -ERANGE;

	return 0;
}

static inline bool is_skb_forwardable(struct net_device *dev,
				      struct sk_buff *skb)
{
	unsigned int len;

	if (!(dev->flags & IFF_UP))
		return false;

	len = dev->mtu + dev->hard_header_len + VLAN_HLEN;
	if (skb->len <= len)
		return true;

	/*
	 * If TSO is enabled, we don't care about the length, as the packet
	 * could be forwarded without being segmented before.
	 */
	if (skb_is_gso(skb))
		return true;

	return false;
}
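
/**
 *	dev_forward_skb - loopback an skb to another netif
 *	@dev: destination network device
 *	@skb: buffer to forward
 *
 *	return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped, but freed)
 *
 *	dev_forward_skb can be used for injecting an skb from the
 *	start_xmit function of one device into the receive queue of
 *	another device.
 *
 *	The receiving device may be in another namespace, so we have to
 *	clear all information in the skb that could impact namespace
 *	isolation.
 */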
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
{
	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
			atomic_long_inc(&dev->rx_dropped);
			kfree_skb(skb);
			return NET_RX_DROP;
		}
	}

	skb_orphan(skb);
	nf_reset(skb);

	if (unlikely(!is_skb_forwardable(dev, skb))) {
		atomic_long_inc(&dev->rx_dropped);
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	skb_set_dev(skb, dev);
	skb->tstamp.tv64 = 0;
	skb->pkt_type = PACKET_HOST;
	skb->protocol = eth_type_trans(skb, dev);
	return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);

static inline int deliver_skb(struct sk_buff *skb,
			      struct packet_type *pt_prev,
			      struct net_device *orig_dev)
{
	atomic_inc(&skb->users);
	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
}

/*
 * Support routine.  Sends outgoing frames to any network taps currently
 * in use.
 */
static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;
	struct sk_buff *skb2 = NULL;
	struct packet_type *pt_prev = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/*
		 * Never send packets back to the socket they originated
		 * from.
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			if (pt_prev) {
				deliver_skb(skb2, pt_prev, skb->dev);
				pt_prev = ptype;
				continue;
			}

			skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			net_timestamp_set(skb2);

			/*
			 * skb->nh should be correctly set by the sender, so
			 * that the second statement is just protection
			 * against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       ntohs(skb2->protocol),
					       dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			pt_prev = ptype;
		}
	}
	if (pt_prev)
		pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
	rcu_read_unlock();
}

/*
 * netif_setup_tc - re-validate the traffic class to tx queue mappings
 * after the number of in-use tx queues changes to @txq.
 */
static void netif_setup_tc(struct net_device *dev, unsigned int txq)
{
	int i;
	struct netdev_tc_txq *tc = &dev->tc_to_txq[0];

	/* If TC0 is invalidated disable TC mapping */
	if (tc->offset + tc->count > txq) {
		pr_warning("Number of in use tx queues changed "
			   "invalidating tc mappings. Priority "
			   "traffic classification disabled!\n");
		dev->num_tc = 0;
		return;
	}

	/* Invalidated prio to tc mappings set to TC0 */
	for (i = 1; i < TC_BITMASK + 1; i++) {
		int q = netdev_get_prio_tc_map(dev, i);

		tc = &dev->tc_to_txq[q];
		if (tc->offset + tc->count > txq) {
			pr_warning("Number of in use tx queues "
				   "changed. Priority %i to tc "
				   "mapping %i is no longer valid "
				   "setting map to 0\n",
				   i, q);
			netdev_set_prio_tc_map(dev, i, 0);
		}
	}
}

/*
 * Routine to help set real_num_tx_queues.  To avoid skbs mapped to
 * queues greater than real_num_tx_queues, stale skbs on the qdisc must
 * be flushed.
 */
int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
{
	int rc;

	if (txq < 1 || txq > dev->num_tx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED ||
	    dev->reg_state == NETREG_UNREGISTERING) {
		ASSERT_RTNL();

		rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
						  txq);
		if (rc)
			return rc;

		if (dev->num_tc)
			netif_setup_tc(dev, txq);

		if (txq < dev->real_num_tx_queues)
			qdisc_reset_all_tx_gt(dev, txq);
	}

	dev->real_num_tx_queues = txq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_tx_queues);

#ifdef CONFIG_RPS
/**
 *	netif_set_real_num_rx_queues - set actual number of RX queues used
 *	@dev: Network device
 *	@rxq: Actual number of RX queues
 *
 *	This must be called either with the rtnl_lock held or before
 *	registration of the net device.  Returns 0 on success, or a
 *	negative error code.  If called before registration, it always
 *	succeeds.
 */
int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
{
	int rc;

	if (rxq < 1 || rxq > dev->num_rx_queues)
		return -EINVAL;

	if (dev->reg_state == NETREG_REGISTERED) {
		ASSERT_RTNL();

		rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
						  rxq);
		if (rc)
			return rc;
	}

	dev->real_num_rx_queues = rxq;
	return 0;
}
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif

static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = NULL;
	*sd->output_queue_tailp = q;
	sd->output_queue_tailp = &q->next_sched;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

/**
 *	netif_device_detach - mark device as removed
 *	@dev: network device
 *
 *	Mark device as removed from system and therefore no longer
 *	available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_stop_all_queues(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 *	netif_device_attach - mark device as attached
 *	@dev: network device
 *
 *	Mark device as attached from system and restart if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_tx_wake_all_queues(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);

/**
 *	skb_set_dev - set a new device for an skb
 *	@skb: skbuff we are changing the device for
 *	@dev: the new device
 *
 *	When moving an skb to a new namespace, clear all state that could
 *	impact namespace isolation.
 */
#ifdef CONFIG_NET_NS
void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
{
	skb_dst_drop(skb);
	if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
		secpath_reset(skb);
		nf_reset(skb);
		skb_init_secmark(skb);
		skb->mark = 0;
		skb->priority = 0;
		skb->nf_trace = 0;
		skb->ipvs_property = 0;
#ifdef CONFIG_NET_SCHED
		skb->tc_index = 0;
#endif
	}
	skb->dev = dev;
}
EXPORT_SYMBOL(skb_set_dev);
#endif

/*
 * Invalidate hardware checksum when packet is to be mangled, and fill
 * in the resulting checksum in software.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	offset = skb_checksum_start_offset(skb);
	BUG_ON(offset >= skb_headlen(skb));
	csum = skb_checksum(skb, offset, skb->len - offset, 0);

	offset += skb->csum_offset;
	BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

	if (skb_cloned(skb) &&
	    !skb_clone_writable(skb, offset + sizeof(__sum16))) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	*(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}
EXPORT_SYMBOL(skb_checksum_help);

/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of
 *	segments.  It may return %NULL if the skb requires no segmentation.
 *	This is only possible when GSO is used for verifying header
 *	integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int vlan_depth = ETH_HLEN;
	int err;

	while (type == htons(ETH_P_8021Q)) {
		struct vlan_hdr *vh;

		if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
			return ERR_PTR(-EINVAL);

		vh = (struct vlan_hdr *)(skb->data + vlan_depth);
		type = vh->h_vlan_encapsulated_proto;
		vlan_depth += VLAN_HLEN;
	}

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
		struct net_device *dev = skb->dev;
		struct ethtool_drvinfo info = {};

		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
			dev->ethtool_ops->get_drvinfo(dev, &info);

		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
		     info.driver, dev ? dev->features : 0L,
		     skb->sk ? skb->sk->sk_route_caps : 0L,
		     skb->len, skb->data_len, skb->ip_summed);

		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype,
			&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}
EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
	if (net_ratelimit()) {
		printk(KERN_ERR "%s: hw csum failure.\n",
		       dev ? dev->name : "<unknown>");
		dump_stack();
	}
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/*
 * Check whether this skb can be DMA'd by @dev: highmem fragments are
 * illegal without NETIF_F_HIGHDMA, and on physically-addressed buses
 * all fragments must fall within the parent device's DMA mask.
 */
static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
	int i;
	if (!(dev->features & NETIF_F_HIGHDMA)) {
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			if (PageHighMem(skb_frag_page(frag)))
				return 1;
		}
	}

	if (PCI_DMA_BUS_IS_PHYS) {
		struct device *pdev = dev->dev.parent;

		if (!pdev)
			return 0;
		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
			dma_addr_t addr = page_to_phys(skb_frag_page(frag));
			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
				return 1;
		}
	}
#endif
	return 0;
}

struct dev_gso_cb {
	void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
	struct dev_gso_cb *cb;

	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;
		kfree_skb(nskb);
	} while (skb->next);

	cb = DEV_GSO_CB(skb);
	if (cb->destructor)
		cb->destructor(skb);
}

/*
 * dev_gso_segment - segment the skb as an emergency at transmit time.
 * On success the segments are chained off skb->next and a destructor is
 * installed to free them; returns 0, or a negative errno on failure.
 */
static int dev_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs;

	segs = skb_gso_segment(skb, features);

	/* Verifying header integrity only. */
	if (!segs)
		return 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);

	skb->next = segs;
	DEV_GSO_CB(skb)->destructor = skb->destructor;
	skb->destructor = dev_gso_skb_destructor;

	return 0;
}

/*
 * Try to orphan the skb early, right before transmission by the device.
 * We cannot orphan the skb if tx timestamping is needed (tx_flags set),
 * as the timestamp callbacks require skb->sk.
 */
static inline void skb_orphan_try(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (sk && !skb_shinfo(skb)->tx_flags) {
		/*
		 * skb_tx_hash() won't be able to use sk->sk_hash after
		 * orphaning, so snapshot it into skb->rxhash first.
		 */
		if (!skb->rxhash)
			skb->rxhash = sk->sk_hash;
		skb_orphan(skb);
	}
}

static bool can_checksum_protocol(unsigned long features, __be16 protocol)
{
	return ((features & NETIF_F_GEN_CSUM) ||
		((features & NETIF_F_V4_CSUM) &&
		 protocol == htons(ETH_P_IP)) ||
		((features & NETIF_F_V6_CSUM) &&
		 protocol == htons(ETH_P_IPV6)) ||
		((features & NETIF_F_FCOE_CRC) &&
		 protocol == htons(ETH_P_FCOE)));
}

static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
{
	if (!can_checksum_protocol(features, protocol)) {
		features &= ~NETIF_F_ALL_CSUM;
		features &= ~NETIF_F_SG;
	} else if (illegal_highdma(skb->dev, skb)) {
		features &= ~NETIF_F_SG;
	}

	return features;
}

u32 netif_skb_features(struct sk_buff *skb)
{
	__be16 protocol = skb->protocol;
	u32 features = skb->dev->features;

	if (protocol == htons(ETH_P_8021Q)) {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
		protocol = veh->h_vlan_encapsulated_proto;
	} else if (!vlan_tx_tag_present(skb)) {
		return harmonize_features(skb, protocol, features);
	}

	features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);

	if (protocol != htons(ETH_P_8021Q)) {
		return harmonize_features(skb, protocol, features);
	} else {
		features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
			    NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
		return harmonize_features(skb, protocol, features);
	}
}
EXPORT_SYMBOL(netif_skb_features);

/*
 * Returns true if either:
 *	1. skb has frag_list and the device doesn't support FRAGLIST, or
 *	2. skb is fragmented and the device does not support SG.
 */
static inline int skb_needs_linearize(struct sk_buff *skb,
				      int features)
{
	return skb_is_nonlinear(skb) &&
	       ((skb_has_frag_list(skb) &&
		 !(features & NETIF_F_FRAGLIST)) ||
		(skb_shinfo(skb)->nr_frags &&
		 !(features & NETIF_F_SG)));
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
			struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int rc = NETDEV_TX_OK;
	unsigned int skb_len;

	if (likely(!skb->next)) {
		u32 features;

		/*
		 * If the device doesn't need skb->dst, release it right
		 * now while it is hot in this cpu's cache.
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(skb);

		if (!list_empty(&ptype_all))
			dev_queue_xmit_nit(skb, dev);

		skb_orphan_try(skb);

		features = netif_skb_features(skb);

		if (vlan_tx_tag_present(skb) &&
		    !(features & NETIF_F_HW_VLAN_TX)) {
			skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
			if (unlikely(!skb))
				goto out;

			skb->vlan_tci = 0;
		}

		if (netif_needs_gso(skb, features)) {
			if (unlikely(dev_gso_segment(skb, features)))
				goto out_kfree_skb;
			if (skb->next)
				goto gso;
		} else {
			if (skb_needs_linearize(skb, features) &&
			    __skb_linearize(skb))
				goto out_kfree_skb;

			/*
			 * If packet is not checksummed and device does not
			 * support checksumming for this protocol, complete
			 * checksumming here.
			 */
			if (skb->ip_summed == CHECKSUM_PARTIAL) {
				skb_set_transport_header(skb,
					skb_checksum_start_offset(skb));
				if (!(features & NETIF_F_ALL_CSUM) &&
				    skb_checksum_help(skb))
					goto out_kfree_skb;
			}
		}

		skb_len = skb->len;
		rc = ops->ndo_start_xmit(skb, dev);
		trace_net_dev_xmit(skb, rc, dev, skb_len);
		if (rc == NETDEV_TX_OK)
			txq_trans_update(txq);
		return rc;
	}

gso:
	do {
		struct sk_buff *nskb = skb->next;

		skb->next = nskb->next;
		nskb->next = NULL;

		/*
		 * If the device doesn't need nskb->dst, release it right
		 * now while it is hot in this cpu's cache.
		 */
		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
			skb_dst_drop(nskb);

		skb_len = nskb->len;
		rc = ops->ndo_start_xmit(nskb, dev);
		trace_net_dev_xmit(nskb, rc, dev, skb_len);
		if (unlikely(rc != NETDEV_TX_OK)) {
			if (rc & ~NETDEV_TX_MASK)
				goto out_kfree_gso_skb;
			nskb->next = skb->next;
			skb->next = nskb;
			return rc;
		}
		txq_trans_update(txq);
		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
			return NETDEV_TX_BUSY;
	} while (skb->next);

out_kfree_gso_skb:
	if (likely(skb->next == NULL))
		skb->destructor = DEV_GSO_CB(skb)->destructor;
out_kfree_skb:
	kfree_skb(skb);
out:
	return rc;
}

static u32 hashrnd __read_mostly;

/*
 * Returns a Tx hash based on the given packet descriptor, mapped onto a
 * given number of tx queues and honouring any traffic-class offsets.
 */
u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
		  unsigned int num_tx_queues)
{
	u32 hash;
	u16 qoffset = 0;
	u16 qcount = num_tx_queues;

	if (skb_rx_queue_recorded(skb)) {
		hash = skb_get_rx_queue(skb);
		while (unlikely(hash >= num_tx_queues))
			hash -= num_tx_queues;
		return hash;
	}

	if (dev->num_tc) {
		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
		qoffset = dev->tc_to_txq[tc].offset;
		qcount = dev->tc_to_txq[tc].count;
	}

	if (skb->sk && skb->sk->sk_hash)
		hash = skb->sk->sk_hash;
	else
		hash = (__force u16) skb->protocol ^ skb->rxhash;
	hash = jhash_1word(hash, hashrnd);

	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
}
EXPORT_SYMBOL(__skb_tx_hash);

static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
		if (net_ratelimit()) {
			pr_warning("%s selects TX queue %d, but "
				   "real number of TX queues is %d\n",
				   dev->name, queue_index,
				   dev->real_num_tx_queues);
		}
		return 0;
	}
	return queue_index;
}

static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
	struct xps_dev_maps *dev_maps;
	struct xps_map *map;
	int queue_index = -1;

	rcu_read_lock();
	dev_maps = rcu_dereference(dev->xps_maps);
	if (dev_maps) {
		map = rcu_dereference(
		    dev_maps->cpu_map[raw_smp_processor_id()]);
		if (map) {
			if (map->len == 1)
				queue_index = map->queues[0];
			else {
				u32 hash;
				if (skb->sk && skb->sk->sk_hash)
					hash = skb->sk->sk_hash;
				else
					hash = (__force u16) skb->protocol ^
					    skb->rxhash;
				hash = jhash_1word(hash, hashrnd);
				queue_index = map->queues[
				    ((u64)hash * map->len) >> 32];
			}
			if (unlikely(queue_index >= dev->real_num_tx_queues))
				queue_index = -1;
		}
	}
	rcu_read_unlock();

	return queue_index;
#else
	return -1;
#endif
}

static struct netdev_queue *dev_pick_tx(struct net_device *dev,
					struct sk_buff *skb)
{
	int queue_index;
	const struct net_device_ops *ops = dev->netdev_ops;

	if (dev->real_num_tx_queues == 1)
		queue_index = 0;
	else if (ops->ndo_select_queue) {
		queue_index = ops->ndo_select_queue(dev, skb);
		queue_index = dev_cap_txqueue(dev, queue_index);
	} else {
		struct sock *sk = skb->sk;
		queue_index = sk_tx_queue_get(sk);

		if (queue_index < 0 || skb->ooo_okay ||
		    queue_index >= dev->real_num_tx_queues) {
			int old_index = queue_index;

			queue_index = get_xps_queue(dev, skb);
			if (queue_index < 0)
				queue_index = skb_tx_hash(dev, skb);

			if (queue_index != old_index && sk) {
				struct dst_entry *dst =
				    rcu_dereference_check(sk->sk_dst_cache, 1);

				if (dst && skb_dst(skb) == dst)
					sk_tx_queue_set(sk, queue_index);
			}
		}
	}

	skb_set_queue_mapping(skb, queue_index);
	return netdev_get_tx_queue(dev, queue_index);
}

static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
				 struct net_device *dev,
				 struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	bool contended;
	int rc;

	qdisc_skb_cb(skb)->pkt_len = skb->len;
	qdisc_calculate_pkt_len(skb, q);
	/*
	 * Heuristic to force contended enqueues to serialize on a
	 * separate lock before trying to get the qdisc main lock.
	 * This permits the __QDISC_STATE_RUNNING owner to get the lock
	 * more often and dequeue packets faster.
	 */
	contended = qdisc_is_running(q);
	if (unlikely(contended))
		spin_lock(&q->busylock);

	spin_lock(root_lock);
	if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
		kfree_skb(skb);
		rc = NET_XMIT_DROP;
	} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
		   qdisc_run_begin(q)) {
		/*
		 * This is a work-conserving queue; there are no old skbs
		 * waiting to be sent out; and the qdisc is not running -
		 * xmit the skb directly.
		 */
		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
			skb_dst_force(skb);

		qdisc_bstats_update(q, skb);

		if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		} else
			qdisc_run_end(q);

		rc = NET_XMIT_SUCCESS;
	} else {
		skb_dst_force(skb);
		rc = q->enqueue(skb, q) & NET_XMIT_MASK;
		if (qdisc_run_begin(q)) {
			if (unlikely(contended)) {
				spin_unlock(&q->busylock);
				contended = false;
			}
			__qdisc_run(q);
		}
	}
	spin_unlock(root_lock);
	if (unlikely(contended))
		spin_unlock(&q->busylock);
	return rc;
}

static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
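
/**
 *	dev_queue_xmit - transmit a buffer
 *	@skb: buffer to transmit
 *
 *	Queue a buffer for transmission to a network device.  The caller
 *	must have set the device and priority and built the buffer before
 *	calling this function.  The function can be called from an
 *	interrupt.
 *
 *	A negative errno code is returned on a failure.  A success does
 *	not guarantee the frame will be transmitted as it may be dropped
 *	due to congestion or traffic shaping.  Regardless of the return
 *	value, the skb is consumed, so it is currently difficult to retry
 *	a send to this method.
 */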
int dev_queue_xmit(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct netdev_queue *txq;
	struct Qdisc *q;
	int rc = -ENOMEM;

	/*
	 * Disable soft irqs for various locks below.  Also stops
	 * preemption for RCU.
	 */
	rcu_read_lock_bh();

	txq = dev_pick_tx(dev, skb);
	q = rcu_dereference_bh(txq->qdisc);

#ifdef CONFIG_NET_CLS_ACT
	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
#endif
	trace_net_dev_queue(skb);
	if (q->enqueue) {
		rc = __dev_xmit_skb(skb, q, dev, txq);
		goto out;
	}

	/*
	 * The device has no queue.  Common case for software devices:
	 * loopback, all the sorts of tunnels...
	 *
	 * Really, it is unlikely that netif_tx_lock protection is
	 * necessary here (f.e. loopback and IP tunnels are clean,
	 * ignoring counters).  However, it is possible that they rely
	 * on protection made by us here.
	 */
	if (dev->flags & IFF_UP) {
		int cpu = smp_processor_id(); /* ok because BHs are off */

		if (txq->xmit_lock_owner != cpu) {

			if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
				goto recursion_alert;

			HARD_TX_LOCK(dev, txq, cpu);

			if (!netif_tx_queue_stopped(txq)) {
				__this_cpu_inc(xmit_recursion);
				rc = dev_hard_start_xmit(skb, dev, txq);
				__this_cpu_dec(xmit_recursion);
				if (dev_xmit_complete(rc)) {
					HARD_TX_UNLOCK(dev, txq);
					goto out;
				}
			}
			HARD_TX_UNLOCK(dev, txq);
			if (net_ratelimit())
				printk(KERN_CRIT "Virtual device %s asks to "
				       "queue packet!\n", dev->name);
		} else {
			/*
			 * Recursion is detected!  It is possible,
			 * unfortunately.
			 */
recursion_alert:
			if (net_ratelimit())
				printk(KERN_CRIT "Dead loop on virtual device "
				       "%s, fix it urgently!\n", dev->name);
		}
	}

	rc = -ENETDOWN;
	rcu_read_unlock_bh();

	kfree_skb(skb);
	return rc;
out:
	rcu_read_unlock_bh();
	return rc;
}
EXPORT_SYMBOL(dev_queue_xmit);

/*=======================================================================
			Receiver routines
  =======================================================================*/

int netdev_max_backlog __read_mostly = 1000;
int netdev_tstamp_prequeue __read_mostly = 1;
int netdev_budget __read_mostly = 300;
int weight_p __read_mostly = 64;	/* old backlog weight */

/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
				     struct napi_struct *napi)
{
	list_add_tail(&napi->poll_list, &sd->poll_list);
	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
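
/*
 * __skb_get_rxhash: calculate a flow hash based on src/dst addresses and
 * ports.  Sets rxhash in skb to a non-zero hash value on success, zero
 * indicates no valid hash.  Also sets l4_rxhash in skb if the hash is a
 * canonical 4-tuple hash over transport ports.
 */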
void __skb_get_rxhash(struct sk_buff *skb)
{
	int nhoff, hash = 0, poff;
	const struct ipv6hdr *ip6;
	const struct iphdr *ip;
	const struct vlan_hdr *vlan;
	u8 ip_proto;
	u32 addr1, addr2;
	u16 proto;
	union {
		u32 v32;
		u16 v16[2];
	} ports;

	nhoff = skb_network_offset(skb);
	proto = skb->protocol;

again:
	switch (proto) {
	case __constant_htons(ETH_P_IP):
ip:
		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
			goto done;

		ip = (const struct iphdr *) (skb->data + nhoff);
		if (ip_is_fragment(ip))
			ip_proto = 0;
		else
			ip_proto = ip->protocol;
		addr1 = (__force u32) ip->saddr;
		addr2 = (__force u32) ip->daddr;
		nhoff += ip->ihl * 4;
		break;
	case __constant_htons(ETH_P_IPV6):
ipv6:
		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
			goto done;

		ip6 = (const struct ipv6hdr *) (skb->data + nhoff);
		ip_proto = ip6->nexthdr;
		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
		nhoff += 40;
		break;
	case __constant_htons(ETH_P_8021Q):
		if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff))
			goto done;
		vlan = (const struct vlan_hdr *) (skb->data + nhoff);
		proto = vlan->h_vlan_encapsulated_proto;
		nhoff += sizeof(*vlan);
		goto again;
	case __constant_htons(ETH_P_PPP_SES):
		if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff))
			goto done;
		proto = *((__be16 *) (skb->data + nhoff +
				      sizeof(struct pppoe_hdr)));
		nhoff += PPPOE_SES_HLEN;
		switch (proto) {
		case __constant_htons(PPP_IP):
			goto ip;
		case __constant_htons(PPP_IPV6):
			goto ipv6;
		default:
			goto done;
		}
	default:
		goto done;
	}

	switch (ip_proto) {
	case IPPROTO_GRE:
		if (pskb_may_pull(skb, nhoff + 16)) {
			u8 *h = skb->data + nhoff;
			__be16 flags = *(__be16 *)h;

			/*
			 * Only look inside GRE if version zero and no
			 * routing.
			 */
			if (!(flags & (GRE_VERSION|GRE_ROUTING))) {
				proto = *(__be16 *)(h + 2);
				nhoff += 4;
				if (flags & GRE_CSUM)
					nhoff += 4;
				if (flags & GRE_KEY)
					nhoff += 4;
				if (flags & GRE_SEQ)
					nhoff += 4;
				goto again;
			}
		}
		break;
	case IPPROTO_IPIP:
		goto again;
	default:
		break;
	}

	ports.v32 = 0;
	poff = proto_ports_offset(ip_proto);
	if (poff >= 0) {
		nhoff += poff;
		if (pskb_may_pull(skb, nhoff + 4)) {
			ports.v32 = * (__force u32 *) (skb->data + nhoff);
			if (ports.v16[1] < ports.v16[0])
				swap(ports.v16[0], ports.v16[1]);
			skb->l4_rxhash = 1;
		}
	}

	/* get a consistent hash (same value on both flow directions) */
	if (addr2 < addr1)
		swap(addr1, addr2);

	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
	if (!hash)
		hash = 1;

done:
	skb->rxhash = hash;
}
EXPORT_SYMBOL(__skb_get_rxhash);

#ifdef CONFIG_RPS

/* One global table that all flow-based protocols share. */
struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
EXPORT_SYMBOL(rps_sock_flow_table);

static struct rps_dev_flow *
set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
	    struct rps_dev_flow *rflow, u16 next_cpu)
{
	if (next_cpu != RPS_NO_CPU) {
#ifdef CONFIG_RFS_ACCEL
		struct netdev_rx_queue *rxqueue;
		struct rps_dev_flow_table *flow_table;
		struct rps_dev_flow *old_rflow;
		u32 flow_id;
		u16 rxq_index;
		int rc;

		/* Should we steer this flow to a different hardware queue? */
		if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
		    !(dev->features & NETIF_F_NTUPLE))
			goto out;
		rxq_index = cpu_rmap_lookup_index(dev->rx_cpu_rmap, next_cpu);
		if (rxq_index == skb_get_rx_queue(skb))
			goto out;

		rxqueue = dev->_rx + rxq_index;
		flow_table = rcu_dereference(rxqueue->rps_flow_table);
		if (!flow_table)
			goto out;
		flow_id = skb->rxhash & flow_table->mask;
		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
							rxq_index, flow_id);
		if (rc < 0)
			goto out;
		old_rflow = rflow;
		rflow = &flow_table->flows[flow_id];
		rflow->filter = rc;
		if (old_rflow->filter == rflow->filter)
			old_rflow->filter = RPS_NO_FILTER;
	out:
#endif
		rflow->last_qtail =
			per_cpu(softnet_data, next_cpu).input_queue_head;
	}

	rflow->cpu = next_cpu;
	return rflow;
}
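
/*
 * get_rps_cpu is called from netif_receive_skb and returns the target
 * CPU from the RPS map of the receiving queue for a given skb.
 * rcu_read_lock must be held on entry.
 */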
2772static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
2773 struct rps_dev_flow **rflowp)
2774{
2775 struct netdev_rx_queue *rxqueue;
2776 struct rps_map *map;
2777 struct rps_dev_flow_table *flow_table;
2778 struct rps_sock_flow_table *sock_flow_table;
2779 int cpu = -1;
2780 u16 tcpu;
2781
2782 if (skb_rx_queue_recorded(skb)) {
2783 u16 index = skb_get_rx_queue(skb);
2784 if (unlikely(index >= dev->real_num_rx_queues)) {
2785 WARN_ONCE(dev->real_num_rx_queues > 1,
2786 "%s received packet on queue %u, but number "
2787 "of RX queues is %u\n",
2788 dev->name, index, dev->real_num_rx_queues);
2789 goto done;
2790 }
2791 rxqueue = dev->_rx + index;
2792 } else
2793 rxqueue = dev->_rx;
2794
2795 map = rcu_dereference(rxqueue->rps_map);
2796 if (map) {
2797 if (map->len == 1 &&
2798 !rcu_access_pointer(rxqueue->rps_flow_table)) {
2799 tcpu = map->cpus[0];
2800 if (cpu_online(tcpu))
2801 cpu = tcpu;
2802 goto done;
2803 }
2804 } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) {
2805 goto done;
2806 }
2807
2808 skb_reset_network_header(skb);
2809 if (!skb_get_rxhash(skb))
2810 goto done;
2811
2812 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2813 sock_flow_table = rcu_dereference(rps_sock_flow_table);
2814 if (flow_table && sock_flow_table) {
2815 u16 next_cpu;
2816 struct rps_dev_flow *rflow;
2817
2818 rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
2819 tcpu = rflow->cpu;
2820
2821 next_cpu = sock_flow_table->ents[skb->rxhash &
2822 sock_flow_table->mask];
2823
		/*
		 * If the desired CPU (where last recvmsg was done) is
		 * different from current CPU (one in the rx-queue flow
		 * table entry), switch if one of the following holds:
		 *   - Current CPU is unset (RPS_NO_CPU).
		 *   - Current CPU is offline.
		 *   - The current CPU's queue tail has advanced beyond the
		 *     last packet that was enqueued using this table entry.
		 *     This guarantees that all previous packets for the flow
		 *     have been dequeued, thus preserving in order delivery.
		 */
2835 if (unlikely(tcpu != next_cpu) &&
2836 (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
2837 ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
2838 rflow->last_qtail)) >= 0))
2839 rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
2840
2841 if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
2842 *rflowp = rflow;
2843 cpu = tcpu;
2844 goto done;
2845 }
2846 }
2847
2848 if (map) {
2849 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
2850
2851 if (cpu_online(tcpu)) {
2852 cpu = tcpu;
2853 goto done;
2854 }
2855 }
2856
2857done:
2858 return cpu;
2859}
2860
2861#ifdef CONFIG_RFS_ACCEL

/**
 * rps_may_expire_flow - check whether an RFS hardware filter may be removed
 * @dev: Device on which the filter was set
 * @rxq_index: RX queue index
 * @flow_id: Flow ID passed to ndo_rx_flow_steer()
 * @filter_id: Filter ID returned by ndo_rx_flow_steer()
 *
 * Drivers that implement ndo_rx_flow_steer() should periodically call
 * this function for each installed filter and remove the filters for
 * which it returns %true.
 */
2874bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
2875 u32 flow_id, u16 filter_id)
2876{
2877 struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;
2878 struct rps_dev_flow_table *flow_table;
2879 struct rps_dev_flow *rflow;
2880 bool expire = true;
2881 int cpu;
2882
2883 rcu_read_lock();
2884 flow_table = rcu_dereference(rxqueue->rps_flow_table);
2885 if (flow_table && flow_id <= flow_table->mask) {
2886 rflow = &flow_table->flows[flow_id];
2887 cpu = ACCESS_ONCE(rflow->cpu);
2888 if (rflow->filter == filter_id && cpu != RPS_NO_CPU &&
2889 ((int)(per_cpu(softnet_data, cpu).input_queue_head -
2890 rflow->last_qtail) <
2891 (int)(10 * flow_table->mask)))
2892 expire = false;
2893 }
2894 rcu_read_unlock();
2895 return expire;
2896}
2897EXPORT_SYMBOL(rps_may_expire_flow);
2898
2899#endif
2900
/* Called from hardirq (IPI) context */
2902static void rps_trigger_softirq(void *data)
2903{
2904 struct softnet_data *sd = data;
2905
2906 ____napi_schedule(sd, &sd->backlog);
2907 sd->received_rps++;
2908}
2909
2910#endif
2911
/*
 * Check if this softnet_data structure is another cpu's one.
 * If yes, queue it to our IPI list and return 1.
 * If no, return 0.
 */
2917static int rps_ipi_queued(struct softnet_data *sd)
2918{
2919#ifdef CONFIG_RPS
2920 struct softnet_data *mysd = &__get_cpu_var(softnet_data);
2921
2922 if (sd != mysd) {
2923 sd->rps_ipi_next = mysd->rps_ipi_list;
2924 mysd->rps_ipi_list = sd;
2925
2926 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2927 return 1;
2928 }
2929#endif
2930 return 0;
2931}
2932
/*
 * enqueue_to_backlog is called to queue an skb to a per CPU backlog
 * queue (may be a remote CPU queue).
 */
2937static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
2938 unsigned int *qtail)
2939{
2940 struct softnet_data *sd;
2941 unsigned long flags;
2942
2943 sd = &per_cpu(softnet_data, cpu);
2944
2945 local_irq_save(flags);
2946
2947 rps_lock(sd);
2948 if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
2949 if (skb_queue_len(&sd->input_pkt_queue)) {
2950enqueue:
2951 __skb_queue_tail(&sd->input_pkt_queue, skb);
2952 input_queue_tail_incr_save(sd, qtail);
2953 rps_unlock(sd);
2954 local_irq_restore(flags);
2955 return NET_RX_SUCCESS;
2956 }

		/*
		 * Schedule NAPI for the backlog device.
		 * We can use a non-atomic operation since we own the
		 * queue lock.
		 */
2961 if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
2962 if (!rps_ipi_queued(sd))
2963 ____napi_schedule(sd, &sd->backlog);
2964 }
2965 goto enqueue;
2966 }
2967
2968 sd->dropped++;
2969 rps_unlock(sd);
2970
2971 local_irq_restore(flags);
2972
2973 atomic_long_inc(&skb->dev->rx_dropped);
2974 kfree_skb(skb);
2975 return NET_RX_DROP;
2976}
2977
/**
 *	netif_rx	-	post buffer to the network code
 *	@skb: buffer to post
 *
 *	This function receives a packet from a device driver and queues it for
 *	the upper (protocol) levels to process.  It always succeeds.  The buffer
 *	may be dropped during processing for congestion control or by the
 *	protocol layers.
 *
 *	Return values:
 *	NET_RX_SUCCESS	(no congestion)
 *	NET_RX_DROP	(packet was dropped)
 */
2993int netif_rx(struct sk_buff *skb)
2994{
2995 int ret;
2996
	/* if netpoll wants it, pretend we never saw it */
2998 if (netpoll_rx(skb))
2999 return NET_RX_DROP;
3000
3001 if (netdev_tstamp_prequeue)
3002 net_timestamp_check(skb);
3003
3004 trace_netif_rx(skb);
3005#ifdef CONFIG_RPS
3006 {
3007 struct rps_dev_flow voidflow, *rflow = &voidflow;
3008 int cpu;
3009
3010 preempt_disable();
3011 rcu_read_lock();
3012
3013 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3014 if (cpu < 0)
3015 cpu = smp_processor_id();
3016
3017 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3018
3019 rcu_read_unlock();
3020 preempt_enable();
3021 }
3022#else
3023 {
3024 unsigned int qtail;
3025 ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
3026 put_cpu();
3027 }
3028#endif
3029 return ret;
3030}
3031EXPORT_SYMBOL(netif_rx);
3032
3033int netif_rx_ni(struct sk_buff *skb)
3034{
3035 int err;
3036
3037 preempt_disable();
3038 err = netif_rx(skb);
3039 if (local_softirq_pending())
3040 do_softirq();
3041 preempt_enable();
3042
3043 return err;
3044}
3045EXPORT_SYMBOL(netif_rx_ni);
3046
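/*
 * net_tx_action is the NET_TX_SOFTIRQ handler.  It frees skbs queued on
 * this cpu's completion list and runs qdiscs that were scheduled for
 * transmit; a qdisc whose root lock is contended is either rescheduled
 * or, if deactivated, simply has its SCHED bit cleared.
 */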
3047static void net_tx_action(struct softirq_action *h)
3048{
3049 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3050
3051 if (sd->completion_queue) {
3052 struct sk_buff *clist;
3053
3054 local_irq_disable();
3055 clist = sd->completion_queue;
3056 sd->completion_queue = NULL;
3057 local_irq_enable();
3058
3059 while (clist) {
3060 struct sk_buff *skb = clist;
3061 clist = clist->next;
3062
3063 WARN_ON(atomic_read(&skb->users));
3064 trace_kfree_skb(skb, net_tx_action);
3065 __kfree_skb(skb);
3066 }
3067 }
3068
3069 if (sd->output_queue) {
3070 struct Qdisc *head;
3071
3072 local_irq_disable();
3073 head = sd->output_queue;
3074 sd->output_queue = NULL;
3075 sd->output_queue_tailp = &sd->output_queue;
3076 local_irq_enable();
3077
3078 while (head) {
3079 struct Qdisc *q = head;
3080 spinlock_t *root_lock;
3081
3082 head = head->next_sched;
3083
3084 root_lock = qdisc_lock(q);
3085 if (spin_trylock(root_lock)) {
3086 smp_mb__before_clear_bit();
3087 clear_bit(__QDISC_STATE_SCHED,
3088 &q->state);
3089 qdisc_run(q);
3090 spin_unlock(root_lock);
3091 } else {
3092 if (!test_bit(__QDISC_STATE_DEACTIVATED,
3093 &q->state)) {
3094 __netif_reschedule(q);
3095 } else {
3096 smp_mb__before_clear_bit();
3097 clear_bit(__QDISC_STATE_SCHED,
3098 &q->state);
3099 }
3100 }
3101 }
3102 }
3103}
3104
3105#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
3106 (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
3107
3108int (*br_fdb_test_addr_hook)(struct net_device *dev,
3109 unsigned char *addr) __read_mostly;
3110EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
3111#endif
3112
3113#ifdef CONFIG_NET_CLS_ACT
/* TODO: Maybe we should just force sch_ingress to be compiled in
 * when CONFIG_NET_CLS_ACT is?  Otherwise we pay a compare and two
 * stores on every packet for nothing.
 * NOTE: This doesn't stop any functionality; if you don't have
 * the ingress scheduler, you just can't add policies on ingress.
 */
3122static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
3123{
3124 struct net_device *dev = skb->dev;
3125 u32 ttl = G_TC_RTTL(skb->tc_verd);
3126 int result = TC_ACT_OK;
3127 struct Qdisc *q;
3128
3129 if (unlikely(MAX_RED_LOOP < ttl++)) {
3130 if (net_ratelimit())
			pr_warning("Redir loop detected, dropping packet (%d->%d)\n",
3132 skb->skb_iif, dev->ifindex);
3133 return TC_ACT_SHOT;
3134 }
3135
3136 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
3137 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
3138
3139 q = rxq->qdisc;
3140 if (q != &noop_qdisc) {
3141 spin_lock(qdisc_lock(q));
3142 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
3143 result = qdisc_enqueue_root(skb, q);
3144 spin_unlock(qdisc_lock(q));
3145 }
3146
3147 return result;
3148}
3149
3150static inline struct sk_buff *handle_ing(struct sk_buff *skb,
3151 struct packet_type **pt_prev,
3152 int *ret, struct net_device *orig_dev)
3153{
3154 struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
3155
3156 if (!rxq || rxq->qdisc == &noop_qdisc)
3157 goto out;
3158
3159 if (*pt_prev) {
3160 *ret = deliver_skb(skb, *pt_prev, orig_dev);
3161 *pt_prev = NULL;
3162 }
3163
3164 switch (ing_filter(skb, rxq)) {
3165 case TC_ACT_SHOT:
3166 case TC_ACT_STOLEN:
3167 kfree_skb(skb);
3168 return NULL;
3169 }
3170
3171out:
3172 skb->tc_verd = 0;
3173 return skb;
3174}
3175#endif
3176
/**
 *	netdev_rx_handler_register - register receive handler
 *	@dev: device to register a handler for
 *	@rx_handler: receive handler to register
 *	@rx_handler_data: data pointer that is used by rx handler
 *
 *	Register a receive handler for a device.  This handler will then be
 *	called from __netif_receive_skb.  A negative errno code is returned
 *	on a failure.
 *
 *	The caller must hold the rtnl_mutex.
 *
 *	For a general description of rx_handler, see enum rx_handler_result.
 */
3191int netdev_rx_handler_register(struct net_device *dev,
3192 rx_handler_func_t *rx_handler,
3193 void *rx_handler_data)
3194{
3195 ASSERT_RTNL();
3196
3197 if (dev->rx_handler)
3198 return -EBUSY;
3199
3200 rcu_assign_pointer(dev->rx_handler_data, rx_handler_data);
3201 rcu_assign_pointer(dev->rx_handler, rx_handler);
3202
3203 return 0;
3204}
3205EXPORT_SYMBOL_GPL(netdev_rx_handler_register);
3206
/**
 *	netdev_rx_handler_unregister - unregister receive handler
 *	@dev: device to unregister a handler from
 *
 *	Unregister a receive handler from a device.
 *
 *	The caller must hold the rtnl_mutex.
 */
3215void netdev_rx_handler_unregister(struct net_device *dev)
3216{
3217
3218 ASSERT_RTNL();
3219 RCU_INIT_POINTER(dev->rx_handler, NULL);
3220 RCU_INIT_POINTER(dev->rx_handler_data, NULL);
3221}
3222EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
3223
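/*
 * __netif_receive_skb is the core receive path.  Under rcu_read_lock it
 * delivers the skb to the taps on ptype_all, the ingress qdisc, any
 * registered rx_handler and, finally, to the matching protocol handlers
 * hashed in ptype_base.
 */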
3224static int __netif_receive_skb(struct sk_buff *skb)
3225{
3226 struct packet_type *ptype, *pt_prev;
3227 rx_handler_func_t *rx_handler;
3228 struct net_device *orig_dev;
3229 struct net_device *null_or_dev;
3230 bool deliver_exact = false;
3231 int ret = NET_RX_DROP;
3232 __be16 type;
3233
3234 if (!netdev_tstamp_prequeue)
3235 net_timestamp_check(skb);
3236
3237 trace_netif_receive_skb(skb);
3238
	/* if we've gotten here through NAPI, check netpoll */
3240 if (netpoll_receive_skb(skb))
3241 return NET_RX_DROP;
3242
3243 if (!skb->skb_iif)
3244 skb->skb_iif = skb->dev->ifindex;
3245 orig_dev = skb->dev;
3246
3247 skb_reset_network_header(skb);
3248 skb_reset_transport_header(skb);
3249 skb_reset_mac_len(skb);
3250
3251 pt_prev = NULL;
3252
3253 rcu_read_lock();
3254
3255another_round:
3256
3257 __this_cpu_inc(softnet_data.processed);
3258
3259 if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3260 skb = vlan_untag(skb);
3261 if (unlikely(!skb))
3262 goto out;
3263 }
3264
3265#ifdef CONFIG_NET_CLS_ACT
3266 if (skb->tc_verd & TC_NCLS) {
3267 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3268 goto ncls;
3269 }
3270#endif
3271
3272 list_for_each_entry_rcu(ptype, &ptype_all, list) {
3273 if (!ptype->dev || ptype->dev == skb->dev) {
3274 if (pt_prev)
3275 ret = deliver_skb(skb, pt_prev, orig_dev);
3276 pt_prev = ptype;
3277 }
3278 }
3279
3280#ifdef CONFIG_NET_CLS_ACT
3281 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3282 if (!skb)
3283 goto out;
3284ncls:
3285#endif
3286
3287 rx_handler = rcu_dereference(skb->dev->rx_handler);
3288 if (vlan_tx_tag_present(skb)) {
3289 if (pt_prev) {
3290 ret = deliver_skb(skb, pt_prev, orig_dev);
3291 pt_prev = NULL;
3292 }
3293 if (vlan_do_receive(&skb, !rx_handler))
3294 goto another_round;
3295 else if (unlikely(!skb))
3296 goto out;
3297 }
3298
3299 if (rx_handler) {
3300 if (pt_prev) {
3301 ret = deliver_skb(skb, pt_prev, orig_dev);
3302 pt_prev = NULL;
3303 }
3304 switch (rx_handler(&skb)) {
3305 case RX_HANDLER_CONSUMED:
3306 goto out;
3307 case RX_HANDLER_ANOTHER:
3308 goto another_round;
3309 case RX_HANDLER_EXACT:
3310 deliver_exact = true;
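			/* fall through */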
3311 case RX_HANDLER_PASS:
3312 break;
3313 default:
3314 BUG();
3315 }
3316 }
3317
	/* deliver only exact match when indicated */
3319 null_or_dev = deliver_exact ? skb->dev : NULL;
3320
3321 type = skb->protocol;
3322 list_for_each_entry_rcu(ptype,
3323 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3324 if (ptype->type == type &&
3325 (ptype->dev == null_or_dev || ptype->dev == skb->dev ||
3326 ptype->dev == orig_dev)) {
3327 if (pt_prev)
3328 ret = deliver_skb(skb, pt_prev, orig_dev);
3329 pt_prev = ptype;
3330 }
3331 }
3332
3333 if (pt_prev) {
3334 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3335 } else {
3336 atomic_long_inc(&skb->dev->rx_dropped);
3337 kfree_skb(skb);
		/* No protocol handler claimed the packet, so report it
		 * as dropped.
		 */
3341 ret = NET_RX_DROP;
3342 }
3343
3344out:
3345 rcu_read_unlock();
3346 return ret;
3347}
3348
/**
 *	netif_receive_skb - process receive buffer from network
 *	@skb: buffer to process
 *
 *	netif_receive_skb() is the main receive data processing function.
 *	It always succeeds.  The buffer may be dropped during processing
 *	for congestion control or by the protocol layers.
 *
 *	This function may only be called from softirq context and interrupts
 *	should be enabled.
 *
 *	Return values (usually ignored):
 *	NET_RX_SUCCESS: no congestion
 *	NET_RX_DROP: packet was dropped
 */
3364int netif_receive_skb(struct sk_buff *skb)
3365{
3366 if (netdev_tstamp_prequeue)
3367 net_timestamp_check(skb);
3368
3369 if (skb_defer_rx_timestamp(skb))
3370 return NET_RX_SUCCESS;
3371
3372#ifdef CONFIG_RPS
3373 {
3374 struct rps_dev_flow voidflow, *rflow = &voidflow;
3375 int cpu, ret;
3376
3377 rcu_read_lock();
3378
3379 cpu = get_rps_cpu(skb->dev, skb, &rflow);
3380
3381 if (cpu >= 0) {
3382 ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
3383 rcu_read_unlock();
3384 } else {
3385 rcu_read_unlock();
3386 ret = __netif_receive_skb(skb);
3387 }
3388
3389 return ret;
3390 }
3391#else
3392 return __netif_receive_skb(skb);
3393#endif
3394}
3395EXPORT_SYMBOL(netif_receive_skb);
3396
/*
 * Network device is going away: flush any packets still pending.
 * Called with IRQs disabled.
 */
3400static void flush_backlog(void *arg)
3401{
3402 struct net_device *dev = arg;
3403 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3404 struct sk_buff *skb, *tmp;
3405
3406 rps_lock(sd);
3407 skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
3408 if (skb->dev == dev) {
3409 __skb_unlink(skb, &sd->input_pkt_queue);
3410 kfree_skb(skb);
3411 input_queue_head_incr(sd);
3412 }
3413 }
3414 rps_unlock(sd);
3415
3416 skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
3417 if (skb->dev == dev) {
3418 __skb_unlink(skb, &sd->process_queue);
3419 kfree_skb(skb);
3420 input_queue_head_incr(sd);
3421 }
3422 }
3423}
3424
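/*
 * Push a held GRO skb up the stack: invoke the protocol's gro_complete
 * callback to finalize the merged headers (skipped for single-segment
 * skbs), then hand the result to netif_receive_skb().
 */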
3425static int napi_gro_complete(struct sk_buff *skb)
3426{
3427 struct packet_type *ptype;
3428 __be16 type = skb->protocol;
3429 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3430 int err = -ENOENT;
3431
3432 if (NAPI_GRO_CB(skb)->count == 1) {
3433 skb_shinfo(skb)->gso_size = 0;
3434 goto out;
3435 }
3436
3437 rcu_read_lock();
3438 list_for_each_entry_rcu(ptype, head, list) {
3439 if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3440 continue;
3441
3442 err = ptype->gro_complete(skb);
3443 break;
3444 }
3445 rcu_read_unlock();
3446
3447 if (err) {
3448 WARN_ON(&ptype->list == head);
3449 kfree_skb(skb);
3450 return NET_RX_SUCCESS;
3451 }
3452
3453out:
3454 return netif_receive_skb(skb);
3455}
3456
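/*
 * Flush the per-NAPI GRO list, completing every held skb, so that
 * merged packets are not held past the end of a poll cycle.
 */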
3457inline void napi_gro_flush(struct napi_struct *napi)
3458{
3459 struct sk_buff *skb, *next;
3460
3461 for (skb = napi->gro_list; skb; skb = next) {
3462 next = skb->next;
3463 skb->next = NULL;
3464 napi_gro_complete(skb);
3465 }
3466
3467 napi->gro_count = 0;
3468 napi->gro_list = NULL;
3469}
3470EXPORT_SYMBOL(napi_gro_flush);
3471
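/*
 * dev_gro_receive - try to merge an incoming skb into the GRO list.
 * Returns GRO_MERGED/GRO_MERGED_FREE when the skb was coalesced into an
 * existing flow, GRO_HELD when it starts a new held flow, and GRO_NORMAL
 * when the packet must take the regular receive path.
 */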
3472enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3473{
3474 struct sk_buff **pp = NULL;
3475 struct packet_type *ptype;
3476 __be16 type = skb->protocol;
3477 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3478 int same_flow;
3479 int mac_len;
3480 enum gro_result ret;
3481
3482 if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
3483 goto normal;
3484
3485 if (skb_is_gso(skb) || skb_has_frag_list(skb))
3486 goto normal;
3487
3488 rcu_read_lock();
3489 list_for_each_entry_rcu(ptype, head, list) {
3490 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3491 continue;
3492
3493 skb_set_network_header(skb, skb_gro_offset(skb));
3494 mac_len = skb->network_header - skb->mac_header;
3495 skb->mac_len = mac_len;
3496 NAPI_GRO_CB(skb)->same_flow = 0;
3497 NAPI_GRO_CB(skb)->flush = 0;
3498 NAPI_GRO_CB(skb)->free = 0;
3499
3500 pp = ptype->gro_receive(&napi->gro_list, skb);
3501 break;
3502 }
3503 rcu_read_unlock();
3504
3505 if (&ptype->list == head)
3506 goto normal;
3507
3508 same_flow = NAPI_GRO_CB(skb)->same_flow;
3509 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3510
3511 if (pp) {
3512 struct sk_buff *nskb = *pp;
3513
3514 *pp = nskb->next;
3515 nskb->next = NULL;
3516 napi_gro_complete(nskb);
3517 napi->gro_count--;
3518 }
3519
3520 if (same_flow)
3521 goto ok;
3522
3523 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3524 goto normal;
3525
3526 napi->gro_count++;
3527 NAPI_GRO_CB(skb)->count = 1;
3528 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3529 skb->next = napi->gro_list;
3530 napi->gro_list = skb;
3531 ret = GRO_HELD;
3532
3533pull:
3534 if (skb_headlen(skb) < skb_gro_offset(skb)) {
3535 int grow = skb_gro_offset(skb) - skb_headlen(skb);
3536
3537 BUG_ON(skb->end - skb->tail < grow);
3538
3539 memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
3540
3541 skb->tail += grow;
3542 skb->data_len -= grow;
3543
3544 skb_shinfo(skb)->frags[0].page_offset += grow;
3545 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow);
3546
3547 if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) {
3548 skb_frag_unref(skb, 0);
3549 memmove(skb_shinfo(skb)->frags,
3550 skb_shinfo(skb)->frags + 1,
3551 --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
3552 }
3553 }
3554
3555ok:
3556 return ret;
3557
3558normal:
3559 ret = GRO_NORMAL;
3560 goto pull;
3561}
3562EXPORT_SYMBOL(dev_gro_receive);
3563
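/*
 * Pre-compute same_flow for every skb already held on the GRO list by
 * comparing device, VLAN tag and Ethernet header, then hand the new
 * skb to dev_gro_receive().
 */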
3564static inline gro_result_t
3565__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3566{
3567 struct sk_buff *p;
3568
3569 for (p = napi->gro_list; p; p = p->next) {
3570 unsigned long diffs;
3571
3572 diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
3573 diffs |= p->vlan_tci ^ skb->vlan_tci;
3574 diffs |= compare_ether_header(skb_mac_header(p),
3575 skb_gro_mac_header(skb));
3576 NAPI_GRO_CB(p)->same_flow = !diffs;
3577 NAPI_GRO_CB(p)->flush = 0;
3578 }
3579
3580 return dev_gro_receive(napi, skb);
3581}
3582
3583gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
3584{
3585 switch (ret) {
3586 case GRO_NORMAL:
3587 if (netif_receive_skb(skb))
3588 ret = GRO_DROP;
3589 break;
3590
3591 case GRO_DROP:
3592 case GRO_MERGED_FREE:
3593 kfree_skb(skb);
3594 break;
3595
3596 case GRO_HELD:
3597 case GRO_MERGED:
3598 break;
3599 }
3600
3601 return ret;
3602}
3603EXPORT_SYMBOL(napi_skb_finish);
3604
3605void skb_gro_reset_offset(struct sk_buff *skb)
3606{
3607 NAPI_GRO_CB(skb)->data_offset = 0;
3608 NAPI_GRO_CB(skb)->frag0 = NULL;
3609 NAPI_GRO_CB(skb)->frag0_len = 0;
3610
3611 if (skb->mac_header == skb->tail &&
3612 !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
3613 NAPI_GRO_CB(skb)->frag0 =
3614 skb_frag_address(&skb_shinfo(skb)->frags[0]);
3615 NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]);
3616 }
3617}
3618EXPORT_SYMBOL(skb_gro_reset_offset);
3619
3620gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3621{
3622 skb_gro_reset_offset(skb);
3623
3624 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3625}
3626EXPORT_SYMBOL(napi_gro_receive);
3627
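/* Recycle a fully pulled skb so napi_get_frags() can hand it out again. */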
3628static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3629{
3630 __skb_pull(skb, skb_headlen(skb));
3631 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3632 skb->vlan_tci = 0;
3633 skb->dev = napi->dev;
3634 skb->skb_iif = 0;
3635
3636 napi->skb = skb;
3637}
3638
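/*
 * Return the skb that drivers should attach received page fragments to,
 * allocating a fresh GRO_MAX_HEAD-sized one if none is cached on the
 * NAPI context.
 */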
3639struct sk_buff *napi_get_frags(struct napi_struct *napi)
3640{
3641 struct sk_buff *skb = napi->skb;
3642
3643 if (!skb) {
3644 skb = netdev_alloc_skb_ip_align(napi->dev, GRO_MAX_HEAD);
3645 if (skb)
3646 napi->skb = skb;
3647 }
3648 return skb;
3649}
3650EXPORT_SYMBOL(napi_get_frags);
3651
3652gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
3653 gro_result_t ret)
3654{
3655 switch (ret) {
3656 case GRO_NORMAL:
3657 case GRO_HELD:
3658 skb->protocol = eth_type_trans(skb, skb->dev);
3659
3660 if (ret == GRO_HELD)
3661 skb_gro_pull(skb, -ETH_HLEN);
3662 else if (netif_receive_skb(skb))
3663 ret = GRO_DROP;
3664 break;
3665
3666 case GRO_DROP:
3667 case GRO_MERGED_FREE:
3668 napi_reuse_skb(napi, skb);
3669 break;
3670
3671 case GRO_MERGED:
3672 break;
3673 }
3674
3675 return ret;
3676}
3677EXPORT_SYMBOL(napi_frags_finish);
3678
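/*
 * Prepare the cached frag skb for GRO: reset offsets, make sure the
 * Ethernet header is directly accessible, pull it, and record the
 * protocol from the header itself.
 */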
3679struct sk_buff *napi_frags_skb(struct napi_struct *napi)
3680{
3681 struct sk_buff *skb = napi->skb;
3682 struct ethhdr *eth;
3683 unsigned int hlen;
3684 unsigned int off;
3685
3686 napi->skb = NULL;
3687
3688 skb_reset_mac_header(skb);
3689 skb_gro_reset_offset(skb);
3690
3691 off = skb_gro_offset(skb);
3692 hlen = off + sizeof(*eth);
3693 eth = skb_gro_header_fast(skb, off);
3694 if (skb_gro_header_hard(skb, hlen)) {
3695 eth = skb_gro_header_slow(skb, hlen, off);
3696 if (unlikely(!eth)) {
3697 napi_reuse_skb(napi, skb);
3698 skb = NULL;
3699 goto out;
3700 }
3701 }
3702
3703 skb_gro_pull(skb, sizeof(*eth));
3704
	/*
	 * This works because the only protocols we care about don't require
	 * special handling.  We'll fix it up properly at the end.
	 */
3709 skb->protocol = eth->h_proto;
3710
3711out:
3712 return skb;
3713}
3714EXPORT_SYMBOL(napi_frags_skb);
3715
3716gro_result_t napi_gro_frags(struct napi_struct *napi)
3717{
3718 struct sk_buff *skb = napi_frags_skb(napi);
3719
3720 if (!skb)
3721 return GRO_DROP;
3722
3723 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3724}
3725EXPORT_SYMBOL(napi_gro_frags);
3726
/*
 * net_rps_action_and_irq_enable sends any pending IPIs for RPS.
 * Note: called with local irq disabled, but exits with local irq enabled.
 */
3731static void net_rps_action_and_irq_enable(struct softnet_data *sd)
3732{
3733#ifdef CONFIG_RPS
3734 struct softnet_data *remsd = sd->rps_ipi_list;
3735
3736 if (remsd) {
3737 sd->rps_ipi_list = NULL;
3738
3739 local_irq_enable();
3740
		/* Send pending IPIs to kick RPS processing on remote cpus. */
3742 while (remsd) {
3743 struct softnet_data *next = remsd->rps_ipi_next;
3744
3745 if (cpu_online(remsd->cpu))
3746 __smp_call_function_single(remsd->cpu,
3747 &remsd->csd, 0);
3748 remsd = next;
3749 }
3750 } else
3751#endif
3752 local_irq_enable();
3753}
3754
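/*
 * process_backlog is the NAPI poll function for the softnet backlog
 * device used by RPS/netif_rx: it drains the per-cpu process_queue and
 * refills it from input_pkt_queue under rps_lock.
 */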
3755static int process_backlog(struct napi_struct *napi, int quota)
3756{
3757 int work = 0;
3758 struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
3759
3760#ifdef CONFIG_RPS
	/* Check if we have pending IPIs; it is better to send them now
	 * than to wait for net_rx_action() to end.
	 */
3764 if (sd->rps_ipi_list) {
3765 local_irq_disable();
3766 net_rps_action_and_irq_enable(sd);
3767 }
3768#endif
3769 napi->weight = weight_p;
3770 local_irq_disable();
3771 while (work < quota) {
3772 struct sk_buff *skb;
3773 unsigned int qlen;
3774
3775 while ((skb = __skb_dequeue(&sd->process_queue))) {
3776 local_irq_enable();
3777 __netif_receive_skb(skb);
3778 local_irq_disable();
3779 input_queue_head_incr(sd);
3780 if (++work >= quota) {
3781 local_irq_enable();
3782 return work;
3783 }
3784 }
3785
3786 rps_lock(sd);
3787 qlen = skb_queue_len(&sd->input_pkt_queue);
3788 if (qlen)
3789 skb_queue_splice_tail_init(&sd->input_pkt_queue,
3790 &sd->process_queue);
3791
3792 if (qlen < quota - work) {
			/*
			 * Inline a custom version of __napi_complete().
			 * Only the current cpu owns and manipulates this
			 * napi, and NAPI_STATE_SCHED is the only possible
			 * flag set on backlog, so we can use a plain write
			 * instead of clear_bit() and we do not need an
			 * smp_mb() memory barrier.
			 */
3800 list_del(&napi->poll_list);
3801 napi->state = 0;
3802
3803 quota = work + qlen;
3804 }
3805 rps_unlock(sd);
3806 }
3807 local_irq_enable();
3808
3809 return work;
3810}
3811
/**
 * __napi_schedule - schedule for receive
 * @n: entry to schedule
 *
 * The entry's receive function will be scheduled to run.
 */
3818void __napi_schedule(struct napi_struct *n)
3819{
3820 unsigned long flags;
3821
3822 local_irq_save(flags);
3823 ____napi_schedule(&__get_cpu_var(softnet_data), n);
3824 local_irq_restore(flags);
3825}
3826EXPORT_SYMBOL(__napi_schedule);
3827
3828void __napi_complete(struct napi_struct *n)
3829{
3830 BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3831 BUG_ON(n->gro_list);
3832
3833 list_del(&n->poll_list);
3834 smp_mb__before_clear_bit();
3835 clear_bit(NAPI_STATE_SCHED, &n->state);
3836}
3837EXPORT_SYMBOL(__napi_complete);
3838
3839void napi_complete(struct napi_struct *n)
3840{
3841 unsigned long flags;
3842
	/*
	 * Don't let napi dequeue from the cpu poll list just in case
	 * it's running on a different cpu.
	 */
3847 if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3848 return;
3849
3850 napi_gro_flush(n);
3851 local_irq_save(flags);
3852 __napi_complete(n);
3853 local_irq_restore(flags);
3854}
3855EXPORT_SYMBOL(napi_complete);
3856
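/*
 * netif_napi_add - initialize a NAPI context
 * @dev: network device
 * @napi: NAPI context
 * @poll: polling function
 * @weight: default weight
 *
 * netif_napi_add() must be used to initialize a NAPI context prior to
 * calling any of the other NAPI-related functions.  A typical driver
 * poll method (a sketch only; the mydrv_* names are hypothetical)
 * looks like:
 *
 *	static int mydrv_poll(struct napi_struct *napi, int budget)
 *	{
 *		int done = mydrv_clean_rx(napi, budget);
 *
 *		if (done < budget)
 *			napi_complete(napi);
 *		return done;
 *	}
 */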
3857void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3858 int (*poll)(struct napi_struct *, int), int weight)
3859{
3860 INIT_LIST_HEAD(&napi->poll_list);
3861 napi->gro_count = 0;
3862 napi->gro_list = NULL;
3863 napi->skb = NULL;
3864 napi->poll = poll;
3865 napi->weight = weight;
3866 list_add(&napi->dev_list, &dev->napi_list);
3867 napi->dev = dev;
3868#ifdef CONFIG_NETPOLL
3869 spin_lock_init(&napi->poll_lock);
3870 napi->poll_owner = -1;
3871#endif
3872 set_bit(NAPI_STATE_SCHED, &napi->state);
3873}
3874EXPORT_SYMBOL(netif_napi_add);
3875
3876void netif_napi_del(struct napi_struct *napi)
3877{
3878 struct sk_buff *skb, *next;
3879
3880 list_del_init(&napi->dev_list);
3881 napi_free_frags(napi);
3882
3883 for (skb = napi->gro_list; skb; skb = next) {
3884 next = skb->next;
3885 skb->next = NULL;
3886 kfree_skb(skb);
3887 }
3888
3889 napi->gro_list = NULL;
3890 napi->gro_count = 0;
3891}
3892EXPORT_SYMBOL(netif_napi_del);
3893
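/*
 * net_rx_action is the NET_RX_SOFTIRQ handler.  It polls each scheduled
 * NAPI instance for at most its weight, and re-raises the softirq when
 * either the netdev_budget or the 2 jiffies time limit is exhausted.
 */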
3894static void net_rx_action(struct softirq_action *h)
3895{
3896 struct softnet_data *sd = &__get_cpu_var(softnet_data);
3897 unsigned long time_limit = jiffies + 2;
3898 int budget = netdev_budget;
3899 void *have;
3900
3901 local_irq_disable();
3902
3903 while (!list_empty(&sd->poll_list)) {
3904 struct napi_struct *n;
3905 int work, weight;
3906
		/* If the softirq window is exhausted then punt.
		 * Allow this to run for 2 jiffies, which yields an
		 * average latency of 1.5/HZ.
		 */
3911 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3912 goto softnet_break;
3913
3914 local_irq_enable();
3915
		/* Even though interrupts have been re-enabled, this
		 * access is safe because interrupts can only add new
		 * entries to the tail of this list, and only ->poll()
		 * calls can remove this head entry from the list.
		 */
3921 n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
3922
3923 have = netpoll_poll_lock(n);
3924
3925 weight = n->weight;
3926
		/* This NAPI_STATE_SCHED test is for avoiding a race
		 * with netpoll's poll_napi().  Only the entity which
		 * obtains the lock and sees NAPI_STATE_SCHED set will
		 * actually make the ->poll() call.  Therefore we avoid
		 * accepting a frame.
		 */
3933 work = 0;
3934 if (test_bit(NAPI_STATE_SCHED, &n->state)) {
3935 work = n->poll(n, weight);
3936 trace_napi_poll(n);
3937 }
3938
3939 WARN_ON_ONCE(work > weight);
3940
3941 budget -= work;
3942
3943 local_irq_disable();
3944
		/* Drivers must not modify the NAPI state if they
		 * consume the entire weight.  In such cases this code
		 * still "owns" the NAPI instance and therefore can
		 * move the instance around on the list at will.
		 */
3950 if (unlikely(work == weight)) {
3951 if (unlikely(napi_disable_pending(n))) {
3952 local_irq_enable();
3953 napi_complete(n);
3954 local_irq_disable();
3955 } else
3956 list_move_tail(&n->poll_list, &sd->poll_list);
3957 }
3958
3959 netpoll_poll_unlock(have);
3960 }
3961out:
3962 net_rps_action_and_irq_enable(sd);
3963
3964#ifdef CONFIG_NET_DMA
	/*
	 * There may not be any more sk_buffs coming right now, so push
	 * any pending DMA copies to hardware.
	 */
3969 dma_issue_pending_all();
3970#endif
3971
3972 return;
3973
3974softnet_break:
3975 sd->time_squeeze++;
3976 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3977 goto out;
3978}
3979
3980static gifconf_func_t *gifconf_list[NPROTO];
3981
/**
 *	register_gifconf	-	register a SIOCGIF handler
 *	@family: Address family
 *	@gifconf: Function handler
 *
 *	Register protocol dependent address dumping routines.  The handler
 *	that is passed must not be freed or reused until it has been replaced
 *	by another handler.
 */
3991int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3992{
3993 if (family >= NPROTO)
3994 return -EINVAL;
3995 gifconf_list[family] = gifconf;
3996 return 0;
3997}
3998EXPORT_SYMBOL(register_gifconf);
3999
/*
 *	Map an interface index to its name (SIOCGIFNAME).
 *
 *	We need this ioctl for efficient implementation of the
 *	if_indextoname() function required by the IPv6 API.  Without
 *	it, we would have to search all the interfaces to find a
 *	match.  --pb
 */
4012static int dev_ifname(struct net *net, struct ifreq __user *arg)
4013{
4014 struct net_device *dev;
4015 struct ifreq ifr;
4016
	/*
	 *	Fetch the caller's info block.
	 */
4021 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4022 return -EFAULT;
4023
4024 rcu_read_lock();
4025 dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex);
4026 if (!dev) {
4027 rcu_read_unlock();
4028 return -ENODEV;
4029 }
4030
4031 strcpy(ifr.ifr_name, dev->name);
4032 rcu_read_unlock();
4033
4034 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
4035 return -EFAULT;
4036 return 0;
4037}
4038
/*
 *	Perform a SIOCGIFCONF call.  This structure will change
 *	size eventually, and there is nothing I can do about it.
 *	Thus we will need a 'compatibility mode'.
 */
4045static int dev_ifconf(struct net *net, char __user *arg)
4046{
4047 struct ifconf ifc;
4048 struct net_device *dev;
4049 char __user *pos;
4050 int len;
4051 int total;
4052 int i;
4053
	/*
	 *	Fetch the caller's info block.
	 */
4058 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
4059 return -EFAULT;
4060
4061 pos = ifc.ifc_buf;
4062 len = ifc.ifc_len;
4063
	/*
	 *	Loop over the interfaces, and write an info block for each.
	 */
4068 total = 0;
4069 for_each_netdev(net, dev) {
4070 for (i = 0; i < NPROTO; i++) {
4071 if (gifconf_list[i]) {
4072 int done;
4073 if (!pos)
4074 done = gifconf_list[i](dev, NULL, 0);
4075 else
4076 done = gifconf_list[i](dev, pos + total,
4077 len - total);
4078 if (done < 0)
4079 return -EFAULT;
4080 total += done;
4081 }
4082 }
4083 }
4084
	/*
	 *	All done.  Write the updated control block back to the caller.
	 */
4088 ifc.ifc_len = total;
4089
	/*
	 *	Both BSD and Solaris return 0 here, so we do too.
	 */
4093 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
4094}
4095
4096#ifdef CONFIG_PROC_FS
4097
4098#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
4099
4100struct dev_iter_state {
4101 struct seq_net_private p;
4102 unsigned int pos;
4103};
4104
4105#define get_bucket(x) ((x) >> BUCKET_SPACE)
4106#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
4107#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
4108
4109static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
4110{
4111 struct dev_iter_state *state = seq->private;
4112 struct net *net = seq_file_net(seq);
4113 struct net_device *dev;
4114 struct hlist_node *p;
4115 struct hlist_head *h;
4116 unsigned int count, bucket, offset;
4117
4118 bucket = get_bucket(state->pos);
4119 offset = get_offset(state->pos);
4120 h = &net->dev_name_head[bucket];
4121 count = 0;
4122 hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
4123 if (count++ == offset) {
4124 state->pos = set_bucket_offset(bucket, count);
4125 return dev;
4126 }
4127 }
4128
4129 return NULL;
4130}
4131
4132static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
4133{
4134 struct dev_iter_state *state = seq->private;
4135 struct net_device *dev;
4136 unsigned int bucket;
4137
4138 bucket = get_bucket(state->pos);
4139 do {
4140 dev = dev_from_same_bucket(seq);
4141 if (dev)
4142 return dev;
4143
4144 bucket++;
4145 state->pos = set_bucket_offset(bucket, 0);
4146 } while (bucket < NETDEV_HASHENTRIES);
4147
4148 return NULL;
4149}
4150
/*
 *	This is invoked by the /proc filesystem handler to display a device
 *	in detail.
 */
4155void *dev_seq_start(struct seq_file *seq, loff_t *pos)
4156 __acquires(RCU)
4157{
4158 struct dev_iter_state *state = seq->private;
4159
4160 rcu_read_lock();
4161 if (!*pos)
4162 return SEQ_START_TOKEN;
4163
	/* check for end of the hash */
4165 if (state->pos == 0 && *pos > 1)
4166 return NULL;
4167
4168 return dev_from_new_bucket(seq);
4169}
4170
4171void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4172{
4173 struct net_device *dev;
4174
4175 ++*pos;
4176
4177 if (v == SEQ_START_TOKEN)
4178 return dev_from_new_bucket(seq);
4179
4180 dev = dev_from_same_bucket(seq);
4181 if (dev)
4182 return dev;
4183
4184 return dev_from_new_bucket(seq);
4185}
4186
4187void dev_seq_stop(struct seq_file *seq, void *v)
4188 __releases(RCU)
4189{
4190 rcu_read_unlock();
4191}
4192
4193static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
4194{
4195 struct rtnl_link_stats64 temp;
4196 const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);
4197
4198 seq_printf(seq, "%6s: %7llu %7llu %4llu %4llu %4llu %5llu %10llu %9llu "
4199 "%8llu %7llu %4llu %4llu %4llu %5llu %7llu %10llu\n",
4200 dev->name, stats->rx_bytes, stats->rx_packets,
4201 stats->rx_errors,
4202 stats->rx_dropped + stats->rx_missed_errors,
4203 stats->rx_fifo_errors,
4204 stats->rx_length_errors + stats->rx_over_errors +
4205 stats->rx_crc_errors + stats->rx_frame_errors,
4206 stats->rx_compressed, stats->multicast,
4207 stats->tx_bytes, stats->tx_packets,
4208 stats->tx_errors, stats->tx_dropped,
4209 stats->tx_fifo_errors, stats->collisions,
4210 stats->tx_carrier_errors +
4211 stats->tx_aborted_errors +
4212 stats->tx_window_errors +
4213 stats->tx_heartbeat_errors,
4214 stats->tx_compressed);
4215}
4216
/*
 *	Called from the PROCfs module.  This now uses the new arbitrary sized
 *	/proc/net interface to create /proc/net/dev.
 */
4221static int dev_seq_show(struct seq_file *seq, void *v)
4222{
4223 if (v == SEQ_START_TOKEN)
4224 seq_puts(seq, "Inter-| Receive "
4225 " | Transmit\n"
4226 " face |bytes packets errs drop fifo frame "
4227 "compressed multicast|bytes packets errs "
4228 "drop fifo colls carrier compressed\n");
4229 else
4230 dev_seq_printf_stats(seq, v);
4231 return 0;
4232}
4233
4234static struct softnet_data *softnet_get_online(loff_t *pos)
4235{
4236 struct softnet_data *sd = NULL;
4237
4238 while (*pos < nr_cpu_ids)
4239 if (cpu_online(*pos)) {
4240 sd = &per_cpu(softnet_data, *pos);
4241 break;
4242 } else
4243 ++*pos;
4244 return sd;
4245}
4246
4247static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
4248{
4249 return softnet_get_online(pos);
4250}
4251
4252static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4253{
4254 ++*pos;
4255 return softnet_get_online(pos);
4256}
4257
4258static void softnet_seq_stop(struct seq_file *seq, void *v)
4259{
4260}
4261
4262static int softnet_seq_show(struct seq_file *seq, void *v)
4263{
4264 struct softnet_data *sd = v;
4265
4266 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
4267 sd->processed, sd->dropped, sd->time_squeeze, 0,
4268 0, 0, 0, 0,
4269 sd->cpu_collision, sd->received_rps);
4270 return 0;
4271}
4272
4273static const struct seq_operations dev_seq_ops = {
4274 .start = dev_seq_start,
4275 .next = dev_seq_next,
4276 .stop = dev_seq_stop,
4277 .show = dev_seq_show,
4278};
4279
4280static int dev_seq_open(struct inode *inode, struct file *file)
4281{
4282 return seq_open_net(inode, file, &dev_seq_ops,
4283 sizeof(struct dev_iter_state));
4284}
4285
4286int dev_seq_open_ops(struct inode *inode, struct file *file,
4287 const struct seq_operations *ops)
4288{
4289 return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
4290}
4291
4292static const struct file_operations dev_seq_fops = {
4293 .owner = THIS_MODULE,
4294 .open = dev_seq_open,
4295 .read = seq_read,
4296 .llseek = seq_lseek,
4297 .release = seq_release_net,
4298};
4299
4300static const struct seq_operations softnet_seq_ops = {
4301 .start = softnet_seq_start,
4302 .next = softnet_seq_next,
4303 .stop = softnet_seq_stop,
4304 .show = softnet_seq_show,
4305};
4306
4307static int softnet_seq_open(struct inode *inode, struct file *file)
4308{
4309 return seq_open(file, &softnet_seq_ops);
4310}
4311
4312static const struct file_operations softnet_seq_fops = {
4313 .owner = THIS_MODULE,
4314 .open = softnet_seq_open,
4315 .read = seq_read,
4316 .llseek = seq_lseek,
4317 .release = seq_release,
4318};
4319
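/*
 * Walk ptype_all, then each ptype_base hash chain, to locate the pos'th
 * packet_type for the /proc/net/ptype iterator.
 */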
4320static void *ptype_get_idx(loff_t pos)
4321{
4322 struct packet_type *pt = NULL;
4323 loff_t i = 0;
4324 int t;
4325
4326 list_for_each_entry_rcu(pt, &ptype_all, list) {
4327 if (i == pos)
4328 return pt;
4329 ++i;
4330 }
4331
4332 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
4333 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
4334 if (i == pos)
4335 return pt;
4336 ++i;
4337 }
4338 }
4339 return NULL;
4340}
4341
4342static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
4343 __acquires(RCU)
4344{
4345 rcu_read_lock();
4346 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
4347}
4348
4349static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
4350{
4351 struct packet_type *pt;
4352 struct list_head *nxt;
4353 int hash;
4354
4355 ++*pos;
4356 if (v == SEQ_START_TOKEN)
4357 return ptype_get_idx(0);
4358
4359 pt = v;
4360 nxt = pt->list.next;
4361 if (pt->type == htons(ETH_P_ALL)) {
4362 if (nxt != &ptype_all)
4363 goto found;
4364 hash = 0;
4365 nxt = ptype_base[0].next;
4366 } else
4367 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
4368
4369 while (nxt == &ptype_base[hash]) {
4370 if (++hash >= PTYPE_HASH_SIZE)
4371 return NULL;
4372 nxt = ptype_base[hash].next;
4373 }
4374found:
4375 return list_entry(nxt, struct packet_type, list);
4376}
4377
4378static void ptype_seq_stop(struct seq_file *seq, void *v)
4379 __releases(RCU)
4380{
4381 rcu_read_unlock();
4382}
4383
4384static int ptype_seq_show(struct seq_file *seq, void *v)
4385{
4386 struct packet_type *pt = v;
4387
4388 if (v == SEQ_START_TOKEN)
4389 seq_puts(seq, "Type Device Function\n");
4390 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4391 if (pt->type == htons(ETH_P_ALL))
4392 seq_puts(seq, "ALL ");
4393 else
4394 seq_printf(seq, "%04x", ntohs(pt->type));
4395
4396 seq_printf(seq, " %-8s %pF\n",
4397 pt->dev ? pt->dev->name : "", pt->func);
4398 }
4399
4400 return 0;
4401}
4402
4403static const struct seq_operations ptype_seq_ops = {
4404 .start = ptype_seq_start,
4405 .next = ptype_seq_next,
4406 .stop = ptype_seq_stop,
4407 .show = ptype_seq_show,
4408};
4409
4410static int ptype_seq_open(struct inode *inode, struct file *file)
4411{
4412 return seq_open_net(inode, file, &ptype_seq_ops,
4413 sizeof(struct seq_net_private));
4414}
4415
4416static const struct file_operations ptype_seq_fops = {
4417 .owner = THIS_MODULE,
4418 .open = ptype_seq_open,
4419 .read = seq_read,
4420 .llseek = seq_lseek,
4421 .release = seq_release_net,
4422};
4423
4424
4425static int __net_init dev_proc_net_init(struct net *net)
4426{
4427 int rc = -ENOMEM;
4428
4429 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4430 goto out;
4431 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4432 goto out_dev;
4433 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4434 goto out_softnet;
4435
4436 if (wext_proc_init(net))
4437 goto out_ptype;
4438 rc = 0;
4439out:
4440 return rc;
4441out_ptype:
4442 proc_net_remove(net, "ptype");
4443out_softnet:
4444 proc_net_remove(net, "softnet_stat");
4445out_dev:
4446 proc_net_remove(net, "dev");
4447 goto out;
4448}
4449
4450static void __net_exit dev_proc_net_exit(struct net *net)
4451{
4452 wext_proc_exit(net);
4453
4454 proc_net_remove(net, "ptype");
4455 proc_net_remove(net, "softnet_stat");
4456 proc_net_remove(net, "dev");
4457}
4458
4459static struct pernet_operations __net_initdata dev_proc_ops = {
4460 .init = dev_proc_net_init,
4461 .exit = dev_proc_net_exit,
4462};
4463
4464static int __init dev_proc_init(void)
4465{
4466 return register_pernet_subsys(&dev_proc_ops);
4467}
4468#else
4469#define dev_proc_init() 0
4470#endif
4471
/**
 *	netdev_set_master	-	set up master pointer
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave.  Pass %NULL to break the
 *	bonding.  The caller must hold the rtnl_mutex.
 */
4483int netdev_set_master(struct net_device *slave, struct net_device *master)
4484{
4485 struct net_device *old = slave->master;
4486
4487 ASSERT_RTNL();
4488
4489 if (master) {
4490 if (old)
4491 return -EBUSY;
4492 dev_hold(master);
4493 }
4494
4495 slave->master = master;
4496
4497 if (old)
4498 dev_put(old);
4499 return 0;
4500}
4501EXPORT_SYMBOL(netdev_set_master);
4502
/**
 *	netdev_set_bond_master	-	set up bonding master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave, adjusts %IFF_SLAVE and sends
 *	an RTM_NEWLINK message.  Pass %NULL to break the bonding.  The caller
 *	must hold the rtnl_mutex.
 */
4513int netdev_set_bond_master(struct net_device *slave, struct net_device *master)
4514{
4515 int err;
4516
4517 ASSERT_RTNL();
4518
4519 err = netdev_set_master(slave, master);
4520 if (err)
4521 return err;
4522 if (master)
4523 slave->flags |= IFF_SLAVE;
4524 else
4525 slave->flags &= ~IFF_SLAVE;
4526
4527 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4528 return 0;
4529}
4530EXPORT_SYMBOL(netdev_set_bond_master);
4531
4532static void dev_change_rx_flags(struct net_device *dev, int flags)
4533{
4534 const struct net_device_ops *ops = dev->netdev_ops;
4535
4536 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4537 ops->ndo_change_rx_flags(dev, flags);
4538}
4539
4540static int __dev_set_promiscuity(struct net_device *dev, int inc)
4541{
4542 unsigned short old_flags = dev->flags;
4543 uid_t uid;
4544 gid_t gid;
4545
4546 ASSERT_RTNL();
4547
4548 dev->flags |= IFF_PROMISC;
4549 dev->promiscuity += inc;
4550 if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow: if inc would wrap the counter, leave
		 * promiscuity untouched and return an error.
		 */
4555 if (inc < 0)
4556 dev->flags &= ~IFF_PROMISC;
4557 else {
4558 dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity counter would "
				"overflow, set promiscuity failed; the "
				"promiscuity feature of the device might "
				"be broken.\n", dev->name);
4562 return -EOVERFLOW;
4563 }
4564 }
4565 if (dev->flags != old_flags) {
4566 printk(KERN_INFO "device %s %s promiscuous mode\n",
4567 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4568 "left");
4569 if (audit_enabled) {
4570 current_uid_gid(&uid, &gid);
4571 audit_log(current->audit_context, GFP_ATOMIC,
4572 AUDIT_ANOM_PROMISCUOUS,
4573 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4574 dev->name, (dev->flags & IFF_PROMISC),
4575 (old_flags & IFF_PROMISC),
4576 audit_get_loginuid(current),
4577 uid, gid,
4578 audit_get_sessionid(current));
4579 }
4580
4581 dev_change_rx_flags(dev, IFF_PROMISC);
4582 }
4583 return 0;
4584}
4585
/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device.  While the count in the device
 *	remains above zero the interface remains promiscuous.  Once it hits zero
 *	the device reverts back to normal filtering operation.  A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */
4597int dev_set_promiscuity(struct net_device *dev, int inc)
4598{
4599 unsigned short old_flags = dev->flags;
4600 int err;
4601
4602 err = __dev_set_promiscuity(dev, inc);
4603 if (err < 0)
4604 return err;
4605 if (dev->flags != old_flags)
4606 dev_set_rx_mode(dev);
4607 return err;
4608}
4609EXPORT_SYMBOL(dev_set_promiscuity);
4610
/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device.  While the
 *	count in the device remains above zero the interface remains listening
 *	to all interfaces.  Once it hits zero the device reverts back to normal
 *	filtering operation.  A negative @inc value is used to drop the counter
 *	when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */
4624int dev_set_allmulti(struct net_device *dev, int inc)
4625{
4626 unsigned short old_flags = dev->flags;
4627
4628 ASSERT_RTNL();
4629
4630 dev->flags |= IFF_ALLMULTI;
4631 dev->allmulti += inc;
4632 if (dev->allmulti == 0) {
		/*
		 * Avoid overflow: if inc would wrap the counter, leave
		 * allmulti untouched and return an error.
		 */
4637 if (inc < 0)
4638 dev->flags &= ~IFF_ALLMULTI;
4639 else {
4640 dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti counter would "
				"overflow, set allmulti failed; the allmulti "
				"feature of the device might be broken.\n",
				dev->name);
4644 return -EOVERFLOW;
4645 }
4646 }
4647 if (dev->flags ^ old_flags) {
4648 dev_change_rx_flags(dev, IFF_ALLMULTI);
4649 dev_set_rx_mode(dev);
4650 }
4651 return 0;
4652}
4653EXPORT_SYMBOL(dev_set_allmulti);
4654
/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering.  When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are enabled.
 */
4661void __dev_set_rx_mode(struct net_device *dev)
4662{
4663 const struct net_device_ops *ops = dev->netdev_ops;
4664
	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags & IFF_UP))
4667 return;
4668
4669 if (!netif_device_present(dev))
4670 return;
4671
4672 if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
		/* Unicast address changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity() here is safe.
		 */
4676 if (!netdev_uc_empty(dev) && !dev->uc_promisc) {
4677 __dev_set_promiscuity(dev, 1);
4678 dev->uc_promisc = true;
4679 } else if (netdev_uc_empty(dev) && dev->uc_promisc) {
4680 __dev_set_promiscuity(dev, -1);
4681 dev->uc_promisc = false;
4682 }
4683 }
4684
4685 if (ops->ndo_set_rx_mode)
4686 ops->ndo_set_rx_mode(dev);
4687}
4688
4689void dev_set_rx_mode(struct net_device *dev)
4690{
4691 netif_addr_lock_bh(dev);
4692 __dev_set_rx_mode(dev);
4693 netif_addr_unlock_bh(dev);
4694}
4695
/**
 *	dev_get_flags - get flags reported to userspace
 *	@dev: device
 *
 *	Get the combination of flag bits exported through APIs to userspace.
 */
4702unsigned dev_get_flags(const struct net_device *dev)
4703{
4704 unsigned flags;
4705
4706 flags = (dev->flags & ~(IFF_PROMISC |
4707 IFF_ALLMULTI |
4708 IFF_RUNNING |
4709 IFF_LOWER_UP |
4710 IFF_DORMANT)) |
4711 (dev->gflags & (IFF_PROMISC |
4712 IFF_ALLMULTI));
4713
4714 if (netif_running(dev)) {
4715 if (netif_oper_up(dev))
4716 flags |= IFF_RUNNING;
4717 if (netif_carrier_ok(dev))
4718 flags |= IFF_LOWER_UP;
4719 if (netif_dormant(dev))
4720 flags |= IFF_DORMANT;
4721 }
4722
4723 return flags;
4724}
4725EXPORT_SYMBOL(dev_get_flags);
4726
4727int __dev_change_flags(struct net_device *dev, unsigned int flags)
4728{
4729 int old_flags = dev->flags;
4730 int ret;
4731
4732 ASSERT_RTNL();
4733
	/*
	 *	Set the flags on our device.
	 */
4738 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4739 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4740 IFF_AUTOMEDIA)) |
4741 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4742 IFF_ALLMULTI));
4743
	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */
4748 if ((old_flags ^ flags) & IFF_MULTICAST)
4749 dev_change_rx_flags(dev, IFF_MULTICAST);
4750
4751 dev_set_rx_mode(dev);
4752
	/*
	 *	Have we downed the interface?  We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */
4759 ret = 0;
4760 if ((old_flags ^ flags) & IFF_UP) {
4761 ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
4762
4763 if (!ret)
4764 dev_set_rx_mode(dev);
4765 }
4766
4767 if ((flags ^ dev->gflags) & IFF_PROMISC) {
4768 int inc = (flags & IFF_PROMISC) ? 1 : -1;
4769
4770 dev->gflags ^= IFF_PROMISC;
4771 dev_set_promiscuity(dev, inc);
4772 }
4773
	/*
	 *	NOTE: the order of synchronization of IFF_PROMISC and
	 *	IFF_ALLMULTI is important.  Some (broken) drivers set
	 *	IFF_PROMISC when IFF_ALLMULTI is requested, not asking
	 *	us and not reporting.
	 */
4778 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4779 int inc = (flags & IFF_ALLMULTI) ? 1 : -1;
4780
4781 dev->gflags ^= IFF_ALLMULTI;
4782 dev_set_allmulti(dev, inc);
4783 }
4784
4785 return ret;
4786}
4787
4788void __dev_notify_flags(struct net_device *dev, unsigned int old_flags)
4789{
4790 unsigned int changes = dev->flags ^ old_flags;
4791
4792 if (changes & IFF_UP) {
4793 if (dev->flags & IFF_UP)
4794 call_netdevice_notifiers(NETDEV_UP, dev);
4795 else
4796 call_netdevice_notifiers(NETDEV_DOWN, dev);
4797 }
4798
4799 if (dev->flags & IFF_UP &&
4800 (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE)))
4801 call_netdevice_notifiers(NETDEV_CHANGE, dev);
4802}
4803
/**
 *	dev_change_flags - change device settings
 *	@dev: device
 *	@flags: device state flags
 *
 *	Change settings on device based state flags.  The flags are
 *	in the userspace exported format.
 */
4812int dev_change_flags(struct net_device *dev, unsigned flags)
4813{
4814 int ret, changes;
4815 int old_flags = dev->flags;
4816
4817 ret = __dev_change_flags(dev, flags);
4818 if (ret < 0)
4819 return ret;
4820
4821 changes = old_flags ^ dev->flags;
4822 if (changes)
4823 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
4824
4825 __dev_notify_flags(dev, old_flags);
4826 return ret;
4827}
4828EXPORT_SYMBOL(dev_change_flags);
4829
/**
 *	dev_set_mtu - Change maximum transfer unit
 *	@dev: device
 *	@new_mtu: new transfer unit
 *
 *	Change the maximum transfer size of the network device.
 */
4837int dev_set_mtu(struct net_device *dev, int new_mtu)
4838{
4839 const struct net_device_ops *ops = dev->netdev_ops;
4840 int err;
4841
4842 if (new_mtu == dev->mtu)
4843 return 0;
4844
	/* MTU must be positive. */
4846 if (new_mtu < 0)
4847 return -EINVAL;
4848
4849 if (!netif_device_present(dev))
4850 return -ENODEV;
4851
4852 err = 0;
4853 if (ops->ndo_change_mtu)
4854 err = ops->ndo_change_mtu(dev, new_mtu);
4855 else
4856 dev->mtu = new_mtu;
4857
4858 if (!err && dev->flags & IFF_UP)
4859 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
4860 return err;
4861}
4862EXPORT_SYMBOL(dev_set_mtu);
4863
/**
 *	dev_set_group - Change group this device belongs to
 *	@dev: device
 *	@new_group: group this device should belong to
 */
4869void dev_set_group(struct net_device *dev, int new_group)
4870{
4871 dev->group = new_group;
4872}
4873EXPORT_SYMBOL(dev_set_group);
4874
/**
 *	dev_set_mac_address - Change Media Access Control Address
 *	@dev: device
 *	@sa: new address
 *
 *	Change the hardware (MAC) address of the device.
 */
4882int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4883{
4884 const struct net_device_ops *ops = dev->netdev_ops;
4885 int err;
4886
4887 if (!ops->ndo_set_mac_address)
4888 return -EOPNOTSUPP;
4889 if (sa->sa_family != dev->type)
4890 return -EINVAL;
4891 if (!netif_device_present(dev))
4892 return -ENODEV;
4893 err = ops->ndo_set_mac_address(dev, sa);
4894 if (!err)
4895 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4896 return err;
4897}
4898EXPORT_SYMBOL(dev_set_mac_address);
4899
/*
 *	Perform the SIOCxIFxxx calls, inside rcu_read_lock().
 */
4903static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4904{
4905 int err;
4906 struct net_device *dev = dev_get_by_name_rcu(net, ifr->ifr_name);
4907
4908 if (!dev)
4909 return -ENODEV;
4910
4911 switch (cmd) {
4912 case SIOCGIFFLAGS:
4913 ifr->ifr_flags = (short) dev_get_flags(dev);
4914 return 0;
4915
	case SIOCGIFMETRIC:	/* Get the metric on the interface
				   (currently unused). */
4918 ifr->ifr_metric = 0;
4919 return 0;
4920
4921 case SIOCGIFMTU:
4922 ifr->ifr_mtu = dev->mtu;
4923 return 0;
4924
4925 case SIOCGIFHWADDR:
4926 if (!dev->addr_len)
4927 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4928 else
4929 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4930 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4931 ifr->ifr_hwaddr.sa_family = dev->type;
4932 return 0;
4933
4934 case SIOCGIFSLAVE:
4935 err = -EINVAL;
4936 break;
4937
4938 case SIOCGIFMAP:
4939 ifr->ifr_map.mem_start = dev->mem_start;
4940 ifr->ifr_map.mem_end = dev->mem_end;
4941 ifr->ifr_map.base_addr = dev->base_addr;
4942 ifr->ifr_map.irq = dev->irq;
4943 ifr->ifr_map.dma = dev->dma;
4944 ifr->ifr_map.port = dev->if_port;
4945 return 0;
4946
4947 case SIOCGIFINDEX:
4948 ifr->ifr_ifindex = dev->ifindex;
4949 return 0;
4950
4951 case SIOCGIFTXQLEN:
4952 ifr->ifr_qlen = dev->tx_queue_len;
4953 return 0;
4954
4955 default:
		/*
		 * dev_ioctl() should ensure this case
		 * is never reached.
		 */
4959 WARN_ON(1);
4960 err = -ENOTTY;
4961 break;
4962
4963 }
4964 return err;
4965}
4966
/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock().
 */
4970static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4971{
4972 int err;
4973 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4974 const struct net_device_ops *ops;
4975
4976 if (!dev)
4977 return -ENODEV;
4978
4979 ops = dev->netdev_ops;
4980
4981 switch (cmd) {
4982 case SIOCSIFFLAGS:
4983 return dev_change_flags(dev, ifr->ifr_flags);
4984
	case SIOCSIFMETRIC:	/* Set the metric on the interface
				   (currently unused). */
4987 return -EOPNOTSUPP;
4988
4989 case SIOCSIFMTU:
4990 return dev_set_mtu(dev, ifr->ifr_mtu);
4991
4992 case SIOCSIFHWADDR:
4993 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4994
4995 case SIOCSIFHWBROADCAST:
4996 if (ifr->ifr_hwaddr.sa_family != dev->type)
4997 return -EINVAL;
4998 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4999 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
5000 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
5001 return 0;
5002
5003 case SIOCSIFMAP:
5004 if (ops->ndo_set_config) {
5005 if (!netif_device_present(dev))
5006 return -ENODEV;
5007 return ops->ndo_set_config(dev, &ifr->ifr_map);
5008 }
5009 return -EOPNOTSUPP;
5010
5011 case SIOCADDMULTI:
5012 if (!ops->ndo_set_rx_mode ||
5013 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5014 return -EINVAL;
5015 if (!netif_device_present(dev))
5016 return -ENODEV;
5017 return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
5018
5019 case SIOCDELMULTI:
5020 if (!ops->ndo_set_rx_mode ||
5021 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
5022 return -EINVAL;
5023 if (!netif_device_present(dev))
5024 return -ENODEV;
5025 return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
5026
5027 case SIOCSIFTXQLEN:
5028 if (ifr->ifr_qlen < 0)
5029 return -EINVAL;
5030 dev->tx_queue_len = ifr->ifr_qlen;
5031 return 0;
5032
5033 case SIOCSIFNAME:
5034 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
5035 return dev_change_name(dev, ifr->ifr_newname);
5036
5037 case SIOCSHWTSTAMP:
5038 err = net_hwtstamp_validate(ifr);
5039 if (err)
5040 return err;
		/* fall through */

	/*
	 *	Unknown or private ioctl.
	 */
5046 default:
5047 if ((cmd >= SIOCDEVPRIVATE &&
5048 cmd <= SIOCDEVPRIVATE + 15) ||
5049 cmd == SIOCBONDENSLAVE ||
5050 cmd == SIOCBONDRELEASE ||
5051 cmd == SIOCBONDSETHWADDR ||
5052 cmd == SIOCBONDSLAVEINFOQUERY ||
5053 cmd == SIOCBONDINFOQUERY ||
5054 cmd == SIOCBONDCHANGEACTIVE ||
5055 cmd == SIOCGMIIPHY ||
5056 cmd == SIOCGMIIREG ||
5057 cmd == SIOCSMIIREG ||
5058 cmd == SIOCBRADDIF ||
5059 cmd == SIOCBRDELIF ||
5060 cmd == SIOCSHWTSTAMP ||
5061 cmd == SIOCWANDEV) {
5062 err = -EOPNOTSUPP;
5063 if (ops->ndo_do_ioctl) {
5064 if (netif_device_present(dev))
5065 err = ops->ndo_do_ioctl(dev, ifr, cmd);
5066 else
5067 err = -ENODEV;
5068 }
5069 } else
5070 err = -EINVAL;
5071
5072 }
5073 return err;
5074}
5075
/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices.  This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes.  The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */
5093int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
5094{
5095 struct ifreq ifr;
5096 int ret;
5097 char *colon;
5098
	/* One special case: SIOCGIFCONF takes an ifconf argument
	 * and requires shared lock, because it sleeps writing
	 * to user space.
	 */
5104 if (cmd == SIOCGIFCONF) {
5105 rtnl_lock();
5106 ret = dev_ifconf(net, (char __user *) arg);
5107 rtnl_unlock();
5108 return ret;
5109 }
5110 if (cmd == SIOCGIFNAME)
5111 return dev_ifname(net, (struct ifreq __user *)arg);
5112
5113 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
5114 return -EFAULT;
5115
5116 ifr.ifr_name[IFNAMSIZ-1] = 0;
5117
5118 colon = strchr(ifr.ifr_name, ':');
5119 if (colon)
5120 *colon = 0;
5121
	/*
	 *	See which interface the caller is talking about.
	 */
5126 switch (cmd) {
	/*
	 *	These ioctl calls:
	 *	- can be done by all.
	 *	- are atomic and do not require locking.
	 *	- return a value.
	 */
5133 case SIOCGIFFLAGS:
5134 case SIOCGIFMETRIC:
5135 case SIOCGIFMTU:
5136 case SIOCGIFHWADDR:
5137 case SIOCGIFSLAVE:
5138 case SIOCGIFMAP:
5139 case SIOCGIFINDEX:
5140 case SIOCGIFTXQLEN:
5141 dev_load(net, ifr.ifr_name);
5142 rcu_read_lock();
5143 ret = dev_ifsioc_locked(net, &ifr, cmd);
5144 rcu_read_unlock();
5145 if (!ret) {
5146 if (colon)
5147 *colon = ':';
5148 if (copy_to_user(arg, &ifr,
5149 sizeof(struct ifreq)))
5150 ret = -EFAULT;
5151 }
5152 return ret;
5153
5154 case SIOCETHTOOL:
5155 dev_load(net, ifr.ifr_name);
5156 rtnl_lock();
5157 ret = dev_ethtool(net, &ifr);
5158 rtnl_unlock();
5159 if (!ret) {
5160 if (colon)
5161 *colon = ':';
5162 if (copy_to_user(arg, &ifr,
5163 sizeof(struct ifreq)))
5164 ret = -EFAULT;
5165 }
5166 return ret;
5167
	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- return a value.
	 */
5174 case SIOCGMIIPHY:
5175 case SIOCGMIIREG:
5176 case SIOCSIFNAME:
5177 if (!capable(CAP_NET_ADMIN))
5178 return -EPERM;
5179 dev_load(net, ifr.ifr_name);
5180 rtnl_lock();
5181 ret = dev_ifsioc(net, &ifr, cmd);
5182 rtnl_unlock();
5183 if (!ret) {
5184 if (colon)
5185 *colon = ':';
5186 if (copy_to_user(arg, &ifr,
5187 sizeof(struct ifreq)))
5188 ret = -EFAULT;
5189 }
5190 return ret;
5191
	/*
	 *	These ioctl calls:
	 *	- require superuser power.
	 *	- require strict serialization.
	 *	- do not return a value.
	 */
5198 case SIOCSIFFLAGS:
5199 case SIOCSIFMETRIC:
5200 case SIOCSIFMTU:
5201 case SIOCSIFMAP:
5202 case SIOCSIFHWADDR:
5203 case SIOCSIFSLAVE:
5204 case SIOCADDMULTI:
5205 case SIOCDELMULTI:
5206 case SIOCSIFHWBROADCAST:
5207 case SIOCSIFTXQLEN:
5208 case SIOCSMIIREG:
5209 case SIOCBONDENSLAVE:
5210 case SIOCBONDRELEASE:
5211 case SIOCBONDSETHWADDR:
5212 case SIOCBONDCHANGEACTIVE:
5213 case SIOCBRADDIF:
5214 case SIOCBRDELIF:
5215 case SIOCSHWTSTAMP:
5216 if (!capable(CAP_NET_ADMIN))
5217 return -EPERM;
		/* fall through */
5219 case SIOCBONDSLAVEINFOQUERY:
5220 case SIOCBONDINFOQUERY:
5221 dev_load(net, ifr.ifr_name);
5222 rtnl_lock();
5223 ret = dev_ifsioc(net, &ifr, cmd);
5224 rtnl_unlock();
5225 return ret;
5226
	case SIOCGIFMEM:
		/* Get the per device memory space.  We can add this
		 * but currently do not support it. */
	case SIOCSIFMEM:
		/* Set the per device memory buffer space.
		 * Not applicable in our case. */
5233 case SIOCSIFLINK:
5234 return -ENOTTY;
5235
	/*
	 *	Unknown or private ioctl.
	 */
5239 default:
5240 if (cmd == SIOCWANDEV ||
5241 (cmd >= SIOCDEVPRIVATE &&
5242 cmd <= SIOCDEVPRIVATE + 15)) {
5243 dev_load(net, ifr.ifr_name);
5244 rtnl_lock();
5245 ret = dev_ifsioc(net, &ifr, cmd);
5246 rtnl_unlock();
5247 if (!ret && copy_to_user(arg, &ifr,
5248 sizeof(struct ifreq)))
5249 ret = -EFAULT;
5250 return ret;
5251 }
5252
5253 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5254 return wext_handle_ioctl(net, &ifr, cmd, arg);
5255 return -ENOTTY;
5256 }
5257}
5258
/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
5268static int dev_new_index(struct net *net)
5269{
5270 static int ifindex;
5271 for (;;) {
5272 if (++ifindex <= 0)
5273 ifindex = 1;
5274 if (!__dev_get_by_index(net, ifindex))
5275 return ifindex;
5276 }
5277}
5278
/* Delayed registration/unregistration */
5280static LIST_HEAD(net_todo_list);
5281
5282static void net_set_todo(struct net_device *dev)
5283{
5284 list_add_tail(&dev->todo_list, &net_todo_list);
5285}
5286
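/*
 * Undo registration for a list of devices: close them, unlink them from
 * the device chain, notify protocols and drop the registration-time
 * reference.  Callers hold the rtnl lock.
 */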
5287static void rollback_registered_many(struct list_head *head)
5288{
5289 struct net_device *dev, *tmp;
5290
5291 BUG_ON(dev_boot_phase);
5292 ASSERT_RTNL();
5293
5294 list_for_each_entry_safe(dev, tmp, head, unreg_list) {
		/* Some devices call without registering
		 * for initialization unwind.  Remove those
		 * devices and proceed with the remaining.
		 */
5299 if (dev->reg_state == NETREG_UNINITIALIZED) {
5300 pr_debug("unregister_netdevice: device %s/%p never "
5301 "was registered\n", dev->name, dev);
5302
5303 WARN_ON(1);
5304 list_del(&dev->unreg_list);
5305 continue;
5306 }
5307 dev->dismantle = true;
5308 BUG_ON(dev->reg_state != NETREG_REGISTERED);
5309 }
5310
	/* If device is running, close it first. */
5312 dev_close_many(head);
5313
5314 list_for_each_entry(dev, head, unreg_list) {
		/* And unlink it from device chain. */
5316 unlist_netdevice(dev);
5317
5318 dev->reg_state = NETREG_UNREGISTERING;
5319 }
5320
5321 synchronize_net();
5322
5323 list_for_each_entry(dev, head, unreg_list) {
		/* Shutdown queueing discipline. */
5325 dev_shutdown(dev);
5326
		/* Notify protocols that we are about to destroy
		 * this device.  They should clean all the things.
		 */
5331 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5332
5333 if (!dev->rtnl_link_ops ||
5334 dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
5335 rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
5336
		/*
		 *	Flush the unicast and multicast chains.
		 */
5340 dev_uc_flush(dev);
5341 dev_mc_flush(dev);
5342
5343 if (dev->netdev_ops->ndo_uninit)
5344 dev->netdev_ops->ndo_uninit(dev);
5345
		/* Notifier chain MUST detach us from master device. */
5347 WARN_ON(dev->master);
5348
		/* Remove entries from kobject tree. */
5350 netdev_unregister_kobject(dev);
5351 }
5352
	/* Process any work delayed until the end of the batch. */
5354 dev = list_first_entry(head, struct net_device, unreg_list);
5355 call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
5356
5357 synchronize_net();
5358
5359 list_for_each_entry(dev, head, unreg_list)
5360 dev_put(dev);
5361}
5362
5363static void rollback_registered(struct net_device *dev)
5364{
5365 LIST_HEAD(single);
5366
5367 list_add(&dev->unreg_list, &single);
5368 rollback_registered_many(&single);
5369 list_del(&single);
5370}
5371
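/*
 * Enforce feature dependencies: checksum flags must be consistent, and
 * SG, TSO, GSO and UFO are stripped when the features they rely on are
 * missing.
 */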
5372static u32 netdev_fix_features(struct net_device *dev, u32 features)
5373{
	/* Fix illegal checksum combinations. */
5375 if ((features & NETIF_F_HW_CSUM) &&
5376 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5377 netdev_warn(dev, "mixed HW and IP checksum settings.\n");
5378 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5379 }
5380
5381 if ((features & NETIF_F_NO_CSUM) &&
5382 (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5383 netdev_warn(dev, "mixed no checksumming and other settings.\n");
5384 features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5385 }
5386
	/* Fix illegal SG+CSUM combinations. */
5388 if ((features & NETIF_F_SG) &&
5389 !(features & NETIF_F_ALL_CSUM)) {
5390 netdev_dbg(dev,
5391 "Dropping NETIF_F_SG since no checksum feature.\n");
5392 features &= ~NETIF_F_SG;
5393 }
5394
	/* TSO requires that SG is present as well. */
5396 if ((features & NETIF_F_ALL_TSO) && !(features & NETIF_F_SG)) {
5397 netdev_dbg(dev, "Dropping TSO features since no SG feature.\n");
5398 features &= ~NETIF_F_ALL_TSO;
5399 }
5400
	/* TSO ECN requires that TSO is present as well. */
5402 if ((features & NETIF_F_ALL_TSO) == NETIF_F_TSO_ECN)
5403 features &= ~NETIF_F_TSO_ECN;
5404
	/* Software GSO depends on SG. */
5406 if ((features & NETIF_F_GSO) && !(features & NETIF_F_SG)) {
5407 netdev_dbg(dev, "Dropping NETIF_F_GSO since no SG feature.\n");
5408 features &= ~NETIF_F_GSO;
5409 }
5410
	/* UFO needs SG and checksumming. */
	if (features & NETIF_F_UFO) {
		/* maybe split UFO into V4 and V6? */
5414 if (!((features & NETIF_F_GEN_CSUM) ||
5415 (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
5416 == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5417 netdev_dbg(dev,
5418 "Dropping NETIF_F_UFO since no checksum offload features.\n");
5419 features &= ~NETIF_F_UFO;
5420 }
5421
5422 if (!(features & NETIF_F_SG)) {
5423 netdev_dbg(dev,
5424 "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
5425 features &= ~NETIF_F_UFO;
5426 }
5427 }
5428
5429 return features;
5430}
5431
5432int __netdev_update_features(struct net_device *dev)
5433{
5434 u32 features;
5435 int err = 0;
5436
5437 ASSERT_RTNL();
5438
5439 features = netdev_get_wanted_features(dev);
5440
5441 if (dev->netdev_ops->ndo_fix_features)
5442 features = dev->netdev_ops->ndo_fix_features(dev, features);
5443
	/* driver might be less strict about feature dissection */
	features = netdev_fix_features(dev, features);

	if (dev->features == features)
		return 0;

	netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
		dev->features, features);

	if (dev->netdev_ops->ndo_set_features)
		err = dev->netdev_ops->ndo_set_features(dev, features);

	if (unlikely(err < 0)) {
		netdev_err(dev,
			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
			err, features, dev->features);
		return -1;
	}

	if (!err)
		dev->features = features;

	return 1;
}

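/**
 *	netdev_update_features - recalculate device features
 *	@dev: the device to check
 *
 *	Recalculate dev->features after the last change in dev->hw_features
 *	or dev->wanted_features, and notify userspace if the feature set
 *	changed.  Must be called under rtnl_lock().
 */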
void netdev_update_features(struct net_device *dev)
{
	if (__netdev_update_features(dev))
		netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_update_features);

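/**
 *	netdev_change_features - recalculate device features
 *	@dev: the device to check
 *
 *	Like netdev_update_features(), but the feature-change notification
 *	is sent unconditionally, even when the features did not change.
 *	Must be called under rtnl_lock().
 */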
void netdev_change_features(struct net_device *dev)
{
	__netdev_update_features(dev);
	netdev_features_change(dev);
}
EXPORT_SYMBOL(netdev_change_features);

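/**
 *	netif_stacked_transfer_operstate -	transfer operstate
 *	@rootdev: the root or lower level device to transfer state from
 *	@dev: the device to transfer operstate to
 *
 *	Transfer operational state from root to device. This is normally
 *	called when a stacking relationship exists between the root
 *	device and the device (a leaf device).
 */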
void netif_stacked_transfer_operstate(const struct net_device *rootdev,
				      struct net_device *dev)
{
	if (rootdev->operstate == IF_OPER_DORMANT)
		netif_dormant_on(dev);
	else
		netif_dormant_off(dev);

	if (netif_carrier_ok(rootdev)) {
		if (!netif_carrier_ok(dev))
			netif_carrier_on(dev);
	} else {
		if (netif_carrier_ok(dev))
			netif_carrier_off(dev);
	}
}
EXPORT_SYMBOL(netif_stacked_transfer_operstate);

#ifdef CONFIG_RPS
static int netif_alloc_rx_queues(struct net_device *dev)
{
	unsigned int i, count = dev->num_rx_queues;
	struct netdev_rx_queue *rx;

	BUG_ON(count < 1);

	rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
	if (!rx) {
		pr_err("netdev: Unable to allocate %u rx queues.\n", count);
		return -ENOMEM;
	}
	dev->_rx = rx;

	for (i = 0; i < count; i++)
		rx[i].dev = dev;
	return 0;
}
#endif

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue, void *_unused)
{
	spin_lock_init(&queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
	queue->xmit_lock_owner = -1;
	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
	queue->dev = dev;
}

static int netif_alloc_netdev_queues(struct net_device *dev)
{
	unsigned int count = dev->num_tx_queues;
	struct netdev_queue *tx;

	BUG_ON(count < 1);

	tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		pr_err("netdev: Unable to allocate %u tx queues.\n",
		       count);
		return -ENOMEM;
	}
	dev->_tx = tx;

	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);

	return 0;
}

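/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */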
int register_netdevice(struct net_device *dev)
{
	int ret;
	struct net *net = dev_net(dev);

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!net);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);

	dev->iflink = -1;

	ret = dev_get_valid_name(dev, dev->name);
	if (ret < 0)
		goto out;

	if (dev->netdev_ops->ndo_init) {
		ret = dev->netdev_ops->ndo_init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

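	/* Transfer changeable features to wanted_features and enable
	 * software offloads (GSO and GRO).
	 */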
	dev->hw_features |= NETIF_F_SOFT_FEATURES;
	dev->features |= NETIF_F_SOFT_FEATURES;
	dev->wanted_features = dev->features & dev->hw_features;

	dev->hw_features |= NETIF_F_NOCACHE_COPY;
	if ((dev->features & NETIF_F_ALL_CSUM) &&
	    !(dev->features & NETIF_F_NO_CSUM)) {
		dev->wanted_features |= NETIF_F_NOCACHE_COPY;
		dev->features |= NETIF_F_NOCACHE_COPY;
	}

	dev->vlan_features |= NETIF_F_HIGHDMA;

	ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		goto err_uninit;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	__netdev_update_features(dev);

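	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */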
	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);

	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

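	/*
	 *	Prevent userspace races by waiting until the network
	 *	device is fully setup before sending notifications.
	 */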
	if (!dev->rtnl_link_ops ||
	    dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
		rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

out:
	return ret;

err_uninit:
	if (dev->netdev_ops->ndo_uninit)
		dev->netdev_ops->ndo_uninit(dev);
	goto out;
}
EXPORT_SYMBOL(register_netdevice);

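/**
 *	init_dummy_netdev	- init a dummy network device for NAPI
 *	@dev: device to init
 *
 *	This takes a network device structure and initializes the minimum
 *	amount of fields so it can be used to schedule NAPI polls without
 *	registering a full blown interface. This is to be used by drivers
 *	that need to tie several hardware interfaces to a single NAPI
 *	poll scheduler due to HW limitations.
 */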
int init_dummy_netdev(struct net_device *dev)
{
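	/* Clear everything. Note we don't initialize spinlocks
	 * as they aren't supposed to be taken by any of the
	 * NAPI code and this dummy netdev is supposed to be
	 * only ever used for NAPI polls.
	 */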
	memset(dev, 0, sizeof(struct net_device));

	dev->reg_state = NETREG_DUMMY;

	INIT_LIST_HEAD(&dev->napi_list);

	set_bit(__LINK_STATE_PRESENT, &dev->state);
	set_bit(__LINK_STATE_START, &dev->state);

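	/* Note: we do not allocate a pcpu_refcnt for dummy devices,
	 * because users of this 'device' do not need to change
	 * its refcount.
	 */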
	return 0;
}
EXPORT_SYMBOL_GPL(init_dummy_netdev);

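/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice() that takes the
 *	rtnl semaphore for you.
 */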
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = register_netdevice(dev);
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);

int netdev_refcnt_read(const struct net_device *dev)
{
	int i, refcnt = 0;

	for_each_possible_cpu(i)
		refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
	return refcnt;
}
EXPORT_SYMBOL(netdev_refcnt_read);

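/**
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */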
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;
	int refcnt;

	linkwatch_forget_dev(dev);

	rebroadcast_time = warning_time = jiffies;
	refcnt = netdev_refcnt_read(dev);

	while (refcnt != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
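				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */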
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		refcnt = netdev_refcnt_read(dev);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, refcnt);
			warning_time = jiffies;
		}
	}
}

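/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock().
 * This allows us to deal with two problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 *
 * We must not return until all unregister events added during
 * the interval the lock was held have been completed.
 */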
void netdev_run_todo(void)
{
	struct list_head list;

	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

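	/* Wait for rcu callbacks to finish before next phase. */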
	if (!list_empty(&list))
		rcu_barrier();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_first_entry(&list, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		BUG_ON(netdev_refcnt_read(dev));
		WARN_ON(rcu_access_pointer(dev->ip_ptr));
		WARN_ON(rcu_access_pointer(dev->ip6_ptr));
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		kobject_put(&dev->dev.kobj);
	}
}

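/* Convert net_device_stats to rtnl_link_stats64. They have the same
 * fields in the same order, with only the type differing.
 */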
static void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
				    const struct net_device_stats *netdev_stats)
{
#if BITS_PER_LONG == 64
	BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats));
	memcpy(stats64, netdev_stats, sizeof(*stats64));
#else
	size_t i, n = sizeof(*stats64) / sizeof(u64);
	const unsigned long *src = (const unsigned long *)netdev_stats;
	u64 *dst = (u64 *)stats64;

	BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) !=
		     sizeof(*stats64) / sizeof(u64));
	for (i = 0; i < n; i++)
		dst[i] = src[i];
#endif
}

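/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *	@storage: place to store stats
 *
 *	Get network statistics from device. Return @storage.
 *	The device driver may provide its own method by setting
 *	dev->netdev_ops->ndo_get_stats64 or dev->netdev_ops->ndo_get_stats;
 *	otherwise the internal statistics structure is used.
 */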
struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
					struct rtnl_link_stats64 *storage)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats64) {
		memset(storage, 0, sizeof(*storage));
		ops->ndo_get_stats64(dev, storage);
	} else if (ops->ndo_get_stats) {
		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
	} else {
		netdev_stats_to_stats64(storage, &dev->stats);
	}
	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
	return storage;
}
EXPORT_SYMBOL(dev_get_stats);

struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
{
	struct netdev_queue *queue = dev_ingress_queue(dev);

#ifdef CONFIG_NET_CLS_ACT
	if (queue)
		return queue;
	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return NULL;
	netdev_init_one_queue(dev, queue, NULL);
	queue->qdisc = &noop_qdisc;
	queue->qdisc_sleeping = &noop_qdisc;
	rcu_assign_pointer(dev->ingress_queue, queue);
#endif
	return queue;
}

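/**
 *	alloc_netdev_mqs - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@txqs:		the number of TX subqueues to allocate
 *	@rxqs:		the number of RX subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization. Also allocates subqueue structs
 *	for each queue on the device.
 */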
struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *),
		unsigned int txqs, unsigned int rxqs)
{
	struct net_device *dev;
	size_t alloc_size;
	struct net_device *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	if (txqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device "
		       "with zero queues.\n");
		return NULL;
	}

#ifdef CONFIG_RPS
	if (rxqs < 1) {
		pr_err("alloc_netdev: Unable to allocate device "
		       "with zero RX queues.\n");
		return NULL;
	}
#endif

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
		alloc_size += sizeof_priv;
	}
	alloc_size += NETDEV_ALIGN - 1;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	dev = PTR_ALIGN(p, NETDEV_ALIGN);
	dev->padded = (char *)dev - (char *)p;

	dev->pcpu_refcnt = alloc_percpu(int);
	if (!dev->pcpu_refcnt)
		goto free_p;

	if (dev_addr_init(dev))
		goto free_pcpu;

	dev_mc_init(dev);
	dev_uc_init(dev);

	dev_net_set(dev, &init_net);

	dev->gso_max_size = GSO_MAX_SIZE;

	INIT_LIST_HEAD(&dev->napi_list);
	INIT_LIST_HEAD(&dev->unreg_list);
	INIT_LIST_HEAD(&dev->link_watch_list);
	dev->priv_flags = IFF_XMIT_DST_RELEASE;
	setup(dev);

	dev->num_tx_queues = txqs;
	dev->real_num_tx_queues = txqs;
	if (netif_alloc_netdev_queues(dev))
		goto free_all;

#ifdef CONFIG_RPS
	dev->num_rx_queues = rxqs;
	dev->real_num_rx_queues = rxqs;
	if (netif_alloc_rx_queues(dev))
		goto free_all;
#endif

	strcpy(dev->name, name);
	dev->group = INIT_NETDEV_GROUP;
	return dev;

free_all:
	free_netdev(dev);
	return NULL;

free_pcpu:
	free_percpu(dev->pcpu_refcnt);
	kfree(dev->_tx);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

free_p:
	kfree(p);
	return NULL;
}
EXPORT_SYMBOL(alloc_netdev_mqs);

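/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */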
void free_netdev(struct net_device *dev)
{
	struct napi_struct *p, *n;

	release_net(dev_net(dev));

	kfree(dev->_tx);
#ifdef CONFIG_RPS
	kfree(dev->_rx);
#endif

	kfree(rcu_dereference_protected(dev->ingress_queue, 1));

	dev_addr_flush(dev);

	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
		netif_napi_del(p);

	free_percpu(dev->pcpu_refcnt);
	dev->pcpu_refcnt = NULL;

	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	put_device(&dev->dev);
}
EXPORT_SYMBOL(free_netdev);

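/**
 *	synchronize_net -  Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */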
void synchronize_net(void)
{
	might_sleep();
	if (rtnl_is_locked())
		synchronize_rcu_expedited();
	else
		synchronize_rcu();
}
EXPORT_SYMBOL(synchronize_net);

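/**
 *	unregister_netdevice_queue - remove device from the kernel
 *	@dev: device
 *	@head: list
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *	If head is not NULL, the device is queued to be unregistered later.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */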
void unregister_netdevice_queue(struct net_device *dev, struct list_head *head)
{
	ASSERT_RTNL();

	if (head) {
		list_move_tail(&dev->unreg_list, head);
	} else {
		rollback_registered(dev);
		net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_queue);

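/**
 *	unregister_netdevice_many - unregister many devices
 *	@head: list of devices
 */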
void unregister_netdevice_many(struct list_head *head)
{
	struct net_device *dev;

	if (!list_empty(head)) {
		rollback_registered_many(head);
		list_for_each_entry(dev, head, unreg_list)
			net_set_todo(dev);
	}
}
EXPORT_SYMBOL(unregister_netdevice_many);

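/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore. In general you want to use this and not
 *	unregister_netdevice.
 */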
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(unregister_netdev);

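/**
 *	dev_change_net_namespace - move device to different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: if not NULL, name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */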
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	int err;

	ASSERT_RTNL();

	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	err = -EEXIST;
	if (__dev_get_by_name(net, dev->name)) {
		if (!pat)
			goto out;
		if (dev_get_valid_name(dev, pat) < 0)
			goto out;
	}

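	/*
	 * And now a mini version of register_netdevice / unregister_netdevice.
	 *
	 * If the device is running, close it first.
	 */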
	dev_close(dev);

	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	dev_shutdown(dev);

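	/* Notify protocols that we are about to destroy this device.
	 * They should clean all the things.
	 *
	 * Note that dev->reg_state stays at NETREG_REGISTERED.
	 * This is wanted because this way 8021q and macvlan know
	 * the device is just moving and can keep their slaves up.
	 */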
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
	rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);

	dev_uc_flush(dev);
	dev_mc_flush(dev);

	dev_net_set(dev, net);

	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	list_netdevice(dev);

	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U);

	synchronize_net();
	err = 0;
out:
	return err;
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;

	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	if (oldsd->output_queue) {
		*sd->output_queue_tailp = oldsd->output_queue;
		sd->output_queue_tailp = oldsd->output_queue_tailp;
		oldsd->output_queue = NULL;
		oldsd->output_queue_tailp = &oldsd->output_queue;
	}

	if (!list_empty(&oldsd->poll_list)) {
		list_splice_init(&oldsd->poll_list, &sd->poll_list);
		raise_softirq_irqoff(NET_RX_SOFTIRQ);
	}

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	while ((skb = __skb_dequeue(&oldsd->process_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
		netif_rx(skb);
		input_queue_head_incr(oldsd);
	}

	return NOTIFY_OK;
}

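/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all. Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */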
u32 netdev_increment_features(u32 all, u32 one, u32 mask)
{
	if (mask & NETIF_F_GEN_CSUM)
		mask |= NETIF_F_ALL_CSUM;
	mask |= NETIF_F_VLAN_CHALLENGED;

	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
	all &= one | ~NETIF_F_ALL_FOR_ALL;

	if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
		all &= ~NETIF_F_NO_CSUM;

	if (all & NETIF_F_GEN_CSUM)
		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

static int __net_init netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

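/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *
 *	Determine network driver for device.
 */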
const char *netdev_drivername(const struct net_device *dev)
{
	const struct device_driver *driver;
	const struct device *parent;
	const char *empty = "";

	parent = dev->dev.parent;
	if (!parent)
		return empty;

	driver = parent->driver;
	if (driver && driver->name)
		return driver->name;
	return empty;
}

int __netdev_printk(const char *level, const struct net_device *dev,
		    struct va_format *vaf)
{
	int r;

	if (dev && dev->dev.parent)
		r = dev_printk(level, dev->dev.parent, "%s: %pV",
			       netdev_name(dev), vaf);
	else if (dev)
		r = printk("%s%s: %pV", level, netdev_name(dev), vaf);
	else
		r = printk("%s(NULL net_device): %pV", level, vaf);

	return r;
}
EXPORT_SYMBOL(__netdev_printk);

int netdev_printk(const char *level, const struct net_device *dev,
		  const char *format, ...)
{
	struct va_format vaf;
	va_list args;
	int r;

	va_start(args, format);

	vaf.fmt = format;
	vaf.va = &args;

	r = __netdev_printk(level, dev, &vaf);
	va_end(args);

	return r;
}
EXPORT_SYMBOL(netdev_printk);

#define define_netdev_printk_level(func, level)		\
int func(const struct net_device *dev, const char *fmt, ...)	\
{								\
	int r;							\
	struct va_format vaf;					\
	va_list args;						\
								\
	va_start(args, fmt);					\
								\
	vaf.fmt = fmt;						\
	vaf.va = &args;						\
								\
	r = __netdev_printk(level, dev, &vaf);			\
	va_end(args);						\
								\
	return r;						\
}								\
EXPORT_SYMBOL(func);

define_netdev_printk_level(netdev_emerg, KERN_EMERG);
define_netdev_printk_level(netdev_alert, KERN_ALERT);
define_netdev_printk_level(netdev_crit, KERN_CRIT);
define_netdev_printk_level(netdev_err, KERN_ERR);
define_netdev_printk_level(netdev_warn, KERN_WARNING);
define_netdev_printk_level(netdev_notice, KERN_NOTICE);
define_netdev_printk_level(netdev_info, KERN_INFO);

static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *aux;

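	/*
	 * Push all migratable network devices back to the
	 * initial network namespace.
	 */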
	rtnl_lock();
	for_each_netdev_safe(net, dev, aux) {
		int err;
		char fb_name[IFNAMSIZ];

		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		if (dev->rtnl_link_ops)
			continue;

		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
			       __func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static void __net_exit default_device_exit_batch(struct list_head *net_list)
{
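	/* At exit all network devices must be removed from a network
	 * namespace. Do this in the reverse order of registration.
	 * Do this across as many network namespaces as possible to
	 * improve batching efficiency.
	 */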
	struct net_device *dev;
	struct net *net;
	LIST_HEAD(dev_kill_list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		for_each_netdev_reverse(net, dev) {
			if (dev->rtnl_link_ops)
				dev->rtnl_link_ops->dellink(dev, &dev_kill_list);
			else
				unregister_netdevice_queue(dev, &dev_kill_list);
		}
	}
	unregister_netdevice_many(&dev_kill_list);
	list_del(&dev_kill_list);
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
	.exit_batch = default_device_exit_batch,
};

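/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */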
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

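	/*
	 *	Initialise the packet receive queues.
	 */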
	for_each_possible_cpu(i) {
		struct softnet_data *sd = &per_cpu(softnet_data, i);

		memset(sd, 0, sizeof(*sd));
		skb_queue_head_init(&sd->input_pkt_queue);
		skb_queue_head_init(&sd->process_queue);
		sd->completion_queue = NULL;
		INIT_LIST_HEAD(&sd->poll_list);
		sd->output_queue = NULL;
		sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS
		sd->csd.func = rps_trigger_softirq;
		sd->csd.info = sd;
		sd->csd.flags = 0;
		sd->cpu = i;
#endif

		sd->backlog.poll = process_backlog;
		sd->backlog.weight = weight_p;
		sd->backlog.gro_list = NULL;
		sd->backlog.gro_count = 0;
	}

	dev_boot_phase = 0;

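	/* The loopback device is special. If any other network device
	 * is present in a network namespace the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the list
	 * of network devices: the first device that appears and the
	 * last network device that disappears.
	 */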
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

static int __init initialize_hashrnd(void)
{
	get_random_bytes(&hashrnd, sizeof(hashrnd));
	return 0;
}

late_initcall_sync(initialize_hashrnd);