linux/net/core/dev.c
/*
 *      NET3    Protocol independent device support routines.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 *      Derived from the non IP parts of dev.c 1.0.19
 *              Authors:        Ross Biro
 *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *      Additional Authors:
 *              Florian la Roche <rzsfl@rz.uni-sb.de>
 *              Alan Cox <gw4pts@gw4pts.ampr.org>
 *              David Hinds <dahinds@users.sourceforge.net>
 *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *              Adam Sulmicki <adam@cfar.umd.edu>
 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *      Changes:
 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
 *                                      to 2 if register_netdev gets called
 *                                      before net_dev_init & also removed a
 *                                      few lines of code in the process.
 *              Alan Cox        :       device private ioctl copies fields back.
 *              Alan Cox        :       Transmit queue code does relevant
 *                                      stunts to keep the queue safe.
 *              Alan Cox        :       Fixed double lock.
 *              Alan Cox        :       Fixed promisc NULL pointer trap
 *              ????????        :       Support the full private ioctl range
 *              Alan Cox        :       Moved ioctl permission check into
 *                                      drivers
 *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
 *              Alan Cox        :       100 backlog just doesn't cut it when
 *                                      you start doing multicast video 8)
 *              Alan Cox        :       Rewrote net_bh and list manager.
 *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
 *              Alan Cox        :       Took out transmit every packet pass
 *                                      Saved a few bytes in the ioctl handler
 *              Alan Cox        :       Network driver sets packet type before
 *                                      calling netif_rx. Saves a function
 *                                      call a packet.
 *              Alan Cox        :       Hashed net_bh()
 *              Richard Kooijman:       Timestamp fixes.
 *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
 *              Alan Cox        :       Device lock protection.
 *              Alan Cox        :       Fixed nasty side effect of device close
 *                                      changes.
 *              Rudi Cilibrasi  :       Pass the right thing to
 *                                      set_mac_address()
 *              Dave Miller     :       32bit quantity for the device lock to
 *                                      make it work out on a Sparc.
 *              Bjorn Ekwall    :       Added KERNELD hack.
 *              Alan Cox        :       Cleaned up the backlog initialise.
 *              Craig Metz      :       SIOCGIFCONF fix if space for under
 *                                      1 device.
 *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
 *                                      is no device open function.
 *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
 *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
 *              Cyrus Durgin    :       Cleaned for KMOD
 *              Adam Sulmicki   :       Bug Fix : Network Device Unload
 *                                      A network device unload needs to purge
 *                                      the backlog queue.
 *      Paul Rusty Russell      :       SIOCSIFNAME
 *              Pekka Riikonen  :       Netdev boot-time settings code
 *              Andrew Morton   :       Make unregister_netdevice wait
 *                                      indefinitely on dev->refcnt
 *              J Hadi Salim    :       - Backlog queue sampling
 *                                      - netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>

#include "net-sysfs.h"

/*
 *      The list of packet types we will receive (as opposed to discard)
 *      and the routines to invoke.
 *
 *      Why 16?  Because with 16 the only overlap we get on a hash of the
 *      low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *             sure which should go first, but I bet it won't make much
 *             difference if we are running VLANs.  The good news is that
 *             this protocol won't be in the list unless compiled in, so
 *             the average user (w/out VLANs) will not be adversely affected.
 *             --BLG
 *
 *              0800    IP
 *              8100    802.1Q VLAN
 *              0001    802.3
 *              0002    AX.25
 *              0004    802.2
 *              8035    RARP
 *              0005    SNAP
 *              0805    X.25
 *              0806    ARP
 *              8137    IPX
 *              0009    Localtalk
 *              86DD    IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list */
static struct list_head ptype_all __read_mostly;        /* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
        struct dma_client client;
        spinlock_t lock;
        cpumask_t channel_mask;
        struct dma_chan *channels[NR_CPUS];
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
        enum dma_state state);

static struct net_dma net_dma = {
        .client = {
                .event_callback = netdev_dma_event,
        },
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * For example usages, see register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS 8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
        unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
        return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
        return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
        struct net *net = dev->nd_net;

        ASSERT_RTNL();

        write_lock_bh(&dev_base_lock);
        list_add_tail(&dev->dev_list, &net->dev_base_head);
        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
        hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
        write_unlock_bh(&dev_base_lock);
        return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
        ASSERT_RTNL();

        /* Unlink dev from the device chain */
        write_lock_bh(&dev_base_lock);
        list_del(&dev->dev_list);
        hlist_del(&dev->name_hlist);
        hlist_del(&dev->index_hlist);
        write_unlock_bh(&dev_base_lock);
}

/*
 *      Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *      Device drivers call our routines to queue packets here. We empty the
 *      queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
        {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
         ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
         ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
         ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
         ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
         ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
         ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
         ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
         ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
         ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
         ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
         ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
         ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
         ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
         ARPHRD_NONE};

static const char *netdev_lock_name[] =
        {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
         "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
         "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
         "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
         "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
         "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
         "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
         "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
         "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
         "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
         "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
         "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
         "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
         "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
         "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
                if (netdev_lock_type[i] == dev_type)
                        return i;
        /* the last key is used by default */
        return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_lockdep_class(spinlock_t *lock,
                                            unsigned short dev_type)
{
        int i;

        i = netdev_lock_pos(dev_type);
        lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
                                   netdev_lock_name[i]);
}
#else
static inline void netdev_set_lockdep_class(spinlock_t *lock,
                                            unsigned short dev_type)
{
}
#endif

/*******************************************************************************

                Protocol management and registration routines

*******************************************************************************/

/*
 *      Add a protocol ID to the list. Now that the input handler is
 *      smarter we can dispense with all the messy stuff that used to be
 *      here.
 *
 *      BEWARE!!! Protocol handlers that mangle input packets
 *      MUST BE last in the hash buckets, and the checking of protocol
 *      handlers MUST start from the promiscuous ptype_all chain in net_bh.
 *      It is true now, do not change it.
 *      Explanation follows: if a protocol handler that mangles packets is
 *      first in the list, it has no way to sense that the packet is cloned
 *      and should be copied-on-write, so it will change the packet in place
 *      and subsequent readers will see a broken packet.
 *                                                      --ANK (980803)
 */

/**
 *      dev_add_pack - add packet handler
 *      @pt: packet type declaration
 *
 *      Add a protocol handler to the networking stack. The passed &packet_type
 *      is linked into kernel lists and may not be freed until it has been
 *      removed from the kernel lists.
 *
 *      This call does not sleep, therefore it cannot guarantee that
 *      all CPUs that are in the middle of receiving packets will see
 *      the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
        int hash;

        spin_lock_bh(&ptype_lock);
        if (pt->type == htons(ETH_P_ALL))
                list_add_rcu(&pt->list, &ptype_all);
        else {
                hash = ntohs(pt->type) & 15;
                list_add_rcu(&pt->list, &ptype_base[hash]);
        }
        spin_unlock_bh(&ptype_lock);
}

/**
 *      __dev_remove_pack        - remove packet handler
 *      @pt: packet type declaration
 *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *      from the kernel lists and can be freed or reused once this function
 *      returns.
 *
 *      The packet type might still be in use by receivers
 *      and must not be freed until after all the CPUs have gone
 *      through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
        struct list_head *head;
        struct packet_type *pt1;

        spin_lock_bh(&ptype_lock);

        if (pt->type == htons(ETH_P_ALL))
                head = &ptype_all;
        else
                head = &ptype_base[ntohs(pt->type) & 15];

        list_for_each_entry(pt1, head, list) {
                if (pt == pt1) {
                        list_del_rcu(&pt->list);
                        goto out;
                }
        }

        printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
        spin_unlock_bh(&ptype_lock);
}

/**
 *      dev_remove_pack  - remove packet handler
 *      @pt: packet type declaration
 *
 *      Remove a protocol handler that was previously added to the kernel
 *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *      from the kernel lists and can be freed or reused once this function
 *      returns.
 *
 *      This call sleeps to guarantee that no CPU is looking at the packet
 *      type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
        __dev_remove_pack(pt);

        synchronize_net();
}
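
/*
 * Example (illustrative sketch, not part of this file): a minimal
 * ETH_P_ALL tap, roughly as a module might register one.  The names
 * tap_rcv and tap_pt are hypothetical.
 *
 *      static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
 *                         struct packet_type *pt,
 *                         struct net_device *orig_dev)
 *      {
 *              kfree_skb(skb);
 *              return 0;
 *      }
 *
 *      static struct packet_type tap_pt = {
 *              .type = __constant_htons(ETH_P_ALL),
 *              .func = tap_rcv,
 *      };
 *
 *      dev_add_pack(&tap_pt);
 *      ...
 *      dev_remove_pack(&tap_pt);
 */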

/*******************************************************************************

                      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *      netdev_boot_setup_add   - add new setup entry
 *      @name: name of the device
 *      @map: configured settings for the device
 *
 *      Adds a new setup entry to the dev_boot_setup list.  The function
 *      returns 0 on error and 1 on success.  This is a generic routine
 *      for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
        struct netdev_boot_setup *s;
        int i;

        s = dev_boot_setup;
        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
                        memset(s[i].name, 0, sizeof(s[i].name));
                        strcpy(s[i].name, name);
                        memcpy(&s[i].map, map, sizeof(s[i].map));
                        break;
                }
        }

        return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *      netdev_boot_setup_check - check boot time settings
 *      @dev: the netdevice
 *
 *      Check boot time settings for the device.
 *      The found settings are set for the device to be used
 *      later in the device probing.
 *      Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
        struct netdev_boot_setup *s = dev_boot_setup;
        int i;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
                if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
                    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
                        dev->irq        = s[i].map.irq;
                        dev->base_addr  = s[i].map.base_addr;
                        dev->mem_start  = s[i].map.mem_start;
                        dev->mem_end    = s[i].map.mem_end;
                        return 1;
                }
        }
        return 0;
}


/**
 *      netdev_boot_base        - get address from boot time settings
 *      @prefix: prefix for network device
 *      @unit: id for network device
 *
 *      Check boot time settings for the base address of the device.
 *      The found settings are set for the device to be used
 *      later in the device probing.
 *      Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
        const struct netdev_boot_setup *s = dev_boot_setup;
        char name[IFNAMSIZ];
        int i;

        sprintf(name, "%s%d", prefix, unit);

        /*
         * If device already registered then return base of 1
         * to indicate not to probe for this interface
         */
        if (__dev_get_by_name(&init_net, name))
                return 1;

        for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
                if (!strcmp(name, s[i].name))
                        return s[i].map.base_addr;
        return 0;
}

/*
 * Saves the settings configured at boot time for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
        int ints[5];
        struct ifmap map;

        str = get_options(str, ARRAY_SIZE(ints), ints);
        if (!str || !*str)
                return 0;

        /* Save settings */
        memset(&map, 0, sizeof(map));
        if (ints[0] > 0)
                map.irq = ints[1];
        if (ints[0] > 1)
                map.base_addr = ints[2];
        if (ints[0] > 2)
                map.mem_start = ints[3];
        if (ints[0] > 3)
                map.mem_end = ints[4];

        /* Add new entry to the list */
        return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
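
/*
 * Example (illustrative): a kernel command line containing
 *
 *      netdev=9,0x300,0,0,eth0
 *
 * records irq 9 and I/O base 0x300 for eth0; the values are picked up
 * later by netdev_boot_setup_check() during device probing.
 */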

/*******************************************************************************

                            Device Interface Subroutines

*******************************************************************************/

/**
 *      __dev_get_by_name       - find a device by its name
 *      @net: the applicable net namespace
 *      @name: name to find
 *
 *      Find an interface by name. Must be called under RTNL semaphore
 *      or @dev_base_lock. If the name is found a pointer to the device
 *      is returned. If the name is not found then %NULL is returned. The
 *      reference counters are not incremented so the caller must be
 *      careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
        struct hlist_node *p;

        hlist_for_each(p, dev_name_hash(net, name)) {
                struct net_device *dev
                        = hlist_entry(p, struct net_device, name_hlist);
                if (!strncmp(dev->name, name, IFNAMSIZ))
                        return dev;
        }
        return NULL;
}

/**
 *      dev_get_by_name         - find a device by its name
 *      @net: the applicable net namespace
 *      @name: name to find
 *
 *      Find an interface by name. This can be called from any
 *      context and does its own locking. The returned handle has
 *      the usage count incremented and the caller must use dev_put() to
 *      release it when it is no longer needed. %NULL is returned if no
 *      matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
        struct net_device *dev;

        read_lock(&dev_base_lock);
        dev = __dev_get_by_name(net, name);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}
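
/*
 * Example usage (illustrative sketch; error handling elided):
 *
 *      struct net_device *dev = dev_get_by_name(net, "eth0");
 *
 *      if (dev) {
 *              ... use dev ...
 *              dev_put(dev);
 *      }
 */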

/**
 *      __dev_get_by_index - find a device by its ifindex
 *      @net: the applicable net namespace
 *      @ifindex: index of device
 *
 *      Search for an interface by index. Returns a pointer to the device,
 *      or %NULL if the device is not found. The device has not
 *      had its reference counter increased so the caller must be careful
 *      about locking. The caller must hold either the RTNL semaphore
 *      or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
        struct hlist_node *p;

        hlist_for_each(p, dev_index_hash(net, ifindex)) {
                struct net_device *dev
                        = hlist_entry(p, struct net_device, index_hlist);
                if (dev->ifindex == ifindex)
                        return dev;
        }
        return NULL;
}


/**
 *      dev_get_by_index - find a device by its ifindex
 *      @net: the applicable net namespace
 *      @ifindex: index of device
 *
 *      Search for an interface by index. Returns a pointer to the device,
 *      or %NULL if the device is not found. The device returned has
 *      had a reference added and the pointer is safe until the user calls
 *      dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
        struct net_device *dev;

        read_lock(&dev_base_lock);
        dev = __dev_get_by_index(net, ifindex);
        if (dev)
                dev_hold(dev);
        read_unlock(&dev_base_lock);
        return dev;
}

/**
 *      dev_getbyhwaddr - find a device by its hardware address
 *      @net: the applicable net namespace
 *      @type: media type of device
 *      @ha: hardware address
 *
 *      Search for an interface by MAC address. Returns a pointer to the
 *      device, or %NULL if the device is not found. The caller must hold the
 *      rtnl semaphore. The returned device has not had its ref count increased
 *      and the caller must therefore be careful about locking.
 *
 *      BUGS:
 *      If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
        struct net_device *dev;

        ASSERT_RTNL();

        for_each_netdev(net, dev)
                if (dev->type == type &&
                    !memcmp(dev->dev_addr, ha, dev->addr_len))
                        return dev;

        return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        ASSERT_RTNL();
        for_each_netdev(net, dev)
                if (dev->type == type)
                        return dev;

        return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
        struct net_device *dev;

        rtnl_lock();
        dev = __dev_getfirstbyhwtype(net, type);
        if (dev)
                dev_hold(dev);
        rtnl_unlock();
        return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *      dev_get_by_flags - find any device with given flags
 *      @net: the applicable net namespace
 *      @if_flags: IFF_* values
 *      @mask: bitmask of bits in if_flags to check
 *
 *      Search for any interface with the given flags. Returns a pointer to
 *      the first matching device, or %NULL if no such device is found.
 *      The device returned has had a reference added and the pointer is safe
 *      until the user calls dev_put to indicate they have finished with it.
 */

struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
        struct net_device *dev, *ret;

        ret = NULL;
        read_lock(&dev_base_lock);
        for_each_netdev(net, dev) {
                if (((dev->flags ^ if_flags) & mask) == 0) {
                        dev_hold(dev);
                        ret = dev;
                        break;
                }
        }
        read_unlock(&dev_base_lock);
        return ret;
}
/**
 *      dev_valid_name - check if name is okay for network device
 *      @name: name string
 *
 *      Network device names need to be valid file names
 *      to allow sysfs to work.  We also disallow any kind of
 *      whitespace.
 */
int dev_valid_name(const char *name)
{
        if (*name == '\0')
                return 0;
        if (strlen(name) >= IFNAMSIZ)
                return 0;
        if (!strcmp(name, ".") || !strcmp(name, ".."))
                return 0;

        while (*name) {
                if (*name == '/' || isspace(*name))
                        return 0;
                name++;
        }
        return 1;
}
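
/*
 * Examples (illustrative):
 *
 *      dev_valid_name("eth0")  returns 1
 *      dev_valid_name("eth 0") returns 0  (whitespace)
 *      dev_valid_name("a/b")   returns 0  (slash)
 *      dev_valid_name("..")    returns 0
 */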

/**
 *      __dev_alloc_name - allocate a name for a device
 *      @net: network namespace to allocate the device name in
 *      @name: name format string
 *      @buf:  scratch buffer and result name string
 *
 *      Passed a format string - e.g. "lt%d" - it will try to find a suitable
 *      id. It scans the list of devices to build up a free map, then chooses
 *      the first empty slot. The caller must hold the dev_base or rtnl lock
 *      while allocating the name and adding the device in order to avoid
 *      duplicates.
 *      Limited to bits_per_byte * page size devices (i.e. 32K on most
 *      platforms).
 *      Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
        int i = 0;
        const char *p;
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
                /*
                 * Verify the string as this thing may have come from
                 * the user.  There must be either one "%d" and no other "%"
                 * characters.
                 */
                if (p[1] != 'd' || strchr(p + 2, '%'))
                        return -EINVAL;

                /* Use one page as a bit array of possible slots */
                inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
                if (!inuse)
                        return -ENOMEM;

                for_each_netdev(net, d) {
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
                                continue;

                        /*  avoid cases where sscanf is not exact inverse of printf */
                        snprintf(buf, IFNAMSIZ, name, i);
                        if (!strncmp(buf, d->name, IFNAMSIZ))
                                set_bit(i, inuse);
                }

                i = find_first_zero_bit(inuse, max_netdevices);
                free_page((unsigned long) inuse);
        }

        snprintf(buf, IFNAMSIZ, name, i);
        if (!__dev_get_by_name(net, buf))
                return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
}

/**
 *      dev_alloc_name - allocate a name for a device
 *      @dev: device
 *      @name: name format string
 *
 *      Passed a format string - e.g. "lt%d" - it will try to find a suitable
 *      id. It scans the list of devices to build up a free map, then chooses
 *      the first empty slot. The caller must hold the dev_base or rtnl lock
 *      while allocating the name and adding the device in order to avoid
 *      duplicates.
 *      Limited to bits_per_byte * page size devices (i.e. 32K on most
 *      platforms).
 *      Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
        char buf[IFNAMSIZ];
        struct net *net;
        int ret;

        BUG_ON(!dev->nd_net);
        net = dev->nd_net;
        ret = __dev_alloc_name(net, name, buf);
        if (ret >= 0)
                strlcpy(dev->name, buf, IFNAMSIZ);
        return ret;
}
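
/*
 * Example (illustrative sketch): claiming the next free "eth%d" slot
 * for a freshly allocated net_device:
 *
 *      err = dev_alloc_name(dev, "eth%d");
 *      if (err < 0)
 *              goto fail;      (no free unit, or a bad format string)
 *
 * On success dev->name is e.g. "eth2" and err holds the unit number.
 */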


/**
 *      dev_change_name - change name of a device
 *      @dev: device
 *      @newname: name (or format string) must be at least IFNAMSIZ
 *
 *      Change the name of a device. A format string such as "eth%d" can be
 *      passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
        char oldname[IFNAMSIZ];
        int err = 0;
        int ret;
        struct net *net;

        ASSERT_RTNL();
        BUG_ON(!dev->nd_net);

        net = dev->nd_net;
        if (dev->flags & IFF_UP)
                return -EBUSY;

        if (!dev_valid_name(newname))
                return -EINVAL;

        if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
                return 0;

        memcpy(oldname, dev->name, IFNAMSIZ);

        if (strchr(newname, '%')) {
                err = dev_alloc_name(dev, newname);
                if (err < 0)
                        return err;
                strcpy(newname, dev->name);
        }
        else if (__dev_get_by_name(net, newname))
                return -EEXIST;
        else
                strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
        device_rename(&dev->dev, dev->name);

        write_lock_bh(&dev_base_lock);
        hlist_del(&dev->name_hlist);
        hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
        write_unlock_bh(&dev_base_lock);

        ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
        ret = notifier_to_errno(ret);

        if (ret) {
                if (err) {
                        printk(KERN_ERR
                               "%s: name change rollback failed: %d.\n",
                               dev->name, ret);
                } else {
                        err = ret;
                        memcpy(dev->name, oldname, IFNAMSIZ);
                        goto rollback;
                }
        }

        return err;
}

/**
 *      netdev_features_change - device changes features
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
        call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
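
/*
 * Example (illustrative sketch): a driver toggling a feature flag at
 * run time, e.g. from an ethtool hook, then announcing the change:
 *
 *      dev->features &= ~NETIF_F_IP_CSUM;
 *      netdev_features_change(dev);
 */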

/**
 *      netdev_state_change - device changes state
 *      @dev: device to cause notification
 *
 *      Called to indicate a device has changed state. This function calls
 *      the notifier chains for netdev_chain and sends a NEWLINK message
 *      to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
        if (dev->flags & IFF_UP) {
                call_netdevice_notifiers(NETDEV_CHANGE, dev);
                rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
        }
}

/**
 *      dev_load        - load a network module
 *      @net: the applicable net namespace
 *      @name: name of interface
 *
 *      If a network interface is not present and the process has suitable
 *      privileges this function loads the module. If module loading is not
 *      available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
        struct net_device *dev;

        read_lock(&dev_base_lock);
        dev = __dev_get_by_name(net, name);
        read_unlock(&dev_base_lock);

        if (!dev && capable(CAP_SYS_MODULE))
                request_module("%s", name);
}

/**
 *      dev_open        - prepare an interface for use.
 *      @dev:   device to open
 *
 *      Takes a device from down to up state. The device's private open
 *      function is invoked and then the multicast lists are loaded. Finally
 *      the device is moved into the up state and a %NETDEV_UP message is
 *      sent to the netdev notifier chain.
 *
 *      Calling this function on an active interface is a nop. On a failure
 *      a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
        int ret = 0;

        /*
         *      Is it already up?
         */

        if (dev->flags & IFF_UP)
                return 0;

        /*
         *      Is it even present?
         */
        if (!netif_device_present(dev))
                return -ENODEV;

        /*
         *      Call device private open method
         */
        set_bit(__LINK_STATE_START, &dev->state);

        if (dev->validate_addr)
                ret = dev->validate_addr(dev);

        if (!ret && dev->open)
                ret = dev->open(dev);

        /*
         *      If it went open OK then:
         */

        if (ret)
                clear_bit(__LINK_STATE_START, &dev->state);
        else {
                /*
                 *      Set the flags.
                 */
                dev->flags |= IFF_UP;

                /*
                 *      Initialize multicasting status
                 */
                dev_set_rx_mode(dev);

                /*
                 *      Wakeup transmit queue engine
                 */
                dev_activate(dev);

                /*
                 *      ... and announce new interface.
                 */
                call_netdevice_notifiers(NETDEV_UP, dev);
        }

        return ret;
}

/**
 *      dev_close - shutdown an interface.
 *      @dev: device to shutdown
 *
 *      This function moves an active device into down state. A
 *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *      chain.
 */
int dev_close(struct net_device *dev)
{
        might_sleep();

        if (!(dev->flags & IFF_UP))
                return 0;

        /*
         *      Tell people we are going down, so that they can
         *      prepare for death while the device is still operating.
         */
        call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

        dev_deactivate(dev);

        clear_bit(__LINK_STATE_START, &dev->state);

        /* Synchronize to scheduled poll. We cannot touch the poll list;
         * it may even be on a different cpu. So just clear netif_running().
         *
         * dev->stop() will invoke napi_disable() on all of its
         * napi_struct instances on this device.
         */
        smp_mb__after_clear_bit(); /* Commit netif_running(). */

        /*
         *      Call the device specific close. This cannot fail.
         *      Only if device is UP
         *
         *      We allow it to be called even after a DETACH hot-plug
         *      event.
         */
        if (dev->stop)
                dev->stop(dev);

        /*
         *      Device is now down.
         */

        dev->flags &= ~IFF_UP;

        /*
         * Tell people we are down
         */
        call_netdevice_notifiers(NETDEV_DOWN, dev);

        return 0;
}
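
/*
 * Example (illustrative sketch): cycling an interface from process
 * context.  Both calls are made under the RTNL semaphore:
 *
 *      rtnl_lock();
 *      err = dev_open(dev);
 *      ...
 *      dev_close(dev);
 *      rtnl_unlock();
 */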


static int dev_boot_phase = 1;

/*
 *      Device change register/unregister. These are not inline or static
 *      as we export them to the world.
 */

/**
 *      register_netdevice_notifier - register a network notifier block
 *      @nb: notifier
 *
 *      Register a notifier to be called when network device events occur.
 *      The notifier passed is linked into the kernel structures and must
 *      not be reused until it has been unregistered. A negative errno code
 *      is returned on a failure.
 *
 *      When registered, all registration and up events are replayed
 *      to the new notifier to give it a race-free view of the network
 *      device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
        struct net_device *dev;
        struct net_device *last;
        struct net *net;
        int err;

        rtnl_lock();
        err = raw_notifier_chain_register(&netdev_chain, nb);
        if (err)
                goto unlock;
        if (dev_boot_phase)
                goto unlock;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
                        err = notifier_to_errno(err);
                        if (err)
                                goto rollback;

                        if (!(dev->flags & IFF_UP))
                                continue;

                        nb->notifier_call(nb, NETDEV_UP, dev);
                }
        }

unlock:
        rtnl_unlock();
        return err;

rollback:
        last = dev;
        for_each_net(net) {
                for_each_netdev(net, dev) {
                        if (dev == last)
                                break;

                        if (dev->flags & IFF_UP) {
                                nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
                                nb->notifier_call(nb, NETDEV_DOWN, dev);
                        }
                        nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
                }
        }

        raw_notifier_chain_unregister(&netdev_chain, nb);
        goto unlock;
}

/**
 *      unregister_netdevice_notifier - unregister a network notifier block
 *      @nb: notifier
 *
 *      Unregister a notifier previously registered by
 *      register_netdevice_notifier(). The notifier is unlinked from the
 *      kernel structures and may then be reused. A negative errno code
 *      is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
        int err;

        rtnl_lock();
        err = raw_notifier_chain_unregister(&netdev_chain, nb);
        rtnl_unlock();
        return err;
}

/**
 *      call_netdevice_notifiers - call all network notifier blocks
 *      @val: value passed unmodified to notifier function
 *      @dev: net_device pointer passed unmodified to notifier function
 *
 *      Call all network notifier blocks.  Parameters and return value
 *      are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
        return raw_notifier_call_chain(&netdev_chain, val, dev);
}
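
/*
 * Example (illustrative sketch): watching devices come and go.  The
 * names my_netdev_event and my_nb are hypothetical.
 *
 *      static int my_netdev_event(struct notifier_block *nb,
 *                                 unsigned long event, void *ptr)
 *      {
 *              struct net_device *dev = ptr;
 *
 *              if (event == NETDEV_UP)
 *                      printk(KERN_INFO "%s is up\n", dev->name);
 *              return NOTIFY_DONE;
 *      }
 *
 *      static struct notifier_block my_nb = {
 *              .notifier_call = my_netdev_event,
 *      };
 *
 *      register_netdevice_notifier(&my_nb);
 *      ...
 *      unregister_netdevice_notifier(&my_nb);
 */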

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
        atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
        atomic_dec(&netstamp_needed);
}

static inline void net_timestamp(struct sk_buff *skb)
{
        if (atomic_read(&netstamp_needed))
                __net_timestamp(skb);
        else
                skb->tstamp.tv64 = 0;
}

/*
 *      Support routine. Sends outgoing frames to any network
 *      taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
        struct packet_type *ptype;

        net_timestamp(skb);

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                /* Never send packets back to the socket
                 * they originated from - MvS (miquels@drinkel.ow.org)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;

                        /* The network header should be correctly set
                           by the sender, so the check below is just
                           protection against buggy protocols.
                         */
                        skb_reset_mac_header(skb2);

                        if (skb_network_header(skb2) < skb2->data ||
                            skb2->network_header > skb2->tail) {
                                if (net_ratelimit())
                                        printk(KERN_CRIT "protocol %04x is "
                                               "buggy, dev %s\n",
                                               skb2->protocol, dev->name);
                                skb_reset_network_header(skb2);
                        }

                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
                        ptype->func(skb2, skb->dev, ptype, skb->dev);
                }
        }
        rcu_read_unlock();
}


void __netif_schedule(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
                unsigned long flags;
                struct softnet_data *sd;

                local_irq_save(flags);
                sd = &__get_cpu_var(softnet_data);
                dev->next_sched = sd->output_queue;
                sd->output_queue = dev;
                raise_softirq_irqoff(NET_TX_SOFTIRQ);
                local_irq_restore(flags);
        }
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
        if (atomic_dec_and_test(&skb->users)) {
                struct softnet_data *sd;
                unsigned long flags;

                local_irq_save(flags);
                sd = &__get_cpu_var(softnet_data);
                skb->next = sd->completion_queue;
                sd->completion_queue = skb;
                raise_softirq_irqoff(NET_TX_SOFTIRQ);
                local_irq_restore(flags);
        }
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
        if (in_irq() || irqs_disabled())
                dev_kfree_skb_irq(skb);
        else
                dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);
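
/*
 * Example (illustrative): freeing a transmitted skb from a completion
 * path that may run in either hardirq or process context:
 *
 *      dev_kfree_skb_any(skb);
 *
 * dev_kfree_skb_irq() is for callers known to be in interrupt context,
 * and plain dev_kfree_skb() for callers that never are.
 */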


/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
        if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
            netif_running(dev)) {
                netif_stop_queue(dev);
        }
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
        if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
            netif_running(dev)) {
                netif_wake_queue(dev);
                __netdev_watchdog_up(dev);
        }
}
EXPORT_SYMBOL(netif_device_attach);
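
/*
 * Example (illustrative sketch): the usual pairing in a PCI driver's
 * power management hooks.  The names my_suspend and my_resume are
 * hypothetical.
 *
 *      static int my_suspend(struct pci_dev *pdev, pm_message_t state)
 *      {
 *              struct net_device *dev = pci_get_drvdata(pdev);
 *
 *              netif_device_detach(dev);
 *              return 0;
 *      }
 *
 *      static int my_resume(struct pci_dev *pdev)
 *      {
 *              struct net_device *dev = pci_get_drvdata(pdev);
 *
 *              netif_device_attach(dev);
 *              return 0;
 *      }
 */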


/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
        __wsum csum;
        int ret = 0, offset;

        if (skb->ip_summed == CHECKSUM_COMPLETE)
                goto out_set_summed;

        if (unlikely(skb_shinfo(skb)->gso_size)) {
                /* Let GSO fix up the checksum. */
                goto out_set_summed;
        }

        offset = skb->csum_start - skb_headroom(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);

        offset += skb->csum_offset;
        BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));

        if (skb_cloned(skb) &&
            !skb_clone_writable(skb, offset + sizeof(__sum16))) {
                ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                if (ret)
                        goto out;
        }

        *(__sum16 *)(skb->data + offset) = csum_fold(csum);
out_set_summed:
        skb->ip_summed = CHECKSUM_NONE;
out:
        return ret;
}

/**
 *      skb_gso_segment - Perform segmentation on skb.
 *      @skb: buffer to segment
 *      @features: features for the output path (see dev->features)
 *
 *      This function segments the given skb and returns a list of segments.
 *
 *      It may return NULL if the skb requires no segmentation.  This is
 *      only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
        int err;

        BUG_ON(skb_shinfo(skb)->frag_list);

        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);

        if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
                if (skb_header_cloned(skb) &&
                    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                        return ERR_PTR(err);
        }

        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
                if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
                        if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
                                err = ptype->gso_send_check(skb);
                                segs = ERR_PTR(err);
                                if (err || skb_gso_ok(skb, features))
                                        break;
                                __skb_push(skb, (skb->data -
                                                 skb_network_header(skb)));
                        }
                        segs = ptype->gso_segment(skb, features);
                        break;
                }
        }
        rcu_read_unlock();

        __skb_push(skb, skb->data - skb_mac_header(skb));

        return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
{
        if (net_ratelimit()) {
                printk(KERN_ERR "%s: hw csum failure.\n",
                        dev ? dev->name : "<unknown>");
                dump_stack();
        }
}
EXPORT_SYMBOL(netdev_rx_csum_fault);
#endif

/* Actually, we should eliminate this check as soon as we know that:
 * 1. An IOMMU is present and allows mapping all the memory.
 * 2. No high memory really exists on this machine.
 */

static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
        int i;

        if (dev->features & NETIF_F_HIGHDMA)
                return 0;

        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
                if (PageHighMem(skb_shinfo(skb)->frags[i].page))
                        return 1;

#endif
        return 0;
}

struct dev_gso_cb {
        void (*destructor)(struct sk_buff *skb);
};

#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)

static void dev_gso_skb_destructor(struct sk_buff *skb)
{
        struct dev_gso_cb *cb;

        do {
                struct sk_buff *nskb = skb->next;

                skb->next = nskb->next;
                nskb->next = NULL;
                kfree_skb(nskb);
        } while (skb->next);

        cb = DEV_GSO_CB(skb);
        if (cb->destructor)
                cb->destructor(skb);
}

/**
 *      dev_gso_segment - Perform emulated hardware segmentation on skb.
 *      @skb: buffer to segment
 *
 *      This function segments the given skb and stores the list of segments
 *      in skb->next.
 */
static int dev_gso_segment(struct sk_buff *skb)
{
        struct net_device *dev = skb->dev;
        struct sk_buff *segs;
        int features = dev->features & ~(illegal_highdma(dev, skb) ?
                                         NETIF_F_SG : 0);

        segs = skb_gso_segment(skb, features);

        /* Verifying header integrity only. */
        if (!segs)
                return 0;

        if (unlikely(IS_ERR(segs)))
                return PTR_ERR(segs);

        skb->next = segs;
        DEV_GSO_CB(skb)->destructor = skb->destructor;
        skb->destructor = dev_gso_skb_destructor;

        return 0;
}

int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
                        dev_queue_xmit_nit(skb, dev);

                if (netif_needs_gso(dev, skb)) {
                        if (unlikely(dev_gso_segment(skb)))
                                goto out_kfree_skb;
                        if (skb->next)
                                goto gso;
                }

                return dev->hard_start_xmit(skb, dev);
        }

gso:
        do {
                struct sk_buff *nskb = skb->next;
                int rc;

                skb->next = nskb->next;
                nskb->next = NULL;
                rc = dev->hard_start_xmit(nskb, dev);
                if (unlikely(rc)) {
                        nskb->next = skb->next;
                        skb->next = nskb;
                        return rc;
                }
                if (unlikely((netif_queue_stopped(dev) ||
                             netif_subqueue_stopped(dev, skb)) &&
                             skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);

        skb->destructor = DEV_GSO_CB(skb)->destructor;

out_kfree_skb:
        kfree_skb(skb);
        return 0;
}
1574
1575/**
1576 *      dev_queue_xmit - transmit a buffer
1577 *      @skb: buffer to transmit
1578 *
1579 *      Queue a buffer for transmission to a network device. The caller must
1580 *      have set the device and priority and built the buffer before calling
1581 *      this function. The function can be called from an interrupt.
1582 *
1583 *      A negative errno code is returned on a failure. A success does not
1584 *      guarantee the frame will be transmitted as it may be dropped due
1585 *      to congestion or traffic shaping.
1586 *
1587 * -----------------------------------------------------------------------------------
1588 *      I notice this method can also return errors from the queue disciplines,
1589 *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1590 *      be positive.
1591 *
1592 *      Regardless of the return value, the skb is consumed, so it is currently
1593 *      difficult to retry a send to this method.  (You can bump the ref count
1594 *      before sending to hold a reference for retry if you are careful.)
1595 *
1596 *      When calling this method, interrupts MUST be enabled.  This is because
1597 *      the BH enable code must have IRQs enabled so that it will not deadlock.
1598 *          --BLG
1599 */
1600
1601int dev_queue_xmit(struct sk_buff *skb)
1602{
1603        struct net_device *dev = skb->dev;
1604        struct Qdisc *q;
1605        int rc = -ENOMEM;
1606
1607        /* GSO will handle the following emulations directly. */
1608        if (netif_needs_gso(dev, skb))
1609                goto gso;
1610
1611        if (skb_shinfo(skb)->frag_list &&
1612            !(dev->features & NETIF_F_FRAGLIST) &&
1613            __skb_linearize(skb))
1614                goto out_kfree_skb;
1615
1616        /* Fragmented skb is linearized if device does not support SG,
1617         * or if at least one of fragments is in highmem and device
1618         * does not support DMA from it.
1619         */
1620        if (skb_shinfo(skb)->nr_frags &&
1621            (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1622            __skb_linearize(skb))
1623                goto out_kfree_skb;
1624
1625        /* If packet is not checksummed and device does not support
1626         * checksumming for this protocol, complete checksumming here.
1627         */
1628        if (skb->ip_summed == CHECKSUM_PARTIAL) {
1629                skb_set_transport_header(skb, skb->csum_start -
1630                                              skb_headroom(skb));
1631
1632                if (!(dev->features & NETIF_F_GEN_CSUM) &&
1633                    !((dev->features & NETIF_F_IP_CSUM) &&
1634                      skb->protocol == htons(ETH_P_IP)) &&
1635                    !((dev->features & NETIF_F_IPV6_CSUM) &&
1636                      skb->protocol == htons(ETH_P_IPV6)))
1637                        if (skb_checksum_help(skb))
1638                                goto out_kfree_skb;
1639        }
1640
1641gso:
1642        spin_lock_prefetch(&dev->queue_lock);
1643
1644        /* Disable soft irqs for various locks below. Also
1645         * stops preemption for RCU.
1646         */
1647        rcu_read_lock_bh();
1648
1649        /* Updates of qdisc are serialized by queue_lock.
1650         * The struct Qdisc which is pointed to by qdisc is now an
1651         * RCU-protected structure - it may be accessed without acquiring
1652         * a lock (but the structure may be stale.) The freeing of the
1653         * qdisc will be deferred until it's known that there are no
1654         * more references to it.
1655         *
1656         * If the qdisc has an enqueue function, we still need to
1657         * hold the queue_lock before calling it, since queue_lock
1658         * also serializes access to the device queue.
1659         */
1660
1661        q = rcu_dereference(dev->qdisc);
1662#ifdef CONFIG_NET_CLS_ACT
1663        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1664#endif
1665        if (q->enqueue) {
1666                /* Grab device queue */
1667                spin_lock(&dev->queue_lock);
1668                q = dev->qdisc;
1669                if (q->enqueue) {
1670                        /* reset queue_mapping to zero */
1671                        skb_set_queue_mapping(skb, 0);
1672                        rc = q->enqueue(skb, q);
1673                        qdisc_run(dev);
1674                        spin_unlock(&dev->queue_lock);
1675
1676                        rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1677                        goto out;
1678                }
1679                spin_unlock(&dev->queue_lock);
1680        }
1681
1682        /* The device has no queue. This is the common case for software
1683           devices: loopback, all sorts of tunnels...
1684
1685           Really, it is unlikely that netif_tx_lock protection is necessary
1686           here.  (e.g. loopback and IP tunnels are clean, ignoring statistics
1687           counters.)
1688           However, it is possible that they rely on the protection
1689           we provide here.
1690
1691           Check this, and remove the lock if it is safe; it is not prone to
1692           deadlocks.  Or switch to a noqueue qdisc, which is even simpler 8)
1693         */
1694        if (dev->flags & IFF_UP) {
1695                int cpu = smp_processor_id(); /* ok because BHs are off */
1696
1697                if (dev->xmit_lock_owner != cpu) {
1698
1699                        HARD_TX_LOCK(dev, cpu);
1700
1701                        if (!netif_queue_stopped(dev) &&
1702                            !netif_subqueue_stopped(dev, skb)) {
1703                                rc = 0;
1704                                if (!dev_hard_start_xmit(skb, dev)) {
1705                                        HARD_TX_UNLOCK(dev);
1706                                        goto out;
1707                                }
1708                        }
1709                        HARD_TX_UNLOCK(dev);
1710                        if (net_ratelimit())
1711                                printk(KERN_CRIT "Virtual device %s asks to "
1712                                       "queue packet!\n", dev->name);
1713                } else {
1714                        /* Recursion is detected! It is possible,
1715                         * unfortunately */
1716                        if (net_ratelimit())
1717                                printk(KERN_CRIT "Dead loop on virtual device "
1718                                       "%s, fix it urgently!\n", dev->name);
1719                }
1720        }
1721
1722        rc = -ENETDOWN;
1723        rcu_read_unlock_bh();
1724
1725out_kfree_skb:
1726        kfree_skb(skb);
1727        return rc;
1728out:
1729        rcu_read_unlock_bh();
1730        return rc;
1731}
1732
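/* Illustrative sketch (not part of this file): because dev_queue_xmit()
 * always consumes the skb, a caller that wants to retry after a qdisc
 * drop must hold its own reference first, as the comment above notes.
 * A minimal sketch; assumes the skb is fully built and IRQs are enabled.
 */
static int example_xmit_once_with_retry(struct sk_buff *skb)
{
        int rc;

        skb_get(skb);                   /* extra reference for a retry */
        rc = dev_queue_xmit(skb);       /* consumes one reference */
        if (rc == NET_XMIT_DROP)        /* positive "error" from the qdisc */
                return dev_queue_xmit(skb);     /* consumes our reference */

        kfree_skb(skb);                 /* success or errno: drop extra ref */
        return rc;
}
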
1733
1734/*=======================================================================
1735                        Receiver routines
1736  =======================================================================*/
1737
1738int netdev_max_backlog __read_mostly = 1000;
1739int netdev_budget __read_mostly = 300;
1740int weight_p __read_mostly = 64;            /* old backlog weight */
1741
1742DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1743
1744
1745/**
1746 *      netif_rx        -       post buffer to the network code
1747 *      @skb: buffer to post
1748 *
1749 *      This function receives a packet from a device driver and queues it for
1750 *      the upper (protocol) levels to process.  It always succeeds. The buffer
1751 *      may be dropped during processing for congestion control or by the
1752 *      protocol layers.
1753 *
1754 *      return values:
1755 *      NET_RX_SUCCESS  (no congestion)
1756 *      NET_RX_DROP     (packet was dropped)
1757 *
1758 */
1759
1760int netif_rx(struct sk_buff *skb)
1761{
1762        struct softnet_data *queue;
1763        unsigned long flags;
1764
1765        /* if netpoll wants it, pretend we never saw it */
1766        if (netpoll_rx(skb))
1767                return NET_RX_DROP;
1768
1769        if (!skb->tstamp.tv64)
1770                net_timestamp(skb);
1771
1772        /*
1773         * The code is arranged so that the path is shortest when
1774         * the CPU is congested but still operating.
1775         */
1776        local_irq_save(flags);
1777        queue = &__get_cpu_var(softnet_data);
1778
1779        __get_cpu_var(netdev_rx_stat).total++;
1780        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1781                if (queue->input_pkt_queue.qlen) {
1782enqueue:
1783                        dev_hold(skb->dev);
1784                        __skb_queue_tail(&queue->input_pkt_queue, skb);
1785                        local_irq_restore(flags);
1786                        return NET_RX_SUCCESS;
1787                }
1788
1789                napi_schedule(&queue->backlog);
1790                goto enqueue;
1791        }
1792
1793        __get_cpu_var(netdev_rx_stat).dropped++;
1794        local_irq_restore(flags);
1795
1796        kfree_skb(skb);
1797        return NET_RX_DROP;
1798}
1799
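/* Illustrative sketch (not part of this file): a typical non-NAPI driver
 * builds an skb for each received frame and hands it to netif_rx() from
 * its interrupt handler.  The data/len pair stands in for whatever the
 * hardware delivered.
 */
static void example_driver_rx(struct net_device *dev, const void *data,
                              unsigned int len)
{
        struct sk_buff *skb = dev_alloc_skb(len + NET_IP_ALIGN);

        if (!skb)
                return;                         /* drop: out of memory */

        skb_reserve(skb, NET_IP_ALIGN);         /* align the IP header */
        memcpy(skb_put(skb, len), data, len);
        skb->protocol = eth_type_trans(skb, dev);
        netif_rx(skb);          /* queue for the RX softirq; never fails */
}
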
1800int netif_rx_ni(struct sk_buff *skb)
1801{
1802        int err;
1803
1804        preempt_disable();
1805        err = netif_rx(skb);
1806        if (local_softirq_pending())
1807                do_softirq();
1808        preempt_enable();
1809
1810        return err;
1811}
1812
1813EXPORT_SYMBOL(netif_rx_ni);
1814
1815static inline struct net_device *skb_bond(struct sk_buff *skb)
1816{
1817        struct net_device *dev = skb->dev;
1818
1819        if (dev->master) {
1820                if (skb_bond_should_drop(skb)) {
1821                        kfree_skb(skb);
1822                        return NULL;
1823                }
1824                skb->dev = dev->master;
1825        }
1826
1827        return dev;
1828}
1829
1830
1831static void net_tx_action(struct softirq_action *h)
1832{
1833        struct softnet_data *sd = &__get_cpu_var(softnet_data);
1834
1835        if (sd->completion_queue) {
1836                struct sk_buff *clist;
1837
1838                local_irq_disable();
1839                clist = sd->completion_queue;
1840                sd->completion_queue = NULL;
1841                local_irq_enable();
1842
1843                while (clist) {
1844                        struct sk_buff *skb = clist;
1845                        clist = clist->next;
1846
1847                        BUG_TRAP(!atomic_read(&skb->users));
1848                        __kfree_skb(skb);
1849                }
1850        }
1851
1852        if (sd->output_queue) {
1853                struct net_device *head;
1854
1855                local_irq_disable();
1856                head = sd->output_queue;
1857                sd->output_queue = NULL;
1858                local_irq_enable();
1859
1860                while (head) {
1861                        struct net_device *dev = head;
1862                        head = head->next_sched;
1863
1864                        smp_mb__before_clear_bit();
1865                        clear_bit(__LINK_STATE_SCHED, &dev->state);
1866
1867                        if (spin_trylock(&dev->queue_lock)) {
1868                                qdisc_run(dev);
1869                                spin_unlock(&dev->queue_lock);
1870                        } else {
1871                                netif_schedule(dev);
1872                        }
1873                }
1874        }
1875}
1876
1877static inline int deliver_skb(struct sk_buff *skb,
1878                              struct packet_type *pt_prev,
1879                              struct net_device *orig_dev)
1880{
1881        atomic_inc(&skb->users);
1882        return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1883}
1884
1885#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1886/* These hooks are defined here for ATM */
1887struct net_bridge;
1888struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1889                                                unsigned char *addr);
1890void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1891
1892/*
1893 * If the bridge module is loaded, call the bridging hook.
1894 * Returns NULL if the packet was consumed.
1895 */
1896struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1897                                        struct sk_buff *skb) __read_mostly;
1898static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1899                                            struct packet_type **pt_prev, int *ret,
1900                                            struct net_device *orig_dev)
1901{
1902        struct net_bridge_port *port;
1903
1904        if (skb->pkt_type == PACKET_LOOPBACK ||
1905            (port = rcu_dereference(skb->dev->br_port)) == NULL)
1906                return skb;
1907
1908        if (*pt_prev) {
1909                *ret = deliver_skb(skb, *pt_prev, orig_dev);
1910                *pt_prev = NULL;
1911        }
1912
1913        return br_handle_frame_hook(port, skb);
1914}
1915#else
1916#define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
1917#endif
1918
1919#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
1920struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1921EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1922
1923static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1924                                             struct packet_type **pt_prev,
1925                                             int *ret,
1926                                             struct net_device *orig_dev)
1927{
1928        if (skb->dev->macvlan_port == NULL)
1929                return skb;
1930
1931        if (*pt_prev) {
1932                *ret = deliver_skb(skb, *pt_prev, orig_dev);
1933                *pt_prev = NULL;
1934        }
1935        return macvlan_handle_frame_hook(skb);
1936}
1937#else
1938#define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
1939#endif
1940
1941#ifdef CONFIG_NET_CLS_ACT
1942/* TODO: Maybe we should just force sch_ingress to be compiled in
1943 * whenever CONFIG_NET_CLS_ACT is?  Otherwise we pay for some useless
1944 * instructions (an extra compare and two stores) when sch_ingress is
1945 * not configured but CONFIG_NET_CLS_ACT is.
1946 * NOTE: This doesn't remove any functionality; if you don't have
1947 * the ingress scheduler, you just can't add policies on ingress.
1948 *
1949 */
1950static int ing_filter(struct sk_buff *skb)
1951{
1952        struct Qdisc *q;
1953        struct net_device *dev = skb->dev;
1954        int result = TC_ACT_OK;
1955        u32 ttl = G_TC_RTTL(skb->tc_verd);
1956
1957        if (MAX_RED_LOOP < ttl++) {
1958                printk(KERN_WARNING
1959                       "Redir loop detected, dropping packet (%d->%d)\n",
1960                       skb->iif, dev->ifindex);
1961                return TC_ACT_SHOT;
1962        }
1963
1964        skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
1965        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
1966
1967        spin_lock(&dev->ingress_lock);
1968        if ((q = dev->qdisc_ingress) != NULL)
1969                result = q->enqueue(skb, q);
1970        spin_unlock(&dev->ingress_lock);
1971
1972        return result;
1973}
1974
1975static inline struct sk_buff *handle_ing(struct sk_buff *skb,
1976                                         struct packet_type **pt_prev,
1977                                         int *ret, struct net_device *orig_dev)
1978{
1979        if (!skb->dev->qdisc_ingress)
1980                goto out;
1981
1982        if (*pt_prev) {
1983                *ret = deliver_skb(skb, *pt_prev, orig_dev);
1984                *pt_prev = NULL;
1985        } else {
1986                /* Huh? Why does turning on AF_PACKET affect this? */
1987                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1988        }
1989
1990        switch (ing_filter(skb)) {
1991        case TC_ACT_SHOT:
1992        case TC_ACT_STOLEN:
1993                kfree_skb(skb);
1994                return NULL;
1995        }
1996
1997out:
1998        skb->tc_verd = 0;
1999        return skb;
2000}
2001#endif
2002
2003/**
2004 *      netif_receive_skb - process receive buffer from network
2005 *      @skb: buffer to process
2006 *
2007 *      netif_receive_skb() is the main receive data processing function.
2008 *      It always succeeds. The buffer may be dropped during processing
2009 *      for congestion control or by the protocol layers.
2010 *
2011 *      This function may only be called from softirq context and interrupts
2012 *      should be enabled.
2013 *
2014 *      Return values (usually ignored):
2015 *      NET_RX_SUCCESS: no congestion
2016 *      NET_RX_DROP: packet was dropped
2017 */
2018int netif_receive_skb(struct sk_buff *skb)
2019{
2020        struct packet_type *ptype, *pt_prev;
2021        struct net_device *orig_dev;
2022        int ret = NET_RX_DROP;
2023        __be16 type;
2024
2025        /* if we've gotten here through NAPI, check netpoll */
2026        if (netpoll_receive_skb(skb))
2027                return NET_RX_DROP;
2028
2029        if (!skb->tstamp.tv64)
2030                net_timestamp(skb);
2031
2032        if (!skb->iif)
2033                skb->iif = skb->dev->ifindex;
2034
2035        orig_dev = skb_bond(skb);
2036
2037        if (!orig_dev)
2038                return NET_RX_DROP;
2039
2040        __get_cpu_var(netdev_rx_stat).total++;
2041
2042        skb_reset_network_header(skb);
2043        skb_reset_transport_header(skb);
2044        skb->mac_len = skb->network_header - skb->mac_header;
2045
2046        pt_prev = NULL;
2047
2048        rcu_read_lock();
2049
2050#ifdef CONFIG_NET_CLS_ACT
2051        if (skb->tc_verd & TC_NCLS) {
2052                skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2053                goto ncls;
2054        }
2055#endif
2056
2057        list_for_each_entry_rcu(ptype, &ptype_all, list) {
2058                if (!ptype->dev || ptype->dev == skb->dev) {
2059                        if (pt_prev)
2060                                ret = deliver_skb(skb, pt_prev, orig_dev);
2061                        pt_prev = ptype;
2062                }
2063        }
2064
2065#ifdef CONFIG_NET_CLS_ACT
2066        skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2067        if (!skb)
2068                goto out;
2069ncls:
2070#endif
2071
2072        skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2073        if (!skb)
2074                goto out;
2075        skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2076        if (!skb)
2077                goto out;
2078
2079        type = skb->protocol;
2080        list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2081                if (ptype->type == type &&
2082                    (!ptype->dev || ptype->dev == skb->dev)) {
2083                        if (pt_prev)
2084                                ret = deliver_skb(skb, pt_prev, orig_dev);
2085                        pt_prev = ptype;
2086                }
2087        }
2088
2089        if (pt_prev) {
2090                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2091        } else {
2092                kfree_skb(skb);
2093                /* Jamal, now you will not be able to escape explaining
2094                 * to me how you were going to use this. :-)
2095                 */
2096                ret = NET_RX_DROP;
2097        }
2098
2099out:
2100        rcu_read_unlock();
2101        return ret;
2102}
2103
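/* Illustrative sketch (not part of this file): a NAPI driver delivers
 * packets from its ->poll() callback with netif_receive_skb(), which runs
 * in softirq context as required.  example_napi_to_dev() and
 * example_next_rx_skb() are hypothetical hardware-specific helpers.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
        struct net_device *dev = example_napi_to_dev(napi); /* hypothetical */
        struct sk_buff *skb;
        int work = 0;

        while (work < budget &&
               (skb = example_next_rx_skb(dev)) != NULL) {  /* hypothetical */
                skb->protocol = eth_type_trans(skb, dev);
                netif_receive_skb(skb);
                work++;
        }

        if (work < budget)
                netif_rx_complete(dev, napi);   /* done: re-enable RX IRQs */

        return work;
}
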
2104static int process_backlog(struct napi_struct *napi, int quota)
2105{
2106        int work = 0;
2107        struct softnet_data *queue = &__get_cpu_var(softnet_data);
2108        unsigned long start_time = jiffies;
2109
2110        napi->weight = weight_p;
2111        do {
2112                struct sk_buff *skb;
2113                struct net_device *dev;
2114
2115                local_irq_disable();
2116                skb = __skb_dequeue(&queue->input_pkt_queue);
2117                if (!skb) {
2118                        __napi_complete(napi);
2119                        local_irq_enable();
2120                        break;
2121                }
2122
2123                local_irq_enable();
2124
2125                dev = skb->dev;
2126
2127                netif_receive_skb(skb);
2128
2129                dev_put(dev);
2130        } while (++work < quota && jiffies == start_time);
2131
2132        return work;
2133}
2134
2135/**
2136 * __napi_schedule - schedule for receive
2137 * @n: entry to schedule
2138 *
2139 * The entry's receive function will be scheduled to run
2140 */
2141void fastcall __napi_schedule(struct napi_struct *n)
2142{
2143        unsigned long flags;
2144
2145        local_irq_save(flags);
2146        list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2147        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2148        local_irq_restore(flags);
2149}
2150EXPORT_SYMBOL(__napi_schedule);
2151
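/* Illustrative sketch (not part of this file): drivers rarely call
 * __napi_schedule() directly; an interrupt handler masks its RX interrupt
 * and calls napi_schedule(), which tests NAPI_STATE_SCHED before falling
 * through to __napi_schedule() above.  example_priv and
 * example_disable_rx_irq() are hypothetical.
 */
struct example_priv {
        struct napi_struct napi;
        /* ... hardware state ... */
};

static irqreturn_t example_isr(int irq, void *dev_id)
{
        struct example_priv *priv = dev_id;

        example_disable_rx_irq(priv);   /* hypothetical: mask RX interrupts */
        napi_schedule(&priv->napi);     /* schedules ->poll() via softirq */
        return IRQ_HANDLED;
}
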
2152
2153static void net_rx_action(struct softirq_action *h)
2154{
2155        struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
2156        unsigned long start_time = jiffies;
2157        int budget = netdev_budget;
2158        void *have;
2159
2160        local_irq_disable();
2161
2162        while (!list_empty(list)) {
2163                struct napi_struct *n;
2164                int work, weight;
2165
2166                /* If the softirq window is exhausted then punt.
2167                 *
2168                 * Note that this is a slight policy change from the
2169                 * previous NAPI code, which would allow up to 2
2170                 * jiffies to pass before breaking out.  The test
2171                 * used to be "jiffies - start_time > 1".
2172                 */
2173                if (unlikely(budget <= 0 || jiffies != start_time))
2174                        goto softnet_break;
2175
2176                local_irq_enable();
2177
2178                /* Even though interrupts have been re-enabled, this
2179                 * access is safe because interrupts can only add new
2180                 * entries to the tail of this list, and only ->poll()
2181                 * calls can remove this head entry from the list.
2182                 */
2183                n = list_entry(list->next, struct napi_struct, poll_list);
2184
2185                have = netpoll_poll_lock(n);
2186
2187                weight = n->weight;
2188
2189                /* This NAPI_STATE_SCHED test is for avoiding a race
2190                 * with netpoll's poll_napi().  Only the entity which
2191                 * obtains the lock and sees NAPI_STATE_SCHED set will
2192                 * actually make the ->poll() call.  Therefore we avoid
2193                 * accidentally calling ->poll() when NAPI is not scheduled.
2194                 */
2195                work = 0;
2196                if (test_bit(NAPI_STATE_SCHED, &n->state))
2197                        work = n->poll(n, weight);
2198
2199                WARN_ON_ONCE(work > weight);
2200
2201                budget -= work;
2202
2203                local_irq_disable();
2204
2205                /* Drivers must not modify the NAPI state if they
2206                 * consume the entire weight.  In such cases this code
2207                 * still "owns" the NAPI instance and therefore can
2208                 * move the instance around on the list at-will.
2209                 */
2210                if (unlikely(work == weight)) {
2211                        if (unlikely(napi_disable_pending(n)))
2212                                __napi_complete(n);
2213                        else
2214                                list_move_tail(&n->poll_list, list);
2215                }
2216
2217                netpoll_poll_unlock(have);
2218        }
2219out:
2220        local_irq_enable();
2221
2222#ifdef CONFIG_NET_DMA
2223        /*
2224         * There may not be any more sk_buffs coming right now, so push
2225         * any pending DMA copies to hardware
2226         */
2227        if (!cpus_empty(net_dma.channel_mask)) {
2228                int chan_idx;
2229                for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2230                        struct dma_chan *chan = net_dma.channels[chan_idx];
2231                        if (chan)
2232                                dma_async_memcpy_issue_pending(chan);
2233                }
2234        }
2235#endif
2236
2237        return;
2238
2239softnet_break:
2240        __get_cpu_var(netdev_rx_stat).time_squeeze++;
2241        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2242        goto out;
2243}
2244
2245static gifconf_func_t *gifconf_list[NPROTO];
2246
2247/**
2248 *      register_gifconf        -       register a SIOCGIF handler
2249 *      @family: Address family
2250 *      @gifconf: Function handler
2251 *
2252 *      Register protocol dependent address dumping routines. The handler
2253 *      that is passed must not be freed or reused until it has been replaced
2254 *      by another handler.
2255 */
2256int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2257{
2258        if (family >= NPROTO)
2259                return -EINVAL;
2260        gifconf_list[family] = gifconf;
2261        return 0;
2262}
2263
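/* Illustrative sketch (not part of this file): an address family installs
 * its SIOCGIFCONF helper once at init time; in-tree, IPv4 registers
 * inet_gifconf() this way.  The handler below is hypothetical and reports
 * no addresses, and AF_INET is used purely as an example family.
 */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
        /* When buf is NULL, return only the space needed; otherwise copy
         * one struct ifreq per address of this family into buf.
         */
        return 0;                       /* no addresses of this family */
}

static int __init example_family_init(void)
{
        return register_gifconf(AF_INET, example_gifconf); /* illustration only */
}
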
2264
2265/*
2266 *      Map an interface index to its name (SIOCGIFNAME)
2267 */
2268
2269/*
2270 *      We need this ioctl for efficient implementation of the
2271 *      if_indextoname() function required by the IPv6 API.  Without
2272 *      it, we would have to search all the interfaces to find a
2273 *      match.  --pb
2274 */
2275
2276static int dev_ifname(struct net *net, struct ifreq __user *arg)
2277{
2278        struct net_device *dev;
2279        struct ifreq ifr;
2280
2281        /*
2282         *      Fetch the caller's info block.
2283         */
2284
2285        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2286                return -EFAULT;
2287
2288        read_lock(&dev_base_lock);
2289        dev = __dev_get_by_index(net, ifr.ifr_ifindex);
2290        if (!dev) {
2291                read_unlock(&dev_base_lock);
2292                return -ENODEV;
2293        }
2294
2295        strcpy(ifr.ifr_name, dev->name);
2296        read_unlock(&dev_base_lock);
2297
2298        if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2299                return -EFAULT;
2300        return 0;
2301}
2302
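/* Illustrative user-space sketch (not kernel code): this is the other end
 * of dev_ifname() above -- roughly what glibc's if_indextoname() does.
 * Needs <string.h>, <sys/ioctl.h> and <net/if.h>; "sock" is any open
 * socket.
 */
static int example_index_to_name(int sock, int ifindex,
                                 char name[IFNAMSIZ])
{
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_ifindex = ifindex;
        if (ioctl(sock, SIOCGIFNAME, &ifr) < 0)
                return -1;              /* errno is ENODEV for a bad index */

        memcpy(name, ifr.ifr_name, IFNAMSIZ);
        return 0;
}
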
2303/*
2304 *      Perform a SIOCGIFCONF call. This structure will change
2305 *      size eventually, and there is nothing I can do about it.
2306 *      Thus we will need a 'compatibility mode'.
2307 */
2308
2309static int dev_ifconf(struct net *net, char __user *arg)
2310{
2311        struct ifconf ifc;
2312        struct net_device *dev;
2313        char __user *pos;
2314        int len;
2315        int total;
2316        int i;
2317
2318        /*
2319         *      Fetch the caller's info block.
2320         */
2321
2322        if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2323                return -EFAULT;
2324
2325        pos = ifc.ifc_buf;
2326        len = ifc.ifc_len;
2327
2328        /*
2329         *      Loop over the interfaces, and write an info block for each.
2330         */
2331
2332        total = 0;
2333        for_each_netdev(net, dev) {
2334                for (i = 0; i < NPROTO; i++) {
2335                        if (gifconf_list[i]) {
2336                                int done;
2337                                if (!pos)
2338                                        done = gifconf_list[i](dev, NULL, 0);
2339                                else
2340                                        done = gifconf_list[i](dev, pos + total,
2341                                                               len - total);
2342                                if (done < 0)
2343                                        return -EFAULT;
2344                                total += done;
2345                        }
2346                }
2347        }
2348
2349        /*
2350         *      All done.  Write the updated control block back to the caller.
2351         */
2352        ifc.ifc_len = total;
2353
2354        /*
2355         *      Both BSD and Solaris return 0 here, so we do too.
2356         */
2357        return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2358}
2359
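/* Illustrative user-space sketch (not kernel code): the usual SIOCGIFCONF
 * dance against dev_ifconf() above.  Passing ifc_buf == NULL first would
 * return only the length needed; a fixed buffer is used here for brevity.
 * Needs <stdio.h>, <sys/ioctl.h> and <net/if.h>.
 */
static int example_list_interfaces(int sock)
{
        struct ifreq reqs[64];
        struct ifconf ifc;
        int i, n;

        ifc.ifc_len = sizeof(reqs);
        ifc.ifc_req = reqs;
        if (ioctl(sock, SIOCGIFCONF, &ifc) < 0)
                return -1;

        n = ifc.ifc_len / sizeof(struct ifreq); /* entries actually filled */
        for (i = 0; i < n; i++)
                printf("%s\n", reqs[i].ifr_name);
        return 0;
}
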
2360#ifdef CONFIG_PROC_FS
2361/*
2362 *      This is invoked by the /proc filesystem handler to display a device
2363 *      in detail.
2364 */
2365void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2366{
2367        struct net *net = seq->private;
2368        loff_t off;
2369        struct net_device *dev;
2370
2371        read_lock(&dev_base_lock);
2372        if (!*pos)
2373                return SEQ_START_TOKEN;
2374
2375        off = 1;
2376        for_each_netdev(net, dev)
2377                if (off++ == *pos)
2378                        return dev;
2379
2380        return NULL;
2381}
2382
2383void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2384{
2385        struct net *net = seq->private;
2386        ++*pos;
2387        return v == SEQ_START_TOKEN ?
2388                first_net_device(net) : next_net_device((struct net_device *)v);
2389}
2390
2391void dev_seq_stop(struct seq_file *seq, void *v)
2392{
2393        read_unlock(&dev_base_lock);
2394}
2395
2396static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2397{
2398        struct net_device_stats *stats = dev->get_stats(dev);
2399
2400        seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2401                   "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2402                   dev->name, stats->rx_bytes, stats->rx_packets,
2403                   stats->rx_errors,
2404                   stats->rx_dropped + stats->rx_missed_errors,
2405                   stats->rx_fifo_errors,
2406                   stats->rx_length_errors + stats->rx_over_errors +
2407                    stats->rx_crc_errors + stats->rx_frame_errors,
2408                   stats->rx_compressed, stats->multicast,
2409                   stats->tx_bytes, stats->tx_packets,
2410                   stats->tx_errors, stats->tx_dropped,
2411                   stats->tx_fifo_errors, stats->collisions,
2412                   stats->tx_carrier_errors +
2413                    stats->tx_aborted_errors +
2414                    stats->tx_window_errors +
2415                    stats->tx_heartbeat_errors,
2416                   stats->tx_compressed);
2417}
2418
2419/*
2420 *      Called from the PROCfs module. This now uses the new arbitrary sized
2421 *      /proc/net interface to create /proc/net/dev
2422 */
2423static int dev_seq_show(struct seq_file *seq, void *v)
2424{
2425        if (v == SEQ_START_TOKEN)
2426                seq_puts(seq, "Inter-|   Receive                            "
2427                              "                    |  Transmit\n"
2428                              " face |bytes    packets errs drop fifo frame "
2429                              "compressed multicast|bytes    packets errs "
2430                              "drop fifo colls carrier compressed\n");
2431        else
2432                dev_seq_printf_stats(seq, v);
2433        return 0;
2434}
2435
2436static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2437{
2438        struct netif_rx_stats *rc = NULL;
2439
2440        while (*pos < NR_CPUS)
2441                if (cpu_online(*pos)) {
2442                        rc = &per_cpu(netdev_rx_stat, *pos);
2443                        break;
2444                } else
2445                        ++*pos;
2446        return rc;
2447}
2448
2449static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2450{
2451        return softnet_get_online(pos);
2452}
2453
2454static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2455{
2456        ++*pos;
2457        return softnet_get_online(pos);
2458}
2459
2460static void softnet_seq_stop(struct seq_file *seq, void *v)
2461{
2462}
2463
2464static int softnet_seq_show(struct seq_file *seq, void *v)
2465{
2466        struct netif_rx_stats *s = v;
2467
2468        seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2469                   s->total, s->dropped, s->time_squeeze, 0,
2470                   0, 0, 0, 0, /* was fastroute */
2471                   s->cpu_collision);
2472        return 0;
2473}
2474
2475static const struct seq_operations dev_seq_ops = {
2476        .start = dev_seq_start,
2477        .next  = dev_seq_next,
2478        .stop  = dev_seq_stop,
2479        .show  = dev_seq_show,
2480};
2481
2482static int dev_seq_open(struct inode *inode, struct file *file)
2483{
2484        struct seq_file *seq;
2485        int res;
2486        res =  seq_open(file, &dev_seq_ops);
2487        if (!res) {
2488                seq = file->private_data;
2489                seq->private = get_proc_net(inode);
2490                if (!seq->private) {
2491                        seq_release(inode, file);
2492                        res = -ENXIO;
2493                }
2494        }
2495        return res;
2496}
2497
2498static int dev_seq_release(struct inode *inode, struct file *file)
2499{
2500        struct seq_file *seq = file->private_data;
2501        struct net *net = seq->private;
2502        put_net(net);
2503        return seq_release(inode, file);
2504}
2505
2506static const struct file_operations dev_seq_fops = {
2507        .owner   = THIS_MODULE,
2508        .open    = dev_seq_open,
2509        .read    = seq_read,
2510        .llseek  = seq_lseek,
2511        .release = dev_seq_release,
2512};
2513
2514static const struct seq_operations softnet_seq_ops = {
2515        .start = softnet_seq_start,
2516        .next  = softnet_seq_next,
2517        .stop  = softnet_seq_stop,
2518        .show  = softnet_seq_show,
2519};
2520
2521static int softnet_seq_open(struct inode *inode, struct file *file)
2522{
2523        return seq_open(file, &softnet_seq_ops);
2524}
2525
2526static const struct file_operations softnet_seq_fops = {
2527        .owner   = THIS_MODULE,
2528        .open    = softnet_seq_open,
2529        .read    = seq_read,
2530        .llseek  = seq_lseek,
2531        .release = seq_release,
2532};
2533
2534static void *ptype_get_idx(loff_t pos)
2535{
2536        struct packet_type *pt = NULL;
2537        loff_t i = 0;
2538        int t;
2539
2540        list_for_each_entry_rcu(pt, &ptype_all, list) {
2541                if (i == pos)
2542                        return pt;
2543                ++i;
2544        }
2545
2546        for (t = 0; t < 16; t++) {
2547                list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2548                        if (i == pos)
2549                                return pt;
2550                        ++i;
2551                }
2552        }
2553        return NULL;
2554}
2555
2556static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2557{
2558        rcu_read_lock();
2559        return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2560}
2561
2562static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2563{
2564        struct packet_type *pt;
2565        struct list_head *nxt;
2566        int hash;
2567
2568        ++*pos;
2569        if (v == SEQ_START_TOKEN)
2570                return ptype_get_idx(0);
2571
2572        pt = v;
2573        nxt = pt->list.next;
2574        if (pt->type == htons(ETH_P_ALL)) {
2575                if (nxt != &ptype_all)
2576                        goto found;
2577                hash = 0;
2578                nxt = ptype_base[0].next;
2579        } else
2580                hash = ntohs(pt->type) & 15;
2581
2582        while (nxt == &ptype_base[hash]) {
2583                if (++hash >= 16)
2584                        return NULL;
2585                nxt = ptype_base[hash].next;
2586        }
2587found:
2588        return list_entry(nxt, struct packet_type, list);
2589}
2590
2591static void ptype_seq_stop(struct seq_file *seq, void *v)
2592{
2593        rcu_read_unlock();
2594}
2595
2596static void ptype_seq_decode(struct seq_file *seq, void *sym)
2597{
2598#ifdef CONFIG_KALLSYMS
2599        unsigned long offset = 0, symsize;
2600        const char *symname;
2601        char *modname;
2602        char namebuf[128];
2603
2604        symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2605                                  &modname, namebuf);
2606
2607        if (symname) {
2608                char *delim = ":";
2609
2610                if (!modname)
2611                        modname = delim = "";
2612                seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2613                           symname, offset);
2614                return;
2615        }
2616#endif
2617
2618        seq_printf(seq, "[%p]", sym);
2619}
2620
2621static int ptype_seq_show(struct seq_file *seq, void *v)
2622{
2623        struct packet_type *pt = v;
2624
2625        if (v == SEQ_START_TOKEN)
2626                seq_puts(seq, "Type Device      Function\n");
2627        else {
2628                if (pt->type == htons(ETH_P_ALL))
2629                        seq_puts(seq, "ALL ");
2630                else
2631                        seq_printf(seq, "%04x", ntohs(pt->type));
2632
2633                seq_printf(seq, " %-8s ",
2634                           pt->dev ? pt->dev->name : "");
2635                ptype_seq_decode(seq,  pt->func);
2636                seq_putc(seq, '\n');
2637        }
2638
2639        return 0;
2640}
2641
2642static const struct seq_operations ptype_seq_ops = {
2643        .start = ptype_seq_start,
2644        .next  = ptype_seq_next,
2645        .stop  = ptype_seq_stop,
2646        .show  = ptype_seq_show,
2647};
2648
2649static int ptype_seq_open(struct inode *inode, struct file *file)
2650{
2651        return seq_open(file, &ptype_seq_ops);
2652}
2653
2654static const struct file_operations ptype_seq_fops = {
2655        .owner   = THIS_MODULE,
2656        .open    = ptype_seq_open,
2657        .read    = seq_read,
2658        .llseek  = seq_lseek,
2659        .release = seq_release,
2660};
2661
2662
2663static int __net_init dev_proc_net_init(struct net *net)
2664{
2665        int rc = -ENOMEM;
2666
2667        if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
2668                goto out;
2669        if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
2670                goto out_dev;
2671        if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
2672                goto out_softnet;
2673
2674        if (wext_proc_init(net))
2675                goto out_ptype;
2676        rc = 0;
2677out:
2678        return rc;
2679out_ptype:
2680        proc_net_remove(net, "ptype");
2681out_softnet:
2682        proc_net_remove(net, "softnet_stat");
2683out_dev:
2684        proc_net_remove(net, "dev");
2685        goto out;
2686}
2687
2688static void __net_exit dev_proc_net_exit(struct net *net)
2689{
2690        wext_proc_exit(net);
2691
2692        proc_net_remove(net, "ptype");
2693        proc_net_remove(net, "softnet_stat");
2694        proc_net_remove(net, "dev");
2695}
2696
2697static struct pernet_operations __net_initdata dev_proc_ops = {
2698        .init = dev_proc_net_init,
2699        .exit = dev_proc_net_exit,
2700};
2701
2702static int __init dev_proc_init(void)
2703{
2704        return register_pernet_subsys(&dev_proc_ops);
2705}
2706#else
2707#define dev_proc_init() 0
2708#endif  /* CONFIG_PROC_FS */
2709
2710
2711/**
2712 *      netdev_set_master       -       set up master/slave pair
2713 *      @slave: slave device
2714 *      @master: new master device
2715 *
2716 *      Changes the master device of the slave. Pass %NULL to break the
2717 *      bonding. The caller must hold the RTNL semaphore. On a failure
2718 *      a negative errno code is returned. On success the reference counts
2719 *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2720 *      function returns zero.
2721 */
2722int netdev_set_master(struct net_device *slave, struct net_device *master)
2723{
2724        struct net_device *old = slave->master;
2725
2726        ASSERT_RTNL();
2727
2728        if (master) {
2729                if (old)
2730                        return -EBUSY;
2731                dev_hold(master);
2732        }
2733
2734        slave->master = master;
2735
2736        synchronize_net();
2737
2738        if (old)
2739                dev_put(old);
2740
2741        if (master)
2742                slave->flags |= IFF_SLAVE;
2743        else
2744                slave->flags &= ~IFF_SLAVE;
2745
2746        rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2747        return 0;
2748}
2749
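/* Illustrative sketch (not part of this file): how a bonding-style driver
 * might use netdev_set_master() to enslave and release a device.  The
 * RTNL semaphore must already be held, as documented above.
 */
static int example_enslave(struct net_device *master,
                           struct net_device *slave)
{
        int err;

        ASSERT_RTNL();

        err = netdev_set_master(slave, master); /* -EBUSY if already bonded */
        if (err)
                return err;

        /* ... driver-specific setup of the aggregate goes here ... */
        return 0;
}

static void example_release(struct net_device *slave)
{
        ASSERT_RTNL();
        netdev_set_master(slave, NULL);         /* break the bond */
}
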
2750static void __dev_set_promiscuity(struct net_device *dev, int inc)
2751{
2752        unsigned short old_flags = dev->flags;
2753
2754        ASSERT_RTNL();
2755
2756        if ((dev->promiscuity += inc) == 0)
2757                dev->flags &= ~IFF_PROMISC;
2758        else
2759                dev->flags |= IFF_PROMISC;
2760        if (dev->flags != old_flags) {
2761                printk(KERN_INFO "device %s %s promiscuous mode\n",
2762                       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2763                                                               "left");
2764                audit_log(current->audit_context, GFP_ATOMIC,
2765                        AUDIT_ANOM_PROMISCUOUS,
2766                        "dev=%s prom=%d old_prom=%d auid=%u",
2767                        dev->name, (dev->flags & IFF_PROMISC),
2768                        (old_flags & IFF_PROMISC),
2769                        audit_get_loginuid(current->audit_context));
2770
2771                if (dev->change_rx_flags)
2772                        dev->change_rx_flags(dev, IFF_PROMISC);
2773        }
2774}
2775
2776/**
2777 *      dev_set_promiscuity     - update promiscuity count on a device
2778 *      @dev: device
2779 *      @inc: modifier
2780 *
2781 *      Add or remove promiscuity from a device. While the count in the device
2782 *      remains above zero the interface remains promiscuous. Once it hits zero
2783 *      the device reverts back to normal filtering operation. A negative inc
2784 *      value is used to drop promiscuity on the device.
2785 */
2786void dev_set_promiscuity(struct net_device *dev, int inc)
2787{
2788        unsigned short old_flags = dev->flags;
2789
2790        __dev_set_promiscuity(dev, inc);
2791        if (dev->flags != old_flags)
2792                dev_set_rx_mode(dev);
2793}
2794
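/* Illustrative sketch (not part of this file): a packet-capture user
 * raises the promiscuity count while listening and lowers it when done;
 * because it is a counter, several listeners can coexist.  Callers must
 * hold the RTNL semaphore.  dev_set_allmulti() below follows the same
 * counting pattern for multicast reception.
 */
static void example_capture_start(struct net_device *dev)
{
        ASSERT_RTNL();
        dev_set_promiscuity(dev, 1);    /* count 0 -> 1 enters promisc mode */
}

static void example_capture_stop(struct net_device *dev)
{
        ASSERT_RTNL();
        dev_set_promiscuity(dev, -1);   /* count 1 -> 0 leaves promisc mode */
}
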
2795/**
2796 *      dev_set_allmulti        - update allmulti count on a device
2797 *      @dev: device
2798 *      @inc: modifier
2799 *
2800 *      Add or remove reception of all multicast frames on a device. While the
2801 *      count in the device remains above zero the interface keeps listening
2802 *      to all multicast frames. Once it hits zero the device reverts back to
2803 *      normal filtering operation. A negative @inc value is used to drop the
2804 *      counter when releasing a resource that needed all multicasts.
2805 */
2806
2807void dev_set_allmulti(struct net_device *dev, int inc)
2808{
2809        unsigned short old_flags = dev->flags;
2810
2811        ASSERT_RTNL();
2812
2813        dev->flags |= IFF_ALLMULTI;
2814        if ((dev->allmulti += inc) == 0)
2815                dev->flags &= ~IFF_ALLMULTI;
2816        if (dev->flags ^ old_flags) {
2817                if (dev->change_rx_flags)
2818                        dev->change_rx_flags(dev, IFF_ALLMULTI);
2819                dev_set_rx_mode(dev);
2820        }
2821}
2822
2823/*
2824 *      Upload the unicast and multicast address lists to the device and
2825 *      configure RX filtering. When the device doesn't support unicast
2826 *      filtering it is put in promiscuous mode while unicast addresses
2827 *      are present.
2828 */
2829void __dev_set_rx_mode(struct net_device *dev)
2830{
2831        /* dev_open will call this function so the list will stay sane. */
2832        if (!(dev->flags&IFF_UP))
2833                return;
2834
2835        if (!netif_device_present(dev))
2836                return;
2837
2838        if (dev->set_rx_mode)
2839                dev->set_rx_mode(dev);
2840        else {
2841                /* Unicast address changes may only happen under the rtnl,
2842                 * therefore calling __dev_set_promiscuity here is safe.
2843                 */
2844                if (dev->uc_count > 0 && !dev->uc_promisc) {
2845                        __dev_set_promiscuity(dev, 1);
2846                        dev->uc_promisc = 1;
2847                } else if (dev->uc_count == 0 && dev->uc_promisc) {
2848                        __dev_set_promiscuity(dev, -1);
2849                        dev->uc_promisc = 0;
2850                }
2851
2852                if (dev->set_multicast_list)
2853                        dev->set_multicast_list(dev);
2854        }
2855}
2856
2857void dev_set_rx_mode(struct net_device *dev)
2858{
2859        netif_tx_lock_bh(dev);
2860        __dev_set_rx_mode(dev);
2861        netif_tx_unlock_bh(dev);
2862}
2863
2864int __dev_addr_delete(struct dev_addr_list **list, int *count,
2865                      void *addr, int alen, int glbl)
2866{
2867        struct dev_addr_list *da;
2868
2869        for (; (da = *list) != NULL; list = &da->next) {
2870                if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2871                    alen == da->da_addrlen) {
2872                        if (glbl) {
2873                                int old_glbl = da->da_gusers;
2874                                da->da_gusers = 0;
2875                                if (old_glbl == 0)
2876                                        break;
2877                        }
2878                        if (--da->da_users)
2879                                return 0;
2880
2881                        *list = da->next;
2882                        kfree(da);
2883                        (*count)--;
2884                        return 0;
2885                }
2886        }
2887        return -ENOENT;
2888}
2889
2890int __dev_addr_add(struct dev_addr_list **list, int *count,
2891                   void *addr, int alen, int glbl)
2892{
2893        struct dev_addr_list *da;
2894
2895        for (da = *list; da != NULL; da = da->next) {
2896                if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2897                    da->da_addrlen == alen) {
2898                        if (glbl) {
2899                                int old_glbl = da->da_gusers;
2900                                da->da_gusers = 1;
2901                                if (old_glbl)
2902                                        return 0;
2903                        }
2904                        da->da_users++;
2905                        return 0;
2906                }
2907        }
2908
2909        da = kmalloc(sizeof(*da), GFP_ATOMIC);
2910        if (da == NULL)
2911                return -ENOMEM;
2912        memcpy(da->da_addr, addr, alen);
2913        da->da_addrlen = alen;
2914        da->da_users = 1;
2915        da->da_gusers = glbl ? 1 : 0;
2916        da->next = *list;
2917        *list = da;
2918        (*count)++;
2919        return 0;
2920}
2921
2922/**
2923 *      dev_unicast_delete      - Release secondary unicast address.
2924 *      @dev: device
2925 *      @addr: address to delete
2926 *      @alen: length of @addr
2927 *
2928 *      Release reference to a secondary unicast address and remove it
2929 *      from the device if the reference count drops to zero.
2930 *
2931 *      The caller must hold the rtnl_mutex.
2932 */
2933int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2934{
2935        int err;
2936
2937        ASSERT_RTNL();
2938
2939        netif_tx_lock_bh(dev);
2940        err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2941        if (!err)
2942                __dev_set_rx_mode(dev);
2943        netif_tx_unlock_bh(dev);
2944        return err;
2945}
2946EXPORT_SYMBOL(dev_unicast_delete);
2947
2948/**
2949 *      dev_unicast_add         - add a secondary unicast address
2950 *      @dev: device
2951 *      @addr: address to add
2952 *      @alen: length of @addr
2953 *
2954 *      Add a secondary unicast address to the device or increase
2955 *      the reference count if it already exists.
2956 *
2957 *      The caller must hold the rtnl_mutex.
2958 */
2959int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2960{
2961        int err;
2962
2963        ASSERT_RTNL();
2964
2965        netif_tx_lock_bh(dev);
2966        err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2967        if (!err)
2968                __dev_set_rx_mode(dev);
2969        netif_tx_unlock_bh(dev);
2970        return err;
2971}
2972EXPORT_SYMBOL(dev_unicast_add);
2973
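/* Illustrative sketch (not part of this file): adding and removing a
 * secondary unicast MAC address, e.g. for a virtual interface layered on
 * top of @dev.  Both helpers take the rtnl_mutex as the kernel-doc above
 * requires.
 */
static int example_add_secondary_mac(struct net_device *dev,
                                     unsigned char *mac)
{
        int err;

        rtnl_lock();
        err = dev_unicast_add(dev, mac, ETH_ALEN);
        rtnl_unlock();
        return err;
}

static void example_del_secondary_mac(struct net_device *dev,
                                      unsigned char *mac)
{
        rtnl_lock();
        dev_unicast_delete(dev, mac, ETH_ALEN);
        rtnl_unlock();
}
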
2974static void __dev_addr_discard(struct dev_addr_list **list)
2975{
2976        struct dev_addr_list *tmp;
2977
2978        while (*list != NULL) {
2979                tmp = *list;
2980                *list = tmp->next;
2981                if (tmp->da_users > tmp->da_gusers)
2982                        printk("__dev_addr_discard: address leakage! "
2983                               "da_users=%d\n", tmp->da_users);
2984                kfree(tmp);
2985        }
2986}
2987
2988static void dev_addr_discard(struct net_device *dev)
2989{
2990        netif_tx_lock_bh(dev);
2991
2992        __dev_addr_discard(&dev->uc_list);
2993        dev->uc_count = 0;
2994
2995        __dev_addr_discard(&dev->mc_list);
2996        dev->mc_count = 0;
2997
2998        netif_tx_unlock_bh(dev);
2999}
3000
3001unsigned dev_get_flags(const struct net_device *dev)
3002{
3003        unsigned flags;
3004
3005        flags = (dev->flags & ~(IFF_PROMISC |
3006                                IFF_ALLMULTI |
3007                                IFF_RUNNING |
3008                                IFF_LOWER_UP |
3009                                IFF_DORMANT)) |
3010                (dev->gflags & (IFF_PROMISC |
3011                                IFF_ALLMULTI));
3012
3013        if (netif_running(dev)) {
3014                if (netif_oper_up(dev))
3015                        flags |= IFF_RUNNING;
3016                if (netif_carrier_ok(dev))
3017                        flags |= IFF_LOWER_UP;
3018                if (netif_dormant(dev))
3019                        flags |= IFF_DORMANT;
3020        }
3021
3022        return flags;
3023}
3024
3025int dev_change_flags(struct net_device *dev, unsigned flags)
3026{
3027        int ret, changes;
3028        int old_flags = dev->flags;
3029
3030        ASSERT_RTNL();
3031
3032        /*
3033         *      Set the flags on our device.
3034         */
3035
3036        dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3037                               IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3038                               IFF_AUTOMEDIA)) |
3039                     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3040                                    IFF_ALLMULTI));
3041
3042        /*
3043         *      Load in the correct multicast list now the flags have changed.
3044         */
3045
3046        if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3047                dev->change_rx_flags(dev, IFF_MULTICAST);
3048
3049        dev_set_rx_mode(dev);
3050
3051        /*
3052         *      Have we downed the interface? We handle IFF_UP ourselves
3053         *      according to user attempts to set it, rather than blindly
3054         *      setting it.
3055         */
3056
3057        ret = 0;
3058        if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
3059                ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3060
3061                if (!ret)
3062                        dev_set_rx_mode(dev);
3063        }
3064
3065        if (dev->flags & IFF_UP &&
3066            ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3067                                          IFF_VOLATILE)))
3068                call_netdevice_notifiers(NETDEV_CHANGE, dev);
3069
3070        if ((flags ^ dev->gflags) & IFF_PROMISC) {
3071                int inc = (flags & IFF_PROMISC) ? +1 : -1;
3072                dev->gflags ^= IFF_PROMISC;
3073                dev_set_promiscuity(dev, inc);
3074        }
3075
3076        /* NOTE: the order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3077           is important. Some (broken) drivers set IFF_PROMISC when
3078           IFF_ALLMULTI is requested, without asking us and without reporting.
3079         */
3080        if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3081                int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3082                dev->gflags ^= IFF_ALLMULTI;
3083                dev_set_allmulti(dev, inc);
3084        }
3085
3086        /* Exclude state transition flags, already notified */
3087        changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3088        if (changes)
3089                rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
3090
3091        return ret;
3092}
3093
3094int dev_set_mtu(struct net_device *dev, int new_mtu)
3095{
3096        int err;
3097
3098        if (new_mtu == dev->mtu)
3099                return 0;
3100
3101        /*      MTU must be positive.    */
3102        if (new_mtu < 0)
3103                return -EINVAL;
3104
3105        if (!netif_device_present(dev))
3106                return -ENODEV;
3107
3108        err = 0;
3109        if (dev->change_mtu)
3110                err = dev->change_mtu(dev, new_mtu);
3111        else
3112                dev->mtu = new_mtu;
3113        if (!err && dev->flags & IFF_UP)
3114                call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
3115        return err;
3116}
3117
3118int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3119{
3120        int err;
3121
3122        if (!dev->set_mac_address)
3123                return -EOPNOTSUPP;
3124        if (sa->sa_family != dev->type)
3125                return -EINVAL;
3126        if (!netif_device_present(dev))
3127                return -ENODEV;
3128        err = dev->set_mac_address(dev, sa);
3129        if (!err)
3130                call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3131        return err;
3132}
3133
3134/*
3135 *      Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
3136 */
3137static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
3138{
3139        int err;
3140        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3141
3142        if (!dev)
3143                return -ENODEV;
3144
3145        switch (cmd) {
3146                case SIOCGIFFLAGS:      /* Get interface flags */
3147                        ifr->ifr_flags = dev_get_flags(dev);
3148                        return 0;
3149
3150                case SIOCGIFMETRIC:     /* Get the metric on the interface
3151                                           (currently unused) */
3152                        ifr->ifr_metric = 0;
3153                        return 0;
3154
3155                case SIOCGIFMTU:        /* Get the MTU of a device */
3156                        ifr->ifr_mtu = dev->mtu;
3157                        return 0;
3158
3159                case SIOCGIFHWADDR:
3160                        if (!dev->addr_len)
3161                                memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3162                        else
3163                                memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3164                                       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3165                        ifr->ifr_hwaddr.sa_family = dev->type;
3166                        return 0;
3167
3168                case SIOCGIFSLAVE:
3169                        err = -EINVAL;
3170                        break;
3171
3172                case SIOCGIFMAP:
3173                        ifr->ifr_map.mem_start = dev->mem_start;
3174                        ifr->ifr_map.mem_end   = dev->mem_end;
3175                        ifr->ifr_map.base_addr = dev->base_addr;
3176                        ifr->ifr_map.irq       = dev->irq;
3177                        ifr->ifr_map.dma       = dev->dma;
3178                        ifr->ifr_map.port      = dev->if_port;
3179                        return 0;
3180
3181                case SIOCGIFINDEX:
3182                        ifr->ifr_ifindex = dev->ifindex;
3183                        return 0;
3184
3185                case SIOCGIFTXQLEN:
3186                        ifr->ifr_qlen = dev->tx_queue_len;
3187                        return 0;
3188
3189                default:
3190                        /* dev_ioctl() should ensure this case
3191                         * is never reached
3192                         */
3193                        WARN_ON(1);
3194                        err = -EINVAL;
3195                        break;
3196
3197        }
3198        return err;
3199}
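/*
 * Illustrative userspace sketch (not kernel code; "eth0" is an assumed
 * interface name): how the read-only SIOCGIFHWADDR branch above is
 * typically reached. On success the hardware address is left in
 * ifr.ifr_hwaddr.sa_data.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ioctl(fd, SIOCGIFHWADDR, &ifr);
 */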
3200
3201/*
3202 *      Perform the SIOCxIFxxx calls, inside rtnl_lock()
3203 */
3204static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3205{
3206        int err;
3207        struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
3208
3209        if (!dev)
3210                return -ENODEV;
3211
3212        switch (cmd) {
3213                case SIOCSIFFLAGS:      /* Set interface flags */
3214                        return dev_change_flags(dev, ifr->ifr_flags);
3215
3216                case SIOCSIFMETRIC:     /* Set the metric on the interface
3217                                           (currently unused) */
3218                        return -EOPNOTSUPP;
3219
3220                case SIOCSIFMTU:        /* Set the MTU of a device */
3221                        return dev_set_mtu(dev, ifr->ifr_mtu);
3222
3223                case SIOCSIFHWADDR:
3224                        return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3225
3226                case SIOCSIFHWBROADCAST:
3227                        if (ifr->ifr_hwaddr.sa_family != dev->type)
3228                                return -EINVAL;
3229                        memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3230                               min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3231                        call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
3232                        return 0;
3233
3234                case SIOCSIFMAP:
3235                        if (dev->set_config) {
3236                                if (!netif_device_present(dev))
3237                                        return -ENODEV;
3238                                return dev->set_config(dev, &ifr->ifr_map);
3239                        }
3240                        return -EOPNOTSUPP;
3241
3242                case SIOCADDMULTI:
3243                        if (!dev->set_multicast_list ||
3244                            ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3245                                return -EINVAL;
3246                        if (!netif_device_present(dev))
3247                                return -ENODEV;
3248                        return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3249                                          dev->addr_len, 1);
3250
3251                case SIOCDELMULTI:
3252                        if (!dev->set_multicast_list ||
3253                            ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3254                                return -EINVAL;
3255                        if (!netif_device_present(dev))
3256                                return -ENODEV;
3257                        return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3258                                             dev->addr_len, 1);
3259
3260                case SIOCSIFTXQLEN:
3261                        if (ifr->ifr_qlen < 0)
3262                                return -EINVAL;
3263                        dev->tx_queue_len = ifr->ifr_qlen;
3264                        return 0;
3265
3266                case SIOCSIFNAME:
3267                        ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3268                        return dev_change_name(dev, ifr->ifr_newname);
3269
3270                /*
3271                 *      Unknown or private ioctl
3272                 */
3273
3274                default:
3275                        if ((cmd >= SIOCDEVPRIVATE &&
3276                            cmd <= SIOCDEVPRIVATE + 15) ||
3277                            cmd == SIOCBONDENSLAVE ||
3278                            cmd == SIOCBONDRELEASE ||
3279                            cmd == SIOCBONDSETHWADDR ||
3280                            cmd == SIOCBONDSLAVEINFOQUERY ||
3281                            cmd == SIOCBONDINFOQUERY ||
3282                            cmd == SIOCBONDCHANGEACTIVE ||
3283                            cmd == SIOCGMIIPHY ||
3284                            cmd == SIOCGMIIREG ||
3285                            cmd == SIOCSMIIREG ||
3286                            cmd == SIOCBRADDIF ||
3287                            cmd == SIOCBRDELIF ||
3288                            cmd == SIOCWANDEV) {
3289                                err = -EOPNOTSUPP;
3290                                if (dev->do_ioctl) {
3291                                        if (netif_device_present(dev))
3292                                                err = dev->do_ioctl(dev, ifr,
3293                                                                    cmd);
3294                                        else
3295                                                err = -ENODEV;
3296                                }
3297                        } else
3298                                err = -EINVAL;
3299
3300        }
3301        return err;
3302}
3303
3304/*
3305 *      This function handles all "interface"-type I/O control requests. The actual
3306 *      'doing' part of this is dev_ifsioc above.
3307 */
3308
3309/**
3310 *      dev_ioctl       -       network device ioctl
3311 *      @net: the applicable net namespace
3312 *      @cmd: command to issue
3313 *      @arg: pointer to a struct ifreq in user space
3314 *
3315 *      Issue ioctl functions to devices. This is normally called by the
3316 *      user space syscall interfaces but can sometimes be useful for
3317 *      other purposes. The return value is the return from the syscall if
3318 *      positive or a negative errno code on error.
3319 */
3320
3321int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
3322{
3323        struct ifreq ifr;
3324        int ret;
3325        char *colon;
3326
3327        /* One special case: SIOCGIFCONF takes ifconf argument
3328           and requires shared lock, because it sleeps writing
3329           to user space.
3330         */
3331
3332        if (cmd == SIOCGIFCONF) {
3333                rtnl_lock();
3334                ret = dev_ifconf(net, (char __user *) arg);
3335                rtnl_unlock();
3336                return ret;
3337        }
3338        if (cmd == SIOCGIFNAME)
3339                return dev_ifname(net, (struct ifreq __user *)arg);
3340
3341        if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3342                return -EFAULT;
3343
3344        ifr.ifr_name[IFNAMSIZ-1] = 0;
3345
3346        colon = strchr(ifr.ifr_name, ':');
3347        if (colon)
3348                *colon = 0;
3349
3350        /*
3351         *      See which interface the caller is talking about.
3352         */
3353
3354        switch (cmd) {
3355                /*
3356                 *      These ioctl calls:
3357                 *      - can be done by all.
3358                 *      - atomic and do not require locking.
3359                 *      - return a value
3360                 */
3361                case SIOCGIFFLAGS:
3362                case SIOCGIFMETRIC:
3363                case SIOCGIFMTU:
3364                case SIOCGIFHWADDR:
3365                case SIOCGIFSLAVE:
3366                case SIOCGIFMAP:
3367                case SIOCGIFINDEX:
3368                case SIOCGIFTXQLEN:
3369                        dev_load(net, ifr.ifr_name);
3370                        read_lock(&dev_base_lock);
3371                        ret = dev_ifsioc_locked(net, &ifr, cmd);
3372                        read_unlock(&dev_base_lock);
3373                        if (!ret) {
3374                                if (colon)
3375                                        *colon = ':';
3376                                if (copy_to_user(arg, &ifr,
3377                                                 sizeof(struct ifreq)))
3378                                        ret = -EFAULT;
3379                        }
3380                        return ret;
3381
3382                case SIOCETHTOOL:
3383                        dev_load(net, ifr.ifr_name);
3384                        rtnl_lock();
3385                        ret = dev_ethtool(net, &ifr);
3386                        rtnl_unlock();
3387                        if (!ret) {
3388                                if (colon)
3389                                        *colon = ':';
3390                                if (copy_to_user(arg, &ifr,
3391                                                 sizeof(struct ifreq)))
3392                                        ret = -EFAULT;
3393                        }
3394                        return ret;
3395
3396                /*
3397                 *      These ioctl calls:
3398                 *      - require superuser power.
3399                 *      - require strict serialization.
3400                 *      - return a value
3401                 */
3402                case SIOCGMIIPHY:
3403                case SIOCGMIIREG:
3404                case SIOCSIFNAME:
3405                        if (!capable(CAP_NET_ADMIN))
3406                                return -EPERM;
3407                        dev_load(net, ifr.ifr_name);
3408                        rtnl_lock();
3409                        ret = dev_ifsioc(net, &ifr, cmd);
3410                        rtnl_unlock();
3411                        if (!ret) {
3412                                if (colon)
3413                                        *colon = ':';
3414                                if (copy_to_user(arg, &ifr,
3415                                                 sizeof(struct ifreq)))
3416                                        ret = -EFAULT;
3417                        }
3418                        return ret;
3419
3420                /*
3421                 *      These ioctl calls:
3422                 *      - require superuser power.
3423                 *      - require strict serialization.
3424                 *      - do not return a value
3425                 */
3426                case SIOCSIFFLAGS:
3427                case SIOCSIFMETRIC:
3428                case SIOCSIFMTU:
3429                case SIOCSIFMAP:
3430                case SIOCSIFHWADDR:
3431                case SIOCSIFSLAVE:
3432                case SIOCADDMULTI:
3433                case SIOCDELMULTI:
3434                case SIOCSIFHWBROADCAST:
3435                case SIOCSIFTXQLEN:
3436                case SIOCSMIIREG:
3437                case SIOCBONDENSLAVE:
3438                case SIOCBONDRELEASE:
3439                case SIOCBONDSETHWADDR:
3440                case SIOCBONDCHANGEACTIVE:
3441                case SIOCBRADDIF:
3442                case SIOCBRDELIF:
3443                        if (!capable(CAP_NET_ADMIN))
3444                                return -EPERM;
3445                        /* fall through */
3446                case SIOCBONDSLAVEINFOQUERY:
3447                case SIOCBONDINFOQUERY:
3448                        dev_load(net, ifr.ifr_name);
3449                        rtnl_lock();
3450                        ret = dev_ifsioc(net, &ifr, cmd);
3451                        rtnl_unlock();
3452                        return ret;
3453
3454                case SIOCGIFMEM:
3455                        /* Get the per device memory space. We can add this but
3456                         * currently do not support it */
3457                case SIOCSIFMEM:
3458                        /* Set the per device memory buffer space.
3459                         * Not applicable in our case */
3460                case SIOCSIFLINK:
3461                        return -EINVAL;
3462
3463                /*
3464                 *      Unknown or private ioctl.
3465                 */
3466                default:
3467                        if (cmd == SIOCWANDEV ||
3468                            (cmd >= SIOCDEVPRIVATE &&
3469                             cmd <= SIOCDEVPRIVATE + 15)) {
3470                                dev_load(net, ifr.ifr_name);
3471                                rtnl_lock();
3472                                ret = dev_ifsioc(net, &ifr, cmd);
3473                                rtnl_unlock();
3474                                if (!ret && copy_to_user(arg, &ifr,
3475                                                         sizeof(struct ifreq)))
3476                                        ret = -EFAULT;
3477                                return ret;
3478                        }
3479                        /* Take care of Wireless Extensions */
3480                        if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3481                                return wext_handle_ioctl(net, &ifr, cmd, arg);
3482                        return -EINVAL;
3483        }
3484}
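/*
 * Illustrative userspace sketch (not kernel code; "eth0" is an assumed
 * interface name): the syscall side of the SIOCGIFMTU case handled above.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu %d\n", ifr.ifr_mtu);
 */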
3485
3486
3487/**
3488 *      dev_new_index   -       allocate an ifindex
3489 *      @net: the applicable net namespace
3490 *
3491 *      Returns a suitable unique value for a new device interface
3492 *      number.  The caller must hold the rtnl semaphore or the
3493 *      dev_base_lock to be sure it remains unique.
3494 */
3495static int dev_new_index(struct net *net)
3496{
3497        static int ifindex;
3498        for (;;) {
3499                if (++ifindex <= 0)
3500                        ifindex = 1;
3501                if (!__dev_get_by_index(net, ifindex))
3502                        return ifindex;
3503        }
3504}
3505
3506/* Delayed registration/unregistration */
3507static DEFINE_SPINLOCK(net_todo_list_lock);
3508static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3509
3510static void net_set_todo(struct net_device *dev)
3511{
3512        spin_lock(&net_todo_list_lock);
3513        list_add_tail(&dev->todo_list, &net_todo_list);
3514        spin_unlock(&net_todo_list_lock);
3515}
3516
3517static void rollback_registered(struct net_device *dev)
3518{
3519        BUG_ON(dev_boot_phase);
3520        ASSERT_RTNL();
3521
3522        /* Some devices call this without having registered, to unwind a failed initialization. */
3523        if (dev->reg_state == NETREG_UNINITIALIZED) {
3524                printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3525                                  "was registered\n", dev->name, dev);
3526
3527                WARN_ON(1);
3528                return;
3529        }
3530
3531        BUG_ON(dev->reg_state != NETREG_REGISTERED);
3532
3533        /* If device is running, close it first. */
3534        dev_close(dev);
3535
3536        /* And unlink it from device chain. */
3537        unlist_netdevice(dev);
3538
3539        dev->reg_state = NETREG_UNREGISTERING;
3540
3541        synchronize_net();
3542
3543        /* Shutdown queueing discipline. */
3544        dev_shutdown(dev);
3545
3546
3547        /* Notify protocols, that we are about to destroy
3548           this device. They should clean all the things.
3549        */
3550        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3551
3552        /*
3553         *      Flush the unicast and multicast chains
3554         */
3555        dev_addr_discard(dev);
3556
3557        if (dev->uninit)
3558                dev->uninit(dev);
3559
3560        /* Notifier chain MUST detach us from master device. */
3561        BUG_TRAP(!dev->master);
3562
3563        /* Remove entries from kobject tree */
3564        netdev_unregister_kobject(dev);
3565
3566        synchronize_net();
3567
3568        dev_put(dev);
3569}
3570
3571/**
3572 *      register_netdevice      - register a network device
3573 *      @dev: device to register
3574 *
3575 *      Take a completed network device structure and add it to the kernel
3576 *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3577 *      chain. 0 is returned on success. A negative errno code is returned
3578 *      on a failure to set up the device, or if the name is a duplicate.
3579 *
3580 *      Callers must hold the rtnl semaphore. You may want
3581 *      register_netdev() instead of this.
3582 *
3583 *      BUGS:
3584 *      The locking appears insufficient to guarantee two parallel registers
3585 *      will not get the same name.
3586 */
3587
3588int register_netdevice(struct net_device *dev)
3589{
3590        struct hlist_head *head;
3591        struct hlist_node *p;
3592        int ret;
3593        struct net *net;
3594
3595        BUG_ON(dev_boot_phase);
3596        ASSERT_RTNL();
3597
3598        might_sleep();
3599
3600        /* When net_devices are persistent, this will be fatal. */
3601        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3602        BUG_ON(!dev->nd_net);
3603        net = dev->nd_net;
3604
3605        spin_lock_init(&dev->queue_lock);
3606        spin_lock_init(&dev->_xmit_lock);
3607        netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3608        dev->xmit_lock_owner = -1;
3609        spin_lock_init(&dev->ingress_lock);
3610
3611        dev->iflink = -1;
3612
3613        /* Init, if this function is available */
3614        if (dev->init) {
3615                ret = dev->init(dev);
3616                if (ret) {
3617                        if (ret > 0)
3618                                ret = -EIO;
3619                        goto out;
3620                }
3621        }
3622
3623        if (!dev_valid_name(dev->name)) {
3624                ret = -EINVAL;
3625                goto err_uninit;
3626        }
3627
3628        dev->ifindex = dev_new_index(net);
3629        if (dev->iflink == -1)
3630                dev->iflink = dev->ifindex;
3631
3632        /* Check for existence of name */
3633        head = dev_name_hash(net, dev->name);
3634        hlist_for_each(p, head) {
3635                struct net_device *d
3636                        = hlist_entry(p, struct net_device, name_hlist);
3637                if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3638                        ret = -EEXIST;
3639                        goto err_uninit;
3640                }
3641        }
3642
3643        /* Fix illegal checksum combinations */
3644        if ((dev->features & NETIF_F_HW_CSUM) &&
3645            (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3646                printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3647                       dev->name);
3648                dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3649        }
3650
3651        if ((dev->features & NETIF_F_NO_CSUM) &&
3652            (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3653                printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3654                       dev->name);
3655                dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3656        }
3657
3658
3659        /* Fix illegal SG+CSUM combinations. */
3660        if ((dev->features & NETIF_F_SG) &&
3661            !(dev->features & NETIF_F_ALL_CSUM)) {
3662                printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3663                       dev->name);
3664                dev->features &= ~NETIF_F_SG;
3665        }
3666
3667        /* TSO requires that SG is present as well. */
3668        if ((dev->features & NETIF_F_TSO) &&
3669            !(dev->features & NETIF_F_SG)) {
3670                printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3671                       dev->name);
3672                dev->features &= ~NETIF_F_TSO;
3673        }
3674        if (dev->features & NETIF_F_UFO) {
3675                if (!(dev->features & NETIF_F_HW_CSUM)) {
3676                        printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3677                                        "NETIF_F_HW_CSUM feature.\n",
3678                                                        dev->name);
3679                        dev->features &= ~NETIF_F_UFO;
3680                }
3681                if (!(dev->features & NETIF_F_SG)) {
3682                        printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3683                                        "NETIF_F_SG feature.\n",
3684                                        dev->name);
3685                        dev->features &= ~NETIF_F_UFO;
3686                }
3687        }
3688
3689        ret = netdev_register_kobject(dev);
3690        if (ret)
3691                goto err_uninit;
3692        dev->reg_state = NETREG_REGISTERED;
3693
3694        /*
3695         *      Default initial state at registration is that the
3696         *      device is present.
3697         */
3698
3699        set_bit(__LINK_STATE_PRESENT, &dev->state);
3700
3701        dev_init_scheduler(dev);
3702        dev_hold(dev);
3703        list_netdevice(dev);
3704
3705        /* Notify protocols, that a new device appeared. */
3706        ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
3707        ret = notifier_to_errno(ret);
3708        if (ret) {
3709                rollback_registered(dev);
3710                dev->reg_state = NETREG_UNREGISTERED;
3711        }
3712
3713out:
3714        return ret;
3715
3716err_uninit:
3717        if (dev->uninit)
3718                dev->uninit(dev);
3719        goto out;
3720}
3721
3722/**
3723 *      register_netdev - register a network device
3724 *      @dev: device to register
3725 *
3726 *      Take a completed network device structure and add it to the kernel
3727 *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3728 *      chain. 0 is returned on success. A negative errno code is returned
3729 *      on a failure to set up the device, or if the name is a duplicate.
3730 *
3731 *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3732 *      and expands the device name if you passed a format string to
3733 *      alloc_netdev.
3734 */
3735int register_netdev(struct net_device *dev)
3736{
3737        int err;
3738
3739        rtnl_lock();
3740
3741        /*
3742         * If the name is a format string the caller wants us to do a
3743         * name allocation.
3744         */
3745        if (strchr(dev->name, '%')) {
3746                err = dev_alloc_name(dev, dev->name);
3747                if (err < 0)
3748                        goto out;
3749        }
3750
3751        err = register_netdevice(dev);
3752out:
3753        rtnl_unlock();
3754        return err;
3755}
3756EXPORT_SYMBOL(register_netdev);
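/*
 * Illustrative driver-side sketch (driver names "foo_priv"/"foo_setup" are
 * assumptions): the usual allocate/register pairing this wrapper supports,
 * with "foo%d" expanded to the first free name such as "foo0".
 *
 *	dev = alloc_netdev(sizeof(struct foo_priv), "foo%d", foo_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err)
 *		free_netdev(dev);
 */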
3757
3758/*
3759 * netdev_wait_allrefs - wait until all references are gone.
3760 *
3761 * This is called when unregistering network devices.
3762 *
3763 * Any protocol or device that holds a reference should register
3764 * for netdevice notification, and cleanup and put back the
3765 * reference if they receive an UNREGISTER event.
3766 * We can get stuck here if buggy protocols don't correctly
3767 * call dev_put.
3768 */
3769static void netdev_wait_allrefs(struct net_device *dev)
3770{
3771        unsigned long rebroadcast_time, warning_time;
3772
3773        rebroadcast_time = warning_time = jiffies;
3774        while (atomic_read(&dev->refcnt) != 0) {
3775                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3776                        rtnl_lock();
3777
3778                        /* Rebroadcast unregister notification */
3779                        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3780
3781                        if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3782                                     &dev->state)) {
3783                                /* We must not have linkwatch events
3784                                 * pending on unregister. If this
3785                                 * happens, we simply run the queue
3786                                 * unscheduled, resulting in a noop
3787                                 * for this device.
3788                                 */
3789                                linkwatch_run_queue();
3790                        }
3791
3792                        __rtnl_unlock();
3793
3794                        rebroadcast_time = jiffies;
3795                }
3796
3797                msleep(250);
3798
3799                if (time_after(jiffies, warning_time + 10 * HZ)) {
3800                        printk(KERN_EMERG "unregister_netdevice: "
3801                               "waiting for %s to become free. Usage "
3802                               "count = %d\n",
3803                               dev->name, atomic_read(&dev->refcnt));
3804                        warning_time = jiffies;
3805                }
3806        }
3807}
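/*
 * Illustrative sketch (names assumed) of the protocol netdev_wait_allrefs()
 * relies on: a holder pairs dev_hold() with a netdevice notifier that
 * releases the reference on NETDEV_UNREGISTER.
 *
 *	static int foo_netdev_event(struct notifier_block *nb,
 *				    unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UNREGISTER && dev == foo_held_dev)
 *			dev_put(dev);
 *		return NOTIFY_DONE;
 *	}
 */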
3808
3809/* The sequence is:
3810 *
3811 *      rtnl_lock();
3812 *      ...
3813 *      register_netdevice(x1);
3814 *      register_netdevice(x2);
3815 *      ...
3816 *      unregister_netdevice(y1);
3817 *      unregister_netdevice(y2);
3818 *      ...
3819 *      rtnl_unlock();
3820 *      free_netdev(y1);
3821 *      free_netdev(y2);
3822 *
3823 * We are invoked by rtnl_unlock() after it drops the semaphore.
3824 * This allows us to deal with problems:
3825 * 1) We can delete sysfs objects which invoke hotplug
3826 *    without deadlocking with linkwatch via keventd.
3827 * 2) Since we run with the RTNL semaphore not held, we can sleep
3828 *    safely in order to wait for the netdev refcnt to drop to zero.
3829 */
3830static DEFINE_MUTEX(net_todo_run_mutex);
3831void netdev_run_todo(void)
3832{
3833        struct list_head list;
3834
3835        /* Need to guard against multiple cpus getting out of order. */
3836        mutex_lock(&net_todo_run_mutex);
3837
3838        /* Not safe to do outside the semaphore.  We must not return
3839         * until all unregister events invoked by the local processor
3840         * have been completed (either by this todo run, or one on
3841         * another cpu).
3842         */
3843        if (list_empty(&net_todo_list))
3844                goto out;
3845
3846        /* Snapshot list, allow later requests */
3847        spin_lock(&net_todo_list_lock);
3848        list_replace_init(&net_todo_list, &list);
3849        spin_unlock(&net_todo_list_lock);
3850
3851        while (!list_empty(&list)) {
3852                struct net_device *dev
3853                        = list_entry(list.next, struct net_device, todo_list);
3854                list_del(&dev->todo_list);
3855
3856                if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3857                        printk(KERN_ERR "network todo '%s' but state %d\n",
3858                               dev->name, dev->reg_state);
3859                        dump_stack();
3860                        continue;
3861                }
3862
3863                dev->reg_state = NETREG_UNREGISTERED;
3864
3865                netdev_wait_allrefs(dev);
3866
3867                /* paranoia */
3868                BUG_ON(atomic_read(&dev->refcnt));
3869                BUG_TRAP(!dev->ip_ptr);
3870                BUG_TRAP(!dev->ip6_ptr);
3871                BUG_TRAP(!dev->dn_ptr);
3872
3873                if (dev->destructor)
3874                        dev->destructor(dev);
3875
3876                /* Free network device */
3877                kobject_put(&dev->dev.kobj);
3878        }
3879
3880out:
3881        mutex_unlock(&net_todo_run_mutex);
3882}
3883
3884static struct net_device_stats *internal_stats(struct net_device *dev)
3885{
3886        return &dev->stats;
3887}
3888
3889/**
3890 *      alloc_netdev_mq - allocate network device
3891 *      @sizeof_priv:   size of private data to allocate space for
3892 *      @name:          device name format string
3893 *      @setup:         callback to initialize device
3894 *      @queue_count:   the number of subqueues to allocate
3895 *
3896 *      Allocates a struct net_device with private data area for driver use
3897 *      and performs basic initialization.  Also allocates subquue structs
3898 *      for each queue on the device at the end of the netdevice.
3899 */
3900struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3901                void (*setup)(struct net_device *), unsigned int queue_count)
3902{
3903        void *p;
3904        struct net_device *dev;
3905        int alloc_size;
3906
3907        BUG_ON(strlen(name) >= sizeof(dev->name));
3908
3909        /* ensure 32-byte alignment of both the device and private area */
3910        alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3911                     (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3912                     ~NETDEV_ALIGN_CONST;
3913        alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3914
3915        p = kzalloc(alloc_size, GFP_KERNEL);
3916        if (!p) {
3917                printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3918                return NULL;
3919        }
3920
3921        dev = (struct net_device *)
3922                (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3923        dev->padded = (char *)dev - (char *)p;
3924        dev->nd_net = &init_net;
3925
3926        if (sizeof_priv) {
3927                dev->priv = ((char *)dev +
3928                             ((sizeof(struct net_device) +
3929                               (sizeof(struct net_device_subqueue) *
3930                                (queue_count - 1)) + NETDEV_ALIGN_CONST)
3931                              & ~NETDEV_ALIGN_CONST));
3932        }
3933
3934        dev->egress_subqueue_count = queue_count;
3935
3936        dev->get_stats = internal_stats;
3937        netpoll_netdev_init(dev);
3938        setup(dev);
3939        strcpy(dev->name, name);
3940        return dev;
3941}
3942EXPORT_SYMBOL(alloc_netdev_mq);
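/*
 * Illustrative sketch (driver names assumed): a device with four hardware
 * transmit queues allocating matching subqueue state.
 *
 *	dev = alloc_netdev_mq(sizeof(struct foo_priv), "foo%d",
 *			      foo_setup, 4);
 */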
3943
3944/**
3945 *      free_netdev - free network device
3946 *      @dev: device
3947 *
3948 *      This function does the last stage of destroying an allocated device
3949 *      interface. The reference to the device object is released.
3950 *      If this is the last reference then it will be freed.
3951 */
3952void free_netdev(struct net_device *dev)
3953{
3954        /*  Compatibility with error handling in drivers */
3955        if (dev->reg_state == NETREG_UNINITIALIZED) {
3956                kfree((char *)dev - dev->padded);
3957                return;
3958        }
3959
3960        BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3961        dev->reg_state = NETREG_RELEASED;
3962
3963        /* will free via device release */
3964        put_device(&dev->dev);
3965}
3966
3967/* Synchronize with packet receive processing. */
3968void synchronize_net(void)
3969{
3970        might_sleep();
3971        synchronize_rcu();
3972}
3973
3974/**
3975 *      unregister_netdevice - remove device from the kernel
3976 *      @dev: device
3977 *
3978 *      This function shuts down a device interface and removes it
3979 *      from the kernel tables.
3980 *
3981 *      Callers must hold the rtnl semaphore.  You may want
3982 *      unregister_netdev() instead of this.
3983 */
3984
3985void unregister_netdevice(struct net_device *dev)
3986{
3987        rollback_registered(dev);
3988        /* Finish processing unregister after unlock */
3989        net_set_todo(dev);
3990}
3991
3992/**
3993 *      unregister_netdev - remove device from the kernel
3994 *      @dev: device
3995 *
3996 *      This function shuts down a device interface and removes it
3997 *      from the kernel tables.
3998 *
3999 *      This is just a wrapper for unregister_netdevice that takes
4000 *      the rtnl semaphore.  In general you want to use this and not
4001 *      unregister_netdevice.
4002 */
4003void unregister_netdev(struct net_device *dev)
4004{
4005        rtnl_lock();
4006        unregister_netdevice(dev);
4007        rtnl_unlock();
4008}
4009
4010EXPORT_SYMBOL(unregister_netdev);
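/*
 * Illustrative teardown sketch, mirroring register_netdev():
 *
 *	unregister_netdev(dev);	(takes and releases the RTNL lock)
 *	free_netdev(dev);	(drops the final reference)
 */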
4011
4012/**
4013 *      dev_change_net_namespace - move device to a different network namespace
4014 *      @dev: device
4015 *      @net: network namespace
4016 *      @pat: if not NULL, a name pattern to try if the current device name
4017 *            is already taken in the destination network namespace.
4018 *
4019 *      This function shuts down a device interface and moves it
4020 *      to a new network namespace. On success 0 is returned, on
4021 *      a failure a negative errno code is returned.
4022 *
4023 *      Callers must hold the rtnl semaphore.
4024 */
4025
4026int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4027{
4028        char buf[IFNAMSIZ];
4029        const char *destname;
4030        int err;
4031
4032        ASSERT_RTNL();
4033
4034        /* Don't allow namespace local devices to be moved. */
4035        err = -EINVAL;
4036        if (dev->features & NETIF_F_NETNS_LOCAL)
4037                goto out;
4038
4039        /* Ensure the device has been registered */
4040        err = -EINVAL;
4041        if (dev->reg_state != NETREG_REGISTERED)
4042                goto out;
4043
4044        /* Get out if there is nothing to do */
4045        err = 0;
4046        if (dev->nd_net == net)
4047                goto out;
4048
4049        /* Pick the destination device name, and ensure
4050         * we can use it in the destination network namespace.
4051         */
4052        err = -EEXIST;
4053        destname = dev->name;
4054        if (__dev_get_by_name(net, destname)) {
4055                /* We get here if we can't use the current device name */
4056                if (!pat)
4057                        goto out;
4058                if (!dev_valid_name(pat))
4059                        goto out;
4060                if (strchr(pat, '%')) {
4061                        if (__dev_alloc_name(net, pat, buf) < 0)
4062                                goto out;
4063                        destname = buf;
4064                } else
4065                        destname = pat;
4066                if (__dev_get_by_name(net, destname))
4067                        goto out;
4068        }
4069
4070        /*
4071         * And now a mini version of register_netdevice and unregister_netdevice.
4072         */
4073
4074        /* If device is running close it first. */
4075        dev_close(dev);
4076
4077        /* And unlink it from device chain */
4078        err = -ENODEV;
4079        unlist_netdevice(dev);
4080
4081        synchronize_net();
4082
4083        /* Shutdown queueing discipline. */
4084        dev_shutdown(dev);
4085
4086        /* Notify protocols, that we are about to destroy
4087           this device. They should clean all the things.
4088        */
4089        call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4090
4091        /*
4092         *      Flush the unicast and multicast chains
4093         */
4094        dev_addr_discard(dev);
4095
4096        /* Actually switch the network namespace */
4097        dev->nd_net = net;
4098
4099        /* Assign the new device name */
4100        if (destname != dev->name)
4101                strcpy(dev->name, destname);
4102
4103        /* If there is an ifindex conflict assign a new one */
4104        if (__dev_get_by_index(net, dev->ifindex)) {
4105                int iflink = (dev->iflink == dev->ifindex);
4106                dev->ifindex = dev_new_index(net);
4107                if (iflink)
4108                        dev->iflink = dev->ifindex;
4109        }
4110
4111        /* Fixup kobjects */
4112        err = device_rename(&dev->dev, dev->name);
4113        WARN_ON(err);
4114
4115        /* Add the device back in the hashes */
4116        list_netdevice(dev);
4117
4118        /* Notify protocols, that a new device appeared. */
4119        call_netdevice_notifiers(NETDEV_REGISTER, dev);
4120
4121        synchronize_net();
4122        err = 0;
4123out:
4124        return err;
4125}
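/*
 * Illustrative sketch: moving a device under the RTNL lock, falling back
 * to a "dev%d" pattern on a name clash ("net" is assumed to have been
 * obtained elsewhere, e.g. from a namespace lookup).
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, net, "dev%d");
 *	rtnl_unlock();
 */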
4126
4127static int dev_cpu_callback(struct notifier_block *nfb,
4128                            unsigned long action,
4129                            void *ocpu)
4130{
4131        struct sk_buff **list_skb;
4132        struct net_device **list_net;
4133        struct sk_buff *skb;
4134        unsigned int cpu, oldcpu = (unsigned long)ocpu;
4135        struct softnet_data *sd, *oldsd;
4136
4137        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
4138                return NOTIFY_OK;
4139
4140        local_irq_disable();
4141        cpu = smp_processor_id();
4142        sd = &per_cpu(softnet_data, cpu);
4143        oldsd = &per_cpu(softnet_data, oldcpu);
4144
4145        /* Find end of our completion_queue. */
4146        list_skb = &sd->completion_queue;
4147        while (*list_skb)
4148                list_skb = &(*list_skb)->next;
4149        /* Append completion queue from offline CPU. */
4150        *list_skb = oldsd->completion_queue;
4151        oldsd->completion_queue = NULL;
4152
4153        /* Find end of our output_queue. */
4154        list_net = &sd->output_queue;
4155        while (*list_net)
4156                list_net = &(*list_net)->next_sched;
4157        /* Append output queue from offline CPU. */
4158        *list_net = oldsd->output_queue;
4159        oldsd->output_queue = NULL;
4160
4161        raise_softirq_irqoff(NET_TX_SOFTIRQ);
4162        local_irq_enable();
4163
4164        /* Process offline CPU's input_pkt_queue */
4165        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4166                netif_rx(skb);
4167
4168        return NOTIFY_OK;
4169}
4170
4171#ifdef CONFIG_NET_DMA
4172/**
4173 * net_dma_rebalance - try to maintain one DMA channel per CPU
4174 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4175 *
4176 * This is called when the number of channels allocated to the net_dma client
4177 * changes.  The net_dma client tries to have one DMA channel per CPU.
4178 */
4179
4180static void net_dma_rebalance(struct net_dma *net_dma)
4181{
4182        unsigned int cpu, i, n, chan_idx;
4183        struct dma_chan *chan;
4184
4185        if (cpus_empty(net_dma->channel_mask)) {
4186                for_each_online_cpu(cpu)
4187                        rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
4188                return;
4189        }
4190
4191        i = 0;
4192        cpu = first_cpu(cpu_online_map);
4193
4194        for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
4195                chan = net_dma->channels[chan_idx];
4196
4197                n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4198                   + (i < (num_online_cpus() %
4199                        cpus_weight(net_dma->channel_mask)) ? 1 : 0));
4200
4201                while (n) {
4202                        per_cpu(softnet_data, cpu).net_dma = chan;
4203                        cpu = next_cpu(cpu, cpu_online_map);
4204                        n--;
4205                }
4206                i++;
4207        }
4208}
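/*
 * Worked example of the distribution above: with 8 online CPUs and 3
 * channels in channel_mask, n evaluates to 8/3 + (i < 8%3 ? 1 : 0),
 * i.e. 3, 3 and 2 for i = 0, 1, 2, so the channels serve CPUs 0-2,
 * 3-5 and 6-7 respectively.
 */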
4209
4210/**
4211 * netdev_dma_event - event callback for the net_dma_client
4212 * @client: should always be net_dma_client
4213 * @chan: DMA channel for the event
4214 * @state: DMA state to be handled
4215 */
4216static enum dma_state_client
4217netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4218        enum dma_state state)
4219{
4220        int i, found = 0, pos = -1;
4221        struct net_dma *net_dma =
4222                container_of(client, struct net_dma, client);
4223        enum dma_state_client ack = DMA_DUP; /* default: take no action */
4224
4225        spin_lock(&net_dma->lock);
4226        switch (state) {
4227        case DMA_RESOURCE_AVAILABLE:
4228                for (i = 0; i < NR_CPUS; i++)
4229                        if (net_dma->channels[i] == chan) {
4230                                found = 1;
4231                                break;
4232                        } else if (net_dma->channels[i] == NULL && pos < 0)
4233                                pos = i;
4234
4235                if (!found && pos >= 0) {
4236                        ack = DMA_ACK;
4237                        net_dma->channels[pos] = chan;
4238                        cpu_set(pos, net_dma->channel_mask);
4239                        net_dma_rebalance(net_dma);
4240                }
4241                break;
4242        case DMA_RESOURCE_REMOVED:
4243                for (i = 0; i < NR_CPUS; i++)
4244                        if (net_dma->channels[i] == chan) {
4245                                found = 1;
4246                                pos = i;
4247                                break;
4248                        }
4249
4250                if (found) {
4251                        ack = DMA_ACK;
4252                        cpu_clear(pos, net_dma->channel_mask);
4253                        net_dma->channels[i] = NULL;
4254                        net_dma_rebalance(net_dma);
4255                }
4256                break;
4257        default:
4258                break;
4259        }
4260        spin_unlock(&net_dma->lock);
4261
4262        return ack;
4263}
4264
4265/**
4266 * netdev_dma_register - register the networking subsystem as a DMA client
4267 */
4268static int __init netdev_dma_register(void)
4269{
4270        spin_lock_init(&net_dma.lock);
4271        dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4272        dma_async_client_register(&net_dma.client);
4273        dma_async_client_chan_request(&net_dma.client);
4274        return 0;
4275}
4276
4277#else
4278static int __init netdev_dma_register(void) { return -ENODEV; }
4279#endif /* CONFIG_NET_DMA */
4280
4281/**
4282 *      netdev_compute_features - compute the conjunction of two feature sets
4283 *      @all: first feature set
4284 *      @one: second feature set
4285 *
4286 *      Computes a new feature set after adding a device with feature set
4287 *      @one to the master device with current feature set @all.  Returns
4288 *      the new feature set.
4289 */
4290int netdev_compute_features(unsigned long all, unsigned long one)
4291{
4292        /* if device needs checksumming, downgrade to hw checksumming */
4293        if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4294                all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4295
4296        /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4297        if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4298                all ^= NETIF_F_HW_CSUM
4299                        | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4300
4301        if (one & NETIF_F_GSO)
4302                one |= NETIF_F_GSO_SOFTWARE;
4303        one |= NETIF_F_GSO;
4304
4305        /* If even one device supports robust GSO, enable it for all. */
4306        if (one & NETIF_F_GSO_ROBUST)
4307                all |= NETIF_F_GSO_ROBUST;
4308
4309        all &= one | NETIF_F_LLTX;
4310
4311        if (!(all & NETIF_F_ALL_CSUM))
4312                all &= ~NETIF_F_SG;
4313        if (!(all & NETIF_F_SG))
4314                all &= ~NETIF_F_GSO_MASK;
4315
4316        return all;
4317}
4318EXPORT_SYMBOL(netdev_compute_features);
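/*
 * Illustrative sketch (bonding-style structures assumed, names are not
 * from this file): folding every slave's feature set into a master's,
 * starting from the permissive all-ones set.
 *
 *	unsigned long features = ~0UL;
 *
 *	list_for_each_entry(slave, &master->slave_list, list)
 *		features = netdev_compute_features(features,
 *						   slave->dev->features);
 *	master_dev->features = features;
 */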
4319
4320static struct hlist_head *netdev_create_hash(void)
4321{
4322        int i;
4323        struct hlist_head *hash;
4324
4325        hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4326        if (hash != NULL)
4327                for (i = 0; i < NETDEV_HASHENTRIES; i++)
4328                        INIT_HLIST_HEAD(&hash[i]);
4329
4330        return hash;
4331}
4332
4333/* Initialize per network namespace state */
4334static int __net_init netdev_init(struct net *net)
4335{
4336        INIT_LIST_HEAD(&net->dev_base_head);
4337
4338        net->dev_name_head = netdev_create_hash();
4339        if (net->dev_name_head == NULL)
4340                goto err_name;
4341
4342        net->dev_index_head = netdev_create_hash();
4343        if (net->dev_index_head == NULL)
4344                goto err_idx;
4345
4346        return 0;
4347
4348err_idx:
4349        kfree(net->dev_name_head);
4350err_name:
4351        return -ENOMEM;
4352}
4353
4354static void __net_exit netdev_exit(struct net *net)
4355{
4356        kfree(net->dev_name_head);
4357        kfree(net->dev_index_head);
4358}
4359
4360static struct pernet_operations __net_initdata netdev_net_ops = {
4361        .init = netdev_init,
4362        .exit = netdev_exit,
4363};
4364
4365static void __net_exit default_device_exit(struct net *net)
4366{
4367        struct net_device *dev, *next;
4368        /*
4369         * Push all migratable network devices back to the
4370         * initial network namespace
4371         */
4372        rtnl_lock();
4373        for_each_netdev_safe(net, dev, next) {
4374                int err;
4375
4376                /* Ignore unmovable devices (e.g. loopback) */
4377                if (dev->features & NETIF_F_NETNS_LOCAL)
4378                        continue;
4379
4380                /* Push remaining network devices to init_net */
4381                err = dev_change_net_namespace(dev, &init_net, "dev%d");
4382                if (err) {
4383                        printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
4384                                __func__, dev->name, err);
4385                        unregister_netdevice(dev);
4386                }
4387        }
4388        rtnl_unlock();
4389}
4390
4391static struct pernet_operations __net_initdata default_device_ops = {
4392        .exit = default_device_exit,
4393};
4394
4395/*
4396 *      Initialize the DEV module. At boot time this walks the device list and
4397 *      unhooks any devices that fail to initialise (normally hardware not
4398 *      present) and leaves us with a valid list of present and active devices.
4399 *
4400 */
4401
4402/*
4403 *       This is called single threaded during boot, so no need
4404 *       to take the rtnl semaphore.
4405 */
4406static int __init net_dev_init(void)
4407{
4408        int i, rc = -ENOMEM;
4409
4410        BUG_ON(!dev_boot_phase);
4411
4412        if (dev_proc_init())
4413                goto out;
4414
4415        if (netdev_kobject_init())
4416                goto out;
4417
4418        INIT_LIST_HEAD(&ptype_all);
4419        for (i = 0; i < 16; i++)
4420                INIT_LIST_HEAD(&ptype_base[i]);
4421
4422        if (register_pernet_subsys(&netdev_net_ops))
4423                goto out;
4424
4425        if (register_pernet_device(&default_device_ops))
4426                goto out;
4427
4428        /*
4429         *      Initialise the packet receive queues.
4430         */
4431
4432        for_each_possible_cpu(i) {
4433                struct softnet_data *queue;
4434
4435                queue = &per_cpu(softnet_data, i);
4436                skb_queue_head_init(&queue->input_pkt_queue);
4437                queue->completion_queue = NULL;
4438                INIT_LIST_HEAD(&queue->poll_list);
4439
4440                queue->backlog.poll = process_backlog;
4441                queue->backlog.weight = weight_p;
4442        }
4443
4444        netdev_dma_register();
4445
4446        dev_boot_phase = 0;
4447
4448        open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4449        open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4450
4451        hotcpu_notifier(dev_cpu_callback, 0);
4452        dst_init();
4453        dev_mcast_init();
4454        rc = 0;
4455out:
4456        return rc;
4457}
4458
4459subsys_initcall(net_dev_init);
4460
4461EXPORT_SYMBOL(__dev_get_by_index);
4462EXPORT_SYMBOL(__dev_get_by_name);
4463EXPORT_SYMBOL(__dev_remove_pack);
4464EXPORT_SYMBOL(dev_valid_name);
4465EXPORT_SYMBOL(dev_add_pack);
4466EXPORT_SYMBOL(dev_alloc_name);
4467EXPORT_SYMBOL(dev_close);
4468EXPORT_SYMBOL(dev_get_by_flags);
4469EXPORT_SYMBOL(dev_get_by_index);
4470EXPORT_SYMBOL(dev_get_by_name);
4471EXPORT_SYMBOL(dev_open);
4472EXPORT_SYMBOL(dev_queue_xmit);
4473EXPORT_SYMBOL(dev_remove_pack);
4474EXPORT_SYMBOL(dev_set_allmulti);
4475EXPORT_SYMBOL(dev_set_promiscuity);
4476EXPORT_SYMBOL(dev_change_flags);
4477EXPORT_SYMBOL(dev_set_mtu);
4478EXPORT_SYMBOL(dev_set_mac_address);
4479EXPORT_SYMBOL(free_netdev);
4480EXPORT_SYMBOL(netdev_boot_setup_check);
4481EXPORT_SYMBOL(netdev_set_master);
4482EXPORT_SYMBOL(netdev_state_change);
4483EXPORT_SYMBOL(netif_receive_skb);
4484EXPORT_SYMBOL(netif_rx);
4485EXPORT_SYMBOL(register_gifconf);
4486EXPORT_SYMBOL(register_netdevice);
4487EXPORT_SYMBOL(register_netdevice_notifier);
4488EXPORT_SYMBOL(skb_checksum_help);
4489EXPORT_SYMBOL(synchronize_net);
4490EXPORT_SYMBOL(unregister_netdevice);
4491EXPORT_SYMBOL(unregister_netdevice_notifier);
4492EXPORT_SYMBOL(net_enable_timestamp);
4493EXPORT_SYMBOL(net_disable_timestamp);
4494EXPORT_SYMBOL(dev_get_flags);
4495
4496#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4497EXPORT_SYMBOL(br_handle_frame_hook);
4498EXPORT_SYMBOL(br_fdb_get_hook);
4499EXPORT_SYMBOL(br_fdb_put_hook);
4500#endif
4501
4502#ifdef CONFIG_KMOD
4503EXPORT_SYMBOL(dev_load);
4504#endif
4505
4506EXPORT_PER_CPU_SYMBOL(softnet_data);
4507