/* linux/drivers/net/bonding/bond_main.c */
   1/*
   2 * originally based on the dummy device.
   3 *
   4 * Copyright 1999, Thomas Davis, tadavis@lbl.gov.
   5 * Licensed under the GPL. Based on dummy.c, and eql.c devices.
   6 *
   7 * bonding.c: an Ethernet Bonding driver
   8 *
   9 * This is useful to talk to a Cisco EtherChannel compatible equipment:
  10 *      Cisco 5500
  11 *      Sun Trunking (Solaris)
  12 *      Alteon AceDirector Trunks
  13 *      Linux Bonding
  14 *      and probably many L2 switches ...
  15 *
  16 * How it works:
  17 *    ifconfig bond0 ipaddress netmask up
  18 *      will setup a network device, with an ip address.  No mac address
  19 *      will be assigned at this time.  The hw mac address will come from
  20 *      the first slave bonded to the channel.  All slaves will then use
  21 *      this hw mac address.
  22 *
  23 *    ifconfig bond0 down
  24 *         will release all slaves, marking them as down.
  25 *
  26 *    ifenslave bond0 eth0
  27 *      will attach eth0 to bond0 as a slave.  eth0 hw mac address will either
  28 *      a: be used as initial mac address
  29 *      b: if a hw mac address already is there, eth0's hw mac address
  30 *         will then be set from bond0.
  31 *
  32 */
  33
  34#include <linux/kernel.h>
  35#include <linux/module.h>
  36#include <linux/types.h>
  37#include <linux/fcntl.h>
  38#include <linux/interrupt.h>
  39#include <linux/ptrace.h>
  40#include <linux/ioport.h>
  41#include <linux/in.h>
  42#include <net/ip.h>
  43#include <linux/ip.h>
  44#include <linux/tcp.h>
  45#include <linux/udp.h>
  46#include <linux/slab.h>
  47#include <linux/string.h>
  48#include <linux/init.h>
  49#include <linux/timer.h>
  50#include <linux/socket.h>
  51#include <linux/ctype.h>
  52#include <linux/inet.h>
  53#include <linux/bitops.h>
  54#include <linux/io.h>
  55#include <asm/dma.h>
  56#include <linux/uaccess.h>
  57#include <linux/errno.h>
  58#include <linux/netdevice.h>
  59#include <linux/inetdevice.h>
  60#include <linux/igmp.h>
  61#include <linux/etherdevice.h>
  62#include <linux/skbuff.h>
  63#include <net/sock.h>
  64#include <linux/rtnetlink.h>
  65#include <linux/smp.h>
  66#include <linux/if_ether.h>
  67#include <net/arp.h>
  68#include <linux/mii.h>
  69#include <linux/ethtool.h>
  70#include <linux/if_vlan.h>
  71#include <linux/if_bonding.h>
  72#include <linux/jiffies.h>
  73#include <linux/preempt.h>
  74#include <net/route.h>
  75#include <net/net_namespace.h>
  76#include <net/netns/generic.h>
  77#include <net/pkt_sched.h>
  78#include <linux/rculist.h>
  79#include <net/flow_dissector.h>
  80#include <net/switchdev.h>
  81#include <net/bonding.h>
  82#include <net/bond_3ad.h>
  83#include <net/bond_alb.h>
  84
  85#include "bonding_priv.h"
  86
/*---------------------------- Module parameters ----------------------------*/

/* monitor all links that often (in milliseconds). <=0 disables monitoring */

/* Load-time tunables.  The int knobs default to 0 (feature off) unless
 * initialized below; the charp knobs are presumably parsed into enum values
 * during module init (see bond_check_params()) -- TODO confirm against the
 * init path, which is not visible here.
 */
static int max_bonds    = BOND_DEFAULT_MAX_BONDS;
static int tx_queues    = BOND_DEFAULT_TX_QUEUES;
static int num_peer_notif = 1;
static int miimon;
static int updelay;
static int downdelay;
static int use_carrier  = 1;
static char *mode;
static char *primary;
static char *primary_reselect;
static char *lacp_rate;
static int min_links;
static char *ad_select;
static char *xmit_hash_policy;
static int arp_interval;
static char *arp_ip_target[BOND_MAX_ARP_TARGETS];
static char *arp_validate;
static char *arp_all_targets;
static char *fail_over_mac;
static int all_slaves_active;
static struct bond_params bonding_defaults;
static int resend_igmp = BOND_DEFAULT_RESEND_IGMP;
static int packets_per_slave = 1;
static int lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;

module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(tx_queues, int, 0);
MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
/* num_grat_arp and num_unsol_na are two user-visible aliases for the same
 * variable, num_peer_notif (IPv4 gratuitous ARP vs IPv6 unsolicited NA).
 */
module_param_named(num_grat_arp, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on "
			       "failover event (alias of num_unsol_na)");
module_param_named(num_unsol_na, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on "
			       "failover event (alias of num_grat_arp)");
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, "
			    "in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; "
			      "0 for off, 1 for on (default)");
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, "
		       "1 for active-backup, 2 for balance-xor, "
		       "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, "
		       "6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(primary_reselect, charp, 0);
MODULE_PARM_DESC(primary_reselect, "Reselect primary slave "
				   "once it comes up; "
				   "0 for always (default), "
				   "1 for only if speed of primary is "
				   "better, "
				   "2 for only on active slave "
				   "failure");
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; "
			    "0 for slow, 1 for fast");
module_param(ad_select, charp, 0);
MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; "
			    "0 for stable (default), 1 for bandwidth, "
			    "2 for count");
module_param(min_links, int, 0);
MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier");

module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-xor and 802.3ad hashing method; "
				   "0 for layer 2 (default), 1 for layer 3+4, "
				   "2 for layer 2+3, 3 for encap layer 2+3, "
				   "4 for encap layer 3+4");
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; "
			       "0 for none (default), 1 for active, "
			       "2 for backup, 3 for all");
module_param(arp_all_targets, charp, 0);
MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to "
				"the same MAC; 0 for none (default), "
				"1 for active, 2 for follow");
module_param(all_slaves_active, int, 0);
MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface "
				     "by setting active flag for all slaves; "
				     "0 for never (default), 1 for always.");
module_param(resend_igmp, int, 0);
MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on "
			      "link failure");
module_param(packets_per_slave, int, 0);
MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr "
				    "mode; 0 for a random slave, 1 packet per "
				    "slave (default), >1 packets per slave.");
module_param(lp_interval, uint, 0);
MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where "
			      "the bonding driver sends learning packets to "
			      "each slaves peer switch. The default is 1.");
/*----------------------------- Global variables ----------------------------*/

#ifdef CONFIG_NET_POLL_CONTROLLER
/* Nonzero while transmission must be blocked for netpoll -- presumably
 * paired with the netpoll block/unblock helpers used elsewhere in this file;
 * not visible in this chunk.
 */
atomic_t netpoll_block_tx = ATOMIC_INIT(0);
#endif

/* Identifier for this module's per-network-namespace storage. */
int bond_net_id __read_mostly;

/*-------------------------- Forward declarations ---------------------------*/

static int bond_init(struct net_device *bond_dev);
static void bond_uninit(struct net_device *bond_dev);
static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
						struct rtnl_link_stats64 *stats);
static void bond_slave_arr_handler(struct work_struct *work);
static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
				  int mod);
 213
 214/*---------------------------- General routines -----------------------------*/
 215
 216const char *bond_mode_name(int mode)
 217{
 218        static const char *names[] = {
 219                [BOND_MODE_ROUNDROBIN] = "load balancing (round-robin)",
 220                [BOND_MODE_ACTIVEBACKUP] = "fault-tolerance (active-backup)",
 221                [BOND_MODE_XOR] = "load balancing (xor)",
 222                [BOND_MODE_BROADCAST] = "fault-tolerance (broadcast)",
 223                [BOND_MODE_8023AD] = "IEEE 802.3ad Dynamic link aggregation",
 224                [BOND_MODE_TLB] = "transmit load balancing",
 225                [BOND_MODE_ALB] = "adaptive load balancing",
 226        };
 227
 228        if (mode < BOND_MODE_ROUNDROBIN || mode > BOND_MODE_ALB)
 229                return "unknown";
 230
 231        return names[mode];
 232}
 233
 234/*---------------------------------- VLAN -----------------------------------*/
 235
/**
 * bond_dev_queue_xmit - Prepare skb for xmit.
 *
 * @bond: bond device that got this skb for tx.
 * @skb: hw accel VLAN tagged skb to transmit
 * @slave_dev: slave that is supposed to xmit this skbuff
 */
void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
			struct net_device *slave_dev)
{
	/* Hand the skb over to the chosen slave for actual transmission. */
	skb->dev = slave_dev;

	/* Restore the tx queue chosen for the slave from the qdisc cb, where
	 * it was presumably stashed by the bond's queue-selection path (not
	 * visible here -- TODO confirm against bond_select_queue).  The
	 * BUILD_BUG_ON ensures the cb field cannot silently diverge in width
	 * from skb->queue_mapping.
	 */
	BUILD_BUG_ON(sizeof(skb->queue_mapping) !=
		     sizeof(qdisc_skb_cb(skb)->slave_dev_queue_mapping));
	skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping;

	/* If netpoll is active on the master, use the netpoll transmit path
	 * so the frame can go out even from contexts where the regular tx
	 * path is unusable.
	 */
	if (unlikely(netpoll_tx_running(bond->dev)))
		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
	else
		dev_queue_xmit(skb);
}
 257
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
 * we don't protect the slave list iteration with a lock because:
 * a. This operation is performed in IOCTL context,
 * b. The operation is protected by the RTNL semaphore in the 8021q code,
 * c. Holding a lock with BH disabled while directly calling a base driver
 *    entry point is generally a BAD idea.
 *
 * The design of synchronization/protection for this operation in the 8021q
 * module is good for one or more VLAN devices over a single physical device
 * and cannot be extended for a teaming solution like bonding, so there is a
 * potential race condition here where a net device from the vlan group might
 * be referenced (either by a base driver or the 8021q code) while it is being
 * removed from the system. However, it turns out we're not making matters
 * worse, and if it works for regular VLAN usage it will work here too.
 */
 273
/**
 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: VLAN ethertype the id belongs to (802.1Q or 802.1ad)
 * @vid: vlan id being added
 *
 * Returns 0 if the id was added on every slave.  On failure, removes the id
 * from the slaves that already accepted it and returns the slave's error.
 */
static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
				__be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	int res;

	bond_for_each_slave(bond, slave, iter) {
		res = vlan_vid_add(slave->dev, proto, vid);
		if (res)
			goto unwind;
	}

	return 0;

unwind:
	/* unwind to the slave that failed */
	bond_for_each_slave(bond, rollback_slave, iter) {
		if (rollback_slave == slave)
			break;

		vlan_vid_del(rollback_slave->dev, proto, vid);
	}

	return res;
}
 306
/**
 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: VLAN ethertype the id belongs to (802.1Q or 802.1ad)
 * @vid: vlan id being removed
 *
 * Always returns 0; vlan_vid_del() has no failure mode to propagate.
 */
static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
				 __be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		vlan_vid_del(slave->dev, proto, vid);

	/* TLB/ALB modes keep their own per-vlan state; let them drop it. */
	if (bond_is_lb(bond))
		bond_alb_clear_vlan(bond, vid);

	return 0;
}
 327
 328/*------------------------------- Link status -------------------------------*/
 329
 330/* Set the carrier state for the master according to the state of its
 331 * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
 332 * do special 802.3ad magic.
 333 *
 334 * Returns zero if carrier state does not change, nonzero if it does.
 335 */
 336int bond_set_carrier(struct bonding *bond)
 337{
 338        struct list_head *iter;
 339        struct slave *slave;
 340
 341        if (!bond_has_slaves(bond))
 342                goto down;
 343
 344        if (BOND_MODE(bond) == BOND_MODE_8023AD)
 345                return bond_3ad_set_carrier(bond);
 346
 347        bond_for_each_slave(bond, slave, iter) {
 348                if (slave->link == BOND_LINK_UP) {
 349                        if (!netif_carrier_ok(bond->dev)) {
 350                                netif_carrier_on(bond->dev);
 351                                return 1;
 352                        }
 353                        return 0;
 354                }
 355        }
 356
 357down:
 358        if (netif_carrier_ok(bond->dev)) {
 359                netif_carrier_off(bond->dev);
 360                return 1;
 361        }
 362        return 0;
 363}
 364
 365/* Get link speed and duplex from the slave's base driver
 366 * using ethtool. If for some reason the call fails or the
 367 * values are invalid, set speed and duplex to -1,
 368 * and return. Return 1 if speed or duplex settings are
 369 * UNKNOWN; 0 otherwise.
 370 */
 371static int bond_update_speed_duplex(struct slave *slave)
 372{
 373        struct net_device *slave_dev = slave->dev;
 374        struct ethtool_link_ksettings ecmd;
 375        int res;
 376
 377        slave->speed = SPEED_UNKNOWN;
 378        slave->duplex = DUPLEX_UNKNOWN;
 379
 380        res = __ethtool_get_link_ksettings(slave_dev, &ecmd);
 381        if (res < 0)
 382                return 1;
 383        if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1))
 384                return 1;
 385        switch (ecmd.base.duplex) {
 386        case DUPLEX_FULL:
 387        case DUPLEX_HALF:
 388                break;
 389        default:
 390                return 1;
 391        }
 392
 393        slave->speed = ecmd.base.speed;
 394        slave->duplex = ecmd.base.duplex;
 395
 396        return 0;
 397}
 398
 399const char *bond_slave_link_status(s8 link)
 400{
 401        switch (link) {
 402        case BOND_LINK_UP:
 403                return "up";
 404        case BOND_LINK_FAIL:
 405                return "going down";
 406        case BOND_LINK_DOWN:
 407                return "down";
 408        case BOND_LINK_BACK:
 409                return "going back";
 410        default:
 411                return "unknown";
 412        }
 413}
 414
 415/* if <dev> supports MII link status reporting, check its link status.
 416 *
 417 * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(),
 418 * depending upon the setting of the use_carrier parameter.
 419 *
 420 * Return either BMSR_LSTATUS, meaning that the link is up (or we
 421 * can't tell and just pretend it is), or 0, meaning that the link is
 422 * down.
 423 *
 424 * If reporting is non-zero, instead of faking link up, return -1 if
 425 * both ETHTOOL and MII ioctls fail (meaning the device does not
 426 * support them).  If use_carrier is set, return whatever it says.
 427 * It'd be nice if there was a good way to tell if a driver supports
 428 * netif_carrier, but there really isn't.
 429 */
 430static int bond_check_dev_link(struct bonding *bond,
 431                               struct net_device *slave_dev, int reporting)
 432{
 433        const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 434        int (*ioctl)(struct net_device *, struct ifreq *, int);
 435        struct ifreq ifr;
 436        struct mii_ioctl_data *mii;
 437
 438        if (!reporting && !netif_running(slave_dev))
 439                return 0;
 440
 441        if (bond->params.use_carrier)
 442                return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
 443
 444        /* Try to get link status using Ethtool first. */
 445        if (slave_dev->ethtool_ops->get_link)
 446                return slave_dev->ethtool_ops->get_link(slave_dev) ?
 447                        BMSR_LSTATUS : 0;
 448
 449        /* Ethtool can't be used, fallback to MII ioctls. */
 450        ioctl = slave_ops->ndo_do_ioctl;
 451        if (ioctl) {
 452                /* TODO: set pointer to correct ioctl on a per team member
 453                 *       bases to make this more efficient. that is, once
 454                 *       we determine the correct ioctl, we will always
 455                 *       call it and not the others for that team
 456                 *       member.
 457                 */
 458
 459                /* We cannot assume that SIOCGMIIPHY will also read a
 460                 * register; not all network drivers (e.g., e100)
 461                 * support that.
 462                 */
 463
 464                /* Yes, the mii is overlaid on the ifreq.ifr_ifru */
 465                strncpy(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
 466                mii = if_mii(&ifr);
 467                if (ioctl(slave_dev, &ifr, SIOCGMIIPHY) == 0) {
 468                        mii->reg_num = MII_BMSR;
 469                        if (ioctl(slave_dev, &ifr, SIOCGMIIREG) == 0)
 470                                return mii->val_out & BMSR_LSTATUS;
 471                }
 472        }
 473
 474        /* If reporting, report that either there's no dev->do_ioctl,
 475         * or both SIOCGMIIREG and get_link failed (meaning that we
 476         * cannot report link status).  If not reporting, pretend
 477         * we're ok.
 478         */
 479        return reporting ? -1 : BMSR_LSTATUS;
 480}
 481
 482/*----------------------------- Multicast list ------------------------------*/
 483
 484/* Push the promiscuity flag down to appropriate slaves */
 485static int bond_set_promiscuity(struct bonding *bond, int inc)
 486{
 487        struct list_head *iter;
 488        int err = 0;
 489
 490        if (bond_uses_primary(bond)) {
 491                struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
 492
 493                if (curr_active)
 494                        err = dev_set_promiscuity(curr_active->dev, inc);
 495        } else {
 496                struct slave *slave;
 497
 498                bond_for_each_slave(bond, slave, iter) {
 499                        err = dev_set_promiscuity(slave->dev, inc);
 500                        if (err)
 501                                return err;
 502                }
 503        }
 504        return err;
 505}
 506
 507/* Push the allmulti flag down to all slaves */
 508static int bond_set_allmulti(struct bonding *bond, int inc)
 509{
 510        struct list_head *iter;
 511        int err = 0;
 512
 513        if (bond_uses_primary(bond)) {
 514                struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
 515
 516                if (curr_active)
 517                        err = dev_set_allmulti(curr_active->dev, inc);
 518        } else {
 519                struct slave *slave;
 520
 521                bond_for_each_slave(bond, slave, iter) {
 522                        err = dev_set_allmulti(slave->dev, inc);
 523                        if (err)
 524                                return err;
 525                }
 526        }
 527        return err;
 528}
 529
/* Retrieve the list of registered multicast addresses for the bonding
 * device and retransmit an IGMP JOIN request to the current active
 * slave.
 */
static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    mcast_work.work);

	/* Don't block on RTNL from the workqueue; if it's contended,
	 * reschedule ourselves one jiffy later and try again.
	 */
	if (!rtnl_trylock()) {
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
		return;
	}
	call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev);

	/* Re-arm until the configured retransmit count is exhausted,
	 * spacing the reports HZ/5 (200 ms) apart.
	 */
	if (bond->igmp_retrans > 1) {
		bond->igmp_retrans--;
		queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5);
	}
	rtnl_unlock();
}
 551
 552/* Flush bond's hardware addresses from slave */
 553static void bond_hw_addr_flush(struct net_device *bond_dev,
 554                               struct net_device *slave_dev)
 555{
 556        struct bonding *bond = netdev_priv(bond_dev);
 557
 558        dev_uc_unsync(slave_dev, bond_dev);
 559        dev_mc_unsync(slave_dev, bond_dev);
 560
 561        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 562                /* del lacpdu mc addr from mc list */
 563                u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 564
 565                dev_mc_del(slave_dev, lacpdu_multicast);
 566        }
 567}
 568
 569/*--------------------------- Active slave change ---------------------------*/
 570
/* Update the hardware address list and promisc/allmulti for the new and
 * old active slaves (if any).  Modes that are not using primary keep all
 * slaves up-to-date at all times; only the modes that use primary need to
 * call this function to swap these settings during a failover.
 */
static void bond_hw_addr_swap(struct bonding *bond, struct slave *new_active,
			      struct slave *old_active)
{
	if (old_active) {
		/* Undo the reference counts and unsync the address lists the
		 * master had pushed down onto the outgoing active slave.
		 */
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(old_active->dev, -1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(old_active->dev, -1);

		bond_hw_addr_flush(bond->dev, old_active->dev);
	}

	if (new_active) {
		/* FIXME: Signal errors upstream. */
		if (bond->dev->flags & IFF_PROMISC)
			dev_set_promiscuity(new_active->dev, 1);

		if (bond->dev->flags & IFF_ALLMULTI)
			dev_set_allmulti(new_active->dev, 1);

		/* dev_uc_sync/dev_mc_sync expect the destination device's
		 * address list lock to be held with BH disabled.
		 */
		netif_addr_lock_bh(bond->dev);
		dev_uc_sync(new_active->dev, bond->dev);
		dev_mc_sync(new_active->dev, bond->dev);
		netif_addr_unlock_bh(bond->dev);
	}
}
 603
/**
 * bond_set_dev_addr - clone slave's address to bond
 * @bond_dev: bond net device
 * @slave_dev: slave net device
 *
 * Should be called with RTNL held.
 */
static void bond_set_dev_addr(struct net_device *bond_dev,
			      struct net_device *slave_dev)
{
	netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n",
		   bond_dev, slave_dev, slave_dev->addr_len);
	memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len);
	/* NET_ADDR_STOLEN: address was taken from another device (the slave),
	 * not set by userspace or randomly generated.
	 */
	bond_dev->addr_assign_type = NET_ADDR_STOLEN;
	call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev);
}
 620
 621static struct slave *bond_get_old_active(struct bonding *bond,
 622                                         struct slave *new_active)
 623{
 624        struct slave *slave;
 625        struct list_head *iter;
 626
 627        bond_for_each_slave(bond, slave, iter) {
 628                if (slave == new_active)
 629                        continue;
 630
 631                if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr))
 632                        return slave;
 633        }
 634
 635        return NULL;
 636}
 637
/* bond_do_fail_over_mac
 *
 * Perform special MAC address swapping for fail_over_mac settings
 *
 * Called with RTNL
 */
static void bond_do_fail_over_mac(struct bonding *bond,
				  struct slave *new_active,
				  struct slave *old_active)
{
	u8 tmp_mac[MAX_ADDR_LEN];
	struct sockaddr_storage ss;
	int rv;

	switch (bond->params.fail_over_mac) {
	case BOND_FOM_ACTIVE:
		/* "active" policy: the bond always adopts the MAC of the
		 * currently active slave.
		 */
		if (new_active)
			bond_set_dev_addr(bond->dev, new_active->dev);
		break;
	case BOND_FOM_FOLLOW:
		/* if new_active && old_active, swap them
		 * if just old_active, do nothing (going to no active slave)
		 * if just new_active, set new_active to bond's MAC
		 */
		if (!new_active)
			return;

		/* No old_active given: look for another slave that still
		 * carries the bond's MAC so we can give it back.
		 */
		if (!old_active)
			old_active = bond_get_old_active(bond, new_active);

		if (old_active) {
			/* Save new_active's current MAC so we can hand it to
			 * old_active after the swap below.
			 */
			bond_hw_addr_copy(tmp_mac, new_active->dev->dev_addr,
					  new_active->dev->addr_len);
			bond_hw_addr_copy(ss.__data,
					  old_active->dev->dev_addr,
					  old_active->dev->addr_len);
			ss.ss_family = new_active->dev->type;
		} else {
			/* No old slave to swap with: give new_active the
			 * bond's own MAC.
			 */
			bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
					  bond->dev->addr_len);
			ss.ss_family = bond->dev->type;
		}

		rv = dev_set_mac_address(new_active->dev,
					 (struct sockaddr *)&ss);
		if (rv) {
			netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
				   -rv, new_active->dev->name);
			goto out;
		}

		if (!old_active)
			goto out;

		/* Second half of the swap: old_active takes new_active's
		 * previous MAC (saved in tmp_mac above).
		 */
		bond_hw_addr_copy(ss.__data, tmp_mac,
				  new_active->dev->addr_len);
		ss.ss_family = old_active->dev->type;

		rv = dev_set_mac_address(old_active->dev,
					 (struct sockaddr *)&ss);
		if (rv)
			netdev_err(bond->dev, "Error %d setting MAC of slave %s\n",
				   -rv, new_active->dev->name);
out:
		break;
	default:
		netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
			   bond->params.fail_over_mac);
		break;
	}

}
 710
 711static struct slave *bond_choose_primary_or_current(struct bonding *bond)
 712{
 713        struct slave *prim = rtnl_dereference(bond->primary_slave);
 714        struct slave *curr = rtnl_dereference(bond->curr_active_slave);
 715
 716        if (!prim || prim->link != BOND_LINK_UP) {
 717                if (!curr || curr->link != BOND_LINK_UP)
 718                        return NULL;
 719                return curr;
 720        }
 721
 722        if (bond->force_primary) {
 723                bond->force_primary = false;
 724                return prim;
 725        }
 726
 727        if (!curr || curr->link != BOND_LINK_UP)
 728                return prim;
 729
 730        /* At this point, prim and curr are both up */
 731        switch (bond->params.primary_reselect) {
 732        case BOND_PRI_RESELECT_ALWAYS:
 733                return prim;
 734        case BOND_PRI_RESELECT_BETTER:
 735                if (prim->speed < curr->speed)
 736                        return curr;
 737                if (prim->speed == curr->speed && prim->duplex <= curr->duplex)
 738                        return curr;
 739                return prim;
 740        case BOND_PRI_RESELECT_FAILURE:
 741                return curr;
 742        default:
 743                netdev_err(bond->dev, "impossible primary_reselect %d\n",
 744                           bond->params.primary_reselect);
 745                return curr;
 746        }
 747}
 748
 749/**
 750 * bond_find_best_slave - select the best available slave to be the active one
 751 * @bond: our bonding struct
 752 */
 753static struct slave *bond_find_best_slave(struct bonding *bond)
 754{
 755        struct slave *slave, *bestslave = NULL;
 756        struct list_head *iter;
 757        int mintime = bond->params.updelay;
 758
 759        slave = bond_choose_primary_or_current(bond);
 760        if (slave)
 761                return slave;
 762
 763        bond_for_each_slave(bond, slave, iter) {
 764                if (slave->link == BOND_LINK_UP)
 765                        return slave;
 766                if (slave->link == BOND_LINK_BACK && bond_slave_is_up(slave) &&
 767                    slave->delay < mintime) {
 768                        mintime = slave->delay;
 769                        bestslave = slave;
 770                }
 771        }
 772
 773        return bestslave;
 774}
 775
 776static bool bond_should_notify_peers(struct bonding *bond)
 777{
 778        struct slave *slave;
 779
 780        rcu_read_lock();
 781        slave = rcu_dereference(bond->curr_active_slave);
 782        rcu_read_unlock();
 783
 784        netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n",
 785                   slave ? slave->dev->name : "NULL");
 786
 787        if (!slave || !bond->send_peer_notif ||
 788            !netif_carrier_ok(bond->dev) ||
 789            test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state))
 790                return false;
 791
 792        return true;
 793}
 794
/**
 * bond_change_active_slave - change the active slave into the specified one
 * @bond: our bonding struct
 * @new_active: the new slave to make the active one
 *
 * Set the new slave to the bond's settings and unset them on the old
 * curr_active_slave.
 * Setting include flags, mc-list, promiscuity, allmulti, etc.
 *
 * If @new_active's link state is %BOND_LINK_BACK we'll set it to
 * %BOND_LINK_UP, because it is apparently the best available slave we have,
 * even though its updelay hasn't timed out yet.
 *
 * Caller must hold RTNL.
 */
void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{
	struct slave *old_active;

	ASSERT_RTNL();

	old_active = rtnl_dereference(bond->curr_active_slave);

	/* Nothing to do when the active slave is not actually changing. */
	if (old_active == new_active)
		return;

	if (new_active) {
		new_active->last_link_up = jiffies;

		if (new_active->link == BOND_LINK_BACK) {
			/* Slave is still inside updelay: promote it to UP
			 * early since it is the best candidate available.
			 */
			if (bond_uses_primary(bond)) {
				netdev_info(bond->dev, "making interface %s the new active one %d ms earlier\n",
					    new_active->dev->name,
					    (bond->params.updelay - new_active->delay) * bond->params.miimon);
			}

			new_active->delay = 0;
			bond_set_slave_link_state(new_active, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);

			/* Let the mode-specific machinery observe the link-up. */
			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(new_active, BOND_LINK_UP);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
		} else {
			if (bond_uses_primary(bond)) {
				netdev_info(bond->dev, "making interface %s the new active one\n",
					    new_active->dev->name);
			}
		}
	}

	if (bond_uses_primary(bond))
		bond_hw_addr_swap(bond, new_active, old_active);

	if (bond_is_lb(bond)) {
		/* NOTE(review): in the lb branch curr_active_slave is not
		 * assigned here — presumably updated inside
		 * bond_alb_handle_active_change(); confirm in bond_alb.c.
		 */
		bond_alb_handle_active_change(bond, new_active);
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);
		if (new_active)
			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);
	} else {
		rcu_assign_pointer(bond->curr_active_slave, new_active);
	}

	if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
		if (old_active)
			bond_set_slave_inactive_flags(old_active,
						      BOND_SLAVE_NOTIFY_NOW);

		if (new_active) {
			bool should_notify_peers = false;

			bond_set_slave_active_flags(new_active,
						    BOND_SLAVE_NOTIFY_NOW);

			/* Optionally migrate the MAC address on failover. */
			if (bond->params.fail_over_mac)
				bond_do_fail_over_mac(bond, new_active,
						      old_active);

			if (netif_running(bond->dev)) {
				bond->send_peer_notif =
					bond->params.num_peer_notif;
				should_notify_peers =
					bond_should_notify_peers(bond);
			}

			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
			if (should_notify_peers)
				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
							 bond->dev);
		}
	}

	/* resend IGMP joins since active slave has changed or
	 * all were sent on curr_active_slave.
	 * resend only if bond is brought up with the affected
	 * bonding modes and the retransmission is enabled
	 */
	if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
	    ((bond_uses_primary(bond) && new_active) ||
	     BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
		bond->igmp_retrans = bond->params.resend_igmp;
		queue_delayed_work(bond->wq, &bond->mcast_work, 1);
	}
}
 904
 905/**
 906 * bond_select_active_slave - select a new active slave, if needed
 907 * @bond: our bonding struct
 908 *
 909 * This functions should be called when one of the following occurs:
 910 * - The old curr_active_slave has been released or lost its link.
 911 * - The primary_slave has got its link back.
 912 * - A slave has got its link back and there's no old curr_active_slave.
 913 *
 914 * Caller must hold RTNL.
 915 */
 916void bond_select_active_slave(struct bonding *bond)
 917{
 918        struct slave *best_slave;
 919        int rv;
 920
 921        ASSERT_RTNL();
 922
 923        best_slave = bond_find_best_slave(bond);
 924        if (best_slave != rtnl_dereference(bond->curr_active_slave)) {
 925                bond_change_active_slave(bond, best_slave);
 926                rv = bond_set_carrier(bond);
 927                if (!rv)
 928                        return;
 929
 930                if (netif_carrier_ok(bond->dev)) {
 931                        netdev_info(bond->dev, "first active interface up!\n");
 932                } else {
 933                        netdev_info(bond->dev, "now running without any active interface!\n");
 934                }
 935        }
 936}
 937
 938#ifdef CONFIG_NET_POLL_CONTROLLER
 939static inline int slave_enable_netpoll(struct slave *slave)
 940{
 941        struct netpoll *np;
 942        int err = 0;
 943
 944        np = kzalloc(sizeof(*np), GFP_KERNEL);
 945        err = -ENOMEM;
 946        if (!np)
 947                goto out;
 948
 949        err = __netpoll_setup(np, slave->dev);
 950        if (err) {
 951                kfree(np);
 952                goto out;
 953        }
 954        slave->np = np;
 955out:
 956        return err;
 957}
 958static inline void slave_disable_netpoll(struct slave *slave)
 959{
 960        struct netpoll *np = slave->np;
 961
 962        if (!np)
 963                return;
 964
 965        slave->np = NULL;
 966        __netpoll_free_async(np);
 967}
 968
 969static void bond_poll_controller(struct net_device *bond_dev)
 970{
 971        struct bonding *bond = netdev_priv(bond_dev);
 972        struct slave *slave = NULL;
 973        struct list_head *iter;
 974        struct ad_info ad_info;
 975        struct netpoll_info *ni;
 976        const struct net_device_ops *ops;
 977
 978        if (BOND_MODE(bond) == BOND_MODE_8023AD)
 979                if (bond_3ad_get_active_agg_info(bond, &ad_info))
 980                        return;
 981
 982        bond_for_each_slave_rcu(bond, slave, iter) {
 983                ops = slave->dev->netdev_ops;
 984                if (!bond_slave_is_up(slave) || !ops->ndo_poll_controller)
 985                        continue;
 986
 987                if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 988                        struct aggregator *agg =
 989                            SLAVE_AD_INFO(slave)->port.aggregator;
 990
 991                        if (agg &&
 992                            agg->aggregator_identifier != ad_info.aggregator_id)
 993                                continue;
 994                }
 995
 996                ni = rcu_dereference_bh(slave->dev->npinfo);
 997                if (down_trylock(&ni->dev_lock))
 998                        continue;
 999                ops->ndo_poll_controller(slave->dev);
1000                up(&ni->dev_lock);
1001        }
1002}
1003
1004static void bond_netpoll_cleanup(struct net_device *bond_dev)
1005{
1006        struct bonding *bond = netdev_priv(bond_dev);
1007        struct list_head *iter;
1008        struct slave *slave;
1009
1010        bond_for_each_slave(bond, slave, iter)
1011                if (bond_slave_is_up(slave))
1012                        slave_disable_netpoll(slave);
1013}
1014
1015static int bond_netpoll_setup(struct net_device *dev, struct netpoll_info *ni, gfp_t gfp)
1016{
1017        struct bonding *bond = netdev_priv(dev);
1018        struct list_head *iter;
1019        struct slave *slave;
1020        int err = 0;
1021
1022        bond_for_each_slave(bond, slave, iter) {
1023                err = slave_enable_netpoll(slave);
1024                if (err) {
1025                        bond_netpoll_cleanup(dev);
1026                        break;
1027                }
1028        }
1029        return err;
1030}
1031#else
/* CONFIG_NET_POLL_CONTROLLER disabled: enabling is a successful no-op. */
static inline int slave_enable_netpoll(struct slave *slave)
{
	return 0;
}
/* CONFIG_NET_POLL_CONTROLLER disabled: nothing to tear down. */
static inline void slave_disable_netpoll(struct slave *slave)
{
}
/* CONFIG_NET_POLL_CONTROLLER disabled: no per-slave netpoll state exists. */
static void bond_netpoll_cleanup(struct net_device *bond_dev)
{
}
1042#endif
1043
1044/*---------------------------------- IOCTL ----------------------------------*/
1045
1046static netdev_features_t bond_fix_features(struct net_device *dev,
1047                                           netdev_features_t features)
1048{
1049        struct bonding *bond = netdev_priv(dev);
1050        struct list_head *iter;
1051        netdev_features_t mask;
1052        struct slave *slave;
1053
1054        mask = features;
1055
1056        features &= ~NETIF_F_ONE_FOR_ALL;
1057        features |= NETIF_F_ALL_FOR_ALL;
1058
1059        bond_for_each_slave(bond, slave, iter) {
1060                features = netdev_increment_features(features,
1061                                                     slave->dev->features,
1062                                                     mask);
1063        }
1064        features = netdev_add_tso_features(features, mask);
1065
1066        return features;
1067}
1068
/* Feature bits propagated into the bond device's vlan_features. */
#define BOND_VLAN_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
				 NETIF_F_HIGHDMA | NETIF_F_LRO)

/* Feature bits used as the starting set for hw_enc_features. */
#define BOND_ENC_FEATURES	(NETIF_F_HW_CSUM | NETIF_F_SG | \
				 NETIF_F_RXCSUM | NETIF_F_ALL_TSO)
1075
/* Recompute the bond device's derived attributes (vlan/enc features,
 * header length, GSO limits, dst-release flags) from all slaves and
 * push the result out via netdev_change_features().
 */
static void bond_compute_features(struct bonding *bond)
{
	unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE |
					IFF_XMIT_DST_RELEASE_PERM;
	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
	netdev_features_t enc_features  = BOND_ENC_FEATURES;
	struct net_device *bond_dev = bond->dev;
	struct list_head *iter;
	struct slave *slave;
	unsigned short max_hard_header_len = ETH_HLEN;
	unsigned int gso_max_size = GSO_MAX_SIZE;
	u16 gso_max_segs = GSO_MAX_SEGS;

	/* no slaves: advertise the default sets computed above */
	if (!bond_has_slaves(bond))
		goto done;
	vlan_features &= NETIF_F_ALL_FOR_ALL;

	bond_for_each_slave(bond, slave, iter) {
		vlan_features = netdev_increment_features(vlan_features,
			slave->dev->vlan_features, BOND_VLAN_FEATURES);

		enc_features = netdev_increment_features(enc_features,
							 slave->dev->hw_enc_features,
							 BOND_ENC_FEATURES);
		/* dst-release bits survive only if set on every slave */
		dst_release_flag &= slave->dev->priv_flags;
		if (slave->dev->hard_header_len > max_hard_header_len)
			max_hard_header_len = slave->dev->hard_header_len;

		/* GSO limits are capped by the most restrictive slave */
		gso_max_size = min(gso_max_size, slave->dev->gso_max_size);
		gso_max_segs = min(gso_max_segs, slave->dev->gso_max_segs);
	}
	bond_dev->hard_header_len = max_hard_header_len;

done:
	bond_dev->vlan_features = vlan_features;
	bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL;
	bond_dev->gso_max_segs = gso_max_segs;
	netif_set_gso_max_size(bond_dev, gso_max_size);

	bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
	if ((bond_dev->priv_flags & IFF_XMIT_DST_RELEASE_PERM) &&
	    dst_release_flag == (IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM))
		bond_dev->priv_flags |= IFF_XMIT_DST_RELEASE;

	netdev_change_features(bond_dev);
}
1122
1123static void bond_setup_by_slave(struct net_device *bond_dev,
1124                                struct net_device *slave_dev)
1125{
1126        bond_dev->header_ops        = slave_dev->header_ops;
1127
1128        bond_dev->type              = slave_dev->type;
1129        bond_dev->hard_header_len   = slave_dev->hard_header_len;
1130        bond_dev->addr_len          = slave_dev->addr_len;
1131
1132        memcpy(bond_dev->broadcast, slave_dev->broadcast,
1133                slave_dev->addr_len);
1134}
1135
1136/* On bonding slaves other than the currently active slave, suppress
1137 * duplicates except for alb non-mcast/bcast.
1138 */
1139static bool bond_should_deliver_exact_match(struct sk_buff *skb,
1140                                            struct slave *slave,
1141                                            struct bonding *bond)
1142{
1143        if (bond_is_slave_inactive(slave)) {
1144                if (BOND_MODE(bond) == BOND_MODE_ALB &&
1145                    skb->pkt_type != PACKET_BROADCAST &&
1146                    skb->pkt_type != PACKET_MULTICAST)
1147                        return false;
1148                return true;
1149        }
1150        return false;
1151}
1152
1153static rx_handler_result_t bond_handle_frame(struct sk_buff **pskb)
1154{
1155        struct sk_buff *skb = *pskb;
1156        struct slave *slave;
1157        struct bonding *bond;
1158        int (*recv_probe)(const struct sk_buff *, struct bonding *,
1159                          struct slave *);
1160        int ret = RX_HANDLER_ANOTHER;
1161
1162        skb = skb_share_check(skb, GFP_ATOMIC);
1163        if (unlikely(!skb))
1164                return RX_HANDLER_CONSUMED;
1165
1166        *pskb = skb;
1167
1168        slave = bond_slave_get_rcu(skb->dev);
1169        bond = slave->bond;
1170
1171        recv_probe = ACCESS_ONCE(bond->recv_probe);
1172        if (recv_probe) {
1173                ret = recv_probe(skb, bond, slave);
1174                if (ret == RX_HANDLER_CONSUMED) {
1175                        consume_skb(skb);
1176                        return ret;
1177                }
1178        }
1179
1180        if (bond_should_deliver_exact_match(skb, slave, bond)) {
1181                return RX_HANDLER_EXACT;
1182        }
1183
1184        skb->dev = bond->dev;
1185
1186        if (BOND_MODE(bond) == BOND_MODE_ALB &&
1187            bond->dev->priv_flags & IFF_BRIDGE_PORT &&
1188            skb->pkt_type == PACKET_HOST) {
1189
1190                if (unlikely(skb_cow_head(skb,
1191                                          skb->data - skb_mac_header(skb)))) {
1192                        kfree_skb(skb);
1193                        return RX_HANDLER_CONSUMED;
1194                }
1195                bond_hw_addr_copy(eth_hdr(skb)->h_dest, bond->dev->dev_addr,
1196                                  bond->dev->addr_len);
1197        }
1198
1199        return ret;
1200}
1201
1202static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
1203{
1204        switch (BOND_MODE(bond)) {
1205        case BOND_MODE_ROUNDROBIN:
1206                return NETDEV_LAG_TX_TYPE_ROUNDROBIN;
1207        case BOND_MODE_ACTIVEBACKUP:
1208                return NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
1209        case BOND_MODE_BROADCAST:
1210                return NETDEV_LAG_TX_TYPE_BROADCAST;
1211        case BOND_MODE_XOR:
1212        case BOND_MODE_8023AD:
1213                return NETDEV_LAG_TX_TYPE_HASH;
1214        default:
1215                return NETDEV_LAG_TX_TYPE_UNKNOWN;
1216        }
1217}
1218
1219static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
1220{
1221        struct netdev_lag_upper_info lag_upper_info;
1222        char linkname[IFNAMSIZ+7];
1223        int err;
1224
1225        lag_upper_info.tx_type = bond_lag_tx_type(bond);
1226        err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
1227                                           &lag_upper_info);
1228        if (err)
1229                return err;
1230
1231        sprintf(linkname, "slave_%s", slave->dev->name);
1232        err = sysfs_create_link(&(bond->dev->dev.kobj), &(slave->dev->dev.kobj),
1233                                linkname);
1234        if (err) {
1235                netdev_upper_dev_unlink(slave->dev, bond->dev);
1236                return err;
1237        }
1238
1239        rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
1240        return 0;
1241}
1242
1243static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave)
1244{
1245        char linkname[IFNAMSIZ+7];
1246        sprintf(linkname, "slave_%s", slave->dev->name);
1247        sysfs_remove_link(&(bond->dev->dev.kobj), linkname);
1248        netdev_upper_dev_unlink(slave->dev, bond->dev);
1249        slave->dev->flags &= ~IFF_SLAVE;
1250        rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
1251}
1252
1253static struct slave *bond_alloc_slave(struct bonding *bond)
1254{
1255        struct slave *slave = NULL;
1256
1257        slave = kzalloc(sizeof(struct slave), GFP_KERNEL);
1258        if (!slave)
1259                return NULL;
1260
1261        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
1262                SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info),
1263                                               GFP_KERNEL);
1264                if (!SLAVE_AD_INFO(slave)) {
1265                        kfree(slave);
1266                        return NULL;
1267                }
1268        }
1269        return slave;
1270}
1271
/* Free a slave allocated by bond_alloc_slave(), including the 802.3ad
 * per-slave state when the bond runs in that mode.
 */
static void bond_free_slave(struct slave *slave)
{
	struct bonding *bond = bond_get_bond_by_slave(slave);

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		kfree(SLAVE_AD_INFO(slave));

	kfree(slave);
}
1281
1282static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info)
1283{
1284        info->bond_mode = BOND_MODE(bond);
1285        info->miimon = bond->params.miimon;
1286        info->num_slaves = bond->slave_cnt;
1287}
1288
1289static void bond_fill_ifslave(struct slave *slave, struct ifslave *info)
1290{
1291        strcpy(info->slave_name, slave->dev->name);
1292        info->link = slave->link;
1293        info->state = bond_slave_state(slave);
1294        info->link_failure_count = slave->link_failure_count;
1295}
1296
/* Deliver a bonding-info change notification for @dev under RTNL. */
static void bond_netdev_notify(struct net_device *dev,
			       struct netdev_bonding_info *info)
{
	rtnl_lock();
	netdev_bonding_info_change(dev, info);
	rtnl_unlock();
}
1304
/* Work handler for the deferred notification queued by
 * bond_queue_slave_event(); releases the reference taken there.
 */
static void bond_netdev_notify_work(struct work_struct *_work)
{
	struct netdev_notify_work *w =
		container_of(_work, struct netdev_notify_work, work.work);

	bond_netdev_notify(w->dev, &w->bonding_info);
	dev_put(w->dev);	/* paired with dev_hold() in bond_queue_slave_event() */
	kfree(w);
}
1314
1315void bond_queue_slave_event(struct slave *slave)
1316{
1317        struct bonding *bond = slave->bond;
1318        struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC);
1319
1320        if (!nnw)
1321                return;
1322
1323        dev_hold(slave->dev);
1324        nnw->dev = slave->dev;
1325        bond_fill_ifslave(slave, &nnw->bonding_info.slave);
1326        bond_fill_ifbond(bond, &nnw->bonding_info.master);
1327        INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work);
1328
1329        queue_delayed_work(slave->bond->wq, &nnw->work, 0);
1330}
1331
1332void bond_lower_state_changed(struct slave *slave)
1333{
1334        struct netdev_lag_lower_state_info info;
1335
1336        info.link_up = slave->link == BOND_LINK_UP ||
1337                       slave->link == BOND_LINK_FAIL;
1338        info.tx_enabled = bond_is_active_slave(slave);
1339        netdev_lower_state_changed(slave->dev, &info);
1340}
1341
1342/* enslave device <slave> to bond device <master> */
1343int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
1344{
1345        struct bonding *bond = netdev_priv(bond_dev);
1346        const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
1347        struct slave *new_slave = NULL, *prev_slave;
1348        struct sockaddr_storage ss;
1349        int link_reporting;
1350        int res = 0, i;
1351
1352        if (!bond->params.use_carrier &&
1353            slave_dev->ethtool_ops->get_link == NULL &&
1354            slave_ops->ndo_do_ioctl == NULL) {
1355                netdev_warn(bond_dev, "no link monitoring support for %s\n",
1356                            slave_dev->name);
1357        }
1358
1359        /* already in-use? */
1360        if (netdev_is_rx_handler_busy(slave_dev)) {
1361                netdev_err(bond_dev,
1362                           "Error: Device is in use and cannot be enslaved\n");
1363                return -EBUSY;
1364        }
1365
1366        if (bond_dev == slave_dev) {
1367                netdev_err(bond_dev, "cannot enslave bond to itself.\n");
1368                return -EPERM;
1369        }
1370
1371        /* vlan challenged mutual exclusion */
1372        /* no need to lock since we're protected by rtnl_lock */
1373        if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
1374                netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
1375                           slave_dev->name);
1376                if (vlan_uses_dev(bond_dev)) {
1377                        netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
1378                                   slave_dev->name, bond_dev->name);
1379                        return -EPERM;
1380                } else {
1381                        netdev_warn(bond_dev, "enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n",
1382                                    slave_dev->name, slave_dev->name,
1383                                    bond_dev->name);
1384                }
1385        } else {
1386                netdev_dbg(bond_dev, "%s is !NETIF_F_VLAN_CHALLENGED\n",
1387                           slave_dev->name);
1388        }
1389
1390        /* Old ifenslave binaries are no longer supported.  These can
1391         * be identified with moderate accuracy by the state of the slave:
1392         * the current ifenslave will set the interface down prior to
1393         * enslaving it; the old ifenslave will not.
1394         */
1395        if (slave_dev->flags & IFF_UP) {
1396                netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
1397                           slave_dev->name);
1398                res = -EPERM;
1399                goto err_undo_flags;
1400        }
1401
1402        /* set bonding device ether type by slave - bonding netdevices are
1403         * created with ether_setup, so when the slave type is not ARPHRD_ETHER
1404         * there is a need to override some of the type dependent attribs/funcs.
1405         *
1406         * bond ether type mutual exclusion - don't allow slaves of dissimilar
1407         * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
1408         */
1409        if (!bond_has_slaves(bond)) {
1410                if (bond_dev->type != slave_dev->type) {
1411                        netdev_dbg(bond_dev, "change device type from %d to %d\n",
1412                                   bond_dev->type, slave_dev->type);
1413
1414                        res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
1415                                                       bond_dev);
1416                        res = notifier_to_errno(res);
1417                        if (res) {
1418                                netdev_err(bond_dev, "refused to change device type\n");
1419                                res = -EBUSY;
1420                                goto err_undo_flags;
1421                        }
1422
1423                        /* Flush unicast and multicast addresses */
1424                        dev_uc_flush(bond_dev);
1425                        dev_mc_flush(bond_dev);
1426
1427                        if (slave_dev->type != ARPHRD_ETHER)
1428                                bond_setup_by_slave(bond_dev, slave_dev);
1429                        else {
1430                                ether_setup(bond_dev);
1431                                bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1432                        }
1433
1434                        call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
1435                                                 bond_dev);
1436                }
1437        } else if (bond_dev->type != slave_dev->type) {
1438                netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
1439                           slave_dev->name, slave_dev->type, bond_dev->type);
1440                res = -EINVAL;
1441                goto err_undo_flags;
1442        }
1443
1444        if (slave_dev->type == ARPHRD_INFINIBAND &&
1445            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1446                netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
1447                            slave_dev->type);
1448                res = -EOPNOTSUPP;
1449                goto err_undo_flags;
1450        }
1451
1452        if (!slave_ops->ndo_set_mac_address ||
1453            slave_dev->type == ARPHRD_INFINIBAND) {
1454                netdev_warn(bond_dev, "The slave device specified does not support setting the MAC address\n");
1455                if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
1456                    bond->params.fail_over_mac != BOND_FOM_ACTIVE) {
1457                        if (!bond_has_slaves(bond)) {
1458                                bond->params.fail_over_mac = BOND_FOM_ACTIVE;
1459                                netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
1460                        } else {
1461                                netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
1462                                res = -EOPNOTSUPP;
1463                                goto err_undo_flags;
1464                        }
1465                }
1466        }
1467
1468        call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
1469
1470        /* If this is the first slave, then we need to set the master's hardware
1471         * address to be the same as the slave's.
1472         */
1473        if (!bond_has_slaves(bond) &&
1474            bond->dev->addr_assign_type == NET_ADDR_RANDOM)
1475                bond_set_dev_addr(bond->dev, slave_dev);
1476
1477        new_slave = bond_alloc_slave(bond);
1478        if (!new_slave) {
1479                res = -ENOMEM;
1480                goto err_undo_flags;
1481        }
1482
1483        new_slave->bond = bond;
1484        new_slave->dev = slave_dev;
1485        /* Set the new_slave's queue_id to be zero.  Queue ID mapping
1486         * is set via sysfs or module option if desired.
1487         */
1488        new_slave->queue_id = 0;
1489
1490        /* Save slave's original mtu and then set it to match the bond */
1491        new_slave->original_mtu = slave_dev->mtu;
1492        res = dev_set_mtu(slave_dev, bond->dev->mtu);
1493        if (res) {
1494                netdev_dbg(bond_dev, "Error %d calling dev_set_mtu\n", res);
1495                goto err_free;
1496        }
1497
1498        /* Save slave's original ("permanent") mac address for modes
1499         * that need it, and for restoring it upon release, and then
1500         * set it to the master's address
1501         */
1502        bond_hw_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr,
1503                          slave_dev->addr_len);
1504
1505        if (!bond->params.fail_over_mac ||
1506            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1507                /* Set slave to master's mac address.  The application already
1508                 * set the master's mac address to that of the first slave
1509                 */
1510                memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
1511                ss.ss_family = slave_dev->type;
1512                res = dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
1513                if (res) {
1514                        netdev_dbg(bond_dev, "Error %d calling set_mac_address\n", res);
1515                        goto err_restore_mtu;
1516                }
1517        }
1518
1519        /* set slave flag before open to prevent IPv6 addrconf */
1520        slave_dev->flags |= IFF_SLAVE;
1521
1522        /* open the slave since the application closed it */
1523        res = dev_open(slave_dev);
1524        if (res) {
1525                netdev_dbg(bond_dev, "Opening slave %s failed\n", slave_dev->name);
1526                goto err_restore_mac;
1527        }
1528
1529        slave_dev->priv_flags |= IFF_BONDING;
1530        /* initialize slave stats */
1531        dev_get_stats(new_slave->dev, &new_slave->slave_stats);
1532
1533        if (bond_is_lb(bond)) {
1534                /* bond_alb_init_slave() must be called before all other stages since
1535                 * it might fail and we do not want to have to undo everything
1536                 */
1537                res = bond_alb_init_slave(bond, new_slave);
1538                if (res)
1539                        goto err_close;
1540        }
1541
1542        /* If the mode uses primary, then the following is handled by
1543         * bond_change_active_slave().
1544         */
1545        if (!bond_uses_primary(bond)) {
1546                /* set promiscuity level to new slave */
1547                if (bond_dev->flags & IFF_PROMISC) {
1548                        res = dev_set_promiscuity(slave_dev, 1);
1549                        if (res)
1550                                goto err_close;
1551                }
1552
1553                /* set allmulti level to new slave */
1554                if (bond_dev->flags & IFF_ALLMULTI) {
1555                        res = dev_set_allmulti(slave_dev, 1);
1556                        if (res)
1557                                goto err_close;
1558                }
1559
1560                netif_addr_lock_bh(bond_dev);
1561
1562                dev_mc_sync_multiple(slave_dev, bond_dev);
1563                dev_uc_sync_multiple(slave_dev, bond_dev);
1564
1565                netif_addr_unlock_bh(bond_dev);
1566        }
1567
1568        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
1569                /* add lacpdu mc addr to mc list */
1570                u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
1571
1572                dev_mc_add(slave_dev, lacpdu_multicast);
1573        }
1574
1575        res = vlan_vids_add_by_dev(slave_dev, bond_dev);
1576        if (res) {
1577                netdev_err(bond_dev, "Couldn't add bond vlan ids to %s\n",
1578                           slave_dev->name);
1579                goto err_close;
1580        }
1581
1582        prev_slave = bond_last_slave(bond);
1583
1584        new_slave->delay = 0;
1585        new_slave->link_failure_count = 0;
1586
1587        if (bond_update_speed_duplex(new_slave))
1588                new_slave->link = BOND_LINK_DOWN;
1589
1590        new_slave->last_rx = jiffies -
1591                (msecs_to_jiffies(bond->params.arp_interval) + 1);
1592        for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
1593                new_slave->target_last_arp_rx[i] = new_slave->last_rx;
1594
1595        if (bond->params.miimon && !bond->params.use_carrier) {
1596                link_reporting = bond_check_dev_link(bond, slave_dev, 1);
1597
1598                if ((link_reporting == -1) && !bond->params.arp_interval) {
1599                        /* miimon is set but a bonded network driver
1600                         * does not support ETHTOOL/MII and
1601                         * arp_interval is not set.  Note: if
1602                         * use_carrier is enabled, we will never go
1603                         * here (because netif_carrier is always
1604                         * supported); thus, we don't need to change
1605                         * the messages for netif_carrier.
1606                         */
1607                        netdev_warn(bond_dev, "MII and ETHTOOL support not available for interface %s, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n",
1608                                    slave_dev->name);
1609                } else if (link_reporting == -1) {
1610                        /* unable get link status using mii/ethtool */
1611                        netdev_warn(bond_dev, "can't get link status from interface %s; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n",
1612                                    slave_dev->name);
1613                }
1614        }
1615
1616        /* check for initial state */
1617        new_slave->link = BOND_LINK_NOCHANGE;
1618        if (bond->params.miimon) {
1619                if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) {
1620                        if (bond->params.updelay) {
1621                                bond_set_slave_link_state(new_slave,
1622                                                          BOND_LINK_BACK,
1623                                                          BOND_SLAVE_NOTIFY_NOW);
1624                                new_slave->delay = bond->params.updelay;
1625                        } else {
1626                                bond_set_slave_link_state(new_slave,
1627                                                          BOND_LINK_UP,
1628                                                          BOND_SLAVE_NOTIFY_NOW);
1629                        }
1630                } else {
1631                        bond_set_slave_link_state(new_slave, BOND_LINK_DOWN,
1632                                                  BOND_SLAVE_NOTIFY_NOW);
1633                }
1634        } else if (bond->params.arp_interval) {
1635                bond_set_slave_link_state(new_slave,
1636                                          (netif_carrier_ok(slave_dev) ?
1637                                          BOND_LINK_UP : BOND_LINK_DOWN),
1638                                          BOND_SLAVE_NOTIFY_NOW);
1639        } else {
1640                bond_set_slave_link_state(new_slave, BOND_LINK_UP,
1641                                          BOND_SLAVE_NOTIFY_NOW);
1642        }
1643
1644        if (new_slave->link != BOND_LINK_DOWN)
1645                new_slave->last_link_up = jiffies;
1646        netdev_dbg(bond_dev, "Initial state of slave_dev is BOND_LINK_%s\n",
1647                   new_slave->link == BOND_LINK_DOWN ? "DOWN" :
1648                   (new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
1649
1650        if (bond_uses_primary(bond) && bond->params.primary[0]) {
1651                /* if there is a primary slave, remember it */
1652                if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
1653                        rcu_assign_pointer(bond->primary_slave, new_slave);
1654                        bond->force_primary = true;
1655                }
1656        }
1657
1658        switch (BOND_MODE(bond)) {
1659        case BOND_MODE_ACTIVEBACKUP:
1660                bond_set_slave_inactive_flags(new_slave,
1661                                              BOND_SLAVE_NOTIFY_NOW);
1662                break;
1663        case BOND_MODE_8023AD:
1664                /* in 802.3ad mode, the internal mechanism
1665                 * will activate the slaves in the selected
1666                 * aggregator
1667                 */
1668                bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
1669                /* if this is the first slave */
1670                if (!prev_slave) {
1671                        SLAVE_AD_INFO(new_slave)->id = 1;
1672                        /* Initialize AD with the number of times that the AD timer is called in 1 second
1673                         * can be called only after the mac address of the bond is set
1674                         */
1675                        bond_3ad_initialize(bond, 1000/AD_TIMER_INTERVAL);
1676                } else {
1677                        SLAVE_AD_INFO(new_slave)->id =
1678                                SLAVE_AD_INFO(prev_slave)->id + 1;
1679                }
1680
1681                bond_3ad_bind_slave(new_slave);
1682                break;
1683        case BOND_MODE_TLB:
1684        case BOND_MODE_ALB:
1685                bond_set_active_slave(new_slave);
1686                bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW);
1687                break;
1688        default:
1689                netdev_dbg(bond_dev, "This slave is always active in trunk mode\n");
1690
1691                /* always active in trunk mode */
1692                bond_set_active_slave(new_slave);
1693
1694                /* In trunking mode there is little meaning to curr_active_slave
1695                 * anyway (it holds no special properties of the bond device),
1696                 * so we can change it without calling change_active_interface()
1697                 */
1698                if (!rcu_access_pointer(bond->curr_active_slave) &&
1699                    new_slave->link == BOND_LINK_UP)
1700                        rcu_assign_pointer(bond->curr_active_slave, new_slave);
1701
1702                break;
1703        } /* switch(bond_mode) */
1704
1705#ifdef CONFIG_NET_POLL_CONTROLLER
1706        slave_dev->npinfo = bond->dev->npinfo;
1707        if (slave_dev->npinfo) {
1708                if (slave_enable_netpoll(new_slave)) {
1709                        netdev_info(bond_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n");
1710                        res = -EBUSY;
1711                        goto err_detach;
1712                }
1713        }
1714#endif
1715
1716        if (!(bond_dev->features & NETIF_F_LRO))
1717                dev_disable_lro(slave_dev);
1718
1719        res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
1720                                         new_slave);
1721        if (res) {
1722                netdev_dbg(bond_dev, "Error %d calling netdev_rx_handler_register\n", res);
1723                goto err_detach;
1724        }
1725
1726        res = bond_master_upper_dev_link(bond, new_slave);
1727        if (res) {
1728                netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
1729                goto err_unregister;
1730        }
1731
1732        res = bond_sysfs_slave_add(new_slave);
1733        if (res) {
1734                netdev_dbg(bond_dev, "Error %d calling bond_sysfs_slave_add\n", res);
1735                goto err_upper_unlink;
1736        }
1737
1738        bond->slave_cnt++;
1739        bond_compute_features(bond);
1740        bond_set_carrier(bond);
1741
1742        if (bond_uses_primary(bond)) {
1743                block_netpoll_tx();
1744                bond_select_active_slave(bond);
1745                unblock_netpoll_tx();
1746        }
1747
1748        if (bond_mode_uses_xmit_hash(bond))
1749                bond_update_slave_arr(bond, NULL);
1750
1751        netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n",
1752                    slave_dev->name,
1753                    bond_is_active_slave(new_slave) ? "an active" : "a backup",
1754                    new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
1755
1756        /* enslave is successful */
1757        bond_queue_slave_event(new_slave);
1758        return 0;
1759
1760/* Undo stages on error */
1761err_upper_unlink:
1762        bond_upper_dev_unlink(bond, new_slave);
1763
1764err_unregister:
1765        netdev_rx_handler_unregister(slave_dev);
1766
1767err_detach:
1768        if (!bond_uses_primary(bond))
1769                bond_hw_addr_flush(bond_dev, slave_dev);
1770
1771        vlan_vids_del_by_dev(slave_dev, bond_dev);
1772        if (rcu_access_pointer(bond->primary_slave) == new_slave)
1773                RCU_INIT_POINTER(bond->primary_slave, NULL);
1774        if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
1775                block_netpoll_tx();
1776                bond_change_active_slave(bond, NULL);
1777                bond_select_active_slave(bond);
1778                unblock_netpoll_tx();
1779        }
1780        /* either primary_slave or curr_active_slave might've changed */
1781        synchronize_rcu();
1782        slave_disable_netpoll(new_slave);
1783
1784err_close:
1785        slave_dev->priv_flags &= ~IFF_BONDING;
1786        dev_close(slave_dev);
1787
1788err_restore_mac:
1789        slave_dev->flags &= ~IFF_SLAVE;
1790        if (!bond->params.fail_over_mac ||
1791            BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
1792                /* XXX TODO - fom follow mode needs to change master's
1793                 * MAC if this slave's MAC is in use by the bond, or at
1794                 * least print a warning.
1795                 */
1796                bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr,
1797                                  new_slave->dev->addr_len);
1798                ss.ss_family = slave_dev->type;
1799                dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
1800        }
1801
1802err_restore_mtu:
1803        dev_set_mtu(slave_dev, new_slave->original_mtu);
1804
1805err_free:
1806        bond_free_slave(new_slave);
1807
1808err_undo_flags:
1809        /* Enslave of first slave has failed and we need to fix master's mac */
1810        if (!bond_has_slaves(bond)) {
1811                if (ether_addr_equal_64bits(bond_dev->dev_addr,
1812                                            slave_dev->dev_addr))
1813                        eth_hw_addr_random(bond_dev);
1814                if (bond_dev->type != ARPHRD_ETHER) {
1815                        dev_close(bond_dev);
1816                        ether_setup(bond_dev);
1817                        bond_dev->flags |= IFF_MASTER;
1818                        bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
1819                }
1820        }
1821
1822        return res;
1823}
1824
/* Try to release the slave device <slave> from the bond device <master>
 * It is legal to access curr_active_slave without a lock because all the function
 * is RTNL-locked. If "all" is true it means that the function is being called
 * while destroying a bond interface and all slaves are being released.
 *
 * The rules for slave state should be:
 *   for Active/Backup:
 *     Active stays on all backups go down
 *   for Bonded connections:
 *     The first up interface should be left on and all others downed.
 */
static int __bond_release_one(struct net_device *bond_dev,
			      struct net_device *slave_dev,
			      bool all)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *oldcurrent;
	struct sockaddr_storage ss;
	/* Cache the master's flags/features now: the notifiers fired during
	 * teardown below may change them on bond_dev, but the cleanup at the
	 * end needs the values that were in effect while the slave was
	 * attached (see the IFF_PROMISC/IFF_ALLMULTI note further down).
	 */
	int old_flags = bond_dev->flags;
	netdev_features_t old_features = bond_dev->features;

	/* slave is not a slave or master is not master of this slave */
	if (!(slave_dev->flags & IFF_SLAVE) ||
	    !netdev_has_upper_dev(slave_dev, bond_dev)) {
		netdev_dbg(bond_dev, "cannot release %s\n",
			   slave_dev->name);
		return -EINVAL;
	}

	block_netpoll_tx();

	slave = bond_get_slave_by_dev(bond, slave_dev);
	if (!slave) {
		/* not a slave of this bond */
		netdev_info(bond_dev, "%s not enslaved\n",
			    slave_dev->name);
		unblock_netpoll_tx();
		return -EINVAL;
	}

	bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW);

	bond_sysfs_slave_del(slave);

	/* recompute stats just before removing the slave */
	bond_get_stats(bond->dev, &bond->bond_stats);

	bond_upper_dev_unlink(bond, slave);
	/* unregister rx_handler early so bond_handle_frame wouldn't be called
	 * for this slave anymore.
	 */
	netdev_rx_handler_unregister(slave_dev);

	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		bond_3ad_unbind_slave(slave);

	if (bond_mode_uses_xmit_hash(bond))
		bond_update_slave_arr(bond, slave);

	netdev_info(bond_dev, "Releasing %s interface %s\n",
		    bond_is_active_slave(slave) ? "active" : "backup",
		    slave_dev->name);

	/* remember who was active before the unlink; used below to decide
	 * whether a failover / re-selection is needed
	 */
	oldcurrent = rcu_access_pointer(bond->curr_active_slave);

	/* the departing slave must not remain the ARP probing slave */
	RCU_INIT_POINTER(bond->current_arp_slave, NULL);

	if (!all && (!bond->params.fail_over_mac ||
		     BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) {
		/* warn if the bond keeps using the departing slave's
		 * permanent MAC as its own address
		 */
		if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
		    bond_has_slaves(bond))
			netdev_warn(bond_dev, "the permanent HWaddr of %s - %pM - is still in use by %s - set the HWaddr of %s to a different address to avoid conflicts\n",
				    slave_dev->name, slave->perm_hwaddr,
				    bond_dev->name, slave_dev->name);
	}

	if (rtnl_dereference(bond->primary_slave) == slave)
		RCU_INIT_POINTER(bond->primary_slave, NULL);

	if (oldcurrent == slave)
		bond_change_active_slave(bond, NULL);

	if (bond_is_lb(bond)) {
		/* Must be called only after the slave has been
		 * detached from the list and the curr_active_slave
		 * has been cleared (if our_slave == old_current),
		 * but before a new active slave is selected.
		 */
		bond_alb_deinit_slave(bond, slave);
	}

	if (all) {
		/* whole bond is going away - no point electing a new active */
		RCU_INIT_POINTER(bond->curr_active_slave, NULL);
	} else if (oldcurrent == slave) {
		/* Note that we hold RTNL over this sequence, so there
		 * is no concern that another slave add/remove event
		 * will interfere.
		 */
		bond_select_active_slave(bond);
	}

	if (!bond_has_slaves(bond)) {
		bond_set_carrier(bond);
		/* last slave gone: stop using a (former) slave's MAC */
		eth_hw_addr_random(bond_dev);
	}

	unblock_netpoll_tx();
	/* make sure no RCU reader can still see the departing slave before
	 * it is torn down further and freed at the end of this function
	 */
	synchronize_rcu();
	bond->slave_cnt--;

	if (!bond_has_slaves(bond)) {
		call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
		call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);
	}

	bond_compute_features(bond);
	if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
	    (old_features & NETIF_F_VLAN_CHALLENGED))
		netdev_info(bond_dev, "last VLAN challenged slave %s left bond %s - VLAN blocking is removed\n",
			    slave_dev->name, bond_dev->name);

	/* must do this from outside any spinlocks */
	vlan_vids_del_by_dev(slave_dev, bond_dev);

	/* If the mode uses primary, then this case was handled above by
	 * bond_change_active_slave(..., NULL)
	 */
	if (!bond_uses_primary(bond)) {
		/* unset promiscuity level from slave
		 * NOTE: The NETDEV_CHANGEADDR call above may change the value
		 * of the IFF_PROMISC flag in the bond_dev, but we need the
		 * value of that flag before that change, as that was the value
		 * when this slave was attached, so we cache at the start of the
		 * function and use it here. Same goes for ALLMULTI below
		 */
		if (old_flags & IFF_PROMISC)
			dev_set_promiscuity(slave_dev, -1);

		/* unset allmulti level from slave */
		if (old_flags & IFF_ALLMULTI)
			dev_set_allmulti(slave_dev, -1);

		bond_hw_addr_flush(bond_dev, slave_dev);
	}

	slave_disable_netpoll(slave);

	/* close slave before restoring its mac address */
	dev_close(slave_dev);

	if (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
	    BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
		/* restore original ("permanent") mac address */
		bond_hw_addr_copy(ss.__data, slave->perm_hwaddr,
				  slave->dev->addr_len);
		ss.ss_family = slave_dev->type;
		dev_set_mac_address(slave_dev, (struct sockaddr *)&ss);
	}

	dev_set_mtu(slave_dev, slave->original_mtu);

	slave_dev->priv_flags &= ~IFF_BONDING;

	bond_free_slave(slave);

	return 0;
}
1992
/* A wrapper used because of ndo_del_link */
int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
{
	/* all == false: release just this slave, the bond itself stays */
	return __bond_release_one(bond_dev, slave_dev, false);
}
1998
1999/* First release a slave and then destroy the bond if no more slaves are left.
2000 * Must be under rtnl_lock when this function is called.
2001 */
2002static int  bond_release_and_destroy(struct net_device *bond_dev,
2003                                     struct net_device *slave_dev)
2004{
2005        struct bonding *bond = netdev_priv(bond_dev);
2006        int ret;
2007
2008        ret = bond_release(bond_dev, slave_dev);
2009        if (ret == 0 && !bond_has_slaves(bond)) {
2010                bond_dev->priv_flags |= IFF_DISABLE_NETPOLL;
2011                netdev_info(bond_dev, "Destroying bond %s\n",
2012                            bond_dev->name);
2013                bond_remove_proc_entry(bond);
2014                unregister_netdevice(bond_dev);
2015        }
2016        return ret;
2017}
2018
/* ioctl helper: copy a summary of the bond's state into @info */
static void bond_info_query(struct net_device *bond_dev, struct ifbond *info)
{
	bond_fill_ifbond(netdev_priv(bond_dev), info);
}
2024
2025static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *info)
2026{
2027        struct bonding *bond = netdev_priv(bond_dev);
2028        struct list_head *iter;
2029        int i = 0, res = -ENODEV;
2030        struct slave *slave;
2031
2032        bond_for_each_slave(bond, slave, iter) {
2033                if (i++ == (int)info->slave_id) {
2034                        res = 0;
2035                        bond_fill_ifslave(slave, info);
2036                        break;
2037                }
2038        }
2039
2040        return res;
2041}
2042
2043/*-------------------------------- Monitoring -------------------------------*/
2044
/* called with rcu_read_lock()
 *
 * Walk the slaves and run the per-slave link state machine:
 *   UP -> FAIL -> DOWN on carrier loss (after downdelay ticks of miimon),
 *   DOWN -> BACK -> UP on carrier recovery (after updelay ticks).
 * Changes are only *proposed* here (via bond_propose_link_state() /
 * slave->new_link); the caller applies them under RTNL through
 * bond_miimon_commit().  Returns the number of proposed changes.
 */
static int bond_miimon_inspect(struct bonding *bond)
{
	int link_state, commit = 0;
	struct list_head *iter;
	struct slave *slave;
	bool ignore_updelay;

	/* with no active slave at all, bring the first recovering link up
	 * without waiting out updelay (see the BOND_LINK_BACK case below)
	 */
	ignore_updelay = !rcu_dereference(bond->curr_active_slave);

	bond_for_each_slave_rcu(bond, slave, iter) {
		slave->new_link = BOND_LINK_NOCHANGE;

		link_state = bond_check_dev_link(bond, slave->dev, 0);

		switch (slave->link) {
		case BOND_LINK_UP:
			if (link_state)
				continue;

			/* link lost: enter FAIL and start the downdelay
			 * countdown before declaring the slave down
			 */
			bond_propose_link_state(slave, BOND_LINK_FAIL);
			slave->delay = bond->params.downdelay;
			if (slave->delay) {
				netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n",
					    (BOND_MODE(bond) ==
					     BOND_MODE_ACTIVEBACKUP) ?
					     (bond_is_active_slave(slave) ?
					      "active " : "backup ") : "",
					    slave->dev->name,
					    bond->params.downdelay * bond->params.miimon);
			}
			/*FALLTHRU*/
		case BOND_LINK_FAIL:
			if (link_state) {
				/* recovered before downdelay expired */
				bond_propose_link_state(slave, BOND_LINK_UP);
				slave->last_link_up = jiffies;
				netdev_info(bond->dev, "link status up again after %d ms for interface %s\n",
					    (bond->params.downdelay - slave->delay) *
					    bond->params.miimon,
					    slave->dev->name);
				commit++;
				continue;
			}

			/* downdelay expired: propose taking the slave down */
			if (slave->delay <= 0) {
				slave->new_link = BOND_LINK_DOWN;
				commit++;
				continue;
			}

			slave->delay--;
			break;

		case BOND_LINK_DOWN:
			if (!link_state)
				continue;

			/* carrier detected: enter BACK and start the
			 * updelay countdown before re-enabling the slave
			 */
			bond_propose_link_state(slave, BOND_LINK_BACK);
			slave->delay = bond->params.updelay;

			if (slave->delay) {
				netdev_info(bond->dev, "link status up for interface %s, enabling it in %d ms\n",
					    slave->dev->name,
					    ignore_updelay ? 0 :
					    bond->params.updelay *
					    bond->params.miimon);
			}
			/*FALLTHRU*/
		case BOND_LINK_BACK:
			if (!link_state) {
				/* flapped back down before updelay expired */
				bond_propose_link_state(slave, BOND_LINK_DOWN);
				netdev_info(bond->dev, "link status down again after %d ms for interface %s\n",
					    (bond->params.updelay - slave->delay) *
					    bond->params.miimon,
					    slave->dev->name);

				commit++;
				continue;
			}

			if (ignore_updelay)
				slave->delay = 0;

			/* updelay expired (or skipped): propose link up */
			if (slave->delay <= 0) {
				slave->new_link = BOND_LINK_UP;
				commit++;
				ignore_updelay = false;
				continue;
			}

			slave->delay--;
			break;
		}
	}

	return commit;
}
2143
/* Called under RTNL (from bond_mii_monitor()): apply the link-state
 * transitions proposed in slave->new_link by bond_miimon_inspect(),
 * notify the mode-specific machinery (802.3ad, ALB/TLB, XOR xmit array)
 * and re-select the active slave where required.
 */
static void bond_miimon_commit(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave, *primary;

	bond_for_each_slave(bond, slave, iter) {
		switch (slave->new_link) {
		case BOND_LINK_NOCHANGE:
			continue;

		case BOND_LINK_UP:
			/* refuse to bring the link up if we cannot read its
			 * speed/duplex
			 */
			if (bond_update_speed_duplex(slave)) {
				slave->link = BOND_LINK_DOWN;
				netdev_warn(bond->dev,
					    "failed to get link speed/duplex for %s\n",
					    slave->dev->name);
				continue;
			}
			bond_set_slave_link_state(slave, BOND_LINK_UP,
						  BOND_SLAVE_NOTIFY_NOW);
			slave->last_link_up = jiffies;

			primary = rtnl_dereference(bond->primary_slave);
			if (BOND_MODE(bond) == BOND_MODE_8023AD) {
				/* prevent it from being the active one */
				bond_set_backup_slave(slave);
			} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
				/* make it immediately active */
				bond_set_active_slave(slave);
			} else if (slave != primary) {
				/* prevent it from being the active one */
				bond_set_backup_slave(slave);
			}

			netdev_info(bond->dev, "link status definitely up for interface %s, %u Mbps %s duplex\n",
				    slave->dev->name,
				    slave->speed == SPEED_UNKNOWN ? 0 : slave->speed,
				    slave->duplex ? "full" : "half");

			/* notify ad that the link status has changed */
			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(slave, BOND_LINK_UP);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, slave,
							    BOND_LINK_UP);

			if (BOND_MODE(bond) == BOND_MODE_XOR)
				bond_update_slave_arr(bond, NULL);

			/* fail over if there is no active slave yet, or the
			 * preferred primary slave just came back up
			 */
			if (!bond->curr_active_slave || slave == primary)
				goto do_failover;

			continue;

		case BOND_LINK_DOWN:
			if (slave->link_failure_count < UINT_MAX)
				slave->link_failure_count++;

			bond_set_slave_link_state(slave, BOND_LINK_DOWN,
						  BOND_SLAVE_NOTIFY_NOW);

			if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP ||
			    BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_set_slave_inactive_flags(slave,
							      BOND_SLAVE_NOTIFY_NOW);

			netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n",
				    slave->dev->name);

			if (BOND_MODE(bond) == BOND_MODE_8023AD)
				bond_3ad_handle_link_change(slave,
							    BOND_LINK_DOWN);

			if (bond_is_lb(bond))
				bond_alb_handle_link_change(bond, slave,
							    BOND_LINK_DOWN);

			if (BOND_MODE(bond) == BOND_MODE_XOR)
				bond_update_slave_arr(bond, NULL);

			/* losing the current active slave forces a failover */
			if (slave == rcu_access_pointer(bond->curr_active_slave))
				goto do_failover;

			continue;

		default:
			/* unexpected value: log it and treat as no change */
			netdev_err(bond->dev, "invalid new link %d on slave %s\n",
				   slave->new_link, slave->dev->name);
			slave->new_link = BOND_LINK_NOCHANGE;

			continue;
		}

do_failover:
		block_netpoll_tx();
		bond_select_active_slave(bond);
		unblock_netpoll_tx();
	}

	bond_set_carrier(bond);
}
2246
/* bond_mii_monitor
 *
 * Really a wrapper that splits the mii monitor into two phases: an
 * inspection, then (if inspection indicates something needs to be done)
 * an acquisition of appropriate locks followed by a commit phase to
 * implement whatever link state changes are indicated.
 *
 * Runs as delayed work on bond->wq and re-arms itself every miimon ms.
 */
static void bond_mii_monitor(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    mii_work.work);
	bool should_notify_peers = false;
	unsigned long delay;
	struct slave *slave;
	struct list_head *iter;

	delay = msecs_to_jiffies(bond->params.miimon);

	if (!bond_has_slaves(bond))
		goto re_arm;

	/* inspection phase runs under RCU only */
	rcu_read_lock();

	should_notify_peers = bond_should_notify_peers(bond);

	if (bond_miimon_inspect(bond)) {
		rcu_read_unlock();

		/* Race avoidance with bond_close cancel of workqueue */
		if (!rtnl_trylock()) {
			/* retry almost immediately instead of waiting a
			 * full miimon interval; commit needs RTNL
			 */
			delay = 1;
			should_notify_peers = false;
			goto re_arm;
		}

		/* commit phase: apply the proposed states under RTNL */
		bond_for_each_slave(bond, slave, iter) {
			bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER);
		}
		bond_miimon_commit(bond);

		rtnl_unlock();	/* might sleep, hold no other locks */
	} else
		rcu_read_unlock();

re_arm:
	if (bond->params.miimon)
		queue_delayed_work(bond->wq, &bond->mii_work, delay);

	if (should_notify_peers) {
		/* best effort: skip the notification if RTNL is contended */
		if (!rtnl_trylock())
			return;
		call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
		rtnl_unlock();
	}
}
2302
2303static bool bond_has_this_ip(struct bonding *bond, __be32 ip)
2304{
2305        struct net_device *upper;
2306        struct list_head *iter;
2307        bool ret = false;
2308
2309        if (ip == bond_confirm_addr(bond->dev, 0, ip))
2310                return true;
2311
2312        rcu_read_lock();
2313        netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
2314                if (ip == bond_confirm_addr(upper, 0, ip)) {
2315                        ret = true;
2316                        break;
2317                }
2318        }
2319        rcu_read_unlock();
2320
2321        return ret;
2322}
2323
2324/* We go to the (large) trouble of VLAN tagging ARP frames because
2325 * switches in VLAN mode (especially if ports are configured as
2326 * "native" to a VLAN) might not pass non-tagged frames.
2327 */
2328static void bond_arp_send(struct net_device *slave_dev, int arp_op,
2329                          __be32 dest_ip, __be32 src_ip,
2330                          struct bond_vlan_tag *inner,
2331                          struct bond_vlan_tag *outer)
2332{
2333        struct sk_buff *skb;
2334
2335        netdev_dbg(slave_dev, "arp %d on slave %s: dst %pI4 src %pI4\n",
2336                   arp_op, slave_dev->name, &dest_ip, &src_ip);
2337
2338        skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
2339                         NULL, slave_dev->dev_addr, NULL);
2340
2341        if (!skb) {
2342                net_err_ratelimited("ARP packet allocation failed\n");
2343                return;
2344        }
2345        if (outer->vlan_id) {
2346                if (inner->vlan_id) {
2347                        netdev_dbg(slave_dev, "inner tag: proto %X vid %X\n",
2348                                   ntohs(inner->vlan_proto), inner->vlan_id);
2349                        skb = vlan_insert_tag_set_proto(skb, inner->vlan_proto,
2350                                                        inner->vlan_id);
2351                        if (!skb) {
2352                                net_err_ratelimited("failed to insert inner VLAN tag\n");
2353                                return;
2354                        }
2355                }
2356
2357                netdev_dbg(slave_dev, "outer reg: proto %X vid %X\n",
2358                           ntohs(outer->vlan_proto), outer->vlan_id);
2359                __vlan_hwaccel_put_tag(skb, outer->vlan_proto,
2360                                       outer->vlan_id);
2361        }
2362        arp_xmit(skb);
2363}
2364
2365
/* Send an ARP probe to every configured arp_ip_target via @slave,
 * working out per-target which VLAN tag(s), if any, the frame needs
 * based on the route to the target and the bond's upper-device stack.
 */
static void bond_arp_send_all(struct bonding *bond, struct slave *slave)
{
        struct net_device *upper, *vlan_upper;
        struct list_head *iter, *vlan_iter;
        struct rtable *rt;
        struct bond_vlan_tag inner, outer;
        __be32 *targets = bond->params.arp_targets, addr;
        int i;

        /* targets[] is NULL-terminated (zero address ends the list). */
        for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
                netdev_dbg(bond->dev, "basa: target %pI4\n", &targets[i]);
                /* Start each target with "no tags". */
                inner.vlan_proto = 0;
                inner.vlan_id = 0;
                outer.vlan_proto = 0;
                outer.vlan_id = 0;

                /* Find out through which dev should the packet go */
                rt = ip_route_output(dev_net(bond->dev), targets[i], 0,
                                     RTO_ONLINK, 0);
                if (IS_ERR(rt)) {
                        /* there's no route to target - try to send arp
                         * probe to generate any traffic (arp_validate=0)
                         */
                        if (bond->params.arp_validate)
                                net_warn_ratelimited("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
                                                     bond->dev->name,
                                                     &targets[i]);
                        bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i], 0, &inner, &outer);
                        continue;
                }

                /* bond device itself */
                if (rt->dst.dev == bond->dev)
                        goto found;

                rcu_read_lock();
                /* first we search only for vlan devices. for every vlan
                 * found we verify its upper dev list, searching for the
                 * rt->dst.dev. If found we save the tag of the vlan and
                 * proceed to send the packet.
                 */
                netdev_for_each_all_upper_dev_rcu(bond->dev, vlan_upper,
                                                  vlan_iter) {
                        if (!is_vlan_dev(vlan_upper))
                                continue;

                        /* Route exits directly via a vlan on the bond:
                         * single (outer) tag.
                         */
                        if (vlan_upper == rt->dst.dev) {
                                outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
                                outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
                                rcu_read_unlock();
                                goto found;
                        }
                        netdev_for_each_all_upper_dev_rcu(vlan_upper, upper,
                                                          iter) {
                                if (upper == rt->dst.dev) {
                                        /* If the upper dev is a vlan dev too,
                                         *  set the vlan tag to inner tag.
                                         */
                                        if (is_vlan_dev(upper)) {
                                                inner.vlan_proto = vlan_dev_vlan_proto(upper);
                                                inner.vlan_id = vlan_dev_vlan_id(upper);
                                        }
                                        outer.vlan_proto = vlan_dev_vlan_proto(vlan_upper);
                                        outer.vlan_id = vlan_dev_vlan_id(vlan_upper);
                                        rcu_read_unlock();
                                        goto found;
                                }
                        }
                }

                /* if the device we're looking for is not on top of any of
                 * our upper vlans, then just search for any dev that
                 * matches, and in case it's a vlan - save the id
                 */
                netdev_for_each_all_upper_dev_rcu(bond->dev, upper, iter) {
                        if (upper == rt->dst.dev) {
                                rcu_read_unlock();
                                goto found;
                        }
                }
                rcu_read_unlock();

                /* Not our device - skip */
                netdev_dbg(bond->dev, "no path to arp_ip_target %pI4 via rt.dev %s\n",
                           &targets[i], rt->dst.dev ? rt->dst.dev->name : "NULL");

                ip_rt_put(rt);
                continue;

found:
                /* Use the egress device's own address as ARP source. */
                addr = bond_confirm_addr(rt->dst.dev, targets[i], 0);
                ip_rt_put(rt);
                bond_arp_send(slave->dev, ARPOP_REQUEST, targets[i],
                              addr, &inner, &outer);
        }
}
2462
2463static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32 sip, __be32 tip)
2464{
2465        int i;
2466
2467        if (!sip || !bond_has_this_ip(bond, tip)) {
2468                netdev_dbg(bond->dev, "bva: sip %pI4 tip %pI4 not found\n",
2469                           &sip, &tip);
2470                return;
2471        }
2472
2473        i = bond_get_targets_ip(bond->params.arp_targets, sip);
2474        if (i == -1) {
2475                netdev_dbg(bond->dev, "bva: sip %pI4 not found in targets\n",
2476                           &sip);
2477                return;
2478        }
2479        slave->last_rx = jiffies;
2480        slave->target_last_arp_rx[i] = jiffies;
2481}
2482
2483int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
2484                 struct slave *slave)
2485{
2486        struct arphdr *arp = (struct arphdr *)skb->data;
2487        struct slave *curr_active_slave, *curr_arp_slave;
2488        unsigned char *arp_ptr;
2489        __be32 sip, tip;
2490        int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
2491
2492        if (!slave_do_arp_validate(bond, slave)) {
2493                if ((slave_do_arp_validate_only(bond) && is_arp) ||
2494                    !slave_do_arp_validate_only(bond))
2495                        slave->last_rx = jiffies;
2496                return RX_HANDLER_ANOTHER;
2497        } else if (!is_arp) {
2498                return RX_HANDLER_ANOTHER;
2499        }
2500
2501        alen = arp_hdr_len(bond->dev);
2502
2503        netdev_dbg(bond->dev, "bond_arp_rcv: skb->dev %s\n",
2504                   skb->dev->name);
2505
2506        if (alen > skb_headlen(skb)) {
2507                arp = kmalloc(alen, GFP_ATOMIC);
2508                if (!arp)
2509                        goto out_unlock;
2510                if (skb_copy_bits(skb, 0, arp, alen) < 0)
2511                        goto out_unlock;
2512        }
2513
2514        if (arp->ar_hln != bond->dev->addr_len ||
2515            skb->pkt_type == PACKET_OTHERHOST ||
2516            skb->pkt_type == PACKET_LOOPBACK ||
2517            arp->ar_hrd != htons(ARPHRD_ETHER) ||
2518            arp->ar_pro != htons(ETH_P_IP) ||
2519            arp->ar_pln != 4)
2520                goto out_unlock;
2521
2522        arp_ptr = (unsigned char *)(arp + 1);
2523        arp_ptr += bond->dev->addr_len;
2524        memcpy(&sip, arp_ptr, 4);
2525        arp_ptr += 4 + bond->dev->addr_len;
2526        memcpy(&tip, arp_ptr, 4);
2527
2528        netdev_dbg(bond->dev, "bond_arp_rcv: %s/%d av %d sv %d sip %pI4 tip %pI4\n",
2529                   slave->dev->name, bond_slave_state(slave),
2530                     bond->params.arp_validate, slave_do_arp_validate(bond, slave),
2531                     &sip, &tip);
2532
2533        curr_active_slave = rcu_dereference(bond->curr_active_slave);
2534        curr_arp_slave = rcu_dereference(bond->current_arp_slave);
2535
2536        /* We 'trust' the received ARP enough to validate it if:
2537         *
2538         * (a) the slave receiving the ARP is active (which includes the
2539         * current ARP slave, if any), or
2540         *
2541         * (b) the receiving slave isn't active, but there is a currently
2542         * active slave and it received valid arp reply(s) after it became
2543         * the currently active slave, or
2544         *
2545         * (c) there is an ARP slave that sent an ARP during the prior ARP
2546         * interval, and we receive an ARP reply on any slave.  We accept
2547         * these because switch FDB update delays may deliver the ARP
2548         * reply to a slave other than the sender of the ARP request.
2549         *
2550         * Note: for (b), backup slaves are receiving the broadcast ARP
2551         * request, not a reply.  This request passes from the sending
2552         * slave through the L2 switch(es) to the receiving slave.  Since
2553         * this is checking the request, sip/tip are swapped for
2554         * validation.
2555         *
2556         * This is done to avoid endless looping when we can't reach the
2557         * arp_ip_target and fool ourselves with our own arp requests.
2558         */
2559        if (bond_is_active_slave(slave))
2560                bond_validate_arp(bond, slave, sip, tip);
2561        else if (curr_active_slave &&
2562                 time_after(slave_last_rx(bond, curr_active_slave),
2563                            curr_active_slave->last_link_up))
2564                bond_validate_arp(bond, slave, tip, sip);
2565        else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
2566                 bond_time_in_interval(bond,
2567                                       dev_trans_start(curr_arp_slave->dev), 1))
2568                bond_validate_arp(bond, slave, sip, tip);
2569
2570out_unlock:
2571        if (arp != (struct arphdr *)skb->data)
2572                kfree(arp);
2573        return RX_HANDLER_ANOTHER;
2574}
2575
2576/* function to verify if we're in the arp_interval timeslice, returns true if
2577 * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval +
2578 * arp_interval/2) . the arp_interval/2 is needed for really fast networks.
2579 */
2580static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act,
2581                                  int mod)
2582{
2583        int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
2584
2585        return time_in_range(jiffies,
2586                             last_act - delta_in_ticks,
2587                             last_act + mod * delta_in_ticks + delta_in_ticks/2);
2588}
2589
/* This function is called regularly to monitor each slave's link
 * ensuring that traffic is being sent and received when arp monitoring
 * is used in load-balancing mode. if the adapter has been dormant, then an
 * arp is transmitted to generate traffic. see activebackup_arp_monitor for
 * arp monitoring in active backup mode.
 */
static void bond_loadbalance_arp_mon(struct work_struct *work)
{
        struct bonding *bond = container_of(work, struct bonding,
                                            arp_work.work);
        struct slave *slave, *oldcurrent;
        struct list_head *iter;
        int do_failover = 0, slave_state_changed = 0;

        if (!bond_has_slaves(bond))
                goto re_arm;

        rcu_read_lock();

        oldcurrent = rcu_dereference(bond->curr_active_slave);
        /* see if any of the previous devices are up now (i.e. they have
         * xmt and rcv traffic). the curr_active_slave does not come into
         * the picture unless it is null. also, slave->last_link_up is not
         * needed here because we send an arp on each slave and give a slave
         * as long as it needs to get the tx/rx within the delta.
         * TODO: what about up/down delay in arp mode? it wasn't here before
         *       so it can wait
         */
        bond_for_each_slave_rcu(bond, slave, iter) {
                unsigned long trans_start = dev_trans_start(slave->dev);

                if (slave->link != BOND_LINK_UP) {
                        /* Both tx and rx seen within one interval: up. */
                        if (bond_time_in_interval(bond, trans_start, 1) &&
                            bond_time_in_interval(bond, slave->last_rx, 1)) {

                                slave->link  = BOND_LINK_UP;
                                slave_state_changed = 1;

                                /* primary_slave has no meaning in round-robin
                                 * mode. the window of a slave being up and
                                 * curr_active_slave being null after enslaving
                                 * is closed.
                                 */
                                if (!oldcurrent) {
                                        netdev_info(bond->dev, "link status definitely up for interface %s\n",
                                                    slave->dev->name);
                                        do_failover = 1;
                                } else {
                                        netdev_info(bond->dev, "interface %s is now up\n",
                                                    slave->dev->name);
                                }
                        }
                } else {
                        /* slave->link == BOND_LINK_UP */

                        /* not all switches will respond to an arp request
                         * when the source ip is 0, so don't take the link down
                         * if we don't know our ip yet
                         */
                        if (!bond_time_in_interval(bond, trans_start, 2) ||
                            !bond_time_in_interval(bond, slave->last_rx, 2)) {

                                slave->link  = BOND_LINK_DOWN;
                                slave_state_changed = 1;

                                if (slave->link_failure_count < UINT_MAX)
                                        slave->link_failure_count++;

                                netdev_info(bond->dev, "interface %s is now down\n",
                                            slave->dev->name);

                                if (slave == oldcurrent)
                                        do_failover = 1;
                        }
                }

                /* note: if switch is in round-robin mode, all links
                 * must tx arp to ensure all links rx an arp - otherwise
                 * links may oscillate or not come up at all; if switch is
                 * in something like xor mode, there is nothing we can
                 * do - all replies will be rx'ed on same link causing slaves
                 * to be unstable during low/no traffic periods
                 */
                if (bond_slave_is_up(slave))
                        bond_arp_send_all(bond, slave);
        }

        rcu_read_unlock();

        if (do_failover || slave_state_changed) {
                /* Need RTNL to commit; if contended just re-arm and let
                 * the next run retry (state flags recomputed then).
                 */
                if (!rtnl_trylock())
                        goto re_arm;

                if (slave_state_changed) {
                        bond_slave_state_change(bond);
                        if (BOND_MODE(bond) == BOND_MODE_XOR)
                                bond_update_slave_arr(bond, NULL);
                }
                if (do_failover) {
                        block_netpoll_tx();
                        bond_select_active_slave(bond);
                        unblock_netpoll_tx();
                }
                rtnl_unlock();
        }

re_arm:
        if (bond->params.arp_interval)
                queue_delayed_work(bond->wq, &bond->arp_work,
                                   msecs_to_jiffies(bond->params.arp_interval));
}
2701
/* Called to inspect slaves for active-backup mode ARP monitor link state
 * changes.  Sets new_link in slaves to specify what action should take
 * place for the slave.  Returns 0 if no changes are found, >0 if changes
 * to link states must be committed.
 *
 * Called with rcu_read_lock held.
 */
static int bond_ab_arp_inspect(struct bonding *bond)
{
        unsigned long trans_start, last_rx;
        struct list_head *iter;
        struct slave *slave;
        int commit = 0;

        bond_for_each_slave_rcu(bond, slave, iter) {
                slave->new_link = BOND_LINK_NOCHANGE;
                last_rx = slave_last_rx(bond, slave);

                /* A down slave that received within one interval: up. */
                if (slave->link != BOND_LINK_UP) {
                        if (bond_time_in_interval(bond, last_rx, 1)) {
                                slave->new_link = BOND_LINK_UP;
                                commit++;
                        }
                        continue;
                }

                /* Give slaves 2*delta after being enslaved or made
                 * active.  This avoids bouncing, as the last receive
                 * times need a full ARP monitor cycle to be updated.
                 */
                if (bond_time_in_interval(bond, slave->last_link_up, 2))
                        continue;

                /* Backup slave is down if:
                 * - No current_arp_slave AND
                 * - more than 3*delta since last receive AND
                 * - the bond has an IP address
                 *
                 * Note: a non-null current_arp_slave indicates
                 * the curr_active_slave went down and we are
                 * searching for a new one; under this condition
                 * we only take the curr_active_slave down - this
                 * gives each slave a chance to tx/rx traffic
                 * before being taken out
                 */
                if (!bond_is_active_slave(slave) &&
                    !rcu_access_pointer(bond->current_arp_slave) &&
                    !bond_time_in_interval(bond, last_rx, 3)) {
                        slave->new_link = BOND_LINK_DOWN;
                        commit++;
                }

                /* Active slave is down if:
                 * - more than 2*delta since transmitting OR
                 * - (more than 2*delta since receive AND
                 *    the bond has an IP address)
                 */
                trans_start = dev_trans_start(slave->dev);
                if (bond_is_active_slave(slave) &&
                    (!bond_time_in_interval(bond, trans_start, 2) ||
                     !bond_time_in_interval(bond, last_rx, 2))) {
                        slave->new_link = BOND_LINK_DOWN;
                        commit++;
                }
        }

        return commit;
}
2770
/* Called to commit link state changes noted by inspection step of
 * active-backup mode ARP monitor.
 *
 * Called with RTNL hold.
 */
static void bond_ab_arp_commit(struct bonding *bond)
{
        unsigned long trans_start;
        struct list_head *iter;
        struct slave *slave;

        bond_for_each_slave(bond, slave, iter) {
                switch (slave->new_link) {
                case BOND_LINK_NOCHANGE:
                        continue;

                case BOND_LINK_UP:
                        trans_start = dev_trans_start(slave->dev);
                        /* Mark up if this is already the active slave, or
                         * if there is no active slave and this one has
                         * transmitted within the last interval.
                         */
                        if (rtnl_dereference(bond->curr_active_slave) != slave ||
                            (!rtnl_dereference(bond->curr_active_slave) &&
                             bond_time_in_interval(bond, trans_start, 1))) {
                                struct slave *current_arp_slave;

                                current_arp_slave = rtnl_dereference(bond->current_arp_slave);
                                bond_set_slave_link_state(slave, BOND_LINK_UP,
                                                          BOND_SLAVE_NOTIFY_NOW);
                                /* The probe rotation is over: deactivate
                                 * the probing slave and clear the pointer.
                                 */
                                if (current_arp_slave) {
                                        bond_set_slave_inactive_flags(
                                                current_arp_slave,
                                                BOND_SLAVE_NOTIFY_NOW);
                                        RCU_INIT_POINTER(bond->current_arp_slave, NULL);
                                }

                                netdev_info(bond->dev, "link status definitely up for interface %s\n",
                                            slave->dev->name);

                                if (!rtnl_dereference(bond->curr_active_slave) ||
                                    slave == rtnl_dereference(bond->primary_slave))
                                        goto do_failover;

                        }

                        continue;

                case BOND_LINK_DOWN:
                        if (slave->link_failure_count < UINT_MAX)
                                slave->link_failure_count++;

                        bond_set_slave_link_state(slave, BOND_LINK_DOWN,
                                                  BOND_SLAVE_NOTIFY_NOW);
                        bond_set_slave_inactive_flags(slave,
                                                      BOND_SLAVE_NOTIFY_NOW);

                        netdev_info(bond->dev, "link status definitely down for interface %s, disabling it\n",
                                    slave->dev->name);

                        /* Losing the active slave forces a failover. */
                        if (slave == rtnl_dereference(bond->curr_active_slave)) {
                                RCU_INIT_POINTER(bond->current_arp_slave, NULL);
                                goto do_failover;
                        }

                        continue;

                default:
                        /* inspect only ever sets the three values above */
                        netdev_err(bond->dev, "impossible: new_link %d on slave %s\n",
                                   slave->new_link, slave->dev->name);
                        continue;
                }

do_failover:
                block_netpoll_tx();
                bond_select_active_slave(bond);
                unblock_netpoll_tx();
        }

        bond_set_carrier(bond);
}
2848
/* Send ARP probes for active-backup mode ARP monitor.
 *
 * Called with rcu_read_lock held.
 *
 * Returns whether the caller must take RTNL and flush deferred
 * state/link notifications (expressed via the BOND_SLAVE_NOTIFY_*
 * values, which double as bools here).
 */
static bool bond_ab_arp_probe(struct bonding *bond)
{
        struct slave *slave, *before = NULL, *new_slave = NULL,
                     *curr_arp_slave = rcu_dereference(bond->current_arp_slave),
                     *curr_active_slave = rcu_dereference(bond->curr_active_slave);
        struct list_head *iter;
        bool found = false;
        bool should_notify_rtnl = BOND_SLAVE_NOTIFY_LATER;

        /* Both set at once should not happen; log it if it does. */
        if (curr_arp_slave && curr_active_slave)
                netdev_info(bond->dev, "PROBE: c_arp %s && cas %s BAD\n",
                            curr_arp_slave->dev->name,
                            curr_active_slave->dev->name);

        /* With an active slave, just probe through it and be done. */
        if (curr_active_slave) {
                bond_arp_send_all(bond, curr_active_slave);
                return should_notify_rtnl;
        }

        /* if we don't have a curr_active_slave, search for the next available
         * backup slave from the current_arp_slave and make it the candidate
         * for becoming the curr_active_slave
         */

        if (!curr_arp_slave) {
                curr_arp_slave = bond_first_slave_rcu(bond);
                if (!curr_arp_slave)
                        return should_notify_rtnl;
        }

        bond_set_slave_inactive_flags(curr_arp_slave, BOND_SLAVE_NOTIFY_LATER);

        /* One pass: remember the first up slave before curr_arp_slave
         * ("before") and the first up slave after it ("new_slave"), so
         * the rotation wraps around the slave list.
         */
        bond_for_each_slave_rcu(bond, slave, iter) {
                if (!found && !before && bond_slave_is_up(slave))
                        before = slave;

                if (found && !new_slave && bond_slave_is_up(slave))
                        new_slave = slave;
                /* if the link state is up at this point, we
                 * mark it down - this can happen if we have
                 * simultaneous link failures and
                 * reselect_active_interface doesn't make this
                 * one the current slave so it is still marked
                 * up when it is actually down
                 */
                if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
                        bond_set_slave_link_state(slave, BOND_LINK_DOWN,
                                                  BOND_SLAVE_NOTIFY_LATER);
                        if (slave->link_failure_count < UINT_MAX)
                                slave->link_failure_count++;

                        bond_set_slave_inactive_flags(slave,
                                                      BOND_SLAVE_NOTIFY_LATER);

                        netdev_info(bond->dev, "backup interface %s is now down\n",
                                    slave->dev->name);
                }
                if (slave == curr_arp_slave)
                        found = true;
        }

        if (!new_slave && before)
                new_slave = before;

        if (!new_slave)
                goto check_state;

        /* Promote the candidate to BACK state and probe through it. */
        bond_set_slave_link_state(new_slave, BOND_LINK_BACK,
                                  BOND_SLAVE_NOTIFY_LATER);
        bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER);
        bond_arp_send_all(bond, new_slave);
        new_slave->last_link_up = jiffies;
        rcu_assign_pointer(bond->current_arp_slave, new_slave);

check_state:
        /* Any deferred notification pending?  Tell the caller to flush. */
        bond_for_each_slave_rcu(bond, slave, iter) {
                if (slave->should_notify || slave->should_notify_link) {
                        should_notify_rtnl = BOND_SLAVE_NOTIFY_NOW;
                        break;
                }
        }
        return should_notify_rtnl;
}
2936
/* Active-backup mode ARP monitor work handler: inspect under RCU,
 * commit under RTNL (trylock, racing bond_close), probe, then re-arm
 * every arp_interval while it is enabled.
 */
static void bond_activebackup_arp_mon(struct work_struct *work)
{
        struct bonding *bond = container_of(work, struct bonding,
                                            arp_work.work);
        bool should_notify_peers = false;
        bool should_notify_rtnl = false;
        int delta_in_ticks;

        delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);

        if (!bond_has_slaves(bond))
                goto re_arm;

        rcu_read_lock();

        should_notify_peers = bond_should_notify_peers(bond);

        if (bond_ab_arp_inspect(bond)) {
                rcu_read_unlock();

                /* Race avoidance with bond_close flush of workqueue */
                if (!rtnl_trylock()) {
                        /* RTNL contended: retry almost immediately. */
                        delta_in_ticks = 1;
                        should_notify_peers = false;
                        goto re_arm;
                }

                bond_ab_arp_commit(bond);

                rtnl_unlock();
                /* Re-enter RCU for the probe phase below. */
                rcu_read_lock();
        }

        should_notify_rtnl = bond_ab_arp_probe(bond);
        rcu_read_unlock();

re_arm:
        if (bond->params.arp_interval)
                queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);

        if (should_notify_peers || should_notify_rtnl) {
                /* Best effort: drop notifications if RTNL is contended. */
                if (!rtnl_trylock())
                        return;

                if (should_notify_peers)
                        call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
                                                 bond->dev);
                if (should_notify_rtnl) {
                        bond_slave_state_notify(bond);
                        bond_slave_link_notify(bond);
                }

                rtnl_unlock();
        }
}
2992
2993/*-------------------------- netdev event handling --------------------------*/
2994
/* Change device name: re-create the proc entry and re-register the
 * debugfs entry so both reflect the bond's new name.  The order
 * (remove, then create) is fixed.
 */
static int bond_event_changename(struct bonding *bond)
{
        bond_remove_proc_entry(bond);
        bond_create_proc_entry(bond);

        bond_debug_reregister(bond);

        return NOTIFY_DONE;
}
3005
3006static int bond_master_netdev_event(unsigned long event,
3007                                    struct net_device *bond_dev)
3008{
3009        struct bonding *event_bond = netdev_priv(bond_dev);
3010
3011        switch (event) {
3012        case NETDEV_CHANGENAME:
3013                return bond_event_changename(event_bond);
3014        case NETDEV_UNREGISTER:
3015                bond_remove_proc_entry(event_bond);
3016                break;
3017        case NETDEV_REGISTER:
3018                bond_create_proc_entry(event_bond);
3019                break;
3020        case NETDEV_NOTIFY_PEERS:
3021                if (event_bond->send_peer_notif)
3022                        event_bond->send_peer_notif--;
3023                break;
3024        default:
3025                break;
3026        }
3027
3028        return NOTIFY_DONE;
3029}
3030
/* Handle netdev notifier events for a device that is enslaved to a bond.
 * Runs under RTNL, as all netdev notifier callbacks do.
 */
static int bond_slave_netdev_event(unsigned long event,
                                   struct net_device *slave_dev)
{
        struct slave *slave = bond_slave_get_rtnl(slave_dev), *primary;
        struct bonding *bond;
        struct net_device *bond_dev;

        /* A netdev event can be generated while enslaving a device
         * before netdev_rx_handler_register is called in which case
         * slave will be NULL
         */
        if (!slave)
                return NOTIFY_DONE;
        bond_dev = slave->bond->dev;
        bond = slave->bond;
        primary = rtnl_dereference(bond->primary_slave);

        switch (event) {
        case NETDEV_UNREGISTER:
                /* A non-Ethernet bond is destroyed along with its
                 * departing slave; an Ethernet bond just releases it. */
                if (bond_dev->type != ARPHRD_ETHER)
                        bond_release_and_destroy(bond_dev, slave_dev);
                else
                        bond_release(bond_dev, slave_dev);
                break;
        case NETDEV_UP:
        case NETDEV_CHANGE:
                bond_update_speed_duplex(slave);
                if (BOND_MODE(bond) == BOND_MODE_8023AD)
                        bond_3ad_adapter_speed_duplex_changed(slave);
                /* Fallthrough */
        case NETDEV_DOWN:
                /* Refresh slave-array if applicable!
                 * If the setup does not use miimon or arpmon (mode-specific!),
                 * then these events will not cause the slave-array to be
                 * refreshed. This will cause xmit to use a slave that is not
                 * usable. Avoid such situation by refreshing the array at these
                 * events. If these (miimon/arpmon) parameters are configured
                 * then array gets refreshed twice and that should be fine!
                 */
                if (bond_mode_uses_xmit_hash(bond))
                        bond_update_slave_arr(bond, NULL);
                break;
        case NETDEV_CHANGEMTU:
                /* TODO: Should slaves be allowed to
                 * independently alter their MTU?  For
                 * an active-backup bond, slaves need
                 * not be the same type of device, so
                 * MTUs may vary.  For other modes,
                 * slaves arguably should have the
                 * same MTUs. To do this, we'd need to
                 * take over the slave's change_mtu
                 * function for the duration of their
                 * servitude.
                 */
                break;
        case NETDEV_CHANGENAME:
                /* we don't care if we don't have primary set */
                if (!bond_uses_primary(bond) ||
                    !bond->params.primary[0])
                        break;

                if (slave == primary) {
                        /* slave's name changed - it's no longer primary */
                        RCU_INIT_POINTER(bond->primary_slave, NULL);
                } else if (!strcmp(slave_dev->name, bond->params.primary)) {
                        /* we have a new primary slave */
                        rcu_assign_pointer(bond->primary_slave, slave);
                } else { /* we didn't change primary - exit */
                        break;
                }

                /* NOTE(review): 'primary' still holds the value read before
                 * the update above, so when the old primary was just demoted
                 * this logs the slave's name rather than "none" -- confirm
                 * this message is intended.
                 */
                netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
                            primary ? slave_dev->name : "none");

                block_netpoll_tx();
                bond_select_active_slave(bond);
                unblock_netpoll_tx();
                break;
        case NETDEV_FEAT_CHANGE:
                /* A slave's feature set changed; recompute what the bond
                 * as a whole can offer. */
                bond_compute_features(bond);
                break;
        case NETDEV_RESEND_IGMP:
                /* Propagate to master device */
                call_netdevice_notifiers(event, slave->bond->dev);
                break;
        default:
                break;
        }

        return NOTIFY_DONE;
}
3122
3123/* bond_netdev_event: handle netdev notifier chain events.
3124 *
3125 * This function receives events for the netdev chain.  The caller (an
3126 * ioctl handler calling blocking_notifier_call_chain) holds the necessary
3127 * locks for us to safely manipulate the slave devices (RTNL lock,
3128 * dev_probe_lock).
3129 */
3130static int bond_netdev_event(struct notifier_block *this,
3131                             unsigned long event, void *ptr)
3132{
3133        struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
3134
3135        netdev_dbg(event_dev, "event: %lx\n", event);
3136
3137        if (!(event_dev->priv_flags & IFF_BONDING))
3138                return NOTIFY_DONE;
3139
3140        if (event_dev->flags & IFF_MASTER) {
3141                netdev_dbg(event_dev, "IFF_MASTER\n");
3142                return bond_master_netdev_event(event, event_dev);
3143        }
3144
3145        if (event_dev->flags & IFF_SLAVE) {
3146                netdev_dbg(event_dev, "IFF_SLAVE\n");
3147                return bond_slave_netdev_event(event, event_dev);
3148        }
3149
3150        return NOTIFY_DONE;
3151}
3152
/* Single notifier for all netdev events; bond_netdev_event() dispatches
 * to the master/slave handlers above. */
static struct notifier_block bond_netdev_notifier = {
        .notifier_call = bond_netdev_event,
};
3156
3157/*---------------------------- Hashing Policies -----------------------------*/
3158
3159/* L2 hash helper */
3160static inline u32 bond_eth_hash(struct sk_buff *skb)
3161{
3162        struct ethhdr *ep, hdr_tmp;
3163
3164        ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp);
3165        if (ep)
3166                return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto;
3167        return 0;
3168}
3169
/* Extract the appropriate headers based on bond's xmit policy.
 * Returns false when no usable L3 header could be parsed.
 */
static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
                              struct flow_keys *fk)
{
        const struct ipv6hdr *iph6;
        const struct iphdr *iph;
        int noff, proto = -1;

        /* Policies ordered above LAYER23 (presumably the ENCAP* ones --
         * confirm against the policy enum) use the full flow dissector. */
        if (bond->params.xmit_policy > BOND_XMIT_POLICY_LAYER23)
                return skb_flow_dissect_flow_keys(skb, fk, 0);

        /* Hand-rolled outer-header parse for the simpler policies. */
        fk->ports.ports = 0;
        noff = skb_network_offset(skb);
        if (skb->protocol == htons(ETH_P_IP)) {
                /* Make sure the full IPv4 header is in the linear area. */
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph))))
                        return false;
                iph = ip_hdr(skb);
                iph_to_flow_copy_v4addrs(fk, iph);
                noff += iph->ihl << 2;
                /* For fragments leave proto at -1 so L4 ports are not
                 * extracted below. */
                if (!ip_is_fragment(iph))
                        proto = iph->protocol;
        } else if (skb->protocol == htons(ETH_P_IPV6)) {
                if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph6))))
                        return false;
                iph6 = ipv6_hdr(skb);
                iph_to_flow_copy_v6addrs(fk, iph6);
                noff += sizeof(*iph6);
                proto = iph6->nexthdr;
        } else {
                return false;
        }
        /* Only the L3+L4 policy needs ports, and only when an upper-layer
         * protocol was identified. */
        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
                fk->ports.ports = skb_flow_get_ports(skb, noff, proto);

        return true;
}
3206
3207/**
3208 * bond_xmit_hash - generate a hash value based on the xmit policy
3209 * @bond: bonding device
3210 * @skb: buffer to use for headers
3211 *
3212 * This function will extract the necessary headers from the skb buffer and use
3213 * them to generate a hash based on the xmit_policy set in the bonding device
3214 */
3215u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
3216{
3217        struct flow_keys flow;
3218        u32 hash;
3219
3220        if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
3221            skb->l4_hash)
3222                return skb->hash;
3223
3224        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 ||
3225            !bond_flow_dissect(bond, skb, &flow))
3226                return bond_eth_hash(skb);
3227
3228        if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
3229            bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
3230                hash = bond_eth_hash(skb);
3231        else
3232                hash = (__force u32)flow.ports.ports;
3233        hash ^= (__force u32)flow_get_u32_dst(&flow) ^
3234                (__force u32)flow_get_u32_src(&flow);
3235        hash ^= (hash >> 16);
3236        hash ^= (hash >> 8);
3237
3238        return hash;
3239}
3240
3241/*-------------------------- Device entry points ----------------------------*/
3242
3243static void bond_work_init_all(struct bonding *bond)
3244{
3245        INIT_DELAYED_WORK(&bond->mcast_work,
3246                          bond_resend_igmp_join_requests_delayed);
3247        INIT_DELAYED_WORK(&bond->alb_work, bond_alb_monitor);
3248        INIT_DELAYED_WORK(&bond->mii_work, bond_mii_monitor);
3249        if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
3250                INIT_DELAYED_WORK(&bond->arp_work, bond_activebackup_arp_mon);
3251        else
3252                INIT_DELAYED_WORK(&bond->arp_work, bond_loadbalance_arp_mon);
3253        INIT_DELAYED_WORK(&bond->ad_work, bond_3ad_state_machine_handler);
3254        INIT_DELAYED_WORK(&bond->slave_arr_work, bond_slave_arr_handler);
3255}
3256
/* Synchronously cancel every delayed work the bond may have queued.
 * cancel_delayed_work_sync() is a no-op for works never scheduled, so
 * it is safe to cancel all of them regardless of mode.
 */
static void bond_work_cancel_all(struct bonding *bond)
{
        cancel_delayed_work_sync(&bond->mii_work);
        cancel_delayed_work_sync(&bond->arp_work);
        cancel_delayed_work_sync(&bond->alb_work);
        cancel_delayed_work_sync(&bond->ad_work);
        cancel_delayed_work_sync(&bond->mcast_work);
        cancel_delayed_work_sync(&bond->slave_arr_work);
}
3266
/* ndo_open: (re)initialize per-mode state, start the periodic monitors,
 * and install the mode's receive probe.  Returns -ENOMEM if ALB/TLB
 * initialization fails.
 */
static int bond_open(struct net_device *bond_dev)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct list_head *iter;
        struct slave *slave;

        /* reset slave->backup and slave->inactive */
        if (bond_has_slaves(bond)) {
                bond_for_each_slave(bond, slave, iter) {
                        /* In primary-using modes only the current active
                         * slave stays active; 802.3ad manages slave state
                         * itself, so it is left alone here. */
                        if (bond_uses_primary(bond) &&
                            slave != rcu_access_pointer(bond->curr_active_slave)) {
                                bond_set_slave_inactive_flags(slave,
                                                              BOND_SLAVE_NOTIFY_NOW);
                        } else if (BOND_MODE(bond) != BOND_MODE_8023AD) {
                                bond_set_slave_active_flags(slave,
                                                            BOND_SLAVE_NOTIFY_NOW);
                        }
                }
        }

        bond_work_init_all(bond);

        if (bond_is_lb(bond)) {
                /* bond_alb_initialize must be called before the timer
                 * is started.
                 */
                if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB)))
                        return -ENOMEM;
                if (bond->params.tlb_dynamic_lb)
                        queue_delayed_work(bond->wq, &bond->alb_work, 0);
        }

        if (bond->params.miimon)  /* link check interval, in milliseconds. */
                queue_delayed_work(bond->wq, &bond->mii_work, 0);

        if (bond->params.arp_interval) {  /* arp interval, in milliseconds. */
                queue_delayed_work(bond->wq, &bond->arp_work, 0);
                bond->recv_probe = bond_arp_rcv;
        }

        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
                queue_delayed_work(bond->wq, &bond->ad_work, 0);
                /* register to receive LACPDUs; note this overrides the
                 * bond_arp_rcv probe set above when arp_interval is also
                 * configured. */
                bond->recv_probe = bond_3ad_lacpdu_recv;
                bond_3ad_initiate_agg_selection(bond, 1);
        }

        if (bond_mode_uses_xmit_hash(bond))
                bond_update_slave_arr(bond, NULL);

        return 0;
}
3319
/* ndo_stop: quiesce all periodic work, then tear down per-mode state. */
static int bond_close(struct net_device *bond_dev)
{
        struct bonding *bond = netdev_priv(bond_dev);

        /* Cancel the monitors first so none of them run while the mode
         * state is being torn down below. */
        bond_work_cancel_all(bond);
        bond->send_peer_notif = 0;
        if (bond_is_lb(bond))
                bond_alb_deinitialize(bond);
        bond->recv_probe = NULL;

        return 0;
}
3332
3333/* fold stats, assuming all rtnl_link_stats64 fields are u64, but
3334 * that some drivers can provide 32bit values only.
3335 */
3336static void bond_fold_stats(struct rtnl_link_stats64 *_res,
3337                            const struct rtnl_link_stats64 *_new,
3338                            const struct rtnl_link_stats64 *_old)
3339{
3340        const u64 *new = (const u64 *)_new;
3341        const u64 *old = (const u64 *)_old;
3342        u64 *res = (u64 *)_res;
3343        int i;
3344
3345        for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) {
3346                u64 nv = new[i];
3347                u64 ov = old[i];
3348                s64 delta = nv - ov;
3349
3350                /* detects if this particular field is 32bit only */
3351                if (((nv | ov) >> 32) == 0)
3352                        delta = (s64)(s32)((u32)nv - (u32)ov);
3353
3354                /* filter anomalies, some drivers reset their stats
3355                 * at down/up events.
3356                 */
3357                if (delta > 0)
3358                        res[i] += delta;
3359        }
3360}
3361
/* ndo_get_stats64: report the bond's accumulated stats plus the deltas
 * each slave has produced since the previous call.
 */
static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev,
                                                struct rtnl_link_stats64 *stats)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct rtnl_link_stats64 temp;
        struct list_head *iter;
        struct slave *slave;

        /* bond_stats carries the running totals between calls; stats_lock
         * serializes concurrent readers updating them. */
        spin_lock(&bond->stats_lock);
        memcpy(stats, &bond->bond_stats, sizeof(*stats));

        rcu_read_lock();
        bond_for_each_slave_rcu(bond, slave, iter) {
                const struct rtnl_link_stats64 *new =
                        dev_get_stats(slave->dev, &temp);

                /* Fold in only the delta since the last snapshot. */
                bond_fold_stats(stats, new, &slave->slave_stats);

                /* save off the slave stats for the next run */
                memcpy(&slave->slave_stats, new, sizeof(*new));
        }
        rcu_read_unlock();

        memcpy(&bond->bond_stats, stats, sizeof(*stats));
        spin_unlock(&bond->stats_lock);

        return stats;
}
3390
/* ndo_do_ioctl: MII link-status emulation, legacy bond info queries, and
 * the privileged slave-management commands.
 */
static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct net_device *slave_dev = NULL;
        struct ifbond k_binfo;
        struct ifbond __user *u_binfo = NULL;
        struct ifslave k_sinfo;
        struct ifslave __user *u_sinfo = NULL;
        struct mii_ioctl_data *mii = NULL;
        struct bond_opt_value newval;
        struct net *net;
        int res = 0;

        netdev_dbg(bond_dev, "bond_ioctl: cmd=%d\n", cmd);

        switch (cmd) {
        case SIOCGMIIPHY:
                mii = if_mii(ifr);
                if (!mii)
                        return -EINVAL;

                mii->phy_id = 0;
                /* Fall Through */
        case SIOCGMIIREG:
                /* We do this again just in case we were called by SIOCGMIIREG
                 * instead of SIOCGMIIPHY.
                 */
                mii = if_mii(ifr);
                if (!mii)
                        return -EINVAL;

                /* Emulate MII register 1 (BMSR): report link-up iff the
                 * bond currently has carrier. */
                if (mii->reg_num == 1) {
                        mii->val_out = 0;
                        if (netif_carrier_ok(bond->dev))
                                mii->val_out = BMSR_LSTATUS;
                }

                return 0;
        case BOND_INFO_QUERY_OLD:
        case SIOCBONDINFOQUERY:
                u_binfo = (struct ifbond __user *)ifr->ifr_data;

                if (copy_from_user(&k_binfo, u_binfo, sizeof(ifbond)))
                        return -EFAULT;

                bond_info_query(bond_dev, &k_binfo);
                if (copy_to_user(u_binfo, &k_binfo, sizeof(ifbond)))
                        return -EFAULT;

                return 0;
        case BOND_SLAVE_INFO_QUERY_OLD:
        case SIOCBONDSLAVEINFOQUERY:
                u_sinfo = (struct ifslave __user *)ifr->ifr_data;

                if (copy_from_user(&k_sinfo, u_sinfo, sizeof(ifslave)))
                        return -EFAULT;

                res = bond_slave_info_query(bond_dev, &k_sinfo);
                if (res == 0 &&
                    copy_to_user(u_sinfo, &k_sinfo, sizeof(ifslave)))
                        return -EFAULT;

                return res;
        default:
                break;
        }

        /* The remaining commands modify bond membership/configuration and
         * require CAP_NET_ADMIN in the bond's network namespace. */
        net = dev_net(bond_dev);

        if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                return -EPERM;

        slave_dev = __dev_get_by_name(net, ifr->ifr_slave);

        netdev_dbg(bond_dev, "slave_dev=%p:\n", slave_dev);

        if (!slave_dev)
                return -ENODEV;

        netdev_dbg(bond_dev, "slave_dev->name=%s:\n", slave_dev->name);
        switch (cmd) {
        case BOND_ENSLAVE_OLD:
        case SIOCBONDENSLAVE:
                res = bond_enslave(bond_dev, slave_dev);
                break;
        case BOND_RELEASE_OLD:
        case SIOCBONDRELEASE:
                res = bond_release(bond_dev, slave_dev);
                break;
        case BOND_SETHWADDR_OLD:
        case SIOCBONDSETHWADDR:
                bond_set_dev_addr(bond_dev, slave_dev);
                res = 0;
                break;
        case BOND_CHANGE_ACTIVE_OLD:
        case SIOCBONDCHANGEACTIVE:
                /* Delegate to the generic option-setting path. */
                bond_opt_initstr(&newval, slave_dev->name);
                res = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval);
                break;
        default:
                res = -EOPNOTSUPP;
        }

        return res;
}
3496
3497static void bond_change_rx_flags(struct net_device *bond_dev, int change)
3498{
3499        struct bonding *bond = netdev_priv(bond_dev);
3500
3501        if (change & IFF_PROMISC)
3502                bond_set_promiscuity(bond,
3503                                     bond_dev->flags & IFF_PROMISC ? 1 : -1);
3504
3505        if (change & IFF_ALLMULTI)
3506                bond_set_allmulti(bond,
3507                                  bond_dev->flags & IFF_ALLMULTI ? 1 : -1);
3508}
3509
/* ndo_set_rx_mode: sync the bond's unicast/multicast address lists down
 * to the slaves.
 */
static void bond_set_rx_mode(struct net_device *bond_dev)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct list_head *iter;
        struct slave *slave;

        rcu_read_lock();
        if (bond_uses_primary(bond)) {
                /* Primary-using modes: only the current active slave gets
                 * the address lists. */
                slave = rcu_dereference(bond->curr_active_slave);
                if (slave) {
                        dev_uc_sync(slave->dev, bond_dev);
                        dev_mc_sync(slave->dev, bond_dev);
                }
        } else {
                /* Other modes: sync to every slave; the *_sync_multiple
                 * variants allow one source list to track many devices. */
                bond_for_each_slave_rcu(bond, slave, iter) {
                        dev_uc_sync_multiple(slave->dev, bond_dev);
                        dev_mc_sync_multiple(slave->dev, bond_dev);
                }
        }
        rcu_read_unlock();
}
3531
/* Initialize a neighbour created on the bond by delegating to the first
 * slave's ndo_neigh_setup, when it has one.  Returns 0 when there is
 * nothing to delegate to.
 */
static int bond_neigh_init(struct neighbour *n)
{
        struct bonding *bond = netdev_priv(n->dev);
        const struct net_device_ops *slave_ops;
        struct neigh_parms parms;
        struct slave *slave;
        int ret;

        slave = bond_first_slave(bond);
        if (!slave)
                return 0;
        slave_ops = slave->dev->netdev_ops;
        if (!slave_ops->ndo_neigh_setup)
                return 0;

        /* NOTE(review): parms is a stack struct with only these two fields
         * initialized; this assumes the slave's ndo_neigh_setup only fills
         * in neigh_setup/neigh_cleanup -- confirm. */
        parms.neigh_setup = NULL;
        parms.neigh_cleanup = NULL;
        ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
        if (ret)
                return ret;

        /* Assign slave's neigh_cleanup to neighbour in case cleanup is called
         * after the last slave has been detached.  Assumes that all slaves
         * utilize the same neigh_cleanup (true at this writing as only user
         * is ipoib).
         */
        n->parms->neigh_cleanup = parms.neigh_cleanup;

        if (!parms.neigh_setup)
                return 0;

        return parms.neigh_setup(n);
}
3565
/* The bonding ndo_neigh_setup is called at init time before any
3567 * slave exists. So we must declare proxy setup function which will
3568 * be used at run time to resolve the actual slave neigh param setup.
3569 *
3570 * It's also called by master devices (such as vlans) to setup their
3571 * underlying devices. In that case - do nothing, we're already set up from
3572 * our init.
3573 */
3574static int bond_neigh_setup(struct net_device *dev,
3575                            struct neigh_parms *parms)
3576{
3577        /* modify only our neigh_parms */
3578        if (parms->dev == dev)
3579                parms->neigh_setup = bond_neigh_init;
3580
3581        return 0;
3582}
3583
/* Change the MTU of all of a master's slaves to match the master.
 * All-or-nothing: if any slave rejects the new MTU, already-changed
 * slaves are rolled back and the error is returned.
 */
static int bond_change_mtu(struct net_device *bond_dev, int new_mtu)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave, *rollback_slave;
        struct list_head *iter;
        int res = 0;

        netdev_dbg(bond_dev, "bond=%p, new_mtu=%d\n", bond, new_mtu);

        bond_for_each_slave(bond, slave, iter) {
                netdev_dbg(bond_dev, "s %p c_m %p\n",
                           slave, slave->dev->netdev_ops->ndo_change_mtu);

                res = dev_set_mtu(slave->dev, new_mtu);

                if (res) {
                        /* If we failed to set the slave's mtu to the new value
                         * we must abort the operation even in ACTIVE_BACKUP
                         * mode, because if we allow the backup slaves to have
                         * different mtu values than the active slave we'll
                         * need to change their mtu when doing a failover. That
                         * means changing their mtu from timer context, which
                         * is probably not a good idea.
                         */
                        netdev_dbg(bond_dev, "err %d %s\n", res,
                                   slave->dev->name);
                        goto unwind;
                }
        }

        /* Every slave accepted the new MTU; only now commit it on the bond. */
        bond_dev->mtu = new_mtu;

        return 0;

unwind:
        /* unwind from head to the slave that failed; bond_dev->mtu still
         * holds the previous value at this point */
        bond_for_each_slave(bond, rollback_slave, iter) {
                int tmp_res;

                if (rollback_slave == slave)
                        break;

                /* Rollback failures are only logged -- nothing better to do. */
                tmp_res = dev_set_mtu(rollback_slave->dev, bond_dev->mtu);
                if (tmp_res) {
                        netdev_dbg(bond_dev, "unwind err %d dev %s\n",
                                   tmp_res, rollback_slave->dev->name);
                }
        }

        return res;
}
3636
/* Change HW address
 *
 * Note that many devices must be down to change the HW address, and
 * downing the master releases all slaves.  We can make bonds full of
 * bonding devices to test this, however.
 *
 * All-or-nothing: if any slave rejects the address, already-changed
 * slaves are rolled back to the bond's current address.
 */
static int bond_set_mac_address(struct net_device *bond_dev, void *addr)
{
        struct bonding *bond = netdev_priv(bond_dev);
        struct slave *slave, *rollback_slave;
        struct sockaddr_storage *ss = addr, tmp_ss;
        struct list_head *iter;
        int res = 0;

        /* ALB manages per-slave MAC addresses itself. */
        if (BOND_MODE(bond) == BOND_MODE_ALB)
                return bond_alb_set_mac_address(bond_dev, addr);


        netdev_dbg(bond_dev, "bond=%p\n", bond);

        /* If fail_over_mac is enabled, do nothing and return success.
         * Returning an error causes ifenslave to fail.
         */
        if (bond->params.fail_over_mac &&
            BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
                return 0;

        if (!is_valid_ether_addr(ss->__data))
                return -EADDRNOTAVAIL;

        bond_for_each_slave(bond, slave, iter) {
                netdev_dbg(bond_dev, "slave %p %s\n", slave, slave->dev->name);
                res = dev_set_mac_address(slave->dev, addr);
                if (res) {
                        /* TODO: consider downing the slave
                         * and retry ?
                         * User should expect communications
                         * breakage anyway until ARP finish
                         * updating, so...
                         */
                        netdev_dbg(bond_dev, "err %d %s\n", res, slave->dev->name);
                        goto unwind;
                }
        }

        /* success */
        memcpy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len);
        return 0;

unwind:
        /* Build a sockaddr with the bond's still-committed address and
         * restore it on every slave changed so far. */
        memcpy(tmp_ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
        tmp_ss.ss_family = bond_dev->type;

        /* unwind from head to the slave that failed */
        bond_for_each_slave(bond, rollback_slave, iter) {
                int tmp_res;

                if (rollback_slave == slave)
                        break;

                /* Rollback failures are only logged -- nothing better to do. */
                tmp_res = dev_set_mac_address(rollback_slave->dev,
                                              (struct sockaddr *)&tmp_ss);
                if (tmp_res) {
                        netdev_dbg(bond_dev, "unwind err %d dev %s\n",
                                   tmp_res, rollback_slave->dev->name);
                }
        }

        return res;
}
3707
3708/**
3709 * bond_xmit_slave_id - transmit skb through slave with slave_id
3710 * @bond: bonding device that is transmitting
3711 * @skb: buffer to transmit
3712 * @slave_id: slave id up to slave_cnt-1 through which to transmit
3713 *
3714 * This function tries to transmit through slave with slave_id but in case
3715 * it fails, it tries to find the first available slave for transmission.
3716 * The skb is consumed in all cases, thus the function is void.
3717 */
3718static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
3719{
3720        struct list_head *iter;
3721        struct slave *slave;
3722        int i = slave_id;
3723
3724        /* Here we start from the slave with slave_id */
3725        bond_for_each_slave_rcu(bond, slave, iter) {
3726                if (--i < 0) {
3727                        if (bond_slave_can_tx(slave)) {
3728                                bond_dev_queue_xmit(bond, skb, slave->dev);
3729                                return;
3730                        }
3731                }
3732        }
3733
3734        /* Here we start from the first slave up to slave_id */
3735        i = slave_id;
3736        bond_for_each_slave_rcu(bond, slave, iter) {
3737                if (--i < 0)
3738                        break;
3739                if (bond_slave_can_tx(slave)) {
3740                        bond_dev_queue_xmit(bond, skb, slave->dev);
3741                        return;
3742                }
3743        }
3744        /* no slave that can tx has been found */
3745        bond_tx_drop(bond->dev, skb);
3746}
3747
3748/**
3749 * bond_rr_gen_slave_id - generate slave id based on packets_per_slave
3750 * @bond: bonding device to use
3751 *
3752 * Based on the value of the bonding device's packets_per_slave parameter
3753 * this function generates a slave id, which is usually used as the next
3754 * slave to transmit through.
3755 */
3756static u32 bond_rr_gen_slave_id(struct bonding *bond)
3757{
3758        u32 slave_id;
3759        struct reciprocal_value reciprocal_packets_per_slave;
3760        int packets_per_slave = bond->params.packets_per_slave;
3761
3762        switch (packets_per_slave) {
3763        case 0:
3764                slave_id = prandom_u32();
3765                break;
3766        case 1:
3767                slave_id = bond->rr_tx_counter;
3768                break;
3769        default:
3770                reciprocal_packets_per_slave =
3771                        bond->params.reciprocal_packets_per_slave;
3772                slave_id = reciprocal_divide(bond->rr_tx_counter,
3773                                             reciprocal_packets_per_slave);
3774                break;
3775        }
3776        bond->rr_tx_counter++;
3777
3778        return slave_id;
3779}
3780
3781static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev)
3782{
3783        struct bonding *bond = netdev_priv(bond_dev);
3784        struct iphdr *iph = ip_hdr(skb);
3785        struct slave *slave;
3786        u32 slave_id;
3787
3788        /* Start with the curr_active_slave that joined the bond as the
3789         * default for sending IGMP traffic.  For failover purposes one
3790         * needs to maintain some consistency for the interface that will
3791         * send the join/membership reports.  The curr_active_slave found
3792         * will send all of this type of traffic.
3793         */
3794        if (iph->protocol == IPPROTO_IGMP && skb->protocol == htons(ETH_P_IP)) {
3795                slave = rcu_dereference(bond->curr_active_slave);
3796                if (slave)
3797                        bond_dev_queue_xmit(bond, skb, slave->dev);
3798                else
3799                        bond_xmit_slave_id(bond, skb, 0);
3800        } else {
3801                int slave_cnt = ACCESS_ONCE(bond->slave_cnt);
3802
3803                if (likely(slave_cnt)) {
3804                        slave_id = bond_rr_gen_slave_id(bond);
3805                        bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
3806                } else {
3807                        bond_tx_drop(bond_dev, skb);
3808                }
3809        }
3810
3811        return NETDEV_TX_OK;
3812}
3813
3814/* In active-backup mode, we know that bond->curr_active_slave is always valid if
3815 * the bond has a usable interface.
3816 */
3817static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_dev)
3818{
3819        struct bonding *bond = netdev_priv(bond_dev);
3820        struct slave *slave;
3821
3822        slave = rcu_dereference(bond->curr_active_slave);
3823        if (slave)
3824                bond_dev_queue_xmit(bond, skb, slave->dev);
3825        else
3826                bond_tx_drop(bond_dev, skb);
3827
3828        return NETDEV_TX_OK;
3829}
3830
/* Use this to update slave_array when (a) it's not appropriate to update
 * slave_array right away (note that update_slave_array() may sleep)
 * and / or (b) RTNL is not held.
 *
 * @bond:  bonding device whose slave array needs rebuilding
 * @delay: delay in jiffies before the work handler runs
 *
 * Queues delayed work on the bond's private workqueue; the actual
 * rebuild happens later in bond_slave_arr_handler() under RTNL.
 */
void bond_slave_arr_work_rearm(struct bonding *bond, unsigned long delay)
{
	queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
}
3839
3840/* Slave array work handler. Holds only RTNL */
3841static void bond_slave_arr_handler(struct work_struct *work)
3842{
3843        struct bonding *bond = container_of(work, struct bonding,
3844                                            slave_arr_work.work);
3845        int ret;
3846
3847        if (!rtnl_trylock())
3848                goto err;
3849
3850        ret = bond_update_slave_arr(bond, NULL);
3851        rtnl_unlock();
3852        if (ret) {
3853                pr_warn_ratelimited("Failed to update slave array from WT\n");
3854                goto err;
3855        }
3856        return;
3857
3858err:
3859        bond_slave_arr_work_rearm(bond, 1);
3860}
3861
3862/* Build the usable slaves array in control path for modes that use xmit-hash
3863 * to determine the slave interface -
3864 * (a) BOND_MODE_8023AD
3865 * (b) BOND_MODE_XOR
3866 * (c) BOND_MODE_TLB && tlb_dynamic_lb == 0
3867 *
3868 * The caller is expected to hold RTNL only and NO other lock!
3869 */
3870int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
3871{
3872        struct slave *slave;
3873        struct list_head *iter;
3874        struct bond_up_slave *new_arr, *old_arr;
3875        int agg_id = 0;
3876        int ret = 0;
3877
3878#ifdef CONFIG_LOCKDEP
3879        WARN_ON(lockdep_is_held(&bond->mode_lock));
3880#endif
3881
3882        new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
3883                          GFP_KERNEL);
3884        if (!new_arr) {
3885                ret = -ENOMEM;
3886                pr_err("Failed to build slave-array.\n");
3887                goto out;
3888        }
3889        if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3890                struct ad_info ad_info;
3891
3892                if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
3893                        pr_debug("bond_3ad_get_active_agg_info failed\n");
3894                        kfree_rcu(new_arr, rcu);
3895                        /* No active aggragator means it's not safe to use
3896                         * the previous array.
3897                         */
3898                        old_arr = rtnl_dereference(bond->slave_arr);
3899                        if (old_arr) {
3900                                RCU_INIT_POINTER(bond->slave_arr, NULL);
3901                                kfree_rcu(old_arr, rcu);
3902                        }
3903                        goto out;
3904                }
3905                agg_id = ad_info.aggregator_id;
3906        }
3907        bond_for_each_slave(bond, slave, iter) {
3908                if (BOND_MODE(bond) == BOND_MODE_8023AD) {
3909                        struct aggregator *agg;
3910
3911                        agg = SLAVE_AD_INFO(slave)->port.aggregator;
3912                        if (!agg || agg->aggregator_identifier != agg_id)
3913                                continue;
3914                }
3915                if (!bond_slave_can_tx(slave))
3916                        continue;
3917                if (skipslave == slave)
3918                        continue;
3919                new_arr->arr[new_arr->count++] = slave;
3920        }
3921
3922        old_arr = rtnl_dereference(bond->slave_arr);
3923        rcu_assign_pointer(bond->slave_arr, new_arr);
3924        if (old_arr)
3925                kfree_rcu(old_arr, rcu);
3926out:
3927        if (ret != 0 && skipslave) {
3928                int idx;
3929
3930                /* Rare situation where caller has asked to skip a specific
3931                 * slave but allocation failed (most likely!). BTW this is
3932                 * only possible when the call is initiated from
3933                 * __bond_release_one(). In this situation; overwrite the
3934                 * skipslave entry in the array with the last entry from the
3935                 * array to avoid a situation where the xmit path may choose
3936                 * this to-be-skipped slave to send a packet out.
3937                 */
3938                old_arr = rtnl_dereference(bond->slave_arr);
3939                for (idx = 0; idx < old_arr->count; idx++) {
3940                        if (skipslave == old_arr->arr[idx]) {
3941                                old_arr->arr[idx] =
3942                                    old_arr->arr[old_arr->count-1];
3943                                old_arr->count--;
3944                                break;
3945                        }
3946                }
3947        }
3948        return ret;
3949}
3950
3951/* Use this Xmit function for 3AD as well as XOR modes. The current
3952 * usable slave array is formed in the control path. The xmit function
3953 * just calculates hash and sends the packet out.
3954 */
3955static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
3956{
3957        struct bonding *bond = netdev_priv(dev);
3958        struct slave *slave;
3959        struct bond_up_slave *slaves;
3960        unsigned int count;
3961
3962        slaves = rcu_dereference(bond->slave_arr);
3963        count = slaves ? ACCESS_ONCE(slaves->count) : 0;
3964        if (likely(count)) {
3965                slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
3966                bond_dev_queue_xmit(bond, skb, slave->dev);
3967        } else {
3968                bond_tx_drop(dev, skb);
3969        }
3970
3971        return NETDEV_TX_OK;
3972}
3973
3974/* in broadcast mode, we send everything to all usable interfaces. */
static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave = NULL;
	struct list_head *iter;

	/* Clone the skb onto every usable slave *except* the last one.
	 * The loop deliberately breaks before the last slave so that,
	 * after the loop, 'slave' points at it and the original skb
	 * (not a clone) can be sent there, saving one allocation.
	 */
	bond_for_each_slave_rcu(bond, slave, iter) {
		if (bond_is_last_slave(bond, slave))
			break;
		if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

			if (!skb2) {
				/* Best effort: skip this slave, keep going. */
				net_err_ratelimited("%s: Error: %s: skb_clone() failed\n",
						    bond_dev->name, __func__);
				continue;
			}
			bond_dev_queue_xmit(bond, skb2, slave->dev);
		}
	}
	/* Send (or drop) the original skb on the last slave, if usable. */
	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
		bond_dev_queue_xmit(bond, skb, slave->dev);
	else
		bond_tx_drop(bond_dev, skb);

	return NETDEV_TX_OK;
}
4002
4003/*------------------------- Device initialization ---------------------------*/
4004
4005/* Lookup the slave that corresponds to a qid */
4006static inline int bond_slave_override(struct bonding *bond,
4007                                      struct sk_buff *skb)
4008{
4009        struct slave *slave = NULL;
4010        struct list_head *iter;
4011
4012        if (!skb->queue_mapping)
4013                return 1;
4014
4015        /* Find out if any slaves have the same mapping as this skb. */
4016        bond_for_each_slave_rcu(bond, slave, iter) {
4017                if (slave->queue_id == skb->queue_mapping) {
4018                        if (bond_slave_is_up(slave) &&
4019                            slave->link == BOND_LINK_UP) {
4020                                bond_dev_queue_xmit(bond, skb, slave->dev);
4021                                return 0;
4022                        }
4023                        /* If the slave isn't UP, use default transmit policy. */
4024                        break;
4025                }
4026        }
4027
4028        return 1;
4029}
4030
4031
4032static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
4033                             void *accel_priv, select_queue_fallback_t fallback)
4034{
4035        /* This helper function exists to help dev_pick_tx get the correct
4036         * destination queue.  Using a helper function skips a call to
4037         * skb_tx_hash and will put the skbs in the queue we expect on their
4038         * way down to the bonding driver.
4039         */
4040        u16 txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0;
4041
4042        /* Save the original txq to restore before passing to the driver */
4043        qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping;
4044
4045        if (unlikely(txq >= dev->real_num_tx_queues)) {
4046                do {
4047                        txq -= dev->real_num_tx_queues;
4048                } while (txq >= dev->real_num_tx_queues);
4049        }
4050        return txq;
4051}
4052
4053static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
4054{
4055        struct bonding *bond = netdev_priv(dev);
4056
4057        if (bond_should_override_tx_queue(bond) &&
4058            !bond_slave_override(bond, skb))
4059                return NETDEV_TX_OK;
4060
4061        switch (BOND_MODE(bond)) {
4062        case BOND_MODE_ROUNDROBIN:
4063                return bond_xmit_roundrobin(skb, dev);
4064        case BOND_MODE_ACTIVEBACKUP:
4065                return bond_xmit_activebackup(skb, dev);
4066        case BOND_MODE_8023AD:
4067        case BOND_MODE_XOR:
4068                return bond_3ad_xor_xmit(skb, dev);
4069        case BOND_MODE_BROADCAST:
4070                return bond_xmit_broadcast(skb, dev);
4071        case BOND_MODE_ALB:
4072                return bond_alb_xmit(skb, dev);
4073        case BOND_MODE_TLB:
4074                return bond_tlb_xmit(skb, dev);
4075        default:
4076                /* Should never happen, mode already checked */
4077                netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond));
4078                WARN_ON_ONCE(1);
4079                bond_tx_drop(dev, skb);
4080                return NETDEV_TX_OK;
4081        }
4082}
4083
4084static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
4085{
4086        struct bonding *bond = netdev_priv(dev);
4087        netdev_tx_t ret = NETDEV_TX_OK;
4088
4089        /* If we risk deadlock from transmitting this in the
4090         * netpoll path, tell netpoll to queue the frame for later tx
4091         */
4092        if (unlikely(is_netpoll_tx_blocked(dev)))
4093                return NETDEV_TX_BUSY;
4094
4095        rcu_read_lock();
4096        if (bond_has_slaves(bond))
4097                ret = __bond_start_xmit(skb, dev);
4098        else
4099                bond_tx_drop(dev, skb);
4100        rcu_read_unlock();
4101
4102        return ret;
4103}
4104
4105static int bond_ethtool_get_settings(struct net_device *bond_dev,
4106                                     struct ethtool_cmd *ecmd)
4107{
4108        struct bonding *bond = netdev_priv(bond_dev);
4109        unsigned long speed = 0;
4110        struct list_head *iter;
4111        struct slave *slave;
4112
4113        ecmd->duplex = DUPLEX_UNKNOWN;
4114        ecmd->port = PORT_OTHER;
4115
4116        /* Since bond_slave_can_tx returns false for all inactive or down slaves, we
4117         * do not need to check mode.  Though link speed might not represent
4118         * the true receive or transmit bandwidth (not all modes are symmetric)
4119         * this is an accurate maximum.
4120         */
4121        bond_for_each_slave(bond, slave, iter) {
4122                if (bond_slave_can_tx(slave)) {
4123                        if (slave->speed != SPEED_UNKNOWN)
4124                                speed += slave->speed;
4125                        if (ecmd->duplex == DUPLEX_UNKNOWN &&
4126                            slave->duplex != DUPLEX_UNKNOWN)
4127                                ecmd->duplex = slave->duplex;
4128                }
4129        }
4130        ethtool_cmd_speed_set(ecmd, speed ? : SPEED_UNKNOWN);
4131
4132        return 0;
4133}
4134
4135static void bond_ethtool_get_drvinfo(struct net_device *bond_dev,
4136                                     struct ethtool_drvinfo *drvinfo)
4137{
4138        strlcpy(drvinfo->driver, DRV_NAME, sizeof(drvinfo->driver));
4139        strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version));
4140        snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%d",
4141                 BOND_ABI_VERSION);
4142}
4143
/* ethtool operations exposed by the bonding master device. */
static const struct ethtool_ops bond_ethtool_ops = {
	.get_drvinfo		= bond_ethtool_get_drvinfo,
	.get_settings		= bond_ethtool_get_settings,
	.get_link		= ethtool_op_get_link,
};
4149
/* Network device operations for the bonding master.  Slave management
 * (ndo_add_slave/ndo_del_slave) and the transmit path (bond_start_xmit,
 * bond_select_queue) are the driver's core entry points; bridge/fdb ops
 * are delegated to switchdev helpers.
 */
static const struct net_device_ops bond_netdev_ops = {
	.ndo_size		= sizeof(struct net_device_ops),
	.ndo_init		= bond_init,
	.ndo_uninit		= bond_uninit,
	.ndo_open		= bond_open,
	.ndo_stop		= bond_close,
	.ndo_start_xmit		= bond_start_xmit,
	.ndo_select_queue	= bond_select_queue,
	.ndo_get_stats64	= bond_get_stats,
	.ndo_do_ioctl		= bond_do_ioctl,
	.ndo_change_rx_flags	= bond_change_rx_flags,
	.ndo_set_rx_mode	= bond_set_rx_mode,
	.ndo_change_mtu		= bond_change_mtu,
	.ndo_set_mac_address	= bond_set_mac_address,
	.ndo_neigh_setup	= bond_neigh_setup,
	.ndo_vlan_rx_add_vid	= bond_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= bond_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_netpoll_setup	= bond_netpoll_setup,
	.ndo_netpoll_cleanup	= bond_netpoll_cleanup,
	.ndo_poll_controller	= bond_poll_controller,
#endif
	.ndo_add_slave		= bond_enslave,
	.ndo_del_slave		= bond_release,
	.ndo_fix_features	= bond_fix_features,
	.extended.ndo_neigh_construct	= netdev_default_l2upper_neigh_construct,
	.extended.ndo_neigh_destroy	= netdev_default_l2upper_neigh_destroy,
	.ndo_bridge_setlink	= switchdev_port_bridge_setlink,
	.ndo_bridge_getlink	= switchdev_port_bridge_getlink,
	.ndo_bridge_dellink	= switchdev_port_bridge_dellink,
	.ndo_fdb_add		= switchdev_port_fdb_add,
	.ndo_fdb_del		= switchdev_port_fdb_del,
	.extended.ndo_fdb_dump	= switchdev_port_fdb_dump,
	.ndo_features_check	= passthru_features_check,
};
4185
/* Device type used for SET_NETDEV_DEVTYPE() in bond_setup(); shows up
 * in sysfs as the device's type name.
 */
static const struct device_type bond_type = {
	.name = "bond",
};
4189
/* netdev destructor: tear down the bond's private workqueue (if one was
 * created) before releasing the net_device itself.
 */
static void bond_destructor(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	/* wq may be NULL if ndo_init never ran or failed early */
	if (bond->wq)
		destroy_workqueue(bond->wq);
	free_netdev(bond_dev);
}
4197
/* Initialize a freshly-allocated bonding net_device: locks, default
 * parameters, netdev/ethtool ops, flags and offload feature sets.
 * Called before registration; no slaves exist yet.
 */
void bond_setup(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);

	spin_lock_init(&bond->mode_lock);
	spin_lock_init(&bond->stats_lock);
	bond->params = bonding_defaults;

	/* Initialize pointers */
	bond->dev = bond_dev;

	/* Initialize the device entry points */
	ether_setup(bond_dev);
	bond_dev->netdev_ops = &bond_netdev_ops;
	bond_dev->ethtool_ops = &bond_ethtool_ops;

	bond_dev->destructor = bond_destructor;

	SET_NETDEV_DEVTYPE(bond_dev, &bond_type);

	/* Initialize the device options */
	bond_dev->flags |= IFF_MASTER;
	bond_dev->priv_flags |= IFF_BONDING | IFF_UNICAST_FLT | IFF_NO_QUEUE;
	bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);

	/* don't acquire bond device's netif_tx_lock when transmitting */
	bond_dev->features |= NETIF_F_LLTX;

	/* By default, we declare the bond to be fully
	 * VLAN hardware accelerated capable. Special
	 * care is taken in the various xmit functions
	 * when there are slaves that are not hw accel
	 * capable
	 */

	/* Don't allow bond devices to change network namespaces. */
	bond_dev->features |= NETIF_F_NETNS_LOCAL;

	bond_dev->hw_features = BOND_VLAN_FEATURES |
				NETIF_F_HW_VLAN_CTAG_TX |
				NETIF_F_HW_VLAN_CTAG_RX |
				NETIF_F_HW_VLAN_CTAG_FILTER;

	bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
	bond_dev->features |= bond_dev->hw_features;
}
4244
/* Destroy a bonding device.
 * Must be under rtnl_lock when this function is called.
 *
 * Teardown order matters: netpoll cleanup first, then release every
 * slave (which may itself touch the slave array), then retire the
 * published slave array via RCU, and finally unlink from the per-netns
 * bond list and debugfs.
 */
static void bond_uninit(struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;
	struct bond_up_slave *arr;

	bond_netpoll_cleanup(bond_dev);

	/* Release the bonded slaves */
	bond_for_each_slave(bond, slave, iter)
		__bond_release_one(bond_dev, slave->dev, true);
	netdev_info(bond_dev, "Released all slaves\n");

	/* Clear the pointer before freeing so RCU readers in the xmit
	 * path cannot pick up a stale array.
	 */
	arr = rtnl_dereference(bond->slave_arr);
	if (arr) {
		RCU_INIT_POINTER(bond->slave_arr, NULL);
		kfree_rcu(arr, rcu);
	}

	list_del(&bond->bond_list);

	bond_debug_unregister(bond);
}
4272
4273/*------------------------- Module initialization ---------------------------*/
4274
4275static int bond_check_params(struct bond_params *params)
4276{
4277        int arp_validate_value, fail_over_mac_value, primary_reselect_value, i;
4278        struct bond_opt_value newval;
4279        const struct bond_opt_value *valptr;
4280        int arp_all_targets_value = 0;
4281        u16 ad_actor_sys_prio = 0;
4282        u16 ad_user_port_key = 0;
4283        __be32 arp_target[BOND_MAX_ARP_TARGETS] = { 0 };
4284        int arp_ip_count;
4285        int bond_mode   = BOND_MODE_ROUNDROBIN;
4286        int xmit_hashtype = BOND_XMIT_POLICY_LAYER2;
4287        int lacp_fast = 0;
4288        int tlb_dynamic_lb = 0;
4289
4290        /* Convert string parameters. */
4291        if (mode) {
4292                bond_opt_initstr(&newval, mode);
4293                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_MODE), &newval);
4294                if (!valptr) {
4295                        pr_err("Error: Invalid bonding mode \"%s\"\n", mode);
4296                        return -EINVAL;
4297                }
4298                bond_mode = valptr->value;
4299        }
4300
4301        if (xmit_hash_policy) {
4302                if ((bond_mode != BOND_MODE_XOR) &&
4303                    (bond_mode != BOND_MODE_8023AD) &&
4304                    (bond_mode != BOND_MODE_TLB)) {
4305                        pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
4306                                bond_mode_name(bond_mode));
4307                } else {
4308                        bond_opt_initstr(&newval, xmit_hash_policy);
4309                        valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH),
4310                                                &newval);
4311                        if (!valptr) {
4312                                pr_err("Error: Invalid xmit_hash_policy \"%s\"\n",
4313                                       xmit_hash_policy);
4314                                return -EINVAL;
4315                        }
4316                        xmit_hashtype = valptr->value;
4317                }
4318        }
4319
4320        if (lacp_rate) {
4321                if (bond_mode != BOND_MODE_8023AD) {
4322                        pr_info("lacp_rate param is irrelevant in mode %s\n",
4323                                bond_mode_name(bond_mode));
4324                } else {
4325                        bond_opt_initstr(&newval, lacp_rate);
4326                        valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE),
4327                                                &newval);
4328                        if (!valptr) {
4329                                pr_err("Error: Invalid lacp rate \"%s\"\n",
4330                                       lacp_rate);
4331                                return -EINVAL;
4332                        }
4333                        lacp_fast = valptr->value;
4334                }
4335        }
4336
4337        if (ad_select) {
4338                bond_opt_initstr(&newval, ad_select);
4339                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT),
4340                                        &newval);
4341                if (!valptr) {
4342                        pr_err("Error: Invalid ad_select \"%s\"\n", ad_select);
4343                        return -EINVAL;
4344                }
4345                params->ad_select = valptr->value;
4346                if (bond_mode != BOND_MODE_8023AD)
4347                        pr_warn("ad_select param only affects 802.3ad mode\n");
4348        } else {
4349                params->ad_select = BOND_AD_STABLE;
4350        }
4351
4352        if (max_bonds < 0) {
4353                pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
4354                        max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
4355                max_bonds = BOND_DEFAULT_MAX_BONDS;
4356        }
4357
4358        if (miimon < 0) {
4359                pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4360                        miimon, INT_MAX);
4361                miimon = 0;
4362        }
4363
4364        if (updelay < 0) {
4365                pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4366                        updelay, INT_MAX);
4367                updelay = 0;
4368        }
4369
4370        if (downdelay < 0) {
4371                pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4372                        downdelay, INT_MAX);
4373                downdelay = 0;
4374        }
4375
4376        if ((use_carrier != 0) && (use_carrier != 1)) {
4377                pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
4378                        use_carrier);
4379                use_carrier = 1;
4380        }
4381
4382        if (num_peer_notif < 0 || num_peer_notif > 255) {
4383                pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
4384                        num_peer_notif);
4385                num_peer_notif = 1;
4386        }
4387
4388        /* reset values for 802.3ad/TLB/ALB */
4389        if (!bond_mode_uses_arp(bond_mode)) {
4390                if (!miimon) {
4391                        pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n");
4392                        pr_warn("Forcing miimon to 100msec\n");
4393                        miimon = BOND_DEFAULT_MIIMON;
4394                }
4395        }
4396
4397        if (tx_queues < 1 || tx_queues > 255) {
4398                pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n",
4399                        tx_queues, BOND_DEFAULT_TX_QUEUES);
4400                tx_queues = BOND_DEFAULT_TX_QUEUES;
4401        }
4402
4403        if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
4404                pr_warn("Warning: all_slaves_active module parameter (%d), not of valid value (0/1), so it was set to 0\n",
4405                        all_slaves_active);
4406                all_slaves_active = 0;
4407        }
4408
4409        if (resend_igmp < 0 || resend_igmp > 255) {
4410                pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n",
4411                        resend_igmp, BOND_DEFAULT_RESEND_IGMP);
4412                resend_igmp = BOND_DEFAULT_RESEND_IGMP;
4413        }
4414
4415        bond_opt_initval(&newval, packets_per_slave);
4416        if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) {
4417                pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n",
4418                        packets_per_slave, USHRT_MAX);
4419                packets_per_slave = 1;
4420        }
4421
4422        if (bond_mode == BOND_MODE_ALB) {
4423                pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n",
4424                          updelay);
4425        }
4426
4427        if (!miimon) {
4428                if (updelay || downdelay) {
4429                        /* just warn the user the up/down delay will have
4430                         * no effect since miimon is zero...
4431                         */
4432                        pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n",
4433                                updelay, downdelay);
4434                }
4435        } else {
4436                /* don't allow arp monitoring */
4437                if (arp_interval) {
4438                        pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n",
4439                                miimon, arp_interval);
4440                        arp_interval = 0;
4441                }
4442
4443                if ((updelay % miimon) != 0) {
4444                        pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
4445                                updelay, miimon, (updelay / miimon) * miimon);
4446                }
4447
4448                updelay /= miimon;
4449
4450                if ((downdelay % miimon) != 0) {
4451                        pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n",
4452                                downdelay, miimon,
4453                                (downdelay / miimon) * miimon);
4454                }
4455
4456                downdelay /= miimon;
4457        }
4458
4459        if (arp_interval < 0) {
4460                pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n",
4461                        arp_interval, INT_MAX);
4462                arp_interval = 0;
4463        }
4464
4465        for (arp_ip_count = 0, i = 0;
4466             (arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {
4467                __be32 ip;
4468
4469                /* not a complete check, but good enough to catch mistakes */
4470                if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
4471                    !bond_is_ip_target_ok(ip)) {
4472                        pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
4473                                arp_ip_target[i]);
4474                        arp_interval = 0;
4475                } else {
4476                        if (bond_get_targets_ip(arp_target, ip) == -1)
4477                                arp_target[arp_ip_count++] = ip;
4478                        else
4479                                pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n",
4480                                        &ip);
4481                }
4482        }
4483
4484        if (arp_interval && !arp_ip_count) {
4485                /* don't allow arping if no arp_ip_target given... */
4486                pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n",
4487                        arp_interval);
4488                arp_interval = 0;
4489        }
4490
4491        if (arp_validate) {
4492                if (!arp_interval) {
4493                        pr_err("arp_validate requires arp_interval\n");
4494                        return -EINVAL;
4495                }
4496
4497                bond_opt_initstr(&newval, arp_validate);
4498                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_VALIDATE),
4499                                        &newval);
4500                if (!valptr) {
4501                        pr_err("Error: invalid arp_validate \"%s\"\n",
4502                               arp_validate);
4503                        return -EINVAL;
4504                }
4505                arp_validate_value = valptr->value;
4506        } else {
4507                arp_validate_value = 0;
4508        }
4509
4510        if (arp_all_targets) {
4511                bond_opt_initstr(&newval, arp_all_targets);
4512                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_ARP_ALL_TARGETS),
4513                                        &newval);
4514                if (!valptr) {
4515                        pr_err("Error: invalid arp_all_targets_value \"%s\"\n",
4516                               arp_all_targets);
4517                        arp_all_targets_value = 0;
4518                } else {
4519                        arp_all_targets_value = valptr->value;
4520                }
4521        }
4522
4523        if (miimon) {
4524                pr_info("MII link monitoring set to %d ms\n", miimon);
4525        } else if (arp_interval) {
4526                valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE,
4527                                          arp_validate_value);
4528                pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):",
4529                        arp_interval, valptr->string, arp_ip_count);
4530
4531                for (i = 0; i < arp_ip_count; i++)
4532                        pr_cont(" %s", arp_ip_target[i]);
4533
4534                pr_cont("\n");
4535
4536        } else if (max_bonds) {
4537                /* miimon and arp_interval not set, we need one so things
4538                 * work as expected, see bonding.txt for details
4539                 */
4540                pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");
4541        }
4542
4543        if (primary && !bond_mode_uses_primary(bond_mode)) {
4544                /* currently, using a primary only makes sense
4545                 * in active backup, TLB or ALB modes
4546                 */
4547                pr_warn("Warning: %s primary device specified but has no effect in %s mode\n",
4548                        primary, bond_mode_name(bond_mode));
4549                primary = NULL;
4550        }
4551
4552        if (primary && primary_reselect) {
4553                bond_opt_initstr(&newval, primary_reselect);
4554                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_PRIMARY_RESELECT),
4555                                        &newval);
4556                if (!valptr) {
4557                        pr_err("Error: Invalid primary_reselect \"%s\"\n",
4558                               primary_reselect);
4559                        return -EINVAL;
4560                }
4561                primary_reselect_value = valptr->value;
4562        } else {
4563                primary_reselect_value = BOND_PRI_RESELECT_ALWAYS;
4564        }
4565
4566        if (fail_over_mac) {
4567                bond_opt_initstr(&newval, fail_over_mac);
4568                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_FAIL_OVER_MAC),
4569                                        &newval);
4570                if (!valptr) {
4571                        pr_err("Error: invalid fail_over_mac \"%s\"\n",
4572                               fail_over_mac);
4573                        return -EINVAL;
4574                }
4575                fail_over_mac_value = valptr->value;
4576                if (bond_mode != BOND_MODE_ACTIVEBACKUP)
4577                        pr_warn("Warning: fail_over_mac only affects active-backup mode\n");
4578        } else {
4579                fail_over_mac_value = BOND_FOM_NONE;
4580        }
4581
4582        bond_opt_initstr(&newval, "default");
4583        valptr = bond_opt_parse(
4584                        bond_opt_get(BOND_OPT_AD_ACTOR_SYS_PRIO),
4585                                     &newval);
4586        if (!valptr) {
4587                pr_err("Error: No ad_actor_sys_prio default value");
4588                return -EINVAL;
4589        }
4590        ad_actor_sys_prio = valptr->value;
4591
4592        valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
4593                                &newval);
4594        if (!valptr) {
4595                pr_err("Error: No ad_user_port_key default value");
4596                return -EINVAL;
4597        }
4598        ad_user_port_key = valptr->value;
4599
4600        if (bond_mode == BOND_MODE_TLB) {
4601                bond_opt_initstr(&newval, "default");
4602                valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB),
4603                                        &newval);
4604                if (!valptr) {
4605                        pr_err("Error: No tlb_dynamic_lb default value");
4606                        return -EINVAL;
4607                }
4608                tlb_dynamic_lb = valptr->value;
4609        }
4610
4611        if (lp_interval == 0) {
4612                pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
4613                        INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
4614                lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;
4615        }
4616
4617        /* fill params struct with the proper values */
4618        params->mode = bond_mode;
4619        params->xmit_policy = xmit_hashtype;
4620        params->miimon = miimon;
4621        params->num_peer_notif = num_peer_notif;
4622        params->arp_interval = arp_interval;
4623        params->arp_validate = arp_validate_value;
4624        params->arp_all_targets = arp_all_targets_value;
4625        params->updelay = updelay;
4626        params->downdelay = downdelay;
4627        params->use_carrier = use_carrier;
4628        params->lacp_fast = lacp_fast;
4629        params->primary[0] = 0;
4630        params->primary_reselect = primary_reselect_value;
4631        params->fail_over_mac = fail_over_mac_value;
4632        params->tx_queues = tx_queues;
4633        params->all_slaves_active = all_slaves_active;
4634        params->resend_igmp = resend_igmp;
4635        params->min_links = min_links;
4636        params->lp_interval = lp_interval;
4637        params->packets_per_slave = packets_per_slave;
4638        params->tlb_dynamic_lb = tlb_dynamic_lb;
4639        params->ad_actor_sys_prio = ad_actor_sys_prio;
4640        eth_zero_addr(params->ad_actor_system);
4641        params->ad_user_port_key = ad_user_port_key;
4642        if (packets_per_slave > 0) {
4643                params->reciprocal_packets_per_slave =
4644                        reciprocal_value(packets_per_slave);
4645        } else {
4646                /* reciprocal_packets_per_slave is unused if
4647                 * packets_per_slave is 0 or 1, just initialize it
4648                 */
4649                params->reciprocal_packets_per_slave =
4650                        (struct reciprocal_value) { 0 };
4651        }
4652
4653        if (primary) {
4654                strncpy(params->primary, primary, IFNAMSIZ);
4655                params->primary[IFNAMSIZ - 1] = 0;
4656        }
4657
4658        memcpy(params->arp_targets, arp_target, sizeof(arp_target));
4659
4660        return 0;
4661}
4662
4663/* Called from registration process */
4664static int bond_init(struct net_device *bond_dev)
4665{
4666        struct bonding *bond = netdev_priv(bond_dev);
4667        struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id);
4668
4669        netdev_dbg(bond_dev, "Begin bond_init\n");
4670
4671        bond->wq = create_singlethread_workqueue(bond_dev->name);
4672        if (!bond->wq)
4673                return -ENOMEM;
4674
4675        netdev_lockdep_set_classes(bond_dev);
4676
4677        list_add_tail(&bond->bond_list, &bn->dev_list);
4678
4679        bond_prepare_sysfs_group(bond);
4680
4681        bond_debug_register(bond);
4682
4683        /* Ensure valid dev_addr */
4684        if (is_zero_ether_addr(bond_dev->dev_addr) &&
4685            bond_dev->addr_assign_type == NET_ADDR_PERM)
4686                eth_hw_addr_random(bond_dev);
4687
4688        return 0;
4689}
4690
/* Return the value of the "tx_queues" module parameter — the number of
 * transmit queues bond_create() passes to alloc_netdev_mq() for every
 * new bond device.
 */
unsigned int bond_get_num_tx_queues(void)
{
	return tx_queues;
}
4695
/* Create a new bond based on the specified name and bonding parameters.
 * If name is NULL, obtain a suitable "bond%d" name for us.
 * Caller must NOT hold rtnl_lock; we need to release it here before we
 * set up our sysfs entries.
 *
 * Returns 0 on success or a negative errno; on failure the half-built
 * device is released via bond_destructor().
 */
int bond_create(struct net *net, const char *name)
{
	struct net_device *bond_dev;
	struct bonding *bond;
	struct alb_bond_info *bond_info;
	int res;

	rtnl_lock();

	/* Private area carries struct bonding; "bond%d" lets the netdev
	 * core pick the first free index when no name was supplied.
	 */
	bond_dev = alloc_netdev_mq(sizeof(struct bonding),
				   name ? name : "bond%d",
				   bond_setup, tx_queues);
	if (!bond_dev) {
		pr_err("%s: eek! can't alloc netdev!\n", name);
		rtnl_unlock();
		return -ENOMEM;
	}

	/*
	 * Initialize rx_hashtbl_used_head to RLB_NULL_INDEX.
	 * It is set to 0 by default which is wrong.
	 */
	bond = netdev_priv(bond_dev);
	bond_info = &(BOND_ALB_INFO(bond));
	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;

	dev_net_set(bond_dev, net);
	bond_dev->rtnl_link_ops = &bond_link_ops;

	res = register_netdevice(bond_dev);

	/* New bond starts with carrier off; slaves will drive link state */
	netif_carrier_off(bond_dev);

	rtnl_unlock();
	if (res < 0)
		bond_destructor(bond_dev);
	return res;
}
4739
4740static int __net_init bond_net_init(struct net *net)
4741{
4742        struct bond_net *bn = net_generic(net, bond_net_id);
4743
4744        bn->net = net;
4745        INIT_LIST_HEAD(&bn->dev_list);
4746
4747        bond_create_proc_dir(bn);
4748        bond_create_sysfs(bn);
4749
4750        return 0;
4751}
4752
/* Per-namespace destructor: tear down sysfs, destroy every remaining
 * bond device in this namespace, then remove the /proc directory.
 */
static void __net_exit bond_net_exit(struct net *net)
{
	struct bond_net *bn = net_generic(net, bond_net_id);
	struct bonding *bond, *tmp_bond;
	LIST_HEAD(list);

	/* Sysfs goes first, before rtnl_lock is taken below */
	bond_destroy_sysfs(bn);

	/* Kill off any bonds created after unregistering bond rtnl ops */
	rtnl_lock();
	list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
		unregister_netdevice_queue(bond->dev, &list);
	/* Batch the unregistration so rtnl is held only once */
	unregister_netdevice_many(&list);
	rtnl_unlock();

	bond_destroy_proc_dir(bn);
}
4770
/* Per-network-namespace hooks.  .id/.size ask the pernet core to allocate
 * one struct bond_net per namespace, retrievable with
 * net_generic(net, bond_net_id) as done in bond_net_init/exit above.
 */
static struct pernet_operations bond_net_ops = {
	.init = bond_net_init,
	.exit = bond_net_exit,
	.id   = &bond_net_id,
	.size = sizeof(struct bond_net),
};
4777
4778static int __init bonding_init(void)
4779{
4780        int i;
4781        int res;
4782
4783        pr_info("%s", bond_version);
4784
4785        res = bond_check_params(&bonding_defaults);
4786        if (res)
4787                goto out;
4788
4789        res = register_pernet_subsys(&bond_net_ops);
4790        if (res)
4791                goto out;
4792
4793        res = bond_netlink_init();
4794        if (res)
4795                goto err_link;
4796
4797        bond_create_debugfs();
4798
4799        for (i = 0; i < max_bonds; i++) {
4800                res = bond_create(&init_net, NULL);
4801                if (res)
4802                        goto err;
4803        }
4804
4805        register_netdevice_notifier_rh(&bond_netdev_notifier);
4806out:
4807        return res;
4808err:
4809        bond_destroy_debugfs();
4810        bond_netlink_fini();
4811err_link:
4812        unregister_pernet_subsys(&bond_net_ops);
4813        goto out;
4814
4815}
4816
/* Module exit: mirror of bonding_init() teardown, in reverse order */
static void __exit bonding_exit(void)
{
	/* Stop receiving netdev events before tearing anything down */
	unregister_netdevice_notifier_rh(&bond_netdev_notifier);

	bond_destroy_debugfs();

	bond_netlink_fini();
	/* Destroys all remaining bond devices via bond_net_exit() */
	unregister_pernet_subsys(&bond_net_ops);

#ifdef CONFIG_NET_POLL_CONTROLLER
	/* Make sure we don't have an imbalance on our netpoll blocking */
	WARN_ON(atomic_read(&netpoll_block_tx));
#endif
}
4831
/* Standard module wiring: entry/exit hooks plus modinfo metadata */
module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_DESCRIPTION(DRV_DESCRIPTION ", v" DRV_VERSION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
4838