linux/drivers/net/bonding/bond_alb.c
<<
>>
Prefs
   1/*
   2 * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms of the GNU General Public License as published by the
   6 * Free Software Foundation; either version 2 of the License, or
   7 * (at your option) any later version.
   8 *
   9 * This program is distributed in the hope that it will be useful, but
  10 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  11 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 * for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License along
  15 * with this program; if not, write to the Free Software Foundation, Inc.,
  16 * 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17 *
  18 * The full GNU General Public License is included in this distribution in the
  19 * file called LICENSE.
  20 *
  21 */
  22
  23#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  24
  25#include <linux/skbuff.h>
  26#include <linux/netdevice.h>
  27#include <linux/etherdevice.h>
  28#include <linux/pkt_sched.h>
  29#include <linux/spinlock.h>
  30#include <linux/slab.h>
  31#include <linux/timer.h>
  32#include <linux/ip.h>
  33#include <linux/ipv6.h>
  34#include <linux/if_arp.h>
  35#include <linux/if_ether.h>
  36#include <linux/if_bonding.h>
  37#include <linux/if_vlan.h>
  38#include <linux/in.h>
  39#include <net/ipx.h>
  40#include <net/arp.h>
  41#include <net/ipv6.h>
  42#include <asm/byteorder.h>
  43#include "bonding.h"
  44#include "bond_alb.h"
  45
  46
  47
  48#ifndef __long_aligned
  49#define __long_aligned __attribute__((aligned((sizeof(long)))))
  50#endif
  51static const u8 mac_bcast[ETH_ALEN] __long_aligned = {
  52        0xff, 0xff, 0xff, 0xff, 0xff, 0xff
  53};
  54static const u8 mac_v6_allmcast[ETH_ALEN] __long_aligned = {
  55        0x33, 0x33, 0x00, 0x00, 0x00, 0x01
  56};
  57static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
  58
  59#pragma pack(1)
  60struct learning_pkt {
  61        u8 mac_dst[ETH_ALEN];
  62        u8 mac_src[ETH_ALEN];
  63        __be16 type;
  64        u8 padding[ETH_ZLEN - ETH_HLEN];
  65};
  66
  67struct arp_pkt {
  68        __be16  hw_addr_space;
  69        __be16  prot_addr_space;
  70        u8      hw_addr_len;
  71        u8      prot_addr_len;
  72        __be16  op_code;
  73        u8      mac_src[ETH_ALEN];      /* sender hardware address */
  74        __be32  ip_src;                 /* sender IP address */
  75        u8      mac_dst[ETH_ALEN];      /* target hardware address */
  76        __be32  ip_dst;                 /* target IP address */
  77};
  78#pragma pack()
  79
  80static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb)
  81{
  82        return (struct arp_pkt *)skb_network_header(skb);
  83}
  84
  85/* Forward declaration */
  86static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[]);
  87static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
  88static void rlb_src_unlink(struct bonding *bond, u32 index);
  89static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
  90                         u32 ip_dst_hash);
  91
  92static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
  93{
  94        int i;
  95        u8 hash = 0;
  96
  97        for (i = 0; i < hash_size; i++) {
  98                hash ^= hash_start[i];
  99        }
 100
 101        return hash;
 102}
 103
 104/*********************** tlb specific functions ***************************/
 105
 106static inline void _lock_tx_hashtbl_bh(struct bonding *bond)
 107{
 108        spin_lock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 109}
 110
 111static inline void _unlock_tx_hashtbl_bh(struct bonding *bond)
 112{
 113        spin_unlock_bh(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 114}
 115
 116static inline void _lock_tx_hashtbl(struct bonding *bond)
 117{
 118        spin_lock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 119}
 120
 121static inline void _unlock_tx_hashtbl(struct bonding *bond)
 122{
 123        spin_unlock(&(BOND_ALB_INFO(bond).tx_hashtbl_lock));
 124}
 125
 126/* Caller must hold tx_hashtbl lock */
 127static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
 128{
 129        if (save_load) {
 130                entry->load_history = 1 + entry->tx_bytes /
 131                                      BOND_TLB_REBALANCE_INTERVAL;
 132                entry->tx_bytes = 0;
 133        }
 134
 135        entry->tx_slave = NULL;
 136        entry->next = TLB_NULL_INDEX;
 137        entry->prev = TLB_NULL_INDEX;
 138}
 139
 140static inline void tlb_init_slave(struct slave *slave)
 141{
 142        SLAVE_TLB_INFO(slave).load = 0;
 143        SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
 144}
 145
 146/* Caller must hold bond lock for read, BH disabled */
 147static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
 148                         int save_load)
 149{
 150        struct tlb_client_info *tx_hash_table;
 151        u32 index;
 152
 153        /* clear slave from tx_hashtbl */
 154        tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
 155
 156        /* skip this if we've already freed the tx hash table */
 157        if (tx_hash_table) {
 158                index = SLAVE_TLB_INFO(slave).head;
 159                while (index != TLB_NULL_INDEX) {
 160                        u32 next_index = tx_hash_table[index].next;
 161                        tlb_init_table_entry(&tx_hash_table[index], save_load);
 162                        index = next_index;
 163                }
 164        }
 165
 166        tlb_init_slave(slave);
 167}
 168
 169/* Caller must hold bond lock for read */
 170static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
 171                         int save_load)
 172{
 173        _lock_tx_hashtbl_bh(bond);
 174        __tlb_clear_slave(bond, slave, save_load);
 175        _unlock_tx_hashtbl_bh(bond);
 176}
 177
 178/* Must be called before starting the monitor timer */
 179static int tlb_initialize(struct bonding *bond)
 180{
 181        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 182        int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
 183        struct tlb_client_info *new_hashtbl;
 184        int i;
 185
 186        new_hashtbl = kzalloc(size, GFP_KERNEL);
 187        if (!new_hashtbl)
 188                return -1;
 189
 190        _lock_tx_hashtbl_bh(bond);
 191
 192        bond_info->tx_hashtbl = new_hashtbl;
 193
 194        for (i = 0; i < TLB_HASH_TABLE_SIZE; i++) {
 195                tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
 196        }
 197
 198        _unlock_tx_hashtbl_bh(bond);
 199
 200        return 0;
 201}
 202
 203/* Must be called only after all slaves have been released */
 204static void tlb_deinitialize(struct bonding *bond)
 205{
 206        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 207
 208        _lock_tx_hashtbl_bh(bond);
 209
 210        kfree(bond_info->tx_hashtbl);
 211        bond_info->tx_hashtbl = NULL;
 212
 213        _unlock_tx_hashtbl_bh(bond);
 214}
 215
 216static long long compute_gap(struct slave *slave)
 217{
 218        return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */
 219               (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
 220}
 221
 222/* Caller must hold bond lock for read */
 223static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
 224{
 225        struct slave *slave, *least_loaded;
 226        long long max_gap;
 227        int i;
 228
 229        least_loaded = NULL;
 230        max_gap = LLONG_MIN;
 231
 232        /* Find the slave with the largest gap */
 233        bond_for_each_slave(bond, slave, i) {
 234                if (SLAVE_IS_OK(slave)) {
 235                        long long gap = compute_gap(slave);
 236
 237                        if (max_gap < gap) {
 238                                least_loaded = slave;
 239                                max_gap = gap;
 240                        }
 241                }
 242        }
 243
 244        return least_loaded;
 245}
 246
 247static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
 248                                                u32 skb_len)
 249{
 250        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 251        struct tlb_client_info *hash_table;
 252        struct slave *assigned_slave;
 253
 254        hash_table = bond_info->tx_hashtbl;
 255        assigned_slave = hash_table[hash_index].tx_slave;
 256        if (!assigned_slave) {
 257                assigned_slave = tlb_get_least_loaded_slave(bond);
 258
 259                if (assigned_slave) {
 260                        struct tlb_slave_info *slave_info =
 261                                &(SLAVE_TLB_INFO(assigned_slave));
 262                        u32 next_index = slave_info->head;
 263
 264                        hash_table[hash_index].tx_slave = assigned_slave;
 265                        hash_table[hash_index].next = next_index;
 266                        hash_table[hash_index].prev = TLB_NULL_INDEX;
 267
 268                        if (next_index != TLB_NULL_INDEX) {
 269                                hash_table[next_index].prev = hash_index;
 270                        }
 271
 272                        slave_info->head = hash_index;
 273                        slave_info->load +=
 274                                hash_table[hash_index].load_history;
 275                }
 276        }
 277
 278        if (assigned_slave) {
 279                hash_table[hash_index].tx_bytes += skb_len;
 280        }
 281
 282        return assigned_slave;
 283}
 284
 285/* Caller must hold bond lock for read */
 286static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
 287                                        u32 skb_len)
 288{
 289        struct slave *tx_slave;
 290        /*
 291         * We don't need to disable softirq here, becase
 292         * tlb_choose_channel() is only called by bond_alb_xmit()
 293         * which already has softirq disabled.
 294         */
 295        _lock_tx_hashtbl(bond);
 296        tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
 297        _unlock_tx_hashtbl(bond);
 298        return tx_slave;
 299}
 300
 301/*********************** rlb specific functions ***************************/
 302static inline void _lock_rx_hashtbl_bh(struct bonding *bond)
 303{
 304        spin_lock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 305}
 306
 307static inline void _unlock_rx_hashtbl_bh(struct bonding *bond)
 308{
 309        spin_unlock_bh(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 310}
 311
 312static inline void _lock_rx_hashtbl(struct bonding *bond)
 313{
 314        spin_lock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 315}
 316
 317static inline void _unlock_rx_hashtbl(struct bonding *bond)
 318{
 319        spin_unlock(&(BOND_ALB_INFO(bond).rx_hashtbl_lock));
 320}
 321
 322/* when an ARP REPLY is received from a client update its info
 323 * in the rx_hashtbl
 324 */
 325static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
 326{
 327        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 328        struct rlb_client_info *client_info;
 329        u32 hash_index;
 330
 331        _lock_rx_hashtbl_bh(bond);
 332
 333        hash_index = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
 334        client_info = &(bond_info->rx_hashtbl[hash_index]);
 335
 336        if ((client_info->assigned) &&
 337            (client_info->ip_src == arp->ip_dst) &&
 338            (client_info->ip_dst == arp->ip_src) &&
 339            (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {
 340                /* update the clients MAC address */
 341                memcpy(client_info->mac_dst, arp->mac_src, ETH_ALEN);
 342                client_info->ntt = 1;
 343                bond_info->rx_ntt = 1;
 344        }
 345
 346        _unlock_rx_hashtbl_bh(bond);
 347}
 348
 349static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
 350                        struct slave *slave)
 351{
 352        struct arp_pkt *arp, _arp;
 353
 354        if (skb->protocol != cpu_to_be16(ETH_P_ARP))
 355                goto out;
 356
 357        arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp);
 358        if (!arp)
 359                goto out;
 360
 361        /* We received an ARP from arp->ip_src.
 362         * We might have used this IP address previously (on the bonding host
 363         * itself or on a system that is bridged together with the bond).
 364         * However, if arp->mac_src is different than what is stored in
 365         * rx_hashtbl, some other host is now using the IP and we must prevent
 366         * sending out client updates with this IP address and the old MAC
 367         * address.
 368         * Clean up all hash table entries that have this address as ip_src but
 369         * have a different mac_src.
 370         */
 371        rlb_purge_src_ip(bond, arp);
 372
 373        if (arp->op_code == htons(ARPOP_REPLY)) {
 374                /* update rx hash table for this ARP */
 375                rlb_update_entry_from_arp(bond, arp);
 376                pr_debug("Server received an ARP Reply from client\n");
 377        }
 378out:
 379        return RX_HANDLER_ANOTHER;
 380}
 381
 382/* Caller must hold bond lock for read */
 383static struct slave *rlb_next_rx_slave(struct bonding *bond)
 384{
 385        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 386        struct slave *rx_slave, *slave, *start_at;
 387        int i = 0;
 388
 389        if (bond_info->next_rx_slave) {
 390                start_at = bond_info->next_rx_slave;
 391        } else {
 392                start_at = bond->first_slave;
 393        }
 394
 395        rx_slave = NULL;
 396
 397        bond_for_each_slave_from(bond, slave, i, start_at) {
 398                if (SLAVE_IS_OK(slave)) {
 399                        if (!rx_slave) {
 400                                rx_slave = slave;
 401                        } else if (slave->speed > rx_slave->speed) {
 402                                rx_slave = slave;
 403                        }
 404                }
 405        }
 406
 407        if (rx_slave) {
 408                bond_info->next_rx_slave = rx_slave->next;
 409        }
 410
 411        return rx_slave;
 412}
 413
 414/* teach the switch the mac of a disabled slave
 415 * on the primary for fault tolerance
 416 *
 417 * Caller must hold bond->curr_slave_lock for write or bond lock for write
 418 */
 419static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
 420{
 421        if (!bond->curr_active_slave) {
 422                return;
 423        }
 424
 425        if (!bond->alb_info.primary_is_promisc) {
 426                if (!dev_set_promiscuity(bond->curr_active_slave->dev, 1))
 427                        bond->alb_info.primary_is_promisc = 1;
 428                else
 429                        bond->alb_info.primary_is_promisc = 0;
 430        }
 431
 432        bond->alb_info.rlb_promisc_timeout_counter = 0;
 433
 434        alb_send_learning_packets(bond->curr_active_slave, addr);
 435}
 436
 437/* slave being removed should not be active at this point
 438 *
 439 * Caller must hold bond lock for read
 440 */
 441static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
 442{
 443        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 444        struct rlb_client_info *rx_hash_table;
 445        u32 index, next_index;
 446
 447        /* clear slave from rx_hashtbl */
 448        _lock_rx_hashtbl_bh(bond);
 449
 450        rx_hash_table = bond_info->rx_hashtbl;
 451        index = bond_info->rx_hashtbl_used_head;
 452        for (; index != RLB_NULL_INDEX; index = next_index) {
 453                next_index = rx_hash_table[index].used_next;
 454                if (rx_hash_table[index].slave == slave) {
 455                        struct slave *assigned_slave = rlb_next_rx_slave(bond);
 456
 457                        if (assigned_slave) {
 458                                rx_hash_table[index].slave = assigned_slave;
 459                                if (!ether_addr_equal_64bits(rx_hash_table[index].mac_dst,
 460                                                             mac_bcast)) {
 461                                        bond_info->rx_hashtbl[index].ntt = 1;
 462                                        bond_info->rx_ntt = 1;
 463                                        /* A slave has been removed from the
 464                                         * table because it is either disabled
 465                                         * or being released. We must retry the
 466                                         * update to avoid clients from not
 467                                         * being updated & disconnecting when
 468                                         * there is stress
 469                                         */
 470                                        bond_info->rlb_update_retry_counter =
 471                                                RLB_UPDATE_RETRY;
 472                                }
 473                        } else {  /* there is no active slave */
 474                                rx_hash_table[index].slave = NULL;
 475                        }
 476                }
 477        }
 478
 479        _unlock_rx_hashtbl_bh(bond);
 480
 481        write_lock_bh(&bond->curr_slave_lock);
 482
 483        if (slave != bond->curr_active_slave) {
 484                rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
 485        }
 486
 487        write_unlock_bh(&bond->curr_slave_lock);
 488}
 489
 490static void rlb_update_client(struct rlb_client_info *client_info)
 491{
 492        int i;
 493
 494        if (!client_info->slave) {
 495                return;
 496        }
 497
 498        for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
 499                struct sk_buff *skb;
 500
 501                skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
 502                                 client_info->ip_dst,
 503                                 client_info->slave->dev,
 504                                 client_info->ip_src,
 505                                 client_info->mac_dst,
 506                                 client_info->slave->dev->dev_addr,
 507                                 client_info->mac_dst);
 508                if (!skb) {
 509                        pr_err("%s: Error: failed to create an ARP packet\n",
 510                               client_info->slave->bond->dev->name);
 511                        continue;
 512                }
 513
 514                skb->dev = client_info->slave->dev;
 515
 516                if (client_info->tag) {
 517                        skb = vlan_put_tag(skb, htons(ETH_P_8021Q), client_info->vlan_id);
 518                        if (!skb) {
 519                                pr_err("%s: Error: failed to insert VLAN tag\n",
 520                                       client_info->slave->bond->dev->name);
 521                                continue;
 522                        }
 523                }
 524
 525                arp_xmit(skb);
 526        }
 527}
 528
 529/* sends ARP REPLIES that update the clients that need updating */
 530static void rlb_update_rx_clients(struct bonding *bond)
 531{
 532        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 533        struct rlb_client_info *client_info;
 534        u32 hash_index;
 535
 536        _lock_rx_hashtbl_bh(bond);
 537
 538        hash_index = bond_info->rx_hashtbl_used_head;
 539        for (; hash_index != RLB_NULL_INDEX;
 540             hash_index = client_info->used_next) {
 541                client_info = &(bond_info->rx_hashtbl[hash_index]);
 542                if (client_info->ntt) {
 543                        rlb_update_client(client_info);
 544                        if (bond_info->rlb_update_retry_counter == 0) {
 545                                client_info->ntt = 0;
 546                        }
 547                }
 548        }
 549
 550        /* do not update the entries again until this counter is zero so that
 551         * not to confuse the clients.
 552         */
 553        bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;
 554
 555        _unlock_rx_hashtbl_bh(bond);
 556}
 557
 558/* The slave was assigned a new mac address - update the clients */
 559static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
 560{
 561        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 562        struct rlb_client_info *client_info;
 563        int ntt = 0;
 564        u32 hash_index;
 565
 566        _lock_rx_hashtbl_bh(bond);
 567
 568        hash_index = bond_info->rx_hashtbl_used_head;
 569        for (; hash_index != RLB_NULL_INDEX;
 570             hash_index = client_info->used_next) {
 571                client_info = &(bond_info->rx_hashtbl[hash_index]);
 572
 573                if ((client_info->slave == slave) &&
 574                    !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
 575                        client_info->ntt = 1;
 576                        ntt = 1;
 577                }
 578        }
 579
 580        // update the team's flag only after the whole iteration
 581        if (ntt) {
 582                bond_info->rx_ntt = 1;
 583                //fasten the change
 584                bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
 585        }
 586
 587        _unlock_rx_hashtbl_bh(bond);
 588}
 589
 590/* mark all clients using src_ip to be updated */
 591static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
 592{
 593        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 594        struct rlb_client_info *client_info;
 595        u32 hash_index;
 596
 597        _lock_rx_hashtbl(bond);
 598
 599        hash_index = bond_info->rx_hashtbl_used_head;
 600        for (; hash_index != RLB_NULL_INDEX;
 601             hash_index = client_info->used_next) {
 602                client_info = &(bond_info->rx_hashtbl[hash_index]);
 603
 604                if (!client_info->slave) {
 605                        pr_err("%s: Error: found a client with no channel in the client's hash table\n",
 606                               bond->dev->name);
 607                        continue;
 608                }
 609                /*update all clients using this src_ip, that are not assigned
 610                 * to the team's address (curr_active_slave) and have a known
 611                 * unicast mac address.
 612                 */
 613                if ((client_info->ip_src == src_ip) &&
 614                    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr,
 615                                             bond->dev->dev_addr) &&
 616                    !ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
 617                        client_info->ntt = 1;
 618                        bond_info->rx_ntt = 1;
 619                }
 620        }
 621
 622        _unlock_rx_hashtbl(bond);
 623}
 624
 625/* Caller must hold both bond and ptr locks for read */
 626static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond)
 627{
 628        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 629        struct arp_pkt *arp = arp_pkt(skb);
 630        struct slave *assigned_slave;
 631        struct rlb_client_info *client_info;
 632        u32 hash_index = 0;
 633
 634        _lock_rx_hashtbl(bond);
 635
 636        hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
 637        client_info = &(bond_info->rx_hashtbl[hash_index]);
 638
 639        if (client_info->assigned) {
 640                if ((client_info->ip_src == arp->ip_src) &&
 641                    (client_info->ip_dst == arp->ip_dst)) {
 642                        /* the entry is already assigned to this client */
 643                        if (!ether_addr_equal_64bits(arp->mac_dst, mac_bcast)) {
 644                                /* update mac address from arp */
 645                                memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
 646                        }
 647                        memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
 648
 649                        assigned_slave = client_info->slave;
 650                        if (assigned_slave) {
 651                                _unlock_rx_hashtbl(bond);
 652                                return assigned_slave;
 653                        }
 654                } else {
 655                        /* the entry is already assigned to some other client,
 656                         * move the old client to primary (curr_active_slave) so
 657                         * that the new client can be assigned to this entry.
 658                         */
 659                        if (bond->curr_active_slave &&
 660                            client_info->slave != bond->curr_active_slave) {
 661                                client_info->slave = bond->curr_active_slave;
 662                                rlb_update_client(client_info);
 663                        }
 664                }
 665        }
 666        /* assign a new slave */
 667        assigned_slave = rlb_next_rx_slave(bond);
 668
 669        if (assigned_slave) {
 670                if (!(client_info->assigned &&
 671                      client_info->ip_src == arp->ip_src)) {
 672                        /* ip_src is going to be updated,
 673                         * fix the src hash list
 674                         */
 675                        u32 hash_src = _simple_hash((u8 *)&arp->ip_src,
 676                                                    sizeof(arp->ip_src));
 677                        rlb_src_unlink(bond, hash_index);
 678                        rlb_src_link(bond, hash_src, hash_index);
 679                }
 680
 681                client_info->ip_src = arp->ip_src;
 682                client_info->ip_dst = arp->ip_dst;
 683                /* arp->mac_dst is broadcast for arp reqeusts.
 684                 * will be updated with clients actual unicast mac address
 685                 * upon receiving an arp reply.
 686                 */
 687                memcpy(client_info->mac_dst, arp->mac_dst, ETH_ALEN);
 688                memcpy(client_info->mac_src, arp->mac_src, ETH_ALEN);
 689                client_info->slave = assigned_slave;
 690
 691                if (!ether_addr_equal_64bits(client_info->mac_dst, mac_bcast)) {
 692                        client_info->ntt = 1;
 693                        bond->alb_info.rx_ntt = 1;
 694                } else {
 695                        client_info->ntt = 0;
 696                }
 697
 698                if (bond_vlan_used(bond)) {
 699                        if (!vlan_get_tag(skb, &client_info->vlan_id))
 700                                client_info->tag = 1;
 701                }
 702
 703                if (!client_info->assigned) {
 704                        u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;
 705                        bond_info->rx_hashtbl_used_head = hash_index;
 706                        client_info->used_next = prev_tbl_head;
 707                        if (prev_tbl_head != RLB_NULL_INDEX) {
 708                                bond_info->rx_hashtbl[prev_tbl_head].used_prev =
 709                                        hash_index;
 710                        }
 711                        client_info->assigned = 1;
 712                }
 713        }
 714
 715        _unlock_rx_hashtbl(bond);
 716
 717        return assigned_slave;
 718}
 719
 720/* chooses (and returns) transmit channel for arp reply
 721 * does not choose channel for other arp types since they are
 722 * sent on the curr_active_slave
 723 */
 724static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
 725{
 726        struct arp_pkt *arp = arp_pkt(skb);
 727        struct slave *tx_slave = NULL;
 728
 729        /* Don't modify or load balance ARPs that do not originate locally
 730         * (e.g.,arrive via a bridge).
 731         */
 732        if (!bond_slave_has_mac(bond, arp->mac_src))
 733                return NULL;
 734
 735        if (arp->op_code == htons(ARPOP_REPLY)) {
 736                /* the arp must be sent on the selected
 737                * rx channel
 738                */
 739                tx_slave = rlb_choose_channel(skb, bond);
 740                if (tx_slave) {
 741                        memcpy(arp->mac_src,tx_slave->dev->dev_addr, ETH_ALEN);
 742                }
 743                pr_debug("Server sent ARP Reply packet\n");
 744        } else if (arp->op_code == htons(ARPOP_REQUEST)) {
 745                /* Create an entry in the rx_hashtbl for this client as a
 746                 * place holder.
 747                 * When the arp reply is received the entry will be updated
 748                 * with the correct unicast address of the client.
 749                 */
 750                rlb_choose_channel(skb, bond);
 751
 752                /* The ARP reply packets must be delayed so that
 753                 * they can cancel out the influence of the ARP request.
 754                 */
 755                bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;
 756
 757                /* arp requests are broadcast and are sent on the primary
 758                 * the arp request will collapse all clients on the subnet to
 759                 * the primary slave. We must register these clients to be
 760                 * updated with their assigned mac.
 761                 */
 762                rlb_req_update_subnet_clients(bond, arp->ip_src);
 763                pr_debug("Server sent ARP Request packet\n");
 764        }
 765
 766        return tx_slave;
 767}
 768
 769/* Caller must hold bond lock for read */
 770static void rlb_rebalance(struct bonding *bond)
 771{
 772        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 773        struct slave *assigned_slave;
 774        struct rlb_client_info *client_info;
 775        int ntt;
 776        u32 hash_index;
 777
 778        _lock_rx_hashtbl_bh(bond);
 779
 780        ntt = 0;
 781        hash_index = bond_info->rx_hashtbl_used_head;
 782        for (; hash_index != RLB_NULL_INDEX;
 783             hash_index = client_info->used_next) {
 784                client_info = &(bond_info->rx_hashtbl[hash_index]);
 785                assigned_slave = rlb_next_rx_slave(bond);
 786                if (assigned_slave && (client_info->slave != assigned_slave)) {
 787                        client_info->slave = assigned_slave;
 788                        client_info->ntt = 1;
 789                        ntt = 1;
 790                }
 791        }
 792
 793        /* update the team's flag only after the whole iteration */
 794        if (ntt) {
 795                bond_info->rx_ntt = 1;
 796        }
 797        _unlock_rx_hashtbl_bh(bond);
 798}
 799
 800/* Caller must hold rx_hashtbl lock */
 801static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
 802{
 803        entry->used_next = RLB_NULL_INDEX;
 804        entry->used_prev = RLB_NULL_INDEX;
 805        entry->assigned = 0;
 806        entry->slave = NULL;
 807        entry->tag = 0;
 808}
 809static void rlb_init_table_entry_src(struct rlb_client_info *entry)
 810{
 811        entry->src_first = RLB_NULL_INDEX;
 812        entry->src_prev = RLB_NULL_INDEX;
 813        entry->src_next = RLB_NULL_INDEX;
 814}
 815
 816static void rlb_init_table_entry(struct rlb_client_info *entry)
 817{
 818        memset(entry, 0, sizeof(struct rlb_client_info));
 819        rlb_init_table_entry_dst(entry);
 820        rlb_init_table_entry_src(entry);
 821}
 822
 823static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index)
 824{
 825        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 826        u32 next_index = bond_info->rx_hashtbl[index].used_next;
 827        u32 prev_index = bond_info->rx_hashtbl[index].used_prev;
 828
 829        if (index == bond_info->rx_hashtbl_used_head)
 830                bond_info->rx_hashtbl_used_head = next_index;
 831        if (prev_index != RLB_NULL_INDEX)
 832                bond_info->rx_hashtbl[prev_index].used_next = next_index;
 833        if (next_index != RLB_NULL_INDEX)
 834                bond_info->rx_hashtbl[next_index].used_prev = prev_index;
 835}
 836
 837/* unlink a rlb hash table entry from the src list */
 838static void rlb_src_unlink(struct bonding *bond, u32 index)
 839{
 840        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 841        u32 next_index = bond_info->rx_hashtbl[index].src_next;
 842        u32 prev_index = bond_info->rx_hashtbl[index].src_prev;
 843
 844        bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX;
 845        bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX;
 846
 847        if (next_index != RLB_NULL_INDEX)
 848                bond_info->rx_hashtbl[next_index].src_prev = prev_index;
 849
 850        if (prev_index == RLB_NULL_INDEX)
 851                return;
 852
 853        /* is prev_index pointing to the head of this list? */
 854        if (bond_info->rx_hashtbl[prev_index].src_first == index)
 855                bond_info->rx_hashtbl[prev_index].src_first = next_index;
 856        else
 857                bond_info->rx_hashtbl[prev_index].src_next = next_index;
 858
 859}
 860
 861static void rlb_delete_table_entry(struct bonding *bond, u32 index)
 862{
 863        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 864        struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
 865
 866        rlb_delete_table_entry_dst(bond, index);
 867        rlb_init_table_entry_dst(entry);
 868
 869        rlb_src_unlink(bond, index);
 870}
 871
 872/* add the rx_hashtbl[ip_dst_hash] entry to the list
 873 * of entries with identical ip_src_hash
 874 */
 875static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
 876{
 877        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 878        u32 next;
 879
 880        bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash;
 881        next = bond_info->rx_hashtbl[ip_src_hash].src_first;
 882        bond_info->rx_hashtbl[ip_dst_hash].src_next = next;
 883        if (next != RLB_NULL_INDEX)
 884                bond_info->rx_hashtbl[next].src_prev = ip_dst_hash;
 885        bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
 886}
 887
 888/* deletes all rx_hashtbl entries with  arp->ip_src if their mac_src does
 889 * not match arp->mac_src */
 890static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
 891{
 892        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 893        u32 ip_src_hash = _simple_hash((u8*)&(arp->ip_src), sizeof(arp->ip_src));
 894        u32 index;
 895
 896        _lock_rx_hashtbl_bh(bond);
 897
 898        index = bond_info->rx_hashtbl[ip_src_hash].src_first;
 899        while (index != RLB_NULL_INDEX) {
 900                struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
 901                u32 next_index = entry->src_next;
 902                if (entry->ip_src == arp->ip_src &&
 903                    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
 904                                rlb_delete_table_entry(bond, index);
 905                index = next_index;
 906        }
 907        _unlock_rx_hashtbl_bh(bond);
 908}
 909
 910static int rlb_initialize(struct bonding *bond)
 911{
 912        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 913        struct rlb_client_info  *new_hashtbl;
 914        int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
 915        int i;
 916
 917        new_hashtbl = kmalloc(size, GFP_KERNEL);
 918        if (!new_hashtbl)
 919                return -1;
 920
 921        _lock_rx_hashtbl_bh(bond);
 922
 923        bond_info->rx_hashtbl = new_hashtbl;
 924
 925        bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
 926
 927        for (i = 0; i < RLB_HASH_TABLE_SIZE; i++) {
 928                rlb_init_table_entry(bond_info->rx_hashtbl + i);
 929        }
 930
 931        _unlock_rx_hashtbl_bh(bond);
 932
 933        /* register to receive ARPs */
 934        bond->recv_probe = rlb_arp_recv;
 935
 936        return 0;
 937}
 938
 939static void rlb_deinitialize(struct bonding *bond)
 940{
 941        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 942
 943        _lock_rx_hashtbl_bh(bond);
 944
 945        kfree(bond_info->rx_hashtbl);
 946        bond_info->rx_hashtbl = NULL;
 947        bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
 948
 949        _unlock_rx_hashtbl_bh(bond);
 950}
 951
 952static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
 953{
 954        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
 955        u32 curr_index;
 956
 957        _lock_rx_hashtbl_bh(bond);
 958
 959        curr_index = bond_info->rx_hashtbl_used_head;
 960        while (curr_index != RLB_NULL_INDEX) {
 961                struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
 962                u32 next_index = bond_info->rx_hashtbl[curr_index].used_next;
 963
 964                if (curr->tag && (curr->vlan_id == vlan_id))
 965                        rlb_delete_table_entry(bond, curr_index);
 966
 967                curr_index = next_index;
 968        }
 969
 970        _unlock_rx_hashtbl_bh(bond);
 971}
 972
 973/*********************** tlb/rlb shared functions *********************/
 974
 975static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[])
 976{
 977        struct bonding *bond = bond_get_bond_by_slave(slave);
 978        struct learning_pkt pkt;
 979        int size = sizeof(struct learning_pkt);
 980        int i;
 981
 982        memset(&pkt, 0, size);
 983        memcpy(pkt.mac_dst, mac_addr, ETH_ALEN);
 984        memcpy(pkt.mac_src, mac_addr, ETH_ALEN);
 985        pkt.type = cpu_to_be16(ETH_P_LOOP);
 986
 987        for (i = 0; i < MAX_LP_BURST; i++) {
 988                struct sk_buff *skb;
 989                char *data;
 990
 991                skb = dev_alloc_skb(size);
 992                if (!skb) {
 993                        return;
 994                }
 995
 996                data = skb_put(skb, size);
 997                memcpy(data, &pkt, size);
 998
 999                skb_reset_mac_header(skb);
1000                skb->network_header = skb->mac_header + ETH_HLEN;
1001                skb->protocol = pkt.type;
1002                skb->priority = TC_PRIO_CONTROL;
1003                skb->dev = slave->dev;
1004
1005                if (bond_vlan_used(bond)) {
1006                        struct vlan_entry *vlan;
1007
1008                        vlan = bond_next_vlan(bond,
1009                                              bond->alb_info.current_alb_vlan);
1010
1011                        bond->alb_info.current_alb_vlan = vlan;
1012                        if (!vlan) {
1013                                kfree_skb(skb);
1014                                continue;
1015                        }
1016
1017                        skb = vlan_put_tag(skb, htons(ETH_P_8021Q), vlan->vlan_id);
1018                        if (!skb) {
1019                                pr_err("%s: Error: failed to insert VLAN tag\n",
1020                                       bond->dev->name);
1021                                continue;
1022                        }
1023                }
1024
1025                dev_queue_xmit(skb);
1026        }
1027}
1028
1029static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[])
1030{
1031        struct net_device *dev = slave->dev;
1032        struct sockaddr s_addr;
1033
1034        if (slave->bond->params.mode == BOND_MODE_TLB) {
1035                memcpy(dev->dev_addr, addr, dev->addr_len);
1036                return 0;
1037        }
1038
1039        /* for rlb each slave must have a unique hw mac addresses so that */
1040        /* each slave will receive packets destined to a different mac */
1041        memcpy(s_addr.sa_data, addr, dev->addr_len);
1042        s_addr.sa_family = dev->type;
1043        if (dev_set_mac_address(dev, &s_addr)) {
1044                pr_err("%s: Error: dev_set_mac_address of dev %s failed!\n"
1045                       "ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n",
1046                       slave->bond->dev->name, dev->name);
1047                return -EOPNOTSUPP;
1048        }
1049        return 0;
1050}
1051
1052/*
1053 * Swap MAC addresses between two slaves.
1054 *
1055 * Called with RTNL held, and no other locks.
1056 *
1057 */
1058
1059static void alb_swap_mac_addr(struct bonding *bond, struct slave *slave1, struct slave *slave2)
1060{
1061        u8 tmp_mac_addr[ETH_ALEN];
1062
1063        memcpy(tmp_mac_addr, slave1->dev->dev_addr, ETH_ALEN);
1064        alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr);
1065        alb_set_slave_mac_addr(slave2, tmp_mac_addr);
1066
1067}
1068
1069/*
1070 * Send learning packets after MAC address swap.
1071 *
1072 * Called with RTNL and no other locks
1073 */
1074static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
1075                                struct slave *slave2)
1076{
1077        int slaves_state_differ = (SLAVE_IS_OK(slave1) != SLAVE_IS_OK(slave2));
1078        struct slave *disabled_slave = NULL;
1079
1080        ASSERT_RTNL();
1081
1082        /* fasten the change in the switch */
1083        if (SLAVE_IS_OK(slave1)) {
1084                alb_send_learning_packets(slave1, slave1->dev->dev_addr);
1085                if (bond->alb_info.rlb_enabled) {
1086                        /* inform the clients that the mac address
1087                         * has changed
1088                         */
1089                        rlb_req_update_slave_clients(bond, slave1);
1090                }
1091        } else {
1092                disabled_slave = slave1;
1093        }
1094
1095        if (SLAVE_IS_OK(slave2)) {
1096                alb_send_learning_packets(slave2, slave2->dev->dev_addr);
1097                if (bond->alb_info.rlb_enabled) {
1098                        /* inform the clients that the mac address
1099                         * has changed
1100                         */
1101                        rlb_req_update_slave_clients(bond, slave2);
1102                }
1103        } else {
1104                disabled_slave = slave2;
1105        }
1106
1107        if (bond->alb_info.rlb_enabled && slaves_state_differ) {
1108                /* A disabled slave was assigned an active mac addr */
1109                rlb_teach_disabled_mac_on_primary(bond,
1110                                                  disabled_slave->dev->dev_addr);
1111        }
1112}
1113
1114/**
1115 * alb_change_hw_addr_on_detach
1116 * @bond: bonding we're working on
1117 * @slave: the slave that was just detached
1118 *
1119 * We assume that @slave was already detached from the slave list.
1120 *
1121 * If @slave's permanent hw address is different both from its current
1122 * address and from @bond's address, then somewhere in the bond there's
1123 * a slave that has @slave's permanet address as its current address.
1124 * We'll make sure that that slave no longer uses @slave's permanent address.
1125 *
1126 * Caller must hold RTNL and no other locks
1127 */
1128static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
1129{
1130        int perm_curr_diff;
1131        int perm_bond_diff;
1132
1133        perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
1134                                                  slave->dev->dev_addr);
1135        perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
1136                                                  bond->dev->dev_addr);
1137
1138        if (perm_curr_diff && perm_bond_diff) {
1139                struct slave *tmp_slave;
1140                int i, found = 0;
1141
1142                bond_for_each_slave(bond, tmp_slave, i) {
1143                        if (ether_addr_equal_64bits(slave->perm_hwaddr,
1144                                                    tmp_slave->dev->dev_addr)) {
1145                                found = 1;
1146                                break;
1147                        }
1148                }
1149
1150                if (found) {
1151                        /* locking: needs RTNL and nothing else */
1152                        alb_swap_mac_addr(bond, slave, tmp_slave);
1153                        alb_fasten_mac_swap(bond, slave, tmp_slave);
1154                }
1155        }
1156}
1157
1158/**
1159 * alb_handle_addr_collision_on_attach
1160 * @bond: bonding we're working on
1161 * @slave: the slave that was just attached
1162 *
1163 * checks uniqueness of slave's mac address and handles the case the
1164 * new slave uses the bonds mac address.
1165 *
1166 * If the permanent hw address of @slave is @bond's hw address, we need to
1167 * find a different hw address to give @slave, that isn't in use by any other
1168 * slave in the bond. This address must be, of course, one of the permanent
1169 * addresses of the other slaves.
1170 *
1171 * We go over the slave list, and for each slave there we compare its
1172 * permanent hw address with the current address of all the other slaves.
1173 * If no match was found, then we've found a slave with a permanent address
1174 * that isn't used by any other slave in the bond, so we can assign it to
1175 * @slave.
1176 *
1177 * assumption: this function is called before @slave is attached to the
1178 *             bond slave list.
1179 *
1180 * caller must hold the bond lock for write since the mac addresses are compared
1181 * and may be swapped.
1182 */
1183static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
1184{
1185        struct slave *tmp_slave1, *tmp_slave2, *free_mac_slave;
1186        struct slave *has_bond_addr = bond->curr_active_slave;
1187        int i, j, found = 0;
1188
1189        if (bond->slave_cnt == 0) {
1190                /* this is the first slave */
1191                return 0;
1192        }
1193
1194        /* if slave's mac address differs from bond's mac address
1195         * check uniqueness of slave's mac address against the other
1196         * slaves in the bond.
1197         */
1198        if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) {
1199                bond_for_each_slave(bond, tmp_slave1, i) {
1200                        if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr,
1201                                                    slave->dev->dev_addr)) {
1202                                found = 1;
1203                                break;
1204                        }
1205                }
1206
1207                if (!found)
1208                        return 0;
1209
1210                /* Try setting slave mac to bond address and fall-through
1211                   to code handling that situation below... */
1212                alb_set_slave_mac_addr(slave, bond->dev->dev_addr);
1213        }
1214
1215        /* The slave's address is equal to the address of the bond.
1216         * Search for a spare address in the bond for this slave.
1217         */
1218        free_mac_slave = NULL;
1219
1220        bond_for_each_slave(bond, tmp_slave1, i) {
1221                found = 0;
1222                bond_for_each_slave(bond, tmp_slave2, j) {
1223                        if (ether_addr_equal_64bits(tmp_slave1->perm_hwaddr,
1224                                                    tmp_slave2->dev->dev_addr)) {
1225                                found = 1;
1226                                break;
1227                        }
1228                }
1229
1230                if (!found) {
1231                        /* no slave has tmp_slave1's perm addr
1232                         * as its curr addr
1233                         */
1234                        free_mac_slave = tmp_slave1;
1235                        break;
1236                }
1237
1238                if (!has_bond_addr) {
1239                        if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr,
1240                                                    bond->dev->dev_addr)) {
1241
1242                                has_bond_addr = tmp_slave1;
1243                        }
1244                }
1245        }
1246
1247        if (free_mac_slave) {
1248                alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr);
1249
1250                pr_warning("%s: Warning: the hw address of slave %s is in use by the bond; giving it the hw address of %s\n",
1251                           bond->dev->name, slave->dev->name,
1252                           free_mac_slave->dev->name);
1253
1254        } else if (has_bond_addr) {
1255                pr_err("%s: Error: the hw address of slave %s is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n",
1256                       bond->dev->name, slave->dev->name);
1257                return -EFAULT;
1258        }
1259
1260        return 0;
1261}
1262
1263/**
1264 * alb_set_mac_address
1265 * @bond:
1266 * @addr:
1267 *
1268 * In TLB mode all slaves are configured to the bond's hw address, but set
1269 * their dev_addr field to different addresses (based on their permanent hw
1270 * addresses).
1271 *
1272 * For each slave, this function sets the interface to the new address and then
1273 * changes its dev_addr field to its previous value.
1274 *
1275 * Unwinding assumes bond's mac address has not yet changed.
1276 */
1277static int alb_set_mac_address(struct bonding *bond, void *addr)
1278{
1279        struct sockaddr sa;
1280        struct slave *slave, *stop_at;
1281        char tmp_addr[ETH_ALEN];
1282        int res;
1283        int i;
1284
1285        if (bond->alb_info.rlb_enabled) {
1286                return 0;
1287        }
1288
1289        bond_for_each_slave(bond, slave, i) {
1290                /* save net_device's current hw address */
1291                memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);
1292
1293                res = dev_set_mac_address(slave->dev, addr);
1294
1295                /* restore net_device's hw address */
1296                memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);
1297
1298                if (res)
1299                        goto unwind;
1300        }
1301
1302        return 0;
1303
1304unwind:
1305        memcpy(sa.sa_data, bond->dev->dev_addr, bond->dev->addr_len);
1306        sa.sa_family = bond->dev->type;
1307
1308        /* unwind from head to the slave that failed */
1309        stop_at = slave;
1310        bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at) {
1311                memcpy(tmp_addr, slave->dev->dev_addr, ETH_ALEN);
1312                dev_set_mac_address(slave->dev, &sa);
1313                memcpy(slave->dev->dev_addr, tmp_addr, ETH_ALEN);
1314        }
1315
1316        return res;
1317}
1318
/************************ exported alb functions ***********************/
1320
1321int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
1322{
1323        int res;
1324
1325        res = tlb_initialize(bond);
1326        if (res) {
1327                return res;
1328        }
1329
1330        if (rlb_enabled) {
1331                bond->alb_info.rlb_enabled = 1;
1332                /* initialize rlb */
1333                res = rlb_initialize(bond);
1334                if (res) {
1335                        tlb_deinitialize(bond);
1336                        return res;
1337                }
1338        } else {
1339                bond->alb_info.rlb_enabled = 0;
1340        }
1341
1342        return 0;
1343}
1344
1345void bond_alb_deinitialize(struct bonding *bond)
1346{
1347        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1348
1349        tlb_deinitialize(bond);
1350
1351        if (bond_info->rlb_enabled) {
1352                rlb_deinitialize(bond);
1353        }
1354}
1355
1356int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
1357{
1358        struct bonding *bond = netdev_priv(bond_dev);
1359        struct ethhdr *eth_data;
1360        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1361        struct slave *tx_slave = NULL;
1362        static const __be32 ip_bcast = htonl(0xffffffff);
1363        int hash_size = 0;
1364        int do_tx_balance = 1;
1365        u32 hash_index = 0;
1366        const u8 *hash_start = NULL;
1367        int res = 1;
1368        struct ipv6hdr *ip6hdr;
1369
1370        skb_reset_mac_header(skb);
1371        eth_data = eth_hdr(skb);
1372
1373        /* make sure that the curr_active_slave do not change during tx
1374         */
1375        read_lock(&bond->curr_slave_lock);
1376
1377        switch (ntohs(skb->protocol)) {
1378        case ETH_P_IP: {
1379                const struct iphdr *iph = ip_hdr(skb);
1380
1381                if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) ||
1382                    (iph->daddr == ip_bcast) ||
1383                    (iph->protocol == IPPROTO_IGMP)) {
1384                        do_tx_balance = 0;
1385                        break;
1386                }
1387                hash_start = (char *)&(iph->daddr);
1388                hash_size = sizeof(iph->daddr);
1389        }
1390                break;
1391        case ETH_P_IPV6:
1392                /* IPv6 doesn't really use broadcast mac address, but leave
1393                 * that here just in case.
1394                 */
1395                if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast)) {
1396                        do_tx_balance = 0;
1397                        break;
1398                }
1399
1400                /* IPv6 uses all-nodes multicast as an equivalent to
1401                 * broadcasts in IPv4.
1402                 */
1403                if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) {
1404                        do_tx_balance = 0;
1405                        break;
1406                }
1407
1408                /* Additianally, DAD probes should not be tx-balanced as that
1409                 * will lead to false positives for duplicate addresses and
1410                 * prevent address configuration from working.
1411                 */
1412                ip6hdr = ipv6_hdr(skb);
1413                if (ipv6_addr_any(&ip6hdr->saddr)) {
1414                        do_tx_balance = 0;
1415                        break;
1416                }
1417
1418                hash_start = (char *)&(ipv6_hdr(skb)->daddr);
1419                hash_size = sizeof(ipv6_hdr(skb)->daddr);
1420                break;
1421        case ETH_P_IPX:
1422                if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) {
1423                        /* something is wrong with this packet */
1424                        do_tx_balance = 0;
1425                        break;
1426                }
1427
1428                if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) {
1429                        /* The only protocol worth balancing in
1430                         * this family since it has an "ARP" like
1431                         * mechanism
1432                         */
1433                        do_tx_balance = 0;
1434                        break;
1435                }
1436
1437                hash_start = (char*)eth_data->h_dest;
1438                hash_size = ETH_ALEN;
1439                break;
1440        case ETH_P_ARP:
1441                do_tx_balance = 0;
1442                if (bond_info->rlb_enabled) {
1443                        tx_slave = rlb_arp_xmit(skb, bond);
1444                }
1445                break;
1446        default:
1447                do_tx_balance = 0;
1448                break;
1449        }
1450
1451        if (do_tx_balance) {
1452                hash_index = _simple_hash(hash_start, hash_size);
1453                tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
1454        }
1455
1456        if (!tx_slave) {
1457                /* unbalanced or unassigned, send through primary */
1458                tx_slave = bond->curr_active_slave;
1459                bond_info->unbalanced_load += skb->len;
1460        }
1461
1462        if (tx_slave && SLAVE_IS_OK(tx_slave)) {
1463                if (tx_slave != bond->curr_active_slave) {
1464                        memcpy(eth_data->h_source,
1465                               tx_slave->dev->dev_addr,
1466                               ETH_ALEN);
1467                }
1468
1469                res = bond_dev_queue_xmit(bond, skb, tx_slave->dev);
1470        } else {
1471                if (tx_slave) {
1472                        _lock_tx_hashtbl(bond);
1473                        __tlb_clear_slave(bond, tx_slave, 0);
1474                        _unlock_tx_hashtbl(bond);
1475                }
1476        }
1477
1478        read_unlock(&bond->curr_slave_lock);
1479
1480        if (res) {
1481                /* no suitable interface, frame not sent */
1482                kfree_skb(skb);
1483        }
1484        return NETDEV_TX_OK;
1485}
1486
/*
 * Periodic ALB work item.
 *
 * Each run, under bond->lock held for read:
 *  - bumps the learning-packet and tx-rebalance tick counters and, when a
 *    counter reaches its threshold, sends learning packets on every slave
 *    or clears every slave's tlb state respectively;
 *  - when rlb is enabled, handles the promiscuity timeout of the primary,
 *    a pending rlb rebalance request, and pending client updates.
 *
 * The work item always re-queues itself on bond->wq with a delay of
 * alb_delta_in_ticks before returning.
 */
void bond_alb_monitor(struct work_struct *work)
{
        struct bonding *bond = container_of(work, struct bonding,
                                            alb_work.work);
        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
        struct slave *slave;
        int i;

        read_lock(&bond->lock);

        /* no slaves: keep both tick counters at zero and just re-arm */
        if (bond->slave_cnt == 0) {
                bond_info->tx_rebalance_counter = 0;
                bond_info->lp_counter = 0;
                goto re_arm;
        }

        bond_info->tx_rebalance_counter++;
        bond_info->lp_counter++;

        /* send learning packets */
        if (bond_info->lp_counter >= BOND_ALB_LP_TICKS) {
                /* change of curr_active_slave involves swapping of mac addresses.
                 * in order to avoid this swapping from happening while
                 * sending the learning packets, the curr_slave_lock must be held for
                 * read.
                 */
                read_lock(&bond->curr_slave_lock);

                /* each slave announces its own current address */
                bond_for_each_slave(bond, slave, i) {
                        alb_send_learning_packets(slave, slave->dev->dev_addr);
                }

                read_unlock(&bond->curr_slave_lock);

                bond_info->lp_counter = 0;
        }

        /* rebalance tx traffic */
        if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {

                read_lock(&bond->curr_slave_lock);

                bond_for_each_slave(bond, slave, i) {
                        tlb_clear_slave(bond, slave, 1);
                        /* the active slave's load is derived from the
                         * traffic that was sent unbalanced, which is then
                         * reset for the next interval
                         */
                        if (slave == bond->curr_active_slave) {
                                SLAVE_TLB_INFO(slave).load =
                                        bond_info->unbalanced_load /
                                                BOND_TLB_REBALANCE_INTERVAL;
                                bond_info->unbalanced_load = 0;
                        }
                }

                read_unlock(&bond->curr_slave_lock);

                bond_info->tx_rebalance_counter = 0;
        }

        /* handle rlb stuff */
        if (bond_info->rlb_enabled) {
                if (bond_info->primary_is_promisc &&
                    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {

                        /*
                         * dev_set_promiscuity requires rtnl and
                         * nothing else.  Avoid race with bond_close.
                         *
                         * bond->lock is dropped before trying for rtnl.
                         * If rtnl is busy the lock is re-taken and this
                         * run ends; the timeout counter is not reset in
                         * that case, so a later run tries again.
                         */
                        read_unlock(&bond->lock);
                        if (!rtnl_trylock()) {
                                read_lock(&bond->lock);
                                goto re_arm;
                        }

                        bond_info->rlb_promisc_timeout_counter = 0;

                        /* If the primary was set to promiscuous mode
                         * because a slave was disabled then
                         * it can now leave promiscuous mode.
                         */
                        /* NOTE(review): bond->lock is not held here and
                         * curr_active_slave is not checked for NULL --
                         * confirm it cannot be cleared in this window.
                         */
                        dev_set_promiscuity(bond->curr_active_slave->dev, -1);
                        bond_info->primary_is_promisc = 0;

                        rtnl_unlock();
                        read_lock(&bond->lock);
                }

                /* a rebalance was requested: consume the request and run it */
                if (bond_info->rlb_rebalance) {
                        bond_info->rlb_rebalance = 0;
                        rlb_rebalance(bond);
                }

                /* check if clients need updating */
                if (bond_info->rx_ntt) {
                        if (bond_info->rlb_update_delay_counter) {
                                /* still delaying: count down one tick */
                                --bond_info->rlb_update_delay_counter;
                        } else {
                                rlb_update_rx_clients(bond);
                                /* keep rx_ntt set until the retry counter
                                 * has run down to zero
                                 */
                                if (bond_info->rlb_update_retry_counter) {
                                        --bond_info->rlb_update_retry_counter;
                                } else {
                                        bond_info->rx_ntt = 0;
                                }
                        }
                }
        }

re_arm:
        /* every path reaches here with bond->lock held for read */
        queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);

        read_unlock(&bond->lock);
}
1597
1598/* assumption: called before the slave is attached to the bond
1599 * and not locked by the bond lock
1600 */
1601int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
1602{
1603        int res;
1604
1605        res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr);
1606        if (res) {
1607                return res;
1608        }
1609
1610        /* caller must hold the bond lock for write since the mac addresses
1611         * are compared and may be swapped.
1612         */
1613        read_lock(&bond->lock);
1614
1615        res = alb_handle_addr_collision_on_attach(bond, slave);
1616
1617        read_unlock(&bond->lock);
1618
1619        if (res) {
1620                return res;
1621        }
1622
1623        tlb_init_slave(slave);
1624
1625        /* order a rebalance ASAP */
1626        bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
1627
1628        if (bond->alb_info.rlb_enabled) {
1629                bond->alb_info.rlb_rebalance = 1;
1630        }
1631
1632        return 0;
1633}
1634
1635/*
1636 * Remove slave from tlb and rlb hash tables, and fix up MAC addresses
1637 * if necessary.
1638 *
1639 * Caller must hold RTNL and no other locks
1640 */
1641void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
1642{
1643        if (bond->slave_cnt > 1) {
1644                alb_change_hw_addr_on_detach(bond, slave);
1645        }
1646
1647        tlb_clear_slave(bond, slave, 0);
1648
1649        if (bond->alb_info.rlb_enabled) {
1650                bond->alb_info.next_rx_slave = NULL;
1651                rlb_clear_slave(bond, slave);
1652        }
1653}
1654
1655/* Caller must hold bond lock for read */
1656void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
1657{
1658        struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
1659
1660        if (link == BOND_LINK_DOWN) {
1661                tlb_clear_slave(bond, slave, 0);
1662                if (bond->alb_info.rlb_enabled) {
1663                        rlb_clear_slave(bond, slave);
1664                }
1665        } else if (link == BOND_LINK_UP) {
1666                /* order a rebalance ASAP */
1667                bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
1668                if (bond->alb_info.rlb_enabled) {
1669                        bond->alb_info.rlb_rebalance = 1;
1670                        /* If the updelay module parameter is smaller than the
1671                         * forwarding delay of the switch the rebalance will
1672                         * not work because the rebalance arp replies will
1673                         * not be forwarded to the clients..
1674                         */
1675                }
1676        }
1677}
1678
1679/**
1680 * bond_alb_handle_active_change - assign new curr_active_slave
1681 * @bond: our bonding struct
1682 * @new_slave: new slave to assign
1683 *
1684 * Set the bond->curr_active_slave to @new_slave and handle
1685 * mac address swapping and promiscuity changes as needed.
1686 *
1687 * If new_slave is NULL, caller must hold curr_slave_lock or
1688 * bond->lock for write.
1689 *
1690 * If new_slave is not NULL, caller must hold RTNL, bond->lock for
1691 * read and curr_slave_lock for write.  Processing here may sleep, so
1692 * no other locks may be held.
1693 */
1694void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
1695        __releases(&bond->curr_slave_lock)
1696        __releases(&bond->lock)
1697        __acquires(&bond->lock)
1698        __acquires(&bond->curr_slave_lock)
1699{
1700        struct slave *swap_slave;
1701        int i;
1702
1703        if (bond->curr_active_slave == new_slave) {
1704                return;
1705        }
1706
1707        if (bond->curr_active_slave && bond->alb_info.primary_is_promisc) {
1708                dev_set_promiscuity(bond->curr_active_slave->dev, -1);
1709                bond->alb_info.primary_is_promisc = 0;
1710                bond->alb_info.rlb_promisc_timeout_counter = 0;
1711        }
1712
1713        swap_slave = bond->curr_active_slave;
1714        bond->curr_active_slave = new_slave;
1715
1716        if (!new_slave || (bond->slave_cnt == 0)) {
1717                return;
1718        }
1719
1720        /* set the new curr_active_slave to the bonds mac address
1721         * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
1722         */
1723        if (!swap_slave) {
1724                struct slave *tmp_slave;
1725                /* find slave that is holding the bond's mac address */
1726                bond_for_each_slave(bond, tmp_slave, i) {
1727                        if (ether_addr_equal_64bits(tmp_slave->dev->dev_addr,
1728                                                    bond->dev->dev_addr)) {
1729                                swap_slave = tmp_slave;
1730                                break;
1731                        }
1732                }
1733        }
1734
1735        /*
1736         * Arrange for swap_slave and new_slave to temporarily be
1737         * ignored so we can mess with their MAC addresses without
1738         * fear of interference from transmit activity.
1739         */
1740        if (swap_slave) {
1741                tlb_clear_slave(bond, swap_slave, 1);
1742        }
1743        tlb_clear_slave(bond, new_slave, 1);
1744
1745        write_unlock_bh(&bond->curr_slave_lock);
1746        read_unlock(&bond->lock);
1747
1748        ASSERT_RTNL();
1749
1750        /* curr_active_slave must be set before calling alb_swap_mac_addr */
1751        if (swap_slave) {
1752                /* swap mac address */
1753                alb_swap_mac_addr(bond, swap_slave, new_slave);
1754        } else {
1755                /* set the new_slave to the bond mac address */
1756                alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr);
1757        }
1758
1759        if (swap_slave) {
1760                alb_fasten_mac_swap(bond, swap_slave, new_slave);
1761                read_lock(&bond->lock);
1762        } else {
1763                read_lock(&bond->lock);
1764                alb_send_learning_packets(new_slave, bond->dev->dev_addr);
1765        }
1766
1767        write_lock_bh(&bond->curr_slave_lock);
1768}
1769
1770/*
1771 * Called with RTNL
1772 */
1773int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
1774        __acquires(&bond->lock)
1775        __releases(&bond->lock)
1776{
1777        struct bonding *bond = netdev_priv(bond_dev);
1778        struct sockaddr *sa = addr;
1779        struct slave *slave, *swap_slave;
1780        int res;
1781        int i;
1782
1783        if (!is_valid_ether_addr(sa->sa_data)) {
1784                return -EADDRNOTAVAIL;
1785        }
1786
1787        res = alb_set_mac_address(bond, addr);
1788        if (res) {
1789                return res;
1790        }
1791
1792        memcpy(bond_dev->dev_addr, sa->sa_data, bond_dev->addr_len);
1793
1794        /* If there is no curr_active_slave there is nothing else to do.
1795         * Otherwise we'll need to pass the new address to it and handle
1796         * duplications.
1797         */
1798        if (!bond->curr_active_slave) {
1799                return 0;
1800        }
1801
1802        swap_slave = NULL;
1803
1804        bond_for_each_slave(bond, slave, i) {
1805                if (ether_addr_equal_64bits(slave->dev->dev_addr,
1806                                            bond_dev->dev_addr)) {
1807                        swap_slave = slave;
1808                        break;
1809                }
1810        }
1811
1812        if (swap_slave) {
1813                alb_swap_mac_addr(bond, swap_slave, bond->curr_active_slave);
1814                alb_fasten_mac_swap(bond, swap_slave, bond->curr_active_slave);
1815        } else {
1816                alb_set_slave_mac_addr(bond->curr_active_slave, bond_dev->dev_addr);
1817
1818                read_lock(&bond->lock);
1819                alb_send_learning_packets(bond->curr_active_slave, bond_dev->dev_addr);
1820                if (bond->alb_info.rlb_enabled) {
1821                        /* inform clients mac address has changed */
1822                        rlb_req_update_slave_clients(bond, bond->curr_active_slave);
1823                }
1824                read_unlock(&bond->lock);
1825        }
1826
1827        return 0;
1828}
1829
1830void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
1831{
1832        if (bond->alb_info.current_alb_vlan &&
1833            (bond->alb_info.current_alb_vlan->vlan_id == vlan_id)) {
1834                bond->alb_info.current_alb_vlan = NULL;
1835        }
1836
1837        if (bond->alb_info.rlb_enabled) {
1838                rlb_clear_vlan(bond, vlan_id);
1839        }
1840}
1841
1842