linux/drivers/net/ethernet/mellanox/mlx5/core/lag.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"

enum {
        MLX5_LAG_FLAG_BONDED = 1 << 0,
};

struct lag_func {
        struct mlx5_core_dev *dev;
        struct net_device    *netdev;
};

/* Used for collection of netdev event info. */
struct lag_tracker {
        enum   netdev_lag_tx_type           tx_type;
        struct netdev_lag_lower_state_info  netdev_state[MLX5_MAX_PORTS];
        bool is_bonded;
};

/* LAG data of a ConnectX card.
 * It serves both of its physical functions (PFs).
 */
struct mlx5_lag {
        u8                        flags;
        u8                        v2p_map[MLX5_MAX_PORTS];
        struct lag_func           pf[MLX5_MAX_PORTS];
        struct lag_tracker        tracker;
        struct delayed_work       bond_work;
        struct notifier_block     nb;
};

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_MUTEX(lag_mutex);

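/* Firmware command wrappers for the LAG object. remap_port1 and remap_port2
 * select the physical port (1 or 2) used for tx affinity 1 and 2.
 */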
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32   in[MLX5_ST_SZ_DW(create_lag_in)]   = {0};
        u32   out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

        MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32   in[MLX5_ST_SZ_DW(modify_lag_in)]   = {0};
        u32   out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
        void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

        MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
        MLX5_SET(modify_lag_in, in, field_select, 0x1);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
{
        u32  in[MLX5_ST_SZ_DW(destroy_lag_in)]  = {0};
        u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};

        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
        u32  in[MLX5_ST_SZ_DW(create_vport_lag_in)]  = {0};
        u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};

        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
        u32  in[MLX5_ST_SZ_DW(destroy_vport_lag_in)]  = {0};
        u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};

        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

        return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev)
{
        return dev->priv.lag;
}

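/* Return the index of @ndev in the lag device's PF array, or -1 if the
 * netdev does not belong to this lag device.
 */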
static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                       struct net_device *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].netdev == ndev)
                        return i;

        return -1;
}

static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_BONDED);
}

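/* Derive the vport-to-physical-port mapping from the tracked bond state.
 * Ports are numbered 1 and 2. In active-backup mode both entries point at
 * the active port; otherwise each entry keeps its own port unless that
 * port's link is down, in which case it falls back to the other port.
 */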
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           u8 *port1, u8 *port2)
{
        if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                if (tracker->netdev_state[0].tx_enabled) {
                        *port1 = 1;
                        *port2 = 1;
                } else {
                        *port1 = 2;
                        *port2 = 2;
                }
        } else {
                *port1 = 1;
                *port2 = 2;
                if (!tracker->netdev_state[0].link_up)
                        *port1 = 2;
                else if (!tracker->netdev_state[1].link_up)
                        *port2 = 1;
        }
}

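/* Mark the lag device as bonded and create the firmware LAG object using
 * the tx affinity mapping derived from the current tracker state.
 */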
static void mlx5_activate_lag(struct mlx5_lag *ldev,
                              struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
        int err;

        ldev->flags |= MLX5_LAG_FLAG_BONDED;

        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0],
                                       &ldev->v2p_map[1]);

        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]);
        if (err)
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
}

static void mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
        int err;

        ldev->flags &= ~MLX5_LAG_FLAG_BONDED;

        err = mlx5_cmd_destroy_lag(dev0);
        if (err)
                mlx5_core_err(dev0,
                              "Failed to destroy LAG (%d)\n",
                              err);
}

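/* Re-evaluate the bond state and apply it to the hardware:
 *  - not bonded -> bonded:  remove the per-port IB devices, create the LAG,
 *    then expose a single IB device on PF0 and enable RoCE on PF1
 *    (skipped if SRIOV is enabled on either function).
 *  - bonded -> bonded:      update the port mapping if it changed.
 *  - bonded -> not bonded:  tear down the LAG and restore one IB device
 *    per port.
 * Called with the mlx5 device list lock held.
 */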
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[0].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[1].dev;
        struct lag_tracker tracker;
        u8 v2p_port1, v2p_port2;
        int i, err;

        if (!dev0 || !dev1)
                return;

        mutex_lock(&lag_mutex);
        tracker = ldev->tracker;
        mutex_unlock(&lag_mutex);

        if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) {
                if (mlx5_sriov_is_enabled(dev0) ||
                    mlx5_sriov_is_enabled(dev1)) {
                        mlx5_core_warn(dev0, "LAG is not supported with SRIOV");
                        return;
                }

                for (i = 0; i < MLX5_MAX_PORTS; i++)
                        mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
                                                    MLX5_INTERFACE_PROTOCOL_IB);

                mlx5_activate_lag(ldev, &tracker);

                mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
                mlx5_nic_vport_enable_roce(dev1);
        } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
                mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1,
                                               &v2p_port2);

                if ((v2p_port1 != ldev->v2p_map[0]) ||
                    (v2p_port2 != ldev->v2p_map[1])) {
                        ldev->v2p_map[0] = v2p_port1;
                        ldev->v2p_map[1] = v2p_port2;

                        err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                        if (err)
                                mlx5_core_err(dev0,
                                              "Failed to modify LAG (%d)\n",
                                              err);
                }
        } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) {
                mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
                mlx5_nic_vport_disable_roce(dev1);

                mlx5_deactivate_lag(ldev);

                for (i = 0; i < MLX5_MAX_PORTS; i++)
                        if (ldev->pf[i].dev)
                                mlx5_add_dev_by_protocol(ldev->pf[i].dev,
                                                         MLX5_INTERFACE_PROTOCOL_IB);
        }
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
        schedule_delayed_work(&ldev->bond_work, delay);
}

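/* Deferred bond (re)evaluation. The mlx5 device list lock is taken with a
 * trylock; if it is not available, the work requeues itself and retries
 * after a one second delay.
 */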
static void mlx5_do_bond_work(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
        int status;

        status = mlx5_dev_list_trylock();
        if (!status) {
                /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        mlx5_do_bond(ldev);
        mlx5_dev_list_unlock();
}

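/* Track CHANGEUPPER events. Returns 1 if the tracked bonding state changed
 * (so the bond work should be scheduled), 0 otherwise.
 */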
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                         struct lag_tracker *tracker,
                                         struct net_device *ndev,
                                         struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *ndev_tmp;
        struct netdev_lag_upper_info *lag_upper_info;
        bool is_bonded;
        int bond_status = 0;
        int num_slaves = 0;
        int idx;

        if (!netif_is_lag_master(upper))
                return 0;

        lag_upper_info = info->upper_info;

        /* The event may still be of interest if the slave does not belong to
         * us, but is enslaved to a master which has one or more of our netdevs
         * as slaves (e.g., if a new slave is added to a master that bonds two
         * of our netdevs, we should unbond).
         */
        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
                idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
                if (idx > -1)
                        bond_status |= (1 << idx);

                num_slaves++;
        }
        rcu_read_unlock();

        /* None of this lagdev's netdevs are slaves of this master. */
        if (!(bond_status & 0x3))
                return 0;

        if (lag_upper_info)
                tracker->tx_type = lag_upper_info->tx_type;

        /* Determine bonding status:
         * A device is considered bonded if both its physical ports are slaves
         * of the same lag master, and only them.
         * Lag mode must be activebackup or hash.
         */
        is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
                    (bond_status == 0x3) &&
                    ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
                     (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));

        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
                return 1;
        }

        return 0;
}

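/* Track CHANGELOWERSTATE events for our slave netdevs. Returns 1 when new
 * lower-state info was recorded, 0 otherwise.
 */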
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
                                              struct lag_tracker *tracker,
                                              struct net_device *ndev,
                                              struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        int idx;

        if (!netif_is_lag_port(ndev))
                return 0;

        idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
        if (idx == -1)
                return 0;

        /* This information is used to determine virtual to physical
         * port mapping.
         */
        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        tracker->netdev_state[idx] = *lag_lower_info;

        return 1;
}

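/* Netdev notifier callback: update the lag tracker from CHANGEUPPER and
 * CHANGELOWERSTATE events and schedule the bond work if anything changed.
 */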
static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct lag_tracker tracker;
        struct mlx5_lag *ldev;
        int changed = 0;

        if (!net_eq(dev_net(ndev), &init_net))
                return NOTIFY_DONE;

        if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
                return NOTIFY_DONE;

        ldev    = container_of(this, struct mlx5_lag, nb);
        tracker = ldev->tracker;

        switch (event) {
        case NETDEV_CHANGEUPPER:
                changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
                                                        ptr);
                break;
        case NETDEV_CHANGELOWERSTATE:
                changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
                                                             ndev, ptr);
                break;
        }

        mutex_lock(&lag_mutex);
        ldev->tracker = tracker;
        mutex_unlock(&lag_mutex);

        if (changed)
                mlx5_queue_bond_work(ldev, 0);

        return NOTIFY_DONE;
}

static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
        struct mlx5_lag *ldev;

        ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
        if (!ldev)
                return NULL;

        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

        return ldev;
}

static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
        kfree(ldev);
}

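/* A PF's PCI function number selects its slot in the lag device's port
 * array.
 */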
static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
                                struct mlx5_core_dev *dev,
                                struct net_device *netdev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        mutex_lock(&lag_mutex);
        ldev->pf[fn].dev    = dev;
        ldev->pf[fn].netdev = netdev;
        ldev->tracker.netdev_state[fn].link_up = 0;
        ldev->tracker.netdev_state[fn].tx_enabled = 0;

        dev->priv.lag = ldev;
        mutex_unlock(&lag_mutex);
}

static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
                                   struct mlx5_core_dev *dev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev == dev)
                        break;

        if (i == MLX5_MAX_PORTS)
                return;

        mutex_lock(&lag_mutex);
        memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

        dev->priv.lag = NULL;
        mutex_unlock(&lag_mutex);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;

        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
                return;

        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
                ldev = tmp_dev->priv.lag;

        if (!ldev) {
                ldev = mlx5_lag_dev_alloc();
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
                        return;
                }
        }

        mlx5_lag_dev_add_pf(ldev, dev, netdev);

        if (!ldev->nb.notifier_call) {
                ldev->nb.notifier_call = mlx5_lag_netdev_event;
                if (register_netdevice_notifier(&ldev->nb)) {
                        ldev->nb.notifier_call = NULL;
                        mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
                }
        }
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        int i;

        ldev = mlx5_lag_dev_get(dev);
        if (!ldev)
                return;

        if (mlx5_lag_is_bonded(ldev))
                mlx5_deactivate_lag(ldev);

        mlx5_lag_dev_remove_pf(ldev, dev);

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev)
                        break;

        if (i == MLX5_MAX_PORTS) {
                if (ldev->nb.notifier_call)
                        unregister_netdevice_notifier(&ldev->nb);
                cancel_delayed_work_sync(&ldev->bond_work);
                mlx5_lag_dev_free(ldev);
        }
}

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        mutex_lock(&lag_mutex);
        ldev = mlx5_lag_dev_get(dev);
        res  = ldev && mlx5_lag_is_bonded(ldev);
        mutex_unlock(&lag_mutex);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

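/* Return the netdev currently carrying RoCE traffic for the bond: the
 * active port's netdev in active-backup mode, PF0's netdev otherwise.
 * A reference is taken with dev_hold(); the caller must release it.
 */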
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
        struct net_device *ndev = NULL;
        struct mlx5_lag *ldev;

        mutex_lock(&lag_mutex);
        ldev = mlx5_lag_dev_get(dev);

        if (!(ldev && mlx5_lag_is_bonded(ldev)))
                goto unlock;

        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                ndev = ldev->tracker.netdev_state[0].tx_enabled ?
                       ldev->pf[0].netdev : ldev->pf[1].netdev;
        } else {
                ndev = ldev->pf[0].netdev;
        }
        if (ndev)
                dev_hold(ndev);

unlock:
        mutex_unlock(&lag_mutex);

        return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
        struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
                                                 priv);
        struct mlx5_lag *ldev;

        if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
                return true;

        ldev = mlx5_lag_dev_get(dev);
        if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev)
                return true;

        /* If bonded, we do not add an IB device for PF1. */
        return false;
}