linux/drivers/net/ethernet/mellanox/mlx5/core/lag.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

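/* Wrapper for the CREATE_LAG firmware command: remap_port1/remap_port2
 * program the initial TX remap affinity for ports 1 and 2.
 */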
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

        MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, create_lag, in);
}

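/* Wrapper for the MODIFY_LAG firmware command. field_select 0x1 limits
 * the update to the tx_remap_affinity fields set below.
 */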
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

        MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
        MLX5_SET(modify_lag_in, in, field_select, 0x1);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].netdev == ndev)
                        return i;

        return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

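/* Compute the virtual-to-physical TX port mapping from the tracked bond
 * state: the default is the identity mapping (1->1, 2->2); if a port has
 * its link down or TX disabled, all traffic is remapped to the other port.
 */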
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           u8 *port1, u8 *port2)
{
        *port1 = 1;
        *port2 = 2;
        if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled ||
            !tracker->netdev_state[MLX5_LAG_P1].link_up) {
                *port1 = 2;
                return;
        }

        if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled ||
            !tracker->netdev_state[MLX5_LAG_P2].link_up)
                *port2 = 1;
}

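/* Recompute the TX affinity from the current bond state and, only if the
 * mapping changed, push it to firmware with MODIFY_LAG.
 */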
void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u8 v2p_port1, v2p_port2;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
                                       &v2p_port2);

        if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
            v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
                ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
                ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

                mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
                               ldev->v2p_map[MLX5_LAG_P1],
                               ldev->v2p_map[MLX5_LAG_P2]);

                err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                if (err)
                        mlx5_core_err(dev0,
                                      "Failed to modify LAG (%d)\n",
                                      err);
        }
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);

        mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
                       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);

        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
                                  ldev->v2p_map[MLX5_LAG_P2]);
        if (err)
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
        return err;
}

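/* Create the LAG object in firmware and record the LAG mode (RoCE or
 * SR-IOV/VF) in ldev->flags.
 */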
int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
                      u8 flags)
{
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        err = mlx5_create_lag(ldev, tracker);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to activate RoCE LAG\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to activate VF LAG\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
                return err;
        }

        ldev->flags |= flags;
        return 0;
}

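/* Clear the LAG mode flags and destroy the LAG object in firmware. A
 * failure here can only be recovered by a driver restart, hence the
 * error messages below.
 */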
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        int err;

        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate VF LAG; driver restart required\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
        }

        return err;
}

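/* LAG offload requires both PF devices to be present; with the eswitch
 * compiled in, the eswitch code decides whether their modes allow
 * bonding, otherwise SR-IOV must be disabled on both PFs.
 */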
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
        if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;

#ifdef CONFIG_MLX5_ESWITCH
        return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                   ldev->pf[MLX5_LAG_P2].dev);
#else
        return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
                !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev)
                        mlx5_add_dev_by_protocol(ldev->pf[i].dev,
                                                 MLX5_INTERFACE_PROTOCOL_IB);
}

static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev)
                        mlx5_remove_dev_by_protocol(ldev->pf[i].dev,
                                                    MLX5_INTERFACE_PROTOCOL_IB);
}

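/* Central LAG state machine, called from the bond work (and from
 * mlx5_lag_update()) with the device list lock held: create, modify or
 * destroy the hardware LAG according to the tracked bond state.
 */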
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        struct lag_tracker tracker;
        bool do_bond, roce_lag;
        int err;

        if (!mlx5_lag_is_ready(ldev))
                return;

        spin_lock(&lag_lock);
        tracker = ldev->tracker;
        spin_unlock(&lag_lock);

        do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

        if (do_bond && !__mlx5_lag_is_active(ldev)) {
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
                roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
                            dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

                if (roce_lag)
                        mlx5_lag_remove_ib_devices(ldev);

                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
                                        MLX5_LAG_FLAG_SRIOV);
                if (err) {
                        if (roce_lag)
                                mlx5_lag_add_ib_devices(ldev);

                        return;
                }

                if (roce_lag) {
                        mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
                        mlx5_nic_vport_enable_roce(dev1);
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
        } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
                roce_lag = __mlx5_lag_is_roce(ldev);

                if (roce_lag) {
                        mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
                        mlx5_nic_vport_disable_roce(dev1);
                }

                err = mlx5_deactivate_lag(ldev);
                if (err)
                        return;

                if (roce_lag)
                        mlx5_lag_add_ib_devices(ldev);
        }
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

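/* Delayed work handler: if the device list lock cannot be taken right
 * away, retry in one second rather than block the workqueue.
 */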
static void mlx5_do_bond_work(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
        int status;

        status = mlx5_dev_list_trylock();
        if (!status) {
                /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        mlx5_do_bond(ldev);
        mlx5_dev_list_unlock();
}

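/* Handle NETDEV_CHANGEUPPER: decide whether both of our netdevs (and only
 * them) are now slaves of the same bond master with a supported TX type.
 * Returns 1 if the tracked bonding state changed, 0 otherwise.
 */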
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                         struct lag_tracker *tracker,
                                         struct net_device *ndev,
                                         struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *ndev_tmp;
        struct netdev_lag_upper_info *lag_upper_info = NULL;
        bool is_bonded, is_in_lag, mode_supported;
        int bond_status = 0;
        int num_slaves = 0;
        int idx;

        if (!netif_is_lag_master(upper))
                return 0;

        if (info->linking)
                lag_upper_info = info->upper_info;

        /* The event may still be of interest if the slave does not belong to
         * us, but is enslaved to a master which has one or more of our netdevs
         * as slaves (e.g., if a new slave is added to a master that bonds two
         * of our netdevs, we should unbond).
         */
        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
                idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
                if (idx >= 0)
                        bond_status |= (1 << idx);

                num_slaves++;
        }
        rcu_read_unlock();

        /* None of this lagdev's netdevs are slaves of this master. */
        if (!(bond_status & 0x3))
                return 0;

        if (lag_upper_info)
                tracker->tx_type = lag_upper_info->tx_type;

        /* Determine bonding status:
         * A device is considered bonded if both its physical ports are slaves
         * of the same lag master, and only them.
         */
        is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

        if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, PF is configured with more than 64 VFs");
                return 0;
        }

        /* Lag mode must be activebackup or hash. */
        mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
                         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

        if (is_in_lag && !mode_supported)
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, TX type isn't supported");

        is_bonded = is_in_lag && mode_supported;
        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
                return 1;
        }

        return 0;
}

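/* Handle NETDEV_CHANGELOWERSTATE for one of our slave netdevs: record its
 * link and TX state, which later drives the TX port affinity mapping.
 * Returns 1 so the caller schedules the bond work.
 */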
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
                                              struct lag_tracker *tracker,
                                              struct net_device *ndev,
                                              struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        int idx;

        if (!netif_is_lag_port(ndev))
                return 0;

        idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
        if (idx < 0)
                return 0;

        /* This information is used to determine virtual to physical
         * port mapping.
         */
        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        tracker->netdev_state[idx] = *lag_lower_info;

        return 1;
}

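/* Netdev notifier callback: update a local copy of the tracker from the
 * event, publish it under lag_lock and kick the bond work if the state
 * changed.
 */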
static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct lag_tracker tracker;
        struct mlx5_lag *ldev;
        int changed = 0;

        if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
                return NOTIFY_DONE;

        ldev    = container_of(this, struct mlx5_lag, nb);

        if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
                return NOTIFY_DONE;

        tracker = ldev->tracker;

        switch (event) {
        case NETDEV_CHANGEUPPER:
                changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
                                                        ptr);
                break;
        case NETDEV_CHANGELOWERSTATE:
                changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
                                                             ndev, ptr);
                break;
        }

        spin_lock(&lag_lock);
        ldev->tracker = tracker;
        spin_unlock(&lag_lock);

        if (changed)
                mlx5_queue_bond_work(ldev, 0);

        return NOTIFY_DONE;
}

static struct mlx5_lag *mlx5_lag_dev_alloc(void)
{
        struct mlx5_lag *ldev;

        ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
        if (!ldev)
                return NULL;

        ldev->wq = create_singlethread_workqueue("mlx5_lag");
        if (!ldev->wq) {
                kfree(ldev);
                return NULL;
        }

        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

        return ldev;
}

static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
        destroy_workqueue(ldev->wq);
        kfree(ldev);
}

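/* Register a PF with the LAG device. The slot is chosen by PCI function
 * number, so only functions 0 and 1 can take part in the LAG.
 */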
static int mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
                               struct mlx5_core_dev *dev,
                               struct net_device *netdev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return -EPERM;

        spin_lock(&lag_lock);
        ldev->pf[fn].dev    = dev;
        ldev->pf[fn].netdev = netdev;
        ldev->tracker.netdev_state[fn].link_up = 0;
        ldev->tracker.netdev_state[fn].tx_enabled = 0;

        dev->priv.lag = ldev;

        spin_unlock(&lag_lock);

        return fn;
}

static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
                                   struct mlx5_core_dev *dev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev == dev)
                        break;

        if (i == MLX5_MAX_PORTS)
                return;

        spin_lock(&lag_lock);
        memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

        dev->priv.lag = NULL;
        spin_unlock(&lag_lock);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;
        int i, err;

        if (!MLX5_CAP_GEN(dev, vport_group_manager))
                return;

        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
                ldev = tmp_dev->priv.lag;

        if (!ldev) {
                ldev = mlx5_lag_dev_alloc();
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
                        return;
                }
        }

        if (mlx5_lag_dev_add_pf(ldev, dev, netdev) < 0)
                return;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                tmp_dev = ldev->pf[i].dev;
                if (!tmp_dev || !MLX5_CAP_GEN(tmp_dev, lag_master) ||
                    MLX5_CAP_GEN(tmp_dev, num_lag_ports) != MLX5_MAX_PORTS)
                        break;
        }

        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;

        if (!ldev->nb.notifier_call) {
                ldev->nb.notifier_call = mlx5_lag_netdev_event;
                if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
                        ldev->nb.notifier_call = NULL;
                        mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
                }
        }

        err = mlx5_lag_mp_init(ldev);
        if (err)
                mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
                              err);
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        int i;

        ldev = mlx5_lag_dev_get(dev);
        if (!ldev)
                return;

        if (__mlx5_lag_is_active(ldev))
                mlx5_deactivate_lag(ldev);

        mlx5_lag_dev_remove_pf(ldev, dev);

        ldev->flags &= ~MLX5_LAG_FLAG_READY;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev)
                        break;

        if (i == MLX5_MAX_PORTS) {
                if (ldev->nb.notifier_call) {
                        unregister_netdevice_notifier_net(&init_net, &ldev->nb);
                        ldev->nb.notifier_call = NULL;
                }
                mlx5_lag_mp_cleanup(ldev);
                cancel_delayed_work_sync(&ldev->bond_work);
                mlx5_lag_dev_free(ldev);
        }
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);
        res  = ldev && __mlx5_lag_is_roce(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);
        res  = ldev && __mlx5_lag_is_active(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);
        res  = ldev && __mlx5_lag_is_sriov(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

void mlx5_lag_update(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        mlx5_dev_list_lock();
        ldev = mlx5_lag_dev_get(dev);
        if (!ldev)
                goto unlock;

        mlx5_do_bond(ldev);

unlock:
        mlx5_dev_list_unlock();
}

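/* Return the netdev that currently carries the RoCE traffic: the active
 * slave in active-backup mode, otherwise port 1. A reference is held on
 * the returned netdev.
 */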
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
        struct net_device *ndev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);

        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
                       ldev->pf[MLX5_LAG_P1].netdev :
                       ldev->pf[MLX5_LAG_P2].netdev;
        } else {
                ndev = ldev->pf[MLX5_LAG_P1].netdev;
        }
        if (ndev)
                dev_hold(ndev);

unlock:
        spin_unlock(&lag_lock);

        return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

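/* Map a slave netdev to the physical port its traffic is currently
 * steered to, according to the programmed v2p mapping.
 */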
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave)
{
        struct mlx5_lag *ldev;
        u8 port = 0;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);
        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->pf[MLX5_LAG_P1].netdev == slave)
                port = MLX5_LAG_P1;
        else
                port = MLX5_LAG_P2;

        port = ldev->v2p_map[port];

unlock:
        spin_unlock(&lag_lock);
        return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
        struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
                                                 priv);
        struct mlx5_lag *ldev;

        if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
                return true;

        ldev = mlx5_lag_dev_get(dev);
        if (!ldev || !__mlx5_lag_is_roce(ldev) ||
            ldev->pf[MLX5_LAG_P1].dev == dev)
                return true;

        /* If bonded, we do not add an IB device for PF1. */
        return false;
}

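/* Query congestion statistics. When a RoCE LAG is active the counters of
 * both ports are summed, otherwise only this device is queried.
 */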
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets)
{
        int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
        struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
        struct mlx5_lag *ldev;
        int num_ports;
        int ret, i, j;
        void *out;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        memset(values, 0, sizeof(*values) * num_counters);

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev_get(dev);
        if (ldev && __mlx5_lag_is_roce(ldev)) {
                num_ports = MLX5_MAX_PORTS;
                mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
                mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
        } else {
                num_ports = 1;
                mdev[MLX5_LAG_P1] = dev;
        }
        spin_unlock(&lag_lock);

        for (i = 0; i < num_ports; ++i) {
                u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

                MLX5_SET(query_cong_statistics_in, in, opcode,
                         MLX5_CMD_OP_QUERY_CONG_STATISTICS);
                ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
                                          out);
                if (ret)
                        goto free;

                for (j = 0; j < num_counters; ++j)
                        values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
        }

free:
        kvfree(out);
        return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);