linux/drivers/net/ethernet/mellanox/mlx5/core/lag.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

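/* Issue the CREATE_LAG firmware command. remap_port1/remap_port2 program
 * the initial Tx affinity: traffic of logical port N egresses through the
 * physical port written into tx_remap_affinity_N.
 */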
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

        MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, create_lag, in);
}

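/* Issue MODIFY_LAG to update only the Tx affinity of an existing LAG
 * (field_select bit 0 selects the tx_remap_affinity fields).
 */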
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

        MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
        MLX5_SET(modify_lag_in, in, field_select, 0x1);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

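/* kref release callback: unregister the netdev notifier, clean up the
 * multipath state and free the shared LAG object and its workqueue.
 */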
static void mlx5_ldev_free(struct kref *ref)
{
        struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

        if (ldev->nb.notifier_call)
                unregister_netdevice_notifier_net(&init_net, &ldev->nb);
        mlx5_lag_mp_cleanup(ldev);
        cancel_delayed_work_sync(&ldev->bond_work);
        destroy_workqueue(ldev->wq);
        kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
        kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
        kref_get(&ldev->ref);
}

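/* Allocate the mlx5_lag object shared by the PFs of one device. Failures
 * to register the notifier or to init multipath are reported but do not
 * fail the allocation.
 */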
static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        int err;

        ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
        if (!ldev)
                return NULL;

        ldev->wq = create_singlethread_workqueue("mlx5_lag");
        if (!ldev->wq) {
                kfree(ldev);
                return NULL;
        }

        kref_init(&ldev->ref);
        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

        ldev->nb.notifier_call = mlx5_lag_netdev_event;
        if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
                ldev->nb.notifier_call = NULL;
                mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
        }

        err = mlx5_lag_mp_init(ldev);
        if (err)
                mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
                              err);

        return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].netdev == ndev)
                        return i;

        return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

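/* Derive the logical-to-physical Tx port mapping from the tracked bond
 * state. A port is usable when it is tx_enabled and its link is up:
 *   both or neither usable -> identity mapping (1, 2)
 *   only port 1 usable     -> (1, 1)
 *   only port 2 usable     -> (2, 2)
 */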
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           u8 *port1, u8 *port2)
{
        bool p1en;
        bool p2en;

        p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P1].link_up;

        p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P2].link_up;

        *port1 = 1;
        *port2 = 2;
        if ((!p1en && !p2en) || (p1en && p2en))
                return;

        if (p1en)
                *port2 = 1;
        else
                *port1 = 2;
}

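/* Recompute the Tx affinity from the tracker and, only if it changed,
 * push the new mapping to firmware with MODIFY_LAG.
 */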
void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u8 v2p_port1, v2p_port2;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
                                       &v2p_port2);

        if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
            v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
                ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
                ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

                mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
                               ldev->v2p_map[MLX5_LAG_P1],
                               ldev->v2p_map[MLX5_LAG_P2]);

                err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                if (err)
                        mlx5_core_err(dev0,
                                      "Failed to modify LAG (%d)\n",
                                      err);
        }
}

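/* Compute the initial Tx affinity and issue CREATE_LAG on PF0. */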
static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);

        mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
                       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);

        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
                                  ldev->v2p_map[MLX5_LAG_P2]);
        if (err)
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
        return err;
}

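/* Create the hardware LAG and record the requested mode (RoCE or SRIOV/VF
 * LAG) in ldev->flags. For VF LAG the error path reminds that all VFs must
 * be unbound before the bond can be activated or deactivated.
 */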
int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
                      u8 flags)
{
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        err = mlx5_create_lag(ldev, tracker);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to activate RoCE LAG\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to activate VF LAG\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
                return err;
        }

        ldev->flags |= flags;
        return 0;
}

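/* Clear the LAG mode flags and issue DESTROY_LAG. A firmware failure here
 * leaves the device in a state that only a driver restart can recover, as
 * the error messages below spell out.
 */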
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        int err;

        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate VF LAG; driver restart required\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
        }

        return err;
}

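/* LAG can be offloaded only when both PFs are present and, with the
 * eswitch compiled in, mlx5_esw_lag_prereq() approves their eswitch
 * modes; otherwise SR-IOV must be disabled on both PFs.
 */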
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
        if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;

#ifdef CONFIG_MLX5_ESWITCH
        return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                   ldev->pf[MLX5_LAG_P2].dev);
#else
        return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
                !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

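/* Re-enable the IB auxiliary device of each PF and rescan, unless all
 * auxiliary devices are administratively disabled for that PF. The
 * counterpart below sets the flag so the IB devices are torn down before
 * a RoCE LAG is created.
 */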
static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                if (ldev->pf[i].dev->priv.flags &
                    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
                        continue;

                ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}

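/* Tear the bond down. For RoCE LAG the IB device of PF0 is removed and
 * RoCE on PF1 is disabled before DESTROY_LAG; the per-PF IB devices are
 * re-added once the LAG is gone.
 */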
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        bool roce_lag;
        int err;

        roce_lag = __mlx5_lag_is_roce(ldev);

        if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                }
                mlx5_nic_vport_disable_roce(dev1);
        }

        err = mlx5_deactivate_lag(ldev);
        if (err)
                return;

        if (roce_lag)
                mlx5_lag_add_devices(ldev);
}

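/* Core LAG state machine, run from the bond work under the device list
 * lock. Depending on the tracker state it either creates the LAG (as RoCE
 * LAG when SR-IOV is off and both eswitches are in NONE mode, otherwise as
 * VF LAG), updates the Tx affinity of an existing LAG, or destroys it.
 */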
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        struct lag_tracker tracker;
        bool do_bond, roce_lag;
        int err;

        if (!mlx5_lag_is_ready(ldev))
                return;

        tracker = ldev->tracker;

        do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

        if (do_bond && !__mlx5_lag_is_active(ldev)) {
                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
                roce_lag = roce_lag &&
                           dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

                if (roce_lag)
                        mlx5_lag_remove_devices(ldev);

                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
                                        MLX5_LAG_FLAG_SRIOV);
                if (err) {
                        if (roce_lag)
                                mlx5_lag_add_devices(ldev);

                        return;
                }

                if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
        } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_disable_lag(ldev);
        }
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

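/* Bond work: run mlx5_do_bond() under the device list lock, retrying a
 * second later if the lock is currently contended.
 */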
static void mlx5_do_bond_work(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
        int status;

        status = mlx5_dev_list_trylock();
        if (!status) {
                /* 1 sec delay. */
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        mlx5_do_bond(ldev);
        mlx5_dev_list_unlock();
}

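/* Handle NETDEV_CHANGEUPPER for a LAG master that enslaves our netdevs.
 * Returns 1 when the tracker's bonding state changed and the bond work
 * should be scheduled, 0 otherwise.
 */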
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                         struct lag_tracker *tracker,
                                         struct net_device *ndev,
                                         struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *ndev_tmp;
        struct netdev_lag_upper_info *lag_upper_info = NULL;
        bool is_bonded, is_in_lag, mode_supported;
        int bond_status = 0;
        int num_slaves = 0;
        int idx;

        if (!netif_is_lag_master(upper))
                return 0;

        if (info->linking)
                lag_upper_info = info->upper_info;

        /* The event may still be of interest if the slave does not belong to
         * us, but is enslaved to a master which has one or more of our netdevs
         * as slaves (e.g., if a new slave is added to a master that bonds two
         * of our netdevs, we should unbond).
         */
        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
                idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
                if (idx >= 0)
                        bond_status |= (1 << idx);

                num_slaves++;
        }
        rcu_read_unlock();

        /* None of this lagdev's netdevs are slaves of this master. */
        if (!(bond_status & 0x3))
                return 0;

        if (lag_upper_info)
                tracker->tx_type = lag_upper_info->tx_type;

        /* Determine bonding status:
         * A device is considered bonded if both its physical ports are slaves
         * of the same lag master, and only them.
         */
        is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

        if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, PF is configured with more than 64 VFs");
                return 0;
        }

        /* Lag mode must be activebackup or hash. */
        mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
                         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

        if (is_in_lag && !mode_supported)
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, TX type isn't supported");

        is_bonded = is_in_lag && mode_supported;
        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
                return 1;
        }

        return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
                                              struct lag_tracker *tracker,
                                              struct net_device *ndev,
                                              struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        int idx;

        if (!netif_is_lag_port(ndev))
                return 0;

        idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
        if (idx < 0)
                return 0;

        /* This information is used to determine virtual to physical
         * port mapping.
         */
        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        tracker->netdev_state[idx] = *lag_lower_info;

        return 1;
}

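/* Netdev notifier: snapshot the relevant bond state into ldev->tracker
 * and kick the bond work when something that affects the LAG changed.
 */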
static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct lag_tracker tracker;
        struct mlx5_lag *ldev;
        int changed = 0;

        if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
                return NOTIFY_DONE;

        ldev    = container_of(this, struct mlx5_lag, nb);

        if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
                return NOTIFY_DONE;

        tracker = ldev->tracker;

        switch (event) {
        case NETDEV_CHANGEUPPER:
                changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
                                                        ptr);
                break;
        case NETDEV_CHANGELOWERSTATE:
                changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
                                                             ndev, ptr);
                break;
        }

        ldev->tracker = tracker;

        if (changed)
                mlx5_queue_bond_work(ldev, 0);

        return NOTIFY_DONE;
}

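/* The PF's PCI function number selects its slot in ldev->pf[]. */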
static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
                                 struct mlx5_core_dev *dev,
                                 struct net_device *netdev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        spin_lock(&lag_lock);
        ldev->pf[fn].netdev = netdev;
        ldev->tracker.netdev_state[fn].link_up = 0;
        ldev->tracker.netdev_state[fn].tx_enabled = 0;
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
                                    struct net_device *netdev)
{
        int i;

        spin_lock(&lag_lock);
        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (ldev->pf[i].netdev == netdev) {
                        ldev->pf[i].netdev = NULL;
                        break;
                }
        }
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
                               struct mlx5_core_dev *dev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        ldev->pf[fn].dev = dev;
        dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
                                  struct mlx5_core_dev *dev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev == dev)
                        break;

        if (i == MLX5_MAX_PORTS)
                return;

        ldev->pf[i].dev = NULL;
        dev->priv.lag = NULL;
}

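/* Find the mlx5_lag already created by the other PF of the same physical
 * device (via mlx5_get_next_phys_dev()), or allocate a new one, and
 * register this PF with it.
 */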
/* Must be called with intf_mutex held */
static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;

        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
                return;

        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
                ldev = tmp_dev->priv.lag;

        if (!ldev) {
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
                        return;
                }
        } else {
                mlx5_ldev_get(ldev);
        }

        mlx5_ldev_add_mdev(ldev, dev);

        return;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_dev_list_lock();
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
        mlx5_dev_list_lock();
        __mlx5_lag_dev_add_mdev(dev);
        mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
                            struct net_device *netdev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        if (__mlx5_lag_is_active(ldev))
                mlx5_disable_lag(ldev);

        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
                         struct net_device *netdev)
{
        struct mlx5_lag *ldev;
        int i;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_ldev_add_netdev(ldev, dev, netdev);

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (!ldev->pf[i].dev)
                        break;

        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
}

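/* The exported helpers below take lag_lock rather than intf_mutex, so
 * callers outside mlx5_core (such as the RDMA driver) can query LAG state
 * from contexts where intf_mutex is not held.
 */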
bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_roce(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_active(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_sriov(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

void mlx5_lag_update(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        mlx5_dev_list_lock();
        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                goto unlock;

        mlx5_do_bond(ldev);

unlock:
        mlx5_dev_list_unlock();
}

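/* Return the netdev that currently carries the RoCE traffic of the bond:
 * the tx_enabled slave for active-backup, PF0's netdev otherwise. A
 * reference is taken on the returned netdev; the caller must release it
 * with dev_put().
 */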
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
        struct net_device *ndev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);

        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
                       ldev->pf[MLX5_LAG_P1].netdev :
                       ldev->pf[MLX5_LAG_P2].netdev;
        } else {
                ndev = ldev->pf[MLX5_LAG_P1].netdev;
        }
        if (ndev)
                dev_hold(ndev);

unlock:
        spin_unlock(&lag_lock);

        return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

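/* Map a slave netdev to the physical port that currently serves its
 * traffic, according to the programmed v2p (Tx affinity) mapping.
 */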
u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave)
{
        struct mlx5_lag *ldev;
        u8 port = 0;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->pf[MLX5_LAG_P1].netdev == slave)
                port = MLX5_LAG_P1;
        else
                port = MLX5_LAG_P2;

        port = ldev->v2p_map[port];

unlock:
        spin_unlock(&lag_lock);
        return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

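/* Query the port congestion statistics and accumulate them into values[]:
 * when the LAG is active the counters of both member ports are summed,
 * otherwise only this device's port is queried. offsets[] gives the byte
 * offset of each counter inside query_cong_statistics_out.
 *
 * Usage sketch (hypothetical caller; the counter chosen here is only
 * illustrative):
 *
 *      size_t offs[] = { MLX5_BYTE_OFF(query_cong_statistics_out,
 *                                      rp_cnp_handled_high) };
 *      u64 vals[ARRAY_SIZE(offs)];
 *      int err;
 *
 *      err = mlx5_lag_query_cong_counters(mdev, vals, ARRAY_SIZE(offs), offs);
 */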
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets)
{
        int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
        struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
        struct mlx5_lag *ldev;
        int num_ports;
        int ret, i, j;
        void *out;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        memset(values, 0, sizeof(*values) * num_counters);

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (ldev && __mlx5_lag_is_active(ldev)) {
                num_ports = MLX5_MAX_PORTS;
                mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
                mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
        } else {
                num_ports = 1;
                mdev[MLX5_LAG_P1] = dev;
        }
        spin_unlock(&lag_lock);

        for (i = 0; i < num_ports; ++i) {
                u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

                MLX5_SET(query_cong_statistics_in, in, opcode,
                         MLX5_CMD_OP_QUERY_CONG_STATISTICS);
                ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
                                          out);
                if (ret)
                        goto free;

                for (j = 0; j < num_counters; ++j)
                        values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
        }

free:
        kvfree(out);
        return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);