linux/drivers/net/ethernet/mellanox/mlx5/core/lag.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

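/* Thin wrappers over the CREATE_LAG and MODIFY_LAG firmware commands.
 * Both program the TX remap affinity fields of the LAG context:
 * tx_remap_affinity_N selects the physical port (1 or 2) that carries
 * traffic for logical port N. CREATE_LAG additionally selects the FDB
 * mode, while MODIFY_LAG updates only the port mapping
 * (field_select 0x1).
 */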
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2, bool shared_fdb)
{
        u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

        MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
        MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

        return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
                               u8 remap_port2)
{
        u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
        void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

        MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
        MLX5_SET(modify_lag_in, in, field_select, 0x1);

        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
        MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

        return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

        MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
        u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

        MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

        return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
        struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

        if (ldev->nb.notifier_call)
                unregister_netdevice_notifier_net(&init_net, &ldev->nb);
        mlx5_lag_mp_cleanup(ldev);
        cancel_delayed_work_sync(&ldev->bond_work);
        destroy_workqueue(ldev->wq);
        kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
        kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
        kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        int err;

        ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
        if (!ldev)
                return NULL;

        ldev->wq = create_singlethread_workqueue("mlx5_lag");
        if (!ldev->wq) {
                kfree(ldev);
                return NULL;
        }

        kref_init(&ldev->ref);
        INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

        ldev->nb.notifier_call = mlx5_lag_netdev_event;
        if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
                ldev->nb.notifier_call = NULL;
                mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
        }

        err = mlx5_lag_mp_init(ldev);
        if (err)
                mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
                              err);

        return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
                                struct net_device *ndev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].netdev == ndev)
                        return i;

        return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

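/* Infer the logical-to-physical TX port mapping from the tracked bond
 * slave state. A port is usable when it is both tx_enabled and
 * link_up. If both ports or neither port are usable, keep the
 * identity mapping (1 -> 1, 2 -> 2); if only one is usable, remap both
 * logical ports onto it. For example, with port 1 up and port 2 down
 * the result is *port1 = 1, *port2 = 1, so all traffic egresses
 * physical port 1.
 */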
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
                                           u8 *port1, u8 *port2)
{
        bool p1en;
        bool p2en;

        p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P1].link_up;

        p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
               tracker->netdev_state[MLX5_LAG_P2].link_up;

        *port1 = 1;
        *port2 = 2;
        if ((!p1en && !p2en) || (p1en && p2en))
                return;

        if (p1en)
                *port2 = 1;
        else
                *port1 = 2;
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
                     struct lag_tracker *tracker)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u8 v2p_port1, v2p_port2;
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
                                       &v2p_port2);

        if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
            v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
                ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
                ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

                mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
                               ldev->v2p_map[MLX5_LAG_P1],
                               ldev->v2p_map[MLX5_LAG_P2]);

                err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
                if (err)
                        mlx5_core_err(dev0,
                                      "Failed to modify LAG (%d)\n",
                                      err);
        }
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
                           struct lag_tracker *tracker,
                           bool shared_fdb)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        int err;

        mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
                                       &ldev->v2p_map[MLX5_LAG_P2]);

        mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
                       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
                       shared_fdb);

        err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
                                  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
        if (err) {
                mlx5_core_err(dev0,
                              "Failed to create LAG (%d)\n",
                              err);
                return err;
        }

        if (shared_fdb) {
                err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
                                                              dev1->priv.eswitch);
                if (err)
                        mlx5_core_err(dev0, "Can't enable single FDB mode\n");
                else
                        mlx5_core_info(dev0, "Operation mode is single FDB\n");
        }

        if (err) {
                MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
                if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
        }

        return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
                      struct lag_tracker *tracker,
                      u8 flags,
                      bool shared_fdb)
{
        bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        int err;

        err = mlx5_create_lag(ldev, tracker, shared_fdb);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to activate RoCE LAG\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to activate VF LAG\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
                return err;
        }

        ldev->flags |= flags;
        ldev->shared_fdb = shared_fdb;
        return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
        bool roce_lag = __mlx5_lag_is_roce(ldev);
        int err;

        ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
        mlx5_lag_mp_reset(ldev);

        if (ldev->shared_fdb) {
                mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
                                                         ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
                ldev->shared_fdb = false;
        }

        MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
        err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
        if (err) {
                if (roce_lag) {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate RoCE LAG; driver restart required\n");
                } else {
                        mlx5_core_err(dev0,
                                      "Failed to deactivate VF LAG; driver restart required\n"
                                      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
                }
        }

        return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
        if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
                return false;

#ifdef CONFIG_MLX5_ESWITCH
        return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                   ldev->pf[MLX5_LAG_P2].dev);
#else
        return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
                !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                if (ldev->pf[i].dev->priv.flags &
                    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
                        continue;

                ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (!ldev->pf[i].dev)
                        continue;

                if (ldev->pf[i].dev->priv.flags &
                    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
                        continue;

                ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                mlx5_rescan_drivers_locked(ldev->pf[i].dev);
        }
}

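/* Tear LAG down. For shared FDB the IB auxiliary devices are removed
 * first; for RoCE LAG the IB device on port 1 is removed and RoCE is
 * disabled on port 2. Once DESTROY_LAG succeeds, the per-port IB
 * devices are added back and, for shared FDB, the eswitch
 * representors are reloaded into their standalone configuration.
 */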
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        bool shared_fdb = ldev->shared_fdb;
        bool roce_lag;
        int err;

        roce_lag = __mlx5_lag_is_roce(ldev);

        if (shared_fdb) {
                mlx5_lag_remove_devices(ldev);
        } else if (roce_lag) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
                        dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                }
                mlx5_nic_vport_disable_roce(dev1);
        }

        err = mlx5_deactivate_lag(ldev);
        if (err)
                return;

        if (shared_fdb || roce_lag)
                mlx5_lag_add_devices(ldev);

        if (shared_fdb) {
                if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
                        mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
                        mlx5_eswitch_reload_reps(dev1->priv.eswitch);
        }
}

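/* Shared FDB requires both devices in switchdev mode with vport match
 * metadata enabled and the eswitches already paired over devcom, plus
 * the firmware capabilities checked below: native FDB selection on
 * port 2, a root flow table that may live on the other eswitch, and a
 * shared ingress ACL on port 1.
 */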
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

        if (is_mdev_switchdev_mode(dev0) &&
            is_mdev_switchdev_mode(dev1) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
            mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
            mlx5_devcom_is_paired(dev0->priv.devcom,
                                  MLX5_DEVCOM_ESW_OFFLOADS) &&
            MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
            MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
            MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
                return true;

        return false;
}

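/* Core LAG state machine, run from the bond workqueue. Three
 * transitions are possible:
 *  - not bonded -> bonded: activate RoCE LAG (no SR-IOV, both
 *    eswitches in NONE mode) or SR-IOV/VF LAG, with shared FDB when
 *    supported;
 *  - bonded -> bonded: refresh the TX port mapping via MODIFY_LAG;
 *  - bonded -> not bonded: disable LAG.
 * If enabling shared FDB fails, the error path below unwinds back to
 * two standalone devices.
 */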
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        struct lag_tracker tracker;
        bool do_bond, roce_lag;
        int err;

        if (!mlx5_lag_is_ready(ldev)) {
                do_bond = false;
        } else {
                /* VF LAG is in multipath mode, ignore bond change requests */
                if (mlx5_lag_is_multipath(dev0))
                        return;

                tracker = ldev->tracker;

                do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
        }

        if (do_bond && !__mlx5_lag_is_active(ldev)) {
                bool shared_fdb = mlx5_shared_fdb_supported(ldev);

                roce_lag = !mlx5_sriov_is_enabled(dev0) &&
                           !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
                roce_lag = roce_lag &&
                           dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
                           dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

                if (shared_fdb || roce_lag)
                        mlx5_lag_remove_devices(ldev);

                err = mlx5_activate_lag(ldev, &tracker,
                                        roce_lag ? MLX5_LAG_FLAG_ROCE :
                                                   MLX5_LAG_FLAG_SRIOV,
                                        shared_fdb);
                if (err) {
                        if (shared_fdb || roce_lag)
                                mlx5_lag_add_devices(ldev);

                        return;
                } else if (roce_lag) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);
                        mlx5_nic_vport_enable_roce(dev1);
                } else if (shared_fdb) {
                        dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                        mlx5_rescan_drivers_locked(dev0);

                        err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                        if (!err)
                                err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

                        if (err) {
                                dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
                                mlx5_rescan_drivers_locked(dev0);
                                mlx5_deactivate_lag(ldev);
                                mlx5_lag_add_devices(ldev);
                                mlx5_eswitch_reload_reps(dev0->priv.eswitch);
                                mlx5_eswitch_reload_reps(dev1->priv.eswitch);
                                mlx5_core_err(dev0, "Failed to enable lag\n");
                                return;
                        }
                }
        } else if (do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_modify_lag(ldev, &tracker);
        } else if (!do_bond && __mlx5_lag_is_active(ldev)) {
                mlx5_disable_lag(ldev);
        }
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
        queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
                                    struct mlx5_core_dev *dev1)
{
        if (dev0)
                mlx5_esw_lock(dev0->priv.eswitch);
        if (dev1)
                mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
                                      struct mlx5_core_dev *dev1)
{
        if (dev1)
                mlx5_esw_unlock(dev1->priv.eswitch);
        if (dev0)
                mlx5_esw_unlock(dev0->priv.eswitch);
}

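/* Bond work handler. The device-list lock is only tried, never waited
 * on: if it is contended, or a mode change is in progress, the work
 * re-queues itself one second (HZ jiffies) later instead of blocking
 * the workqueue. Both eswitch locks are held around mlx5_do_bond().
 */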
static void mlx5_do_bond_work(struct work_struct *work)
{
        struct delayed_work *delayed_work = to_delayed_work(work);
        struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
                                             bond_work);
        struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
        struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
        int status;

        status = mlx5_dev_list_trylock();
        if (!status) {
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        if (ldev->mode_changes_in_progress) {
                mlx5_dev_list_unlock();
                mlx5_queue_bond_work(ldev, HZ);
                return;
        }

        mlx5_lag_lock_eswitches(dev0, dev1);
        mlx5_do_bond(ldev);
        mlx5_lag_unlock_eswitches(dev0, dev1);
        mlx5_dev_list_unlock();
}

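/* Handle NETDEV_CHANGEUPPER for a bond master above our netdevs.
 * bond_status is a bitmask of our port indices found among the
 * master's slaves, so 0x3 means both PFs are enslaved. The device
 * counts as bonded only when exactly our two ports (and nothing else)
 * are slaves of the same master and the bond TX type is supported.
 * Returns 1 when the tracker state changed and the bond work should
 * run, 0 otherwise.
 */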
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
                                         struct lag_tracker *tracker,
                                         struct net_device *ndev,
                                         struct netdev_notifier_changeupper_info *info)
{
        struct net_device *upper = info->upper_dev, *ndev_tmp;
        struct netdev_lag_upper_info *lag_upper_info = NULL;
        bool is_bonded, is_in_lag, mode_supported;
        int bond_status = 0;
        int num_slaves = 0;
        int idx;

        if (!netif_is_lag_master(upper))
                return 0;

        if (info->linking)
                lag_upper_info = info->upper_info;

        /* The event may still be of interest if the slave does not belong to
         * us, but is enslaved to a master which has one or more of our netdevs
         * as slaves (e.g., if a new slave is added to a master that bonds two
         * of our netdevs, we should unbond).
         */
        rcu_read_lock();
        for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
                idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
                if (idx >= 0)
                        bond_status |= (1 << idx);

                num_slaves++;
        }
        rcu_read_unlock();

        /* None of this lagdev's netdevs are slaves of this master. */
        if (!(bond_status & 0x3))
                return 0;

        if (lag_upper_info)
                tracker->tx_type = lag_upper_info->tx_type;

        /* Determine bonding status:
         * A device is considered bonded if both its physical ports are slaves
         * of the same lag master, and only them.
         */
        is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

        if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, PF is configured with more than 64 VFs");
                return 0;
        }

        /* Lag mode must be activebackup or hash. */
        mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
                         tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

        if (is_in_lag && !mode_supported)
                NL_SET_ERR_MSG_MOD(info->info.extack,
                                   "Can't activate LAG offload, TX type isn't supported");

        is_bonded = is_in_lag && mode_supported;
        if (tracker->is_bonded != is_bonded) {
                tracker->is_bonded = is_bonded;
                return 1;
        }

        return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
                                              struct lag_tracker *tracker,
                                              struct net_device *ndev,
                                              struct netdev_notifier_changelowerstate_info *info)
{
        struct netdev_lag_lower_state_info *lag_lower_info;
        int idx;

        if (!netif_is_lag_port(ndev))
                return 0;

        idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
        if (idx < 0)
                return 0;

        /* This information is used to determine virtual to physical
         * port mapping.
         */
        lag_lower_info = info->lower_state_info;
        if (!lag_lower_info)
                return 0;

        tracker->netdev_state[idx] = *lag_lower_info;

        return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct lag_tracker tracker;
        struct mlx5_lag *ldev;
        int changed = 0;

        if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
                return NOTIFY_DONE;

        ldev    = container_of(this, struct mlx5_lag, nb);

        if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
                return NOTIFY_DONE;

        tracker = ldev->tracker;

        switch (event) {
        case NETDEV_CHANGEUPPER:
                changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
                                                        ptr);
                break;
        case NETDEV_CHANGELOWERSTATE:
                changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
                                                             ndev, ptr);
                break;
        }

        ldev->tracker = tracker;

        if (changed)
                mlx5_queue_bond_work(ldev, 0);

        return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
                                 struct mlx5_core_dev *dev,
                                 struct net_device *netdev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        spin_lock(&lag_lock);
        ldev->pf[fn].netdev = netdev;
        ldev->tracker.netdev_state[fn].link_up = 0;
        ldev->tracker.netdev_state[fn].tx_enabled = 0;
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
                                    struct net_device *netdev)
{
        int i;

        spin_lock(&lag_lock);
        for (i = 0; i < MLX5_MAX_PORTS; i++) {
                if (ldev->pf[i].netdev == netdev) {
                        ldev->pf[i].netdev = NULL;
                        break;
                }
        }
        spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
                               struct mlx5_core_dev *dev)
{
        unsigned int fn = PCI_FUNC(dev->pdev->devfn);

        if (fn >= MLX5_MAX_PORTS)
                return;

        ldev->pf[fn].dev = dev;
        dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
                                  struct mlx5_core_dev *dev)
{
        int i;

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (ldev->pf[i].dev == dev)
                        break;

        if (i == MLX5_MAX_PORTS)
                return;

        ldev->pf[i].dev = NULL;
        dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev = NULL;
        struct mlx5_core_dev *tmp_dev;

        if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
            !MLX5_CAP_GEN(dev, lag_master) ||
            MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
                return 0;

        tmp_dev = mlx5_get_next_phys_dev(dev);
        if (tmp_dev)
                ldev = tmp_dev->priv.lag;

        if (!ldev) {
                ldev = mlx5_lag_dev_alloc(dev);
                if (!ldev) {
                        mlx5_core_err(dev, "Failed to alloc lag dev\n");
                        return 0;
                }
        } else {
                if (ldev->mode_changes_in_progress)
                        return -EAGAIN;
                mlx5_ldev_get(ldev);
        }

        mlx5_ldev_add_mdev(ldev, dev);

        return 0;
}

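/* Adding or removing an mdev must not race with a LAG mode change.
 * Both paths below poll: they back off for 100 ms and retry while
 * mode_changes_in_progress is set (remove) or while
 * __mlx5_lag_dev_add_mdev() returns -EAGAIN (add).
 */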
void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

recheck:
        mlx5_dev_list_lock();
        if (ldev->mode_changes_in_progress) {
                mlx5_dev_list_unlock();
                msleep(100);
                goto recheck;
        }
        mlx5_ldev_remove_mdev(ldev, dev);
        mlx5_dev_list_unlock();
        mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
        int err;

recheck:
        mlx5_dev_list_lock();
        err = __mlx5_lag_dev_add_mdev(dev);
        if (err) {
                mlx5_dev_list_unlock();
                msleep(100);
                goto recheck;
        }
        mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
                            struct net_device *netdev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_ldev_remove_netdev(ldev, netdev);
        ldev->flags &= ~MLX5_LAG_FLAG_READY;

        if (__mlx5_lag_is_active(ldev))
                mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
                         struct net_device *netdev)
{
        struct mlx5_lag *ldev;
        int i;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_ldev_add_netdev(ldev, dev, netdev);

        for (i = 0; i < MLX5_MAX_PORTS; i++)
                if (!ldev->pf[i].dev)
                        break;

        if (i >= MLX5_MAX_PORTS)
                ldev->flags |= MLX5_LAG_FLAG_READY;
        mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_roce(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_active(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_active(ldev) &&
                dev == ldev->pf[MLX5_LAG_P1].dev;
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_sriov(ldev);
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
        spin_unlock(&lag_lock);

        return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

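/* mlx5_lag_disable_change() / mlx5_lag_enable_change() bracket spans
 * during which the LAG mode must not change. The counter is taken
 * under the device-list lock; while it is non-zero the bond work backs
 * off and re-queues itself. Disabling also tears down an active LAG;
 * enabling re-queues the bond work so LAG can be re-established if
 * conditions still hold.
 */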
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
        struct mlx5_core_dev *dev0;
        struct mlx5_core_dev *dev1;
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_dev_list_lock();

        dev0 = ldev->pf[MLX5_LAG_P1].dev;
        dev1 = ldev->pf[MLX5_LAG_P2].dev;

        ldev->mode_changes_in_progress++;
        if (__mlx5_lag_is_active(ldev)) {
                mlx5_lag_lock_eswitches(dev0, dev1);
                mlx5_disable_lag(ldev);
                mlx5_lag_unlock_eswitches(dev0, dev1);
        }
        mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;

        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                return;

        mlx5_dev_list_lock();
        ldev->mode_changes_in_progress--;
        mlx5_dev_list_unlock();
        mlx5_queue_bond_work(ldev, 0);
}

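/* Return the netdev that currently carries RoCE traffic, with a
 * reference held via dev_hold(). In active-backup mode this is the
 * tx_enabled slave; otherwise port 1's netdev is reported. The caller
 * is expected to drop the reference with dev_put().
 */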
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
        struct net_device *ndev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);

        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
                ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
                       ldev->pf[MLX5_LAG_P1].netdev :
                       ldev->pf[MLX5_LAG_P2].netdev;
        } else {
                ndev = ldev->pf[MLX5_LAG_P1].netdev;
        }
        if (ndev)
                dev_hold(ndev);

unlock:
        spin_unlock(&lag_lock);

        return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
                           struct net_device *slave)
{
        struct mlx5_lag *ldev;
        u8 port = 0;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (!(ldev && __mlx5_lag_is_roce(ldev)))
                goto unlock;

        if (ldev->pf[MLX5_LAG_P1].netdev == slave)
                port = MLX5_LAG_P1;
        else
                port = MLX5_LAG_P2;

        port = ldev->v2p_map[port];

unlock:
        spin_unlock(&lag_lock);
        return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
        struct mlx5_core_dev *peer_dev = NULL;
        struct mlx5_lag *ldev;

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (!ldev)
                goto unlock;

        peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
                           ldev->pf[MLX5_LAG_P2].dev :
                           ldev->pf[MLX5_LAG_P1].dev;

unlock:
        spin_unlock(&lag_lock);
        return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

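/* Query congestion counters, summing both physical ports when LAG is
 * active. The caller passes byte offsets into query_cong_statistics_out
 * for each counter of interest. Hypothetical usage sketch (field name
 * chosen for illustration):
 *
 *      size_t offs[] = { MLX5_BYTE_OFF(query_cong_statistics_out,
 *                                      cur_flows) };
 *      u64 vals[ARRAY_SIZE(offs)];
 *      int err = mlx5_lag_query_cong_counters(mdev, vals,
 *                                             ARRAY_SIZE(offs), offs);
 */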
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
                                 u64 *values,
                                 int num_counters,
                                 size_t *offsets)
{
        int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
        struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
        struct mlx5_lag *ldev;
        int num_ports;
        int ret, i, j;
        void *out;

        out = kvzalloc(outlen, GFP_KERNEL);
        if (!out)
                return -ENOMEM;

        memset(values, 0, sizeof(*values) * num_counters);

        spin_lock(&lag_lock);
        ldev = mlx5_lag_dev(dev);
        if (ldev && __mlx5_lag_is_active(ldev)) {
                num_ports = MLX5_MAX_PORTS;
                mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
                mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
        } else {
                num_ports = 1;
                mdev[MLX5_LAG_P1] = dev;
        }
        spin_unlock(&lag_lock);

        for (i = 0; i < num_ports; ++i) {
                u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

                MLX5_SET(query_cong_statistics_in, in, opcode,
                         MLX5_CMD_OP_QUERY_CONG_STATISTICS);
                ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
                                          out);
                if (ret)
                        goto free;

                for (j = 0; j < num_counters; ++j)
                        values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
        }

free:
        kvfree(out);
        return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);