linux/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <linux/netdevice.h>
#include <net/nexthop.h>
#include "lag.h"
#include "lag_mp.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lib/mlx5.h"

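/* True if the LAG device is currently running in multipath mode. */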
static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
{
        return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
}

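/* Multipath offload is possible only when both ports are ready, the LAG is
 * either inactive or already in multipath mode, and the eswitch multipath
 * prerequisites hold on both devices.
 */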
static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
{
        if (!mlx5_lag_is_ready(ldev))
                return false;

        if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev))
                return false;

        return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
                                         ldev->pf[MLX5_LAG_P2].dev);
}

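/* Query helper: returns true if @dev belongs to a LAG that is in multipath
 * mode.
 */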
bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
{
        struct mlx5_lag *ldev;
        bool res;

        ldev = mlx5_lag_dev(dev);
        res  = ldev && __mlx5_lag_is_multipath(ldev);

        return res;
}

/**
 * mlx5_lag_set_port_affinity - set the LAG tx port affinity
 * @ldev: lag device
 * @port: affinity to set:
 *     0 - set normal affinity.
 *     1 - set affinity to port 1.
 *     2 - set affinity to port 2.
 */
static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
                                       enum mlx5_lag_port_affinity port)
{
        struct lag_tracker tracker;

        if (!__mlx5_lag_is_multipath(ldev))
                return;

        switch (port) {
        case MLX5_LAG_NORMAL_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        case MLX5_LAG_P1_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P1].link_up = true;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P2].link_up = false;
                break;
        case MLX5_LAG_P2_AFFINITY:
                tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
                tracker.netdev_state[MLX5_LAG_P1].link_up = false;
                tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
                tracker.netdev_state[MLX5_LAG_P2].link_up = true;
                break;
        default:
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Invalid affinity port %d", port);
                return;
        }

        if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
                mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
                                         MLX5_DEV_EVENT_PORT_AFFINITY,
                                         (void *)0);

        mlx5_modify_lag(ldev, &tracker);
}

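/* FIB notifier flush callback: wait for all queued FIB event work to finish. */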
static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);

        flush_workqueue(mp->wq);
}

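/* A FIB event deferred from atomic notifier context to the multipath
 * workqueue. Either fen_info (route events) or fnh_info (nexthop events)
 * is valid, depending on @event.
 */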
struct mlx5_fib_event_work {
        struct work_struct work;
        struct mlx5_lag *ldev;
        unsigned long event;
        union {
                struct fib_entry_notifier_info fen_info;
                struct fib_nh_notifier_info fnh_info;
        };
};

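/* Handle an IPv4 route add/replace/del for the route tracked for multipath:
 * on delete stop tracking it, on a single-path route steer traffic to the
 * corresponding port, and on a two-path route spanning both ports of this
 * HCA activate multipath LAG (if needed) and start tracking the route.
 */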
static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
                                     unsigned long event,
                                     struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;
        struct fib_nh *fib_nh0, *fib_nh1;
        unsigned int nhs;

        /* Handle delete event */
        if (event == FIB_EVENT_ENTRY_DEL) {
                /* stop track */
                if (mp->mfi == fi)
                        mp->mfi = NULL;
                return;
        }

        /* Handle add/replace event */
        nhs = fib_info_num_path(fi);
        if (nhs == 1) {
                if (__mlx5_lag_is_active(ldev)) {
                        struct fib_nh *nh = fib_info_nh(fi, 0);
                        struct net_device *nh_dev = nh->fib_nh_dev;
                        int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);

                        if (i < 0)
                                i = MLX5_LAG_NORMAL_AFFINITY;
                        else
                                ++i;

                        mlx5_lag_set_port_affinity(ldev, i);
                }
                return;
        }

        if (nhs != 2)
                return;

        /* Verify next hops are ports of the same hca */
        fib_nh0 = fib_info_nh(fi, 0);
        fib_nh1 = fib_info_nh(fi, 1);
        if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
            !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
              fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
                mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
                               "Multipath offload requires two ports of the same HCA\n");
                return;
        }

        /* First time we see multipath route */
        if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
                struct lag_tracker tracker;

                tracker = ldev->tracker;
                mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
        }

        mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        mp->mfi = fi;
}

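/* Handle a nexthop add/del on the tracked route: when one path is removed
 * move affinity to the remaining (peer) port, and when the second path is
 * added back restore normal affinity.
 */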
static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
                                       unsigned long event,
                                       struct fib_nh *fib_nh,
                                       struct fib_info *fi)
{
        struct lag_mp *mp = &ldev->lag_mp;

        /* Check the nh event is related to the route */
        if (!mp->mfi || mp->mfi != fi)
                return;

        /* nh added/removed */
        if (event == FIB_EVENT_NH_DEL) {
                int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);

                if (i >= 0) {
                        i = (i + 1) % 2 + 1; /* peer port */
                        mlx5_lag_set_port_affinity(ldev, i);
                }
        } else if (event == FIB_EVENT_NH_ADD &&
                   fib_info_num_path(fi) == 2) {
                mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
        }
}

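/* Work handler: process a deferred FIB event under RTNL and release the
 * fib_info reference taken in mlx5_lag_fib_event().
 */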
static void mlx5_lag_fib_update(struct work_struct *work)
{
        struct mlx5_fib_event_work *fib_work =
                container_of(work, struct mlx5_fib_event_work, work);
        struct mlx5_lag *ldev = fib_work->ldev;
        struct fib_nh *fib_nh;

        /* Protect internal structures from changes */
        rtnl_lock();
        switch (fib_work->event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                mlx5_lag_fib_route_event(ldev, fib_work->event,
                                         fib_work->fen_info.fi);
                fib_info_put(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fib_nh = fib_work->fnh_info.fib_nh;
                mlx5_lag_fib_nexthop_event(ldev,
                                           fib_work->event,
                                           fib_work->fnh_info.fib_nh,
                                           fib_nh->nh_parent);
                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        }

        rtnl_unlock();
        kfree(fib_work);
}

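/* Allocate and initialize a work item for a deferred FIB event. Runs in
 * atomic notifier context, hence GFP_ATOMIC.
 */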
static struct mlx5_fib_event_work *
mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
{
        struct mlx5_fib_event_work *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
        fib_work->ldev = ldev;
        fib_work->event = event;

        return fib_work;
}

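/* FIB notifier callback, called in atomic context: filter out events that
 * are not relevant for multipath offload and queue the rest to be handled
 * by mlx5_lag_fib_update().
 */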
static int mlx5_lag_fib_event(struct notifier_block *nb,
                              unsigned long event,
                              void *ptr)
{
        struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
        struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
        struct fib_notifier_info *info = ptr;
        struct mlx5_fib_event_work *fib_work;
        struct fib_entry_notifier_info *fen_info;
        struct fib_nh_notifier_info *fnh_info;
        struct net_device *fib_dev;
        struct fib_info *fi;

        if (info->family != AF_INET)
                return NOTIFY_DONE;

        if (!mlx5_lag_multipath_check_prereq(ldev))
                return NOTIFY_DONE;

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                fen_info = container_of(info, struct fib_entry_notifier_info,
                                        info);
                fi = fen_info->fi;
                if (fi->nh) {
                        NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
                        return notifier_from_errno(-EINVAL);
                }
                fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
                if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
                    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
                        return NOTIFY_DONE;
                }
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fen_info = *fen_info;
                /* Take reference on fib_info to prevent it from being
                 * freed while work is queued. Release it afterwards.
                 */
                fib_info_hold(fib_work->fen_info.fi);
                break;
        case FIB_EVENT_NH_ADD:
        case FIB_EVENT_NH_DEL:
                fnh_info = container_of(info, struct fib_nh_notifier_info,
                                        info);
                fib_work = mlx5_lag_init_fib_work(ldev, event);
                if (!fib_work)
                        return NOTIFY_DONE;
                fib_work->fnh_info = *fnh_info;
                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
                break;
        default:
                return NOTIFY_DONE;
        }

        queue_work(mp->wq, &fib_work->work);

        return NOTIFY_DONE;
}

void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
{
        /* Clear mfi, as it might become stale when a route delete event
         * has been missed, see mlx5_lag_fib_route_event().
         */
        ldev->lag_mp.mfi = NULL;
}

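/* Create the multipath workqueue and register the FIB notifier. Calling it
 * again after a successful init only clears the cached fib_info.
 */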
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;
        int err;

        /* always clear mfi, as it might become stale when a route delete event
         * has been missed
         */
        mp->mfi = NULL;

        if (mp->fib_nb.notifier_call)
                return 0;

        mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
        if (!mp->wq)
                return -ENOMEM;

        mp->fib_nb.notifier_call = mlx5_lag_fib_event;
        err = register_fib_notifier(&init_net, &mp->fib_nb,
                                    mlx5_lag_fib_event_flush, NULL);
        if (err) {
                destroy_workqueue(mp->wq);
                mp->fib_nb.notifier_call = NULL;
        }

        return err;
}

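/* Unregister the FIB notifier and destroy the multipath workqueue, if they
 * were set up.
 */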
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
{
        struct lag_mp *mp = &ldev->lag_mp;

        if (!mp->fib_nb.notifier_call)
                return;

        unregister_fib_notifier(&init_net, &mp->fib_nb);
        destroy_workqueue(mp->wq);
        mp->fib_nb.notifier_call = NULL;
        mp->mfi = NULL;
}