linux/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
   3
   4#include <linux/netdevice.h>
   5#include <linux/list.h>
   6#include <net/lag.h>
   7
   8#include "mlx5_core.h"
   9#include "eswitch.h"
  10#include "esw/acl/ofld.h"
  11#include "en_rep.h"
  12
/* Per-uplink bookkeeping for representor bonding support. */
struct mlx5e_rep_bond {
	struct notifier_block nb;	/* netdev event notifier (CHANGEUPPER/CHANGELOWERSTATE) */
	struct netdev_net_notifier nn;	/* keeps nb registered across netns moves of the uplink netdev */
	struct list_head metadata_list;	/* list of mlx5e_rep_bond_metadata, one per bond device */
};
  18
/* One enslaved representor netdev, linked on its bond's slaves_list. */
struct mlx5e_rep_bond_slave_entry {
	struct list_head list;		/* link in mlx5e_rep_bond_metadata::slaves_list */
	struct net_device *netdev;	/* the slave representor netdev */
};
  23
/* Shared state for one bond (LAG) device whose lower devices are
 * representors of the same eswitch.
 */
struct mlx5e_rep_bond_metadata {
	struct list_head list; /* link to global list of rep_bond_metadata */
	struct mlx5_eswitch *esw; /* private of uplink holding rep bond metadata list */
	struct net_device *lag_dev;
	u32 metadata_reg_c_0; /* match metadata allocated for this bond via mlx5_esw_match_metadata_alloc() */

	struct list_head slaves_list; /* slaves list */
	int slaves; /* number of currently enslaved representors */
};
  34
  35static struct mlx5e_rep_bond_metadata *
  36mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
  37                               const struct net_device *lag_dev)
  38{
  39        struct mlx5e_rep_bond_metadata *found = NULL;
  40        struct mlx5e_rep_bond_metadata *cur;
  41
  42        list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
  43                if (cur->lag_dev == lag_dev) {
  44                        found = cur;
  45                        break;
  46                }
  47        }
  48
  49        return found;
  50}
  51
  52static struct mlx5e_rep_bond_slave_entry *
  53mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
  54                                  const struct net_device *netdev)
  55{
  56        struct mlx5e_rep_bond_slave_entry *found = NULL;
  57        struct mlx5e_rep_bond_slave_entry *cur;
  58
  59        list_for_each_entry(cur, &mdata->slaves_list, list) {
  60                if (cur->netdev == netdev) {
  61                        found = cur;
  62                        break;
  63                }
  64        }
  65
  66        return found;
  67}
  68
/* Unlink @mdata from the uplink's metadata list, return its reg_c_0 match
 * metadata value to the eswitch allocator and free it. Callers invoke this
 * only once no slaves remain, so slaves_list is expected to be empty.
 */
static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
{
	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
		   mdata->metadata_reg_c_0);
	list_del(&mdata->list);
	mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
	WARN_ON(!list_empty(&mdata->slaves_list));
	kfree(mdata);
}
  78
/* Add representor @netdev as a slave of bond device @lag_dev: allocate (or
 * reuse) the bond's shared match metadata and switch the slave vport's
 * ingress acl to that metadata value.
 *
 * Returns 0 on success or a negative errno; on failure of the first slave
 * the freshly created metadata is released again.
 *
 * This must be called under rtnl_lock
 */
int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
			   struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;
	int err;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata) {
		/* First netdev becomes slave, no metadata presents the lag_dev. Create one */
		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
		if (!mdata)
			return -ENOMEM;

		mdata->lag_dev = lag_dev;
		mdata->esw = esw;
		INIT_LIST_HEAD(&mdata->slaves_list);
		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
		if (!mdata->metadata_reg_c_0) {
			/* 0 means no metadata value was available */
			kfree(mdata);
			return -ENOSPC;
		}
		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);

		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
			   mdata->metadata_reg_c_0);
	}

	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
	if (!s_entry) {
		err = -ENOMEM;
		goto entry_alloc_err;
	}

	s_entry->netdev = netdev;
	/* rpriv is repurposed here: from the uplink's rep priv above to the
	 * slave netdev's own rep priv.
	 */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Associate the slave vport's ingress acl with the bond's shared
	 * metadata value.
	 */
	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
						     mdata->metadata_reg_c_0);
	if (err)
		goto ingress_err;

	mdata->slaves++;
	list_add_tail(&s_entry->list, &mdata->slaves_list);
	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	return 0;

ingress_err:
	kfree(s_entry);
entry_alloc_err:
	/* If this (failed) slave was the first one, the metadata created
	 * above has no users — release it again.
	 */
	if (!mdata->slaves)
		mlx5e_rep_bond_metadata_release(mdata);
	return err;
}
 142
/* Remove representor @netdev from bond device @lag_dev: restore the slave
 * vport's default (zero) ingress metadata, unbond its egress acl, drop the
 * shared rx rule state, and release the bond metadata once the last slave
 * is gone. Silently returns if @netdev was never enslaved to @lag_dev.
 *
 * This must be called under rtnl_lock
 */
void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
			    const struct net_device *netdev,
			    const struct net_device *lag_dev)
{
	struct mlx5e_rep_bond_slave_entry *s_entry;
	struct mlx5e_rep_bond_metadata *mdata;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5e_priv *priv;

	ASSERT_RTNL();

	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
	if (!mdata)
		return;

	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
	if (!s_entry)
		return;

	/* rpriv switches from the uplink's rep priv to the slave's own */
	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;

	/* Reset bond_metadata to zero first then reset all ingress/egress
	 * acls and rx rules of unslave representor's vport
	 */
	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
	mlx5e_rep_bond_update(priv, false);

	list_del(&s_entry->list);

	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);

	/* Last slave removed: the shared metadata has no users left */
	if (--mdata->slaves == 0)
		mlx5e_rep_bond_metadata_release(mdata);
	kfree(s_entry);
}
 183
 184static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
 185{
 186        struct mlx5e_rep_priv *rpriv;
 187        struct mlx5e_priv *priv;
 188
 189        /* A given netdev is not a representor or not a slave of LAG configuration */
 190        if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev))
 191                return false;
 192
 193        priv = netdev_priv(netdev);
 194        rpriv = priv->ppriv;
 195
 196        /* Egress acl forward to vport is supported only non-uplink representor */
 197        return rpriv->rep->vport != MLX5_VPORT_UPLINK;
 198}
 199
/* NETDEV_CHANGELOWERSTATE handler. When a representor slave becomes the
 * bond's tx-enabled (active) device, point every other slave's egress acl
 * at the active vport and re-install the shared rx rule on the active
 * vport.
 * NOTE(review): info->lower_state_info is dereferenced unconditionally —
 * assumes every LAG driver populates it for this event; confirm.
 */
static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
{
	struct netdev_notifier_changelowerstate_info *info;
	struct netdev_lag_lower_state_info *lag_info;
	struct mlx5e_rep_priv *rpriv;
	struct net_device *lag_dev;
	struct mlx5e_priv *priv;
	struct list_head *iter;
	struct net_device *dev;
	u16 acl_vport_num;
	u16 fwd_vport_num;
	int err;

	if (!mlx5e_rep_is_lag_netdev(netdev))
		return;

	info = ptr;
	lag_info = info->lower_state_info;
	/* This is not an event of a representor becoming active slave */
	if (!lag_info->tx_enabled)
		return;

	priv = netdev_priv(netdev);
	rpriv = priv->ppriv;
	fwd_vport_num = rpriv->rep->vport;	/* vport of the newly active slave */
	lag_dev = netdev_master_upper_dev_get(netdev);
	if (!lag_dev)
		return;

	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));

	/* Point everyone's egress acl to the vport of the active representor */
	netdev_for_each_lower_dev(lag_dev, dev, iter) {
		priv = netdev_priv(dev);
		rpriv = priv->ppriv;
		acl_vport_num = rpriv->rep->vport;
		if (acl_vport_num != fwd_vport_num) {
			/* Only single rx_rule for unique bond_metadata should be
			 * present, delete it if it's saved as passive vport's
			 * rx_rule with destination as passive vport's root_ft
			 */
			mlx5e_rep_bond_update(priv, true);
			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
							     fwd_vport_num,
							     acl_vport_num);
			if (err)
				netdev_warn(dev,
					    "configure slave vport(%d) egress fwd, err(%d)",
					    acl_vport_num, err);
		}
	}

	/* Insert new rx_rule for unique bond_metadata, save it as active vport's
	 * rx_rule with new destination as active vport's root_ft
	 */
	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
	if (err)
		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
			    fwd_vport_num, err);
}
 261
 262static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
 263{
 264        struct netdev_notifier_changeupper_info *info = ptr;
 265        struct mlx5e_rep_priv *rpriv;
 266        struct net_device *lag_dev;
 267        struct mlx5e_priv *priv;
 268
 269        if (!mlx5e_rep_is_lag_netdev(netdev))
 270                return;
 271
 272        priv = netdev_priv(netdev);
 273        rpriv = priv->ppriv;
 274        lag_dev = info->upper_dev;
 275
 276        netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
 277                   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
 278
 279        if (info->linking)
 280                mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
 281        else
 282                mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
 283}
 284
 285/* Bond device of representors and netdev events are used here in specific way
 286 * to support eswitch vports bonding and to perform failover of eswitch vport
 287 * by modifying the vport's egress acl of lower dev representors. Thus this
 288 * also change the traditional behavior of lower dev under bond device.
 289 * All non-representor netdevs or representors of other vendors as lower dev
 290 * of bond device are not supported.
 291 */
 292static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
 293                                       unsigned long event, void *ptr)
 294{
 295        struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
 296
 297        switch (event) {
 298        case NETDEV_CHANGELOWERSTATE:
 299                mlx5e_rep_changelowerstate_event(netdev, ptr);
 300                break;
 301        case NETDEV_CHANGEUPPER:
 302                mlx5e_rep_changeupper_event(netdev, ptr);
 303                break;
 304        }
 305        return NOTIFY_DONE;
 306}
 307
 308/* If HW support eswitch vports bonding, register a specific notifier to
 309 * handle it when two or more representors are bonded
 310 */
 311int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
 312{
 313        struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
 314        struct net_device *netdev = rpriv->netdev;
 315        struct mlx5e_priv *priv;
 316        int ret = 0;
 317
 318        priv = netdev_priv(netdev);
 319        if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
 320                goto out;
 321
 322        uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
 323        if (!uplink_priv->bond) {
 324                ret = -ENOMEM;
 325                goto out;
 326        }
 327
 328        INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
 329        uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
 330        ret = register_netdevice_notifier_dev_net(netdev,
 331                                                  &uplink_priv->bond->nb,
 332                                                  &uplink_priv->bond->nn);
 333        if (ret) {
 334                netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
 335                kvfree(uplink_priv->bond);
 336                uplink_priv->bond = NULL;
 337        }
 338
 339out:
 340        return ret;
 341}
 342
 343void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
 344{
 345        struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 346
 347        if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
 348            !rpriv->uplink_priv.bond)
 349                return;
 350
 351        unregister_netdevice_notifier_dev_net(rpriv->netdev,
 352                                              &rpriv->uplink_priv.bond->nb,
 353                                              &rpriv->uplink_priv.bond->nn);
 354        kvfree(rpriv->uplink_priv.bond);
 355}
 356