/* linux/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c */
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2020 Mellanox Technologies. */
   3
   4#include <linux/refcount.h>
   5#include <linux/list.h>
   6#include <linux/rculist.h>
   7#include <linux/rtnetlink.h>
   8#include <linux/workqueue.h>
   9#include <linux/spinlock.h>
  10#include <linux/notifier.h>
  11#include <net/netevent.h>
  12#include <net/arp.h>
  13#include "neigh.h"
  14#include "tc.h"
  15#include "en_rep.h"
  16#include "fs_core.h"
  17#include "diag/en_rep_tracepoint.h"
  18
  19static unsigned long mlx5e_rep_ipv6_interval(void)
  20{
  21        if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
  22                return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
  23
  24        return ~0UL;
  25}
  26
  27static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
  28{
  29        unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
  30        unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
  31        struct net_device *netdev = rpriv->netdev;
  32        struct mlx5e_priv *priv = netdev_priv(netdev);
  33
  34        rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
  35        mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
  36}
  37
  38void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
  39{
  40        struct mlx5e_rep_priv *rpriv = priv->ppriv;
  41        struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
  42
  43        mlx5_fc_queue_stats_work(priv->mdev,
  44                                 &neigh_update->neigh_stats_work,
  45                                 neigh_update->min_interval);
  46}
  47
/* Try to take a reference on @nhe; returns false if the entry's refcount
 * already dropped to zero (i.e. it is being destroyed).
 */
static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
{
	return refcount_inc_not_zero(&nhe->refcnt);
}
  52
  53static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
  54
  55void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
  56{
  57        if (refcount_dec_and_test(&nhe->refcnt)) {
  58                mlx5e_rep_neigh_entry_remove(nhe);
  59                kfree_rcu(nhe, rcu);
  60        }
  61}
  62
  63static struct mlx5e_neigh_hash_entry *
  64mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
  65                   struct mlx5e_neigh_hash_entry *nhe)
  66{
  67        struct mlx5e_neigh_hash_entry *next = NULL;
  68
  69        rcu_read_lock();
  70
  71        for (next = nhe ?
  72                     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
  73                                           &nhe->neigh_list,
  74                                           struct mlx5e_neigh_hash_entry,
  75                                           neigh_list) :
  76                     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
  77                                            struct mlx5e_neigh_hash_entry,
  78                                            neigh_list);
  79             next;
  80             next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
  81                                          &next->neigh_list,
  82                                          struct mlx5e_neigh_hash_entry,
  83                                          neigh_list))
  84                if (mlx5e_rep_neigh_entry_hold(next))
  85                        break;
  86
  87        rcu_read_unlock();
  88
  89        if (nhe)
  90                mlx5e_rep_neigh_entry_release(nhe);
  91
  92        return next;
  93}
  94
  95static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
  96{
  97        struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
  98                                                    neigh_update.neigh_stats_work.work);
  99        struct net_device *netdev = rpriv->netdev;
 100        struct mlx5e_priv *priv = netdev_priv(netdev);
 101        struct mlx5e_neigh_hash_entry *nhe = NULL;
 102
 103        rtnl_lock();
 104        if (!list_empty(&rpriv->neigh_update.neigh_list))
 105                mlx5e_rep_queue_neigh_stats_work(priv);
 106
 107        while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
 108                mlx5e_tc_update_neigh_used_value(nhe);
 109
 110        rtnl_unlock();
 111}
 112
/* Deferred context for a single NETEVENT_NEIGH_UPDATE: carries a reference
 * on both the neighbour and the matching hash entry until the work runs.
 */
struct neigh_update_work {
	struct work_struct work;
	struct neighbour *n;		/* held via neigh_hold() */
	struct mlx5e_neigh_hash_entry *nhe;	/* held via entry refcount */
};
 118
/* Drop the references taken by mlx5e_alloc_neigh_update_work() and free the
 * work descriptor itself.
 */
static void mlx5e_release_neigh_update_work(struct neigh_update_work *update_work)
{
	neigh_release(update_work->n);
	mlx5e_rep_neigh_entry_release(update_work->nhe);
	kfree(update_work);
}
 125
/* Work handler for one neighbour update: snapshot the neighbour's state
 * under its lock, then propagate connect/disconnect to every encap entry
 * hanging off the hash entry. Always releases the work's references.
 */
static void mlx5e_rep_neigh_update(struct work_struct *work)
{
	struct neigh_update_work *update_work = container_of(work, struct neigh_update_work,
							     work);
	struct mlx5e_neigh_hash_entry *nhe = update_work->nhe;
	struct neighbour *n = update_work->n;
	struct mlx5e_encap_entry *e = NULL;
	bool neigh_connected, same_dev;
	unsigned char ha[ETH_ALEN];
	u8 nud_state, dead;

	rtnl_lock();

	/* If these parameters are changed after we release the lock,
	 * we'll receive another event letting us know about it.
	 * We use this lock to avoid inconsistency between the neigh validity
	 * and it's hw address.
	 */
	read_lock_bh(&n->lock);
	memcpy(ha, n->ha, ETH_ALEN);
	nud_state = n->nud_state;
	dead = n->dead;
	same_dev = READ_ONCE(nhe->neigh_dev) == n->dev;
	read_unlock_bh(&n->lock);

	neigh_connected = (nud_state & NUD_VALID) && !dead;

	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);

	/* Stale event: the nhe has since moved to a different device. */
	if (!same_dev)
		goto out;

	/* mlx5e_get_next_init_encap() releases previous encap before returning
	 * the next one.
	 */
	while ((e = mlx5e_get_next_init_encap(nhe, e)) != NULL)
		mlx5e_rep_update_flows(netdev_priv(e->out_dev), e, neigh_connected, ha);

out:
	rtnl_unlock();
	mlx5e_release_neigh_update_work(update_work);
}
 168
 169static struct neigh_update_work *mlx5e_alloc_neigh_update_work(struct mlx5e_priv *priv,
 170                                                               struct neighbour *n)
 171{
 172        struct neigh_update_work *update_work;
 173        struct mlx5e_neigh_hash_entry *nhe;
 174        struct mlx5e_neigh m_neigh = {};
 175
 176        update_work = kzalloc(sizeof(*update_work), GFP_ATOMIC);
 177        if (WARN_ON(!update_work))
 178                return NULL;
 179
 180        m_neigh.family = n->ops->family;
 181        memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
 182
 183        /* Obtain reference to nhe as last step in order not to release it in
 184         * atomic context.
 185         */
 186        rcu_read_lock();
 187        nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
 188        rcu_read_unlock();
 189        if (!nhe) {
 190                kfree(update_work);
 191                return NULL;
 192        }
 193
 194        INIT_WORK(&update_work->work, mlx5e_rep_neigh_update);
 195        neigh_hold(n);
 196        update_work->n = n;
 197        update_work->nhe = nhe;
 198
 199        return update_work;
 200}
 201
/* Netevent notifier: defers neighbour updates to a work item (we run in
 * atomic context here) and tracks per-device DELAY_PROBE_TIME changes so
 * the HW counter sampling interval stays at the minimum across tables.
 */
static int mlx5e_rep_netevent_event(struct notifier_block *nb,
				    unsigned long event, void *ptr)
{
	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
						    neigh_update.netevent_nb);
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct net_device *netdev = rpriv->netdev;
	struct mlx5e_priv *priv = netdev_priv(netdev);
	struct mlx5e_neigh_hash_entry *nhe = NULL;
	struct neigh_update_work *update_work;
	struct neigh_parms *p;
	struct neighbour *n;
	bool found = false;

	switch (event) {
	case NETEVENT_NEIGH_UPDATE:
		n = ptr;
		/* Only the IPv4 ARP and IPv6 ND tables are of interest. */
#if IS_ENABLED(CONFIG_IPV6)
		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
#else
		if (n->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		update_work = mlx5e_alloc_neigh_update_work(priv, n);
		if (!update_work)
			return NOTIFY_DONE;

		queue_work(priv->wq, &update_work->work);
		break;

	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
		p = ptr;

		/* We check the device is present since we don't care about
		 * changes in the default table, we only care about changes
		 * done per device delay prob time parameter.
		 */
#if IS_ENABLED(CONFIG_IPV6)
		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
#else
		if (!p->dev || p->tbl != &arp_tbl)
#endif
			return NOTIFY_DONE;

		/* Only react if we track at least one neigh on that device. */
		rcu_read_lock();
		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
					neigh_list) {
			if (p->dev == READ_ONCE(nhe->neigh_dev)) {
				found = true;
				break;
			}
		}
		rcu_read_unlock();
		if (!found)
			return NOTIFY_DONE;

		/* NOTE(review): min_interval only ever shrinks here; it is
		 * re-derived from scratch at init time.
		 */
		neigh_update->min_interval = min_t(unsigned long,
						   NEIGH_VAR(p, DELAY_PROBE_TIME),
						   neigh_update->min_interval);
		mlx5_fc_update_sampling_interval(priv->mdev,
						 neigh_update->min_interval);
		break;
	}
	return NOTIFY_DONE;
}
 268
/* Hash table of nhe entries, keyed by the whole struct mlx5e_neigh
 * (address family + destination IP) embedded in each entry.
 */
static const struct rhashtable_params mlx5e_neigh_ht_params = {
	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
	.key_len = sizeof(struct mlx5e_neigh),
	.automatic_shrinking = true,
};
 275
 276int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
 277{
 278        struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
 279        int err;
 280
 281        err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
 282        if (err)
 283                goto out_err;
 284
 285        INIT_LIST_HEAD(&neigh_update->neigh_list);
 286        mutex_init(&neigh_update->encap_lock);
 287        INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
 288                          mlx5e_rep_neigh_stats_work);
 289        mlx5e_rep_neigh_update_init_interval(rpriv);
 290
 291        neigh_update->netevent_nb.notifier_call = mlx5e_rep_netevent_event;
 292        err = register_netevent_notifier(&neigh_update->netevent_nb);
 293        if (err)
 294                goto out_notifier;
 295        return 0;
 296
 297out_notifier:
 298        neigh_update->netevent_nb.notifier_call = NULL;
 299        rhashtable_destroy(&neigh_update->neigh_ht);
 300out_err:
 301        netdev_warn(rpriv->netdev,
 302                    "Failed to initialize neighbours handling for vport %d\n",
 303                    rpriv->rep->vport);
 304        return err;
 305}
 306
/* Tear down neigh tracking in the reverse order of mlx5e_rep_neigh_init():
 * stop new events, drain already-queued work, then free the structures.
 */
void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
{
	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);

	/* Init failed (or never ran) - nothing to undo. */
	if (!rpriv->neigh_update.netevent_nb.notifier_call)
		return;

	unregister_netevent_notifier(&neigh_update->netevent_nb);

	flush_workqueue(priv->wq); /* flush neigh update works */

	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);

	mutex_destroy(&neigh_update->encap_lock);
	rhashtable_destroy(&neigh_update->neigh_ht);
}
 324
 325static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
 326                                        struct mlx5e_neigh_hash_entry *nhe)
 327{
 328        struct mlx5e_rep_priv *rpriv = priv->ppriv;
 329        int err;
 330
 331        err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
 332                                     &nhe->rhash_node,
 333                                     mlx5e_neigh_ht_params);
 334        if (err)
 335                return err;
 336
 337        list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
 338
 339        return err;
 340}
 341
/* Unlink @nhe from both the RCU list and the hash table under encap_lock.
 * Called only on the final reference drop; the caller frees the memory via
 * kfree_rcu() afterwards.
 */
static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;

	mutex_lock(&rpriv->neigh_update.encap_lock);

	list_del_rcu(&nhe->neigh_list);

	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
			       &nhe->rhash_node,
			       mlx5e_neigh_ht_params);
	mutex_unlock(&rpriv->neigh_update.encap_lock);
}
 355
 356/* This function must only be called under the representor's encap_lock or
 357 * inside rcu read lock section.
 358 */
 359struct mlx5e_neigh_hash_entry *
 360mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
 361                             struct mlx5e_neigh *m_neigh)
 362{
 363        struct mlx5e_rep_priv *rpriv = priv->ppriv;
 364        struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
 365        struct mlx5e_neigh_hash_entry *nhe;
 366
 367        nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
 368                                     mlx5e_neigh_ht_params);
 369        return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
 370}
 371
 372int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
 373                                 struct mlx5e_neigh *m_neigh,
 374                                 struct net_device *neigh_dev,
 375                                 struct mlx5e_neigh_hash_entry **nhe)
 376{
 377        int err;
 378
 379        *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
 380        if (!*nhe)
 381                return -ENOMEM;
 382
 383        (*nhe)->priv = priv;
 384        memcpy(&(*nhe)->m_neigh, m_neigh, sizeof(*m_neigh));
 385        spin_lock_init(&(*nhe)->encap_list_lock);
 386        INIT_LIST_HEAD(&(*nhe)->encap_list);
 387        refcount_set(&(*nhe)->refcnt, 1);
 388        WRITE_ONCE((*nhe)->neigh_dev, neigh_dev);
 389
 390        err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
 391        if (err)
 392                goto out_free;
 393        return 0;
 394
 395out_free:
 396        kfree(*nhe);
 397        return err;
 398}
 399