linux/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
<<
>>
Prefs
   1// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
   2/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
   3
   4#include <linux/kernel.h>
   5#include <linux/types.h>
   6#include <linux/rhashtable.h>
   7#include <linux/bitops.h>
   8#include <linux/in6.h>
   9#include <linux/notifier.h>
  10#include <linux/inetdevice.h>
  11#include <linux/netdevice.h>
  12#include <linux/if_bridge.h>
  13#include <linux/socket.h>
  14#include <linux/route.h>
  15#include <linux/gcd.h>
  16#include <linux/if_macvlan.h>
  17#include <linux/refcount.h>
  18#include <linux/jhash.h>
  19#include <net/netevent.h>
  20#include <net/neighbour.h>
  21#include <net/arp.h>
  22#include <net/ip_fib.h>
  23#include <net/ip6_fib.h>
  24#include <net/fib_rules.h>
  25#include <net/ip_tunnels.h>
  26#include <net/l3mdev.h>
  27#include <net/addrconf.h>
  28#include <net/ndisc.h>
  29#include <net/ipv6.h>
  30#include <net/fib_notifier.h>
  31#include <net/switchdev.h>
  32
  33#include "spectrum.h"
  34#include "core.h"
  35#include "reg.h"
  36#include "spectrum_cnt.h"
  37#include "spectrum_dpipe.h"
  38#include "spectrum_ipip.h"
  39#include "spectrum_mr.h"
  40#include "spectrum_mr_tcam.h"
  41#include "spectrum_router.h"
  42#include "spectrum_span.h"
  43
  44struct mlxsw_sp_fib;
  45struct mlxsw_sp_vr;
  46struct mlxsw_sp_lpm_tree;
  47struct mlxsw_sp_rif_ops;
  48
  49struct mlxsw_sp_router {
  50        struct mlxsw_sp *mlxsw_sp;
  51        struct mlxsw_sp_rif **rifs;
  52        struct mlxsw_sp_vr *vrs;
  53        struct rhashtable neigh_ht;
  54        struct rhashtable nexthop_group_ht;
  55        struct rhashtable nexthop_ht;
  56        struct list_head nexthop_list;
  57        struct {
  58                /* One tree for each protocol: IPv4 and IPv6 */
  59                struct mlxsw_sp_lpm_tree *proto_trees[2];
  60                struct mlxsw_sp_lpm_tree *trees;
  61                unsigned int tree_count;
  62        } lpm;
  63        struct {
  64                struct delayed_work dw;
  65                unsigned long interval; /* ms */
  66        } neighs_update;
  67        struct delayed_work nexthop_probe_dw;
  68#define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */
  69        struct list_head nexthop_neighs_list;
  70        struct list_head ipip_list;
  71        bool aborted;
  72        struct notifier_block fib_nb;
  73        struct notifier_block netevent_nb;
  74        struct notifier_block inetaddr_nb;
  75        struct notifier_block inet6addr_nb;
  76        const struct mlxsw_sp_rif_ops **rif_ops_arr;
  77        const struct mlxsw_sp_ipip_ops **ipip_ops_arr;
  78};
  79
  80struct mlxsw_sp_rif {
  81        struct list_head nexthop_list;
  82        struct list_head neigh_list;
  83        struct net_device *dev; /* NULL for underlay RIF */
  84        struct mlxsw_sp_fid *fid;
  85        unsigned char addr[ETH_ALEN];
  86        int mtu;
  87        u16 rif_index;
  88        u16 vr_id;
  89        const struct mlxsw_sp_rif_ops *ops;
  90        struct mlxsw_sp *mlxsw_sp;
  91
  92        unsigned int counter_ingress;
  93        bool counter_ingress_valid;
  94        unsigned int counter_egress;
  95        bool counter_egress_valid;
  96};
  97
  98struct mlxsw_sp_rif_params {
  99        struct net_device *dev;
 100        union {
 101                u16 system_port;
 102                u16 lag_id;
 103        };
 104        u16 vid;
 105        bool lag;
 106};
 107
 108struct mlxsw_sp_rif_subport {
 109        struct mlxsw_sp_rif common;
 110        refcount_t ref_count;
 111        union {
 112                u16 system_port;
 113                u16 lag_id;
 114        };
 115        u16 vid;
 116        bool lag;
 117};
 118
 119struct mlxsw_sp_rif_ipip_lb {
 120        struct mlxsw_sp_rif common;
 121        struct mlxsw_sp_rif_ipip_lb_config lb_config;
 122        u16 ul_vr_id; /* Reserved for Spectrum-2. */
 123        u16 ul_rif_id; /* Reserved for Spectrum. */
 124};
 125
 126struct mlxsw_sp_rif_params_ipip_lb {
 127        struct mlxsw_sp_rif_params common;
 128        struct mlxsw_sp_rif_ipip_lb_config lb_config;
 129};
 130
 131struct mlxsw_sp_rif_ops {
 132        enum mlxsw_sp_rif_type type;
 133        size_t rif_size;
 134
 135        void (*setup)(struct mlxsw_sp_rif *rif,
 136                      const struct mlxsw_sp_rif_params *params);
 137        int (*configure)(struct mlxsw_sp_rif *rif);
 138        void (*deconfigure)(struct mlxsw_sp_rif *rif);
 139        struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
 140                                         struct netlink_ext_ack *extack);
 141        void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
 142};
 143
 144static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
 145static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
 146static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 147                                  struct mlxsw_sp_lpm_tree *lpm_tree);
 148static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
 149                                     const struct mlxsw_sp_fib *fib,
 150                                     u8 tree_id);
 151static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 152                                       const struct mlxsw_sp_fib *fib);
 153
 154static unsigned int *
 155mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
 156                           enum mlxsw_sp_rif_counter_dir dir)
 157{
 158        switch (dir) {
 159        case MLXSW_SP_RIF_COUNTER_EGRESS:
 160                return &rif->counter_egress;
 161        case MLXSW_SP_RIF_COUNTER_INGRESS:
 162                return &rif->counter_ingress;
 163        }
 164        return NULL;
 165}
 166
 167static bool
 168mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
 169                               enum mlxsw_sp_rif_counter_dir dir)
 170{
 171        switch (dir) {
 172        case MLXSW_SP_RIF_COUNTER_EGRESS:
 173                return rif->counter_egress_valid;
 174        case MLXSW_SP_RIF_COUNTER_INGRESS:
 175                return rif->counter_ingress_valid;
 176        }
 177        return false;
 178}
 179
 180static void
 181mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
 182                               enum mlxsw_sp_rif_counter_dir dir,
 183                               bool valid)
 184{
 185        switch (dir) {
 186        case MLXSW_SP_RIF_COUNTER_EGRESS:
 187                rif->counter_egress_valid = valid;
 188                break;
 189        case MLXSW_SP_RIF_COUNTER_INGRESS:
 190                rif->counter_ingress_valid = valid;
 191                break;
 192        }
 193}
 194
 195static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
 196                                     unsigned int counter_index, bool enable,
 197                                     enum mlxsw_sp_rif_counter_dir dir)
 198{
 199        char ritr_pl[MLXSW_REG_RITR_LEN];
 200        bool is_egress = false;
 201        int err;
 202
 203        if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
 204                is_egress = true;
 205        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
 206        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 207        if (err)
 208                return err;
 209
 210        mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
 211                                    is_egress);
 212        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
 213}
 214
 215int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
 216                                   struct mlxsw_sp_rif *rif,
 217                                   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
 218{
 219        char ricnt_pl[MLXSW_REG_RICNT_LEN];
 220        unsigned int *p_counter_index;
 221        bool valid;
 222        int err;
 223
 224        valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
 225        if (!valid)
 226                return -EINVAL;
 227
 228        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 229        if (!p_counter_index)
 230                return -EINVAL;
 231        mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
 232                             MLXSW_REG_RICNT_OPCODE_NOP);
 233        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
 234        if (err)
 235                return err;
 236        *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
 237        return 0;
 238}
 239
 240static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
 241                                      unsigned int counter_index)
 242{
 243        char ricnt_pl[MLXSW_REG_RICNT_LEN];
 244
 245        mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
 246                             MLXSW_REG_RICNT_OPCODE_CLEAR);
 247        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
 248}
 249
 250int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
 251                               struct mlxsw_sp_rif *rif,
 252                               enum mlxsw_sp_rif_counter_dir dir)
 253{
 254        unsigned int *p_counter_index;
 255        int err;
 256
 257        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 258        if (!p_counter_index)
 259                return -EINVAL;
 260        err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 261                                     p_counter_index);
 262        if (err)
 263                return err;
 264
 265        err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
 266        if (err)
 267                goto err_counter_clear;
 268
 269        err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
 270                                        *p_counter_index, true, dir);
 271        if (err)
 272                goto err_counter_edit;
 273        mlxsw_sp_rif_counter_valid_set(rif, dir, true);
 274        return 0;
 275
 276err_counter_edit:
 277err_counter_clear:
 278        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 279                              *p_counter_index);
 280        return err;
 281}
 282
 283void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
 284                               struct mlxsw_sp_rif *rif,
 285                               enum mlxsw_sp_rif_counter_dir dir)
 286{
 287        unsigned int *p_counter_index;
 288
 289        if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
 290                return;
 291
 292        p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
 293        if (WARN_ON(!p_counter_index))
 294                return;
 295        mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
 296                                  *p_counter_index, false, dir);
 297        mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
 298                              *p_counter_index);
 299        mlxsw_sp_rif_counter_valid_set(rif, dir, false);
 300}
 301
 302static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
 303{
 304        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 305        struct devlink *devlink;
 306
 307        devlink = priv_to_devlink(mlxsw_sp->core);
 308        if (!devlink_dpipe_table_counter_enabled(devlink,
 309                                                 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
 310                return;
 311        mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 312}
 313
 314static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
 315{
 316        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
 317
 318        mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
 319}
 320
 321#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
 322
 323struct mlxsw_sp_prefix_usage {
 324        DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
 325};
 326
 327#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
 328        for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
 329
 330static bool
 331mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
 332                         struct mlxsw_sp_prefix_usage *prefix_usage2)
 333{
 334        return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 335}
 336
 337static void
 338mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
 339                          struct mlxsw_sp_prefix_usage *prefix_usage2)
 340{
 341        memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
 342}
 343
 344static void
 345mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
 346                          unsigned char prefix_len)
 347{
 348        set_bit(prefix_len, prefix_usage->b);
 349}
 350
 351static void
 352mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
 353                            unsigned char prefix_len)
 354{
 355        clear_bit(prefix_len, prefix_usage->b);
 356}
 357
 358struct mlxsw_sp_fib_key {
 359        unsigned char addr[sizeof(struct in6_addr)];
 360        unsigned char prefix_len;
 361};
 362
 363enum mlxsw_sp_fib_entry_type {
 364        MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
 365        MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
 366        MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
 367        MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
 368
 369        /* This is a special case of local delivery, where a packet should be
 370         * decapsulated on reception. Note that there is no corresponding ENCAP,
 371         * because that's a type of next hop, not of FIB entry. (There can be
 372         * several next hops in a REMOTE entry, and some of them may be
 373         * encapsulating entries.)
 374         */
 375        MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
 376        MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
 377};
 378
 379struct mlxsw_sp_nexthop_group;
 380
 381struct mlxsw_sp_fib_node {
 382        struct list_head entry_list;
 383        struct list_head list;
 384        struct rhash_head ht_node;
 385        struct mlxsw_sp_fib *fib;
 386        struct mlxsw_sp_fib_key key;
 387};
 388
 389struct mlxsw_sp_fib_entry_decap {
 390        struct mlxsw_sp_ipip_entry *ipip_entry;
 391        u32 tunnel_index;
 392};
 393
 394struct mlxsw_sp_fib_entry {
 395        struct list_head list;
 396        struct mlxsw_sp_fib_node *fib_node;
 397        enum mlxsw_sp_fib_entry_type type;
 398        struct list_head nexthop_group_node;
 399        struct mlxsw_sp_nexthop_group *nh_group;
 400        struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
 401};
 402
 403struct mlxsw_sp_fib4_entry {
 404        struct mlxsw_sp_fib_entry common;
 405        u32 tb_id;
 406        u32 prio;
 407        u8 tos;
 408        u8 type;
 409};
 410
 411struct mlxsw_sp_fib6_entry {
 412        struct mlxsw_sp_fib_entry common;
 413        struct list_head rt6_list;
 414        unsigned int nrt6;
 415};
 416
 417struct mlxsw_sp_rt6 {
 418        struct list_head list;
 419        struct fib6_info *rt;
 420};
 421
 422struct mlxsw_sp_lpm_tree {
 423        u8 id; /* tree ID */
 424        unsigned int ref_count;
 425        enum mlxsw_sp_l3proto proto;
 426        unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
 427        struct mlxsw_sp_prefix_usage prefix_usage;
 428};
 429
 430struct mlxsw_sp_fib {
 431        struct rhashtable ht;
 432        struct list_head node_list;
 433        struct mlxsw_sp_vr *vr;
 434        struct mlxsw_sp_lpm_tree *lpm_tree;
 435        enum mlxsw_sp_l3proto proto;
 436};
 437
 438struct mlxsw_sp_vr {
 439        u16 id; /* virtual router ID */
 440        u32 tb_id; /* kernel fib table id */
 441        unsigned int rif_count;
 442        struct mlxsw_sp_fib *fib4;
 443        struct mlxsw_sp_fib *fib6;
 444        struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
 445        struct mlxsw_sp_rif *ul_rif;
 446        refcount_t ul_rif_refcnt;
 447};
 448
 449static const struct rhashtable_params mlxsw_sp_fib_ht_params;
 450
 451static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
 452                                                struct mlxsw_sp_vr *vr,
 453                                                enum mlxsw_sp_l3proto proto)
 454{
 455        struct mlxsw_sp_lpm_tree *lpm_tree;
 456        struct mlxsw_sp_fib *fib;
 457        int err;
 458
 459        lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
 460        fib = kzalloc(sizeof(*fib), GFP_KERNEL);
 461        if (!fib)
 462                return ERR_PTR(-ENOMEM);
 463        err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
 464        if (err)
 465                goto err_rhashtable_init;
 466        INIT_LIST_HEAD(&fib->node_list);
 467        fib->proto = proto;
 468        fib->vr = vr;
 469        fib->lpm_tree = lpm_tree;
 470        mlxsw_sp_lpm_tree_hold(lpm_tree);
 471        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
 472        if (err)
 473                goto err_lpm_tree_bind;
 474        return fib;
 475
 476err_lpm_tree_bind:
 477        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 478err_rhashtable_init:
 479        kfree(fib);
 480        return ERR_PTR(err);
 481}
 482
 483static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
 484                                 struct mlxsw_sp_fib *fib)
 485{
 486        mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
 487        mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
 488        WARN_ON(!list_empty(&fib->node_list));
 489        rhashtable_destroy(&fib->ht);
 490        kfree(fib);
 491}
 492
 493static struct mlxsw_sp_lpm_tree *
 494mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
 495{
 496        static struct mlxsw_sp_lpm_tree *lpm_tree;
 497        int i;
 498
 499        for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 500                lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 501                if (lpm_tree->ref_count == 0)
 502                        return lpm_tree;
 503        }
 504        return NULL;
 505}
 506
 507static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
 508                                   struct mlxsw_sp_lpm_tree *lpm_tree)
 509{
 510        char ralta_pl[MLXSW_REG_RALTA_LEN];
 511
 512        mlxsw_reg_ralta_pack(ralta_pl, true,
 513                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
 514                             lpm_tree->id);
 515        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 516}
 517
 518static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
 519                                   struct mlxsw_sp_lpm_tree *lpm_tree)
 520{
 521        char ralta_pl[MLXSW_REG_RALTA_LEN];
 522
 523        mlxsw_reg_ralta_pack(ralta_pl, false,
 524                             (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
 525                             lpm_tree->id);
 526        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
 527}
 528
 529static int
 530mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
 531                                  struct mlxsw_sp_prefix_usage *prefix_usage,
 532                                  struct mlxsw_sp_lpm_tree *lpm_tree)
 533{
 534        char ralst_pl[MLXSW_REG_RALST_LEN];
 535        u8 root_bin = 0;
 536        u8 prefix;
 537        u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
 538
 539        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
 540                root_bin = prefix;
 541
 542        mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
 543        mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
 544                if (prefix == 0)
 545                        continue;
 546                mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
 547                                         MLXSW_REG_RALST_BIN_NO_CHILD);
 548                last_prefix = prefix;
 549        }
 550        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
 551}
 552
 553static struct mlxsw_sp_lpm_tree *
 554mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
 555                         struct mlxsw_sp_prefix_usage *prefix_usage,
 556                         enum mlxsw_sp_l3proto proto)
 557{
 558        struct mlxsw_sp_lpm_tree *lpm_tree;
 559        int err;
 560
 561        lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
 562        if (!lpm_tree)
 563                return ERR_PTR(-EBUSY);
 564        lpm_tree->proto = proto;
 565        err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
 566        if (err)
 567                return ERR_PTR(err);
 568
 569        err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
 570                                                lpm_tree);
 571        if (err)
 572                goto err_left_struct_set;
 573        memcpy(&lpm_tree->prefix_usage, prefix_usage,
 574               sizeof(lpm_tree->prefix_usage));
 575        memset(&lpm_tree->prefix_ref_count, 0,
 576               sizeof(lpm_tree->prefix_ref_count));
 577        lpm_tree->ref_count = 1;
 578        return lpm_tree;
 579
 580err_left_struct_set:
 581        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
 582        return ERR_PTR(err);
 583}
 584
 585static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
 586                                      struct mlxsw_sp_lpm_tree *lpm_tree)
 587{
 588        mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
 589}
 590
 591static struct mlxsw_sp_lpm_tree *
 592mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
 593                      struct mlxsw_sp_prefix_usage *prefix_usage,
 594                      enum mlxsw_sp_l3proto proto)
 595{
 596        struct mlxsw_sp_lpm_tree *lpm_tree;
 597        int i;
 598
 599        for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 600                lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 601                if (lpm_tree->ref_count != 0 &&
 602                    lpm_tree->proto == proto &&
 603                    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
 604                                             prefix_usage)) {
 605                        mlxsw_sp_lpm_tree_hold(lpm_tree);
 606                        return lpm_tree;
 607                }
 608        }
 609        return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
 610}
 611
 612static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
 613{
 614        lpm_tree->ref_count++;
 615}
 616
 617static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
 618                                  struct mlxsw_sp_lpm_tree *lpm_tree)
 619{
 620        if (--lpm_tree->ref_count == 0)
 621                mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
 622}
 623
 624#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
 625
 626static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
 627{
 628        struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
 629        struct mlxsw_sp_lpm_tree *lpm_tree;
 630        u64 max_trees;
 631        int err, i;
 632
 633        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
 634                return -EIO;
 635
 636        max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
 637        mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
 638        mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
 639                                             sizeof(struct mlxsw_sp_lpm_tree),
 640                                             GFP_KERNEL);
 641        if (!mlxsw_sp->router->lpm.trees)
 642                return -ENOMEM;
 643
 644        for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
 645                lpm_tree = &mlxsw_sp->router->lpm.trees[i];
 646                lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
 647        }
 648
 649        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
 650                                         MLXSW_SP_L3_PROTO_IPV4);
 651        if (IS_ERR(lpm_tree)) {
 652                err = PTR_ERR(lpm_tree);
 653                goto err_ipv4_tree_get;
 654        }
 655        mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
 656
 657        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
 658                                         MLXSW_SP_L3_PROTO_IPV6);
 659        if (IS_ERR(lpm_tree)) {
 660                err = PTR_ERR(lpm_tree);
 661                goto err_ipv6_tree_get;
 662        }
 663        mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
 664
 665        return 0;
 666
 667err_ipv6_tree_get:
 668        lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
 669        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 670err_ipv4_tree_get:
 671        kfree(mlxsw_sp->router->lpm.trees);
 672        return err;
 673}
 674
 675static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
 676{
 677        struct mlxsw_sp_lpm_tree *lpm_tree;
 678
 679        lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
 680        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 681
 682        lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
 683        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
 684
 685        kfree(mlxsw_sp->router->lpm.trees);
 686}
 687
 688static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
 689{
 690        return !!vr->fib4 || !!vr->fib6 ||
 691               !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
 692               !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
 693}
 694
 695static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 696{
 697        struct mlxsw_sp_vr *vr;
 698        int i;
 699
 700        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 701                vr = &mlxsw_sp->router->vrs[i];
 702                if (!mlxsw_sp_vr_is_used(vr))
 703                        return vr;
 704        }
 705        return NULL;
 706}
 707
 708static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
 709                                     const struct mlxsw_sp_fib *fib, u8 tree_id)
 710{
 711        char raltb_pl[MLXSW_REG_RALTB_LEN];
 712
 713        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
 714                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
 715                             tree_id);
 716        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 717}
 718
 719static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
 720                                       const struct mlxsw_sp_fib *fib)
 721{
 722        char raltb_pl[MLXSW_REG_RALTB_LEN];
 723
 724        /* Bind to tree 0 which is default */
 725        mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
 726                             (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
 727        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
 728}
 729
 730static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
 731{
 732        /* For our purpose, squash main, default and local tables into one */
 733        if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
 734                tb_id = RT_TABLE_MAIN;
 735        return tb_id;
 736}
 737
 738static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
 739                                            u32 tb_id)
 740{
 741        struct mlxsw_sp_vr *vr;
 742        int i;
 743
 744        tb_id = mlxsw_sp_fix_tb_id(tb_id);
 745
 746        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 747                vr = &mlxsw_sp->router->vrs[i];
 748                if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
 749                        return vr;
 750        }
 751        return NULL;
 752}
 753
 754int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
 755                                u16 *vr_id)
 756{
 757        struct mlxsw_sp_vr *vr;
 758
 759        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
 760        if (!vr)
 761                return -ESRCH;
 762        *vr_id = vr->id;
 763
 764        return 0;
 765}
 766
 767static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
 768                                            enum mlxsw_sp_l3proto proto)
 769{
 770        switch (proto) {
 771        case MLXSW_SP_L3_PROTO_IPV4:
 772                return vr->fib4;
 773        case MLXSW_SP_L3_PROTO_IPV6:
 774                return vr->fib6;
 775        }
 776        return NULL;
 777}
 778
 779static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
 780                                              u32 tb_id,
 781                                              struct netlink_ext_ack *extack)
 782{
 783        struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
 784        struct mlxsw_sp_fib *fib4;
 785        struct mlxsw_sp_fib *fib6;
 786        struct mlxsw_sp_vr *vr;
 787        int err;
 788
 789        vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
 790        if (!vr) {
 791                NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
 792                return ERR_PTR(-EBUSY);
 793        }
 794        fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
 795        if (IS_ERR(fib4))
 796                return ERR_CAST(fib4);
 797        fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
 798        if (IS_ERR(fib6)) {
 799                err = PTR_ERR(fib6);
 800                goto err_fib6_create;
 801        }
 802        mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
 803                                             MLXSW_SP_L3_PROTO_IPV4);
 804        if (IS_ERR(mr4_table)) {
 805                err = PTR_ERR(mr4_table);
 806                goto err_mr4_table_create;
 807        }
 808        mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
 809                                             MLXSW_SP_L3_PROTO_IPV6);
 810        if (IS_ERR(mr6_table)) {
 811                err = PTR_ERR(mr6_table);
 812                goto err_mr6_table_create;
 813        }
 814
 815        vr->fib4 = fib4;
 816        vr->fib6 = fib6;
 817        vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
 818        vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
 819        vr->tb_id = tb_id;
 820        return vr;
 821
 822err_mr6_table_create:
 823        mlxsw_sp_mr_table_destroy(mr4_table);
 824err_mr4_table_create:
 825        mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
 826err_fib6_create:
 827        mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
 828        return ERR_PTR(err);
 829}
 830
 831static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
 832                                struct mlxsw_sp_vr *vr)
 833{
 834        mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
 835        vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
 836        mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
 837        vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
 838        mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
 839        vr->fib6 = NULL;
 840        mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
 841        vr->fib4 = NULL;
 842}
 843
 844static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
 845                                           struct netlink_ext_ack *extack)
 846{
 847        struct mlxsw_sp_vr *vr;
 848
 849        tb_id = mlxsw_sp_fix_tb_id(tb_id);
 850        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
 851        if (!vr)
 852                vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
 853        return vr;
 854}
 855
 856static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
 857{
 858        if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
 859            list_empty(&vr->fib6->node_list) &&
 860            mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
 861            mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
 862                mlxsw_sp_vr_destroy(mlxsw_sp, vr);
 863}
 864
 865static bool
 866mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
 867                                    enum mlxsw_sp_l3proto proto, u8 tree_id)
 868{
 869        struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
 870
 871        if (!mlxsw_sp_vr_is_used(vr))
 872                return false;
 873        if (fib->lpm_tree->id == tree_id)
 874                return true;
 875        return false;
 876}
 877
 878static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
 879                                        struct mlxsw_sp_fib *fib,
 880                                        struct mlxsw_sp_lpm_tree *new_tree)
 881{
 882        struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
 883        int err;
 884
 885        fib->lpm_tree = new_tree;
 886        mlxsw_sp_lpm_tree_hold(new_tree);
 887        err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
 888        if (err)
 889                goto err_tree_bind;
 890        mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
 891        return 0;
 892
 893err_tree_bind:
 894        mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
 895        fib->lpm_tree = old_tree;
 896        return err;
 897}
 898
 899static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
 900                                         struct mlxsw_sp_fib *fib,
 901                                         struct mlxsw_sp_lpm_tree *new_tree)
 902{
 903        enum mlxsw_sp_l3proto proto = fib->proto;
 904        struct mlxsw_sp_lpm_tree *old_tree;
 905        u8 old_id, new_id = new_tree->id;
 906        struct mlxsw_sp_vr *vr;
 907        int i, err;
 908
 909        old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
 910        old_id = old_tree->id;
 911
 912        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 913                vr = &mlxsw_sp->router->vrs[i];
 914                if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
 915                        continue;
 916                err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
 917                                                   mlxsw_sp_vr_fib(vr, proto),
 918                                                   new_tree);
 919                if (err)
 920                        goto err_tree_replace;
 921        }
 922
 923        memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
 924               sizeof(new_tree->prefix_ref_count));
 925        mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
 926        mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
 927
 928        return 0;
 929
 930err_tree_replace:
 931        for (i--; i >= 0; i--) {
 932                if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
 933                        continue;
 934                mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
 935                                             mlxsw_sp_vr_fib(vr, proto),
 936                                             old_tree);
 937        }
 938        return err;
 939}
 940
 941static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
 942{
 943        struct mlxsw_sp_vr *vr;
 944        u64 max_vrs;
 945        int i;
 946
 947        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
 948                return -EIO;
 949
 950        max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
 951        mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
 952                                        GFP_KERNEL);
 953        if (!mlxsw_sp->router->vrs)
 954                return -ENOMEM;
 955
 956        for (i = 0; i < max_vrs; i++) {
 957                vr = &mlxsw_sp->router->vrs[i];
 958                vr->id = i;
 959        }
 960
 961        return 0;
 962}
 963
 964static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
 965
 966static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
 967{
 968        /* At this stage we're guaranteed not to have new incoming
 969         * FIB notifications and the work queue is free from FIBs
 970         * sitting on top of mlxsw netdevs. However, we can still
 971         * have other FIBs queued. Flush the queue before flushing
 972         * the device's tables. No need for locks, as we're the only
 973         * writer.
 974         */
 975        mlxsw_core_flush_owq();
 976        mlxsw_sp_router_fib_flush(mlxsw_sp);
 977        kfree(mlxsw_sp->router->vrs);
 978}
 979
 980static struct net_device *
 981__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
 982{
 983        struct ip_tunnel *tun = netdev_priv(ol_dev);
 984        struct net *net = dev_net(ol_dev);
 985
 986        return __dev_get_by_index(net, tun->parms.link);
 987}
 988
 989u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
 990{
 991        struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
 992
 993        if (d)
 994                return l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
 995        else
 996                return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN;
 997}
 998
 999static struct mlxsw_sp_rif *
1000mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
1001                    const struct mlxsw_sp_rif_params *params,
1002                    struct netlink_ext_ack *extack);
1003
1004static struct mlxsw_sp_rif_ipip_lb *
1005mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
1006                                enum mlxsw_sp_ipip_type ipipt,
1007                                struct net_device *ol_dev,
1008                                struct netlink_ext_ack *extack)
1009{
1010        struct mlxsw_sp_rif_params_ipip_lb lb_params;
1011        const struct mlxsw_sp_ipip_ops *ipip_ops;
1012        struct mlxsw_sp_rif *rif;
1013
1014        ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1015        lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1016                .common.dev = ol_dev,
1017                .common.lag = false,
1018                .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1019        };
1020
1021        rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1022        if (IS_ERR(rif))
1023                return ERR_CAST(rif);
1024        return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1025}
1026
1027static struct mlxsw_sp_ipip_entry *
1028mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1029                          enum mlxsw_sp_ipip_type ipipt,
1030                          struct net_device *ol_dev)
1031{
1032        const struct mlxsw_sp_ipip_ops *ipip_ops;
1033        struct mlxsw_sp_ipip_entry *ipip_entry;
1034        struct mlxsw_sp_ipip_entry *ret = NULL;
1035
1036        ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1037        ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1038        if (!ipip_entry)
1039                return ERR_PTR(-ENOMEM);
1040
1041        ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1042                                                            ol_dev, NULL);
1043        if (IS_ERR(ipip_entry->ol_lb)) {
1044                ret = ERR_CAST(ipip_entry->ol_lb);
1045                goto err_ol_ipip_lb_create;
1046        }
1047
1048        ipip_entry->ipipt = ipipt;
1049        ipip_entry->ol_dev = ol_dev;
1050
1051        switch (ipip_ops->ul_proto) {
1052        case MLXSW_SP_L3_PROTO_IPV4:
1053                ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1054                break;
1055        case MLXSW_SP_L3_PROTO_IPV6:
1056                WARN_ON(1);
1057                break;
1058        }
1059
1060        return ipip_entry;
1061
1062err_ol_ipip_lb_create:
1063        kfree(ipip_entry);
1064        return ret;
1065}
1066
1067static void
1068mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1069{
1070        mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1071        kfree(ipip_entry);
1072}
1073
1074static bool
1075mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1076                                  const enum mlxsw_sp_l3proto ul_proto,
1077                                  union mlxsw_sp_l3addr saddr,
1078                                  u32 ul_tb_id,
1079                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1080{
1081        u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1082        enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1083        union mlxsw_sp_l3addr tun_saddr;
1084
1085        if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1086                return false;
1087
1088        tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1089        return tun_ul_tb_id == ul_tb_id &&
1090               mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1091}
1092
1093static int
1094mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1095                              struct mlxsw_sp_fib_entry *fib_entry,
1096                              struct mlxsw_sp_ipip_entry *ipip_entry)
1097{
1098        u32 tunnel_index;
1099        int err;
1100
1101        err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1102                                  1, &tunnel_index);
1103        if (err)
1104                return err;
1105
1106        ipip_entry->decap_fib_entry = fib_entry;
1107        fib_entry->decap.ipip_entry = ipip_entry;
1108        fib_entry->decap.tunnel_index = tunnel_index;
1109        return 0;
1110}
1111
1112static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1113                                          struct mlxsw_sp_fib_entry *fib_entry)
1114{
1115        /* Unlink this node from the IPIP entry that it's the decap entry of. */
1116        fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1117        fib_entry->decap.ipip_entry = NULL;
1118        mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1119                           1, fib_entry->decap.tunnel_index);
1120}
1121
1122static struct mlxsw_sp_fib_node *
1123mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1124                         size_t addr_len, unsigned char prefix_len);
1125static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1126                                     struct mlxsw_sp_fib_entry *fib_entry);
1127
1128static void
1129mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1130                                 struct mlxsw_sp_ipip_entry *ipip_entry)
1131{
1132        struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1133
1134        mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1135        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1136
1137        mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1138}
1139
1140static void
1141mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1142                                  struct mlxsw_sp_ipip_entry *ipip_entry,
1143                                  struct mlxsw_sp_fib_entry *decap_fib_entry)
1144{
1145        if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1146                                          ipip_entry))
1147                return;
1148        decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1149
1150        if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1151                mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1152}
1153
1154static struct mlxsw_sp_fib_entry *
1155mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1156                                     enum mlxsw_sp_l3proto proto,
1157                                     const union mlxsw_sp_l3addr *addr,
1158                                     enum mlxsw_sp_fib_entry_type type)
1159{
1160        struct mlxsw_sp_fib_entry *fib_entry;
1161        struct mlxsw_sp_fib_node *fib_node;
1162        unsigned char addr_prefix_len;
1163        struct mlxsw_sp_fib *fib;
1164        struct mlxsw_sp_vr *vr;
1165        const void *addrp;
1166        size_t addr_len;
1167        u32 addr4;
1168
1169        vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1170        if (!vr)
1171                return NULL;
1172        fib = mlxsw_sp_vr_fib(vr, proto);
1173
1174        switch (proto) {
1175        case MLXSW_SP_L3_PROTO_IPV4:
1176                addr4 = be32_to_cpu(addr->addr4);
1177                addrp = &addr4;
1178                addr_len = 4;
1179                addr_prefix_len = 32;
1180                break;
1181        case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
1182        default:
1183                WARN_ON(1);
1184                return NULL;
1185        }
1186
1187        fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1188                                            addr_prefix_len);
1189        if (!fib_node || list_empty(&fib_node->entry_list))
1190                return NULL;
1191
1192        fib_entry = list_first_entry(&fib_node->entry_list,
1193                                     struct mlxsw_sp_fib_entry, list);
1194        if (fib_entry->type != type)
1195                return NULL;
1196
1197        return fib_entry;
1198}
1199
1200/* Given an IPIP entry, find the corresponding decap route. */
1201static struct mlxsw_sp_fib_entry *
1202mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1203                               struct mlxsw_sp_ipip_entry *ipip_entry)
1204{
1205        static struct mlxsw_sp_fib_node *fib_node;
1206        const struct mlxsw_sp_ipip_ops *ipip_ops;
1207        struct mlxsw_sp_fib_entry *fib_entry;
1208        unsigned char saddr_prefix_len;
1209        union mlxsw_sp_l3addr saddr;
1210        struct mlxsw_sp_fib *ul_fib;
1211        struct mlxsw_sp_vr *ul_vr;
1212        const void *saddrp;
1213        size_t saddr_len;
1214        u32 ul_tb_id;
1215        u32 saddr4;
1216
1217        ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1218
1219        ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1220        ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1221        if (!ul_vr)
1222                return NULL;
1223
1224        ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1225        saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1226                                           ipip_entry->ol_dev);
1227
1228        switch (ipip_ops->ul_proto) {
1229        case MLXSW_SP_L3_PROTO_IPV4:
1230                saddr4 = be32_to_cpu(saddr.addr4);
1231                saddrp = &saddr4;
1232                saddr_len = 4;
1233                saddr_prefix_len = 32;
1234                break;
1235        case MLXSW_SP_L3_PROTO_IPV6:
1236                WARN_ON(1);
1237                return NULL;
1238        }
1239
1240        fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1241                                            saddr_prefix_len);
1242        if (!fib_node || list_empty(&fib_node->entry_list))
1243                return NULL;
1244
1245        fib_entry = list_first_entry(&fib_node->entry_list,
1246                                     struct mlxsw_sp_fib_entry, list);
1247        if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1248                return NULL;
1249
1250        return fib_entry;
1251}
1252
1253static struct mlxsw_sp_ipip_entry *
1254mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1255                           enum mlxsw_sp_ipip_type ipipt,
1256                           struct net_device *ol_dev)
1257{
1258        struct mlxsw_sp_ipip_entry *ipip_entry;
1259
1260        ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1261        if (IS_ERR(ipip_entry))
1262                return ipip_entry;
1263
1264        list_add_tail(&ipip_entry->ipip_list_node,
1265                      &mlxsw_sp->router->ipip_list);
1266
1267        return ipip_entry;
1268}
1269
1270static void
1271mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1272                            struct mlxsw_sp_ipip_entry *ipip_entry)
1273{
1274        list_del(&ipip_entry->ipip_list_node);
1275        mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1276}
1277
1278static bool
1279mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1280                                  const struct net_device *ul_dev,
1281                                  enum mlxsw_sp_l3proto ul_proto,
1282                                  union mlxsw_sp_l3addr ul_dip,
1283                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1284{
1285        u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1286        enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1287
1288        if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1289                return false;
1290
1291        return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1292                                                 ul_tb_id, ipip_entry);
1293}
1294
1295/* Given decap parameters, find the corresponding IPIP entry. */
1296static struct mlxsw_sp_ipip_entry *
1297mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1298                                  const struct net_device *ul_dev,
1299                                  enum mlxsw_sp_l3proto ul_proto,
1300                                  union mlxsw_sp_l3addr ul_dip)
1301{
1302        struct mlxsw_sp_ipip_entry *ipip_entry;
1303
1304        list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1305                            ipip_list_node)
1306                if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1307                                                      ul_proto, ul_dip,
1308                                                      ipip_entry))
1309                        return ipip_entry;
1310
1311        return NULL;
1312}
1313
1314static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1315                                      const struct net_device *dev,
1316                                      enum mlxsw_sp_ipip_type *p_type)
1317{
1318        struct mlxsw_sp_router *router = mlxsw_sp->router;
1319        const struct mlxsw_sp_ipip_ops *ipip_ops;
1320        enum mlxsw_sp_ipip_type ipipt;
1321
1322        for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1323                ipip_ops = router->ipip_ops_arr[ipipt];
1324                if (dev->type == ipip_ops->dev_type) {
1325                        if (p_type)
1326                                *p_type = ipipt;
1327                        return true;
1328                }
1329        }
1330        return false;
1331}
1332
1333bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1334                                const struct net_device *dev)
1335{
1336        return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1337}
1338
1339static struct mlxsw_sp_ipip_entry *
1340mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1341                                   const struct net_device *ol_dev)
1342{
1343        struct mlxsw_sp_ipip_entry *ipip_entry;
1344
1345        list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1346                            ipip_list_node)
1347                if (ipip_entry->ol_dev == ol_dev)
1348                        return ipip_entry;
1349
1350        return NULL;
1351}
1352
1353static struct mlxsw_sp_ipip_entry *
1354mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1355                                   const struct net_device *ul_dev,
1356                                   struct mlxsw_sp_ipip_entry *start)
1357{
1358        struct mlxsw_sp_ipip_entry *ipip_entry;
1359
1360        ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1361                                        ipip_list_node);
1362        list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1363                                     ipip_list_node) {
1364                struct net_device *ipip_ul_dev =
1365                        __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1366
1367                if (ipip_ul_dev == ul_dev)
1368                        return ipip_entry;
1369        }
1370
1371        return NULL;
1372}
1373
1374bool mlxsw_sp_netdev_is_ipip_ul(const struct mlxsw_sp *mlxsw_sp,
1375                                const struct net_device *dev)
1376{
1377        return mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1378}
1379
1380static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1381                                                const struct net_device *ol_dev,
1382                                                enum mlxsw_sp_ipip_type ipipt)
1383{
1384        const struct mlxsw_sp_ipip_ops *ops
1385                = mlxsw_sp->router->ipip_ops_arr[ipipt];
1386
1387        /* For deciding whether decap should be offloaded, we don't care about
1388         * overlay protocol, so ask whether either one is supported.
1389         */
1390        return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1391               ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1392}
1393
1394static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1395                                                struct net_device *ol_dev)
1396{
1397        struct mlxsw_sp_ipip_entry *ipip_entry;
1398        enum mlxsw_sp_l3proto ul_proto;
1399        enum mlxsw_sp_ipip_type ipipt;
1400        union mlxsw_sp_l3addr saddr;
1401        u32 ul_tb_id;
1402
1403        mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1404        if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1405                ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1406                ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1407                saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1408                if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1409                                                          saddr, ul_tb_id,
1410                                                          NULL)) {
1411                        ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1412                                                                ol_dev);
1413                        if (IS_ERR(ipip_entry))
1414                                return PTR_ERR(ipip_entry);
1415                }
1416        }
1417
1418        return 0;
1419}
1420
/* Handle unregistration of tunnel netdevice @ol_dev: destroy its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
						   struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
}
1430
/* The overlay device of @ipip_entry is up: if a matching decap route exists,
 * promote it to an IPIP decap entry.
 */
static void
mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
				struct mlxsw_sp_ipip_entry *ipip_entry)
{
	struct mlxsw_sp_fib_entry *decap_route;

	decap_route = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
	if (!decap_route)
		return;

	mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, decap_route);
}
1442
1443static int
1444mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1445                        u16 ul_rif_id, bool enable)
1446{
1447        struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1448        struct mlxsw_sp_rif *rif = &lb_rif->common;
1449        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1450        char ritr_pl[MLXSW_REG_RITR_LEN];
1451        u32 saddr4;
1452
1453        switch (lb_cf.ul_protocol) {
1454        case MLXSW_SP_L3_PROTO_IPV4:
1455                saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1456                mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1457                                    rif->rif_index, rif->vr_id, rif->dev->mtu);
1458                mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1459                            MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1460                            ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1461                break;
1462
1463        case MLXSW_SP_L3_PROTO_IPV6:
1464                return -EAFNOSUPPORT;
1465        }
1466
1467        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1468}
1469
1470static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1471                                                 struct net_device *ol_dev)
1472{
1473        struct mlxsw_sp_ipip_entry *ipip_entry;
1474        struct mlxsw_sp_rif_ipip_lb *lb_rif;
1475        int err = 0;
1476
1477        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1478        if (ipip_entry) {
1479                lb_rif = ipip_entry->ol_lb;
1480                err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1481                                              lb_rif->ul_rif_id, true);
1482                if (err)
1483                        goto out;
1484                lb_rif->common.mtu = ol_dev->mtu;
1485        }
1486
1487out:
1488        return err;
1489}
1490
/* Tunnel netdevice @ol_dev went up: forward the event to its IPIP entry, if
 * it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
						struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
}
1500
1501static void
1502mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1503                                  struct mlxsw_sp_ipip_entry *ipip_entry)
1504{
1505        if (ipip_entry->decap_fib_entry)
1506                mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1507}
1508
/* Tunnel netdevice @ol_dev went down: forward the event to its IPIP entry,
 * if it has one.
 */
static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
						  struct net_device *ol_dev)
{
	struct mlxsw_sp_ipip_entry *ipip_entry;

	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
	if (!ipip_entry)
		return;

	mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
}
1518
1519static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1520                                         struct mlxsw_sp_rif *old_rif,
1521                                         struct mlxsw_sp_rif *new_rif);
1522static int
1523mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1524                                 struct mlxsw_sp_ipip_entry *ipip_entry,
1525                                 bool keep_encap,
1526                                 struct netlink_ext_ack *extack)
1527{
1528        struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1529        struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1530
1531        new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1532                                                     ipip_entry->ipipt,
1533                                                     ipip_entry->ol_dev,
1534                                                     extack);
1535        if (IS_ERR(new_lb_rif))
1536                return PTR_ERR(new_lb_rif);
1537        ipip_entry->ol_lb = new_lb_rif;
1538
1539        if (keep_encap)
1540                mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1541                                             &new_lb_rif->common);
1542
1543        mlxsw_sp_rif_destroy(&old_lb_rif->common);
1544
1545        return 0;
1546}
1547
1548static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1549                                        struct mlxsw_sp_rif *rif);
1550
1551/**
1552 * Update the offload related to an IPIP entry. This always updates decap, and
1553 * in addition to that it also:
1554 * @recreate_loopback: recreates the associated loopback RIF
1555 * @keep_encap: updates next hops that use the tunnel netdevice. This is only
1556 *              relevant when recreate_loopback is true.
1557 * @update_nexthops: updates next hops, keeping the current loopback RIF. This
1558 *                   is only relevant when recreate_loopback is false.
1559 */
1560int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1561                                        struct mlxsw_sp_ipip_entry *ipip_entry,
1562                                        bool recreate_loopback,
1563                                        bool keep_encap,
1564                                        bool update_nexthops,
1565                                        struct netlink_ext_ack *extack)
1566{
1567        int err;
1568
1569        /* RIFs can't be edited, so to update loopback, we need to destroy and
1570         * recreate it. That creates a window of opportunity where RALUE and
1571         * RATR registers end up referencing a RIF that's already gone. RATRs
1572         * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1573         * of RALUE, demote the decap route back.
1574         */
1575        if (ipip_entry->decap_fib_entry)
1576                mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1577
1578        if (recreate_loopback) {
1579                err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1580                                                       keep_encap, extack);
1581                if (err)
1582                        return err;
1583        } else if (update_nexthops) {
1584                mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1585                                            &ipip_entry->ol_lb->common);
1586        }
1587
1588        if (ipip_entry->ol_dev->flags & IFF_UP)
1589                mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1590
1591        return 0;
1592}
1593
1594static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1595                                                struct net_device *ol_dev,
1596                                                struct netlink_ext_ack *extack)
1597{
1598        struct mlxsw_sp_ipip_entry *ipip_entry =
1599                mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1600        enum mlxsw_sp_l3proto ul_proto;
1601        union mlxsw_sp_l3addr saddr;
1602        u32 ul_tb_id;
1603
1604        if (!ipip_entry)
1605                return 0;
1606
1607        /* For flat configuration cases, moving overlay to a different VRF might
1608         * cause local address conflict, and the conflicting tunnels need to be
1609         * demoted.
1610         */
1611        ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1612        ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1613        saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1614        if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1615                                                 saddr, ul_tb_id,
1616                                                 ipip_entry)) {
1617                mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1618                return 0;
1619        }
1620
1621        return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1622                                                   true, false, false, extack);
1623}
1624
1625static int
1626mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1627                                     struct mlxsw_sp_ipip_entry *ipip_entry,
1628                                     struct net_device *ul_dev,
1629                                     struct netlink_ext_ack *extack)
1630{
1631        return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1632                                                   true, true, false, extack);
1633}
1634
1635static int
1636mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1637                                    struct mlxsw_sp_ipip_entry *ipip_entry,
1638                                    struct net_device *ul_dev)
1639{
1640        return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1641                                                   false, false, true, NULL);
1642}
1643
1644static int
1645mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1646                                      struct mlxsw_sp_ipip_entry *ipip_entry,
1647                                      struct net_device *ul_dev)
1648{
1649        /* A down underlay device causes encapsulated packets to not be
1650         * forwarded, but decap still works. So refresh next hops without
1651         * touching anything else.
1652         */
1653        return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1654                                                   false, false, true, NULL);
1655}
1656
1657static int
1658mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1659                                        struct net_device *ol_dev,
1660                                        struct netlink_ext_ack *extack)
1661{
1662        const struct mlxsw_sp_ipip_ops *ipip_ops;
1663        struct mlxsw_sp_ipip_entry *ipip_entry;
1664        int err;
1665
1666        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1667        if (!ipip_entry)
1668                /* A change might make a tunnel eligible for offloading, but
1669                 * that is currently not implemented. What falls to slow path
1670                 * stays there.
1671                 */
1672                return 0;
1673
1674        /* A change might make a tunnel not eligible for offloading. */
1675        if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1676                                                 ipip_entry->ipipt)) {
1677                mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1678                return 0;
1679        }
1680
1681        ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1682        err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1683        return err;
1684}
1685
1686void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1687                                       struct mlxsw_sp_ipip_entry *ipip_entry)
1688{
1689        struct net_device *ol_dev = ipip_entry->ol_dev;
1690
1691        if (ol_dev->flags & IFF_UP)
1692                mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1693        mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1694}
1695
1696/* The configuration where several tunnels have the same local address in the
1697 * same underlay table needs special treatment in the HW. That is currently not
1698 * implemented in the driver. This function finds and demotes the first tunnel
1699 * with a given source address, except the one passed in in the argument
1700 * `except'.
1701 */
1702bool
1703mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1704                                     enum mlxsw_sp_l3proto ul_proto,
1705                                     union mlxsw_sp_l3addr saddr,
1706                                     u32 ul_tb_id,
1707                                     const struct mlxsw_sp_ipip_entry *except)
1708{
1709        struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1710
1711        list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1712                                 ipip_list_node) {
1713                if (ipip_entry != except &&
1714                    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1715                                                      ul_tb_id, ipip_entry)) {
1716                        mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1717                        return true;
1718                }
1719        }
1720
1721        return false;
1722}
1723
1724static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1725                                                     struct net_device *ul_dev)
1726{
1727        struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1728
1729        list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1730                                 ipip_list_node) {
1731                struct net_device *ipip_ul_dev =
1732                        __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev);
1733
1734                if (ipip_ul_dev == ul_dev)
1735                        mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1736        }
1737}
1738
1739int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1740                                     struct net_device *ol_dev,
1741                                     unsigned long event,
1742                                     struct netdev_notifier_info *info)
1743{
1744        struct netdev_notifier_changeupper_info *chup;
1745        struct netlink_ext_ack *extack;
1746
1747        switch (event) {
1748        case NETDEV_REGISTER:
1749                return mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1750        case NETDEV_UNREGISTER:
1751                mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1752                return 0;
1753        case NETDEV_UP:
1754                mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1755                return 0;
1756        case NETDEV_DOWN:
1757                mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1758                return 0;
1759        case NETDEV_CHANGEUPPER:
1760                chup = container_of(info, typeof(*chup), info);
1761                extack = info->extack;
1762                if (netif_is_l3_master(chup->upper_dev))
1763                        return mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1764                                                                    ol_dev,
1765                                                                    extack);
1766                return 0;
1767        case NETDEV_CHANGE:
1768                extack = info->extack;
1769                return mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1770                                                               ol_dev, extack);
1771        case NETDEV_CHANGEMTU:
1772                return mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1773        }
1774        return 0;
1775}
1776
1777static int
1778__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1779                                   struct mlxsw_sp_ipip_entry *ipip_entry,
1780                                   struct net_device *ul_dev,
1781                                   unsigned long event,
1782                                   struct netdev_notifier_info *info)
1783{
1784        struct netdev_notifier_changeupper_info *chup;
1785        struct netlink_ext_ack *extack;
1786
1787        switch (event) {
1788        case NETDEV_CHANGEUPPER:
1789                chup = container_of(info, typeof(*chup), info);
1790                extack = info->extack;
1791                if (netif_is_l3_master(chup->upper_dev))
1792                        return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1793                                                                    ipip_entry,
1794                                                                    ul_dev,
1795                                                                    extack);
1796                break;
1797
1798        case NETDEV_UP:
1799                return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1800                                                           ul_dev);
1801        case NETDEV_DOWN:
1802                return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1803                                                             ipip_entry,
1804                                                             ul_dev);
1805        }
1806        return 0;
1807}
1808
1809int
1810mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1811                                 struct net_device *ul_dev,
1812                                 unsigned long event,
1813                                 struct netdev_notifier_info *info)
1814{
1815        struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1816        int err;
1817
1818        while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1819                                                                ul_dev,
1820                                                                ipip_entry))) {
1821                err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1822                                                         ul_dev, event, info);
1823                if (err) {
1824                        mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1825                                                                 ul_dev);
1826                        return err;
1827                }
1828        }
1829
1830        return 0;
1831}
1832
1833int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1834                                      enum mlxsw_sp_l3proto ul_proto,
1835                                      const union mlxsw_sp_l3addr *ul_sip,
1836                                      u32 tunnel_index)
1837{
1838        enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1839        struct mlxsw_sp_fib_entry *fib_entry;
1840        int err;
1841
1842        /* It is valid to create a tunnel with a local IP and only later
1843         * assign this IP address to a local interface
1844         */
1845        fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1846                                                         ul_proto, ul_sip,
1847                                                         type);
1848        if (!fib_entry)
1849                return 0;
1850
1851        fib_entry->decap.tunnel_index = tunnel_index;
1852        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1853
1854        err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1855        if (err)
1856                goto err_fib_entry_update;
1857
1858        return 0;
1859
1860err_fib_entry_update:
1861        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1862        mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1863        return err;
1864}
1865
1866void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1867                                      enum mlxsw_sp_l3proto ul_proto,
1868                                      const union mlxsw_sp_l3addr *ul_sip)
1869{
1870        enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1871        struct mlxsw_sp_fib_entry *fib_entry;
1872
1873        fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1874                                                         ul_proto, ul_sip,
1875                                                         type);
1876        if (!fib_entry)
1877                return;
1878
1879        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1880        mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1881}
1882
/* Hash key of a neighbour entry: the kernel neighbour pointer itself. */
struct mlxsw_sp_neigh_key {
	struct neighbour *n;
};
1886
1887struct mlxsw_sp_neigh_entry {
1888        struct list_head rif_list_node;
1889        struct rhash_head ht_node;
1890        struct mlxsw_sp_neigh_key key;
1891        u16 rif;
1892        bool connected;
1893        unsigned char ha[ETH_ALEN];
1894        struct list_head nexthop_list; /* list of nexthops using
1895                                        * this neigh entry
1896                                        */
1897        struct list_head nexthop_neighs_list_node;
1898        unsigned int counter_index;
1899        bool counter_valid;
1900};
1901
1902static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1903        .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1904        .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1905        .key_len = sizeof(struct mlxsw_sp_neigh_key),
1906};
1907
1908struct mlxsw_sp_neigh_entry *
1909mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1910                        struct mlxsw_sp_neigh_entry *neigh_entry)
1911{
1912        if (!neigh_entry) {
1913                if (list_empty(&rif->neigh_list))
1914                        return NULL;
1915                else
1916                        return list_first_entry(&rif->neigh_list,
1917                                                typeof(*neigh_entry),
1918                                                rif_list_node);
1919        }
1920        if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1921                return NULL;
1922        return list_next_entry(neigh_entry, rif_list_node);
1923}
1924
1925int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1926{
1927        return neigh_entry->key.n->tbl->family;
1928}
1929
1930unsigned char *
1931mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1932{
1933        return neigh_entry->ha;
1934}
1935
1936u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1937{
1938        struct neighbour *n;
1939
1940        n = neigh_entry->key.n;
1941        return ntohl(*((__be32 *) n->primary_key));
1942}
1943
1944struct in6_addr *
1945mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1946{
1947        struct neighbour *n;
1948
1949        n = neigh_entry->key.n;
1950        return (struct in6_addr *) &n->primary_key;
1951}
1952
1953int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
1954                               struct mlxsw_sp_neigh_entry *neigh_entry,
1955                               u64 *p_counter)
1956{
1957        if (!neigh_entry->counter_valid)
1958                return -EINVAL;
1959
1960        return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
1961                                         p_counter, NULL);
1962}
1963
1964static struct mlxsw_sp_neigh_entry *
1965mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
1966                           u16 rif)
1967{
1968        struct mlxsw_sp_neigh_entry *neigh_entry;
1969
1970        neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
1971        if (!neigh_entry)
1972                return NULL;
1973
1974        neigh_entry->key.n = n;
1975        neigh_entry->rif = rif;
1976        INIT_LIST_HEAD(&neigh_entry->nexthop_list);
1977
1978        return neigh_entry;
1979}
1980
/* Release the memory of a neighbour entry obtained from
 * mlxsw_sp_neigh_entry_alloc().
 */
static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
{
	kfree(neigh_entry);
}
1985
1986static int
1987mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
1988                            struct mlxsw_sp_neigh_entry *neigh_entry)
1989{
1990        return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
1991                                      &neigh_entry->ht_node,
1992                                      mlxsw_sp_neigh_ht_params);
1993}
1994
1995static void
1996mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
1997                            struct mlxsw_sp_neigh_entry *neigh_entry)
1998{
1999        rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2000                               &neigh_entry->ht_node,
2001                               mlxsw_sp_neigh_ht_params);
2002}
2003
2004static bool
2005mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2006                                    struct mlxsw_sp_neigh_entry *neigh_entry)
2007{
2008        struct devlink *devlink;
2009        const char *table_name;
2010
2011        switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2012        case AF_INET:
2013                table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2014                break;
2015        case AF_INET6:
2016                table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2017                break;
2018        default:
2019                WARN_ON(1);
2020                return false;
2021        }
2022
2023        devlink = priv_to_devlink(mlxsw_sp->core);
2024        return devlink_dpipe_table_counter_enabled(devlink, table_name);
2025}
2026
2027static void
2028mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2029                             struct mlxsw_sp_neigh_entry *neigh_entry)
2030{
2031        if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2032                return;
2033
2034        if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2035                return;
2036
2037        neigh_entry->counter_valid = true;
2038}
2039
2040static void
2041mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2042                            struct mlxsw_sp_neigh_entry *neigh_entry)
2043{
2044        if (!neigh_entry->counter_valid)
2045                return;
2046        mlxsw_sp_flow_counter_free(mlxsw_sp,
2047                                   neigh_entry->counter_index);
2048        neigh_entry->counter_valid = false;
2049}
2050
2051static struct mlxsw_sp_neigh_entry *
2052mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2053{
2054        struct mlxsw_sp_neigh_entry *neigh_entry;
2055        struct mlxsw_sp_rif *rif;
2056        int err;
2057
2058        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2059        if (!rif)
2060                return ERR_PTR(-EINVAL);
2061
2062        neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2063        if (!neigh_entry)
2064                return ERR_PTR(-ENOMEM);
2065
2066        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2067        if (err)
2068                goto err_neigh_entry_insert;
2069
2070        mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2071        list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2072
2073        return neigh_entry;
2074
2075err_neigh_entry_insert:
2076        mlxsw_sp_neigh_entry_free(neigh_entry);
2077        return ERR_PTR(err);
2078}
2079
2080static void
2081mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2082                             struct mlxsw_sp_neigh_entry *neigh_entry)
2083{
2084        list_del(&neigh_entry->rif_list_node);
2085        mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2086        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2087        mlxsw_sp_neigh_entry_free(neigh_entry);
2088}
2089
2090static struct mlxsw_sp_neigh_entry *
2091mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2092{
2093        struct mlxsw_sp_neigh_key key;
2094
2095        key.n = n;
2096        return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2097                                      &key, mlxsw_sp_neigh_ht_params);
2098}
2099
2100static void
2101mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2102{
2103        unsigned long interval;
2104
2105#if IS_ENABLED(CONFIG_IPV6)
2106        interval = min_t(unsigned long,
2107                         NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2108                         NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2109#else
2110        interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2111#endif
2112        mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2113}
2114
2115static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2116                                                   char *rauhtd_pl,
2117                                                   int ent_index)
2118{
2119        struct net_device *dev;
2120        struct neighbour *n;
2121        __be32 dipn;
2122        u32 dip;
2123        u16 rif;
2124
2125        mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2126
2127        if (!mlxsw_sp->router->rifs[rif]) {
2128                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2129                return;
2130        }
2131
2132        dipn = htonl(dip);
2133        dev = mlxsw_sp->router->rifs[rif]->dev;
2134        n = neigh_lookup(&arp_tbl, &dipn, dev);
2135        if (!n)
2136                return;
2137
2138        netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2139        neigh_event_send(n, NULL);
2140        neigh_release(n);
2141}
2142
2143#if IS_ENABLED(CONFIG_IPV6)
2144static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2145                                                   char *rauhtd_pl,
2146                                                   int rec_index)
2147{
2148        struct net_device *dev;
2149        struct neighbour *n;
2150        struct in6_addr dip;
2151        u16 rif;
2152
2153        mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2154                                         (char *) &dip);
2155
2156        if (!mlxsw_sp->router->rifs[rif]) {
2157                dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2158                return;
2159        }
2160
2161        dev = mlxsw_sp->router->rifs[rif]->dev;
2162        n = neigh_lookup(&nd_tbl, &dip, dev);
2163        if (!n)
2164                return;
2165
2166        netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2167        neigh_event_send(n, NULL);
2168        neigh_release(n);
2169}
2170#else
2171static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2172                                                   char *rauhtd_pl,
2173                                                   int rec_index)
2174{
2175}
2176#endif
2177
2178static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2179                                                   char *rauhtd_pl,
2180                                                   int rec_index)
2181{
2182        u8 num_entries;
2183        int i;
2184
2185        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2186                                                                rec_index);
2187        /* Hardware starts counting at 0, so add 1. */
2188        num_entries++;
2189
2190        /* Each record consists of several neighbour entries. */
2191        for (i = 0; i < num_entries; i++) {
2192                int ent_index;
2193
2194                ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2195                mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2196                                                       ent_index);
2197        }
2198
2199}
2200
/* Process IPv6 record @rec_index of a RAUHTD dump. */
static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
						   char *rauhtd_pl,
						   int rec_index)
{
	/* One record contains one entry, so the record index doubles as the
	 * entry index.
	 */
	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, rec_index);
}
2209
2210static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2211                                              char *rauhtd_pl, int rec_index)
2212{
2213        switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2214        case MLXSW_REG_RAUHTD_TYPE_IPV4:
2215                mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2216                                                       rec_index);
2217                break;
2218        case MLXSW_REG_RAUHTD_TYPE_IPV6:
2219                mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2220                                                       rec_index);
2221                break;
2222        }
2223}
2224
2225static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2226{
2227        u8 num_rec, last_rec_index, num_entries;
2228
2229        num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2230        last_rec_index = num_rec - 1;
2231
2232        if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2233                return false;
2234        if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2235            MLXSW_REG_RAUHTD_TYPE_IPV6)
2236                return true;
2237
2238        num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2239                                                                last_rec_index);
2240        if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2241                return true;
2242        return false;
2243}
2244
2245static int
2246__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2247                                       char *rauhtd_pl,
2248                                       enum mlxsw_reg_rauhtd_type type)
2249{
2250        int i, num_rec;
2251        int err;
2252
2253        /* Make sure the neighbour's netdev isn't removed in the
2254         * process.
2255         */
2256        rtnl_lock();
2257        do {
2258                mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2259                err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2260                                      rauhtd_pl);
2261                if (err) {
2262                        dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2263                        break;
2264                }
2265                num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2266                for (i = 0; i < num_rec; i++)
2267                        mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2268                                                          i);
2269        } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2270        rtnl_unlock();
2271
2272        return err;
2273}
2274
2275static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2276{
2277        enum mlxsw_reg_rauhtd_type type;
2278        char *rauhtd_pl;
2279        int err;
2280
2281        rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2282        if (!rauhtd_pl)
2283                return -ENOMEM;
2284
2285        type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2286        err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2287        if (err)
2288                goto out;
2289
2290        type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2291        err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2292out:
2293        kfree(rauhtd_pl);
2294        return err;
2295}
2296
2297static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2298{
2299        struct mlxsw_sp_neigh_entry *neigh_entry;
2300
2301        /* Take RTNL mutex here to prevent lists from changes */
2302        rtnl_lock();
2303        list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2304                            nexthop_neighs_list_node)
2305                /* If this neigh have nexthops, make the kernel think this neigh
2306                 * is active regardless of the traffic.
2307                 */
2308                neigh_event_send(neigh_entry->key.n, NULL);
2309        rtnl_unlock();
2310}
2311
2312static void
2313mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2314{
2315        unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2316
2317        mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2318                               msecs_to_jiffies(interval));
2319}
2320
2321static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2322{
2323        struct mlxsw_sp_router *router;
2324        int err;
2325
2326        router = container_of(work, struct mlxsw_sp_router,
2327                              neighs_update.dw.work);
2328        err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2329        if (err)
2330                dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2331
2332        mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2333
2334        mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2335}
2336
2337static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2338{
2339        struct mlxsw_sp_neigh_entry *neigh_entry;
2340        struct mlxsw_sp_router *router;
2341
2342        router = container_of(work, struct mlxsw_sp_router,
2343                              nexthop_probe_dw.work);
2344        /* Iterate over nexthop neighbours, find those who are unresolved and
2345         * send arp on them. This solves the chicken-egg problem when
2346         * the nexthop wouldn't get offloaded until the neighbor is resolved
2347         * but it wouldn't get resolved ever in case traffic is flowing in HW
2348         * using different nexthop.
2349         *
2350         * Take RTNL mutex here to prevent lists from changes.
2351         */
2352        rtnl_lock();
2353        list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2354                            nexthop_neighs_list_node)
2355                if (!neigh_entry->connected)
2356                        neigh_event_send(neigh_entry->key.n, NULL);
2357        rtnl_unlock();
2358
2359        mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2360                               MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2361}
2362
2363static void
2364mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2365                              struct mlxsw_sp_neigh_entry *neigh_entry,
2366                              bool removing, bool dead);
2367
2368static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2369{
2370        return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2371                        MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2372}
2373
2374static int
2375mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2376                                struct mlxsw_sp_neigh_entry *neigh_entry,
2377                                enum mlxsw_reg_rauht_op op)
2378{
2379        struct neighbour *n = neigh_entry->key.n;
2380        u32 dip = ntohl(*((__be32 *) n->primary_key));
2381        char rauht_pl[MLXSW_REG_RAUHT_LEN];
2382
2383        mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2384                              dip);
2385        if (neigh_entry->counter_valid)
2386                mlxsw_reg_rauht_pack_counter(rauht_pl,
2387                                             neigh_entry->counter_index);
2388        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2389}
2390
2391static int
2392mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2393                                struct mlxsw_sp_neigh_entry *neigh_entry,
2394                                enum mlxsw_reg_rauht_op op)
2395{
2396        struct neighbour *n = neigh_entry->key.n;
2397        char rauht_pl[MLXSW_REG_RAUHT_LEN];
2398        const char *dip = n->primary_key;
2399
2400        mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2401                              dip);
2402        if (neigh_entry->counter_valid)
2403                mlxsw_reg_rauht_pack_counter(rauht_pl,
2404                                             neigh_entry->counter_index);
2405        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2406}
2407
2408bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2409{
2410        struct neighbour *n = neigh_entry->key.n;
2411
2412        /* Packets with a link-local destination address are trapped
2413         * after LPM lookup and never reach the neighbour table, so
2414         * there is no need to program such neighbours to the device.
2415         */
2416        if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2417            IPV6_ADDR_LINKLOCAL)
2418                return true;
2419        return false;
2420}
2421
2422static void
2423mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2424                            struct mlxsw_sp_neigh_entry *neigh_entry,
2425                            bool adding)
2426{
2427        enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2428        int err;
2429
2430        if (!adding && !neigh_entry->connected)
2431                return;
2432        neigh_entry->connected = adding;
2433        if (neigh_entry->key.n->tbl->family == AF_INET) {
2434                err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2435                                                      op);
2436                if (err)
2437                        return;
2438        } else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2439                if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2440                        return;
2441                err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2442                                                      op);
2443                if (err)
2444                        return;
2445        } else {
2446                WARN_ON_ONCE(1);
2447                return;
2448        }
2449
2450        if (adding)
2451                neigh_entry->key.n->flags |= NTF_OFFLOADED;
2452        else
2453                neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2454}
2455
2456void
2457mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2458                                    struct mlxsw_sp_neigh_entry *neigh_entry,
2459                                    bool adding)
2460{
2461        if (adding)
2462                mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2463        else
2464                mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2465        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2466}
2467
2468struct mlxsw_sp_netevent_work {
2469        struct work_struct work;
2470        struct mlxsw_sp *mlxsw_sp;
2471        struct neighbour *n;
2472};
2473
2474static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2475{
2476        struct mlxsw_sp_netevent_work *net_work =
2477                container_of(work, struct mlxsw_sp_netevent_work, work);
2478        struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2479        struct mlxsw_sp_neigh_entry *neigh_entry;
2480        struct neighbour *n = net_work->n;
2481        unsigned char ha[ETH_ALEN];
2482        bool entry_connected;
2483        u8 nud_state, dead;
2484
2485        /* If these parameters are changed after we release the lock,
2486         * then we are guaranteed to receive another event letting us
2487         * know about it.
2488         */
2489        read_lock_bh(&n->lock);
2490        memcpy(ha, n->ha, ETH_ALEN);
2491        nud_state = n->nud_state;
2492        dead = n->dead;
2493        read_unlock_bh(&n->lock);
2494
2495        rtnl_lock();
2496        mlxsw_sp_span_respin(mlxsw_sp);
2497
2498        entry_connected = nud_state & NUD_VALID && !dead;
2499        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2500        if (!entry_connected && !neigh_entry)
2501                goto out;
2502        if (!neigh_entry) {
2503                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2504                if (IS_ERR(neigh_entry))
2505                        goto out;
2506        }
2507
2508        memcpy(neigh_entry->ha, ha, ETH_ALEN);
2509        mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2510        mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2511                                      dead);
2512
2513        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2514                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2515
2516out:
2517        rtnl_unlock();
2518        neigh_release(n);
2519        kfree(net_work);
2520}
2521
2522static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2523
2524static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2525{
2526        struct mlxsw_sp_netevent_work *net_work =
2527                container_of(work, struct mlxsw_sp_netevent_work, work);
2528        struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2529
2530        mlxsw_sp_mp_hash_init(mlxsw_sp);
2531        kfree(net_work);
2532}
2533
2534static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2535
2536static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2537{
2538        struct mlxsw_sp_netevent_work *net_work =
2539                container_of(work, struct mlxsw_sp_netevent_work, work);
2540        struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2541
2542        __mlxsw_sp_router_init(mlxsw_sp);
2543        kfree(net_work);
2544}
2545
2546static int mlxsw_sp_router_schedule_work(struct net *net,
2547                                         struct notifier_block *nb,
2548                                         void (*cb)(struct work_struct *))
2549{
2550        struct mlxsw_sp_netevent_work *net_work;
2551        struct mlxsw_sp_router *router;
2552
2553        if (!net_eq(net, &init_net))
2554                return NOTIFY_DONE;
2555
2556        net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2557        if (!net_work)
2558                return NOTIFY_BAD;
2559
2560        router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2561        INIT_WORK(&net_work->work, cb);
2562        net_work->mlxsw_sp = router->mlxsw_sp;
2563        mlxsw_core_schedule_work(&net_work->work);
2564        return NOTIFY_DONE;
2565}
2566
2567static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2568                                          unsigned long event, void *ptr)
2569{
2570        struct mlxsw_sp_netevent_work *net_work;
2571        struct mlxsw_sp_port *mlxsw_sp_port;
2572        struct mlxsw_sp *mlxsw_sp;
2573        unsigned long interval;
2574        struct neigh_parms *p;
2575        struct neighbour *n;
2576
2577        switch (event) {
2578        case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2579                p = ptr;
2580
2581                /* We don't care about changes in the default table. */
2582                if (!p->dev || (p->tbl->family != AF_INET &&
2583                                p->tbl->family != AF_INET6))
2584                        return NOTIFY_DONE;
2585
2586                /* We are in atomic context and can't take RTNL mutex,
2587                 * so use RCU variant to walk the device chain.
2588                 */
2589                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2590                if (!mlxsw_sp_port)
2591                        return NOTIFY_DONE;
2592
2593                mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2594                interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2595                mlxsw_sp->router->neighs_update.interval = interval;
2596
2597                mlxsw_sp_port_dev_put(mlxsw_sp_port);
2598                break;
2599        case NETEVENT_NEIGH_UPDATE:
2600                n = ptr;
2601
2602                if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2603                        return NOTIFY_DONE;
2604
2605                mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2606                if (!mlxsw_sp_port)
2607                        return NOTIFY_DONE;
2608
2609                net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2610                if (!net_work) {
2611                        mlxsw_sp_port_dev_put(mlxsw_sp_port);
2612                        return NOTIFY_BAD;
2613                }
2614
2615                INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2616                net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2617                net_work->n = n;
2618
2619                /* Take a reference to ensure the neighbour won't be
2620                 * destructed until we drop the reference in delayed
2621                 * work.
2622                 */
2623                neigh_clone(n);
2624                mlxsw_core_schedule_work(&net_work->work);
2625                mlxsw_sp_port_dev_put(mlxsw_sp_port);
2626                break;
2627        case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2628        case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2629                return mlxsw_sp_router_schedule_work(ptr, nb,
2630                                mlxsw_sp_router_mp_hash_event_work);
2631
2632        case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2633                return mlxsw_sp_router_schedule_work(ptr, nb,
2634                                mlxsw_sp_router_update_priority_work);
2635        }
2636
2637        return NOTIFY_DONE;
2638}
2639
2640static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2641{
2642        int err;
2643
2644        err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2645                              &mlxsw_sp_neigh_ht_params);
2646        if (err)
2647                return err;
2648
2649        /* Initialize the polling interval according to the default
2650         * table.
2651         */
2652        mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2653
2654        /* Create the delayed works for the activity_update */
2655        INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2656                          mlxsw_sp_router_neighs_update_work);
2657        INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2658                          mlxsw_sp_router_probe_unresolved_nexthops);
2659        mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2660        mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2661        return 0;
2662}
2663
2664static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2665{
2666        cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2667        cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2668        rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2669}
2670
2671static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2672                                         struct mlxsw_sp_rif *rif)
2673{
2674        struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2675
2676        list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2677                                 rif_list_node) {
2678                mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2679                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2680        }
2681}
2682
/* Kind of a nexthop; selects which member of the entry union in
 * struct mlxsw_sp_nexthop is used.
 */
enum mlxsw_sp_nexthop_type {
        MLXSW_SP_NEXTHOP_TYPE_ETH,      /* backed by a neighbour entry */
        MLXSW_SP_NEXTHOP_TYPE_IPIP,     /* backed by an IP-in-IP entry */
};
2687
/* Hash table key of a nexthop. */
struct mlxsw_sp_nexthop_key {
        struct fib_nh *fib_nh;
};
2691
2692struct mlxsw_sp_nexthop {
2693        struct list_head neigh_list_node; /* member of neigh entry list */
2694        struct list_head rif_list_node;
2695        struct list_head router_list_node;
2696        struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2697                                                * this belongs to
2698                                                */
2699        struct rhash_head ht_node;
2700        struct mlxsw_sp_nexthop_key key;
2701        unsigned char gw_addr[sizeof(struct in6_addr)];
2702        int ifindex;
2703        int nh_weight;
2704        int norm_nh_weight;
2705        int num_adj_entries;
2706        struct mlxsw_sp_rif *rif;
2707        u8 should_offload:1, /* set indicates this neigh is connected and
2708                              * should be put to KVD linear area of this group.
2709                              */
2710           offloaded:1, /* set in case the neigh is actually put into
2711                         * KVD linear area of this group.
2712                         */
2713           update:1; /* set indicates that MAC of this neigh should be
2714                      * updated in HW
2715                      */
2716        enum mlxsw_sp_nexthop_type type;
2717        union {
2718                struct mlxsw_sp_neigh_entry *neigh_entry;
2719                struct mlxsw_sp_ipip_entry *ipip_entry;
2720        };
2721        unsigned int counter_index;
2722        bool counter_valid;
2723};
2724
2725struct mlxsw_sp_nexthop_group {
2726        void *priv;
2727        struct rhash_head ht_node;
2728        struct list_head fib_list; /* list of fib entries that use this group */
2729        struct neigh_table *neigh_tbl;
2730        u8 adj_index_valid:1,
2731           gateway:1; /* routes using the group use a gateway */
2732        u32 adj_index;
2733        u16 ecmp_size;
2734        u16 count;
2735        int sum_norm_weight;
2736        struct mlxsw_sp_nexthop nexthops[0];
2737#define nh_rif  nexthops[0].rif
2738};
2739
2740void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2741                                    struct mlxsw_sp_nexthop *nh)
2742{
2743        struct devlink *devlink;
2744
2745        devlink = priv_to_devlink(mlxsw_sp->core);
2746        if (!devlink_dpipe_table_counter_enabled(devlink,
2747                                                 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2748                return;
2749
2750        if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2751                return;
2752
2753        nh->counter_valid = true;
2754}
2755
2756void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2757                                   struct mlxsw_sp_nexthop *nh)
2758{
2759        if (!nh->counter_valid)
2760                return;
2761        mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2762        nh->counter_valid = false;
2763}
2764
2765int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2766                                 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2767{
2768        if (!nh->counter_valid)
2769                return -EINVAL;
2770
2771        return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2772                                         p_counter, NULL);
2773}
2774
2775struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2776                                               struct mlxsw_sp_nexthop *nh)
2777{
2778        if (!nh) {
2779                if (list_empty(&router->nexthop_list))
2780                        return NULL;
2781                else
2782                        return list_first_entry(&router->nexthop_list,
2783                                                typeof(*nh), router_list_node);
2784        }
2785        if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2786                return NULL;
2787        return list_next_entry(nh, router_list_node);
2788}
2789
2790bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2791{
2792        return nh->offloaded;
2793}
2794
2795unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2796{
2797        if (!nh->offloaded)
2798                return NULL;
2799        return nh->neigh_entry->ha;
2800}
2801
2802int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2803                             u32 *p_adj_size, u32 *p_adj_hash_index)
2804{
2805        struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2806        u32 adj_hash_index = 0;
2807        int i;
2808
2809        if (!nh->offloaded || !nh_grp->adj_index_valid)
2810                return -EINVAL;
2811
2812        *p_adj_index = nh_grp->adj_index;
2813        *p_adj_size = nh_grp->ecmp_size;
2814
2815        for (i = 0; i < nh_grp->count; i++) {
2816                struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2817
2818                if (nh_iter == nh)
2819                        break;
2820                if (nh_iter->offloaded)
2821                        adj_hash_index += nh_iter->num_adj_entries;
2822        }
2823
2824        *p_adj_hash_index = adj_hash_index;
2825        return 0;
2826}
2827
2828struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2829{
2830        return nh->rif;
2831}
2832
2833bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2834{
2835        struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2836        int i;
2837
2838        for (i = 0; i < nh_grp->count; i++) {
2839                struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2840
2841                if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2842                        return true;
2843        }
2844        return false;
2845}
2846
2847static struct fib_info *
2848mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2849{
2850        return nh_grp->priv;
2851}
2852
2853struct mlxsw_sp_nexthop_group_cmp_arg {
2854        enum mlxsw_sp_l3proto proto;
2855        union {
2856                struct fib_info *fi;
2857                struct mlxsw_sp_fib6_entry *fib6_entry;
2858        };
2859};
2860
2861static bool
2862mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2863                                    const struct in6_addr *gw, int ifindex,
2864                                    int weight)
2865{
2866        int i;
2867
2868        for (i = 0; i < nh_grp->count; i++) {
2869                const struct mlxsw_sp_nexthop *nh;
2870
2871                nh = &nh_grp->nexthops[i];
2872                if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2873                    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2874                        return true;
2875        }
2876
2877        return false;
2878}
2879
2880static bool
2881mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2882                            const struct mlxsw_sp_fib6_entry *fib6_entry)
2883{
2884        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2885
2886        if (nh_grp->count != fib6_entry->nrt6)
2887                return false;
2888
2889        list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2890                struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
2891                struct in6_addr *gw;
2892                int ifindex, weight;
2893
2894                ifindex = fib6_nh->fib_nh_dev->ifindex;
2895                weight = fib6_nh->fib_nh_weight;
2896                gw = &fib6_nh->fib_nh_gw6;
2897                if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2898                                                         weight))
2899                        return false;
2900        }
2901
2902        return true;
2903}
2904
2905static int
2906mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2907{
2908        const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2909        const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2910
2911        switch (cmp_arg->proto) {
2912        case MLXSW_SP_L3_PROTO_IPV4:
2913                return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2914        case MLXSW_SP_L3_PROTO_IPV6:
2915                return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2916                                                    cmp_arg->fib6_entry);
2917        default:
2918                WARN_ON(1);
2919                return 1;
2920        }
2921}
2922
2923static int
2924mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2925{
2926        return nh_grp->neigh_tbl->family;
2927}
2928
2929static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2930{
2931        const struct mlxsw_sp_nexthop_group *nh_grp = data;
2932        const struct mlxsw_sp_nexthop *nh;
2933        struct fib_info *fi;
2934        unsigned int val;
2935        int i;
2936
2937        switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2938        case AF_INET:
2939                fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2940                return jhash(&fi, sizeof(fi), seed);
2941        case AF_INET6:
2942                val = nh_grp->count;
2943                for (i = 0; i < nh_grp->count; i++) {
2944                        nh = &nh_grp->nexthops[i];
2945                        val ^= nh->ifindex;
2946                }
2947                return jhash(&val, sizeof(val), seed);
2948        default:
2949                WARN_ON(1);
2950                return 0;
2951        }
2952}
2953
2954static u32
2955mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
2956{
2957        unsigned int val = fib6_entry->nrt6;
2958        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2959        struct net_device *dev;
2960
2961        list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2962                dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
2963                val ^= dev->ifindex;
2964        }
2965
2966        return jhash(&val, sizeof(val), seed);
2967}
2968
2969static u32
2970mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
2971{
2972        const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
2973
2974        switch (cmp_arg->proto) {
2975        case MLXSW_SP_L3_PROTO_IPV4:
2976                return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
2977        case MLXSW_SP_L3_PROTO_IPV6:
2978                return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
2979        default:
2980                WARN_ON(1);
2981                return 0;
2982        }
2983}
2984
2985static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
2986        .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
2987        .hashfn      = mlxsw_sp_nexthop_group_hash,
2988        .obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
2989        .obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
2990};
2991
2992static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
2993                                         struct mlxsw_sp_nexthop_group *nh_grp)
2994{
2995        if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
2996            !nh_grp->gateway)
2997                return 0;
2998
2999        return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3000                                      &nh_grp->ht_node,
3001                                      mlxsw_sp_nexthop_group_ht_params);
3002}
3003
3004static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3005                                          struct mlxsw_sp_nexthop_group *nh_grp)
3006{
3007        if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3008            !nh_grp->gateway)
3009                return;
3010
3011        rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3012                               &nh_grp->ht_node,
3013                               mlxsw_sp_nexthop_group_ht_params);
3014}
3015
3016static struct mlxsw_sp_nexthop_group *
3017mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3018                               struct fib_info *fi)
3019{
3020        struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3021
3022        cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3023        cmp_arg.fi = fi;
3024        return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3025                                      &cmp_arg,
3026                                      mlxsw_sp_nexthop_group_ht_params);
3027}
3028
3029static struct mlxsw_sp_nexthop_group *
3030mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3031                               struct mlxsw_sp_fib6_entry *fib6_entry)
3032{
3033        struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3034
3035        cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3036        cmp_arg.fib6_entry = fib6_entry;
3037        return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3038                                      &cmp_arg,
3039                                      mlxsw_sp_nexthop_group_ht_params);
3040}
3041
3042static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3043        .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3044        .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3045        .key_len = sizeof(struct mlxsw_sp_nexthop_key),
3046};
3047
3048static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3049                                   struct mlxsw_sp_nexthop *nh)
3050{
3051        return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3052                                      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3053}
3054
3055static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3056                                    struct mlxsw_sp_nexthop *nh)
3057{
3058        rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3059                               mlxsw_sp_nexthop_ht_params);
3060}
3061
3062static struct mlxsw_sp_nexthop *
3063mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3064                        struct mlxsw_sp_nexthop_key key)
3065{
3066        return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3067                                      mlxsw_sp_nexthop_ht_params);
3068}
3069
3070static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3071                                             const struct mlxsw_sp_fib *fib,
3072                                             u32 adj_index, u16 ecmp_size,
3073                                             u32 new_adj_index,
3074                                             u16 new_ecmp_size)
3075{
3076        char raleu_pl[MLXSW_REG_RALEU_LEN];
3077
3078        mlxsw_reg_raleu_pack(raleu_pl,
3079                             (enum mlxsw_reg_ralxx_protocol) fib->proto,
3080                             fib->vr->id, adj_index, ecmp_size, new_adj_index,
3081                             new_ecmp_size);
3082        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3083}
3084
3085static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3086                                          struct mlxsw_sp_nexthop_group *nh_grp,
3087                                          u32 old_adj_index, u16 old_ecmp_size)
3088{
3089        struct mlxsw_sp_fib_entry *fib_entry;
3090        struct mlxsw_sp_fib *fib = NULL;
3091        int err;
3092
3093        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3094                if (fib == fib_entry->fib_node->fib)
3095                        continue;
3096                fib = fib_entry->fib_node->fib;
3097                err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3098                                                        old_adj_index,
3099                                                        old_ecmp_size,
3100                                                        nh_grp->adj_index,
3101                                                        nh_grp->ecmp_size);
3102                if (err)
3103                        return err;
3104        }
3105        return 0;
3106}
3107
3108static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3109                                     struct mlxsw_sp_nexthop *nh)
3110{
3111        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3112        char ratr_pl[MLXSW_REG_RATR_LEN];
3113
3114        mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3115                            true, MLXSW_REG_RATR_TYPE_ETHERNET,
3116                            adj_index, neigh_entry->rif);
3117        mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3118        if (nh->counter_valid)
3119                mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3120        else
3121                mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3122
3123        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3124}
3125
3126int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3127                            struct mlxsw_sp_nexthop *nh)
3128{
3129        int i;
3130
3131        for (i = 0; i < nh->num_adj_entries; i++) {
3132                int err;
3133
3134                err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3135                if (err)
3136                        return err;
3137        }
3138
3139        return 0;
3140}
3141
3142static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3143                                          u32 adj_index,
3144                                          struct mlxsw_sp_nexthop *nh)
3145{
3146        const struct mlxsw_sp_ipip_ops *ipip_ops;
3147
3148        ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3149        return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3150}
3151
3152static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3153                                        u32 adj_index,
3154                                        struct mlxsw_sp_nexthop *nh)
3155{
3156        int i;
3157
3158        for (i = 0; i < nh->num_adj_entries; i++) {
3159                int err;
3160
3161                err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3162                                                     nh);
3163                if (err)
3164                        return err;
3165        }
3166
3167        return 0;
3168}
3169
3170static int
3171mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3172                              struct mlxsw_sp_nexthop_group *nh_grp,
3173                              bool reallocate)
3174{
3175        u32 adj_index = nh_grp->adj_index; /* base */
3176        struct mlxsw_sp_nexthop *nh;
3177        int i;
3178        int err;
3179
3180        for (i = 0; i < nh_grp->count; i++) {
3181                nh = &nh_grp->nexthops[i];
3182
3183                if (!nh->should_offload) {
3184                        nh->offloaded = 0;
3185                        continue;
3186                }
3187
3188                if (nh->update || reallocate) {
3189                        switch (nh->type) {
3190                        case MLXSW_SP_NEXTHOP_TYPE_ETH:
3191                                err = mlxsw_sp_nexthop_update
3192                                            (mlxsw_sp, adj_index, nh);
3193                                break;
3194                        case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3195                                err = mlxsw_sp_nexthop_ipip_update
3196                                            (mlxsw_sp, adj_index, nh);
3197                                break;
3198                        }
3199                        if (err)
3200                                return err;
3201                        nh->update = 0;
3202                        nh->offloaded = 1;
3203                }
3204                adj_index += nh->num_adj_entries;
3205        }
3206        return 0;
3207}
3208
3209static bool
3210mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
3211                                 const struct mlxsw_sp_fib_entry *fib_entry);
3212
3213static int
3214mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3215                                    struct mlxsw_sp_nexthop_group *nh_grp)
3216{
3217        struct mlxsw_sp_fib_entry *fib_entry;
3218        int err;
3219
3220        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3221                if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3222                                                      fib_entry))
3223                        continue;
3224                err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3225                if (err)
3226                        return err;
3227        }
3228        return 0;
3229}
3230
3231static void
3232mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
3233                                   enum mlxsw_reg_ralue_op op, int err);
3234
3235static void
3236mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
3237{
3238        enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
3239        struct mlxsw_sp_fib_entry *fib_entry;
3240
3241        list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3242                if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
3243                                                      fib_entry))
3244                        continue;
3245                mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
3246        }
3247}
3248
3249static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3250{
3251        /* Valid sizes for an adjacency group are:
3252         * 1-64, 512, 1024, 2048 and 4096.
3253         */
3254        if (*p_adj_grp_size <= 64)
3255                return;
3256        else if (*p_adj_grp_size <= 512)
3257                *p_adj_grp_size = 512;
3258        else if (*p_adj_grp_size <= 1024)
3259                *p_adj_grp_size = 1024;
3260        else if (*p_adj_grp_size <= 2048)
3261                *p_adj_grp_size = 2048;
3262        else
3263                *p_adj_grp_size = 4096;
3264}
3265
3266static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3267                                             unsigned int alloc_size)
3268{
3269        if (alloc_size >= 4096)
3270                *p_adj_grp_size = 4096;
3271        else if (alloc_size >= 2048)
3272                *p_adj_grp_size = 2048;
3273        else if (alloc_size >= 1024)
3274                *p_adj_grp_size = 1024;
3275        else if (alloc_size >= 512)
3276                *p_adj_grp_size = 512;
3277}
3278
3279static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3280                                     u16 *p_adj_grp_size)
3281{
3282        unsigned int alloc_size;
3283        int err;
3284
3285        /* Round up the requested group size to the next size supported
3286         * by the device and make sure the request can be satisfied.
3287         */
3288        mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3289        err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3290                                              MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3291                                              *p_adj_grp_size, &alloc_size);
3292        if (err)
3293                return err;
3294        /* It is possible the allocation results in more allocated
3295         * entries than requested. Try to use as much of them as
3296         * possible.
3297         */
3298        mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3299
3300        return 0;
3301}
3302
3303static void
3304mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3305{
3306        int i, g = 0, sum_norm_weight = 0;
3307        struct mlxsw_sp_nexthop *nh;
3308
3309        for (i = 0; i < nh_grp->count; i++) {
3310                nh = &nh_grp->nexthops[i];
3311
3312                if (!nh->should_offload)
3313                        continue;
3314                if (g > 0)
3315                        g = gcd(nh->nh_weight, g);
3316                else
3317                        g = nh->nh_weight;
3318        }
3319
3320        for (i = 0; i < nh_grp->count; i++) {
3321                nh = &nh_grp->nexthops[i];
3322
3323                if (!nh->should_offload)
3324                        continue;
3325                nh->norm_nh_weight = nh->nh_weight / g;
3326                sum_norm_weight += nh->norm_nh_weight;
3327        }
3328
3329        nh_grp->sum_norm_weight = sum_norm_weight;
3330}
3331
3332static void
3333mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3334{
3335        int total = nh_grp->sum_norm_weight;
3336        u16 ecmp_size = nh_grp->ecmp_size;
3337        int i, weight = 0, lower_bound = 0;
3338
3339        for (i = 0; i < nh_grp->count; i++) {
3340                struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3341                int upper_bound;
3342
3343                if (!nh->should_offload)
3344                        continue;
3345                weight += nh->norm_nh_weight;
3346                upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3347                nh->num_adj_entries = upper_bound - lower_bound;
3348                lower_bound = upper_bound;
3349        }
3350}
3351
3352static void
3353mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3354                               struct mlxsw_sp_nexthop_group *nh_grp)
3355{
3356        u16 ecmp_size, old_ecmp_size;
3357        struct mlxsw_sp_nexthop *nh;
3358        bool offload_change = false;
3359        u32 adj_index;
3360        bool old_adj_index_valid;
3361        u32 old_adj_index;
3362        int i;
3363        int err;
3364
3365        if (!nh_grp->gateway) {
3366                mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3367                return;
3368        }
3369
3370        for (i = 0; i < nh_grp->count; i++) {
3371                nh = &nh_grp->nexthops[i];
3372
3373                if (nh->should_offload != nh->offloaded) {
3374                        offload_change = true;
3375                        if (nh->should_offload)
3376                                nh->update = 1;
3377                }
3378        }
3379        if (!offload_change) {
3380                /* Nothing was added or removed, so no need to reallocate. Just
3381                 * update MAC on existing adjacency indexes.
3382                 */
3383                err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3384                if (err) {
3385                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3386                        goto set_trap;
3387                }
3388                return;
3389        }
3390        mlxsw_sp_nexthop_group_normalize(nh_grp);
3391        if (!nh_grp->sum_norm_weight)
3392                /* No neigh of this group is connected so we just set
3393                 * the trap and let everthing flow through kernel.
3394                 */
3395                goto set_trap;
3396
3397        ecmp_size = nh_grp->sum_norm_weight;
3398        err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3399        if (err)
3400                /* No valid allocation size available. */
3401                goto set_trap;
3402
3403        err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3404                                  ecmp_size, &adj_index);
3405        if (err) {
3406                /* We ran out of KVD linear space, just set the
3407                 * trap and let everything flow through kernel.
3408                 */
3409                dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3410                goto set_trap;
3411        }
3412        old_adj_index_valid = nh_grp->adj_index_valid;
3413        old_adj_index = nh_grp->adj_index;
3414        old_ecmp_size = nh_grp->ecmp_size;
3415        nh_grp->adj_index_valid = 1;
3416        nh_grp->adj_index = adj_index;
3417        nh_grp->ecmp_size = ecmp_size;
3418        mlxsw_sp_nexthop_group_rebalance(nh_grp);
3419        err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3420        if (err) {
3421                dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3422                goto set_trap;
3423        }
3424
3425        if (!old_adj_index_valid) {
3426                /* The trap was set for fib entries, so we have to call
3427                 * fib entry update to unset it and use adjacency index.
3428                 */
3429                err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3430                if (err) {
3431                        dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3432                        goto set_trap;
3433                }
3434                return;
3435        }
3436
3437        err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3438                                             old_adj_index, old_ecmp_size);
3439        mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3440                           old_ecmp_size, old_adj_index);
3441        if (err) {
3442                dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3443                goto set_trap;
3444        }
3445
3446        /* Offload state within the group changed, so update the flags. */
3447        mlxsw_sp_nexthop_fib_entries_refresh(nh_grp);
3448
3449        return;
3450
3451set_trap:
3452        old_adj_index_valid = nh_grp->adj_index_valid;
3453        nh_grp->adj_index_valid = 0;
3454        for (i = 0; i < nh_grp->count; i++) {
3455                nh = &nh_grp->nexthops[i];
3456                nh->offloaded = 0;
3457        }
3458        err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3459        if (err)
3460                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3461        if (old_adj_index_valid)
3462                mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3463                                   nh_grp->ecmp_size, nh_grp->adj_index);
3464}
3465
3466static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3467                                            bool removing)
3468{
3469        if (!removing)
3470                nh->should_offload = 1;
3471        else
3472                nh->should_offload = 0;
3473        nh->update = 1;
3474}
3475
3476static int
3477mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3478                                    struct mlxsw_sp_neigh_entry *neigh_entry)
3479{
3480        struct neighbour *n, *old_n = neigh_entry->key.n;
3481        struct mlxsw_sp_nexthop *nh;
3482        bool entry_connected;
3483        u8 nud_state, dead;
3484        int err;
3485
3486        nh = list_first_entry(&neigh_entry->nexthop_list,
3487                              struct mlxsw_sp_nexthop, neigh_list_node);
3488
3489        n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3490        if (!n) {
3491                n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3492                                 nh->rif->dev);
3493                if (IS_ERR(n))
3494                        return PTR_ERR(n);
3495                neigh_event_send(n, NULL);
3496        }
3497
3498        mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3499        neigh_entry->key.n = n;
3500        err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3501        if (err)
3502                goto err_neigh_entry_insert;
3503
3504        read_lock_bh(&n->lock);
3505        nud_state = n->nud_state;
3506        dead = n->dead;
3507        read_unlock_bh(&n->lock);
3508        entry_connected = nud_state & NUD_VALID && !dead;
3509
3510        list_for_each_entry(nh, &neigh_entry->nexthop_list,
3511                            neigh_list_node) {
3512                neigh_release(old_n);
3513                neigh_clone(n);
3514                __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3515                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3516        }
3517
3518        neigh_release(n);
3519
3520        return 0;
3521
3522err_neigh_entry_insert:
3523        neigh_entry->key.n = old_n;
3524        mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3525        neigh_release(n);
3526        return err;
3527}
3528
3529static void
3530mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3531                              struct mlxsw_sp_neigh_entry *neigh_entry,
3532                              bool removing, bool dead)
3533{
3534        struct mlxsw_sp_nexthop *nh;
3535
3536        if (list_empty(&neigh_entry->nexthop_list))
3537                return;
3538
3539        if (dead) {
3540                int err;
3541
3542                err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3543                                                          neigh_entry);
3544                if (err)
3545                        dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3546                return;
3547        }
3548
3549        list_for_each_entry(nh, &neigh_entry->nexthop_list,
3550                            neigh_list_node) {
3551                __mlxsw_sp_nexthop_neigh_update(nh, removing);
3552                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3553        }
3554}
3555
3556static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3557                                      struct mlxsw_sp_rif *rif)
3558{
3559        if (nh->rif)
3560                return;
3561
3562        nh->rif = rif;
3563        list_add(&nh->rif_list_node, &rif->nexthop_list);
3564}
3565
3566static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3567{
3568        if (!nh->rif)
3569                return;
3570
3571        list_del(&nh->rif_list_node);
3572        nh->rif = NULL;
3573}
3574
3575static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3576                                       struct mlxsw_sp_nexthop *nh)
3577{
3578        struct mlxsw_sp_neigh_entry *neigh_entry;
3579        struct neighbour *n;
3580        u8 nud_state, dead;
3581        int err;
3582
3583        if (!nh->nh_grp->gateway || nh->neigh_entry)
3584                return 0;
3585
3586        /* Take a reference of neigh here ensuring that neigh would
3587         * not be destructed before the nexthop entry is finished.
3588         * The reference is taken either in neigh_lookup() or
3589         * in neigh_create() in case n is not found.
3590         */
3591        n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3592        if (!n) {
3593                n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3594                                 nh->rif->dev);
3595                if (IS_ERR(n))
3596                        return PTR_ERR(n);
3597                neigh_event_send(n, NULL);
3598        }
3599        neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3600        if (!neigh_entry) {
3601                neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3602                if (IS_ERR(neigh_entry)) {
3603                        err = -EINVAL;
3604                        goto err_neigh_entry_create;
3605                }
3606        }
3607
3608        /* If that is the first nexthop connected to that neigh, add to
3609         * nexthop_neighs_list
3610         */
3611        if (list_empty(&neigh_entry->nexthop_list))
3612                list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3613                              &mlxsw_sp->router->nexthop_neighs_list);
3614
3615        nh->neigh_entry = neigh_entry;
3616        list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3617        read_lock_bh(&n->lock);
3618        nud_state = n->nud_state;
3619        dead = n->dead;
3620        read_unlock_bh(&n->lock);
3621        __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3622
3623        return 0;
3624
3625err_neigh_entry_create:
3626        neigh_release(n);
3627        return err;
3628}
3629
3630static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3631                                        struct mlxsw_sp_nexthop *nh)
3632{
3633        struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3634        struct neighbour *n;
3635
3636        if (!neigh_entry)
3637                return;
3638        n = neigh_entry->key.n;
3639
3640        __mlxsw_sp_nexthop_neigh_update(nh, true);
3641        list_del(&nh->neigh_list_node);
3642        nh->neigh_entry = NULL;
3643
3644        /* If that is the last nexthop connected to that neigh, remove from
3645         * nexthop_neighs_list
3646         */
3647        if (list_empty(&neigh_entry->nexthop_list))
3648                list_del(&neigh_entry->nexthop_neighs_list_node);
3649
3650        if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3651                mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3652
3653        neigh_release(n);
3654}
3655
3656static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3657{
3658        struct net_device *ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3659
3660        return ul_dev ? (ul_dev->flags & IFF_UP) : true;
3661}
3662
3663static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3664                                       struct mlxsw_sp_nexthop *nh,
3665                                       struct mlxsw_sp_ipip_entry *ipip_entry)
3666{
3667        bool removing;
3668
3669        if (!nh->nh_grp->gateway || nh->ipip_entry)
3670                return;
3671
3672        nh->ipip_entry = ipip_entry;
3673        removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3674        __mlxsw_sp_nexthop_neigh_update(nh, removing);
3675        mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3676}
3677
3678static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3679                                       struct mlxsw_sp_nexthop *nh)
3680{
3681        struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3682
3683        if (!ipip_entry)
3684                return;
3685
3686        __mlxsw_sp_nexthop_neigh_update(nh, true);
3687        nh->ipip_entry = NULL;
3688}
3689
3690static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3691                                        const struct fib_nh *fib_nh,
3692                                        enum mlxsw_sp_ipip_type *p_ipipt)
3693{
3694        struct net_device *dev = fib_nh->fib_nh_dev;
3695
3696        return dev &&
3697               fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3698               mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3699}
3700
3701static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3702                                       struct mlxsw_sp_nexthop *nh)
3703{
3704        switch (nh->type) {
3705        case MLXSW_SP_NEXTHOP_TYPE_ETH:
3706                mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3707                mlxsw_sp_nexthop_rif_fini(nh);
3708                break;
3709        case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3710                mlxsw_sp_nexthop_rif_fini(nh);
3711                mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3712                break;
3713        }
3714}
3715
3716static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3717                                       struct mlxsw_sp_nexthop *nh,
3718                                       struct fib_nh *fib_nh)
3719{
3720        const struct mlxsw_sp_ipip_ops *ipip_ops;
3721        struct net_device *dev = fib_nh->fib_nh_dev;
3722        struct mlxsw_sp_ipip_entry *ipip_entry;
3723        struct mlxsw_sp_rif *rif;
3724        int err;
3725
3726        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3727        if (ipip_entry) {
3728                ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3729                if (ipip_ops->can_offload(mlxsw_sp, dev,
3730                                          MLXSW_SP_L3_PROTO_IPV4)) {
3731                        nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3732                        mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3733                        return 0;
3734                }
3735        }
3736
3737        nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3738        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3739        if (!rif)
3740                return 0;
3741
3742        mlxsw_sp_nexthop_rif_init(nh, rif);
3743        err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3744        if (err)
3745                goto err_neigh_init;
3746
3747        return 0;
3748
3749err_neigh_init:
3750        mlxsw_sp_nexthop_rif_fini(nh);
3751        return err;
3752}
3753
/* IPv4 counterpart of mlxsw_sp_nexthop4_type_init(); simply forwards to the
 * protocol-independent mlxsw_sp_nexthop_type_fini().
 */
static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
3759
3760static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3761                                  struct mlxsw_sp_nexthop_group *nh_grp,
3762                                  struct mlxsw_sp_nexthop *nh,
3763                                  struct fib_nh *fib_nh)
3764{
3765        struct net_device *dev = fib_nh->fib_nh_dev;
3766        struct in_device *in_dev;
3767        int err;
3768
3769        nh->nh_grp = nh_grp;
3770        nh->key.fib_nh = fib_nh;
3771#ifdef CONFIG_IP_ROUTE_MULTIPATH
3772        nh->nh_weight = fib_nh->fib_nh_weight;
3773#else
3774        nh->nh_weight = 1;
3775#endif
3776        memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3777        err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3778        if (err)
3779                return err;
3780
3781        mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3782        list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3783
3784        if (!dev)
3785                return 0;
3786
3787        in_dev = __in_dev_get_rtnl(dev);
3788        if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3789            fib_nh->fib_nh_flags & RTNH_F_LINKDOWN)
3790                return 0;
3791
3792        err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3793        if (err)
3794                goto err_nexthop_neigh_init;
3795
3796        return 0;
3797
3798err_nexthop_neigh_init:
3799        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3800        return err;
3801}
3802
3803static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3804                                   struct mlxsw_sp_nexthop *nh)
3805{
3806        mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3807        list_del(&nh->router_list_node);
3808        mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3809        mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3810}
3811
3812static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3813                                    unsigned long event, struct fib_nh *fib_nh)
3814{
3815        struct mlxsw_sp_nexthop_key key;
3816        struct mlxsw_sp_nexthop *nh;
3817
3818        if (mlxsw_sp->router->aborted)
3819                return;
3820
3821        key.fib_nh = fib_nh;
3822        nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3823        if (WARN_ON_ONCE(!nh))
3824                return;
3825
3826        switch (event) {
3827        case FIB_EVENT_NH_ADD:
3828                mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3829                break;
3830        case FIB_EVENT_NH_DEL:
3831                mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3832                break;
3833        }
3834
3835        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3836}
3837
3838static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3839                                        struct mlxsw_sp_rif *rif)
3840{
3841        struct mlxsw_sp_nexthop *nh;
3842        bool removing;
3843
3844        list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3845                switch (nh->type) {
3846                case MLXSW_SP_NEXTHOP_TYPE_ETH:
3847                        removing = false;
3848                        break;
3849                case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3850                        removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3851                        break;
3852                default:
3853                        WARN_ON(1);
3854                        continue;
3855                }
3856
3857                __mlxsw_sp_nexthop_neigh_update(nh, removing);
3858                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3859        }
3860}
3861
3862static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3863                                         struct mlxsw_sp_rif *old_rif,
3864                                         struct mlxsw_sp_rif *new_rif)
3865{
3866        struct mlxsw_sp_nexthop *nh;
3867
3868        list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3869        list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3870                nh->rif = new_rif;
3871        mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3872}
3873
3874static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3875                                           struct mlxsw_sp_rif *rif)
3876{
3877        struct mlxsw_sp_nexthop *nh, *tmp;
3878
3879        list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3880                mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3881                mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3882        }
3883}
3884
3885static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
3886                                   const struct fib_info *fi)
3887{
3888        return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK ||
3889               mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL);
3890}
3891
3892static struct mlxsw_sp_nexthop_group *
3893mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
3894{
3895        struct mlxsw_sp_nexthop_group *nh_grp;
3896        struct mlxsw_sp_nexthop *nh;
3897        struct fib_nh *fib_nh;
3898        int i;
3899        int err;
3900
3901        nh_grp = kzalloc(struct_size(nh_grp, nexthops, fi->fib_nhs),
3902                         GFP_KERNEL);
3903        if (!nh_grp)
3904                return ERR_PTR(-ENOMEM);
3905        nh_grp->priv = fi;
3906        INIT_LIST_HEAD(&nh_grp->fib_list);
3907        nh_grp->neigh_tbl = &arp_tbl;
3908
3909        nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
3910        nh_grp->count = fi->fib_nhs;
3911        fib_info_hold(fi);
3912        for (i = 0; i < nh_grp->count; i++) {
3913                nh = &nh_grp->nexthops[i];
3914                fib_nh = &fi->fib_nh[i];
3915                err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
3916                if (err)
3917                        goto err_nexthop4_init;
3918        }
3919        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
3920        if (err)
3921                goto err_nexthop_group_insert;
3922        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3923        return nh_grp;
3924
3925err_nexthop_group_insert:
3926err_nexthop4_init:
3927        for (i--; i >= 0; i--) {
3928                nh = &nh_grp->nexthops[i];
3929                mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3930        }
3931        fib_info_put(fi);
3932        kfree(nh_grp);
3933        return ERR_PTR(err);
3934}
3935
3936static void
3937mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
3938                                struct mlxsw_sp_nexthop_group *nh_grp)
3939{
3940        struct mlxsw_sp_nexthop *nh;
3941        int i;
3942
3943        mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
3944        for (i = 0; i < nh_grp->count; i++) {
3945                nh = &nh_grp->nexthops[i];
3946                mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
3947        }
3948        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
3949        WARN_ON_ONCE(nh_grp->adj_index_valid);
3950        fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
3951        kfree(nh_grp);
3952}
3953
3954static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
3955                                       struct mlxsw_sp_fib_entry *fib_entry,
3956                                       struct fib_info *fi)
3957{
3958        struct mlxsw_sp_nexthop_group *nh_grp;
3959
3960        nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
3961        if (!nh_grp) {
3962                nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
3963                if (IS_ERR(nh_grp))
3964                        return PTR_ERR(nh_grp);
3965        }
3966        list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
3967        fib_entry->nh_group = nh_grp;
3968        return 0;
3969}
3970
3971static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
3972                                        struct mlxsw_sp_fib_entry *fib_entry)
3973{
3974        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
3975
3976        list_del(&fib_entry->nexthop_group_node);
3977        if (!list_empty(&nh_grp->fib_list))
3978                return;
3979        mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
3980}
3981
3982static bool
3983mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3984{
3985        struct mlxsw_sp_fib4_entry *fib4_entry;
3986
3987        fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
3988                                  common);
3989        return !fib4_entry->tos;
3990}
3991
3992static bool
3993mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
3994{
3995        struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
3996
3997        switch (fib_entry->fib_node->fib->proto) {
3998        case MLXSW_SP_L3_PROTO_IPV4:
3999                if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
4000                        return false;
4001                break;
4002        case MLXSW_SP_L3_PROTO_IPV6:
4003                break;
4004        }
4005
4006        switch (fib_entry->type) {
4007        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4008                return !!nh_group->adj_index_valid;
4009        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4010                return !!nh_group->nh_rif;
4011        case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4012        case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4013        case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4014                return true;
4015        default:
4016                return false;
4017        }
4018}
4019
4020static struct mlxsw_sp_nexthop *
4021mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4022                     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4023{
4024        int i;
4025
4026        for (i = 0; i < nh_grp->count; i++) {
4027                struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4028                struct fib6_info *rt = mlxsw_sp_rt6->rt;
4029
4030                if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
4031                    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4032                                    &rt->fib6_nh.fib_nh_gw6))
4033                        return nh;
4034                continue;
4035        }
4036
4037        return NULL;
4038}
4039
4040static void
4041mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4042{
4043        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4044        int i;
4045
4046        if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4047            fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE ||
4048            fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
4049            fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
4050                nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4051                return;
4052        }
4053
4054        for (i = 0; i < nh_grp->count; i++) {
4055                struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4056
4057                if (nh->offloaded)
4058                        nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4059                else
4060                        nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4061        }
4062}
4063
4064static void
4065mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4066{
4067        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4068        int i;
4069
4070        if (!list_is_singular(&nh_grp->fib_list))
4071                return;
4072
4073        for (i = 0; i < nh_grp->count; i++) {
4074                struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4075
4076                nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4077        }
4078}
4079
4080static void
4081mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4082{
4083        struct mlxsw_sp_fib6_entry *fib6_entry;
4084        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4085
4086        fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4087                                  common);
4088
4089        if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
4090            fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
4091                list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
4092                                 list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
4093                return;
4094        }
4095
4096        list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4097                struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4098                struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
4099                struct mlxsw_sp_nexthop *nh;
4100
4101                nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
4102                if (nh && nh->offloaded)
4103                        fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
4104                else
4105                        fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
4106        }
4107}
4108
4109static void
4110mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4111{
4112        struct mlxsw_sp_fib6_entry *fib6_entry;
4113        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4114
4115        fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4116                                  common);
4117        list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
4118                struct fib6_info *rt = mlxsw_sp_rt6->rt;
4119
4120                rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
4121        }
4122}
4123
4124static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
4125{
4126        switch (fib_entry->fib_node->fib->proto) {
4127        case MLXSW_SP_L3_PROTO_IPV4:
4128                mlxsw_sp_fib4_entry_offload_set(fib_entry);
4129                break;
4130        case MLXSW_SP_L3_PROTO_IPV6:
4131                mlxsw_sp_fib6_entry_offload_set(fib_entry);
4132                break;
4133        }
4134}
4135
4136static void
4137mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
4138{
4139        switch (fib_entry->fib_node->fib->proto) {
4140        case MLXSW_SP_L3_PROTO_IPV4:
4141                mlxsw_sp_fib4_entry_offload_unset(fib_entry);
4142                break;
4143        case MLXSW_SP_L3_PROTO_IPV6:
4144                mlxsw_sp_fib6_entry_offload_unset(fib_entry);
4145                break;
4146        }
4147}
4148
4149static void
4150mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
4151                                   enum mlxsw_reg_ralue_op op, int err)
4152{
4153        switch (op) {
4154        case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4155                return mlxsw_sp_fib_entry_offload_unset(fib_entry);
4156        case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4157                if (err)
4158                        return;
4159                if (mlxsw_sp_fib_entry_should_offload(fib_entry))
4160                        mlxsw_sp_fib_entry_offload_set(fib_entry);
4161                else
4162                        mlxsw_sp_fib_entry_offload_unset(fib_entry);
4163                return;
4164        default:
4165                return;
4166        }
4167}
4168
4169static void
4170mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4171                              const struct mlxsw_sp_fib_entry *fib_entry,
4172                              enum mlxsw_reg_ralue_op op)
4173{
4174        struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4175        enum mlxsw_reg_ralxx_protocol proto;
4176        u32 *p_dip;
4177
4178        proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4179
4180        switch (fib->proto) {
4181        case MLXSW_SP_L3_PROTO_IPV4:
4182                p_dip = (u32 *) fib_entry->fib_node->key.addr;
4183                mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4184                                      fib_entry->fib_node->key.prefix_len,
4185                                      *p_dip);
4186                break;
4187        case MLXSW_SP_L3_PROTO_IPV6:
4188                mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4189                                      fib_entry->fib_node->key.prefix_len,
4190                                      fib_entry->fib_node->key.addr);
4191                break;
4192        }
4193}
4194
4195static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4196                                        struct mlxsw_sp_fib_entry *fib_entry,
4197                                        enum mlxsw_reg_ralue_op op)
4198{
4199        char ralue_pl[MLXSW_REG_RALUE_LEN];
4200        enum mlxsw_reg_ralue_trap_action trap_action;
4201        u16 trap_id = 0;
4202        u32 adjacency_index = 0;
4203        u16 ecmp_size = 0;
4204
4205        /* In case the nexthop group adjacency index is valid, use it
4206         * with provided ECMP size. Otherwise, setup trap and pass
4207         * traffic to kernel.
4208         */
4209        if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4210                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4211                adjacency_index = fib_entry->nh_group->adj_index;
4212                ecmp_size = fib_entry->nh_group->ecmp_size;
4213        } else {
4214                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4215                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4216        }
4217
4218        mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4219        mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4220                                        adjacency_index, ecmp_size);
4221        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4222}
4223
4224static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4225                                       struct mlxsw_sp_fib_entry *fib_entry,
4226                                       enum mlxsw_reg_ralue_op op)
4227{
4228        struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4229        enum mlxsw_reg_ralue_trap_action trap_action;
4230        char ralue_pl[MLXSW_REG_RALUE_LEN];
4231        u16 trap_id = 0;
4232        u16 rif_index = 0;
4233
4234        if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4235                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4236                rif_index = rif->rif_index;
4237        } else {
4238                trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4239                trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4240        }
4241
4242        mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4243        mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4244                                       rif_index);
4245        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4246}
4247
4248static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4249                                      struct mlxsw_sp_fib_entry *fib_entry,
4250                                      enum mlxsw_reg_ralue_op op)
4251{
4252        char ralue_pl[MLXSW_REG_RALUE_LEN];
4253
4254        mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4255        mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4256        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4257}
4258
4259static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4260                                           struct mlxsw_sp_fib_entry *fib_entry,
4261                                           enum mlxsw_reg_ralue_op op)
4262{
4263        enum mlxsw_reg_ralue_trap_action trap_action;
4264        char ralue_pl[MLXSW_REG_RALUE_LEN];
4265
4266        trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4267        mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4268        mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4269        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4270}
4271
4272static int
4273mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4274                                 struct mlxsw_sp_fib_entry *fib_entry,
4275                                 enum mlxsw_reg_ralue_op op)
4276{
4277        struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4278        const struct mlxsw_sp_ipip_ops *ipip_ops;
4279
4280        if (WARN_ON(!ipip_entry))
4281                return -EINVAL;
4282
4283        ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4284        return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4285                                      fib_entry->decap.tunnel_index);
4286}
4287
4288static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4289                                           struct mlxsw_sp_fib_entry *fib_entry,
4290                                           enum mlxsw_reg_ralue_op op)
4291{
4292        char ralue_pl[MLXSW_REG_RALUE_LEN];
4293
4294        mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4295        mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4296                                           fib_entry->decap.tunnel_index);
4297        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4298}
4299
4300static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4301                                   struct mlxsw_sp_fib_entry *fib_entry,
4302                                   enum mlxsw_reg_ralue_op op)
4303{
4304        switch (fib_entry->type) {
4305        case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4306                return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4307        case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4308                return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4309        case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4310                return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4311        case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4312                return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4313        case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4314                return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4315                                                        fib_entry, op);
4316        case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4317                return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4318        }
4319        return -EINVAL;
4320}
4321
4322static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4323                                 struct mlxsw_sp_fib_entry *fib_entry,
4324                                 enum mlxsw_reg_ralue_op op)
4325{
4326        int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4327
4328        mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
4329
4330        return err;
4331}
4332
4333static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4334                                     struct mlxsw_sp_fib_entry *fib_entry)
4335{
4336        return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4337                                     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4338}
4339
4340static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4341                                  struct mlxsw_sp_fib_entry *fib_entry)
4342{
4343        return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4344                                     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4345}
4346
4347static int
4348mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4349                             const struct fib_entry_notifier_info *fen_info,
4350                             struct mlxsw_sp_fib_entry *fib_entry)
4351{
4352        union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4353        u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4354        struct net_device *dev = fen_info->fi->fib_dev;
4355        struct mlxsw_sp_ipip_entry *ipip_entry;
4356        struct fib_info *fi = fen_info->fi;
4357
4358        switch (fen_info->type) {
4359        case RTN_LOCAL:
4360                ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4361                                                 MLXSW_SP_L3_PROTO_IPV4, dip);
4362                if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4363                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4364                        return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4365                                                             fib_entry,
4366                                                             ipip_entry);
4367                }
4368                if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
4369                                                     dip.addr4)) {
4370                        u32 t_index;
4371
4372                        t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
4373                        fib_entry->decap.tunnel_index = t_index;
4374                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4375                        return 0;
4376                }
4377                /* fall through */
4378        case RTN_BROADCAST:
4379                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4380                return 0;
4381        case RTN_BLACKHOLE:
4382                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4383                return 0;
4384        case RTN_UNREACHABLE: /* fall through */
4385        case RTN_PROHIBIT:
4386                /* Packets hitting these routes need to be trapped, but
4387                 * can do so with a lower priority than packets directed
4388                 * at the host, so use action type local instead of trap.
4389                 */
4390                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4391                return 0;
4392        case RTN_UNICAST:
4393                if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4394                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4395                else
4396                        fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4397                return 0;
4398        default:
4399                return -EINVAL;
4400        }
4401}
4402
4403static struct mlxsw_sp_fib4_entry *
4404mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4405                           struct mlxsw_sp_fib_node *fib_node,
4406                           const struct fib_entry_notifier_info *fen_info)
4407{
4408        struct mlxsw_sp_fib4_entry *fib4_entry;
4409        struct mlxsw_sp_fib_entry *fib_entry;
4410        int err;
4411
4412        fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4413        if (!fib4_entry)
4414                return ERR_PTR(-ENOMEM);
4415        fib_entry = &fib4_entry->common;
4416
4417        err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4418        if (err)
4419                goto err_fib4_entry_type_set;
4420
4421        err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4422        if (err)
4423                goto err_nexthop4_group_get;
4424
4425        fib4_entry->prio = fen_info->fi->fib_priority;
4426        fib4_entry->tb_id = fen_info->tb_id;
4427        fib4_entry->type = fen_info->type;
4428        fib4_entry->tos = fen_info->tos;
4429
4430        fib_entry->fib_node = fib_node;
4431
4432        return fib4_entry;
4433
4434err_nexthop4_group_get:
4435err_fib4_entry_type_set:
4436        kfree(fib4_entry);
4437        return ERR_PTR(err);
4438}
4439
4440static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4441                                        struct mlxsw_sp_fib4_entry *fib4_entry)
4442{
4443        mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4444        kfree(fib4_entry);
4445}
4446
4447static struct mlxsw_sp_fib4_entry *
4448mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4449                           const struct fib_entry_notifier_info *fen_info)
4450{
4451        struct mlxsw_sp_fib4_entry *fib4_entry;
4452        struct mlxsw_sp_fib_node *fib_node;
4453        struct mlxsw_sp_fib *fib;
4454        struct mlxsw_sp_vr *vr;
4455
4456        vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4457        if (!vr)
4458                return NULL;
4459        fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4460
4461        fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4462                                            sizeof(fen_info->dst),
4463                                            fen_info->dst_len);
4464        if (!fib_node)
4465                return NULL;
4466
4467        list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4468                if (fib4_entry->tb_id == fen_info->tb_id &&
4469                    fib4_entry->tos == fen_info->tos &&
4470                    fib4_entry->type == fen_info->type &&
4471                    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4472                    fen_info->fi) {
4473                        return fib4_entry;
4474                }
4475        }
4476
4477        return NULL;
4478}
4479
4480static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4481        .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4482        .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4483        .key_len = sizeof(struct mlxsw_sp_fib_key),
4484        .automatic_shrinking = true,
4485};
4486
4487static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4488                                    struct mlxsw_sp_fib_node *fib_node)
4489{
4490        return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4491                                      mlxsw_sp_fib_ht_params);
4492}
4493
4494static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4495                                     struct mlxsw_sp_fib_node *fib_node)
4496{
4497        rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4498                               mlxsw_sp_fib_ht_params);
4499}
4500
4501static struct mlxsw_sp_fib_node *
4502mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4503                         size_t addr_len, unsigned char prefix_len)
4504{
4505        struct mlxsw_sp_fib_key key;
4506
4507        memset(&key, 0, sizeof(key));
4508        memcpy(key.addr, addr, addr_len);
4509        key.prefix_len = prefix_len;
4510        return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4511}
4512
4513static struct mlxsw_sp_fib_node *
4514mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4515                         size_t addr_len, unsigned char prefix_len)
4516{
4517        struct mlxsw_sp_fib_node *fib_node;
4518
4519        fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4520        if (!fib_node)
4521                return NULL;
4522
4523        INIT_LIST_HEAD(&fib_node->entry_list);
4524        list_add(&fib_node->list, &fib->node_list);
4525        memcpy(fib_node->key.addr, addr, addr_len);
4526        fib_node->key.prefix_len = prefix_len;
4527
4528        return fib_node;
4529}
4530
4531static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4532{
4533        list_del(&fib_node->list);
4534        WARN_ON(!list_empty(&fib_node->entry_list));
4535        kfree(fib_node);
4536}
4537
4538static bool
4539mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
4540                                 const struct mlxsw_sp_fib_entry *fib_entry)
4541{
4542        return list_first_entry(&fib_node->entry_list,
4543                                struct mlxsw_sp_fib_entry, list) == fib_entry;
4544}
4545
4546static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4547                                      struct mlxsw_sp_fib_node *fib_node)
4548{
4549        struct mlxsw_sp_prefix_usage req_prefix_usage;
4550        struct mlxsw_sp_fib *fib = fib_node->fib;
4551        struct mlxsw_sp_lpm_tree *lpm_tree;
4552        int err;
4553
4554        lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4555        if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4556                goto out;
4557
4558        mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4559        mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4560        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4561                                         fib->proto);
4562        if (IS_ERR(lpm_tree))
4563                return PTR_ERR(lpm_tree);
4564
4565        err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4566        if (err)
4567                goto err_lpm_tree_replace;
4568
4569out:
4570        lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4571        return 0;
4572
4573err_lpm_tree_replace:
4574        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4575        return err;
4576}
4577
4578static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4579                                         struct mlxsw_sp_fib_node *fib_node)
4580{
4581        struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4582        struct mlxsw_sp_prefix_usage req_prefix_usage;
4583        struct mlxsw_sp_fib *fib = fib_node->fib;
4584        int err;
4585
4586        if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4587                return;
4588        /* Try to construct a new LPM tree from the current prefix usage
4589         * minus the unused one. If we fail, continue using the old one.
4590         */
4591        mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4592        mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4593                                    fib_node->key.prefix_len);
4594        lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4595                                         fib->proto);
4596        if (IS_ERR(lpm_tree))
4597                return;
4598
4599        err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4600        if (err)
4601                goto err_lpm_tree_replace;
4602
4603        return;
4604
4605err_lpm_tree_replace:
4606        mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4607}
4608
4609static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4610                                  struct mlxsw_sp_fib_node *fib_node,
4611                                  struct mlxsw_sp_fib *fib)
4612{
4613        int err;
4614
4615        err = mlxsw_sp_fib_node_insert(fib, fib_node);
4616        if (err)
4617                return err;
4618        fib_node->fib = fib;
4619
4620        err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4621        if (err)
4622                goto err_fib_lpm_tree_link;
4623
4624        return 0;
4625
4626err_fib_lpm_tree_link:
4627        fib_node->fib = NULL;
4628        mlxsw_sp_fib_node_remove(fib, fib_node);
4629        return err;
4630}
4631
4632static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4633                                   struct mlxsw_sp_fib_node *fib_node)
4634{
4635        struct mlxsw_sp_fib *fib = fib_node->fib;
4636
4637        mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4638        fib_node->fib = NULL;
4639        mlxsw_sp_fib_node_remove(fib, fib_node);
4640}
4641
4642static struct mlxsw_sp_fib_node *
4643mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4644                      size_t addr_len, unsigned char prefix_len,
4645                      enum mlxsw_sp_l3proto proto)
4646{
4647        struct mlxsw_sp_fib_node *fib_node;
4648        struct mlxsw_sp_fib *fib;
4649        struct mlxsw_sp_vr *vr;
4650        int err;
4651
4652        vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4653        if (IS_ERR(vr))
4654                return ERR_CAST(vr);
4655        fib = mlxsw_sp_vr_fib(vr, proto);
4656
4657        fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4658        if (fib_node)
4659                return fib_node;
4660
4661        fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4662        if (!fib_node) {
4663                err = -ENOMEM;
4664                goto err_fib_node_create;
4665        }
4666
4667        err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4668        if (err)
4669                goto err_fib_node_init;
4670
4671        return fib_node;
4672
4673err_fib_node_init:
4674        mlxsw_sp_fib_node_destroy(fib_node);
4675err_fib_node_create:
4676        mlxsw_sp_vr_put(mlxsw_sp, vr);
4677        return ERR_PTR(err);
4678}
4679
4680static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4681                                  struct mlxsw_sp_fib_node *fib_node)
4682{
4683        struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4684
4685        if (!list_empty(&fib_node->entry_list))
4686                return;
4687        mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4688        mlxsw_sp_fib_node_destroy(fib_node);
4689        mlxsw_sp_vr_put(mlxsw_sp, vr);
4690}
4691
4692static struct mlxsw_sp_fib4_entry *
4693mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
4694                              const struct mlxsw_sp_fib4_entry *new4_entry)
4695{
4696        struct mlxsw_sp_fib4_entry *fib4_entry;
4697
4698        list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
4699                if (fib4_entry->tb_id > new4_entry->tb_id)
4700                        continue;
4701                if (fib4_entry->tb_id != new4_entry->tb_id)
4702                        break;
4703                if (fib4_entry->tos > new4_entry->tos)
4704                        continue;
4705                if (fib4_entry->prio >= new4_entry->prio ||
4706                    fib4_entry->tos < new4_entry->tos)
4707                        return fib4_entry;
4708        }
4709
4710        return NULL;
4711}
4712
4713static int
4714mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
4715                               struct mlxsw_sp_fib4_entry *new4_entry)
4716{
4717        struct mlxsw_sp_fib_node *fib_node;
4718
4719        if (WARN_ON(!fib4_entry))
4720                return -EINVAL;
4721
4722        fib_node = fib4_entry->common.fib_node;
4723        list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
4724                                 common.list) {
4725                if (fib4_entry->tb_id != new4_entry->tb_id ||
4726                    fib4_entry->tos != new4_entry->tos ||
4727                    fib4_entry->prio != new4_entry->prio)
4728                        break;
4729        }
4730
4731        list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
4732        return 0;
4733}
4734
4735static int
4736mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
4737                               bool replace, bool append)
4738{
4739        struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
4740        struct mlxsw_sp_fib4_entry *fib4_entry;
4741
4742        fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
4743
4744        if (append)
4745                return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
4746        if (replace && WARN_ON(!fib4_entry))
4747                return -EINVAL;
4748
4749        /* Insert new entry before replaced one, so that we can later
4750         * remove the second.
4751         */
4752        if (fib4_entry) {
4753                list_add_tail(&new4_entry->common.list,
4754                              &fib4_entry->common.list);
4755        } else {
4756                struct mlxsw_sp_fib4_entry *last;
4757
4758                list_for_each_entry(last, &fib_node->entry_list, common.list) {
4759                        if (new4_entry->tb_id > last->tb_id)
4760                                break;
4761                        fib4_entry = last;
4762                }
4763
4764                if (fib4_entry)
4765                        list_add(&new4_entry->common.list,
4766                                 &fib4_entry->common.list);
4767                else
4768                        list_add(&new4_entry->common.list,
4769                                 &fib_node->entry_list);
4770        }
4771
4772        return 0;
4773}
4774
4775static void
4776mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
4777{
4778        list_del(&fib4_entry->common.list);
4779}
4780
4781static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
4782                                       struct mlxsw_sp_fib_entry *fib_entry)
4783{
4784        struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4785
4786        if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4787                return 0;
4788
4789        /* To prevent packet loss, overwrite the previously offloaded
4790         * entry.
4791         */
4792        if (!list_is_singular(&fib_node->entry_list)) {
4793                enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4794                struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4795
4796                mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
4797        }
4798
4799        return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4800}
4801
4802static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
4803                                        struct mlxsw_sp_fib_entry *fib_entry)
4804{
4805        struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4806
4807        if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
4808                return;
4809
4810        /* Promote the next entry by overwriting the deleted entry */
4811        if (!list_is_singular(&fib_node->entry_list)) {
4812                struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
4813                enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
4814
4815                mlxsw_sp_fib_entry_update(mlxsw_sp, n);
4816                mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
4817                return;
4818        }
4819
4820        mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4821}
4822
4823static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4824                                         struct mlxsw_sp_fib4_entry *fib4_entry,
4825                                         bool replace, bool append)
4826{
4827        int err;
4828
4829        err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
4830        if (err)
4831                return err;
4832
4833        err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
4834        if (err)
4835                goto err_fib_node_entry_add;
4836
4837        return 0;
4838
4839err_fib_node_entry_add:
4840        mlxsw_sp_fib4_node_list_remove(fib4_entry);
4841        return err;
4842}
4843
4844static void
4845mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4846                                struct mlxsw_sp_fib4_entry *fib4_entry)
4847{
4848        mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
4849        mlxsw_sp_fib4_node_list_remove(fib4_entry);
4850
4851        if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
4852                mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
4853}
4854
4855static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
4856                                        struct mlxsw_sp_fib4_entry *fib4_entry,
4857                                        bool replace)
4858{
4859        struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4860        struct mlxsw_sp_fib4_entry *replaced;
4861
4862        if (!replace)
4863                return;
4864
4865        /* We inserted the new entry before replaced one */
4866        replaced = list_next_entry(fib4_entry, common.list);
4867
4868        mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
4869        mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
4870        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4871}
4872
4873static int
4874mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
4875                         const struct fib_entry_notifier_info *fen_info,
4876                         bool replace, bool append)
4877{
4878        struct mlxsw_sp_fib4_entry *fib4_entry;
4879        struct mlxsw_sp_fib_node *fib_node;
4880        int err;
4881
4882        if (mlxsw_sp->router->aborted)
4883                return 0;
4884
4885        fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4886                                         &fen_info->dst, sizeof(fen_info->dst),
4887                                         fen_info->dst_len,
4888                                         MLXSW_SP_L3_PROTO_IPV4);
4889        if (IS_ERR(fib_node)) {
4890                dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4891                return PTR_ERR(fib_node);
4892        }
4893
4894        fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4895        if (IS_ERR(fib4_entry)) {
4896                dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4897                err = PTR_ERR(fib4_entry);
4898                goto err_fib4_entry_create;
4899        }
4900
4901        err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
4902                                            append);
4903        if (err) {
4904                dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4905                goto err_fib4_node_entry_link;
4906        }
4907
4908        mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
4909
4910        return 0;
4911
4912err_fib4_node_entry_link:
4913        mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4914err_fib4_entry_create:
4915        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4916        return err;
4917}
4918
4919static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4920                                     struct fib_entry_notifier_info *fen_info)
4921{
4922        struct mlxsw_sp_fib4_entry *fib4_entry;
4923        struct mlxsw_sp_fib_node *fib_node;
4924
4925        if (mlxsw_sp->router->aborted)
4926                return;
4927
4928        fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4929        if (WARN_ON(!fib4_entry))
4930                return;
4931        fib_node = fib4_entry->common.fib_node;
4932
4933        mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
4934        mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4935        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4936}
4937
4938static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
4939{
4940        /* Packets with link-local destination IP arriving to the router
4941         * are trapped to the CPU, so no need to program specific routes
4942         * for them.
4943         */
4944        if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
4945                return true;
4946
4947        /* Multicast routes aren't supported, so ignore them. Neighbour
4948         * Discovery packets are specifically trapped.
4949         */
4950        if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
4951                return true;
4952
4953        /* Cloned routes are irrelevant in the forwarding path. */
4954        if (rt->fib6_flags & RTF_CACHE)
4955                return true;
4956
4957        return false;
4958}
4959
4960static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
4961{
4962        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4963
4964        mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
4965        if (!mlxsw_sp_rt6)
4966                return ERR_PTR(-ENOMEM);
4967
4968        /* In case of route replace, replaced route is deleted with
4969         * no notification. Take reference to prevent accessing freed
4970         * memory.
4971         */
4972        mlxsw_sp_rt6->rt = rt;
4973        fib6_info_hold(rt);
4974
4975        return mlxsw_sp_rt6;
4976}
4977
4978#if IS_ENABLED(CONFIG_IPV6)
4979static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4980{
4981        fib6_info_release(rt);
4982}
4983#else
4984static void mlxsw_sp_rt6_release(struct fib6_info *rt)
4985{
4986}
4987#endif
4988
4989static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4990{
4991        mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
4992        kfree(mlxsw_sp_rt6);
4993}
4994
4995static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
4996{
4997        /* RTF_CACHE routes are ignored */
4998        return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
4999}
5000
5001static struct fib6_info *
5002mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
5003{
5004        return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
5005                                list)->rt;
5006}
5007
5008static struct mlxsw_sp_fib6_entry *
5009mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5010                                 const struct fib6_info *nrt, bool replace)
5011{
5012        struct mlxsw_sp_fib6_entry *fib6_entry;
5013
5014        if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
5015                return NULL;
5016
5017        list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5018                struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5019
5020                /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
5021                 * virtual router.
5022                 */
5023                if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5024                        continue;
5025                if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5026                        break;
5027                if (rt->fib6_metric < nrt->fib6_metric)
5028                        continue;
5029                if (rt->fib6_metric == nrt->fib6_metric &&
5030                    mlxsw_sp_fib6_rt_can_mp(rt))
5031                        return fib6_entry;
5032                if (rt->fib6_metric > nrt->fib6_metric)
5033                        break;
5034        }
5035
5036        return NULL;
5037}
5038
5039static struct mlxsw_sp_rt6 *
5040mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
5041                            const struct fib6_info *rt)
5042{
5043        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5044
5045        list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
5046                if (mlxsw_sp_rt6->rt == rt)
5047                        return mlxsw_sp_rt6;
5048        }
5049
5050        return NULL;
5051}
5052
5053static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
5054                                        const struct fib6_info *rt,
5055                                        enum mlxsw_sp_ipip_type *ret)
5056{
5057        return rt->fib6_nh.fib_nh_dev &&
5058               mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
5059}
5060
5061static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
5062                                       struct mlxsw_sp_nexthop_group *nh_grp,
5063                                       struct mlxsw_sp_nexthop *nh,
5064                                       const struct fib6_info *rt)
5065{
5066        const struct mlxsw_sp_ipip_ops *ipip_ops;
5067        struct mlxsw_sp_ipip_entry *ipip_entry;
5068        struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5069        struct mlxsw_sp_rif *rif;
5070        int err;
5071
5072        ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5073        if (ipip_entry) {
5074                ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5075                if (ipip_ops->can_offload(mlxsw_sp, dev,
5076                                          MLXSW_SP_L3_PROTO_IPV6)) {
5077                        nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5078                        mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5079                        return 0;
5080                }
5081        }
5082
5083        nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5084        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5085        if (!rif)
5086                return 0;
5087        mlxsw_sp_nexthop_rif_init(nh, rif);
5088
5089        err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5090        if (err)
5091                goto err_nexthop_neigh_init;
5092
5093        return 0;
5094
5095err_nexthop_neigh_init:
5096        mlxsw_sp_nexthop_rif_fini(nh);
5097        return err;
5098}
5099
/* IPv6 wrapper: type-specific teardown is delegated to the common
 * mlxsw_sp_nexthop_type_fini() helper.
 */
static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
					struct mlxsw_sp_nexthop *nh)
{
	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
}
5105
5106static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5107                                  struct mlxsw_sp_nexthop_group *nh_grp,
5108                                  struct mlxsw_sp_nexthop *nh,
5109                                  const struct fib6_info *rt)
5110{
5111        struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5112
5113        nh->nh_grp = nh_grp;
5114        nh->nh_weight = rt->fib6_nh.fib_nh_weight;
5115        memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
5116        mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5117
5118        list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5119
5120        if (!dev)
5121                return 0;
5122        nh->ifindex = dev->ifindex;
5123
5124        return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5125}
5126
5127static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5128                                   struct mlxsw_sp_nexthop *nh)
5129{
5130        mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5131        list_del(&nh->router_list_node);
5132        mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5133}
5134
5135static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5136                                    const struct fib6_info *rt)
5137{
5138        return rt->fib6_nh.fib_nh_gw_family ||
5139               mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5140}
5141
5142static struct mlxsw_sp_nexthop_group *
5143mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5144                               struct mlxsw_sp_fib6_entry *fib6_entry)
5145{
5146        struct mlxsw_sp_nexthop_group *nh_grp;
5147        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5148        struct mlxsw_sp_nexthop *nh;
5149        int i = 0;
5150        int err;
5151
5152        nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5153                         GFP_KERNEL);
5154        if (!nh_grp)
5155                return ERR_PTR(-ENOMEM);
5156        INIT_LIST_HEAD(&nh_grp->fib_list);
5157#if IS_ENABLED(CONFIG_IPV6)
5158        nh_grp->neigh_tbl = &nd_tbl;
5159#endif
5160        mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5161                                        struct mlxsw_sp_rt6, list);
5162        nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5163        nh_grp->count = fib6_entry->nrt6;
5164        for (i = 0; i < nh_grp->count; i++) {
5165                struct fib6_info *rt = mlxsw_sp_rt6->rt;
5166
5167                nh = &nh_grp->nexthops[i];
5168                err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5169                if (err)
5170                        goto err_nexthop6_init;
5171                mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5172        }
5173
5174        err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5175        if (err)
5176                goto err_nexthop_group_insert;
5177
5178        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5179        return nh_grp;
5180
5181err_nexthop_group_insert:
5182err_nexthop6_init:
5183        for (i--; i >= 0; i--) {
5184                nh = &nh_grp->nexthops[i];
5185                mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5186        }
5187        kfree(nh_grp);
5188        return ERR_PTR(err);
5189}
5190
5191static void
5192mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5193                                struct mlxsw_sp_nexthop_group *nh_grp)
5194{
5195        struct mlxsw_sp_nexthop *nh;
5196        int i = nh_grp->count;
5197
5198        mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5199        for (i--; i >= 0; i--) {
5200                nh = &nh_grp->nexthops[i];
5201                mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5202        }
5203        mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5204        WARN_ON(nh_grp->adj_index_valid);
5205        kfree(nh_grp);
5206}
5207
5208static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5209                                       struct mlxsw_sp_fib6_entry *fib6_entry)
5210{
5211        struct mlxsw_sp_nexthop_group *nh_grp;
5212
5213        nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5214        if (!nh_grp) {
5215                nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5216                if (IS_ERR(nh_grp))
5217                        return PTR_ERR(nh_grp);
5218        }
5219
5220        list_add_tail(&fib6_entry->common.nexthop_group_node,
5221                      &nh_grp->fib_list);
5222        fib6_entry->common.nh_group = nh_grp;
5223
5224        return 0;
5225}
5226
5227static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5228                                        struct mlxsw_sp_fib_entry *fib_entry)
5229{
5230        struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5231
5232        list_del(&fib_entry->nexthop_group_node);
5233        if (!list_empty(&nh_grp->fib_list))
5234                return;
5235        mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5236}
5237
5238static int
5239mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5240                               struct mlxsw_sp_fib6_entry *fib6_entry)
5241{
5242        struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5243        int err;
5244
5245        fib6_entry->common.nh_group = NULL;
5246        list_del(&fib6_entry->common.nexthop_group_node);
5247
5248        err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5249        if (err)
5250                goto err_nexthop6_group_get;
5251
5252        /* In case this entry is offloaded, then the adjacency index
5253         * currently associated with it in the device's table is that
5254         * of the old group. Start using the new one instead.
5255         */
5256        err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5257        if (err)
5258                goto err_fib_node_entry_add;
5259
5260        if (list_empty(&old_nh_grp->fib_list))
5261                mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5262
5263        return 0;
5264
5265err_fib_node_entry_add:
5266        mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5267err_nexthop6_group_get:
5268        list_add_tail(&fib6_entry->common.nexthop_group_node,
5269                      &old_nh_grp->fib_list);
5270        fib6_entry->common.nh_group = old_nh_grp;
5271        return err;
5272}
5273
5274static int
5275mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5276                                struct mlxsw_sp_fib6_entry *fib6_entry,
5277                                struct fib6_info *rt)
5278{
5279        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5280        int err;
5281
5282        mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5283        if (IS_ERR(mlxsw_sp_rt6))
5284                return PTR_ERR(mlxsw_sp_rt6);
5285
5286        list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5287        fib6_entry->nrt6++;
5288
5289        err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5290        if (err)
5291                goto err_nexthop6_group_update;
5292
5293        return 0;
5294
5295err_nexthop6_group_update:
5296        fib6_entry->nrt6--;
5297        list_del(&mlxsw_sp_rt6->list);
5298        mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5299        return err;
5300}
5301
5302static void
5303mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5304                                struct mlxsw_sp_fib6_entry *fib6_entry,
5305                                struct fib6_info *rt)
5306{
5307        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5308
5309        mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt);
5310        if (WARN_ON(!mlxsw_sp_rt6))
5311                return;
5312
5313        fib6_entry->nrt6--;
5314        list_del(&mlxsw_sp_rt6->list);
5315        mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5316        mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5317}
5318
5319static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5320                                         struct mlxsw_sp_fib_entry *fib_entry,
5321                                         const struct fib6_info *rt)
5322{
5323        /* Packets hitting RTF_REJECT routes need to be discarded by the
5324         * stack. We can rely on their destination device not having a
5325         * RIF (it's the loopback device) and can thus use action type
5326         * local, which will cause them to be trapped with a lower
5327         * priority than packets that need to be locally received.
5328         */
5329        if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5330                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5331        else if (rt->fib6_type == RTN_BLACKHOLE)
5332                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5333        else if (rt->fib6_flags & RTF_REJECT)
5334                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5335        else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5336                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5337        else
5338                fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5339}
5340
5341static void
5342mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5343{
5344        struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5345
5346        list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5347                                 list) {
5348                fib6_entry->nrt6--;
5349                list_del(&mlxsw_sp_rt6->list);
5350                mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5351        }
5352}
5353
5354static struct mlxsw_sp_fib6_entry *
5355mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5356                           struct mlxsw_sp_fib_node *fib_node,
5357                           struct fib6_info *rt)
5358{
5359        struct mlxsw_sp_fib6_entry *fib6_entry;
5360        struct mlxsw_sp_fib_entry *fib_entry;
5361        struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5362        int err;
5363
5364        fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5365        if (!fib6_entry)
5366                return ERR_PTR(-ENOMEM);
5367        fib_entry = &fib6_entry->common;
5368
5369        mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt);
5370        if (IS_ERR(mlxsw_sp_rt6)) {
5371                err = PTR_ERR(mlxsw_sp_rt6);
5372                goto err_rt6_create;
5373        }
5374
5375        mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt);
5376
5377        INIT_LIST_HEAD(&fib6_entry->rt6_list);
5378        list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5379        fib6_entry->nrt6 = 1;
5380        err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5381        if (err)
5382                goto err_nexthop6_group_get;
5383
5384        fib_entry->fib_node = fib_node;
5385
5386        return fib6_entry;
5387
5388err_nexthop6_group_get:
5389        list_del(&mlxsw_sp_rt6->list);
5390        mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5391err_rt6_create:
5392        kfree(fib6_entry);
5393        return ERR_PTR(err);
5394}
5395
5396static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5397                                        struct mlxsw_sp_fib6_entry *fib6_entry)
5398{
5399        mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5400        mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5401        WARN_ON(fib6_entry->nrt6);
5402        kfree(fib6_entry);
5403}
5404
5405static struct mlxsw_sp_fib6_entry *
5406mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
5407                              const struct fib6_info *nrt, bool replace)
5408{
5409        struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
5410
5411        list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5412                struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5413
5414                if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
5415                        continue;
5416                if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
5417                        break;
5418                if (replace && rt->fib6_metric == nrt->fib6_metric) {
5419                        if (mlxsw_sp_fib6_rt_can_mp(rt) ==
5420                            mlxsw_sp_fib6_rt_can_mp(nrt))
5421                                return fib6_entry;
5422                        if (mlxsw_sp_fib6_rt_can_mp(nrt))
5423                                fallback = fallback ?: fib6_entry;
5424                }
5425                if (rt->fib6_metric > nrt->fib6_metric)
5426                        return fallback ?: fib6_entry;
5427        }
5428
5429        return fallback;
5430}
5431
5432static int
5433mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
5434                               bool replace)
5435{
5436        struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
5437        struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
5438        struct mlxsw_sp_fib6_entry *fib6_entry;
5439
5440        fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace);
5441
5442        if (replace && WARN_ON(!fib6_entry))
5443                return -EINVAL;
5444
5445        if (fib6_entry) {
5446                list_add_tail(&new6_entry->common.list,
5447                              &fib6_entry->common.list);
5448        } else {
5449                struct mlxsw_sp_fib6_entry *last;
5450
5451                list_for_each_entry(last, &fib_node->entry_list, common.list) {
5452                        struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
5453
5454                        if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
5455                                break;
5456                        fib6_entry = last;
5457                }
5458
5459                if (fib6_entry)
5460                        list_add(&new6_entry->common.list,
5461                                 &fib6_entry->common.list);
5462                else
5463                        list_add(&new6_entry->common.list,
5464                                 &fib_node->entry_list);
5465        }
5466
5467        return 0;
5468}
5469
5470static void
5471mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
5472{
5473        list_del(&fib6_entry->common.list);
5474}
5475
5476static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
5477                                         struct mlxsw_sp_fib6_entry *fib6_entry,
5478                                         bool replace)
5479{
5480        int err;
5481
5482        err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace);
5483        if (err)
5484                return err;
5485
5486        err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
5487        if (err)
5488                goto err_fib_node_entry_add;
5489
5490        return 0;
5491
5492err_fib_node_entry_add:
5493        mlxsw_sp_fib6_node_list_remove(fib6_entry);
5494        return err;
5495}
5496
5497static void
5498mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
5499                                struct mlxsw_sp_fib6_entry *fib6_entry)
5500{
5501        mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
5502        mlxsw_sp_fib6_node_list_remove(fib6_entry);
5503}
5504
5505static struct mlxsw_sp_fib6_entry *
5506mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5507                           const struct fib6_info *rt)
5508{
5509        struct mlxsw_sp_fib6_entry *fib6_entry;
5510        struct mlxsw_sp_fib_node *fib_node;
5511        struct mlxsw_sp_fib *fib;
5512        struct mlxsw_sp_vr *vr;
5513
5514        vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5515        if (!vr)
5516                return NULL;
5517        fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5518
5519        fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5520                                            sizeof(rt->fib6_dst.addr),
5521                                            rt->fib6_dst.plen);
5522        if (!fib_node)
5523                return NULL;
5524
5525        list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
5526                struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5527
5528                if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
5529                    rt->fib6_metric == iter_rt->fib6_metric &&
5530                    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5531                        return fib6_entry;
5532        }
5533
5534        return NULL;
5535}
5536
5537static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
5538                                        struct mlxsw_sp_fib6_entry *fib6_entry,
5539                                        bool replace)
5540{
5541        struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5542        struct mlxsw_sp_fib6_entry *replaced;
5543
5544        if (!replace)
5545                return;
5546
5547        replaced = list_next_entry(fib6_entry, common.list);
5548
5549        mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
5550        mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
5551        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5552}
5553
5554static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
5555                                    struct fib6_info *rt, bool replace)
5556{
5557        struct mlxsw_sp_fib6_entry *fib6_entry;
5558        struct mlxsw_sp_fib_node *fib_node;
5559        int err;
5560
5561        if (mlxsw_sp->router->aborted)
5562                return 0;
5563
5564        if (rt->fib6_src.plen)
5565                return -EINVAL;
5566
5567        if (mlxsw_sp_fib6_rt_should_ignore(rt))
5568                return 0;
5569
5570        fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5571                                         &rt->fib6_dst.addr,
5572                                         sizeof(rt->fib6_dst.addr),
5573                                         rt->fib6_dst.plen,
5574                                         MLXSW_SP_L3_PROTO_IPV6);
5575        if (IS_ERR(fib_node))
5576                return PTR_ERR(fib_node);
5577
5578        /* Before creating a new entry, try to append route to an existing
5579         * multipath entry.
5580         */
5581        fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
5582        if (fib6_entry) {
5583                err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt);
5584                if (err)
5585                        goto err_fib6_entry_nexthop_add;
5586                return 0;
5587        }
5588
5589        fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt);
5590        if (IS_ERR(fib6_entry)) {
5591                err = PTR_ERR(fib6_entry);
5592                goto err_fib6_entry_create;
5593        }
5594
5595        err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace);
5596        if (err)
5597                goto err_fib6_node_entry_link;
5598
5599        mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
5600
5601        return 0;
5602
5603err_fib6_node_entry_link:
5604        mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5605err_fib6_entry_create:
5606err_fib6_entry_nexthop_add:
5607        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5608        return err;
5609}
5610
5611static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5612                                     struct fib6_info *rt)
5613{
5614        struct mlxsw_sp_fib6_entry *fib6_entry;
5615        struct mlxsw_sp_fib_node *fib_node;
5616
5617        if (mlxsw_sp->router->aborted)
5618                return;
5619
5620        if (mlxsw_sp_fib6_rt_should_ignore(rt))
5621                return;
5622
5623        fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5624        if (WARN_ON(!fib6_entry))
5625                return;
5626
5627        /* If route is part of a multipath entry, but not the last one
5628         * removed, then only reduce its nexthop group.
5629         */
5630        if (!list_is_singular(&fib6_entry->rt6_list)) {
5631                mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt);
5632                return;
5633        }
5634
5635        fib_node = fib6_entry->common.fib_node;
5636
5637        mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5638        mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5639        mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5640}
5641
5642static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5643                                            enum mlxsw_reg_ralxx_protocol proto,
5644                                            u8 tree_id)
5645{
5646        char ralta_pl[MLXSW_REG_RALTA_LEN];
5647        char ralst_pl[MLXSW_REG_RALST_LEN];
5648        int i, err;
5649
5650        mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5651        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5652        if (err)
5653                return err;
5654
5655        mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5656        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5657        if (err)
5658                return err;
5659
5660        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5661                struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5662                char raltb_pl[MLXSW_REG_RALTB_LEN];
5663                char ralue_pl[MLXSW_REG_RALUE_LEN];
5664
5665                mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5666                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5667                                      raltb_pl);
5668                if (err)
5669                        return err;
5670
5671                mlxsw_reg_ralue_pack(ralue_pl, proto,
5672                                     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5673                mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5674                err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5675                                      ralue_pl);
5676                if (err)
5677                        return err;
5678        }
5679
5680        return 0;
5681}
5682
5683static struct mlxsw_sp_mr_table *
5684mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5685{
5686        if (family == RTNL_FAMILY_IPMR)
5687                return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5688        else
5689                return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5690}
5691
5692static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5693                                     struct mfc_entry_notifier_info *men_info,
5694                                     bool replace)
5695{
5696        struct mlxsw_sp_mr_table *mrt;
5697        struct mlxsw_sp_vr *vr;
5698
5699        if (mlxsw_sp->router->aborted)
5700                return 0;
5701
5702        vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5703        if (IS_ERR(vr))
5704                return PTR_ERR(vr);
5705
5706        mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5707        return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5708}
5709
5710static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5711                                      struct mfc_entry_notifier_info *men_info)
5712{
5713        struct mlxsw_sp_mr_table *mrt;
5714        struct mlxsw_sp_vr *vr;
5715
5716        if (mlxsw_sp->router->aborted)
5717                return;
5718
5719        vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5720        if (WARN_ON(!vr))
5721                return;
5722
5723        mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5724        mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5725        mlxsw_sp_vr_put(mlxsw_sp, vr);
5726}
5727
5728static int
5729mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5730                              struct vif_entry_notifier_info *ven_info)
5731{
5732        struct mlxsw_sp_mr_table *mrt;
5733        struct mlxsw_sp_rif *rif;
5734        struct mlxsw_sp_vr *vr;
5735
5736        if (mlxsw_sp->router->aborted)
5737                return 0;
5738
5739        vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5740        if (IS_ERR(vr))
5741                return PTR_ERR(vr);
5742
5743        mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5744        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5745        return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5746                                   ven_info->vif_index,
5747                                   ven_info->vif_flags, rif);
5748}
5749
5750static void
5751mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5752                              struct vif_entry_notifier_info *ven_info)
5753{
5754        struct mlxsw_sp_mr_table *mrt;
5755        struct mlxsw_sp_vr *vr;
5756
5757        if (mlxsw_sp->router->aborted)
5758                return;
5759
5760        vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5761        if (WARN_ON(!vr))
5762                return;
5763
5764        mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5765        mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5766        mlxsw_sp_vr_put(mlxsw_sp, vr);
5767}
5768
5769static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5770{
5771        enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5772        int err;
5773
5774        err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5775                                               MLXSW_SP_LPM_TREE_MIN);
5776        if (err)
5777                return err;
5778
5779        /* The multicast router code does not need an abort trap as by default,
5780         * packets that don't match any routes are trapped to the CPU.
5781         */
5782
5783        proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5784        return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5785                                                MLXSW_SP_LPM_TREE_MIN + 1);
5786}
5787
5788static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5789                                     struct mlxsw_sp_fib_node *fib_node)
5790{
5791        struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
5792
5793        list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
5794                                 common.list) {
5795                bool do_break = &tmp->common.list == &fib_node->entry_list;
5796
5797                mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
5798                mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5799                mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5800                /* Break when entry list is empty and node was freed.
5801                 * Otherwise, we'll access freed memory in the next
5802                 * iteration.
5803                 */
5804                if (do_break)
5805                        break;
5806        }
5807}
5808
5809static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5810                                     struct mlxsw_sp_fib_node *fib_node)
5811{
5812        struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
5813
5814        list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
5815                                 common.list) {
5816                bool do_break = &tmp->common.list == &fib_node->entry_list;
5817
5818                mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
5819                mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5820                mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5821                if (do_break)
5822                        break;
5823        }
5824}
5825
5826static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5827                                    struct mlxsw_sp_fib_node *fib_node)
5828{
5829        switch (fib_node->fib->proto) {
5830        case MLXSW_SP_L3_PROTO_IPV4:
5831                mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5832                break;
5833        case MLXSW_SP_L3_PROTO_IPV6:
5834                mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5835                break;
5836        }
5837}
5838
5839static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5840                                  struct mlxsw_sp_vr *vr,
5841                                  enum mlxsw_sp_l3proto proto)
5842{
5843        struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5844        struct mlxsw_sp_fib_node *fib_node, *tmp;
5845
5846        list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5847                bool do_break = &tmp->list == &fib->node_list;
5848
5849                mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5850                if (do_break)
5851                        break;
5852        }
5853}
5854
5855static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5856{
5857        int i, j;
5858
5859        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5860                struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5861
5862                if (!mlxsw_sp_vr_is_used(vr))
5863                        continue;
5864
5865                for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5866                        mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5867                mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5868
5869                /* If virtual router was only used for IPv4, then it's no
5870                 * longer used.
5871                 */
5872                if (!mlxsw_sp_vr_is_used(vr))
5873                        continue;
5874                mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5875        }
5876}
5877
5878static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5879{
5880        int err;
5881
5882        if (mlxsw_sp->router->aborted)
5883                return;
5884        dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5885        mlxsw_sp_router_fib_flush(mlxsw_sp);
5886        mlxsw_sp->router->aborted = true;
5887        err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5888        if (err)
5889                dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5890}
5891
5892struct mlxsw_sp_fib_event_work {
5893        struct work_struct work;
5894        union {
5895                struct fib6_entry_notifier_info fen6_info;
5896                struct fib_entry_notifier_info fen_info;
5897                struct fib_rule_notifier_info fr_info;
5898                struct fib_nh_notifier_info fnh_info;
5899                struct mfc_entry_notifier_info men_info;
5900                struct vif_entry_notifier_info ven_info;
5901        };
5902        struct mlxsw_sp *mlxsw_sp;
5903        unsigned long event;
5904};
5905
5906static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5907{
5908        struct mlxsw_sp_fib_event_work *fib_work =
5909                container_of(work, struct mlxsw_sp_fib_event_work, work);
5910        struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5911        bool replace, append;
5912        int err;
5913
5914        /* Protect internal structures from changes */
5915        rtnl_lock();
5916        mlxsw_sp_span_respin(mlxsw_sp);
5917
5918        switch (fib_work->event) {
5919        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5920        case FIB_EVENT_ENTRY_APPEND: /* fall through */
5921        case FIB_EVENT_ENTRY_ADD:
5922                replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5923                append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
5924                err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
5925                                               replace, append);
5926                if (err)
5927                        mlxsw_sp_router_fib_abort(mlxsw_sp);
5928                fib_info_put(fib_work->fen_info.fi);
5929                break;
5930        case FIB_EVENT_ENTRY_DEL:
5931                mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5932                fib_info_put(fib_work->fen_info.fi);
5933                break;
5934        case FIB_EVENT_RULE_ADD:
5935                /* if we get here, a rule was added that we do not support.
5936                 * just do the fib_abort
5937                 */
5938                mlxsw_sp_router_fib_abort(mlxsw_sp);
5939                break;
5940        case FIB_EVENT_NH_ADD: /* fall through */
5941        case FIB_EVENT_NH_DEL:
5942                mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5943                                        fib_work->fnh_info.fib_nh);
5944                fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
5945                break;
5946        }
5947        rtnl_unlock();
5948        kfree(fib_work);
5949}
5950
5951static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
5952{
5953        struct mlxsw_sp_fib_event_work *fib_work =
5954                container_of(work, struct mlxsw_sp_fib_event_work, work);
5955        struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5956        bool replace;
5957        int err;
5958
5959        rtnl_lock();
5960        mlxsw_sp_span_respin(mlxsw_sp);
5961
5962        switch (fib_work->event) {
5963        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5964        case FIB_EVENT_ENTRY_APPEND: /* fall through */
5965        case FIB_EVENT_ENTRY_ADD:
5966                replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
5967                err = mlxsw_sp_router_fib6_add(mlxsw_sp,
5968                                               fib_work->fen6_info.rt, replace);
5969                if (err)
5970                        mlxsw_sp_router_fib_abort(mlxsw_sp);
5971                mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5972                break;
5973        case FIB_EVENT_ENTRY_DEL:
5974                mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt);
5975                mlxsw_sp_rt6_release(fib_work->fen6_info.rt);
5976                break;
5977        case FIB_EVENT_RULE_ADD:
5978                /* if we get here, a rule was added that we do not support.
5979                 * just do the fib_abort
5980                 */
5981                mlxsw_sp_router_fib_abort(mlxsw_sp);
5982                break;
5983        }
5984        rtnl_unlock();
5985        kfree(fib_work);
5986}
5987
5988static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
5989{
5990        struct mlxsw_sp_fib_event_work *fib_work =
5991                container_of(work, struct mlxsw_sp_fib_event_work, work);
5992        struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5993        bool replace;
5994        int err;
5995
5996        rtnl_lock();
5997        switch (fib_work->event) {
5998        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
5999        case FIB_EVENT_ENTRY_ADD:
6000                replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6001
6002                err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
6003                                                replace);
6004                if (err)
6005                        mlxsw_sp_router_fib_abort(mlxsw_sp);
6006                mr_cache_put(fib_work->men_info.mfc);
6007                break;
6008        case FIB_EVENT_ENTRY_DEL:
6009                mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
6010                mr_cache_put(fib_work->men_info.mfc);
6011                break;
6012        case FIB_EVENT_VIF_ADD:
6013                err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
6014                                                    &fib_work->ven_info);
6015                if (err)
6016                        mlxsw_sp_router_fib_abort(mlxsw_sp);
6017                dev_put(fib_work->ven_info.dev);
6018                break;
6019        case FIB_EVENT_VIF_DEL:
6020                mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
6021                                              &fib_work->ven_info);
6022                dev_put(fib_work->ven_info.dev);
6023                break;
6024        case FIB_EVENT_RULE_ADD:
6025                /* if we get here, a rule was added that we do not support.
6026                 * just do the fib_abort
6027                 */
6028                mlxsw_sp_router_fib_abort(mlxsw_sp);
6029                break;
6030        }
6031        rtnl_unlock();
6032        kfree(fib_work);
6033}
6034
6035static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
6036                                       struct fib_notifier_info *info)
6037{
6038        struct fib_entry_notifier_info *fen_info;
6039        struct fib_nh_notifier_info *fnh_info;
6040
6041        switch (fib_work->event) {
6042        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6043        case FIB_EVENT_ENTRY_APPEND: /* fall through */
6044        case FIB_EVENT_ENTRY_ADD: /* fall through */
6045        case FIB_EVENT_ENTRY_DEL:
6046                fen_info = container_of(info, struct fib_entry_notifier_info,
6047                                        info);
6048                fib_work->fen_info = *fen_info;
6049                /* Take reference on fib_info to prevent it from being
6050                 * freed while work is queued. Release it afterwards.
6051                 */
6052                fib_info_hold(fib_work->fen_info.fi);
6053                break;
6054        case FIB_EVENT_NH_ADD: /* fall through */
6055        case FIB_EVENT_NH_DEL:
6056                fnh_info = container_of(info, struct fib_nh_notifier_info,
6057                                        info);
6058                fib_work->fnh_info = *fnh_info;
6059                fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
6060                break;
6061        }
6062}
6063
6064static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6065                                       struct fib_notifier_info *info)
6066{
6067        struct fib6_entry_notifier_info *fen6_info;
6068
6069        switch (fib_work->event) {
6070        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6071        case FIB_EVENT_ENTRY_APPEND: /* fall through */
6072        case FIB_EVENT_ENTRY_ADD: /* fall through */
6073        case FIB_EVENT_ENTRY_DEL:
6074                fen6_info = container_of(info, struct fib6_entry_notifier_info,
6075                                         info);
6076                fib_work->fen6_info = *fen6_info;
6077                fib6_info_hold(fib_work->fen6_info.rt);
6078                break;
6079        }
6080}
6081
6082static void
6083mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6084                            struct fib_notifier_info *info)
6085{
6086        switch (fib_work->event) {
6087        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6088        case FIB_EVENT_ENTRY_ADD: /* fall through */
6089        case FIB_EVENT_ENTRY_DEL:
6090                memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6091                mr_cache_hold(fib_work->men_info.mfc);
6092                break;
6093        case FIB_EVENT_VIF_ADD: /* fall through */
6094        case FIB_EVENT_VIF_DEL:
6095                memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6096                dev_hold(fib_work->ven_info.dev);
6097                break;
6098        }
6099}
6100
6101static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6102                                          struct fib_notifier_info *info,
6103                                          struct mlxsw_sp *mlxsw_sp)
6104{
6105        struct netlink_ext_ack *extack = info->extack;
6106        struct fib_rule_notifier_info *fr_info;
6107        struct fib_rule *rule;
6108        int err = 0;
6109
6110        /* nothing to do at the moment */
6111        if (event == FIB_EVENT_RULE_DEL)
6112                return 0;
6113
6114        if (mlxsw_sp->router->aborted)
6115                return 0;
6116
6117        fr_info = container_of(info, struct fib_rule_notifier_info, info);
6118        rule = fr_info->rule;
6119
6120        /* Rule only affects locally generated traffic */
6121        if (rule->iifindex == info->net->loopback_dev->ifindex)
6122                return 0;
6123
6124        switch (info->family) {
6125        case AF_INET:
6126                if (!fib4_rule_default(rule) && !rule->l3mdev)
6127                        err = -EOPNOTSUPP;
6128                break;
6129        case AF_INET6:
6130                if (!fib6_rule_default(rule) && !rule->l3mdev)
6131                        err = -EOPNOTSUPP;
6132                break;
6133        case RTNL_FAMILY_IPMR:
6134                if (!ipmr_rule_default(rule) && !rule->l3mdev)
6135                        err = -EOPNOTSUPP;
6136                break;
6137        case RTNL_FAMILY_IP6MR:
6138                if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6139                        err = -EOPNOTSUPP;
6140                break;
6141        }
6142
6143        if (err < 0)
6144                NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6145
6146        return err;
6147}
6148
6149/* Called with rcu_read_lock() */
6150static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6151                                     unsigned long event, void *ptr)
6152{
6153        struct mlxsw_sp_fib_event_work *fib_work;
6154        struct fib_notifier_info *info = ptr;
6155        struct mlxsw_sp_router *router;
6156        int err;
6157
6158        if (!net_eq(info->net, &init_net) ||
6159            (info->family != AF_INET && info->family != AF_INET6 &&
6160             info->family != RTNL_FAMILY_IPMR &&
6161             info->family != RTNL_FAMILY_IP6MR))
6162                return NOTIFY_DONE;
6163
6164        router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6165
6166        switch (event) {
6167        case FIB_EVENT_RULE_ADD: /* fall through */
6168        case FIB_EVENT_RULE_DEL:
6169                err = mlxsw_sp_router_fib_rule_event(event, info,
6170                                                     router->mlxsw_sp);
6171                if (!err || info->extack)
6172                        return notifier_from_errno(err);
6173                break;
6174        case FIB_EVENT_ENTRY_ADD:
6175        case FIB_EVENT_ENTRY_REPLACE: /* fall through */
6176        case FIB_EVENT_ENTRY_APPEND:  /* fall through */
6177                if (router->aborted) {
6178                        NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6179                        return notifier_from_errno(-EINVAL);
6180                }
6181                if (info->family == AF_INET) {
6182                        struct fib_entry_notifier_info *fen_info = ptr;
6183
6184                        if (fen_info->fi->fib_nh_is_v6) {
6185                                NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6186                                return notifier_from_errno(-EINVAL);
6187                        }
6188                }
6189                break;
6190        }
6191
6192        fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6193        if (WARN_ON(!fib_work))
6194                return NOTIFY_BAD;
6195
6196        fib_work->mlxsw_sp = router->mlxsw_sp;
6197        fib_work->event = event;
6198
6199        switch (info->family) {
6200        case AF_INET:
6201                INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6202                mlxsw_sp_router_fib4_event(fib_work, info);
6203                break;
6204        case AF_INET6:
6205                INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6206                mlxsw_sp_router_fib6_event(fib_work, info);
6207                break;
6208        case RTNL_FAMILY_IP6MR:
6209        case RTNL_FAMILY_IPMR:
6210                INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6211                mlxsw_sp_router_fibmr_event(fib_work, info);
6212                break;
6213        }
6214
6215        mlxsw_core_schedule_work(&fib_work->work);
6216
6217        return NOTIFY_DONE;
6218}
6219
6220struct mlxsw_sp_rif *
6221mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6222                         const struct net_device *dev)
6223{
6224        int i;
6225
6226        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6227                if (mlxsw_sp->router->rifs[i] &&
6228                    mlxsw_sp->router->rifs[i]->dev == dev)
6229                        return mlxsw_sp->router->rifs[i];
6230
6231        return NULL;
6232}
6233
6234static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6235{
6236        char ritr_pl[MLXSW_REG_RITR_LEN];
6237        int err;
6238
6239        mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6240        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6241        if (err)
6242                return err;
6243
6244        mlxsw_reg_ritr_enable_set(ritr_pl, false);
6245        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6246}
6247
6248static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6249                                          struct mlxsw_sp_rif *rif)
6250{
6251        mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6252        mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6253        mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6254}
6255
6256static bool
6257mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6258                           unsigned long event)
6259{
6260        struct inet6_dev *inet6_dev;
6261        bool addr_list_empty = true;
6262        struct in_device *idev;
6263
6264        switch (event) {
6265        case NETDEV_UP:
6266                return rif == NULL;
6267        case NETDEV_DOWN:
6268                idev = __in_dev_get_rtnl(dev);
6269                if (idev && idev->ifa_list)
6270                        addr_list_empty = false;
6271
6272                inet6_dev = __in6_dev_get(dev);
6273                if (addr_list_empty && inet6_dev &&
6274                    !list_empty(&inet6_dev->addr_list))
6275                        addr_list_empty = false;
6276
6277                /* macvlans do not have a RIF, but rather piggy back on the
6278                 * RIF of their lower device.
6279                 */
6280                if (netif_is_macvlan(dev) && addr_list_empty)
6281                        return true;
6282
6283                if (rif && addr_list_empty &&
6284                    !netif_is_l3_slave(rif->dev))
6285                        return true;
6286                /* It is possible we already removed the RIF ourselves
6287                 * if it was assigned to a netdev that is now a bridge
6288                 * or LAG slave.
6289                 */
6290                return false;
6291        }
6292
6293        return false;
6294}
6295
6296static enum mlxsw_sp_rif_type
6297mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6298                      const struct net_device *dev)
6299{
6300        enum mlxsw_sp_fid_type type;
6301
6302        if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6303                return MLXSW_SP_RIF_TYPE_IPIP_LB;
6304
6305        /* Otherwise RIF type is derived from the type of the underlying FID. */
6306        if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6307                type = MLXSW_SP_FID_TYPE_8021Q;
6308        else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6309                type = MLXSW_SP_FID_TYPE_8021Q;
6310        else if (netif_is_bridge_master(dev))
6311                type = MLXSW_SP_FID_TYPE_8021D;
6312        else
6313                type = MLXSW_SP_FID_TYPE_RFID;
6314
6315        return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6316}
6317
6318static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6319{
6320        int i;
6321
6322        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6323                if (!mlxsw_sp->router->rifs[i]) {
6324                        *p_rif_index = i;
6325                        return 0;
6326                }
6327        }
6328
6329        return -ENOBUFS;
6330}
6331
6332static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6333                                               u16 vr_id,
6334                                               struct net_device *l3_dev)
6335{
6336        struct mlxsw_sp_rif *rif;
6337
6338        rif = kzalloc(rif_size, GFP_KERNEL);
6339        if (!rif)
6340                return NULL;
6341
6342        INIT_LIST_HEAD(&rif->nexthop_list);
6343        INIT_LIST_HEAD(&rif->neigh_list);
6344        if (l3_dev) {
6345                ether_addr_copy(rif->addr, l3_dev->dev_addr);
6346                rif->mtu = l3_dev->mtu;
6347                rif->dev = l3_dev;
6348        }
6349        rif->vr_id = vr_id;
6350        rif->rif_index = rif_index;
6351
6352        return rif;
6353}
6354
6355struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6356                                           u16 rif_index)
6357{
6358        return mlxsw_sp->router->rifs[rif_index];
6359}
6360
6361u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6362{
6363        return rif->rif_index;
6364}
6365
6366u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6367{
6368        return lb_rif->common.rif_index;
6369}
6370
6371u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6372{
6373        u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6374        struct mlxsw_sp_vr *ul_vr;
6375
6376        ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6377        if (WARN_ON(IS_ERR(ul_vr)))
6378                return 0;
6379
6380        return ul_vr->id;
6381}
6382
6383u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6384{
6385        return lb_rif->ul_rif_id;
6386}
6387
6388int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6389{
6390        return rif->dev->ifindex;
6391}
6392
6393const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6394{
6395        return rif->dev;
6396}
6397
6398struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif)
6399{
6400        return rif->fid;
6401}
6402
6403static struct mlxsw_sp_rif *
6404mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6405                    const struct mlxsw_sp_rif_params *params,
6406                    struct netlink_ext_ack *extack)
6407{
6408        u32 tb_id = l3mdev_fib_table(params->dev);
6409        const struct mlxsw_sp_rif_ops *ops;
6410        struct mlxsw_sp_fid *fid = NULL;
6411        enum mlxsw_sp_rif_type type;
6412        struct mlxsw_sp_rif *rif;
6413        struct mlxsw_sp_vr *vr;
6414        u16 rif_index;
6415        int i, err;
6416
6417        type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6418        ops = mlxsw_sp->rif_ops_arr[type];
6419
6420        vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6421        if (IS_ERR(vr))
6422                return ERR_CAST(vr);
6423        vr->rif_count++;
6424
6425        err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6426        if (err) {
6427                NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6428                goto err_rif_index_alloc;
6429        }
6430
6431        rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6432        if (!rif) {
6433                err = -ENOMEM;
6434                goto err_rif_alloc;
6435        }
6436        dev_hold(rif->dev);
6437        mlxsw_sp->router->rifs[rif_index] = rif;
6438        rif->mlxsw_sp = mlxsw_sp;
6439        rif->ops = ops;
6440
6441        if (ops->fid_get) {
6442                fid = ops->fid_get(rif, extack);
6443                if (IS_ERR(fid)) {
6444                        err = PTR_ERR(fid);
6445                        goto err_fid_get;
6446                }
6447                rif->fid = fid;
6448        }
6449
6450        if (ops->setup)
6451                ops->setup(rif, params);
6452
6453        err = ops->configure(rif);
6454        if (err)
6455                goto err_configure;
6456
6457        for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6458                err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6459                if (err)
6460                        goto err_mr_rif_add;
6461        }
6462
6463        mlxsw_sp_rif_counters_alloc(rif);
6464
6465        return rif;
6466
6467err_mr_rif_add:
6468        for (i--; i >= 0; i--)
6469                mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6470        ops->deconfigure(rif);
6471err_configure:
6472        if (fid)
6473                mlxsw_sp_fid_put(fid);
6474err_fid_get:
6475        mlxsw_sp->router->rifs[rif_index] = NULL;
6476        dev_put(rif->dev);
6477        kfree(rif);
6478err_rif_alloc:
6479err_rif_index_alloc:
6480        vr->rif_count--;
6481        mlxsw_sp_vr_put(mlxsw_sp, vr);
6482        return ERR_PTR(err);
6483}
6484
6485static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6486{
6487        const struct mlxsw_sp_rif_ops *ops = rif->ops;
6488        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6489        struct mlxsw_sp_fid *fid = rif->fid;
6490        struct mlxsw_sp_vr *vr;
6491        int i;
6492
6493        mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6494        vr = &mlxsw_sp->router->vrs[rif->vr_id];
6495
6496        mlxsw_sp_rif_counters_free(rif);
6497        for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6498                mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6499        ops->deconfigure(rif);
6500        if (fid)
6501                /* Loopback RIFs are not associated with a FID. */
6502                mlxsw_sp_fid_put(fid);
6503        mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6504        dev_put(rif->dev);
6505        kfree(rif);
6506        vr->rif_count--;
6507        mlxsw_sp_vr_put(mlxsw_sp, vr);
6508}
6509
/* Destroy the RIF bound to @dev, if there is one. */
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
                                 struct net_device *dev)
{
        struct mlxsw_sp_rif *rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);

        if (rif)
                mlxsw_sp_rif_destroy(rif);
}
6520
6521static void
6522mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6523                                 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6524{
6525        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6526
6527        params->vid = mlxsw_sp_port_vlan->vid;
6528        params->lag = mlxsw_sp_port->lagged;
6529        if (params->lag)
6530                params->lag_id = mlxsw_sp_port->lag_id;
6531        else
6532                params->system_port = mlxsw_sp_port->local_port;
6533}
6534
6535static struct mlxsw_sp_rif_subport *
6536mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6537{
6538        return container_of(rif, struct mlxsw_sp_rif_subport, common);
6539}
6540
6541static struct mlxsw_sp_rif *
6542mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6543                         const struct mlxsw_sp_rif_params *params,
6544                         struct netlink_ext_ack *extack)
6545{
6546        struct mlxsw_sp_rif_subport *rif_subport;
6547        struct mlxsw_sp_rif *rif;
6548
6549        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6550        if (!rif)
6551                return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6552
6553        rif_subport = mlxsw_sp_rif_subport_rif(rif);
6554        refcount_inc(&rif_subport->ref_count);
6555        return rif;
6556}
6557
6558static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6559{
6560        struct mlxsw_sp_rif_subport *rif_subport;
6561
6562        rif_subport = mlxsw_sp_rif_subport_rif(rif);
6563        if (!refcount_dec_and_test(&rif_subport->ref_count))
6564                return;
6565
6566        mlxsw_sp_rif_destroy(rif);
6567}
6568
6569static int
6570mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6571                               struct net_device *l3_dev,
6572                               struct netlink_ext_ack *extack)
6573{
6574        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6575        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6576        struct mlxsw_sp_rif_params params = {
6577                .dev = l3_dev,
6578        };
6579        u16 vid = mlxsw_sp_port_vlan->vid;
6580        struct mlxsw_sp_rif *rif;
6581        struct mlxsw_sp_fid *fid;
6582        int err;
6583
6584        mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6585        rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6586        if (IS_ERR(rif))
6587                return PTR_ERR(rif);
6588
6589        /* FID was already created, just take a reference */
6590        fid = rif->ops->fid_get(rif, extack);
6591        err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6592        if (err)
6593                goto err_fid_port_vid_map;
6594
6595        err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6596        if (err)
6597                goto err_port_vid_learning_set;
6598
6599        err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6600                                        BR_STATE_FORWARDING);
6601        if (err)
6602                goto err_port_vid_stp_set;
6603
6604        mlxsw_sp_port_vlan->fid = fid;
6605
6606        return 0;
6607
6608err_port_vid_stp_set:
6609        mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6610err_port_vid_learning_set:
6611        mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6612err_fid_port_vid_map:
6613        mlxsw_sp_fid_put(fid);
6614        mlxsw_sp_rif_subport_put(rif);
6615        return err;
6616}
6617
6618void
6619mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6620{
6621        struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6622        struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6623        struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6624        u16 vid = mlxsw_sp_port_vlan->vid;
6625
6626        if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6627                return;
6628
6629        mlxsw_sp_port_vlan->fid = NULL;
6630        mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6631        mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6632        mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6633        mlxsw_sp_fid_put(fid);
6634        mlxsw_sp_rif_subport_put(rif);
6635}
6636
6637static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6638                                             struct net_device *port_dev,
6639                                             unsigned long event, u16 vid,
6640                                             struct netlink_ext_ack *extack)
6641{
6642        struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6643        struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6644
6645        mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6646        if (WARN_ON(!mlxsw_sp_port_vlan))
6647                return -EINVAL;
6648
6649        switch (event) {
6650        case NETDEV_UP:
6651                return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6652                                                      l3_dev, extack);
6653        case NETDEV_DOWN:
6654                mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6655                break;
6656        }
6657
6658        return 0;
6659}
6660
6661static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6662                                        unsigned long event,
6663                                        struct netlink_ext_ack *extack)
6664{
6665        if (netif_is_bridge_port(port_dev) ||
6666            netif_is_lag_port(port_dev) ||
6667            netif_is_ovs_port(port_dev))
6668                return 0;
6669
6670        return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6671                                                 MLXSW_SP_DEFAULT_VID, extack);
6672}
6673
6674static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6675                                         struct net_device *lag_dev,
6676                                         unsigned long event, u16 vid,
6677                                         struct netlink_ext_ack *extack)
6678{
6679        struct net_device *port_dev;
6680        struct list_head *iter;
6681        int err;
6682
6683        netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6684                if (mlxsw_sp_port_dev_check(port_dev)) {
6685                        err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6686                                                                port_dev,
6687                                                                event, vid,
6688                                                                extack);
6689                        if (err)
6690                                return err;
6691                }
6692        }
6693
6694        return 0;
6695}
6696
6697static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6698                                       unsigned long event,
6699                                       struct netlink_ext_ack *extack)
6700{
6701        if (netif_is_bridge_port(lag_dev))
6702                return 0;
6703
6704        return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6705                                             MLXSW_SP_DEFAULT_VID, extack);
6706}
6707
6708static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6709                                          struct net_device *l3_dev,
6710                                          unsigned long event,
6711                                          struct netlink_ext_ack *extack)
6712{
6713        struct mlxsw_sp_rif_params params = {
6714                .dev = l3_dev,
6715        };
6716        struct mlxsw_sp_rif *rif;
6717
6718        switch (event) {
6719        case NETDEV_UP:
6720                rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6721                if (IS_ERR(rif))
6722                        return PTR_ERR(rif);
6723                break;
6724        case NETDEV_DOWN:
6725                rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6726                mlxsw_sp_rif_destroy(rif);
6727                break;
6728        }
6729
6730        return 0;
6731}
6732
6733static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6734                                        struct net_device *vlan_dev,
6735                                        unsigned long event,
6736                                        struct netlink_ext_ack *extack)
6737{
6738        struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6739        u16 vid = vlan_dev_vlan_id(vlan_dev);
6740
6741        if (netif_is_bridge_port(vlan_dev))
6742                return 0;
6743
6744        if (mlxsw_sp_port_dev_check(real_dev))
6745                return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6746                                                         event, vid, extack);
6747        else if (netif_is_lag_master(real_dev))
6748                return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6749                                                     vid, extack);
6750        else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6751                return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6752                                                      extack);
6753
6754        return 0;
6755}
6756
6757static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6758{
6759        u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6760        u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6761
6762        return ether_addr_equal_masked(mac, vrrp4, mask);
6763}
6764
6765static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6766{
6767        u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6768        u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6769
6770        return ether_addr_equal_masked(mac, vrrp6, mask);
6771}
6772
6773static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6774                                const u8 *mac, bool adding)
6775{
6776        char ritr_pl[MLXSW_REG_RITR_LEN];
6777        u8 vrrp_id = adding ? mac[5] : 0;
6778        int err;
6779
6780        if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6781            !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6782                return 0;
6783
6784        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6785        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6786        if (err)
6787                return err;
6788
6789        if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6790                mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6791        else
6792                mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6793
6794        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6795}
6796
6797static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6798                                    const struct net_device *macvlan_dev,
6799                                    struct netlink_ext_ack *extack)
6800{
6801        struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6802        struct mlxsw_sp_rif *rif;
6803        int err;
6804
6805        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6806        if (!rif) {
6807                NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6808                return -EOPNOTSUPP;
6809        }
6810
6811        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6812                                  mlxsw_sp_fid_index(rif->fid), true);
6813        if (err)
6814                return err;
6815
6816        err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6817                                   macvlan_dev->dev_addr, true);
6818        if (err)
6819                goto err_rif_vrrp_add;
6820
6821        /* Make sure the bridge driver does not have this MAC pointing at
6822         * some other port.
6823         */
6824        if (rif->ops->fdb_del)
6825                rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6826
6827        return 0;
6828
6829err_rif_vrrp_add:
6830        mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6831                            mlxsw_sp_fid_index(rif->fid), false);
6832        return err;
6833}
6834
6835void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6836                              const struct net_device *macvlan_dev)
6837{
6838        struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6839        struct mlxsw_sp_rif *rif;
6840
6841        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6842        /* If we do not have a RIF, then we already took care of
6843         * removing the macvlan's MAC during RIF deletion.
6844         */
6845        if (!rif)
6846                return;
6847        mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6848                             false);
6849        mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6850                            mlxsw_sp_fid_index(rif->fid), false);
6851}
6852
6853static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6854                                           struct net_device *macvlan_dev,
6855                                           unsigned long event,
6856                                           struct netlink_ext_ack *extack)
6857{
6858        switch (event) {
6859        case NETDEV_UP:
6860                return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6861        case NETDEV_DOWN:
6862                mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6863                break;
6864        }
6865
6866        return 0;
6867}
6868
6869static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6870                                               struct net_device *dev,
6871                                               const unsigned char *dev_addr,
6872                                               struct netlink_ext_ack *extack)
6873{
6874        struct mlxsw_sp_rif *rif;
6875        int i;
6876
6877        /* A RIF is not created for macvlan netdevs. Their MAC is used to
6878         * populate the FDB
6879         */
6880        if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
6881                return 0;
6882
6883        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6884                rif = mlxsw_sp->router->rifs[i];
6885                if (rif && rif->dev && rif->dev != dev &&
6886                    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
6887                                             mlxsw_sp->mac_mask)) {
6888                        NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
6889                        return -EINVAL;
6890                }
6891        }
6892
6893        return 0;
6894}
6895
/* Dispatch an address event to the handler matching the type of @dev.
 * The checks are made in a fixed order: mlxsw port, LAG master, bridge
 * master, VLAN device, macvlan. Other netdev types are ignored.
 */
static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
				     struct net_device *dev,
				     unsigned long event,
				     struct netlink_ext_ack *extack)
{
	if (mlxsw_sp_port_dev_check(dev))
		return mlxsw_sp_inetaddr_port_event(dev, event, extack);

	if (netif_is_lag_master(dev))
		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);

	if (netif_is_bridge_master(dev))
		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
						      extack);

	if (is_vlan_dev(dev))
		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
						    extack);

	if (netif_is_macvlan(dev))
		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
						       extack);

	return 0;
}
6917
6918static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
6919                                   unsigned long event, void *ptr)
6920{
6921        struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
6922        struct net_device *dev = ifa->ifa_dev->dev;
6923        struct mlxsw_sp_router *router;
6924        struct mlxsw_sp_rif *rif;
6925        int err = 0;
6926
6927        /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
6928        if (event == NETDEV_UP)
6929                goto out;
6930
6931        router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
6932        rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
6933        if (!mlxsw_sp_rif_should_config(rif, dev, event))
6934                goto out;
6935
6936        err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
6937out:
6938        return notifier_from_errno(err);
6939}
6940
6941int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
6942                                  unsigned long event, void *ptr)
6943{
6944        struct in_validator_info *ivi = (struct in_validator_info *) ptr;
6945        struct net_device *dev = ivi->ivi_dev->dev;
6946        struct mlxsw_sp *mlxsw_sp;
6947        struct mlxsw_sp_rif *rif;
6948        int err = 0;
6949
6950        mlxsw_sp = mlxsw_sp_lower_get(dev);
6951        if (!mlxsw_sp)
6952                goto out;
6953
6954        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6955        if (!mlxsw_sp_rif_should_config(rif, dev, event))
6956                goto out;
6957
6958        err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
6959                                                  ivi->extack);
6960        if (err)
6961                goto out;
6962
6963        err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
6964out:
6965        return notifier_from_errno(err);
6966}
6967
6968struct mlxsw_sp_inet6addr_event_work {
6969        struct work_struct work;
6970        struct mlxsw_sp *mlxsw_sp;
6971        struct net_device *dev;
6972        unsigned long event;
6973};
6974
6975static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
6976{
6977        struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
6978                container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
6979        struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
6980        struct net_device *dev = inet6addr_work->dev;
6981        unsigned long event = inet6addr_work->event;
6982        struct mlxsw_sp_rif *rif;
6983
6984        rtnl_lock();
6985
6986        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6987        if (!mlxsw_sp_rif_should_config(rif, dev, event))
6988                goto out;
6989
6990        __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
6991out:
6992        rtnl_unlock();
6993        dev_put(dev);
6994        kfree(inet6addr_work);
6995}
6996
6997/* Called with rcu_read_lock() */
6998static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
6999                                    unsigned long event, void *ptr)
7000{
7001        struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
7002        struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
7003        struct net_device *dev = if6->idev->dev;
7004        struct mlxsw_sp_router *router;
7005
7006        /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
7007        if (event == NETDEV_UP)
7008                return NOTIFY_DONE;
7009
7010        inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
7011        if (!inet6addr_work)
7012                return NOTIFY_BAD;
7013
7014        router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
7015        INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
7016        inet6addr_work->mlxsw_sp = router->mlxsw_sp;
7017        inet6addr_work->dev = dev;
7018        inet6addr_work->event = event;
7019        dev_hold(dev);
7020        mlxsw_core_schedule_work(&inet6addr_work->work);
7021
7022        return NOTIFY_DONE;
7023}
7024
7025int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
7026                                   unsigned long event, void *ptr)
7027{
7028        struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
7029        struct net_device *dev = i6vi->i6vi_dev->dev;
7030        struct mlxsw_sp *mlxsw_sp;
7031        struct mlxsw_sp_rif *rif;
7032        int err = 0;
7033
7034        mlxsw_sp = mlxsw_sp_lower_get(dev);
7035        if (!mlxsw_sp)
7036                goto out;
7037
7038        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7039        if (!mlxsw_sp_rif_should_config(rif, dev, event))
7040                goto out;
7041
7042        err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7043                                                  i6vi->extack);
7044        if (err)
7045                goto out;
7046
7047        err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
7048out:
7049        return notifier_from_errno(err);
7050}
7051
7052static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7053                             const char *mac, int mtu)
7054{
7055        char ritr_pl[MLXSW_REG_RITR_LEN];
7056        int err;
7057
7058        mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7059        err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7060        if (err)
7061                return err;
7062
7063        mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7064        mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7065        mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7066        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7067}
7068
7069static int
7070mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7071                                  struct mlxsw_sp_rif *rif)
7072{
7073        struct net_device *dev = rif->dev;
7074        u16 fid_index;
7075        int err;
7076
7077        fid_index = mlxsw_sp_fid_index(rif->fid);
7078
7079        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7080        if (err)
7081                return err;
7082
7083        err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7084                                dev->mtu);
7085        if (err)
7086                goto err_rif_edit;
7087
7088        err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7089        if (err)
7090                goto err_rif_fdb_op;
7091
7092        if (rif->mtu != dev->mtu) {
7093                struct mlxsw_sp_vr *vr;
7094                int i;
7095
7096                /* The RIF is relevant only to its mr_table instance, as unlike
7097                 * unicast routing, in multicast routing a RIF cannot be shared
7098                 * between several multicast routing tables.
7099                 */
7100                vr = &mlxsw_sp->router->vrs[rif->vr_id];
7101                for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7102                        mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7103                                                   rif, dev->mtu);
7104        }
7105
7106        ether_addr_copy(rif->addr, dev->dev_addr);
7107        rif->mtu = dev->mtu;
7108
7109        netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7110
7111        return 0;
7112
7113err_rif_fdb_op:
7114        mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7115err_rif_edit:
7116        mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7117        return err;
7118}
7119
7120static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7121                            struct netdev_notifier_pre_changeaddr_info *info)
7122{
7123        struct netlink_ext_ack *extack;
7124
7125        extack = netdev_notifier_info_to_extack(&info->info);
7126        return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7127                                                   info->dev_addr, extack);
7128}
7129
7130int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7131                                         unsigned long event, void *ptr)
7132{
7133        struct mlxsw_sp *mlxsw_sp;
7134        struct mlxsw_sp_rif *rif;
7135
7136        mlxsw_sp = mlxsw_sp_lower_get(dev);
7137        if (!mlxsw_sp)
7138                return 0;
7139
7140        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7141        if (!rif)
7142                return 0;
7143
7144        switch (event) {
7145        case NETDEV_CHANGEMTU: /* fall through */
7146        case NETDEV_CHANGEADDR:
7147                return mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7148        case NETDEV_PRE_CHANGEADDR:
7149                return mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7150        }
7151
7152        return 0;
7153}
7154
7155static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7156                                  struct net_device *l3_dev,
7157                                  struct netlink_ext_ack *extack)
7158{
7159        struct mlxsw_sp_rif *rif;
7160
7161        /* If netdev is already associated with a RIF, then we need to
7162         * destroy it and create a new one with the new virtual router ID.
7163         */
7164        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7165        if (rif)
7166                __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7167                                          extack);
7168
7169        return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7170}
7171
7172static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7173                                    struct net_device *l3_dev)
7174{
7175        struct mlxsw_sp_rif *rif;
7176
7177        rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7178        if (!rif)
7179                return;
7180        __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7181}
7182
7183int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7184                                 struct netdev_notifier_changeupper_info *info)
7185{
7186        struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7187        int err = 0;
7188
7189        /* We do not create a RIF for a macvlan, but only use it to
7190         * direct more MAC addresses to the router.
7191         */
7192        if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7193                return 0;
7194
7195        switch (event) {
7196        case NETDEV_PRECHANGEUPPER:
7197                return 0;
7198        case NETDEV_CHANGEUPPER:
7199                if (info->linking) {
7200                        struct netlink_ext_ack *extack;
7201
7202                        extack = netdev_notifier_info_to_extack(&info->info);
7203                        err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7204                } else {
7205                        mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7206                }
7207                break;
7208        }
7209
7210        return err;
7211}
7212
7213static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev, void *data)
7214{
7215        struct mlxsw_sp_rif *rif = data;
7216
7217        if (!netif_is_macvlan(dev))
7218                return 0;
7219
7220        return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7221                                   mlxsw_sp_fid_index(rif->fid), false);
7222}
7223
7224static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7225{
7226        if (!netif_is_macvlan_port(rif->dev))
7227                return 0;
7228
7229        netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7230        return netdev_walk_all_upper_dev_rcu(rif->dev,
7231                                             __mlxsw_sp_rif_macvlan_flush, rif);
7232}
7233
7234static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7235                                       const struct mlxsw_sp_rif_params *params)
7236{
7237        struct mlxsw_sp_rif_subport *rif_subport;
7238
7239        rif_subport = mlxsw_sp_rif_subport_rif(rif);
7240        refcount_set(&rif_subport->ref_count, 1);
7241        rif_subport->vid = params->vid;
7242        rif_subport->lag = params->lag;
7243        if (params->lag)
7244                rif_subport->lag_id = params->lag_id;
7245        else
7246                rif_subport->system_port = params->system_port;
7247}
7248
7249static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7250{
7251        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7252        struct mlxsw_sp_rif_subport *rif_subport;
7253        char ritr_pl[MLXSW_REG_RITR_LEN];
7254
7255        rif_subport = mlxsw_sp_rif_subport_rif(rif);
7256        mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7257                            rif->rif_index, rif->vr_id, rif->dev->mtu);
7258        mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7259        mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7260                                  rif_subport->lag ? rif_subport->lag_id :
7261                                                     rif_subport->system_port,
7262                                  rif_subport->vid);
7263
7264        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7265}
7266
7267static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7268{
7269        int err;
7270
7271        err = mlxsw_sp_rif_subport_op(rif, true);
7272        if (err)
7273                return err;
7274
7275        err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7276                                  mlxsw_sp_fid_index(rif->fid), true);
7277        if (err)
7278                goto err_rif_fdb_op;
7279
7280        mlxsw_sp_fid_rif_set(rif->fid, rif);
7281        return 0;
7282
7283err_rif_fdb_op:
7284        mlxsw_sp_rif_subport_op(rif, false);
7285        return err;
7286}
7287
7288static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7289{
7290        struct mlxsw_sp_fid *fid = rif->fid;
7291
7292        mlxsw_sp_fid_rif_set(fid, NULL);
7293        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7294                            mlxsw_sp_fid_index(fid), false);
7295        mlxsw_sp_rif_macvlan_flush(rif);
7296        mlxsw_sp_rif_subport_op(rif, false);
7297}
7298
7299static struct mlxsw_sp_fid *
7300mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7301                             struct netlink_ext_ack *extack)
7302{
7303        return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7304}
7305
7306static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7307        .type                   = MLXSW_SP_RIF_TYPE_SUBPORT,
7308        .rif_size               = sizeof(struct mlxsw_sp_rif_subport),
7309        .setup                  = mlxsw_sp_rif_subport_setup,
7310        .configure              = mlxsw_sp_rif_subport_configure,
7311        .deconfigure            = mlxsw_sp_rif_subport_deconfigure,
7312        .fid_get                = mlxsw_sp_rif_subport_fid_get,
7313};
7314
7315static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7316                                    enum mlxsw_reg_ritr_if_type type,
7317                                    u16 vid_fid, bool enable)
7318{
7319        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7320        char ritr_pl[MLXSW_REG_RITR_LEN];
7321
7322        mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7323                            rif->dev->mtu);
7324        mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7325        mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7326
7327        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7328}
7329
7330u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7331{
7332        return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7333}
7334
7335static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif)
7336{
7337        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7338        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7339        int err;
7340
7341        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, true);
7342        if (err)
7343                return err;
7344
7345        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7346                                     mlxsw_sp_router_port(mlxsw_sp), true);
7347        if (err)
7348                goto err_fid_mc_flood_set;
7349
7350        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7351                                     mlxsw_sp_router_port(mlxsw_sp), true);
7352        if (err)
7353                goto err_fid_bc_flood_set;
7354
7355        err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7356                                  mlxsw_sp_fid_index(rif->fid), true);
7357        if (err)
7358                goto err_rif_fdb_op;
7359
7360        mlxsw_sp_fid_rif_set(rif->fid, rif);
7361        return 0;
7362
7363err_rif_fdb_op:
7364        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7365                               mlxsw_sp_router_port(mlxsw_sp), false);
7366err_fid_bc_flood_set:
7367        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7368                               mlxsw_sp_router_port(mlxsw_sp), false);
7369err_fid_mc_flood_set:
7370        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7371        return err;
7372}
7373
7374static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif)
7375{
7376        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7377        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7378        struct mlxsw_sp_fid *fid = rif->fid;
7379
7380        mlxsw_sp_fid_rif_set(fid, NULL);
7381        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7382                            mlxsw_sp_fid_index(fid), false);
7383        mlxsw_sp_rif_macvlan_flush(rif);
7384        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7385                               mlxsw_sp_router_port(mlxsw_sp), false);
7386        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7387                               mlxsw_sp_router_port(mlxsw_sp), false);
7388        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false);
7389}
7390
7391static struct mlxsw_sp_fid *
7392mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7393                          struct netlink_ext_ack *extack)
7394{
7395        struct net_device *br_dev = rif->dev;
7396        u16 vid;
7397        int err;
7398
7399        if (is_vlan_dev(rif->dev)) {
7400                vid = vlan_dev_vlan_id(rif->dev);
7401                br_dev = vlan_dev_real_dev(rif->dev);
7402                if (WARN_ON(!netif_is_bridge_master(br_dev)))
7403                        return ERR_PTR(-EINVAL);
7404        } else {
7405                err = br_vlan_get_pvid(rif->dev, &vid);
7406                if (err < 0 || !vid) {
7407                        NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7408                        return ERR_PTR(-EINVAL);
7409                }
7410        }
7411
7412        return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, br_dev, vid, extack);
7413}
7414
7415static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7416{
7417        u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7418        struct switchdev_notifier_fdb_info info;
7419        struct net_device *br_dev;
7420        struct net_device *dev;
7421
7422        br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7423        dev = br_fdb_find_port(br_dev, mac, vid);
7424        if (!dev)
7425                return;
7426
7427        info.addr = mac;
7428        info.vid = vid;
7429        call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7430                                 NULL);
7431}
7432
7433static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_ops = {
7434        .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7435        .rif_size               = sizeof(struct mlxsw_sp_rif),
7436        .configure              = mlxsw_sp_rif_vlan_configure,
7437        .deconfigure            = mlxsw_sp_rif_vlan_deconfigure,
7438        .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7439        .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7440};
7441
7442static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7443{
7444        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7445        u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7446        int err;
7447
7448        err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7449                                       true);
7450        if (err)
7451                return err;
7452
7453        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7454                                     mlxsw_sp_router_port(mlxsw_sp), true);
7455        if (err)
7456                goto err_fid_mc_flood_set;
7457
7458        err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7459                                     mlxsw_sp_router_port(mlxsw_sp), true);
7460        if (err)
7461                goto err_fid_bc_flood_set;
7462
7463        err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7464                                  mlxsw_sp_fid_index(rif->fid), true);
7465        if (err)
7466                goto err_rif_fdb_op;
7467
7468        mlxsw_sp_fid_rif_set(rif->fid, rif);
7469        return 0;
7470
7471err_rif_fdb_op:
7472        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7473                               mlxsw_sp_router_port(mlxsw_sp), false);
7474err_fid_bc_flood_set:
7475        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7476                               mlxsw_sp_router_port(mlxsw_sp), false);
7477err_fid_mc_flood_set:
7478        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7479        return err;
7480}
7481
7482static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7483{
7484        u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7485        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7486        struct mlxsw_sp_fid *fid = rif->fid;
7487
7488        mlxsw_sp_fid_rif_set(fid, NULL);
7489        mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7490                            mlxsw_sp_fid_index(fid), false);
7491        mlxsw_sp_rif_macvlan_flush(rif);
7492        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7493                               mlxsw_sp_router_port(mlxsw_sp), false);
7494        mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7495                               mlxsw_sp_router_port(mlxsw_sp), false);
7496        mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7497}
7498
7499static struct mlxsw_sp_fid *
7500mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7501                         struct netlink_ext_ack *extack)
7502{
7503        return mlxsw_sp_bridge_fid_get(rif->mlxsw_sp, rif->dev, 0, extack);
7504}
7505
7506static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7507{
7508        struct switchdev_notifier_fdb_info info;
7509        struct net_device *dev;
7510
7511        dev = br_fdb_find_port(rif->dev, mac, 0);
7512        if (!dev)
7513                return;
7514
7515        info.addr = mac;
7516        info.vid = 0;
7517        call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7518                                 NULL);
7519}
7520
7521static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7522        .type                   = MLXSW_SP_RIF_TYPE_FID,
7523        .rif_size               = sizeof(struct mlxsw_sp_rif),
7524        .configure              = mlxsw_sp_rif_fid_configure,
7525        .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7526        .fid_get                = mlxsw_sp_rif_fid_fid_get,
7527        .fdb_del                = mlxsw_sp_rif_fid_fdb_del,
7528};
7529
7530static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7531        .type                   = MLXSW_SP_RIF_TYPE_VLAN,
7532        .rif_size               = sizeof(struct mlxsw_sp_rif),
7533        .configure              = mlxsw_sp_rif_fid_configure,
7534        .deconfigure            = mlxsw_sp_rif_fid_deconfigure,
7535        .fid_get                = mlxsw_sp_rif_vlan_fid_get,
7536        .fdb_del                = mlxsw_sp_rif_vlan_fdb_del,
7537};
7538
7539static struct mlxsw_sp_rif_ipip_lb *
7540mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7541{
7542        return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7543}
7544
7545static void
7546mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7547                           const struct mlxsw_sp_rif_params *params)
7548{
7549        struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7550        struct mlxsw_sp_rif_ipip_lb *rif_lb;
7551
7552        params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7553                                 common);
7554        rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7555        rif_lb->lb_config = params_lb->lb_config;
7556}
7557
7558static int
7559mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7560{
7561        struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7562        u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7563        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7564        struct mlxsw_sp_vr *ul_vr;
7565        int err;
7566
7567        ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7568        if (IS_ERR(ul_vr))
7569                return PTR_ERR(ul_vr);
7570
7571        err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7572        if (err)
7573                goto err_loopback_op;
7574
7575        lb_rif->ul_vr_id = ul_vr->id;
7576        lb_rif->ul_rif_id = 0;
7577        ++ul_vr->rif_count;
7578        return 0;
7579
7580err_loopback_op:
7581        mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7582        return err;
7583}
7584
7585static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7586{
7587        struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7588        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7589        struct mlxsw_sp_vr *ul_vr;
7590
7591        ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7592        mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7593
7594        --ul_vr->rif_count;
7595        mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7596}
7597
7598static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7599        .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7600        .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7601        .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7602        .configure              = mlxsw_sp1_rif_ipip_lb_configure,
7603        .deconfigure            = mlxsw_sp1_rif_ipip_lb_deconfigure,
7604};
7605
7606const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7607        [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7608        [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7609        [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7610        [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp1_rif_ipip_lb_ops,
7611};
7612
7613static int
7614mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7615{
7616        struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7617        char ritr_pl[MLXSW_REG_RITR_LEN];
7618
7619        mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7620                            ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7621        mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7622                                             MLXSW_REG_RITR_LOOPBACK_GENERIC);
7623
7624        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7625}
7626
7627static struct mlxsw_sp_rif *
7628mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7629                       struct netlink_ext_ack *extack)
7630{
7631        struct mlxsw_sp_rif *ul_rif;
7632        u16 rif_index;
7633        int err;
7634
7635        err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7636        if (err) {
7637                NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7638                return ERR_PTR(err);
7639        }
7640
7641        ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7642        if (!ul_rif)
7643                return ERR_PTR(-ENOMEM);
7644
7645        mlxsw_sp->router->rifs[rif_index] = ul_rif;
7646        ul_rif->mlxsw_sp = mlxsw_sp;
7647        err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7648        if (err)
7649                goto ul_rif_op_err;
7650
7651        return ul_rif;
7652
7653ul_rif_op_err:
7654        mlxsw_sp->router->rifs[rif_index] = NULL;
7655        kfree(ul_rif);
7656        return ERR_PTR(err);
7657}
7658
7659static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7660{
7661        struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7662
7663        mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7664        mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7665        kfree(ul_rif);
7666}
7667
7668static struct mlxsw_sp_rif *
7669mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7670                    struct netlink_ext_ack *extack)
7671{
7672        struct mlxsw_sp_vr *vr;
7673        int err;
7674
7675        vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7676        if (IS_ERR(vr))
7677                return ERR_CAST(vr);
7678
7679        if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7680                return vr->ul_rif;
7681
7682        vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7683        if (IS_ERR(vr->ul_rif)) {
7684                err = PTR_ERR(vr->ul_rif);
7685                goto err_ul_rif_create;
7686        }
7687
7688        vr->rif_count++;
7689        refcount_set(&vr->ul_rif_refcnt, 1);
7690
7691        return vr->ul_rif;
7692
7693err_ul_rif_create:
7694        mlxsw_sp_vr_put(mlxsw_sp, vr);
7695        return ERR_PTR(err);
7696}
7697
7698static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7699{
7700        struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7701        struct mlxsw_sp_vr *vr;
7702
7703        vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7704
7705        if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7706                return;
7707
7708        vr->rif_count--;
7709        mlxsw_sp_ul_rif_destroy(ul_rif);
7710        mlxsw_sp_vr_put(mlxsw_sp, vr);
7711}
7712
7713int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7714                               u16 *ul_rif_index)
7715{
7716        struct mlxsw_sp_rif *ul_rif;
7717
7718        ASSERT_RTNL();
7719
7720        ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7721        if (IS_ERR(ul_rif))
7722                return PTR_ERR(ul_rif);
7723        *ul_rif_index = ul_rif->rif_index;
7724
7725        return 0;
7726}
7727
7728void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7729{
7730        struct mlxsw_sp_rif *ul_rif;
7731
7732        ASSERT_RTNL();
7733
7734        ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7735        if (WARN_ON(!ul_rif))
7736                return;
7737
7738        mlxsw_sp_ul_rif_put(ul_rif);
7739}
7740
7741static int
7742mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7743{
7744        struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7745        u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7746        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7747        struct mlxsw_sp_rif *ul_rif;
7748        int err;
7749
7750        ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7751        if (IS_ERR(ul_rif))
7752                return PTR_ERR(ul_rif);
7753
7754        err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7755        if (err)
7756                goto err_loopback_op;
7757
7758        lb_rif->ul_vr_id = 0;
7759        lb_rif->ul_rif_id = ul_rif->rif_index;
7760
7761        return 0;
7762
7763err_loopback_op:
7764        mlxsw_sp_ul_rif_put(ul_rif);
7765        return err;
7766}
7767
7768static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7769{
7770        struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7771        struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7772        struct mlxsw_sp_rif *ul_rif;
7773
7774        ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7775        mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7776        mlxsw_sp_ul_rif_put(ul_rif);
7777}
7778
7779static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7780        .type                   = MLXSW_SP_RIF_TYPE_IPIP_LB,
7781        .rif_size               = sizeof(struct mlxsw_sp_rif_ipip_lb),
7782        .setup                  = mlxsw_sp_rif_ipip_lb_setup,
7783        .configure              = mlxsw_sp2_rif_ipip_lb_configure,
7784        .deconfigure            = mlxsw_sp2_rif_ipip_lb_deconfigure,
7785};
7786
7787const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7788        [MLXSW_SP_RIF_TYPE_SUBPORT]     = &mlxsw_sp_rif_subport_ops,
7789        [MLXSW_SP_RIF_TYPE_VLAN]        = &mlxsw_sp_rif_vlan_emu_ops,
7790        [MLXSW_SP_RIF_TYPE_FID]         = &mlxsw_sp_rif_fid_ops,
7791        [MLXSW_SP_RIF_TYPE_IPIP_LB]     = &mlxsw_sp2_rif_ipip_lb_ops,
7792};
7793
7794static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7795{
7796        u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7797
7798        mlxsw_sp->router->rifs = kcalloc(max_rifs,
7799                                         sizeof(struct mlxsw_sp_rif *),
7800                                         GFP_KERNEL);
7801        if (!mlxsw_sp->router->rifs)
7802                return -ENOMEM;
7803
7804        return 0;
7805}
7806
7807static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7808{
7809        int i;
7810
7811        for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7812                WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7813
7814        kfree(mlxsw_sp->router->rifs);
7815}
7816
7817static int
7818mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7819{
7820        char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7821
7822        mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7823        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7824}
7825
7826static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7827{
7828        mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7829        INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7830        return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7831}
7832
7833static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7834{
7835        WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7836}
7837
7838static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7839{
7840        struct mlxsw_sp_router *router;
7841
7842        /* Flush pending FIB notifications and then flush the device's
7843         * table before requesting another dump. The FIB notification
7844         * block is unregistered, so no need to take RTNL.
7845         */
7846        mlxsw_core_flush_owq();
7847        router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7848        mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7849}
7850
7851#ifdef CONFIG_IP_ROUTE_MULTIPATH
7852static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7853{
7854        mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7855}
7856
7857static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7858{
7859        mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7860}
7861
7862static void mlxsw_sp_mp4_hash_init(char *recr2_pl)
7863{
7864        bool only_l3 = !init_net.ipv4.sysctl_fib_multipath_hash_policy;
7865
7866        mlxsw_sp_mp_hash_header_set(recr2_pl,
7867                                    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7868        mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7869        mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7870        mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7871        if (only_l3)
7872                return;
7873        mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7874        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7875        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7876        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7877}
7878
7879static void mlxsw_sp_mp6_hash_init(char *recr2_pl)
7880{
7881        bool only_l3 = !ip6_multipath_hash_policy(&init_net);
7882
7883        mlxsw_sp_mp_hash_header_set(recr2_pl,
7884                                    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7885        mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7886        mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7887        mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7888        mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7889        if (only_l3) {
7890                mlxsw_sp_mp_hash_field_set(recr2_pl,
7891                                           MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7892        } else {
7893                mlxsw_sp_mp_hash_header_set(recr2_pl,
7894                                            MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7895                mlxsw_sp_mp_hash_field_set(recr2_pl,
7896                                           MLXSW_REG_RECR2_TCP_UDP_SPORT);
7897                mlxsw_sp_mp_hash_field_set(recr2_pl,
7898                                           MLXSW_REG_RECR2_TCP_UDP_DPORT);
7899        }
7900}
7901
7902static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
7903{
7904        char recr2_pl[MLXSW_REG_RECR2_LEN];
7905        u32 seed;
7906
7907        seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
7908        mlxsw_reg_recr2_pack(recr2_pl, seed);
7909        mlxsw_sp_mp4_hash_init(recr2_pl);
7910        mlxsw_sp_mp6_hash_init(recr2_pl);
7911
7912        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
7913}
7914#else
/* Stub used when CONFIG_IP_ROUTE_MULTIPATH is not set: nothing to program,
 * always succeeds.
 */
static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
{
	return 0;
}
7919#endif
7920
7921static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
7922{
7923        char rdpm_pl[MLXSW_REG_RDPM_LEN];
7924        unsigned int i;
7925
7926        MLXSW_REG_ZERO(rdpm, rdpm_pl);
7927
7928        /* HW is determining switch priority based on DSCP-bits, but the
7929         * kernel is still doing that based on the ToS. Since there's a
7930         * mismatch in bits we need to make sure to translate the right
7931         * value ToS would observe, skipping the 2 least-significant ECN bits.
7932         */
7933        for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
7934                mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
7935
7936        return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
7937}
7938
7939static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7940{
7941        bool usp = init_net.ipv4.sysctl_ip_fwd_update_priority;
7942        char rgcr_pl[MLXSW_REG_RGCR_LEN];
7943        u64 max_rifs;
7944        int err;
7945
7946        if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
7947                return -EIO;
7948        max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7949
7950        mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
7951        mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
7952        mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
7953        err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7954        if (err)
7955                return err;
7956        return 0;
7957}
7958
7959static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
7960{
7961        char rgcr_pl[MLXSW_REG_RGCR_LEN];
7962
7963        mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
7964        mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
7965}
7966
7967int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
7968{
7969        struct mlxsw_sp_router *router;
7970        int err;
7971
7972        router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
7973        if (!router)
7974                return -ENOMEM;
7975        mlxsw_sp->router = router;
7976        router->mlxsw_sp = mlxsw_sp;
7977
7978        router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
7979        err = register_inetaddr_notifier(&router->inetaddr_nb);
7980        if (err)
7981                goto err_register_inetaddr_notifier;
7982
7983        router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
7984        err = register_inet6addr_notifier(&router->inet6addr_nb);
7985        if (err)
7986                goto err_register_inet6addr_notifier;
7987
7988        INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
7989        err = __mlxsw_sp_router_init(mlxsw_sp);
7990        if (err)
7991                goto err_router_init;
7992
7993        err = mlxsw_sp_rifs_init(mlxsw_sp);
7994        if (err)
7995                goto err_rifs_init;
7996
7997        err = mlxsw_sp_ipips_init(mlxsw_sp);
7998        if (err)
7999                goto err_ipips_init;
8000
8001        err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
8002                              &mlxsw_sp_nexthop_ht_params);
8003        if (err)
8004                goto err_nexthop_ht_init;
8005
8006        err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
8007                              &mlxsw_sp_nexthop_group_ht_params);
8008        if (err)
8009                goto err_nexthop_group_ht_init;
8010
8011        INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
8012        err = mlxsw_sp_lpm_init(mlxsw_sp);
8013        if (err)
8014                goto err_lpm_init;
8015
8016        err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
8017        if (err)
8018                goto err_mr_init;
8019
8020        err = mlxsw_sp_vrs_init(mlxsw_sp);
8021        if (err)
8022                goto err_vrs_init;
8023
8024        err = mlxsw_sp_neigh_init(mlxsw_sp);
8025        if (err)
8026                goto err_neigh_init;
8027
8028        mlxsw_sp->router->netevent_nb.notifier_call =
8029                mlxsw_sp_router_netevent_event;
8030        err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8031        if (err)
8032                goto err_register_netevent_notifier;
8033
8034        err = mlxsw_sp_mp_hash_init(mlxsw_sp);
8035        if (err)
8036                goto err_mp_hash_init;
8037
8038        err = mlxsw_sp_dscp_init(mlxsw_sp);
8039        if (err)
8040                goto err_dscp_init;
8041
8042        mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
8043        err = register_fib_notifier(&mlxsw_sp->router->fib_nb,
8044                                    mlxsw_sp_router_fib_dump_flush);
8045        if (err)
8046                goto err_register_fib_notifier;
8047
8048        return 0;
8049
8050err_register_fib_notifier:
8051err_dscp_init:
8052err_mp_hash_init:
8053        unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8054err_register_netevent_notifier:
8055        mlxsw_sp_neigh_fini(mlxsw_sp);
8056err_neigh_init:
8057        mlxsw_sp_vrs_fini(mlxsw_sp);
8058err_vrs_init:
8059        mlxsw_sp_mr_fini(mlxsw_sp);
8060err_mr_init:
8061        mlxsw_sp_lpm_fini(mlxsw_sp);
8062err_lpm_init:
8063        rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8064err_nexthop_group_ht_init:
8065        rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8066err_nexthop_ht_init:
8067        mlxsw_sp_ipips_fini(mlxsw_sp);
8068err_ipips_init:
8069        mlxsw_sp_rifs_fini(mlxsw_sp);
8070err_rifs_init:
8071        __mlxsw_sp_router_fini(mlxsw_sp);
8072err_router_init:
8073        unregister_inet6addr_notifier(&router->inet6addr_nb);
8074err_register_inet6addr_notifier:
8075        unregister_inetaddr_notifier(&router->inetaddr_nb);
8076err_register_inetaddr_notifier:
8077        kfree(mlxsw_sp->router);
8078        return err;
8079}
8080
8081void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8082{
8083        unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
8084        unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8085        mlxsw_sp_neigh_fini(mlxsw_sp);
8086        mlxsw_sp_vrs_fini(mlxsw_sp);
8087        mlxsw_sp_mr_fini(mlxsw_sp);
8088        mlxsw_sp_lpm_fini(mlxsw_sp);
8089        rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8090        rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8091        mlxsw_sp_ipips_fini(mlxsw_sp);
8092        mlxsw_sp_rifs_fini(mlxsw_sp);
8093        __mlxsw_sp_router_fini(mlxsw_sp);
8094        unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8095        unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8096        kfree(mlxsw_sp->router);
8097}
8098