linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2021 Mellanox Technologies. */
   3
   4#include <net/fib_notifier.h>
   5#include <net/nexthop.h>
   6#include "tc_tun_encap.h"
   7#include "en_tc.h"
   8#include "tc_tun.h"
   9#include "rep/tc.h"
  10#include "diag/en_tc_tracepoint.h"
  11
  12enum {
  13        MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
  14};
  15
  16struct mlx5e_route_key {
  17        int ip_version;
  18        union {
  19                __be32 v4;
  20                struct in6_addr v6;
  21        } endpoint_ip;
  22};
  23
  24struct mlx5e_route_entry {
  25        struct mlx5e_route_key key;
  26        struct list_head encap_entries;
  27        struct list_head decap_flows;
  28        u32 flags;
  29        struct hlist_node hlist;
  30        refcount_t refcnt;
  31        int tunnel_dev_index;
  32        struct rcu_head rcu;
  33};
  34
  35struct mlx5e_tc_tun_encap {
  36        struct mlx5e_priv *priv;
  37        struct notifier_block fib_nb;
  38        spinlock_t route_lock; /* protects route_tbl */
  39        unsigned long route_tbl_last_update;
  40        DECLARE_HASHTABLE(route_tbl, 8);
  41};
  42
  43static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
  44{
  45        return r->flags & MLX5E_ROUTE_ENTRY_VALID;
  46}
  47
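     /* Copy the outer source/destination IP match values from the flow spec
      * into a freshly allocated rx_tun_attr. The TUN_RX flag is only set when
      * both addresses are present, since routing cannot be resolved otherwise.
      */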
  48int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
  49                             struct mlx5_flow_spec *spec)
  50{
  51        struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
  52        struct mlx5_rx_tun_attr *tun_attr;
  53        void *daddr, *saddr;
  54        u8 ip_version;
  55
  56        tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
  57        if (!tun_attr)
  58                return -ENOMEM;
  59
  60        esw_attr->rx_tun_attr = tun_attr;
  61        ip_version = mlx5e_tc_get_ip_version(spec, true);
  62
  63        if (ip_version == 4) {
  64                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  65                                     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
  66                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  67                                     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
  68                tun_attr->dst_ip.v4 = *(__be32 *)daddr;
  69                tun_attr->src_ip.v4 = *(__be32 *)saddr;
  70                if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
  71                        return 0;
  72        }
  73#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
  74        else if (ip_version == 6) {
  75                int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
  76                struct in6_addr zerov6 = {};
  77
  78                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  79                                     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
  80                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
  81                                     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
  82                memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
  83                memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
  84                if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
  85                    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
  86                        return 0;
  87        }
  88#endif
  89        /* Only set the flag if both src and dst ip addresses exist. They are
  90         * required to establish routing.
  91         */
  92        flow_flag_set(flow, TUN_RX);
  93        flow->attr->tun_ip_version = ip_version;
  94        return 0;
  95}
  96
  97static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
  98{
  99        bool all_flow_encaps_valid = true;
 100        int i;
 101
 102        /* Flow can be associated with multiple encap entries.
 103         * Before offloading the flow verify that all of them have
 104         * a valid neighbour.
 105         */
 106        for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
 107                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
 108                        continue;
 109                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
 110                        all_flow_encaps_valid = false;
 111                        break;
 112                }
 113        }
 114
 115        return all_flow_encaps_valid;
 116}
 117
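     /* Re-create the packet reformat object for a cached encap entry and try
      * to move every flow on @flow_list from its slow path rule to the
      * offloaded encap rule. Flows that still have unresolved encap
      * destinations are left on the slow path.
      */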
 118void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 119                              struct mlx5e_encap_entry *e,
 120                              struct list_head *flow_list)
 121{
 122        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 123        struct mlx5_pkt_reformat_params reformat_params;
 124        struct mlx5_esw_flow_attr *esw_attr;
 125        struct mlx5_flow_handle *rule;
 126        struct mlx5_flow_attr *attr;
 127        struct mlx5_flow_spec *spec;
 128        struct mlx5e_tc_flow *flow;
 129        int err;
 130
 131        if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
 132                return;
 133
 134        memset(&reformat_params, 0, sizeof(reformat_params));
 135        reformat_params.type = e->reformat_type;
 136        reformat_params.size = e->encap_size;
 137        reformat_params.data = e->encap_header;
 138        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
 139                                                     &reformat_params,
 140                                                     MLX5_FLOW_NAMESPACE_FDB);
 141        if (IS_ERR(e->pkt_reformat)) {
  142                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
 143                               PTR_ERR(e->pkt_reformat));
 144                return;
 145        }
 146        e->flags |= MLX5_ENCAP_ENTRY_VALID;
 147        mlx5e_rep_queue_neigh_stats_work(priv);
 148
 149        list_for_each_entry(flow, flow_list, tmp_list) {
 150                if (!mlx5e_is_offloaded_flow(flow) || !flow_flag_test(flow, SLOW))
 151                        continue;
 152                attr = flow->attr;
 153                esw_attr = attr->esw_attr;
 154                spec = &attr->parse_attr->spec;
 155
 156                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
 157                esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
 158
 159                /* Do not offload flows with unresolved neighbors */
 160                if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
 161                        continue;
 162                /* update from slow path rule to encap rule */
 163                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
 164                if (IS_ERR(rule)) {
 165                        err = PTR_ERR(rule);
 166                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
 167                                       err);
 168                        continue;
 169                }
 170
 171                mlx5e_tc_unoffload_from_slow_path(esw, flow);
 172                flow->rule[0] = rule;
 173                /* was unset when slow path rule removed */
 174                flow_flag_set(flow, OFFLOADED);
 175        }
 176}
 177
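     /* Counterpart of mlx5e_tc_encap_flows_add(): demote every offloaded flow
      * on @flow_list back to a slow path rule, clear the encap-valid state of
      * the destination and release the packet reformat object of the entry.
      */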
 178void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 179                              struct mlx5e_encap_entry *e,
 180                              struct list_head *flow_list)
 181{
 182        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 183        struct mlx5_esw_flow_attr *esw_attr;
 184        struct mlx5_flow_handle *rule;
 185        struct mlx5_flow_attr *attr;
 186        struct mlx5_flow_spec *spec;
 187        struct mlx5e_tc_flow *flow;
 188        int err;
 189
 190        list_for_each_entry(flow, flow_list, tmp_list) {
 191                if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
 192                        continue;
 193                attr = flow->attr;
 194                esw_attr = attr->esw_attr;
 195                spec = &attr->parse_attr->spec;
 196
 197                /* update from encap rule to slow path rule */
 198                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
 199                /* mark the flow's encap dest as non-valid */
 200                esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
 201
 202                if (IS_ERR(rule)) {
 203                        err = PTR_ERR(rule);
 204                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
 205                                       err);
 206                        continue;
 207                }
 208
 209                mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
 210                flow->rule[0] = rule;
 211                /* was unset when fast path rule removed */
 212                flow_flag_set(flow, OFFLOADED);
 213        }
 214
  215        /* the encap was valid up to this point; invalidate it and release its packet reformat */
 216        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
 217        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 218}
 219
 220static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
 221                                struct list_head *flow_list,
 222                                int index)
 223{
 224        if (IS_ERR(mlx5e_flow_get(flow)))
 225                return;
 226        wait_for_completion(&flow->init_done);
 227
 228        flow->tmp_entry_index = index;
 229        list_add(&flow->tmp_list, flow_list);
 230}
 231
 232/* Takes reference to all flows attached to encap and adds the flows to
 233 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 234 */
 235void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
 236{
 237        struct encap_flow_item *efi;
 238        struct mlx5e_tc_flow *flow;
 239
 240        list_for_each_entry(efi, &e->flows, list) {
 241                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 242                mlx5e_take_tmp_flow(flow, flow_list, efi->index);
 243        }
 244}
 245
 246/* Takes reference to all flows attached to route and adds the flows to
 247 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 248 */
 249static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
 250                                             struct list_head *flow_list)
 251{
 252        struct mlx5e_tc_flow *flow;
 253
 254        list_for_each_entry(flow, &r->decap_flows, decap_routes)
 255                mlx5e_take_tmp_flow(flow, flow_list, 0);
 256}
 257
 258typedef bool (match_cb)(struct mlx5e_encap_entry *);
 259
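     /* Iterate the nhe encap list under RCU, starting after @e (or from the
      * head when @e is NULL), and return the first entry whose reference could
      * be taken and that satisfies @match once its initialization completed.
      * The entry passed in as the starting point is released before searching.
      */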
 260static struct mlx5e_encap_entry *
 261mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
 262                              struct mlx5e_encap_entry *e,
 263                              match_cb match)
 264{
 265        struct mlx5e_encap_entry *next = NULL;
 266
 267retry:
 268        rcu_read_lock();
 269
 270        /* find encap with non-zero reference counter value */
 271        for (next = e ?
 272                     list_next_or_null_rcu(&nhe->encap_list,
 273                                           &e->encap_list,
 274                                           struct mlx5e_encap_entry,
 275                                           encap_list) :
 276                     list_first_or_null_rcu(&nhe->encap_list,
 277                                            struct mlx5e_encap_entry,
 278                                            encap_list);
 279             next;
 280             next = list_next_or_null_rcu(&nhe->encap_list,
 281                                          &next->encap_list,
 282                                          struct mlx5e_encap_entry,
 283                                          encap_list))
 284                if (mlx5e_encap_take(next))
 285                        break;
 286
 287        rcu_read_unlock();
 288
 289        /* release starting encap */
 290        if (e)
 291                mlx5e_encap_put(netdev_priv(e->out_dev), e);
 292        if (!next)
 293                return next;
 294
 295        /* wait for encap to be fully initialized */
 296        wait_for_completion(&next->res_ready);
 297        /* continue searching if encap entry is not in valid state after completion */
 298        if (!match(next)) {
 299                e = next;
 300                goto retry;
 301        }
 302
 303        return next;
 304}
 305
 306static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
 307{
 308        return e->flags & MLX5_ENCAP_ENTRY_VALID;
 309}
 310
 311static struct mlx5e_encap_entry *
 312mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
 313                           struct mlx5e_encap_entry *e)
 314{
 315        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
 316}
 317
 318static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
 319{
 320        return e->compl_result >= 0;
 321}
 322
 323struct mlx5e_encap_entry *
 324mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
 325                          struct mlx5e_encap_entry *e)
 326{
 327        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
 328}
 329
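     /* Walk all valid encap entries of @nhe and check the HW counters of their
      * offloaded flows. If any flow was used since the last report, refresh
      * reported_lastuse and call neigh_event_send() on the matching neighbour.
      */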
 330void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 331{
 332        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
 333        struct mlx5e_encap_entry *e = NULL;
 334        struct mlx5e_tc_flow *flow;
 335        struct mlx5_fc *counter;
 336        struct neigh_table *tbl;
 337        bool neigh_used = false;
 338        struct neighbour *n;
 339        u64 lastuse;
 340
 341        if (m_neigh->family == AF_INET)
 342                tbl = &arp_tbl;
 343#if IS_ENABLED(CONFIG_IPV6)
 344        else if (m_neigh->family == AF_INET6)
 345                tbl = ipv6_stub->nd_tbl;
 346#endif
 347        else
 348                return;
 349
 350        /* mlx5e_get_next_valid_encap() releases previous encap before returning
 351         * next one.
 352         */
 353        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 354                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
 355                struct encap_flow_item *efi, *tmp;
 356                struct mlx5_eswitch *esw;
 357                LIST_HEAD(flow_list);
 358
 359                esw = priv->mdev->priv.eswitch;
 360                mutex_lock(&esw->offloads.encap_tbl_lock);
 361                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
 362                        flow = container_of(efi, struct mlx5e_tc_flow,
 363                                            encaps[efi->index]);
 364                        if (IS_ERR(mlx5e_flow_get(flow)))
 365                                continue;
 366                        list_add(&flow->tmp_list, &flow_list);
 367
 368                        if (mlx5e_is_offloaded_flow(flow)) {
 369                                counter = mlx5e_tc_get_counter(flow);
 370                                lastuse = mlx5_fc_query_lastuse(counter);
 371                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 372                                        neigh_used = true;
 373                                        break;
 374                                }
 375                        }
 376                }
 377                mutex_unlock(&esw->offloads.encap_tbl_lock);
 378
 379                mlx5e_put_flow_list(priv, &flow_list);
 380                if (neigh_used) {
 381                        /* release current encap before breaking the loop */
 382                        mlx5e_encap_put(priv, e);
 383                        break;
 384                }
 385        }
 386
 387        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
 388
 389        if (neigh_used) {
 390                nhe->reported_lastuse = jiffies;
 391
 392                /* find the relevant neigh according to the cached device and
 393                 * dst ip pair
 394                 */
 395                n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
 396                if (!n)
 397                        return;
 398
 399                neigh_event_send(n, NULL);
 400                neigh_release(n);
 401        }
 402}
 403
 404static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 405{
 406        WARN_ON(!list_empty(&e->flows));
 407
 408        if (e->compl_result > 0) {
 409                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 410
 411                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
 412                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 413        }
 414
 415        kfree(e->tun_info);
 416        kfree(e->encap_header);
 417        kfree_rcu(e, rcu);
 418}
 419
 420static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
 421                                struct mlx5e_decap_entry *d)
 422{
 423        WARN_ON(!list_empty(&d->flows));
 424
 425        if (!d->compl_result)
 426                mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
 427
 428        kfree_rcu(d, rcu);
 429}
 430
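     /* Drop a reference to an encap entry. The last reference unlinks the
      * entry from its route entry and the encap table under encap_tbl_lock and
      * frees it after an RCU grace period (see mlx5e_encap_dealloc()).
      */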
 431void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 432{
 433        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 434
 435        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
 436                return;
 437        list_del(&e->route_list);
 438        hash_del_rcu(&e->encap_hlist);
 439        mutex_unlock(&esw->offloads.encap_tbl_lock);
 440
 441        mlx5e_encap_dealloc(priv, e);
 442}
 443
 444static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
 445{
 446        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 447
 448        if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
 449                return;
 450        hash_del_rcu(&d->hlist);
 451        mutex_unlock(&esw->offloads.decap_tbl_lock);
 452
 453        mlx5e_decap_dealloc(priv, d);
 454}
 455
 456static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
 457                                     struct mlx5e_tc_flow *flow,
 458                                     int out_index);
 459
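     /* Detach the flow from the encap entry at @out_index, dropping the route
      * reference taken for source port rewrite first. Releasing the last flow
      * reference also removes the encap entry from the table and frees it.
      */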
 460void mlx5e_detach_encap(struct mlx5e_priv *priv,
 461                        struct mlx5e_tc_flow *flow, int out_index)
 462{
 463        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
 464        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 465
 466        if (flow->attr->esw_attr->dests[out_index].flags &
 467            MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
 468                mlx5e_detach_encap_route(priv, flow, out_index);
 469
 470        /* flow wasn't fully initialized */
 471        if (!e)
 472                return;
 473
 474        mutex_lock(&esw->offloads.encap_tbl_lock);
 475        list_del(&flow->encaps[out_index].list);
 476        flow->encaps[out_index].e = NULL;
 477        if (!refcount_dec_and_test(&e->refcnt)) {
 478                mutex_unlock(&esw->offloads.encap_tbl_lock);
 479                return;
 480        }
 481        list_del(&e->route_list);
 482        hash_del_rcu(&e->encap_hlist);
 483        mutex_unlock(&esw->offloads.encap_tbl_lock);
 484
 485        mlx5e_encap_dealloc(priv, e);
 486}
 487
 488void mlx5e_detach_decap(struct mlx5e_priv *priv,
 489                        struct mlx5e_tc_flow *flow)
 490{
 491        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 492        struct mlx5e_decap_entry *d = flow->decap_reformat;
 493
 494        if (!d)
 495                return;
 496
 497        mutex_lock(&esw->offloads.decap_tbl_lock);
 498        list_del(&flow->l3_to_l2_reformat);
 499        flow->decap_reformat = NULL;
 500
 501        if (!refcount_dec_and_test(&d->refcnt)) {
 502                mutex_unlock(&esw->offloads.decap_tbl_lock);
 503                return;
 504        }
 505        hash_del_rcu(&d->hlist);
 506        mutex_unlock(&esw->offloads.decap_tbl_lock);
 507
 508        mlx5e_decap_dealloc(priv, d);
 509}
 510
 511bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
 512                                           struct mlx5e_encap_key *b)
 513{
 514        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
 515                a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
 516}
 517
 518static int cmp_decap_info(struct mlx5e_decap_key *a,
 519                          struct mlx5e_decap_key *b)
 520{
 521        return memcmp(&a->key, &b->key, sizeof(b->key));
 522}
 523
 524static int hash_encap_info(struct mlx5e_encap_key *key)
 525{
 526        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
 527                     key->tc_tunnel->tunnel_type);
 528}
 529
 530static int hash_decap_info(struct mlx5e_decap_key *key)
 531{
 532        return jhash(&key->key, sizeof(key->key), 0);
 533}
 534
 535bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
 536{
 537        return refcount_inc_not_zero(&e->refcnt);
 538}
 539
 540static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
 541{
 542        return refcount_inc_not_zero(&e->refcnt);
 543}
 544
 545static struct mlx5e_encap_entry *
 546mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
 547                uintptr_t hash_key)
 548{
 549        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 550        struct mlx5e_encap_key e_key;
 551        struct mlx5e_encap_entry *e;
 552
 553        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
 554                                   encap_hlist, hash_key) {
 555                e_key.ip_tun_key = &e->tun_info->key;
 556                e_key.tc_tunnel = e->tunnel;
 557                if (e->tunnel->encap_info_equal(&e_key, key) &&
 558                    mlx5e_encap_take(e))
 559                        return e;
 560        }
 561
 562        return NULL;
 563}
 564
 565static struct mlx5e_decap_entry *
 566mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
 567                uintptr_t hash_key)
 568{
 569        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 570        struct mlx5e_decap_key r_key;
 571        struct mlx5e_decap_entry *e;
 572
 573        hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
 574                                   hlist, hash_key) {
 575                r_key = e->key;
 576                if (!cmp_decap_info(&r_key, key) &&
 577                    mlx5e_decap_take(e))
 578                        return e;
 579        }
 580        return NULL;
 581}
 582
 583struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
 584{
 585        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
 586
 587        return kmemdup(tun_info, tun_size, GFP_KERNEL);
 588}
 589
 590static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
 591                                      struct mlx5e_tc_flow *flow,
 592                                      int out_index,
 593                                      struct mlx5e_encap_entry *e,
 594                                      struct netlink_ext_ack *extack)
 595{
 596        int i;
 597
 598        for (i = 0; i < out_index; i++) {
 599                if (flow->encaps[i].e != e)
 600                        continue;
 601                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
 602                netdev_err(priv->netdev, "can't duplicate encap action\n");
 603                return true;
 604        }
 605
 606        return false;
 607}
 608
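     /* When the tunnel is routed through another mlx5 netdev (VF tunnel),
      * prepare source port rewrite: clear dest_chain, add a modify header
      * action that writes the VF vport metadata into VPORT_TO_REG and remember
      * the resulting action id so route updates can rewrite it later.
      */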
 609static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
 610                               struct mlx5_flow_attr *attr,
 611                               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
 612                               struct net_device *out_dev,
 613                               int route_dev_ifindex,
 614                               int out_index)
 615{
 616        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
 617        struct net_device *route_dev;
 618        u16 vport_num;
 619        int err = 0;
 620        u32 data;
 621
 622        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
 623
 624        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
 625            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
 626                goto out;
 627
 628        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
 629        if (err)
 630                goto out;
 631
 632        attr->dest_chain = 0;
 633        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 634        esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
 635        data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
 636                                                       vport_num);
 637        err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
 638                                                   MLX5_FLOW_NAMESPACE_FDB,
 639                                                   VPORT_TO_REG, data);
 640        if (err >= 0) {
 641                esw_attr->dests[out_index].src_port_rewrite_act_id = err;
 642                err = 0;
 643        }
 644
 645out:
 646        if (route_dev)
 647                dev_put(route_dev);
 648        return err;
 649}
 650
 651static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
 652                                  struct mlx5_esw_flow_attr *attr,
 653                                  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
 654                                  struct net_device *out_dev,
 655                                  int route_dev_ifindex,
 656                                  int out_index)
 657{
 658        int act_id = attr->dests[out_index].src_port_rewrite_act_id;
 659        struct net_device *route_dev;
 660        u16 vport_num;
 661        int err = 0;
 662        u32 data;
 663
 664        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
 665
 666        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
 667            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
 668                err = -ENODEV;
 669                goto out;
 670        }
 671
 672        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
 673        if (err)
 674                goto out;
 675
 676        data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
 677                                                       vport_num);
 678        mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
 679
 680out:
 681        if (route_dev)
 682                dev_put(route_dev);
 683        return err;
 684}
 685
 686static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
 687{
 688        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 689        struct mlx5_rep_uplink_priv *uplink_priv;
 690        struct mlx5e_rep_priv *uplink_rpriv;
 691        struct mlx5e_tc_tun_encap *encap;
 692        unsigned int ret;
 693
 694        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 695        uplink_priv = &uplink_rpriv->uplink_priv;
 696        encap = uplink_priv->encap;
 697
 698        spin_lock_bh(&encap->route_lock);
 699        ret = encap->route_tbl_last_update;
 700        spin_unlock_bh(&encap->route_lock);
 701        return ret;
 702}
 703
 704static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
 705                                    struct mlx5e_tc_flow *flow,
 706                                    struct mlx5e_encap_entry *e,
 707                                    bool new_encap_entry,
 708                                    unsigned long tbl_time_before,
 709                                    int out_index);
 710
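     /* Look up (or create) the encap entry matching the tunnel info of
      * @out_index, build the tunnel headers outside of encap_tbl_lock and
      * attach the flow to the entry. *encap_valid tells the caller whether the
      * entry already has a usable packet reformat (MLX5_ENCAP_ENTRY_VALID).
      */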
 711int mlx5e_attach_encap(struct mlx5e_priv *priv,
 712                       struct mlx5e_tc_flow *flow,
 713                       struct net_device *mirred_dev,
 714                       int out_index,
 715                       struct netlink_ext_ack *extack,
 716                       struct net_device **encap_dev,
 717                       bool *encap_valid)
 718{
 719        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 720        struct mlx5e_tc_flow_parse_attr *parse_attr;
 721        struct mlx5_flow_attr *attr = flow->attr;
 722        const struct ip_tunnel_info *tun_info;
 723        unsigned long tbl_time_before = 0;
 724        struct mlx5e_encap_entry *e;
 725        struct mlx5e_encap_key key;
 726        bool entry_created = false;
 727        unsigned short family;
 728        uintptr_t hash_key;
 729        int err = 0;
 730
 731        parse_attr = attr->parse_attr;
 732        tun_info = parse_attr->tun_info[out_index];
 733        family = ip_tunnel_info_af(tun_info);
 734        key.ip_tun_key = &tun_info->key;
 735        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
 736        if (!key.tc_tunnel) {
 737                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
 738                return -EOPNOTSUPP;
 739        }
 740
 741        hash_key = hash_encap_info(&key);
 742
 743        mutex_lock(&esw->offloads.encap_tbl_lock);
 744        e = mlx5e_encap_get(priv, &key, hash_key);
 745
  746        /* an existing entry may still be initializing or may have failed; verify before reuse */
 747        if (e) {
 748                /* Check that entry was not already attached to this flow */
 749                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
 750                        err = -EOPNOTSUPP;
 751                        goto out_err;
 752                }
 753
 754                mutex_unlock(&esw->offloads.encap_tbl_lock);
 755                wait_for_completion(&e->res_ready);
 756
 757                /* Protect against concurrent neigh update. */
 758                mutex_lock(&esw->offloads.encap_tbl_lock);
 759                if (e->compl_result < 0) {
 760                        err = -EREMOTEIO;
 761                        goto out_err;
 762                }
 763                goto attach_flow;
 764        }
 765
 766        e = kzalloc(sizeof(*e), GFP_KERNEL);
 767        if (!e) {
 768                err = -ENOMEM;
 769                goto out_err;
 770        }
 771
 772        refcount_set(&e->refcnt, 1);
 773        init_completion(&e->res_ready);
 774        entry_created = true;
 775        INIT_LIST_HEAD(&e->route_list);
 776
 777        tun_info = mlx5e_dup_tun_info(tun_info);
 778        if (!tun_info) {
 779                err = -ENOMEM;
 780                goto out_err_init;
 781        }
 782        e->tun_info = tun_info;
 783        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
 784        if (err)
 785                goto out_err_init;
 786
 787        INIT_LIST_HEAD(&e->flows);
 788        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
 789        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
 790        mutex_unlock(&esw->offloads.encap_tbl_lock);
 791
 792        if (family == AF_INET)
 793                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
 794        else if (family == AF_INET6)
 795                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
 796
 797        /* Protect against concurrent neigh update. */
 798        mutex_lock(&esw->offloads.encap_tbl_lock);
 799        complete_all(&e->res_ready);
 800        if (err) {
 801                e->compl_result = err;
 802                goto out_err;
 803        }
 804        e->compl_result = 1;
 805
 806attach_flow:
 807        err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
 808                                       out_index);
 809        if (err)
 810                goto out_err;
 811
 812        flow->encaps[out_index].e = e;
 813        list_add(&flow->encaps[out_index].list, &e->flows);
 814        flow->encaps[out_index].index = out_index;
 815        *encap_dev = e->out_dev;
 816        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
 817                attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
 818                attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
 819                *encap_valid = true;
 820        } else {
 821                *encap_valid = false;
 822        }
 823        mutex_unlock(&esw->offloads.encap_tbl_lock);
 824
 825        return err;
 826
 827out_err:
 828        mutex_unlock(&esw->offloads.encap_tbl_lock);
 829        if (e)
 830                mlx5e_encap_put(priv, e);
 831        return err;
 832
 833out_err_init:
 834        mutex_unlock(&esw->offloads.encap_tbl_lock);
 835        kfree(tun_info);
 836        kfree(e);
 837        return err;
 838}
 839
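     /* Allocate (or reuse) a shared L3-tunnel-to-L2 packet reformat object for
      * the inner ethernet header described by parse_attr->eth and attach the
      * flow to the resulting decap entry.
      */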
 840int mlx5e_attach_decap(struct mlx5e_priv *priv,
 841                       struct mlx5e_tc_flow *flow,
 842                       struct netlink_ext_ack *extack)
 843{
 844        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 845        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
 846        struct mlx5_pkt_reformat_params reformat_params;
 847        struct mlx5e_tc_flow_parse_attr *parse_attr;
 848        struct mlx5e_decap_entry *d;
 849        struct mlx5e_decap_key key;
 850        uintptr_t hash_key;
 851        int err = 0;
 852
 853        parse_attr = flow->attr->parse_attr;
 854        if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
 855                NL_SET_ERR_MSG_MOD(extack,
 856                                   "encap header larger than max supported");
 857                return -EOPNOTSUPP;
 858        }
 859
 860        key.key = parse_attr->eth;
 861        hash_key = hash_decap_info(&key);
 862        mutex_lock(&esw->offloads.decap_tbl_lock);
 863        d = mlx5e_decap_get(priv, &key, hash_key);
 864        if (d) {
 865                mutex_unlock(&esw->offloads.decap_tbl_lock);
 866                wait_for_completion(&d->res_ready);
 867                mutex_lock(&esw->offloads.decap_tbl_lock);
 868                if (d->compl_result) {
 869                        err = -EREMOTEIO;
 870                        goto out_free;
 871                }
 872                goto found;
 873        }
 874
 875        d = kzalloc(sizeof(*d), GFP_KERNEL);
 876        if (!d) {
 877                err = -ENOMEM;
 878                goto out_err;
 879        }
 880
 881        d->key = key;
 882        refcount_set(&d->refcnt, 1);
 883        init_completion(&d->res_ready);
 884        INIT_LIST_HEAD(&d->flows);
 885        hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
 886        mutex_unlock(&esw->offloads.decap_tbl_lock);
 887
 888        memset(&reformat_params, 0, sizeof(reformat_params));
 889        reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2;
 890        reformat_params.size = sizeof(parse_attr->eth);
 891        reformat_params.data = &parse_attr->eth;
 892        d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
 893                                                     &reformat_params,
 894                                                     MLX5_FLOW_NAMESPACE_FDB);
 895        if (IS_ERR(d->pkt_reformat)) {
 896                err = PTR_ERR(d->pkt_reformat);
 897                d->compl_result = err;
 898        }
 899        mutex_lock(&esw->offloads.decap_tbl_lock);
 900        complete_all(&d->res_ready);
 901        if (err)
 902                goto out_free;
 903
 904found:
 905        flow->decap_reformat = d;
 906        attr->decap_pkt_reformat = d->pkt_reformat;
 907        list_add(&flow->l3_to_l2_reformat, &d->flows);
 908        mutex_unlock(&esw->offloads.decap_tbl_lock);
 909        return 0;
 910
 911out_free:
 912        mutex_unlock(&esw->offloads.decap_tbl_lock);
 913        mlx5e_decap_put(priv, d);
 914        return err;
 915
 916out_err:
 917        mutex_unlock(&esw->offloads.decap_tbl_lock);
 918        return err;
 919}
 920
 921static int cmp_route_info(struct mlx5e_route_key *a,
 922                          struct mlx5e_route_key *b)
 923{
 924        if (a->ip_version == 4 && b->ip_version == 4)
 925                return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
 926                              sizeof(a->endpoint_ip.v4));
 927        else if (a->ip_version == 6 && b->ip_version == 6)
 928                return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
 929                              sizeof(a->endpoint_ip.v6));
 930        return 1;
 931}
 932
 933static u32 hash_route_info(struct mlx5e_route_key *key)
 934{
 935        if (key->ip_version == 4)
 936                return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
 937        return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
 938}
 939
 940static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
 941                                struct mlx5e_route_entry *r)
 942{
 943        WARN_ON(!list_empty(&r->decap_flows));
 944        WARN_ON(!list_empty(&r->encap_entries));
 945
 946        kfree_rcu(r, rcu);
 947}
 948
 949static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
 950{
 951        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 952
 953        if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
 954                return;
 955
 956        hash_del_rcu(&r->hlist);
 957        mutex_unlock(&esw->offloads.encap_tbl_lock);
 958
 959        mlx5e_route_dealloc(priv, r);
 960}
 961
 962static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
 963{
 964        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 965
 966        lockdep_assert_held(&esw->offloads.encap_tbl_lock);
 967
 968        if (!refcount_dec_and_test(&r->refcnt))
 969                return;
 970        hash_del_rcu(&r->hlist);
 971        mlx5e_route_dealloc(priv, r);
 972}
 973
 974static struct mlx5e_route_entry *
 975mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
 976                u32 hash_key)
 977{
 978        struct mlx5e_route_key r_key;
 979        struct mlx5e_route_entry *r;
 980
 981        hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
 982                r_key = r->key;
 983                if (!cmp_route_info(&r_key, key) &&
 984                    refcount_inc_not_zero(&r->refcnt))
 985                        return r;
 986        }
 987        return NULL;
 988}
 989
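     /* Find the route entry for @key in the uplink route table or create a new
      * valid one. Entries invalidated by a FIB event are not reused. On
      * creation, route_tbl_last_update is sampled into @route_tbl_change_time
      * so callers can detect a FIB update racing with the lookup.
      */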
 990static struct mlx5e_route_entry *
 991mlx5e_route_get_create(struct mlx5e_priv *priv,
 992                       struct mlx5e_route_key *key,
 993                       int tunnel_dev_index,
 994                       unsigned long *route_tbl_change_time)
 995{
 996        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 997        struct mlx5_rep_uplink_priv *uplink_priv;
 998        struct mlx5e_rep_priv *uplink_rpriv;
 999        struct mlx5e_tc_tun_encap *encap;
1000        struct mlx5e_route_entry *r;
1001        u32 hash_key;
1002
1003        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1004        uplink_priv = &uplink_rpriv->uplink_priv;
1005        encap = uplink_priv->encap;
1006
1007        hash_key = hash_route_info(key);
1008        spin_lock_bh(&encap->route_lock);
1009        r = mlx5e_route_get(encap, key, hash_key);
1010        spin_unlock_bh(&encap->route_lock);
1011        if (r) {
1012                if (!mlx5e_route_entry_valid(r)) {
1013                        mlx5e_route_put_locked(priv, r);
1014                        return ERR_PTR(-EINVAL);
1015                }
1016                return r;
1017        }
1018
1019        r = kzalloc(sizeof(*r), GFP_KERNEL);
1020        if (!r)
1021                return ERR_PTR(-ENOMEM);
1022
1023        r->key = *key;
1024        r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1025        r->tunnel_dev_index = tunnel_dev_index;
1026        refcount_set(&r->refcnt, 1);
1027        INIT_LIST_HEAD(&r->decap_flows);
1028        INIT_LIST_HEAD(&r->encap_entries);
1029
1030        spin_lock_bh(&encap->route_lock);
1031        *route_tbl_change_time = encap->route_tbl_last_update;
1032        hash_add(encap->route_tbl, &r->hlist, hash_key);
1033        spin_unlock_bh(&encap->route_lock);
1034
1035        return r;
1036}
1037
1038static struct mlx5e_route_entry *
1039mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1040{
1041        u32 hash_key = hash_route_info(key);
1042        struct mlx5e_route_entry *r;
1043
1044        spin_lock_bh(&encap->route_lock);
1045        encap->route_tbl_last_update = jiffies;
1046        r = mlx5e_route_get(encap, key, hash_key);
1047        spin_unlock_bh(&encap->route_lock);
1048
1049        return r;
1050}
1051
1052struct mlx5e_tc_fib_event_data {
1053        struct work_struct work;
1054        unsigned long event;
1055        struct mlx5e_route_entry *r;
1056        struct net_device *ul_dev;
1057};
1058
1059static void mlx5e_tc_fib_event_work(struct work_struct *work);
1060static struct mlx5e_tc_fib_event_data *
1061mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1062{
1063        struct mlx5e_tc_fib_event_data *fib_work;
1064
1065        fib_work = kzalloc(sizeof(*fib_work), flags);
1066        if (WARN_ON(!fib_work))
1067                return NULL;
1068
1069        INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1070        fib_work->event = event;
1071        fib_work->ul_dev = ul_dev;
1072
1073        return fib_work;
1074}
1075
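     /* Queue a deferred FIB update work item for route @r, holding an extra
      * reference on the route and on the uplink netdev until the work runs.
      */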
1076static int
1077mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1078                           struct mlx5e_route_entry *r,
1079                           unsigned long event)
1080{
1081        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1082        struct mlx5e_tc_fib_event_data *fib_work;
1083        struct mlx5e_rep_priv *uplink_rpriv;
1084        struct net_device *ul_dev;
1085
1086        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1087        ul_dev = uplink_rpriv->netdev;
1088
1089        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1090        if (!fib_work)
1091                return -ENOMEM;
1092
1093        dev_hold(ul_dev);
1094        refcount_inc(&r->refcnt);
1095        fib_work->r = r;
1096        queue_work(priv->wq, &fib_work->work);
1097
1098        return 0;
1099}
1100
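     /* Resolve the route for the tunnel decap destination IP and attach the
      * flow to the matching route entry so it can be re-offloaded when the FIB
      * changes. A FIB update racing with the lookup (detected via the table
      * timestamp) schedules a deferred route update.
      */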
1101int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1102                             struct mlx5e_tc_flow *flow)
1103{
1104        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1105        unsigned long tbl_time_before, tbl_time_after;
1106        struct mlx5e_tc_flow_parse_attr *parse_attr;
1107        struct mlx5_flow_attr *attr = flow->attr;
1108        struct mlx5_esw_flow_attr *esw_attr;
1109        struct mlx5e_route_entry *r;
1110        struct mlx5e_route_key key;
1111        int err = 0;
1112
1113        esw_attr = attr->esw_attr;
1114        parse_attr = attr->parse_attr;
1115        mutex_lock(&esw->offloads.encap_tbl_lock);
1116        if (!esw_attr->rx_tun_attr)
1117                goto out;
1118
1119        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1120        tbl_time_after = tbl_time_before;
1121        err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
1122        if (err || !esw_attr->rx_tun_attr->decap_vport)
1123                goto out;
1124
1125        key.ip_version = attr->tun_ip_version;
1126        if (key.ip_version == 4)
1127                key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1128        else
1129                key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1130
1131        r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1132                                   &tbl_time_after);
1133        if (IS_ERR(r)) {
1134                err = PTR_ERR(r);
1135                goto out;
1136        }
1137        /* Routing changed concurrently. FIB event handler might have missed new
1138         * entry, schedule update.
1139         */
1140        if (tbl_time_before != tbl_time_after) {
1141                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1142                if (err) {
1143                        mlx5e_route_put_locked(priv, r);
1144                        goto out;
1145                }
1146        }
1147
1148        flow->decap_route = r;
1149        list_add(&flow->decap_routes, &r->decap_flows);
1150        mutex_unlock(&esw->offloads.encap_tbl_lock);
1151        return 0;
1152
1153out:
1154        mutex_unlock(&esw->offloads.encap_tbl_lock);
1155        return err;
1156}
1157
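     /* Attach a flow that uses source port rewrite (VF tunnel) to the route
      * entry of the tunnel source IP, so that FIB changes re-trigger encap
      * header and vport metadata updates for it.
      */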
1158static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1159                                    struct mlx5e_tc_flow *flow,
1160                                    struct mlx5e_encap_entry *e,
1161                                    bool new_encap_entry,
1162                                    unsigned long tbl_time_before,
1163                                    int out_index)
1164{
1165        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1166        unsigned long tbl_time_after = tbl_time_before;
1167        struct mlx5e_tc_flow_parse_attr *parse_attr;
1168        struct mlx5_flow_attr *attr = flow->attr;
1169        const struct ip_tunnel_info *tun_info;
1170        struct mlx5_esw_flow_attr *esw_attr;
1171        struct mlx5e_route_entry *r;
1172        struct mlx5e_route_key key;
1173        unsigned short family;
1174        int err = 0;
1175
1176        esw_attr = attr->esw_attr;
1177        parse_attr = attr->parse_attr;
1178        tun_info = parse_attr->tun_info[out_index];
1179        family = ip_tunnel_info_af(tun_info);
1180
1181        if (family == AF_INET) {
1182                key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1183                key.ip_version = 4;
1184        } else if (family == AF_INET6) {
1185                key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1186                key.ip_version = 6;
1187        }
1188
1189        err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1190                                  e->route_dev_ifindex, out_index);
1191        if (err || !(esw_attr->dests[out_index].flags &
1192                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1193                return err;
1194
1195        r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1196                                   &tbl_time_after);
1197        if (IS_ERR(r))
1198                return PTR_ERR(r);
1199        /* Routing changed concurrently. FIB event handler might have missed new
1200         * entry, schedule update.
1201         */
1202        if (tbl_time_before != tbl_time_after) {
1203                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1204                if (err) {
1205                        mlx5e_route_put_locked(priv, r);
1206                        return err;
1207                }
1208        }
1209
1210        flow->encap_routes[out_index].r = r;
1211        if (new_encap_entry)
1212                list_add(&e->route_list, &r->encap_entries);
1213        flow->encap_routes[out_index].index = out_index;
1214        return 0;
1215}
1216
1217void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1218                              struct mlx5e_tc_flow *flow)
1219{
1220        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1221        struct mlx5e_route_entry *r = flow->decap_route;
1222
1223        if (!r)
1224                return;
1225
1226        mutex_lock(&esw->offloads.encap_tbl_lock);
1227        list_del(&flow->decap_routes);
1228        flow->decap_route = NULL;
1229
1230        if (!refcount_dec_and_test(&r->refcnt)) {
1231                mutex_unlock(&esw->offloads.encap_tbl_lock);
1232                return;
1233        }
1234        hash_del_rcu(&r->hlist);
1235        mutex_unlock(&esw->offloads.encap_tbl_lock);
1236
1237        mlx5e_route_dealloc(priv, r);
1238}
1239
1240static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1241                                     struct mlx5e_tc_flow *flow,
1242                                     int out_index)
1243{
1244        struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1245        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1246        struct mlx5e_encap_entry *e, *tmp;
1247
1248        if (!r)
1249                return;
1250
1251        mutex_lock(&esw->offloads.encap_tbl_lock);
1252        flow->encap_routes[out_index].r = NULL;
1253
1254        if (!refcount_dec_and_test(&r->refcnt)) {
1255                mutex_unlock(&esw->offloads.encap_tbl_lock);
1256                return;
1257        }
1258        list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1259                list_del_init(&e->route_list);
1260        hash_del_rcu(&r->hlist);
1261        mutex_unlock(&esw->offloads.encap_tbl_lock);
1262
1263        mlx5e_route_dealloc(priv, r);
1264}
1265
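     /* Tear down the offloaded state of every flow attached to @e: unoffload
      * the FDB or slow path rule, free the modify header, clear the encap
      * destination state and release the packet reformat. The entry is marked
      * MLX5_ENCAP_ENTRY_NO_ROUTE until a new route is resolved.
      */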
1266static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1267                                   struct mlx5e_encap_entry *e,
1268                                   struct list_head *encap_flows)
1269{
1270        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1271        struct mlx5e_tc_flow *flow;
1272
1273        list_for_each_entry(flow, encap_flows, tmp_list) {
1274                struct mlx5_flow_attr *attr = flow->attr;
1275                struct mlx5_esw_flow_attr *esw_attr;
1276
1277                if (!mlx5e_is_offloaded_flow(flow))
1278                        continue;
1279                esw_attr = attr->esw_attr;
1280
1281                if (flow_flag_test(flow, SLOW))
1282                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
1283                else
1284                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1285                mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1286                attr->modify_hdr = NULL;
1287
1288                esw_attr->dests[flow->tmp_entry_index].flags &=
1289                        ~MLX5_ESW_DEST_ENCAP_VALID;
1290                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1291        }
1292
1293        e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1294        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1295                e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1296                mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1297                e->pkt_reformat = NULL;
1298        }
1299}
1300
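     /* Rebuild the encap header against the new route and try to re-offload
      * every flow on @encap_flows: refresh the VF tunnel vport metadata and
      * the flow mod_hdr, then install either the encap rule (when all
      * destinations are valid) or a slow path rule.
      */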
1301static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1302                                  struct net_device *tunnel_dev,
1303                                  struct mlx5e_encap_entry *e,
1304                                  struct list_head *encap_flows)
1305{
1306        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1307        struct mlx5e_tc_flow *flow;
1308        int err;
1309
1310        err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1311                mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1312                mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1313        if (err)
1314                mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1315        e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1316
1317        list_for_each_entry(flow, encap_flows, tmp_list) {
1318                struct mlx5e_tc_flow_parse_attr *parse_attr;
1319                struct mlx5_flow_attr *attr = flow->attr;
1320                struct mlx5_esw_flow_attr *esw_attr;
1321                struct mlx5_flow_handle *rule;
1322                struct mlx5_flow_spec *spec;
1323
1324                if (flow_flag_test(flow, FAILED))
1325                        continue;
1326
1327                esw_attr = attr->esw_attr;
1328                parse_attr = attr->parse_attr;
1329                spec = &parse_attr->spec;
1330
1331                err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1332                                             e->out_dev, e->route_dev_ifindex,
1333                                             flow->tmp_entry_index);
1334                if (err) {
1335                        mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1336                        continue;
1337                }
1338
1339                err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
1340                if (err) {
1341                        mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1342                                       err);
1343                        continue;
1344                }
1345
1346                if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1347                        esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1348                        esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1349                        if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1350                                goto offload_to_slow_path;
1351                        /* update from slow path rule to encap rule */
1352                        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1353                        if (IS_ERR(rule)) {
1354                                err = PTR_ERR(rule);
1355                                mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1356                                               err);
1357                        } else {
1358                                flow->rule[0] = rule;
1359                        }
1360                } else {
1361offload_to_slow_path:
1362                        rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1363                        /* mark the flow's encap dest as non-valid */
1364                        esw_attr->dests[flow->tmp_entry_index].flags &=
1365                                ~MLX5_ESW_DEST_ENCAP_VALID;
1366
1367                        if (IS_ERR(rule)) {
1368                                err = PTR_ERR(rule);
1369                                mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1370                                               err);
1371                        } else {
1372                                flow->rule[0] = rule;
1373                        }
1374                }
1375                flow_flag_set(flow, OFFLOADED);
1376        }
1377}
1378
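     /* For every encap entry on route @r, take its flows, invalidate the
      * current offload state and, on FIB_EVENT_ENTRY_REPLACE, re-offload the
      * flows against the new route. The taken flow references are spliced onto
      * @flow_list for the caller to release.
      */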
1379static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1380                                     struct mlx5e_route_entry *r,
1381                                     struct list_head *flow_list,
1382                                     bool replace)
1383{
1384        struct net_device *tunnel_dev;
1385        struct mlx5e_encap_entry *e;
1386
1387        tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1388        if (!tunnel_dev)
1389                return -ENODEV;
1390
1391        list_for_each_entry(e, &r->encap_entries, route_list) {
1392                LIST_HEAD(encap_flows);
1393
1394                mlx5e_take_all_encap_flows(e, &encap_flows);
1395                if (list_empty(&encap_flows))
1396                        continue;
1397
1398                if (mlx5e_route_entry_valid(r))
1399                        mlx5e_invalidate_encap(priv, e, &encap_flows);
1400
1401                if (!replace) {
1402                        list_splice(&encap_flows, flow_list);
1403                        continue;
1404                }
1405
1406                mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1407                list_splice(&encap_flows, flow_list);
1408        }
1409
1410        return 0;
1411}
1412
1413static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1414                                      struct list_head *flow_list)
1415{
1416        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1417        struct mlx5e_tc_flow *flow;
1418
1419        list_for_each_entry(flow, flow_list, tmp_list)
1420                if (mlx5e_is_offloaded_flow(flow))
1421                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1422}
1423
1424static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1425                                  struct list_head *decap_flows)
1426{
1427        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1428        struct mlx5e_tc_flow *flow;
1429
1430        list_for_each_entry(flow, decap_flows, tmp_list) {
1431                struct mlx5e_tc_flow_parse_attr *parse_attr;
1432                struct mlx5_flow_attr *attr = flow->attr;
1433                struct mlx5_flow_handle *rule;
1434                struct mlx5_flow_spec *spec;
1435                int err;
1436
1437                if (flow_flag_test(flow, FAILED))
1438                        continue;
1439
1440                parse_attr = attr->parse_attr;
1441                spec = &parse_attr->spec;
1442                err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
1443                if (err) {
1444                        mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1445                                       err);
1446                        continue;
1447                }
1448
1449                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1450                if (IS_ERR(rule)) {
1451                        err = PTR_ERR(rule);
1452                        mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1453                                       err);
1454                } else {
1455                        flow->rule[0] = rule;
1456                        flow_flag_set(flow, OFFLOADED);
1457                }
1458        }
1459}
1460
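/* Handle the decap flows attached to route entry @r: unoffload them if the
 * route was marked valid, re-offload them when the FIB event is a replace,
 * and splice them onto @flow_list for the caller to release.
 */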
1461static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1462                                          struct mlx5e_route_entry *r,
1463                                          struct list_head *flow_list,
1464                                          bool replace)
1465{
1466        struct net_device *tunnel_dev;
1467        LIST_HEAD(decap_flows);
1468
1469        tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1470        if (!tunnel_dev)
1471                return -ENODEV;
1472
1473        mlx5e_take_all_route_decap_flows(r, &decap_flows);
1474        if (mlx5e_route_entry_valid(r))
1475                mlx5e_unoffload_flow_list(priv, &decap_flows);
1476        if (replace)
1477                mlx5e_reoffload_decap(priv, &decap_flows);
1478
1479        list_splice(&decap_flows, flow_list);
1480
1481        return 0;
1482}
1483
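/* Work item that applies a FIB_EVENT_ENTRY_REPLACE/DEL notification to all
 * encap and decap flows depending on the affected route entry. Takes rtnl_lock
 * and the eswitch encap_tbl_lock, then releases the collected flow references
 * as well as the route entry and uplink netdev references taken by the
 * notifier.
 */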
1484static void mlx5e_tc_fib_event_work(struct work_struct *work)
1485{
1486        struct mlx5e_tc_fib_event_data *event_data =
1487                container_of(work, struct mlx5e_tc_fib_event_data, work);
1488        struct net_device *ul_dev = event_data->ul_dev;
1489        struct mlx5e_priv *priv = netdev_priv(ul_dev);
1490        struct mlx5e_route_entry *r = event_data->r;
1491        struct mlx5_eswitch *esw;
1492        LIST_HEAD(flow_list);
1493        bool replace;
1494        int err;
1495
1496        /* sync with concurrent neigh updates */
1497        rtnl_lock();
1498        esw = priv->mdev->priv.eswitch;
1499        mutex_lock(&esw->offloads.encap_tbl_lock);
1500        replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1501
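        /* A delete event for a route that was never marked valid has nothing
         * offloaded to tear down, so bail out early.
         */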
1502        if (!mlx5e_route_entry_valid(r) && !replace)
1503                goto out;
1504
1505        err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1506        if (err)
1507                mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1508                               err);
1509
1510        err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1511        if (err)
1512                mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1513                               err);
1514
1515        if (replace)
1516                r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1517out:
1518        mutex_unlock(&esw->offloads.encap_tbl_lock);
1519        rtnl_unlock();
1520
1521        mlx5e_put_flow_list(priv, &flow_list);
1522        mlx5e_route_put(priv, event_data->r);
1523        dev_put(event_data->ul_dev);
1524        kfree(event_data);
1525}
1526
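/* Called from the FIB notifier in atomic context. Only /32 host routes whose
 * nexthop device is an mlx5e netdev are considered; for those, allocate the
 * work item with GFP_ATOMIC and take references to the cached route entry and
 * to @ul_dev. Returns NULL when the event is not relevant or no route entry
 * matches, or an ERR_PTR on allocation failure.
 */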
1527static struct mlx5e_tc_fib_event_data *
1528mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1529                         struct net_device *ul_dev,
1530                         struct mlx5e_tc_tun_encap *encap,
1531                         unsigned long event,
1532                         struct fib_notifier_info *info)
1533{
1534        struct fib_entry_notifier_info *fen_info;
1535        struct mlx5e_tc_fib_event_data *fib_work;
1536        struct mlx5e_route_entry *r;
1537        struct mlx5e_route_key key;
1538        struct net_device *fib_dev;
1539
1540        fen_info = container_of(info, struct fib_entry_notifier_info, info);
1541        fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1542        if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1543            fen_info->dst_len != 32)
1544                return NULL;
1545
1546        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1547        if (!fib_work)
1548                return ERR_PTR(-ENOMEM);
1549
1550        key.endpoint_ip.v4 = htonl(fen_info->dst);
1551        key.ip_version = 4;
1552
1553        /* Can't fail after this point because releasing the reference to r
1554         * requires taking a sleeping mutex, which we can't do in atomic
1555         * context.
1556         */
1557        r = mlx5e_route_lookup_for_update(encap, &key);
1558        if (!r)
1559                goto out;
1560        fib_work->r = r;
1561        dev_hold(ul_dev);
1562
1563        return fib_work;
1564
1565out:
1566        kfree(fib_work);
1567        return NULL;
1568}
1569
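/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 host routes
 * through an mlx5e netdev are considered, and the cached route entry is
 * looked up by destination address while still in atomic context.
 */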
1570static struct mlx5e_tc_fib_event_data *
1571mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1572                         struct net_device *ul_dev,
1573                         struct mlx5e_tc_tun_encap *encap,
1574                         unsigned long event,
1575                         struct fib_notifier_info *info)
1576{
1577        struct fib6_entry_notifier_info *fen_info;
1578        struct mlx5e_tc_fib_event_data *fib_work;
1579        struct mlx5e_route_entry *r;
1580        struct mlx5e_route_key key;
1581        struct net_device *fib_dev;
1582
1583        fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1584        fib_dev = fib6_info_nh_dev(fen_info->rt);
1585        if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1586            fen_info->rt->fib6_dst.plen != 128)
1587                return NULL;
1588
1589        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1590        if (!fib_work)
1591                return ERR_PTR(-ENOMEM);
1592
1593        memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1594               sizeof(fen_info->rt->fib6_dst.addr));
1595        key.ip_version = 6;
1596
1597        /* Can't fail after this point because releasing the reference to r
1598         * requires taking a sleeping mutex, which we can't do in atomic
1599         * context.
1600         */
1601        r = mlx5e_route_lookup_for_update(encap, &key);
1602        if (!r)
1603                goto out;
1604        fib_work->r = r;
1605        dev_hold(ul_dev);
1606
1607        return fib_work;
1608
1609out:
1610        kfree(fib_work);
1611        return NULL;
1612}
1613
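/* FIB notifier callback. For IPv4/IPv6 route replace and delete events it
 * builds a work item in atomic context and queues it on the driver workqueue;
 * the actual flow updates are done later in mlx5e_tc_fib_event_work().
 */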
1614static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1615{
1616        struct mlx5e_tc_fib_event_data *fib_work;
1617        struct fib_notifier_info *info = ptr;
1618        struct mlx5e_tc_tun_encap *encap;
1619        struct net_device *ul_dev;
1620        struct mlx5e_priv *priv;
1621
1622        encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1623        priv = encap->priv;
1624        ul_dev = priv->netdev;
1625        priv = netdev_priv(ul_dev);
1626
1627        switch (event) {
1628        case FIB_EVENT_ENTRY_REPLACE:
1629        case FIB_EVENT_ENTRY_DEL:
1630                if (info->family == AF_INET)
1631                        fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1632                else if (info->family == AF_INET6)
1633                        fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1634                else
1635                        return NOTIFY_DONE;
1636
1637                if (!IS_ERR_OR_NULL(fib_work)) {
1638                        queue_work(priv->wq, &fib_work->work);
1639                } else if (IS_ERR(fib_work)) {
1640                        NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1641                        mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1642                                       PTR_ERR(fib_work));
1643                }
1644
1645                break;
1646        default:
1647                return NOTIFY_DONE;
1648        }
1649
1650        return NOTIFY_DONE;
1651}
1652
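/* Allocate the tunnel encap context, initialize the route table and register
 * the FIB notifier in the netdev's network namespace.
 */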
1653struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1654{
1655        struct mlx5e_tc_tun_encap *encap;
1656        int err;
1657
1658        encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1659        if (!encap)
1660                return ERR_PTR(-ENOMEM);
1661
1662        encap->priv = priv;
1663        encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1664        spin_lock_init(&encap->route_lock);
1665        hash_init(encap->route_tbl);
1666        err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1667                                    NULL, NULL);
1668        if (err) {
1669                kvfree(encap);
1670                return ERR_PTR(err);
1671        }
1672
1673        return encap;
1674}
1675
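/* Unregister the FIB notifier and flush any pending FIB event work before
 * freeing the encap context.
 */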
1676void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1677{
1678        if (!encap)
1679                return;
1680
1681        unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1682        flush_workqueue(encap->priv->wq); /* flush pending fib event work items */
1683        kvfree(encap);
1684}
1685