linux/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "eswitch_offloads_chains.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "diag/en_tc_tracepoint.h"

struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        struct mlx5_modify_hdr *modify_hdr;
        u32 hairpin_tirn;
        u8 match_level;
        struct mlx5_flow_table  *hairpin_ft;
        struct mlx5_fc          *counter;
};

#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

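/* Flow flags: the first five mirror the exported MLX5E_TC_FLAG_* bits;
 * flags from MLX5E_TC_FLOW_BASE upwards are internal to this file.
 */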
enum {
        MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
        MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
        MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
        MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
        MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
        MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
        MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
};

#define MLX5E_TC_MAX_SPLITS 1

/* Helper struct for accessing a struct containing list_head array.
 * Containing struct
 *   |- Helper array
 *      [0] Helper item 0
 *          |- list_head item 0
 *          |- index (0)
 *      [1] Helper item 1
 *          |- list_head item 1
 *          |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *        container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
        struct mlx5e_encap_entry *e; /* attached encap instance */
        struct list_head list;
        int index;
};
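
/* Example (illustrative): given an efi embedded in a flow's encaps[] array,
 * the owning flow is recovered in two steps collapsed into one call, as in
 * mlx5e_take_all_encap_flows() below:
 *   flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */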

struct mlx5e_tc_flow {
        struct rhash_head       node;
        struct mlx5e_priv       *priv;
        u64                     cookie;
        unsigned long           flags;
        struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
        /* Flow can be associated with multiple encap IDs.
         * The number of encaps is bounded by the number of supported
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5e_tc_flow    *peer_flow;
        struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
        struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head        hairpin; /* flows sharing the same hairpin */
        struct list_head        peer;    /* flows with peer flow */
        struct list_head        unready; /* flows not ready to be offloaded (e.g. due to missing route) */
        int                     tmp_efi_index;
        struct list_head        tmp_list; /* temporary flow list used by neigh update */
        refcount_t              refcnt;
        struct rcu_head         rcu_head;
        struct completion       init_done;
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
        };
};

struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
        int num_mod_hdr_actions;
        int max_mod_hdr_actions;
        void *mod_hdr_actions;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)

struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when the dead peer update
         * event traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};

struct mod_hdr_key {
        int num_actions;
        void *actions;
};

struct mlx5e_mod_hdr_entry {
        /* a node of a hash table which keeps all the mod_hdr entries */
        struct hlist_node mod_hdr_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same mod_hdr entry */
        struct list_head flows;

        struct mod_hdr_key key;

        struct mlx5_modify_hdr *modify_hdr;

        refcount_t refcnt;
        struct completion res_ready;
        int compl_result;
};

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);

static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);
        return flow;
}

static void mlx5e_flow_put(struct mlx5e_priv *priv,
                           struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}

static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before setting bit. */
        smp_mb__before_atomic();
        set_bit(flag, &flow->flags);
}

#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
                                     unsigned long flag)
{
        /* test_and_set_bit() provides all necessary barriers */
        return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)                      \
        __flow_flag_test_and_set(flow,                          \
                                 MLX5E_TC_FLOW_FLAG_##flag)

static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before clearing bit. */
        smp_mb__before_atomic();
        clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
                                                      MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        bool ret = test_bit(flag, &flow->flags);

        /* Read fields of flow structure only after checking flags. */
        smp_mb__after_atomic();
        return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
                                                    MLX5E_TC_FLOW_FLAG_##flag)
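
/* Example (illustrative): flow_flag_test(flow, OFFLOADED) expands to
 * __flow_flag_test(flow, MLX5E_TC_FLOW_FLAG_OFFLOADED), i.e. an atomic
 * test_bit() on flow->flags plus the barrier documented above.
 */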

static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
        return jhash(key->actions,
                     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
                                   struct mod_hdr_key *b)
{
        if (a->num_actions != b->num_actions)
                return 1;

        return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, int namespace)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr :
                &priv->fs.tc.mod_hdr;
}

static struct mlx5e_mod_hdr_entry *
mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key)
{
        struct mlx5e_mod_hdr_entry *mh, *found = NULL;

        hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) {
                if (!cmp_mod_hdr_info(&mh->key, key)) {
                        refcount_inc(&mh->refcnt);
                        found = mh;
                        break;
                }
        }

        return found;
}

static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv,
                              struct mlx5e_mod_hdr_entry *mh,
                              int namespace)
{
        struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace);

        if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock))
                return;
        hash_del(&mh->mod_hdr_hlist);
        mutex_unlock(&tbl->lock);

        WARN_ON(!list_empty(&mh->flows));
        if (mh->compl_result > 0)
                mlx5_modify_header_dealloc(priv->mdev, mh->modify_hdr);

        kfree(mh);
}

static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
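
/* Modify-header contexts are shared between flows: entries are looked up
 * in a hash table keyed on the packed action list, and concurrent users
 * synchronize on mh->res_ready so only the first caller performs the
 * firmware allocation (a sketch of the scheme implemented below).
 */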
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        int num_actions, actions_size, namespace, err;
        struct mlx5e_mod_hdr_entry *mh;
        struct mod_hdr_tbl *tbl;
        struct mod_hdr_key key;
        u32 hash_key;

        num_actions  = parse_attr->num_mod_hdr_actions;
        actions_size = MLX5_MH_ACT_SZ * num_actions;

        key.actions = parse_attr->mod_hdr_actions;
        key.num_actions = num_actions;

        hash_key = hash_mod_hdr_info(&key);

        namespace = get_flow_name_space(flow);
        tbl = get_mod_hdr_table(priv, namespace);

        mutex_lock(&tbl->lock);
        mh = mlx5e_mod_hdr_get(tbl, &key, hash_key);
        if (mh) {
                mutex_unlock(&tbl->lock);
                wait_for_completion(&mh->res_ready);

                if (mh->compl_result < 0) {
                        err = -EREMOTEIO;
                        goto attach_header_err;
                }
                goto attach_flow;
        }

        mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
        if (!mh) {
                mutex_unlock(&tbl->lock);
                return -ENOMEM;
        }

        mh->key.actions = (void *)mh + sizeof(*mh);
        memcpy(mh->key.actions, key.actions, actions_size);
        mh->key.num_actions = num_actions;
        spin_lock_init(&mh->flows_lock);
        INIT_LIST_HEAD(&mh->flows);
        refcount_set(&mh->refcnt, 1);
        init_completion(&mh->res_ready);

        hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key);
        mutex_unlock(&tbl->lock);

        mh->modify_hdr = mlx5_modify_header_alloc(priv->mdev, namespace,
                                                  mh->key.num_actions,
                                                  mh->key.actions);
        if (IS_ERR(mh->modify_hdr)) {
                err = PTR_ERR(mh->modify_hdr);
                mh->compl_result = err;
                goto alloc_header_err;
        }
        mh->compl_result = 1;
        complete_all(&mh->res_ready);

attach_flow:
        flow->mh = mh;
        spin_lock(&mh->flows_lock);
        list_add(&flow->mod_hdr, &mh->flows);
        spin_unlock(&mh->flows_lock);
        if (mlx5e_is_eswitch_flow(flow))
                flow->esw_attr->modify_hdr = mh->modify_hdr;
        else
                flow->nic_attr->modify_hdr = mh->modify_hdr;

        return 0;

alloc_header_err:
        complete_all(&mh->res_ready);
attach_header_err:
        mlx5e_mod_hdr_put(priv, mh, namespace);
        return err;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->mh)
                return;

        spin_lock(&flow->mh->flows_lock);
        list_del(&flow->mod_hdr);
        spin_unlock(&flow->mh->flows_lock);

        mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
        flow->mh = NULL;
}

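/* Note: a minimal lookup helper. It assumes @ifindex resolves to a netdev
 * in @net (tc offload callbacks run under RTNL, where the mirred ifindex
 * stays valid); __dev_get_by_index() returning NULL would crash
 * netdev_priv(), so that assumption matters.
 */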
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = __dev_get_by_index(net, ifindex);
        priv = netdev_priv(netdev);
        return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
        void *tirc;
        int err;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto alloc_tdn_err;

        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
        MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

        err = mlx5_core_create_tir(hp->func_mdev, in,
                                   MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
        if (err)
                goto create_tir_err;

        return 0;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
        return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
        struct mlx5e_priv *priv = hp->func_priv;
        int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

        mlx5e_build_default_indir_rqt(indirection_rqt, sz,
                                      hp->num_channels);

        for (i = 0; i < sz; i++) {
                ix = i;
                if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
                        ix = mlx5e_bits_invert(i, ilog2(sz));
                ix = indirection_rqt[ix];
                rqn = hp->pair->rqn[ix];
                MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
        }
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

        err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
        if (!err)
                hp->indir_rqt.enabled = true;

        kvfree(in);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        u32 in[MLX5_ST_SZ_DW(create_tir_in)];
        int tt, i, err;
        void *tirc;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

                memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

                MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
                MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
                mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

                err = mlx5_core_create_tir(hp->func_mdev, in,
                                           MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
        }
        return 0;

err_destroy_tirs:
        for (i = 0; i < tt; i++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
        return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->any_tt_tirn = hp->tirn;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

        ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
        if (err)
                goto err_create_ttc_table;

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels, hp->ttc.ft.t->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;

        mlx5e_destroy_ttc_table(priv, &hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

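/* Pack the peer's vhca id and the PCP prio into one hash key, e.g.
 * (illustrative) peer_vhca_id = 5, prio = 2 yields 0x50002.
 */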
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8
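/* PCP priority is 3 bits (0..7), so 8 serves as an out-of-band marker
 * meaning "the filter does not match on a specific VLAN priority".
 */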

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        params.log_data_size = 15;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* set one hairpin channel for each 50Gbps share of the link speed */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;
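        /* e.g. (illustrative): a 100Gbps port gives 100000 / 50000 = 2
         * channels; links at or below 50Gbps get a single channel.
         */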

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   hp->tirn, hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}

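/* Build the NIC-side rule: up to two destinations (hairpin TIR/RSS table
 * or the vlan table, plus an optional counter), then install the rule in
 * the lazily created priv->fs.tc.t flow table.
 */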
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_fc *counter = NULL;
        int err, dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = attr->flow_tag;

        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        return err;

                if (flow_flag_test(flow, HAIRPIN_RSS)) {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                        dest[dest_ix].ft = attr->hairpin_ft;
                } else {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                        dest[dest_ix].tir_num = attr->hairpin_tirn;
                }
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = priv->fs.vlan.ft.t;
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return PTR_ERR(counter);

                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(counter);
                dest_ix++;
                attr->counter = counter;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                flow_act.modify_hdr = attr->modify_hdr;
                kfree(parse_attr->mod_hdr_actions);
                if (err)
                        return err;
        }

        mutex_lock(&priv->fs.tc.t_lock);
        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                struct mlx5_flow_table_attr ft_attr = {};
                int tc_grp_size, tc_tbl_size, tc_num_grps;
                u32 max_flow_counter;

                max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
                                    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

                tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

                tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
                                    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
                tc_num_grps = MLX5E_TC_TABLE_NUM_GROUPS;

                ft_attr.prio = MLX5E_TC_PRIO;
                ft_attr.max_fte = tc_tbl_size;
                ft_attr.level = MLX5E_TC_FT_LEVEL;
                ft_attr.autogroup.max_num_groups = tc_num_grps;
                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
                                                            &ft_attr);
                if (IS_ERR(priv->fs.tc.t)) {
                        mutex_unlock(&priv->fs.tc.t_lock);
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Failed to create tc offload table");
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        return PTR_ERR(priv->fs.tc.t);
                }
        }

        if (attr->match_level != MLX5_MATCH_NONE)
                parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                            &flow_act, dest, dest_ix);
        mutex_unlock(&priv->fs.tc.t_lock);

        return PTR_ERR_OR_ZERO(flow->rule[0]);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_fc *counter = NULL;

        counter = attr->counter;
        if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5_del_flow_rules(flow->rule[0]);
        mlx5_fc_destroy(priv->mdev, counter);

        mutex_lock(&priv->fs.tc.t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
        mutex_unlock(&priv->fs.tc.t_lock);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
                              int out_index,
                              struct netlink_ext_ack *extack,
                              struct net_device **encap_dev,
                              bool *encap_valid);

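/* rule[0] is the main offloaded FDB rule; when attr->split_count is set, a
 * second forwarding rule goes into rule[1], and the pair is always torn
 * down together (see mlx5e_tc_unoffload_fdb_rules()).
 */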
static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_esw_flow_attr *attr)
{
        struct mlx5_flow_handle *rule;

        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        if (IS_ERR(rule))
                return rule;

        if (attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1])) {
                        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                        return flow->rule[1];
                }
        }

        return rule;
}

static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_esw_flow_attr *attr)
{
        flow_flag_clear(flow, OFFLOADED);

        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

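/* While an encap destination lacks a resolved neighbour, the flow is kept
 * on the "slow path": a copy of its attributes that simply forwards to
 * software (MLX5_ESW_ATTR_FLAG_SLOW_PATH). mlx5e_tc_encap_flows_add()
 * later swaps such rules back to the encap fast path.
 */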
static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec,
                              struct mlx5_esw_flow_attr *slow_attr)
{
        struct mlx5_flow_handle *rule;

        memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->split_count = 0;
        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;

        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
                flow_flag_set(flow, SLOW);

        return rule;
}

static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_esw_flow_attr *slow_attr)
{
        memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->split_count = 0;
        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        flow_flag_clear(flow, SLOW);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_add(flow, &uplink_priv->unready_flows);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

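/* Install an e-switch (FDB) rule: validate chain/prio against device
 * limits, attach encap, vlan, mod_hdr and counter resources, then offload
 * to the fast path, or to the slow path while a neighbour is unresolved.
 */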
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
        struct net_device *out_dev, *encap_dev = NULL;
        struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        bool encap_valid = true;
        u32 max_prio, max_chain;
        int err = 0;
        int out_index;

        if (!mlx5_esw_chains_prios_supported(esw) && attr->prio != 1) {
                NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
                return -EOPNOTSUPP;
        }

        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
         * See mlx5e_rep_setup_ft_cb().
         */
        max_chain = mlx5_esw_chains_get_chain_range(esw);
        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
                NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
                return -EOPNOTSUPP;
        }

        max_prio = mlx5_esw_chains_get_prio_range(esw);
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
                return -EOPNOTSUPP;
        }

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                int mirred_ifindex;

                if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = __dev_get_by_index(dev_net(priv->netdev),
                                             mirred_ifindex);
                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
                                         extack, &encap_dev, &encap_valid);
                if (err)
                        return err;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                attr->dests[out_index].rep = rpriv->rep;
                attr->dests[out_index].mdev = out_priv->mdev;
        }

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                return err;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                kfree(parse_attr->mod_hdr_actions);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(attr->counter_dev, true);
                if (IS_ERR(counter))
                        return PTR_ERR(counter);

                attr->counter = counter;
        }

        /* we get here if one of the following takes place:
         * (1) there's no error
         * (2) there's an encap action and we don't have valid neigh
         */
        if (!encap_valid) {
                /* continue with goto slow path rule instead */
                struct mlx5_esw_flow_attr slow_attr;

                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
        } else {
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
        }

        if (IS_ERR(flow->rule[0]))
                return PTR_ERR(flow->rule[0]);
        else
                flow_flag_set(flow, OFFLOADED);

        return 0;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
        void *headers_v = MLX5_ADDR_OF(fte_match_param,
                                       spec->match_value,
                                       misc_parameters_3);
        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
                                             headers_v,
                                             geneve_tlv_option_0_data);

        return !!geneve_tlv_opt_0_data;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5_esw_flow_attr slow_attr;
        int out_index;

        if (flow_flag_test(flow, NOT_READY)) {
                remove_unready_flow(flow);
                kvfree(attr->parse_attr);
                return;
        }

        if (mlx5e_is_offloaded_flow(flow)) {
                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }

        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);

        mlx5_eswitch_del_vlan_action(esw, attr);

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
                if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
                        mlx5e_detach_encap(priv, flow, out_index);
                        kfree(attr->parse_attr->tun_info[out_index]);
                }
        kvfree(attr->parse_attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(attr->counter_dev, attr->counter);
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr slow_attr, *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     e->reformat_type,
                                                     e->encap_size, e->encap_header,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(e->pkt_reformat)) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
                               PTR_ERR(e->pkt_reformat));
1328                return;
1329        }
1330        e->flags |= MLX5_ENCAP_ENTRY_VALID;
1331        mlx5e_rep_queue_neigh_stats_work(priv);
1332
1333        list_for_each_entry(flow, flow_list, tmp_list) {
1334                bool all_flow_encaps_valid = true;
1335                int i;
1336
1337                if (!mlx5e_is_offloaded_flow(flow))
1338                        continue;
1339                esw_attr = flow->esw_attr;
1340                spec = &esw_attr->parse_attr->spec;
1341
1342                esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1343                esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1344                /* Flow can be associated with multiple encap entries.
1345                 * Before offloading the flow verify that all of them have
1346                 * a valid neighbour.
1347                 */
1348                for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1349                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1350                                continue;
1351                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1352                                all_flow_encaps_valid = false;
1353                                break;
1354                        }
1355                }
1356                /* Do not offload flows with unresolved neighbors */
1357                if (!all_flow_encaps_valid)
1358                        continue;
1359                /* update from slow path rule to encap rule */
1360                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
1361                if (IS_ERR(rule)) {
1362                        err = PTR_ERR(rule);
1363                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1364                                       err);
1365                        continue;
1366                }
1367
1368                mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
1369                flow->rule[0] = rule;
1370                /* was unset when slow path rule removed */
1371                flow_flag_set(flow, OFFLOADED);
1372        }
1373}
1374
1375void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1376                              struct mlx5e_encap_entry *e,
1377                              struct list_head *flow_list)
1378{
1379        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1380        struct mlx5_esw_flow_attr slow_attr;
1381        struct mlx5_flow_handle *rule;
1382        struct mlx5_flow_spec *spec;
1383        struct mlx5e_tc_flow *flow;
1384        int err;
1385
1386        list_for_each_entry(flow, flow_list, tmp_list) {
1387                if (!mlx5e_is_offloaded_flow(flow))
1388                        continue;
1389                spec = &flow->esw_attr->parse_attr->spec;
1390
1391                /* update from encap rule to slow path rule */
1392                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
1393                /* mark the flow's encap dest as non-valid */
1394                flow->esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1395
1396                if (IS_ERR(rule)) {
1397                        err = PTR_ERR(rule);
1398                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1399                                       err);
1400                        continue;
1401                }
1402
1403                mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
1404                flow->rule[0] = rule;
1405                /* was unset when fast path rule removed */
1406                flow_flag_set(flow, OFFLOADED);
1407        }
1408
1409        /* we know that the encap is valid */
1410        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1411        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1412}
1413
1414static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1415{
1416        if (mlx5e_is_eswitch_flow(flow))
1417                return flow->esw_attr->counter;
1418        else
1419                return flow->nic_attr->counter;
1420}
1421
1422/* Takes reference to all flows attached to encap and adds the flows to
1423 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1424 */
1425void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1426{
1427        struct encap_flow_item *efi;
1428        struct mlx5e_tc_flow *flow;
1429
1430        list_for_each_entry(efi, &e->flows, list) {
1431                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1432                if (IS_ERR(mlx5e_flow_get(flow)))
1433                        continue;
1434                wait_for_completion(&flow->init_done);
1435
1436                flow->tmp_efi_index = efi->index;
1437                list_add(&flow->tmp_list, flow_list);
1438        }
1439}
1440
1441/* Iterate over tmp_list of flows attached to flow_list head. */
1442void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1443{
1444        struct mlx5e_tc_flow *flow, *tmp;
1445
1446        list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1447                mlx5e_flow_put(priv, flow);
1448}
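
/* A minimal usage sketch of the take/put pairing (the pattern used by
 * mlx5e_tc_update_neigh_used_value() below); 'e', 'priv' and the list head
 * are assumed to be provided by the caller:
 *
 *	LIST_HEAD(flow_list);
 *
 *	mlx5e_take_all_encap_flows(e, &flow_list);
 *	... walk flow_list via each flow's tmp_list member ...
 *	mlx5e_put_encap_flow_list(priv, &flow_list);
 */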
1449
1450static struct mlx5e_encap_entry *
1451mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1452                           struct mlx5e_encap_entry *e)
1453{
1454        struct mlx5e_encap_entry *next = NULL;
1455
1456retry:
1457        rcu_read_lock();
1458
1459        /* find encap with non-zero reference counter value */
1460        for (next = e ?
1461                     list_next_or_null_rcu(&nhe->encap_list,
1462                                           &e->encap_list,
1463                                           struct mlx5e_encap_entry,
1464                                           encap_list) :
1465                     list_first_or_null_rcu(&nhe->encap_list,
1466                                            struct mlx5e_encap_entry,
1467                                            encap_list);
1468             next;
1469             next = list_next_or_null_rcu(&nhe->encap_list,
1470                                          &next->encap_list,
1471                                          struct mlx5e_encap_entry,
1472                                          encap_list))
1473                if (mlx5e_encap_take(next))
1474                        break;
1475
1476        rcu_read_unlock();
1477
1478        /* release starting encap */
1479        if (e)
1480                mlx5e_encap_put(netdev_priv(e->out_dev), e);
1481        if (!next)
1482                return next;
1483
1484        /* wait for encap to be fully initialized */
1485        wait_for_completion(&next->res_ready);
1486        /* continue searching if encap entry is not in valid state after completion */
1487        if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1488                e = next;
1489                goto retry;
1490        }
1491
1492        return next;
1493}
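
/* A sketch of the intended iteration pattern (used by
 * mlx5e_tc_update_neigh_used_value() below). Each call releases the previous
 * entry, so a caller that breaks out early must mlx5e_encap_put() the entry
 * it still holds:
 *
 *	struct mlx5e_encap_entry *e = NULL;
 *
 *	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
 *		...
 *	}
 */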
1494
1495void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1496{
1497        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1498        struct mlx5e_encap_entry *e = NULL;
1499        struct mlx5e_tc_flow *flow;
1500        struct mlx5_fc *counter;
1501        struct neigh_table *tbl;
1502        bool neigh_used = false;
1503        struct neighbour *n;
1504        u64 lastuse;
1505
1506        if (m_neigh->family == AF_INET)
1507                tbl = &arp_tbl;
1508#if IS_ENABLED(CONFIG_IPV6)
1509        else if (m_neigh->family == AF_INET6)
1510                tbl = ipv6_stub->nd_tbl;
1511#endif
1512        else
1513                return;
1514
1515        /* mlx5e_get_next_valid_encap() releases previous encap before returning
1516         * next one.
1517         */
1518        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1519                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1520                struct encap_flow_item *efi, *tmp;
1521                struct mlx5_eswitch *esw;
1522                LIST_HEAD(flow_list);
1523
1524                esw = priv->mdev->priv.eswitch;
1525                mutex_lock(&esw->offloads.encap_tbl_lock);
1526                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1527                        flow = container_of(efi, struct mlx5e_tc_flow,
1528                                            encaps[efi->index]);
1529                        if (IS_ERR(mlx5e_flow_get(flow)))
1530                                continue;
1531                        list_add(&flow->tmp_list, &flow_list);
1532
1533                        if (mlx5e_is_offloaded_flow(flow)) {
1534                                counter = mlx5e_tc_get_counter(flow);
1535                                lastuse = mlx5_fc_query_lastuse(counter);
1536                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1537                                        neigh_used = true;
1538                                        break;
1539                                }
1540                        }
1541                }
1542                mutex_unlock(&esw->offloads.encap_tbl_lock);
1543
1544                mlx5e_put_encap_flow_list(priv, &flow_list);
1545                if (neigh_used) {
1546                        /* release current encap before breaking the loop */
1547                        mlx5e_encap_put(priv, e);
1548                        break;
1549                }
1550        }
1551
1552        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1553
1554        if (neigh_used) {
1555                nhe->reported_lastuse = jiffies;
1556
1557                /* find the relevant neigh according to the cached device and
1558                 * dst ip pair
1559                 */
1560                n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1561                if (!n)
1562                        return;
1563
1564                neigh_event_send(n, NULL);
1565                neigh_release(n);
1566        }
1567}
1568
1569static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1570{
1571        WARN_ON(!list_empty(&e->flows));
1572
1573        if (e->compl_result > 0) {
1574                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1575
1576                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1577                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1578        }
1579
1580        kfree(e->tun_info);
1581        kfree(e->encap_header);
1582        kfree_rcu(e, rcu);
1583}
1584
1585void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1586{
1587        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1588
1589        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1590                return;
1591        hash_del_rcu(&e->encap_hlist);
1592        mutex_unlock(&esw->offloads.encap_tbl_lock);
1593
1594        mlx5e_encap_dealloc(priv, e);
1595}
1596
1597static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1598                               struct mlx5e_tc_flow *flow, int out_index)
1599{
1600        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1601        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1602
1603        /* flow wasn't fully initialized */
1604        if (!e)
1605                return;
1606
1607        mutex_lock(&esw->offloads.encap_tbl_lock);
1608        list_del(&flow->encaps[out_index].list);
1609        flow->encaps[out_index].e = NULL;
1610        if (!refcount_dec_and_test(&e->refcnt)) {
1611                mutex_unlock(&esw->offloads.encap_tbl_lock);
1612                return;
1613        }
1614        hash_del_rcu(&e->encap_hlist);
1615        mutex_unlock(&esw->offloads.encap_tbl_lock);
1616
1617        mlx5e_encap_dealloc(priv, e);
1618}
1619
1620static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1621{
1622        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1623
1624        if (!flow_flag_test(flow, ESWITCH) ||
1625            !flow_flag_test(flow, DUP))
1626                return;
1627
1628        mutex_lock(&esw->offloads.peer_mutex);
1629        list_del(&flow->peer);
1630        mutex_unlock(&esw->offloads.peer_mutex);
1631
1632        flow_flag_clear(flow, DUP);
1633
1634        if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1635                mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1636                kfree(flow->peer_flow);
1637        }
1638
1639        flow->peer_flow = NULL;
1640}
1641
1642static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1643{
1644        struct mlx5_core_dev *dev = flow->priv->mdev;
1645        struct mlx5_devcom *devcom = dev->priv.devcom;
1646        struct mlx5_eswitch *peer_esw;
1647
1648        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1649        if (!peer_esw)
1650                return;
1651
1652        __mlx5e_tc_del_fdb_peer_flow(flow);
1653        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1654}
1655
1656static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1657                              struct mlx5e_tc_flow *flow)
1658{
1659        if (mlx5e_is_eswitch_flow(flow)) {
1660                mlx5e_tc_del_fdb_peer_flow(flow);
1661                mlx5e_tc_del_fdb_flow(priv, flow);
1662        } else {
1663                mlx5e_tc_del_nic_flow(priv, flow);
1664        }
1665}
1666
1668static int parse_tunnel_attr(struct mlx5e_priv *priv,
1669                             struct mlx5_flow_spec *spec,
1670                             struct flow_cls_offload *f,
1671                             struct net_device *filter_dev, u8 *match_level)
1672{
1673        struct netlink_ext_ack *extack = f->common.extack;
1674        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1675                                       outer_headers);
1676        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1677                                       outer_headers);
1678        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1679        int err;
1680
1681        err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
1682                                 headers_c, headers_v, match_level);
1683        if (err) {
1684                NL_SET_ERR_MSG_MOD(extack,
1685                                   "failed to parse tunnel attributes");
1686                return err;
1687        }
1688
1689        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
1690                struct flow_match_control match;
1691                u16 addr_type;
1692
1693                flow_rule_match_enc_control(rule, &match);
1694                addr_type = match.key->addr_type;
1695
1696                /* Tunnel addr_type uses the same key IDs as the non-tunnel case */
1697                if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1698                        struct flow_match_ipv4_addrs match;
1699
1700                        flow_rule_match_enc_ipv4_addrs(rule, &match);
1701                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1702                                 src_ipv4_src_ipv6.ipv4_layout.ipv4,
1703                                 ntohl(match.mask->src));
1704                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1705                                 src_ipv4_src_ipv6.ipv4_layout.ipv4,
1706                                 ntohl(match.key->src));
1707
1708                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1709                                 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1710                                 ntohl(match.mask->dst));
1711                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1712                                 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1713                                 ntohl(match.key->dst));
1714
1715                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
1716                                         ethertype);
1717                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1718                                 ETH_P_IP);
1719                } else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1720                        struct flow_match_ipv6_addrs match;
1721
1722                        flow_rule_match_enc_ipv6_addrs(rule, &match);
1723                        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1724                                            src_ipv4_src_ipv6.ipv6_layout.ipv6),
1725                               &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
1726                                                                   ipv6));
1727                        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1728                                            src_ipv4_src_ipv6.ipv6_layout.ipv6),
1729                               &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout,
1730                                                                  ipv6));
1731
1732                        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1733                                            dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1734                               &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
1735                                                                   ipv6));
1736                        memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1737                                            dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1738                               &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
1739                                                                  ipv6));
1740
1741                        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
1742                                         ethertype);
1743                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1744                                 ETH_P_IPV6);
1745                }
1746        }
1747
1748        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
1749                struct flow_match_ip match;
1750
1751                flow_rule_match_enc_ip(rule, &match);
1752                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
1753                         match.mask->tos & 0x3);
1754                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
1755                         match.key->tos & 0x3);
1756
1757                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
1758                         match.mask->tos >> 2);
1759                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
1760                         match.key->tos  >> 2);
1761
1762                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
1763                         match.mask->ttl);
1764                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
1765                         match.key->ttl);
1766
1767                if (match.mask->ttl &&
1768                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
1769                                                ft_field_support.outer_ipv4_ttl)) {
1771                        NL_SET_ERR_MSG_MOD(extack,
1772                                           "Matching on TTL is not supported");
1773                        return -EOPNOTSUPP;
1774                }
1776        }
1777
1778        /* Enforce DMAC when offloading incoming tunneled flows.
1779         * Flow counters require a match on the DMAC.
1780         */
1781        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
1782        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
1783        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1784                                     dmac_47_16), priv->netdev->dev_addr);
1785
1786        /* let software handle IP fragments */
1787        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1788        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
1789
1790        return 0;
1791}
1792
1793static void *get_match_headers_criteria(u32 flags,
1794                                        struct mlx5_flow_spec *spec)
1795{
1796        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1797                MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1798                             inner_headers) :
1799                MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1800                             outer_headers);
1801}
1802
1803static void *get_match_headers_value(u32 flags,
1804                                     struct mlx5_flow_spec *spec)
1805{
1806        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1807                MLX5_ADDR_OF(fte_match_param, spec->match_value,
1808                             inner_headers) :
1809                MLX5_ADDR_OF(fte_match_param, spec->match_value,
1810                             outer_headers);
1811}
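
/* The two helpers above always select the same header set (inner headers for
 * decap flows, outer otherwise) and are meant to be used as a pair, e.g. as
 * in offload_pedit_fields() below:
 *
 *	headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
 *	headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
 */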
1812
1813static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
1814                                   struct flow_cls_offload *f)
1815{
1816        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1817        struct netlink_ext_ack *extack = f->common.extack;
1818        struct net_device *ingress_dev;
1819        struct flow_match_meta match;
1820
1821        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
1822                return 0;
1823
1824        flow_rule_match_meta(rule, &match);
1825        if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
1826                NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
1827                return -EINVAL;
1828        }
1829
1830        ingress_dev = __dev_get_by_index(dev_net(filter_dev),
1831                                         match.key->ingress_ifindex);
1832        if (!ingress_dev) {
1833                NL_SET_ERR_MSG_MOD(extack,
1834                                   "Can't find the ingress port to match on");
1835                return -EINVAL;
1836        }
1837
1838        if (ingress_dev != filter_dev) {
1839                NL_SET_ERR_MSG_MOD(extack,
1840                                   "Can't match on the ingress filter port");
1841                return -EINVAL;
1842        }
1843
1844        return 0;
1845}
1846
1847static int __parse_cls_flower(struct mlx5e_priv *priv,
1848                              struct mlx5_flow_spec *spec,
1849                              struct flow_cls_offload *f,
1850                              struct net_device *filter_dev,
1851                              u8 *inner_match_level, u8 *outer_match_level)
1852{
1853        struct netlink_ext_ack *extack = f->common.extack;
1854        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1855                                       outer_headers);
1856        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1857                                       outer_headers);
1858        void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1859                                    misc_parameters);
1860        void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1861                                    misc_parameters);
1862        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1863        struct flow_dissector *dissector = rule->match.dissector;
1864        u16 addr_type = 0;
1865        u8 ip_proto = 0;
1866        u8 *match_level;
1867        int err;
1868
1869        match_level = outer_match_level;
1870
1871        if (dissector->used_keys &
1872            ~(BIT(FLOW_DISSECTOR_KEY_META) |
1873              BIT(FLOW_DISSECTOR_KEY_CONTROL) |
1874              BIT(FLOW_DISSECTOR_KEY_BASIC) |
1875              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
1876              BIT(FLOW_DISSECTOR_KEY_VLAN) |
1877              BIT(FLOW_DISSECTOR_KEY_CVLAN) |
1878              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
1879              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
1880              BIT(FLOW_DISSECTOR_KEY_PORTS) |
1881              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
1882              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
1883              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
1884              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
1885              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
1886              BIT(FLOW_DISSECTOR_KEY_TCP) |
1887              BIT(FLOW_DISSECTOR_KEY_IP)  |
1888              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
1889              BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
1890                NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
1891                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
1892                            dissector->used_keys);
1893                return -EOPNOTSUPP;
1894        }
1895
1896        if (mlx5e_get_tc_tun(filter_dev)) {
1897                if (parse_tunnel_attr(priv, spec, f, filter_dev,
1898                                      outer_match_level))
1899                        return -EOPNOTSUPP;
1900
1901                /* At this point, header pointers should point to the inner
1902                 * headers; the outer headers were already set by parse_tunnel_attr
1903                 */
1904                match_level = inner_match_level;
1905                headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1906                                                       spec);
1907                headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1908                                                    spec);
1909        }
1910
1911        err = mlx5e_flower_parse_meta(filter_dev, f);
1912        if (err)
1913                return err;
1914
1915        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1916                struct flow_match_basic match;
1917
1918                flow_rule_match_basic(rule, &match);
1919                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1920                         ntohs(match.mask->n_proto));
1921                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1922                         ntohs(match.key->n_proto));
1923
1924                if (match.mask->n_proto)
1925                        *match_level = MLX5_MATCH_L2;
1926        }
1927        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
1928            is_vlan_dev(filter_dev)) {
1929                struct flow_dissector_key_vlan filter_dev_mask;
1930                struct flow_dissector_key_vlan filter_dev_key;
1931                struct flow_match_vlan match;
1932
1933                if (is_vlan_dev(filter_dev)) {
1934                        match.key = &filter_dev_key;
1935                        match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
1936                        match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
1937                        match.key->vlan_priority = 0;
1938                        match.mask = &filter_dev_mask;
1939                        memset(match.mask, 0xff, sizeof(*match.mask));
1940                        match.mask->vlan_priority = 0;
1941                } else {
1942                        flow_rule_match_vlan(rule, &match);
1943                }
1944                if (match.mask->vlan_id ||
1945                    match.mask->vlan_priority ||
1946                    match.mask->vlan_tpid) {
1947                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1948                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1949                                         svlan_tag, 1);
1950                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1951                                         svlan_tag, 1);
1952                        } else {
1953                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1954                                         cvlan_tag, 1);
1955                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1956                                         cvlan_tag, 1);
1957                        }
1958
1959                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
1960                                 match.mask->vlan_id);
1961                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
1962                                 match.key->vlan_id);
1963
1964                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
1965                                 match.mask->vlan_priority);
1966                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
1967                                 match.key->vlan_priority);
1968
1969                        *match_level = MLX5_MATCH_L2;
1970                }
1971        } else if (*match_level != MLX5_MATCH_NONE) {
1972                /* cvlan_tag enabled in match criteria and
1973                 * disabled in match value means both S & C tags
1974                 * don't exist (i.e. the packet is untagged)
1975                 */
1976                MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1977                *match_level = MLX5_MATCH_L2;
1978        }
1979
1980        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
1981                struct flow_match_vlan match;
1982
1983                flow_rule_match_cvlan(rule, &match);
1984                if (match.mask->vlan_id ||
1985                    match.mask->vlan_priority ||
1986                    match.mask->vlan_tpid) {
1987                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1988                                MLX5_SET(fte_match_set_misc, misc_c,
1989                                         outer_second_svlan_tag, 1);
1990                                MLX5_SET(fte_match_set_misc, misc_v,
1991                                         outer_second_svlan_tag, 1);
1992                        } else {
1993                                MLX5_SET(fte_match_set_misc, misc_c,
1994                                         outer_second_cvlan_tag, 1);
1995                                MLX5_SET(fte_match_set_misc, misc_v,
1996                                         outer_second_cvlan_tag, 1);
1997                        }
1998
1999                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2000                                 match.mask->vlan_id);
2001                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2002                                 match.key->vlan_id);
2003                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2004                                 match.mask->vlan_priority);
2005                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2006                                 match.key->vlan_priority);
2007
2008                        *match_level = MLX5_MATCH_L2;
2009                }
2010        }
2011
2012        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2013                struct flow_match_eth_addrs match;
2014
2015                flow_rule_match_eth_addrs(rule, &match);
2016                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2017                                             dmac_47_16),
2018                                match.mask->dst);
2019                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2020                                             dmac_47_16),
2021                                match.key->dst);
2022
2023                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2024                                             smac_47_16),
2025                                match.mask->src);
2026                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2027                                             smac_47_16),
2028                                match.key->src);
2029
2030                if (!is_zero_ether_addr(match.mask->src) ||
2031                    !is_zero_ether_addr(match.mask->dst))
2032                        *match_level = MLX5_MATCH_L2;
2033        }
2034
2035        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2036                struct flow_match_control match;
2037
2038                flow_rule_match_control(rule, &match);
2039                addr_type = match.key->addr_type;
2040
2041                /* the HW doesn't support frag first/later */
2042                if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2043                        return -EOPNOTSUPP;
2044
2045                if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2046                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2047                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2048                                 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2049
2050                        /* the HW doesn't need L3 inline to match on frag=no */
2051                        if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2052                                *match_level = MLX5_MATCH_L2;
2054                        else
2055                                *match_level = MLX5_MATCH_L3;
2056                }
2057        }
        /* ***  L2 attributes parsing up to here *** */
2058
2059        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2060                struct flow_match_basic match;
2061
2062                flow_rule_match_basic(rule, &match);
2063                ip_proto = match.key->ip_proto;
2064
2065                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2066                         match.mask->ip_proto);
2067                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2068                         match.key->ip_proto);
2069
2070                if (match.mask->ip_proto)
2071                        *match_level = MLX5_MATCH_L3;
2072        }
2073
2074        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2075                struct flow_match_ipv4_addrs match;
2076
2077                flow_rule_match_ipv4_addrs(rule, &match);
2078                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2079                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2080                       &match.mask->src, sizeof(match.mask->src));
2081                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2082                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2083                       &match.key->src, sizeof(match.key->src));
2084                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2085                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2086                       &match.mask->dst, sizeof(match.mask->dst));
2087                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2088                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2089                       &match.key->dst, sizeof(match.key->dst));
2090
2091                if (match.mask->src || match.mask->dst)
2092                        *match_level = MLX5_MATCH_L3;
2093        }
2094
2095        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2096                struct flow_match_ipv6_addrs match;
2097
2098                flow_rule_match_ipv6_addrs(rule, &match);
2099                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2100                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2101                       &match.mask->src, sizeof(match.mask->src));
2102                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2103                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2104                       &match.key->src, sizeof(match.key->src));
2105
2106                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2107                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2108                       &match.mask->dst, sizeof(match.mask->dst));
2109                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2110                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2111                       &match.key->dst, sizeof(match.key->dst));
2112
2113                if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2114                    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2115                        *match_level = MLX5_MATCH_L3;
2116        }
2117
2118        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2119                struct flow_match_ip match;
2120
2121                flow_rule_match_ip(rule, &match);
2122                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2123                         match.mask->tos & 0x3);
2124                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2125                         match.key->tos & 0x3);
2126
2127                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2128                         match.mask->tos >> 2);
2129                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2130                         match.key->tos  >> 2);
2131
2132                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2133                         match.mask->ttl);
2134                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2135                         match.key->ttl);
2136
2137                if (match.mask->ttl &&
2138                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2139                                                ft_field_support.outer_ipv4_ttl)) {
2140                        NL_SET_ERR_MSG_MOD(extack,
2141                                           "Matching on TTL is not supported");
2142                        return -EOPNOTSUPP;
2143                }
2144
2145                if (match.mask->tos || match.mask->ttl)
2146                        *match_level = MLX5_MATCH_L3;
2147        }
2148
2149        /* ***  L3 attributes parsing up to here *** */
2150
2151        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2152                struct flow_match_ports match;
2153
2154                flow_rule_match_ports(rule, &match);
2155                switch (ip_proto) {
2156                case IPPROTO_TCP:
2157                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2158                                 tcp_sport, ntohs(match.mask->src));
2159                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2160                                 tcp_sport, ntohs(match.key->src));
2161
2162                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2163                                 tcp_dport, ntohs(match.mask->dst));
2164                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2165                                 tcp_dport, ntohs(match.key->dst));
2166                        break;
2167
2168                case IPPROTO_UDP:
2169                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2170                                 udp_sport, ntohs(match.mask->src));
2171                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2172                                 udp_sport, ntohs(match.key->src));
2173
2174                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2175                                 udp_dport, ntohs(match.mask->dst));
2176                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2177                                 udp_dport, ntohs(match.key->dst));
2178                        break;
2179                default:
2180                        NL_SET_ERR_MSG_MOD(extack,
2181                                           "Only UDP and TCP transports are supported for L4 matching");
2182                        netdev_err(priv->netdev,
2183                                   "Only UDP and TCP transports are supported\n");
2184                        return -EINVAL;
2185                }
2186
2187                if (match.mask->src || match.mask->dst)
2188                        *match_level = MLX5_MATCH_L4;
2189        }
2190
2191        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2192                struct flow_match_tcp match;
2193
2194                flow_rule_match_tcp(rule, &match);
2195                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2196                         ntohs(match.mask->flags));
2197                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2198                         ntohs(match.key->flags));
2199
2200                if (match.mask->flags)
2201                        *match_level = MLX5_MATCH_L4;
2202        }
2203
2204        return 0;
2205}
2206
2207static int parse_cls_flower(struct mlx5e_priv *priv,
2208                            struct mlx5e_tc_flow *flow,
2209                            struct mlx5_flow_spec *spec,
2210                            struct flow_cls_offload *f,
2211                            struct net_device *filter_dev)
2212{
2213        u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2214        struct netlink_ext_ack *extack = f->common.extack;
2215        struct mlx5_core_dev *dev = priv->mdev;
2216        struct mlx5_eswitch *esw = dev->priv.eswitch;
2217        struct mlx5e_rep_priv *rpriv = priv->ppriv;
2218        struct mlx5_eswitch_rep *rep;
2219        bool is_eswitch_flow;
2220        int err;
2221
2222        inner_match_level = MLX5_MATCH_NONE;
2223        outer_match_level = MLX5_MATCH_NONE;
2224
2225        err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
2226                                 &outer_match_level);
2227        non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2228                                 outer_match_level : inner_match_level;
2229
2230        is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2231        if (!err && is_eswitch_flow) {
2232                rep = rpriv->rep;
2233                if (rep->vport != MLX5_VPORT_UPLINK &&
2234                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2235                    esw->offloads.inline_mode < non_tunnel_match_level)) {
2236                        NL_SET_ERR_MSG_MOD(extack,
2237                                           "Flow is not offloaded due to min inline setting");
2238                        netdev_warn(priv->netdev,
2239                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2240                                    non_tunnel_match_level, esw->offloads.inline_mode);
2241                        return -EOPNOTSUPP;
2242                }
2243        }
2244
2245        if (is_eswitch_flow) {
2246                flow->esw_attr->inner_match_level = inner_match_level;
2247                flow->esw_attr->outer_match_level = outer_match_level;
2248        } else {
2249                flow->nic_attr->match_level = non_tunnel_match_level;
2250        }
2251
2252        return err;
2253}
2254
2255struct pedit_headers {
2256        struct ethhdr  eth;
2257        struct vlan_hdr vlan;
2258        struct iphdr   ip4;
2259        struct ipv6hdr ip6;
2260        struct tcphdr  tcp;
2261        struct udphdr  udp;
2262};
2263
2264struct pedit_headers_action {
2265        struct pedit_headers    vals;
2266        struct pedit_headers    masks;
2267        u32                     pedits;
2268};
2269
2270static int pedit_header_offsets[] = {
2271        [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2272        [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2273        [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2274        [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2275        [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2276};
2277
2278#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
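
/* e.g. pedit_header(&hdrs->masks, FLOW_ACT_MANGLE_HDR_TYPE_IP4) resolves to
 * &hdrs->masks.ip4, the iphdr-shaped mask scratch area.
 */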
2279
2280static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2281                         struct pedit_headers_action *hdrs)
2282{
2283        u32 *curr_pmask, *curr_pval;
2284
2285        curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2286        curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2287
2288        if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2289                goto out_err;
2290
2291        *curr_pmask |= mask;
2292        *curr_pval  |= (val & mask);
2293
2294        return 0;
2295
2296out_err:
2297        return -EOPNOTSUPP;
2298}
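
/* Illustrative call sequence (hypothetical values): a first
 * set_pedit_val(FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0xffff0000, val, 0, hdrs)
 * records the mask; a second call writing any of the same bits at the same
 * offset trips the "*curr_pmask & mask" test and returns -EOPNOTSUPP.
 */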
2299
2300struct mlx5_fields {
2301        u8  field;
2302        u8  field_bsize;
2303        u32 field_mask;
2304        u32 offset;
2305        u32 match_offset;
2306};
2307
2308#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2309                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2310                 offsetof(struct pedit_headers, field) + (off), \
2311                 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
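
/* e.g. the ETHERTYPE entry in the fields[] table below expands roughly to:
 *
 *	{ MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 16, U16_MAX,
 *	  offsetof(struct pedit_headers, eth.h_proto) + 0,
 *	  MLX5_BYTE_OFF(fte_match_set_lyr_2_4, ethertype) }
 */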
2312
2313/* true when the masked rewrite value equals the masked match value, and
2314 * every bit being rewritten is also covered by the match mask.
2315 */
2316#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2317        type matchmaskx = *(type *)(matchmaskp); \
2318        type matchvalx = *(type *)(matchvalp); \
2319        type maskx = *(type *)(maskp); \
2320        type valx = *(type *)(valp); \
2321        \
2322        (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2323                                                                 matchmaskx)); \
2324})
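
/* Worked example (hypothetical u16 values): with match value/mask
 * 0x0800/0xffff and rewrite value/mask 0x0800/0xffff,
 * (0x0800 & 0xffff) == (0x0800 & 0xffff) holds and
 * (0xffff & (0xffff ^ 0xffff)) == 0, so SAME_VAL_MASK() is true and
 * offload_pedit_fields() skips the redundant rewrite.
 */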
2325
2326static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2327                         void *matchmaskp, u8 bsize)
2328{
2329        bool same = false;
2330
2331        switch (bsize) {
2332        case 8:
2333                same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2334                break;
2335        case 16:
2336                same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2337                break;
2338        case 32:
2339                same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2340                break;
2341        }
2342
2343        return same;
2344}
2345
2346static struct mlx5_fields fields[] = {
2347        OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2348        OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2349        OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2350        OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2351        OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2352        OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2353
2354        OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2355        OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2356        OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2357        OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2358
2359        OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2360                src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2361        OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2362                src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2363        OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2364                src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2365        OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2366                src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2367        OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2368                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2369        OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2370                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2371        OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2372                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2373        OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2374                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2375        OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2376
2377        OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2378        OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2379        /* in the Linux tcphdr the TCP flags byte is at ack_seq + 5, 8 bits long */
2380        OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2381
2382        OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2383        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2384};
2385
2386/* On input, parse_attr->max_mod_hdr_actions tells how many HW actions can be
2387 * parsed at most from the SW pedit action. On success,
2388 * parse_attr->num_mod_hdr_actions says how many HW actions were actually parsed.
2389 */
2390static int offload_pedit_fields(struct pedit_headers_action *hdrs,
2391                                struct mlx5e_tc_flow_parse_attr *parse_attr,
2392                                u32 *action_flags,
2393                                struct netlink_ext_ack *extack)
2394{
2395        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2396        int i, action_size, nactions, max_actions, first, last, next_z;
2397        void *headers_c, *headers_v, *action, *vals_p;
2398        u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2399        struct mlx5_fields *f;
2400        unsigned long mask;
2401        __be32 mask_be32;
2402        __be16 mask_be16;
2403        u8 cmd;
2404
2405        headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2406        headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2407
2408        set_masks = &hdrs[0].masks;
2409        add_masks = &hdrs[1].masks;
2410        set_vals = &hdrs[0].vals;
2411        add_vals = &hdrs[1].vals;
2412
2413        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2414        action = parse_attr->mod_hdr_actions +
2415                 parse_attr->num_mod_hdr_actions * action_size;
2416
2417        max_actions = parse_attr->max_mod_hdr_actions;
2418        nactions = parse_attr->num_mod_hdr_actions;
2419
2420        for (i = 0; i < ARRAY_SIZE(fields); i++) {
2421                bool skip;
2422
2423                f = &fields[i];
2424                /* avoid seeing bits set from previous iterations */
2425                s_mask = 0;
2426                a_mask = 0;
2427
2428                s_masks_p = (void *)set_masks + f->offset;
2429                a_masks_p = (void *)add_masks + f->offset;
2430
2431                s_mask = *s_masks_p & f->field_mask;
2432                a_mask = *a_masks_p & f->field_mask;
2433
2434                if (!s_mask && !a_mask) /* nothing to offload here */
2435                        continue;
2436
2437                if (s_mask && a_mask) {
2438                        NL_SET_ERR_MSG_MOD(extack,
2439                                           "can't set and add to the same HW field");
2440                        printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2441                        return -EOPNOTSUPP;
2442                }
2443
2444                if (nactions == max_actions) {
2445                        NL_SET_ERR_MSG_MOD(extack,
2446                                           "too many pedit actions, can't offload");
2447                        printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
2448                        return -EOPNOTSUPP;
2449                }
2450
2451                skip = false;
2452                if (s_mask) {
2453                        void *match_mask = headers_c + f->match_offset;
2454                        void *match_val = headers_v + f->match_offset;
2455
2456                        cmd  = MLX5_ACTION_TYPE_SET;
2457                        mask = s_mask;
2458                        vals_p = (void *)set_vals + f->offset;
2459                        /* don't rewrite if we have a match on the same value */
2460                        if (cmp_val_mask(vals_p, s_masks_p, match_val,
2461                                         match_mask, f->field_bsize))
2462                                skip = true;
2463                        /* clear to denote we consumed this field */
2464                        *s_masks_p &= ~f->field_mask;
2465                } else {
2466                        cmd  = MLX5_ACTION_TYPE_ADD;
2467                        mask = a_mask;
2468                        vals_p = (void *)add_vals + f->offset;
2469                        /* adding 0 is a no-op */
2470                        if ((*(u32 *)vals_p & f->field_mask) == 0)
2471                                skip = true;
2472                        /* clear to denote we consumed this field */
2473                        *a_masks_p &= ~f->field_mask;
2474                }
2475                if (skip)
2476                        continue;
2477
2478                if (f->field_bsize == 32) {
2479                        mask_be32 = (__be32)mask;
2480                        mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2481                } else if (f->field_bsize == 16) {
2482                        mask_be32 = (__be32)mask;
2483                        mask_be16 = *(__be16 *)&mask_be32;
2484                        mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2485                }
2486
2487                first = find_first_bit(&mask, f->field_bsize);
2488                next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2489                last  = find_last_bit(&mask, f->field_bsize);
2490                if (first < next_z && next_z < last) {
2491                        NL_SET_ERR_MSG_MOD(extack,
2492                                           "rewrite of non-contiguous sub-fields isn't supported");
2493                        printk(KERN_WARNING "mlx5: rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n",
2494                               mask);
2495                        return -EOPNOTSUPP;
2496                }
2497
2498                MLX5_SET(set_action_in, action, action_type, cmd);
2499                MLX5_SET(set_action_in, action, field, f->field);
2500
2501                if (cmd == MLX5_ACTION_TYPE_SET) {
2502                        int start;
2503
2504                        /* a bit-sized field may not start at bit 0 of the HW field */
2505                        start = find_first_bit((unsigned long *)&f->field_mask,
2506                                               f->field_bsize);
2507
2508                        MLX5_SET(set_action_in, action, offset, first - start);
2509                        /* length is the number of bits to write; zero means 32 */
2510                        MLX5_SET(set_action_in, action, length, (last - first + 1));
2511                }
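
                /* Worked example (hypothetical pedit): for the IP_DSCP entry
                 * (field_mask 0xfc), a set of the full DSCP field gives
                 * mask = 0xfc, so first = 2, last = 7, start = 2, hence
                 * offset = 0 and length = 6 bits.
                 */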
2512
2513                if (f->field_bsize == 32)
2514                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2515                else if (f->field_bsize == 16)
2516                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2517                else if (f->field_bsize == 8)
2518                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2519
2520                action += action_size;
2521                nactions++;
2522        }
2523
2524        parse_attr->num_mod_hdr_actions = nactions;
2525        return 0;
2526}
2527
2528static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2529                                                  int namespace)
2530{
2531        if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2532                return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2533        else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2534                return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2535}
2536
2537static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
2538                                 struct pedit_headers_action *hdrs,
2539                                 int namespace,
2540                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
2541{
2542        int nkeys, action_size, max_actions;
2543
2544        nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
2545                hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
2546        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2547
2548        max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
2549        /* a single 32-bit pedit SW key can expand to as many as 16 HW actions */
2550        max_actions = min(max_actions, nkeys * 16);
2551
2552        parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
2553        if (!parse_attr->mod_hdr_actions)
2554                return -ENOMEM;
2555
2556        parse_attr->max_mod_hdr_actions = max_actions;
2557        return 0;
2558}
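
/* Sizing sketch (hypothetical numbers): one SET key plus one ADD key give
 * nkeys = 2, so max_actions = min(FW max_modify_header_actions, 32) entries
 * of MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) bytes each.
 */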
2559
2560static const struct pedit_headers zero_masks = {};
2561
2562static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2563                                 const struct flow_action_entry *act, int namespace,
2564                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2565                                 struct pedit_headers_action *hdrs,
2566                                 struct netlink_ext_ack *extack)
2567{
2568        u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2569        int err = -EOPNOTSUPP;
2570        u32 mask, val, offset;
2571        u8 htype;
2572
2573        htype = act->mangle.htype;
2575
2576        if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2577                NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2578                goto out_err;
2579        }
2580
2581        if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2582                NL_SET_ERR_MSG_MOD(extack,
2583                                   "The pedit offload action is not supported");
2584                goto out_err;
2585        }
2586
2587        mask = act->mangle.mask;
2588        val = act->mangle.val;
2589        offset = act->mangle.offset;
2590
2591        err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2592        if (err)
2593                goto out_err;
2594
2595        hdrs[cmd].pedits++;
2596
2597        return 0;
2598out_err:
2599        return err;
2600}
2601
2602static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
2603                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2604                                 struct pedit_headers_action *hdrs,
2605                                 u32 *action_flags,
2606                                 struct netlink_ext_ack *extack)
2607{
2608        struct pedit_headers *cmd_masks;
2609        int err;
2610        u8 cmd;
2611
2612        if (!parse_attr->mod_hdr_actions) {
2613                err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
2614                if (err)
2615                        goto out_err;
2616        }
2617
2618        err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
2619        if (err < 0)
2620                goto out_dealloc_parsed_actions;
2621
2622        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
2623                cmd_masks = &hdrs[cmd].masks;
2624                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
2625                        NL_SET_ERR_MSG_MOD(extack,
2626                                           "attempt to offload an unsupported field");
2627                        netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
2628                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
2629                                       16, 1, cmd_masks, sizeof(zero_masks), true);
2630                        err = -EOPNOTSUPP;
2631                        goto out_dealloc_parsed_actions;
2632                }
2633        }
2634
2635        return 0;
2636
2637out_dealloc_parsed_actions:
2638        kfree(parse_attr->mod_hdr_actions);
2639out_err:
2640        return err;
2641}
2642
2643static bool csum_offload_supported(struct mlx5e_priv *priv,
2644                                   u32 action,
2645                                   u32 update_flags,
2646                                   struct netlink_ext_ack *extack)
2647{
2648        u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
2649                         TCA_CSUM_UPDATE_FLAG_UDP;
2650
2651        /* the HW recalculates checksums only when rewriting headers */
2652        if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
2653                NL_SET_ERR_MSG_MOD(extack,
2654                                   "TC csum action is only offloaded with pedit");
2655                netdev_warn(priv->netdev,
2656                            "TC csum action is only offloaded with pedit\n");
2657                return false;
2658        }
2659
2660        if (update_flags & ~prot_flags) {
2661                NL_SET_ERR_MSG_MOD(extack,
2662                                   "can't offload TC csum action for some header/s");
2663                netdev_warn(priv->netdev,
2664                            "can't offload TC csum action for some header/s - flags %#x\n",
2665                            update_flags);
2666                return false;
2667        }
2668
2669        return true;
2670}
2671
2672struct ip_ttl_word {
2673        __u8    ttl;
2674        __u8    protocol;
2675        __sum16 check;
2676};
2677
2678struct ipv6_hoplimit_word {
2679        __be16  payload_len;
2680        __u8    nexthdr;
2681        __u8    hop_limit;
2682};
2683
2684static bool is_action_keys_supported(const struct flow_action_entry *act)
2685{
2686        u32 mask, offset;
2687        u8 htype;
2688
2689        htype = act->mangle.htype;
2690        offset = act->mangle.offset;
2691        mask = ~act->mangle.mask;
2692        /* For the IPv4 and IPv6 headers, inspect the 4-byte word that
2693         * contains ttl/hop_limit to determine whether any field other
2694         * than ttl/hop_limit is being modified.
2695         */
2696        if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
2697                struct ip_ttl_word *ttl_word =
2698                        (struct ip_ttl_word *)&mask;
2699
2700                if (offset != offsetof(struct iphdr, ttl) ||
2701                    ttl_word->protocol ||
2702                    ttl_word->check) {
2703                        return true;
2704                }
2705        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
2706                struct ipv6_hoplimit_word *hoplimit_word =
2707                        (struct ipv6_hoplimit_word *)&mask;
2708
2709                if (offset != offsetof(struct ipv6hdr, payload_len) ||
2710                    hoplimit_word->payload_len ||
2711                    hoplimit_word->nexthdr) {
2712                        return true;
2713                }
2714        }
2715        return false;
2716}
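
/* Editorial worked example (derived from the logic above, not original
 * text): "pedit ex munge ip ttl set 64" arrives with htype
 * FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset == offsetof(struct iphdr, ttl) and
 * an inverted mask whose only set bits cover the ttl byte, so
 * ttl_word->protocol and ttl_word->check are zero and the function returns
 * false.  A rewrite of iphdr->protocol lands in the same 4-byte word, sets
 * ttl_word->protocol and returns true, which restricts the flow to
 * TCP/UDP/ICMP in modify_header_match_supported() below.
 */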
2717
2718static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
2719                                          struct flow_action *flow_action,
2720                                          u32 actions,
2721                                          struct netlink_ext_ack *extack)
2722{
2723        const struct flow_action_entry *act;
2724        bool modify_ip_header;
2725        void *headers_v;
2726        u16 ethertype;
2727        u8 ip_proto;
2728        int i;
2729
2730        headers_v = get_match_headers_value(actions, spec);
2731        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2732
2733        /* for non-IP we only re-write MACs, so we're okay */
2734        if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
2735                goto out_ok;
2736
2737        modify_ip_header = false;
2738        flow_action_for_each(i, act, flow_action) {
2739                if (act->id != FLOW_ACTION_MANGLE &&
2740                    act->id != FLOW_ACTION_ADD)
2741                        continue;
2742
2743                if (is_action_keys_supported(act)) {
2744                        modify_ip_header = true;
2745                        break;
2746                }
2747        }
2748
2749        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
2750        if (modify_ip_header && ip_proto != IPPROTO_TCP &&
2751            ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2752                NL_SET_ERR_MSG_MOD(extack,
2753                                   "can't offload re-write of non TCP/UDP");
2754                pr_info("can't offload re-write of ip proto %d\n", ip_proto);
2755                return false;
2756        }
2757
2758out_ok:
2759        return true;
2760}
2761
2762static bool actions_match_supported(struct mlx5e_priv *priv,
2763                                    struct flow_action *flow_action,
2764                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
2765                                    struct mlx5e_tc_flow *flow,
2766                                    struct netlink_ext_ack *extack)
2767{
2768        u32 actions;
2769
2770        if (mlx5e_is_eswitch_flow(flow))
2771                actions = flow->esw_attr->action;
2772        else
2773                actions = flow->nic_attr->action;
2774
2775        if (flow_flag_test(flow, EGRESS) &&
2776            !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
2777              (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
2778              (actions & MLX5_FLOW_CONTEXT_ACTION_DROP)))
2779                return false;
2780
2781        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2782                return modify_header_match_supported(&parse_attr->spec,
2783                                                     flow_action, actions,
2784                                                     extack);
2785
2786        return true;
2787}
2788
2789static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
2790{
2791        struct mlx5_core_dev *fmdev, *pmdev;
2792        u64 fsystem_guid, psystem_guid;
2793
2794        fmdev = priv->mdev;
2795        pmdev = peer_priv->mdev;
2796
2797        fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
2798        psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
2799
2800        return (fsystem_guid == psystem_guid);
2801}
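
/* Editorial note (illustration, not original text): two mlx5e netdevs
 * backed by the same physical NIC report the same system image GUID, so a
 * redirect between them can be offloaded as hairpin; peers on different
 * NICs fail this check and the redirect is rejected in
 * parse_tc_nic_actions().
 */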
2802
2803static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
2804                                   const struct flow_action_entry *act,
2805                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
2806                                   struct pedit_headers_action *hdrs,
2807                                   u32 *action, struct netlink_ext_ack *extack)
2808{
2809        u16 mask16 = VLAN_VID_MASK;
2810        u16 val16 = act->vlan.vid & VLAN_VID_MASK;
2811        const struct flow_action_entry pedit_act = {
2812                .id = FLOW_ACTION_MANGLE,
2813                .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
2814                .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
2815                .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
2816                .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
2817        };
2818        u8 match_prio_mask, match_prio_val;
2819        void *headers_c, *headers_v;
2820        int err;
2821
2822        headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
2823        headers_v = get_match_headers_value(*action, &parse_attr->spec);
2824
2825        if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
2826              MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
2827                NL_SET_ERR_MSG_MOD(extack,
2828                                   "VLAN rewrite action must have VLAN protocol match");
2829                return -EOPNOTSUPP;
2830        }
2831
2832        match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
2833        match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
2834        if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
2835                NL_SET_ERR_MSG_MOD(extack,
2836                                   "Changing VLAN prio is not supported");
2837                return -EOPNOTSUPP;
2838        }
2839
2840        err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
2841                                    hdrs, NULL);
2842        *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2843
2844        return err;
2845}
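
/* Editorial sketch of the translation above (assumed example): "vlan
 * modify id 20" becomes an internal FLOW_ACTION_MANGLE entry that pedits
 * h_vlan_TCI with mask VLAN_VID_MASK, i.e. only the 12 VID bits are
 * rewritten; that is why the flow must already match on cvlan_tag and why
 * the requested prio has to equal the matched PCP bits, which the rewrite
 * leaves untouched.
 */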
2846
2847static int
2848add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
2849                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2850                                 struct pedit_headers_action *hdrs,
2851                                 u32 *action, struct netlink_ext_ack *extack)
2852{
2853        const struct flow_action_entry prio_tag_act = {
2854                .vlan.vid = 0,
2855                .vlan.prio =
2856                        MLX5_GET(fte_match_set_lyr_2_4,
2857                                 get_match_headers_value(*action,
2858                                                         &parse_attr->spec),
2859                                 first_prio) &
2860                        MLX5_GET(fte_match_set_lyr_2_4,
2861                                 get_match_headers_criteria(*action,
2862                                                            &parse_attr->spec),
2863                                 first_prio),
2864        };
2865
2866        return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
2867                                       &prio_tag_act, parse_attr, hdrs, action,
2868                                       extack);
2869}
2870
2871static int parse_tc_nic_actions(struct mlx5e_priv *priv,
2872                                struct flow_action *flow_action,
2873                                struct mlx5e_tc_flow_parse_attr *parse_attr,
2874                                struct mlx5e_tc_flow *flow,
2875                                struct netlink_ext_ack *extack)
2876{
2877        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
2878        struct pedit_headers_action hdrs[2] = {};
2879        const struct flow_action_entry *act;
2880        u32 action = 0;
2881        int err, i;
2882
2883        if (!flow_action_has_entries(flow_action))
2884                return -EINVAL;
2885
2886        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
2887
2888        flow_action_for_each(i, act, flow_action) {
2889                switch (act->id) {
2890                case FLOW_ACTION_ACCEPT:
2891                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2892                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
2893                        break;
2894                case FLOW_ACTION_DROP:
2895                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2896                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
2897                                               flow_table_properties_nic_receive.flow_counter))
2898                                action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2899                        break;
2900                case FLOW_ACTION_MANGLE:
2901                case FLOW_ACTION_ADD:
2902                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
2903                                                    parse_attr, hdrs, extack);
2904                        if (err)
2905                                return err;
2906
2907                        action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
2908                                  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2909                        break;
2910                case FLOW_ACTION_VLAN_MANGLE:
2911                        err = add_vlan_rewrite_action(priv,
2912                                                      MLX5_FLOW_NAMESPACE_KERNEL,
2913                                                      act, parse_attr, hdrs,
2914                                                      &action, extack);
2915                        if (err)
2916                                return err;
2917
2918                        break;
2919                case FLOW_ACTION_CSUM:
2920                        if (csum_offload_supported(priv, action,
2921                                                   act->csum_flags,
2922                                                   extack))
2923                                break;
2924
2925                        return -EOPNOTSUPP;
2926                case FLOW_ACTION_REDIRECT: {
2927                        struct net_device *peer_dev = act->dev;
2928
2929                        if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
2930                            same_hw_devs(priv, netdev_priv(peer_dev))) {
2931                                parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
2932                                flow_flag_set(flow, HAIRPIN);
2933                                action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2934                                          MLX5_FLOW_CONTEXT_ACTION_COUNT;
2935                        } else {
2936                                NL_SET_ERR_MSG_MOD(extack,
2937                                                   "device is not on same HW, can't offload");
2938                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
2939                                            peer_dev->name);
2940                                return -EINVAL;
2941                        }
2942                        }
2943                        break;
2944                case FLOW_ACTION_MARK: {
2945                        u32 mark = act->mark;
2946
2947                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
2948                                NL_SET_ERR_MSG_MOD(extack,
2949                                                   "Bad flow mark - only 16 bit is supported");
2950                                return -EINVAL;
2951                        }
2952
2953                        attr->flow_tag = mark;
2954                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2955                        }
2956                        break;
2957                default:
2958                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
2959                        return -EOPNOTSUPP;
2960                }
2961        }
2962
2963        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
2964            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
2965                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
2966                                            parse_attr, hdrs, &action, extack);
2967                if (err)
2968                        return err;
2969                /* In case all pedit actions are skipped, remove the MOD_HDR
2970                 * flag.
2971                 */
2972                if (parse_attr->num_mod_hdr_actions == 0) {
2973                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2974                        kfree(parse_attr->mod_hdr_actions);
2975                }
2976        }
2977
2978        attr->action = action;
2979        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
2980                return -EOPNOTSUPP;
2981
2982        return 0;
2983}
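
/* Editorial usage sketch (hypothetical interface name, not from this
 * file):
 *
 *   tc filter add dev ens1f0 ingress protocol ip flower ip_proto tcp \
 *       action skbedit mark 0x1234
 *
 * is parsed here as FLOW_ACTION_MARK: the 16-bit mark is stored in
 * attr->flow_tag together with FWD_DEST, and received packets get the mark
 * restored from the flow tag reported in the CQE.
 */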
2984
2985struct encap_key {
2986        const struct ip_tunnel_key *ip_tun_key;
2987        struct mlx5e_tc_tunnel *tc_tunnel;
2988};
2989
2990static inline int cmp_encap_info(struct encap_key *a,
2991                                 struct encap_key *b)
2992{
2993        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
2994               a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
2995}
2996
2997static inline int hash_encap_info(struct encap_key *key)
2998{
2999        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3000                     key->tc_tunnel->tunnel_type);
3001}
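
/* Editorial note: the two helpers above form the key of the per-eswitch
 * encap table -- jhash of the ip_tunnel_key seeded with the tunnel type
 * selects the bucket and cmp_encap_info() resolves collisions -- so flows
 * encapsulating to the same tunnel destination share one mlx5e_encap_entry
 * (see mlx5e_encap_get() below).
 */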
3002
3003
3004static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
3005                                  struct net_device *peer_netdev)
3006{
3007        struct mlx5e_priv *peer_priv;
3008
3009        peer_priv = netdev_priv(peer_netdev);
3010
3011        return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3012                mlx5e_eswitch_rep(priv->netdev) &&
3013                mlx5e_eswitch_rep(peer_netdev) &&
3014                same_hw_devs(priv, peer_priv));
3015}
3016
3017
3018
3019bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3020{
3021        return refcount_inc_not_zero(&e->refcnt);
3022}
3023
3024static struct mlx5e_encap_entry *
3025mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3026                uintptr_t hash_key)
3027{
3028        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3029        struct mlx5e_encap_entry *e;
3030        struct encap_key e_key;
3031
3032        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3033                                   encap_hlist, hash_key) {
3034                e_key.ip_tun_key = &e->tun_info->key;
3035                e_key.tc_tunnel = e->tunnel;
3036                if (!cmp_encap_info(&e_key, key) &&
3037                    mlx5e_encap_take(e))
3038                        return e;
3039        }
3040
3041        return NULL;
3042}
3043
3044static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3045{
3046        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3047
3048        return kmemdup(tun_info, tun_size, GFP_KERNEL);
3049}
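
/* Editorial note: ip_tunnel_info keeps its tunnel options (e.g. geneve
 * TLVs) in a flexible tail of options_len bytes directly behind the
 * struct, which is why the duplicate must copy
 * sizeof(*tun_info) + options_len rather than the struct alone.
 */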
3050
3051static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3052                                      struct mlx5e_tc_flow *flow,
3053                                      int out_index,
3054                                      struct mlx5e_encap_entry *e,
3055                                      struct netlink_ext_ack *extack)
3056{
3057        int i;
3058
3059        for (i = 0; i < out_index; i++) {
3060                if (flow->encaps[i].e != e)
3061                        continue;
3062                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3063                netdev_err(priv->netdev, "can't duplicate encap action\n");
3064                return true;
3065        }
3066
3067        return false;
3068}
3069
3070static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3071                              struct mlx5e_tc_flow *flow,
3072                              struct net_device *mirred_dev,
3073                              int out_index,
3074                              struct netlink_ext_ack *extack,
3075                              struct net_device **encap_dev,
3076                              bool *encap_valid)
3077{
3078        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3079        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3080        struct mlx5e_tc_flow_parse_attr *parse_attr;
3081        const struct ip_tunnel_info *tun_info;
3082        struct encap_key key;
3083        struct mlx5e_encap_entry *e;
3084        unsigned short family;
3085        uintptr_t hash_key;
3086        int err = 0;
3087
3088        parse_attr = attr->parse_attr;
3089        tun_info = parse_attr->tun_info[out_index];
3090        family = ip_tunnel_info_af(tun_info);
3091        key.ip_tun_key = &tun_info->key;
3092        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3093        if (!key.tc_tunnel) {
3094                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3095                return -EOPNOTSUPP;
3096        }
3097
3098        hash_key = hash_encap_info(&key);
3099
3100        mutex_lock(&esw->offloads.encap_tbl_lock);
3101        e = mlx5e_encap_get(priv, &key, hash_key);
3102
3103        /* must verify whether the encap entry is valid before use */
3104        if (e) {
3105                /* Check that entry was not already attached to this flow */
3106                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3107                        err = -EOPNOTSUPP;
3108                        goto out_err;
3109                }
3110
3111                mutex_unlock(&esw->offloads.encap_tbl_lock);
3112                wait_for_completion(&e->res_ready);
3113
3114                /* Protect against concurrent neigh update. */
3115                mutex_lock(&esw->offloads.encap_tbl_lock);
3116                if (e->compl_result < 0) {
3117                        err = -EREMOTEIO;
3118                        goto out_err;
3119                }
3120                goto attach_flow;
3121        }
3122
3123        e = kzalloc(sizeof(*e), GFP_KERNEL);
3124        if (!e) {
3125                err = -ENOMEM;
3126                goto out_err;
3127        }
3128
3129        refcount_set(&e->refcnt, 1);
3130        init_completion(&e->res_ready);
3131
3132        tun_info = dup_tun_info(tun_info);
3133        if (!tun_info) {
3134                err = -ENOMEM;
3135                goto out_err_init;
3136        }
3137        e->tun_info = tun_info;
3138        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3139        if (err)
3140                goto out_err_init;
3141
3142        INIT_LIST_HEAD(&e->flows);
3143        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3144        mutex_unlock(&esw->offloads.encap_tbl_lock);
3145
3146        if (family == AF_INET)
3147                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3148        else if (family == AF_INET6)
3149                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3150
3151        /* Protect against concurrent neigh update. */
3152        mutex_lock(&esw->offloads.encap_tbl_lock);
3153        complete_all(&e->res_ready);
3154        if (err) {
3155                e->compl_result = err;
3156                goto out_err;
3157        }
3158        e->compl_result = 1;
3159
3160attach_flow:
3161        flow->encaps[out_index].e = e;
3162        list_add(&flow->encaps[out_index].list, &e->flows);
3163        flow->encaps[out_index].index = out_index;
3164        *encap_dev = e->out_dev;
3165        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3166                attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3167                attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3168                *encap_valid = true;
3169        } else {
3170                *encap_valid = false;
3171        }
3172        mutex_unlock(&esw->offloads.encap_tbl_lock);
3173
3174        return err;
3175
3176out_err:
3177        mutex_unlock(&esw->offloads.encap_tbl_lock);
3178        if (e)
3179                mlx5e_encap_put(priv, e);
3180        return err;
3181
3182out_err_init:
3183        mutex_unlock(&esw->offloads.encap_tbl_lock);
3184        kfree(tun_info);
3185        kfree(e);
3186        return err;
3187}
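
/* Editorial summary of the encap entry life cycle implemented above
 * (derived from the code, not an authoritative spec): a new entry is
 * published in encap_tbl with res_ready still incomplete; concurrent
 * flows that find it drop encap_tbl_lock and block in
 * wait_for_completion() until the creator has resolved the
 * route/neighbour and called complete_all().  compl_result then encodes
 * the outcome: negative means the header could not be built (waiters
 * fail with -EREMOTEIO), 1 means success, and MLX5_ENCAP_ENTRY_VALID
 * decides whether the flow is offloaded immediately or parked until a
 * neigh update provides a valid encap header.
 */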
3188
3189static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3190                                const struct flow_action_entry *act,
3191                                struct mlx5_esw_flow_attr *attr,
3192                                u32 *action)
3193{
3194        u8 vlan_idx = attr->total_vlan;
3195
3196        if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3197                return -EOPNOTSUPP;
3198
3199        switch (act->id) {
3200        case FLOW_ACTION_VLAN_POP:
3201                if (vlan_idx) {
3202                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3203                                                                 MLX5_FS_VLAN_DEPTH))
3204                                return -EOPNOTSUPP;
3205
3206                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3207                } else {
3208                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3209                }
3210                break;
3211        case FLOW_ACTION_VLAN_PUSH:
3212                attr->vlan_vid[vlan_idx] = act->vlan.vid;
3213                attr->vlan_prio[vlan_idx] = act->vlan.prio;
3214                attr->vlan_proto[vlan_idx] = act->vlan.proto;
3215                if (!attr->vlan_proto[vlan_idx])
3216                        attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3217
3218                if (vlan_idx) {
3219                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3220                                                                 MLX5_FS_VLAN_DEPTH))
3221                                return -EOPNOTSUPP;
3222
3223                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3224                } else {
3225                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3226                            (act->vlan.proto != htons(ETH_P_8021Q) ||
3227                             act->vlan.prio))
3228                                return -EOPNOTSUPP;
3229
3230                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3231                }
3232                break;
3233        default:
3234                return -EINVAL;
3235        }
3236
3237        attr->total_vlan = vlan_idx + 1;
3238
3239        return 0;
3240}
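
/* Editorial worked example (assumed): an action list of "vlan pop, vlan
 * pop" maps to VLAN_POP followed by VLAN_POP_2 and needs
 * mlx5_eswitch_vlan_actions_supported(mdev, 2), while a single "vlan push
 * id 10" fills vlan_vid[0]/vlan_prio[0]/vlan_proto[0] and sets VLAN_PUSH;
 * devices without native push support can still honour the latter as long
 * as the protocol is 802.1Q and prio is 0, hence the weaker depth-1 check.
 */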
3241
3242static int add_vlan_push_action(struct mlx5e_priv *priv,
3243                                struct mlx5_esw_flow_attr *attr,
3244                                struct net_device **out_dev,
3245                                u32 *action)
3246{
3247        struct net_device *vlan_dev = *out_dev;
3248        struct flow_action_entry vlan_act = {
3249                .id = FLOW_ACTION_VLAN_PUSH,
3250                .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3251                .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3252                .vlan.prio = 0,
3253        };
3254        int err;
3255
3256        err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3257        if (err)
3258                return err;
3259
3260        *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3261                                        dev_get_iflink(vlan_dev));
3262        if (is_vlan_dev(*out_dev))
3263                err = add_vlan_push_action(priv, attr, out_dev, action);
3264
3265        return err;
3266}
3267
3268static int add_vlan_pop_action(struct mlx5e_priv *priv,
3269                               struct mlx5_esw_flow_attr *attr,
3270                               u32 *action)
3271{
3272        int nest_level = attr->parse_attr->filter_dev->lower_level - priv->netdev->lower_level;
3273        struct flow_action_entry vlan_act = {
3274                .id = FLOW_ACTION_VLAN_POP,
3275        };
3276        int err = 0;
3277
3278        while (nest_level--) {
3279                err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
3280                if (err)
3281                        return err;
3282        }
3283
3284        return err;
3285}
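
/* Editorial worked example for the pop count above (assumption): for a
 * filter attached to a stacked vlan device such as eth0.10.20,
 * filter_dev->lower_level is 3 while the underlying mlx5e netdev's
 * lower_level is 1, so nest_level is 2 and one pop is emitted per
 * encapsulating vlan header.
 */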
3286
3287bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3288                                    struct net_device *out_dev)
3289{
3290        if (is_merged_eswitch_dev(priv, out_dev))
3291                return true;
3292
3293        return mlx5e_eswitch_rep(out_dev) &&
3294               same_hw_devs(priv, netdev_priv(out_dev));
3295}
3296
3297static bool is_duplicated_output_device(struct net_device *dev,
3298                                        struct net_device *out_dev,
3299                                        int *ifindexes, int if_count,
3300                                        struct netlink_ext_ack *extack)
3301{
3302        int i;
3303
3304        for (i = 0; i < if_count; i++) {
3305                if (ifindexes[i] == out_dev->ifindex) {
3306                        NL_SET_ERR_MSG_MOD(extack,
3307                                           "can't duplicate output to same device");
3308                        netdev_err(dev, "can't duplicate output to same device: %s\n",
3309                                   out_dev->name);
3310                        return true;
3311                }
3312        }
3313
3314        return false;
3315}
3316
3317static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
3318                                struct flow_action *flow_action,
3319                                struct mlx5e_tc_flow *flow,
3320                                struct netlink_ext_ack *extack)
3321{
3322        struct pedit_headers_action hdrs[2] = {};
3323        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3324        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3325        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3326        struct mlx5e_rep_priv *rpriv = priv->ppriv;
3327        const struct ip_tunnel_info *info = NULL;
3328        int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
3329        bool ft_flow = mlx5e_is_ft_flow(flow);
3330        const struct flow_action_entry *act;
3331        int err, i, if_count = 0;
3332        bool encap = false;
3333        u32 action = 0;
3334
3335        if (!flow_action_has_entries(flow_action))
3336                return -EINVAL;
3337
3338        flow_action_for_each(i, act, flow_action) {
3339                switch (act->id) {
3340                case FLOW_ACTION_DROP:
3341                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
3342                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3343                        break;
3344                case FLOW_ACTION_MANGLE:
3345                case FLOW_ACTION_ADD:
3346                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
3347                                                    parse_attr, hdrs, extack);
3348                        if (err)
3349                                return err;
3350
3351                        action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3352                        attr->split_count = attr->out_count;
3353                        break;
3354                case FLOW_ACTION_CSUM:
3355                        if (csum_offload_supported(priv, action,
3356                                                   act->csum_flags, extack))
3357                                break;
3358
3359                        return -EOPNOTSUPP;
3360                case FLOW_ACTION_REDIRECT:
3361                case FLOW_ACTION_MIRRED: {
3362                        struct mlx5e_priv *out_priv;
3363                        struct net_device *out_dev;
3364
3365                        out_dev = act->dev;
3366                        if (!out_dev) {
3367                                /* out_dev is NULL when filters with a
3368                                 * non-existent mirred device are replayed to
3369                                 * the driver.
3370                                 */
3371                                return -EINVAL;
3372                        }
3373
3374                        if (ft_flow && out_dev == priv->netdev) {
3375                                /* Ignore forward to self rules generated
3376                                 * by adding both mlx5 devs to the flow table
3377                                 * block on a normal nft offload setup.
3378                                 */
3379                                return -EOPNOTSUPP;
3380                        }
3381
3382                        if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
3383                                NL_SET_ERR_MSG_MOD(extack,
3384                                                   "can't support more output ports, can't offload forwarding");
3385                                pr_err("can't support more than %d output ports, can't offload forwarding\n",
3386                                       attr->out_count);
3387                                return -EOPNOTSUPP;
3388                        }
3389
3390                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3391                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3392                        if (encap) {
3393                                parse_attr->mirred_ifindex[attr->out_count] =
3394                                        out_dev->ifindex;
3395                                parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
3396                                if (!parse_attr->tun_info[attr->out_count])
3397                                        return -ENOMEM;
3398                                encap = false;
3399                                attr->dests[attr->out_count].flags |=
3400                                        MLX5_ESW_DEST_ENCAP;
3401                                attr->out_count++;
3402                                /* attr->dests[].rep is resolved when we
3403                                 * handle encap
3404                                 */
3405                        } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
3406                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3407                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
3408                                struct net_device *uplink_upper;
3409
3410                                if (is_duplicated_output_device(priv->netdev,
3411                                                                out_dev,
3412                                                                ifindexes,
3413                                                                if_count,
3414                                                                extack))
3415                                        return -EOPNOTSUPP;
3416
3417                                ifindexes[if_count] = out_dev->ifindex;
3418                                if_count++;
3419
3420                                rcu_read_lock();
3421                                uplink_upper =
3422                                        netdev_master_upper_dev_get_rcu(uplink_dev);
3423                                if (uplink_upper &&
3424                                    netif_is_lag_master(uplink_upper) &&
3425                                    uplink_upper == out_dev)
3426                                        out_dev = uplink_dev;
3427                                rcu_read_unlock();
3428
3429                                if (is_vlan_dev(out_dev)) {
3430                                        err = add_vlan_push_action(priv, attr,
3431                                                                   &out_dev,
3432                                                                   &action);
3433                                        if (err)
3434                                                return err;
3435                                }
3436
3437                                if (is_vlan_dev(parse_attr->filter_dev)) {
3438                                        err = add_vlan_pop_action(priv, attr,
3439                                                                  &action);
3440                                        if (err)
3441                                                return err;
3442                                }
3443
3444                                if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
3445                                        NL_SET_ERR_MSG_MOD(extack,
3446                                                           "devices are not on same switch HW, can't offload forwarding");
3447                                        pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3448                                               priv->netdev->name, out_dev->name);
3449                                        return -EOPNOTSUPP;
3450                                }
3451
3452                                out_priv = netdev_priv(out_dev);
3453                                rpriv = out_priv->ppriv;
3454                                attr->dests[attr->out_count].rep = rpriv->rep;
3455                                attr->dests[attr->out_count].mdev = out_priv->mdev;
3456                                attr->out_count++;
3457                        } else if (parse_attr->filter_dev != priv->netdev) {
3458                                /* All mlx5 devices are called to configure
3459                                 * high level device filters. Therefore, the
3460                                 * *attempt* to install a filter on an invalid
3461                                 * eswitch should not trigger an explicit error.
3462                                 */
3463                                return -EINVAL;
3464                        } else {
3465                                NL_SET_ERR_MSG_MOD(extack,
3466                                                   "devices are not on same switch HW, can't offload forwarding");
3467                                pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
3468                                       priv->netdev->name, out_dev->name);
3469                                return -EINVAL;
3470                        }
3471                        }
3472                        break;
3473                case FLOW_ACTION_TUNNEL_ENCAP:
3474                        info = act->tunnel;
3475                        if (info)
3476                                encap = true;
3477                        else
3478                                return -EOPNOTSUPP;
3479
3480                        break;
3481                case FLOW_ACTION_VLAN_PUSH:
3482                case FLOW_ACTION_VLAN_POP:
3483                        if (act->id == FLOW_ACTION_VLAN_PUSH &&
3484                            (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
3485                                /* Replace vlan pop+push with vlan modify */
3486                                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3487                                err = add_vlan_rewrite_action(priv,
3488                                                              MLX5_FLOW_NAMESPACE_FDB,
3489                                                              act, parse_attr, hdrs,
3490                                                              &action, extack);
3491                        } else {
3492                                err = parse_tc_vlan_action(priv, act, attr, &action);
3493                        }
3494                        if (err)
3495                                return err;
3496
3497                        attr->split_count = attr->out_count;
3498                        break;
3499                case FLOW_ACTION_VLAN_MANGLE:
3500                        err = add_vlan_rewrite_action(priv,
3501                                                      MLX5_FLOW_NAMESPACE_FDB,
3502                                                      act, parse_attr, hdrs,
3503                                                      &action, extack);
3504                        if (err)
3505                                return err;
3506
3507                        attr->split_count = attr->out_count;
3508                        break;
3509                case FLOW_ACTION_TUNNEL_DECAP:
3510                        action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
3511                        break;
3512                case FLOW_ACTION_GOTO: {
3513                        u32 dest_chain = act->chain_index;
3514                        u32 max_chain = mlx5_esw_chains_get_chain_range(esw);
3515
3516                        if (ft_flow) {
3517                                NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3518                                return -EOPNOTSUPP;
3519                        }
3520                        if (dest_chain <= attr->chain) {
3521                                NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
3522                                return -EOPNOTSUPP;
3523                        }
3524                        if (dest_chain > max_chain) {
3525                                NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
3526                                return -EOPNOTSUPP;
3527                        }
3528                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3529                        attr->dest_chain = dest_chain;
3530                        break;
3531                        }
3532                default:
3533                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3534                        return -EOPNOTSUPP;
3535                }
3536        }
3537
3538        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
3539            action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
3540                /* For prio tag mode, replace the vlan pop with a vlan prio
3541                 * tag rewrite.
3542                 */
3543                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3544                err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
3545                                                       &action, extack);
3546                if (err)
3547                        return err;
3548        }
3549
3550        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3551            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3552                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3553                                            parse_attr, hdrs, &action, extack);
3554                if (err)
3555                        return err;
3556                /* In case all pedit actions are skipped, remove the MOD_HDR
3557                 * flag. We might have set split_count either by pedit or by
3558                 * pop/push; if there is no pop/push either, reset it too.
3559                 */
3560                if (parse_attr->num_mod_hdr_actions == 0) {
3561                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3562                        kfree(parse_attr->mod_hdr_actions);
3563                        if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3564                              (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3565                                attr->split_count = 0;
3566                }
3567        }
3568
3569        attr->action = action;
3570        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3571                return -EOPNOTSUPP;
3572
3573        if (attr->dest_chain) {
3574                if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3575                        NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3576                        return -EOPNOTSUPP;
3577                }
3578                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3579        }
3580
3581        if (!(attr->action &
3582              (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3583                NL_SET_ERR_MSG(extack, "Rule must have at least one forward/drop action");
3584                return -EOPNOTSUPP;
3585        }
3586
3587        if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3588                NL_SET_ERR_MSG_MOD(extack,
3589                                   "current firmware doesn't support split rule for port mirroring");
3590                netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
3591                return -EOPNOTSUPP;
3592        }
3593
3594        return 0;
3595}
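
/* Editorial usage sketch (hypothetical interface names): a rule that
 * exercises the encap branch above is
 *
 *   tc filter add dev pf0vf1_rep ingress protocol ip flower \
 *       action tunnel_key set src_ip 10.0.0.1 dst_ip 10.0.0.2 id 42 \
 *       action mirred egress redirect dev vxlan0
 *
 * FLOW_ACTION_TUNNEL_ENCAP records the ip_tunnel_info and sets "encap",
 * so the following FLOW_ACTION_REDIRECT stores the mirred ifindex and a
 * private copy of the tunnel info; the destination rep and pkt_reformat
 * are resolved later in mlx5e_attach_encap().
 */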
3596
3597static void get_flags(int flags, unsigned long *flow_flags)
3598{
3599        unsigned long __flow_flags = 0;
3600
3601        if (flags & MLX5_TC_FLAG(INGRESS))
3602                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
3603        if (flags & MLX5_TC_FLAG(EGRESS))
3604                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
3605
3606        if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
3607                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3608        if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
3609                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3610        if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
3611                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
3612
3613        *flow_flags = __flow_flags;
3614}
3615
3616static const struct rhashtable_params tc_ht_params = {
3617        .head_offset = offsetof(struct mlx5e_tc_flow, node),
3618        .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
3619        .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
3620        .automatic_shrinking = true,
3621};
3622
3623static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
3624                                    unsigned long flags)
3625{
3626        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3627        struct mlx5e_rep_priv *uplink_rpriv;
3628
3629        if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
3630                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
3631                return &uplink_rpriv->uplink_priv.tc_ht;
3632        } else /* NIC offload */
3633                return &priv->fs.tc.ht;
3634}
3635
3636static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
3637{
3638        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
3639        bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
3640                flow_flag_test(flow, INGRESS);
3641        bool act_is_encap = !!(attr->action &
3642                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
3643        bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
3644                                                MLX5_DEVCOM_ESW_OFFLOADS);
3645
3646        if (!esw_paired)
3647                return false;
3648
3649        if ((mlx5_lag_is_sriov(attr->in_mdev) ||
3650             mlx5_lag_is_multipath(attr->in_mdev)) &&
3651            (is_rep_ingress || act_is_encap))
3652                return true;
3653
3654        return false;
3655}
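
/* Editorial note: when two eswitches are devcom-paired (e.g. under an
 * SR-IOV VF LAG or multipath setup), an ingress rule on a VF representor,
 * or any rule performing encap, is duplicated onto the peer eswitch by
 * mlx5e_tc_add_fdb_peer_flow() so that traffic arriving through either
 * port keeps matching.
 */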
3656
3657static int
3658mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
3659                 struct flow_cls_offload *f, unsigned long flow_flags,
3660                 struct mlx5e_tc_flow_parse_attr **__parse_attr,
3661                 struct mlx5e_tc_flow **__flow)
3662{
3663        struct mlx5e_tc_flow_parse_attr *parse_attr;
3664        struct mlx5e_tc_flow *flow;
3665        int out_index, err;
3666
3667        flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
3668        parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3669        if (!parse_attr || !flow) {
3670                err = -ENOMEM;
3671                goto err_free;
3672        }
3673
3674        flow->cookie = f->cookie;
3675        flow->flags = flow_flags;
3676        flow->priv = priv;
3677        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
3678                INIT_LIST_HEAD(&flow->encaps[out_index].list);
3679        INIT_LIST_HEAD(&flow->mod_hdr);
3680        INIT_LIST_HEAD(&flow->hairpin);
3681        refcount_set(&flow->refcnt, 1);
3682        init_completion(&flow->init_done);
3683
3684        *__flow = flow;
3685        *__parse_attr = parse_attr;
3686
3687        return 0;
3688
3689err_free:
3690        kfree(flow);
3691        kvfree(parse_attr);
3692        return err;
3693}
3694
3695static void
3696mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
3697                         struct mlx5e_priv *priv,
3698                         struct mlx5e_tc_flow_parse_attr *parse_attr,
3699                         struct flow_cls_offload *f,
3700                         struct mlx5_eswitch_rep *in_rep,
3701                         struct mlx5_core_dev *in_mdev)
3702{
3703        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3704
3705        esw_attr->parse_attr = parse_attr;
3706        esw_attr->chain = f->common.chain_index;
3707        esw_attr->prio = f->common.prio;
3708
3709        esw_attr->in_rep = in_rep;
3710        esw_attr->in_mdev = in_mdev;
3711
3712        if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
3713            MLX5_COUNTER_SOURCE_ESWITCH)
3714                esw_attr->counter_dev = in_mdev;
3715        else
3716                esw_attr->counter_dev = priv->mdev;
3717}
3718
3719static struct mlx5e_tc_flow *
3720__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
3721                     struct flow_cls_offload *f,
3722                     unsigned long flow_flags,
3723                     struct net_device *filter_dev,
3724                     struct mlx5_eswitch_rep *in_rep,
3725                     struct mlx5_core_dev *in_mdev)
3726{
3727        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
3728        struct netlink_ext_ack *extack = f->common.extack;
3729        struct mlx5e_tc_flow_parse_attr *parse_attr;
3730        struct mlx5e_tc_flow *flow;
3731        int attr_size, err;
3732
3733        flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3734        attr_size  = sizeof(struct mlx5_esw_flow_attr);
3735        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
3736                               &parse_attr, &flow);
3737        if (err)
3738                goto out;
3739
3740        parse_attr->filter_dev = filter_dev;
3741        mlx5e_flow_esw_attr_init(flow->esw_attr,
3742                                 priv, parse_attr,
3743                                 f, in_rep, in_mdev);
3744
3745        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
3746                               f, filter_dev);
3747        if (err)
3748                goto err_free;
3749
3750        err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
3751        if (err)
3752                goto err_free;
3753
3754        err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
3755        complete_all(&flow->init_done);
3756        if (err) {
3757                if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
3758                        goto err_free;
3759
3760                add_unready_flow(flow);
3761        }
3762
3763        return flow;
3764
3765err_free:
3766        mlx5e_flow_put(priv, flow);
3767out:
3768        return ERR_PTR(err);
3769}
3770
3771static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
3772                                      struct mlx5e_tc_flow *flow,
3773                                      unsigned long flow_flags)
3774{
3775        struct mlx5e_priv *priv = flow->priv, *peer_priv;
3776        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
3777        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
3778        struct mlx5e_tc_flow_parse_attr *parse_attr;
3779        struct mlx5e_rep_priv *peer_urpriv;
3780        struct mlx5e_tc_flow *peer_flow;
3781        struct mlx5_core_dev *in_mdev;
3782        int err = 0;
3783
3784        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3785        if (!peer_esw)
3786                return -ENODEV;
3787
3788        peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
3789        peer_priv = netdev_priv(peer_urpriv->netdev);
3790
3791        /* in_mdev holds the mdev on which the packet originated.
3792         * Packets redirected to the uplink use the same mdev as the
3793         * original flow, while packets redirected from the uplink use
3794         * the peer mdev.
3795         */
3796        if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
3797                in_mdev = peer_priv->mdev;
3798        else
3799                in_mdev = priv->mdev;
3800
3801        parse_attr = flow->esw_attr->parse_attr;
3802        peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
3803                                         parse_attr->filter_dev,
3804                                         flow->esw_attr->in_rep, in_mdev);
3805        if (IS_ERR(peer_flow)) {
3806                err = PTR_ERR(peer_flow);
3807                goto out;
3808        }
3809
3810        flow->peer_flow = peer_flow;
3811        flow_flag_set(flow, DUP);
3812        mutex_lock(&esw->offloads.peer_mutex);
3813        list_add_tail(&flow->peer, &esw->offloads.peer_flows);
3814        mutex_unlock(&esw->offloads.peer_mutex);
3815
3816out:
3817        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
3818        return err;
3819}
3820
3821static int
3822mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
3823                   struct flow_cls_offload *f,
3824                   unsigned long flow_flags,
3825                   struct net_device *filter_dev,
3826                   struct mlx5e_tc_flow **__flow)
3827{
3828        struct mlx5e_rep_priv *rpriv = priv->ppriv;
3829        struct mlx5_eswitch_rep *in_rep = rpriv->rep;
3830        struct mlx5_core_dev *in_mdev = priv->mdev;
3831        struct mlx5e_tc_flow *flow;
3832        int err;
3833
3834        flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
3835                                    in_mdev);
3836        if (IS_ERR(flow))
3837                return PTR_ERR(flow);
3838
3839        if (is_peer_flow_needed(flow)) {
3840                err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
3841                if (err) {
3842                        mlx5e_tc_del_fdb_flow(priv, flow);
3843                        goto out;
3844                }
3845        }
3846
3847        *__flow = flow;
3848
3849        return 0;
3850
3851out:
3852        return err;
3853}
3854
3855static int
3856mlx5e_add_nic_flow(struct mlx5e_priv *priv,
3857                   struct flow_cls_offload *f,
3858                   unsigned long flow_flags,
3859                   struct net_device *filter_dev,
3860                   struct mlx5e_tc_flow **__flow)
3861{
3862        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
3863        struct netlink_ext_ack *extack = f->common.extack;
3864        struct mlx5e_tc_flow_parse_attr *parse_attr;
3865        struct mlx5e_tc_flow *flow;
3866        int attr_size, err;
3867
3868        /* multi-chain not supported for NIC rules */
3869        if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
3870                return -EOPNOTSUPP;
3871
3872        flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3873        attr_size  = sizeof(struct mlx5_nic_flow_attr);
3874        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
3875                               &parse_attr, &flow);
3876        if (err)
3877                goto out;
3878
3879        parse_attr->filter_dev = filter_dev;
3880        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
3881                               f, filter_dev);
3882        if (err)
3883                goto err_free;
3884
3885        err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
3886        if (err)
3887                goto err_free;
3888
3889        err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
3890        if (err)
3891                goto err_free;
3892
3893        flow_flag_set(flow, OFFLOADED);
3894        kvfree(parse_attr);
3895        *__flow = flow;
3896
3897        return 0;
3898
3899err_free:
3900        mlx5e_flow_put(priv, flow);
3901        kvfree(parse_attr);
3902out:
3903        return err;
3904}
3905
3906static int
3907mlx5e_tc_add_flow(struct mlx5e_priv *priv,
3908                  struct flow_cls_offload *f,
3909                  unsigned long flags,
3910                  struct net_device *filter_dev,
3911                  struct mlx5e_tc_flow **flow)
3912{
3913        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3914        unsigned long flow_flags;
3915        int err;
3916
3917        get_flags(flags, &flow_flags);
3918
3919        if (!tc_can_offload_extack(priv->netdev, f->common.extack))
3920                return -EOPNOTSUPP;
3921
3922        if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
3923                err = mlx5e_add_fdb_flow(priv, f, flow_flags,
3924                                         filter_dev, flow);
3925        else
3926                err = mlx5e_add_nic_flow(priv, f, flow_flags,
3927                                         filter_dev, flow);
3928
3929        return err;
3930}
3931
3932int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
3933                           struct flow_cls_offload *f, unsigned long flags)
3934{
3935        struct netlink_ext_ack *extack = f->common.extack;
3936        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
3937        struct mlx5e_tc_flow *flow;
3938        int err = 0;
3939
3940        rcu_read_lock();
3941        flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
3942        rcu_read_unlock();
3943        if (flow) {
3944                NL_SET_ERR_MSG_MOD(extack,
3945                                   "flow cookie already exists, ignoring");
3946                netdev_warn_once(priv->netdev,
3947                                 "flow cookie %lx already exists, ignoring\n",
3948                                 f->cookie);
3949                err = -EEXIST;
3950                goto out;
3951        }
3952
3953        trace_mlx5e_configure_flower(f);
3954        err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
3955        if (err)
3956                goto out;
3957
3958        err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
3959        if (err)
3960                goto err_free;
3961
3962        return 0;
3963
3964err_free:
3965        mlx5e_flow_put(priv, flow);
3966out:
3967        return err;
3968}
3969
3970static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
3971{
3972        bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
3973        bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
3974
3975        return flow_flag_test(flow, INGRESS) == dir_ingress &&
3976                flow_flag_test(flow, EGRESS) == dir_egress;
3977}
3978
3979int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
3980                        struct flow_cls_offload *f, unsigned long flags)
3981{
3982        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
3983        struct mlx5e_tc_flow *flow;
3984        int err;
3985
3986        rcu_read_lock();
3987        flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
3988        if (!flow || !same_flow_direction(flow, flags)) {
3989                err = -EINVAL;
3990                goto errout;
3991        }
3992
3993        /* Only delete the flow if the DELETED flag isn't already set;
3994         * the test-and-set is atomic, so racing deleters bail out here.
3995         */
3996        if (flow_flag_test_and_set(flow, DELETED)) {
3997                err = -EINVAL;
3998                goto errout;
3999        }
4000        rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4001        rcu_read_unlock();
4002
4003        trace_mlx5e_delete_flower(f);
4004        mlx5e_flow_put(priv, flow);
4005
4006        return 0;
4007
4008errout:
4009        rcu_read_unlock();
4010        return err;
4011}
4012
4013int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4014                       struct flow_cls_offload *f, unsigned long flags)
4015{
4016        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4017        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4018        struct mlx5_eswitch *peer_esw;
4019        struct mlx5e_tc_flow *flow;
4020        struct mlx5_fc *counter;
4021        u64 lastuse = 0;
4022        u64 packets = 0;
4023        u64 bytes = 0;
4024        int err = 0;
4025
4026        rcu_read_lock();
4027        flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4028                                                tc_ht_params));
4029        rcu_read_unlock();
4030        if (IS_ERR(flow))
4031                return PTR_ERR(flow);
4032
4033        if (!same_flow_direction(flow, flags)) {
4034                err = -EINVAL;
4035                goto errout;
4036        }
4037
4038        if (mlx5e_is_offloaded_flow(flow)) {
4039                counter = mlx5e_tc_get_counter(flow);
4040                if (!counter)
4041                        goto errout;
4042
4043                mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4044        }
4045
4046        /* Under multipath one rule of the pair can be un-offloaded while
4047         * its peer is still offloaded, so fold in the peer counters too.
4048         */
4049        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4050        if (!peer_esw)
4051                goto out;
4052
4053        if (flow_flag_test(flow, DUP) &&
4054            flow_flag_test(flow->peer_flow, OFFLOADED)) {
4055                u64 bytes2;
4056                u64 packets2;
4057                u64 lastuse2;
4058
4059                counter = mlx5e_tc_get_counter(flow->peer_flow);
4060                if (!counter)
4061                        goto no_peer_counter;
4062                mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4063
4064                bytes += bytes2;
4065                packets += packets2;
4066                lastuse = max_t(u64, lastuse, lastuse2);
4067        }
4068
4069no_peer_counter:
4070        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4071out:
4072        flow_stats_update(&f->stats, bytes, packets, lastuse);
4073        trace_mlx5e_stats_flower(f);
4074errout:
4075        mlx5e_flow_put(priv, flow);
4076        return err;
4077}
4078
4079static int apply_police_params(struct mlx5e_priv *priv, u32 rate,
4080                               struct netlink_ext_ack *extack)
4081{
4082        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4083        struct mlx5_eswitch *esw;
4084        u16 vport_num;
4085        u32 rate_mbps;
4086        int err;
4087
4088        vport_num = rpriv->rep->vport;
4089        if (vport_num >= MLX5_VPORT_ECPF) {
4090                NL_SET_ERR_MSG_MOD(extack,
4091                                   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4092                return -EOPNOTSUPP;
4093        }
4094
4095        esw = priv->mdev->priv.eswitch;
4096        /* rate is given in bytes/sec.
4097         * First convert to bits/sec, then round to the nearest Mbit/sec
4098         * (Mbit meaning one million bits).
4099         * If rate is non-zero, configure at least 1 Mbit/sec; a rate of
4100         * 0 disables the limiter (see mlx5e_tc_delete_matchall()).
4101         */
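            /* Worked example (illustrative numbers): rate = 50000 bytes/sec
             * is 400000 bits/sec, (400000 + 500000) / 1000000 rounds down to
             * 0 and is clamped up to 1 Mbit/sec; rate = 375000 bytes/sec is
             * 3000000 bits/sec and rounds to 3 Mbit/sec.
             */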
4102        rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;
4103        err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
4104        if (err)
4105                NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4106
4107        return err;
4108}
4109
4110static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4111                                        struct flow_action *flow_action,
4112                                        struct netlink_ext_ack *extack)
4113{
4114        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4115        const struct flow_action_entry *act;
4116        int err;
4117        int i;
4118
4119        if (!flow_action_has_entries(flow_action)) {
4120                NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4121                return -EINVAL;
4122        }
4123
4124        if (!flow_offload_has_one_action(flow_action)) {
4125                NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
4126                return -EOPNOTSUPP;
4127        }
4128
4129        flow_action_for_each(i, act, flow_action) {
4130                switch (act->id) {
4131                case FLOW_ACTION_POLICE:
4132                        err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4133                        if (err)
4134                                return err;
4135
4136                        rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4137                        break;
4138                default:
4139                        NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only the police action for matchall");
4140                        return -EOPNOTSUPP;
4141                }
4142        }
4143
4144        return 0;
4145}
4146
4147int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4148                                struct tc_cls_matchall_offload *ma)
4149{
4150        struct netlink_ext_ack *extack = ma->common.extack;
4151
4152        if (ma->common.prio != 1) {
4153                NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4154                return -EINVAL;
4155        }
4156
4157        return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4158}
4159
4160int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4161                             struct tc_cls_matchall_offload *ma)
4162{
4163        struct netlink_ext_ack *extack = ma->common.extack;
4164
4165        return apply_police_params(priv, 0, extack);
4166}
4167
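    /* matchall (policer) stats are reported as deltas: sample the VF
     * vport counters, report the difference from the snapshot taken at
     * the previous query (or when the policer was applied), then advance
     * the snapshot.
     */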
4168void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4169                             struct tc_cls_matchall_offload *ma)
4170{
4171        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4172        struct rtnl_link_stats64 cur_stats;
4173        u64 dbytes;
4174        u64 dpkts;
4175
4176        cur_stats = priv->stats.vf_vport;
4177        dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4178        dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4179        rpriv->prev_vf_vport_stats = cur_stats;
4180        flow_stats_update(&ma->stats, dpkts, dbytes, jiffies);
4181}
4182
4183static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
4184                                              struct mlx5e_priv *peer_priv)
4185{
4186        struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
4187        struct mlx5e_hairpin_entry *hpe, *tmp;
4188        LIST_HEAD(init_wait_list);
4189        u16 peer_vhca_id;
4190        int bkt;
4191
4192        if (!same_hw_devs(priv, peer_priv))
4193                return;
4194
4195        peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
4196
4197        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
4198        hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
4199                if (refcount_inc_not_zero(&hpe->refcnt))
4200                        list_add(&hpe->dead_peer_wait_list, &init_wait_list);
4201        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
4202
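            /* Walk the snapshot outside of hairpin_tbl_lock: the references
             * taken above keep the entries alive, waiting on res_ready may
             * sleep for a while, and mlx5e_hairpin_put() may need to retake
             * the lock when it drops the last reference.
             */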
4203        list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
4204                wait_for_completion(&hpe->res_ready);
4205                if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
4206                        hpe->hp->pair->peer_gone = true;
4207
4208                mlx5e_hairpin_put(priv, hpe);
4209        }
4210}
4211
4212static int mlx5e_tc_netdev_event(struct notifier_block *this,
4213                                 unsigned long event, void *ptr)
4214{
4215        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
4216        struct mlx5e_flow_steering *fs;
4217        struct mlx5e_priv *peer_priv;
4218        struct mlx5e_tc_table *tc;
4219        struct mlx5e_priv *priv;
4220
4221        if (ndev->netdev_ops != &mlx5e_netdev_ops ||
4222            event != NETDEV_UNREGISTER ||
4223            ndev->reg_state == NETREG_REGISTERED)
4224                return NOTIFY_DONE;
4225
4226        tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
4227        fs = container_of(tc, struct mlx5e_flow_steering, tc);
4228        priv = container_of(fs, struct mlx5e_priv, fs);
4229        peer_priv = netdev_priv(ndev);
4230        if (priv == peer_priv ||
4231            !(priv->netdev->features & NETIF_F_HW_TC))
4232                return NOTIFY_DONE;
4233
4234        mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
4235
4236        return NOTIFY_DONE;
4237}
4238
4239int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
4240{
4241        struct mlx5e_tc_table *tc = &priv->fs.tc;
4242        int err;
4243
4244        mutex_init(&tc->t_lock);
4245        mutex_init(&tc->mod_hdr.lock);
4246        hash_init(tc->mod_hdr.hlist);
4247        mutex_init(&tc->hairpin_tbl_lock);
4248        hash_init(tc->hairpin_tbl);
4249
4250        err = rhashtable_init(&tc->ht, &tc_ht_params);
4251        if (err)
4252                return err;
4253
4254        tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
4255        err = register_netdevice_notifier_dev_net(priv->netdev,
4256                                                  &tc->netdevice_nb,
4257                                                  &tc->netdevice_nn);
4258        if (err) {
4259                tc->netdevice_nb.notifier_call = NULL;
4260                mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
4261        }
4262
4263        return err;
4264}
4265
4266static void _mlx5e_tc_del_flow(void *ptr, void *arg)
4267{
4268        struct mlx5e_tc_flow *flow = ptr;
4269        struct mlx5e_priv *priv = flow->priv;
4270
4271        mlx5e_tc_del_flow(priv, flow);
4272        kfree(flow);
4273}
4274
4275void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
4276{
4277        struct mlx5e_tc_table *tc = &priv->fs.tc;
4278
4279        if (tc->netdevice_nb.notifier_call)
4280                unregister_netdevice_notifier_dev_net(priv->netdev,
4281                                                      &tc->netdevice_nb,
4282                                                      &tc->netdevice_nn);
4283
4284        mutex_destroy(&tc->mod_hdr.lock);
4285        mutex_destroy(&tc->hairpin_tbl_lock);
4286
4287        rhashtable_destroy(&tc->ht);
4288
4289        if (!IS_ERR_OR_NULL(tc->t)) {
4290                mlx5_destroy_flow_table(tc->t);
4291                tc->t = NULL;
4292        }
4293        mutex_destroy(&tc->t_lock);
4294}
4295
4296int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
4297{
4298        return rhashtable_init(tc_ht, &tc_ht_params);
4299}
4300
4301void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
4302{
4303        rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
4304}
4305
4306int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
4307{
4308        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4309
4310        return atomic_read(&tc_ht->nelems);
4311}
4312
4313void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
4314{
4315        struct mlx5e_tc_flow *flow, *tmp;
4316
4317        list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
4318                __mlx5e_tc_del_fdb_peer_flow(flow);
4319}
4320
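    /* Retry rules whose offload had to be deferred ("unready" flows):
     * each one that mlx5e_tc_add_fdb_flow() now accepts is removed from
     * the unready list, the rest stay queued for a later attempt.
     */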
4321void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
4322{
4323        struct mlx5_rep_uplink_priv *rpriv =
4324                container_of(work, struct mlx5_rep_uplink_priv,
4325                             reoffload_flows_work);
4326        struct mlx5e_tc_flow *flow, *tmp;
4327
4328        mutex_lock(&rpriv->unready_flows_lock);
4329        list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
4330                if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
4331                        unready_flow_del(flow);
4332        }
4333        mutex_unlock(&rpriv->unready_flows_lock);
4334}
4335