linux/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/tc_act/tc_mpls.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"

#define nic_chains(priv) ((priv)->fs.tc.chains)
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)

enum {
        MLX5E_TC_FLOW_FLAG_INGRESS      = MLX5E_TC_FLAG_INGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_EGRESS       = MLX5E_TC_FLAG_EGRESS_BIT,
        MLX5E_TC_FLOW_FLAG_ESWITCH      = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_FT           = MLX5E_TC_FLAG_FT_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_NIC          = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT,
        MLX5E_TC_FLOW_FLAG_OFFLOADED    = MLX5E_TC_FLOW_BASE,
        MLX5E_TC_FLOW_FLAG_HAIRPIN      = MLX5E_TC_FLOW_BASE + 1,
        MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS  = MLX5E_TC_FLOW_BASE + 2,
        MLX5E_TC_FLOW_FLAG_SLOW         = MLX5E_TC_FLOW_BASE + 3,
        MLX5E_TC_FLOW_FLAG_DUP          = MLX5E_TC_FLOW_BASE + 4,
        MLX5E_TC_FLOW_FLAG_NOT_READY    = MLX5E_TC_FLOW_BASE + 5,
        MLX5E_TC_FLOW_FLAG_DELETED      = MLX5E_TC_FLOW_BASE + 6,
        MLX5E_TC_FLOW_FLAG_CT           = MLX5E_TC_FLOW_BASE + 7,
        MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
};

#define MLX5E_TC_MAX_SPLITS 1

/* Helper struct for accessing a struct containing list_head array.
 * Containing struct
 *   |- Helper array
 *      [0] Helper item 0
 *          |- list_head item 0
 *          |- index (0)
 *      [1] Helper item 1
 *          |- list_head item 1
 *          |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *        container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
        struct mlx5e_encap_entry *e; /* attached encap instance */
        struct list_head list;
        int index;
};
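
/* Illustrative sketch (not part of the original driver): recovering the
 * containing mlx5e_tc_flow from one of its encap list_head items with the
 * two container_of() steps described above. "item" stands for a list_head
 * taken from an encap entry's flow list and is an assumption here.
 *
 *	struct encap_flow_item *efi;
 *	struct mlx5e_tc_flow *flow;
 *
 *	efi = container_of(item, struct encap_flow_item, list);
 *	flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */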

struct mlx5e_tc_flow {
        struct rhash_head       node;
        struct mlx5e_priv       *priv;
        u64                     cookie;
        unsigned long           flags;
        struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];

        /* flows sharing the same reformat object - currently mpls decap */
        struct list_head l3_to_l2_reformat;
        struct mlx5e_decap_entry *decap_reformat;

        /* Flow can be associated with multiple encap IDs.
         * The number of encaps is bounded by the number of supported
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5e_tc_flow    *peer_flow;
        struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */
        struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
        struct list_head        hairpin; /* flows sharing the same hairpin */
        struct list_head        peer;    /* flows with peer flow */
        struct list_head        unready; /* flows not ready to be offloaded (e.g. due to missing route) */
        struct net_device       *orig_dev; /* netdev adding flow first */
        int                     tmp_efi_index;
        struct list_head        tmp_list; /* temporary flow list used by neigh update */
        refcount_t              refcnt;
        struct rcu_head         rcu_head;
        struct completion       init_done;
        int tunnel_id; /* the mapped tunnel id of this flow */
        struct mlx5_flow_attr *attr;
};

struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
        struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
        struct ethhdr eth;
};

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 0,
                .mlen = 2,
        },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 1,
                .mlen = 3,
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
        [ZONE_TO_REG] = zone_to_reg_ct,
        [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
        [CTSTATE_TO_REG] = ctstate_to_reg_ct,
        [MARK_TO_REG] = mark_to_reg_ct,
        [LABELS_TO_REG] = labels_to_reg_ct,
        [FTEID_TO_REG] = fteid_to_reg_ct,
        /* For NIC rules we store the restore metadata directly
         * into reg_b that is passed to SW since we don't
         * jump between steering domains.
         */
        [NIC_CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
                .moffset = 0,
                .mlen = 2,
        },
        [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);

void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
                            enum mlx5e_tc_attr_to_reg type,
                            u32 data,
                            u32 mask)
{
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        void *headers_c = spec->match_criteria;
        void *headers_v = spec->match_value;
        void *fmask, *fval;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
        data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));

        memcpy(fmask, &mask, match_len);
        memcpy(fval, &data, match_len);

        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}

void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
                                enum mlx5e_tc_attr_to_reg type,
                                u32 *data,
                                u32 *mask)
{
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        void *headers_c = spec->match_criteria;
        void *headers_v = spec->match_value;
        void *fmask, *fval;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(mask, fmask, match_len);
        memcpy(data, fval, match_len);

        *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8))));
        *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8))));
}

int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                          enum mlx5_flow_namespace_type ns,
                          enum mlx5e_tc_attr_to_reg type,
                          u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;
        int err;

        err = alloc_mod_hdr_actions(mdev, ns, mod_hdr_acts);
        if (err)
                return err;

        modact = mod_hdr_acts->actions +
                 (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);

        /* Firmware has a 5-bit length field, where 0 means 32 bits */
        if (mlen == 4)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset * 8);
        MLX5_SET(set_action_in, modact, length, mlen * 8);
        MLX5_SET(set_action_in, modact, data, data);
        mod_hdr_acts->num_actions++;

        return 0;
}
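
/* Usage sketch (illustrative, not taken from this file): pairing
 * mlx5e_tc_match_to_reg_set() with mlx5e_tc_match_to_reg_match() to write a
 * chain id into reg C0 and later match on it. "chain", "spec" and the 0xFFFF
 * mask (CHAIN_TO_REG maps 2 bytes, i.e. 16 valid bits) are assumptions.
 *
 *	err = mlx5e_tc_match_to_reg_set(mdev, &mod_hdr_acts,
 *					MLX5_FLOW_NAMESPACE_FDB,
 *					CHAIN_TO_REG, chain);
 *	if (err)
 *		return err;
 *
 *	mlx5e_tc_match_to_reg_match(spec, CHAIN_TO_REG, chain, 0xFFFF);
 */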

#define esw_offloads_mode(esw) (mlx5_eswitch_mode(esw) == MLX5_ESWITCH_OFFLOADS)

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (esw_offloads_mode(esw)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->ct_priv;
        }

        return priv->fs.tc.ct;
}

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
                    struct mlx5_flow_spec *spec,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (esw_offloads_mode(esw))
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
                    struct mlx5_flow_handle *rule,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (esw_offloads_mode(esw)) {
                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

                return;
        }

        mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when dead peer update event
         * function traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);

static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);
        return flow;
}

static void mlx5e_flow_put(struct mlx5e_priv *priv,
                           struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}

static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before setting bit. */
        smp_mb__before_atomic();
        set_bit(flag, &flow->flags);
}

#define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow,
                                     unsigned long flag)
{
        /* test_and_set_bit() provides all necessary barriers */
        return test_and_set_bit(flag, &flow->flags);
}

#define flow_flag_test_and_set(flow, flag)                      \
        __flow_flag_test_and_set(flow,                          \
                                 MLX5E_TC_FLOW_FLAG_##flag)

static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        /* Complete all memory stores before clearing bit. */
        smp_mb__before_atomic();
        clear_bit(flag, &flow->flags);
}

#define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \
                                                      MLX5E_TC_FLOW_FLAG_##flag)

static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag)
{
        bool ret = test_bit(flag, &flow->flags);

        /* Read fields of flow structure only after checking flags. */
        smp_mb__after_atomic();
        return ret;
}

#define flow_flag_test(flow, flag) __flow_flag_test(flow, \
                                                    MLX5E_TC_FLOW_FLAG_##flag)
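
/* Expansion example: flow_flag_test(flow, ESWITCH) becomes
 * __flow_flag_test(flow, MLX5E_TC_FLOW_FLAG_ESWITCH), i.e. a plain
 * test_bit() on flow->flags followed by the barrier above; the wrappers
 * below rely on this token pasting.
 */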

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

static bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

static int get_flow_name_space(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return get_flow_name_space(flow) == MLX5_FLOW_NAMESPACE_FDB ?
                &esw->offloads.mod_hdr :
                &priv->fs.tc.mod_hdr;
}

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct mlx5_modify_hdr *modify_hdr;
        struct mlx5e_mod_hdr_handle *mh;

        mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
                                  get_flow_name_space(flow),
                                  &parse_attr->mod_hdr_acts);
        if (IS_ERR(mh))
                return PTR_ERR(mh);

        modify_hdr = mlx5e_mod_hdr_get(mh);
        flow->attr->modify_hdr = modify_hdr;
        flow->mh = mh;

        return 0;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->mh)
                return;

        mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
                             flow->mh);
        flow->mh = NULL;
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = __dev_get_by_index(net, ifindex);
        priv = netdev_priv(netdev);
        return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
        void *tirc;
        int err;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto alloc_tdn_err;

        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
        MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

        err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
        if (err)
                goto create_tir_err;

        return 0;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
        return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
        struct mlx5e_priv *priv = hp->func_priv;
        int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

        mlx5e_build_default_indir_rqt(indirection_rqt, sz,
                                      hp->num_channels);

        for (i = 0; i < sz; i++) {
                ix = i;
                if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
                        ix = mlx5e_bits_invert(i, ilog2(sz));
                ix = indirection_rqt[ix];
                rqn = hp->pair->rqn[ix];
                MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
        }
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

        err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
        if (!err)
                hp->indir_rqt.enabled = true;

        kvfree(in);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        u32 in[MLX5_ST_SZ_DW(create_tir_in)];
        int tt, i, err;
        void *tirc;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

                memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

                MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
                MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
                mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

                err = mlx5_core_create_tir(hp->func_mdev, in,
                                           &hp->indir_tirn[tt]);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
        }
        return 0;

err_destroy_tirs:
        for (i = 0; i < tt; i++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
        return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->any_tt_tirn = hp->tirn;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

        ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
        if (err)
                goto err_create_ttc_table;

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels, hp->ttc.ft.t->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;

        mlx5e_destroy_ttc_table(priv, &hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}
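
/* Worked example (illustrative): peer_vhca_id = 0x0005 and prio = 3 produce
 * the key 0x00050003, so hairpin entries are bucketed per
 * (peer vhca id, priority) pair.
 */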

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        params.log_data_size = 15;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* set one hairpin queue pair per 50Gbs share of the link */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;
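
        /* Worked example (illustrative): a 100Gbs link gives
         * link_speed64 = 100000 / 50000 = 2, i.e. two hairpin channels,
         * which also enables hairpin RSS below (num_channels > 1).
         */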

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   hp->tirn, hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->attr->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}

struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_context *flow_context = &spec->flow_context;
        struct mlx5_fs_chains *nic_chains = nic_chains(priv);
        struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_table *ft;
        int dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = nic_attr->flow_tag;

        if (attr->dest_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = attr->dest_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = nic_attr->hairpin_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_tirn) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                if (attr->dest_chain) {
                        dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
                                                                 attr->dest_chain, 1,
                                                                 MLX5E_TC_FT_LEVEL);
                        if (IS_ERR(dest[dest_ix].ft))
                                return ERR_CAST(dest[dest_ix].ft);
                } else {
                        dest[dest_ix].ft = priv->fs.vlan.ft.t;
                }
                dest_ix++;
        }

        if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
                flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_hdr = attr->modify_hdr;

        mutex_lock(&tc->t_lock);
        if (IS_ERR_OR_NULL(tc->t)) {
                /* Create the root table here if it doesn't exist yet */
 997                tc->t =
 998                        mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
 999
1000                if (IS_ERR(tc->t)) {
1001                        mutex_unlock(&tc->t_lock);
1002                        netdev_err(priv->netdev,
1003                                   "Failed to create tc offload table\n");
1004                        rule = ERR_CAST(priv->fs.tc.t);
1005                        goto err_ft_get;
1006                }
1007        }
1008        mutex_unlock(&tc->t_lock);
1009
1010        if (attr->chain || attr->prio)
1011                ft = mlx5_chains_get_table(nic_chains,
1012                                           attr->chain, attr->prio,
1013                                           MLX5E_TC_FT_LEVEL);
1014        else
1015                ft = attr->ft;
1016
1017        if (IS_ERR(ft)) {
1018                rule = ERR_CAST(ft);
1019                goto err_ft_get;
1020        }
1021
1022        if (attr->outer_match_level != MLX5_MATCH_NONE)
1023                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1024
1025        rule = mlx5_add_flow_rules(ft, spec,
1026                                   &flow_act, dest, dest_ix);
1027        if (IS_ERR(rule))
1028                goto err_rule;
1029
1030        return rule;
1031
1032err_rule:
1033        if (attr->chain || attr->prio)
1034                mlx5_chains_put_table(nic_chains,
1035                                      attr->chain, attr->prio,
1036                                      MLX5E_TC_FT_LEVEL);
1037err_ft_get:
1038        if (attr->dest_chain)
1039                mlx5_chains_put_table(nic_chains,
1040                                      attr->dest_chain, 1,
1041                                      MLX5E_TC_FT_LEVEL);
1042
1043        return ERR_CAST(rule);
1044}
1045
1046static int
1047mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1048                      struct mlx5e_tc_flow_parse_attr *parse_attr,
1049                      struct mlx5e_tc_flow *flow,
1050                      struct netlink_ext_ack *extack)
1051{
1052        struct mlx5_flow_attr *attr = flow->attr;
1053        struct mlx5_core_dev *dev = priv->mdev;
1054        struct mlx5_fc *counter = NULL;
1055        int err;
1056
1057        if (flow_flag_test(flow, HAIRPIN)) {
1058                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1059                if (err)
1060                        return err;
1061        }
1062
1063        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1064                counter = mlx5_fc_create(dev, true);
1065                if (IS_ERR(counter))
1066                        return PTR_ERR(counter);
1067
1068                attr->counter = counter;
1069        }
1070
1071        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1072                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1073                dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1074                if (err)
1075                        return err;
1076        }
1077
1078        if (flow_flag_test(flow, CT))
1079                flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), flow, &parse_attr->spec,
1080                                                        attr, &parse_attr->mod_hdr_acts);
1081        else
1082                flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
1083                                                             attr);
1084
1085        return PTR_ERR_OR_ZERO(flow->rule[0]);
1086}
1087
1088void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1089                                  struct mlx5_flow_handle *rule,
1090                                  struct mlx5_flow_attr *attr)
1091{
1092        struct mlx5_fs_chains *nic_chains = nic_chains(priv);
1093
1094        mlx5_del_flow_rules(rule);
1095
1096        if (attr->chain || attr->prio)
1097                mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1098                                      MLX5E_TC_FT_LEVEL);
1099
1100        if (attr->dest_chain)
1101                mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1102                                      MLX5E_TC_FT_LEVEL);
1103}
1104
1105static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1106                                  struct mlx5e_tc_flow *flow)
1107{
1108        struct mlx5_flow_attr *attr = flow->attr;
1109        struct mlx5e_tc_table *tc = &priv->fs.tc;
1110
1111        flow_flag_clear(flow, OFFLOADED);
1112
1113        if (flow_flag_test(flow, CT))
1114                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1115        else if (!IS_ERR_OR_NULL(flow->rule[0]))
1116                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1117
1118        /* Remove root table if no rules are left to avoid
1119         * extra steering hops.
1120         */
1121        mutex_lock(&priv->fs.tc.t_lock);
1122        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1123            !IS_ERR_OR_NULL(tc->t)) {
1124                mlx5_chains_put_table(nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
1125                priv->fs.tc.t = NULL;
1126        }
1127        mutex_unlock(&priv->fs.tc.t_lock);
1128
1129        kvfree(attr->parse_attr);
1130
1131        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1132                mlx5e_detach_mod_hdr(priv, flow);
1133
1134        mlx5_fc_destroy(priv->mdev, attr->counter);
1135
1136        if (flow_flag_test(flow, HAIRPIN))
1137                mlx5e_hairpin_flow_del(priv, flow);
1138
1139        kfree(flow->attr);
1140}
1141
1142static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1143                               struct mlx5e_tc_flow *flow, int out_index);
1144
1145static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1146                              struct mlx5e_tc_flow *flow,
1147                              struct net_device *mirred_dev,
1148                              int out_index,
1149                              struct netlink_ext_ack *extack,
1150                              struct net_device **encap_dev,
1151                              bool *encap_valid);
1152static int mlx5e_attach_decap(struct mlx5e_priv *priv,
1153                              struct mlx5e_tc_flow *flow,
1154                              struct netlink_ext_ack *extack);
1155static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1156                               struct mlx5e_tc_flow *flow);
1157
1158static struct mlx5_flow_handle *
1159mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1160                           struct mlx5e_tc_flow *flow,
1161                           struct mlx5_flow_spec *spec,
1162                           struct mlx5_flow_attr *attr)
1163{
1164        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1165        struct mlx5_flow_handle *rule;
1166
1167        if (flow_flag_test(flow, CT)) {
1168                mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
1169
1170                return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
1171                                               flow, spec, attr,
1172                                               mod_hdr_acts);
1173        }
1174
1175        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1176        if (IS_ERR(rule))
1177                return rule;
1178
1179        if (attr->esw_attr->split_count) {
1180                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1181                if (IS_ERR(flow->rule[1])) {
1182                        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
1183                        return flow->rule[1];
1184                }
1185        }
1186
1187        return rule;
1188}
1189
1190static void
1191mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1192                             struct mlx5e_tc_flow *flow,
1193                             struct mlx5_flow_attr *attr)
1194{
1195        flow_flag_clear(flow, OFFLOADED);
1196
1197        if (flow_flag_test(flow, CT)) {
1198                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr);
1199                return;
1200        }
1201
1202        if (attr->esw_attr->split_count)
1203                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1204
1205        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1206}
1207
1208static struct mlx5_flow_handle *
1209mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1210                              struct mlx5e_tc_flow *flow,
1211                              struct mlx5_flow_spec *spec)
1212{
1213        struct mlx5_flow_attr *slow_attr;
1214        struct mlx5_flow_handle *rule;
1215
1216        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1217        if (!slow_attr)
1218                return ERR_PTR(-ENOMEM);
1219
1220        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1221        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1222        slow_attr->esw_attr->split_count = 0;
1223        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1224
1225        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1226        if (!IS_ERR(rule))
1227                flow_flag_set(flow, SLOW);
1228
1229        kfree(slow_attr);
1230
1231        return rule;
1232}
1233
1234static void
1235mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1236                                  struct mlx5e_tc_flow *flow)
1237{
1238        struct mlx5_flow_attr *slow_attr;
1239
1240        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1241        if (!slow_attr) {
1242                mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1243                return;
1244        }
1245
1246        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1247        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1248        slow_attr->esw_attr->split_count = 0;
1249        slow_attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
1250        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1251        flow_flag_clear(flow, SLOW);
1252        kfree(slow_attr);
1253}
1254
1255/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1256 * function.
1257 */
1258static void unready_flow_add(struct mlx5e_tc_flow *flow,
1259                             struct list_head *unready_flows)
1260{
1261        flow_flag_set(flow, NOT_READY);
1262        list_add_tail(&flow->unready, unready_flows);
1263}
1264
1265/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1266 * function.
1267 */
1268static void unready_flow_del(struct mlx5e_tc_flow *flow)
1269{
1270        list_del(&flow->unready);
1271        flow_flag_clear(flow, NOT_READY);
1272}
1273
1274static void add_unready_flow(struct mlx5e_tc_flow *flow)
1275{
1276        struct mlx5_rep_uplink_priv *uplink_priv;
1277        struct mlx5e_rep_priv *rpriv;
1278        struct mlx5_eswitch *esw;
1279
1280        esw = flow->priv->mdev->priv.eswitch;
1281        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1282        uplink_priv = &rpriv->uplink_priv;
1283
1284        mutex_lock(&uplink_priv->unready_flows_lock);
1285        unready_flow_add(flow, &uplink_priv->unready_flows);
1286        mutex_unlock(&uplink_priv->unready_flows_lock);
1287}
1288
1289static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1290{
1291        struct mlx5_rep_uplink_priv *uplink_priv;
1292        struct mlx5e_rep_priv *rpriv;
1293        struct mlx5_eswitch *esw;
1294
1295        esw = flow->priv->mdev->priv.eswitch;
1296        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1297        uplink_priv = &rpriv->uplink_priv;
1298
1299        mutex_lock(&uplink_priv->unready_flows_lock);
1300        unready_flow_del(flow);
1301        mutex_unlock(&uplink_priv->unready_flows_lock);
1302}
1303
1304static int
1305mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1306                      struct mlx5e_tc_flow *flow,
1307                      struct netlink_ext_ack *extack)
1308{
1309        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1310        struct net_device *out_dev, *encap_dev = NULL;
1311        struct mlx5e_tc_flow_parse_attr *parse_attr;
1312        struct mlx5_flow_attr *attr = flow->attr;
1313        struct mlx5_esw_flow_attr *esw_attr;
1314        struct mlx5_fc *counter = NULL;
1315        struct mlx5e_rep_priv *rpriv;
1316        struct mlx5e_priv *out_priv;
1317        bool encap_valid = true;
1318        u32 max_prio, max_chain;
1319        int err = 0;
1320        int out_index;
1321
1322        if (!mlx5_chains_prios_supported(esw_chains(esw)) && attr->prio != 1) {
1323                NL_SET_ERR_MSG_MOD(extack,
1324                                   "E-switch priorities unsupported, upgrade FW");
1325                return -EOPNOTSUPP;
1326        }
1327
1328        /* We check chain range only for tc flows.
1329         * For ft flows, we checked attr->chain was originally 0 and set it to
1330         * FDB_FT_CHAIN which is outside tc range.
1331         * See mlx5e_rep_setup_ft_cb().
1332         */
1333        max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1334        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1335                NL_SET_ERR_MSG_MOD(extack,
1336                                   "Requested chain is out of supported range");
1337                return -EOPNOTSUPP;
1338        }
1339
1340        max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1341        if (attr->prio > max_prio) {
1342                NL_SET_ERR_MSG_MOD(extack,
1343                                   "Requested priority is out of supported range");
1344                return -EOPNOTSUPP;
1345        }
1346
1347        if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1348                err = mlx5e_attach_decap(priv, flow, extack);
1349                if (err)
1350                        return err;
1351        }
1352
1353        parse_attr = attr->parse_attr;
1354        esw_attr = attr->esw_attr;
1355
1356        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1357                int mirred_ifindex;
1358
1359                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1360                        continue;
1361
1362                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1363                out_dev = __dev_get_by_index(dev_net(priv->netdev),
1364                                             mirred_ifindex);
1365                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
1366                                         extack, &encap_dev, &encap_valid);
1367                if (err)
1368                        return err;
1369
1370                out_priv = netdev_priv(encap_dev);
1371                rpriv = out_priv->ppriv;
1372                esw_attr->dests[out_index].rep = rpriv->rep;
1373                esw_attr->dests[out_index].mdev = out_priv->mdev;
1374        }
1375
1376        err = mlx5_eswitch_add_vlan_action(esw, attr);
1377        if (err)
1378                return err;
1379
1380        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1381            !(attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR)) {
1382                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
1383                dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
1384                if (err)
1385                        return err;
1386        }
1387
1388        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1389                counter = mlx5_fc_create(esw_attr->counter_dev, true);
1390                if (IS_ERR(counter))
1391                        return PTR_ERR(counter);
1392
1393                attr->counter = counter;
1394        }
1395
1396        /* we get here if one of the following takes place:
1397         * (1) there's no error
1398         * (2) there's an encap action and we don't have valid neigh
1399         */
1400        if (!encap_valid)
1401                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1402        else
1403                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1404
1405        if (IS_ERR(flow->rule[0]))
1406                return PTR_ERR(flow->rule[0]);
1407        else
1408                flow_flag_set(flow, OFFLOADED);
1409
1410        return 0;
1411}
1412
1413static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1414{
1415        struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1416        void *headers_v = MLX5_ADDR_OF(fte_match_param,
1417                                       spec->match_value,
1418                                       misc_parameters_3);
1419        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1420                                             headers_v,
1421                                             geneve_tlv_option_0_data);
1422
1423        return !!geneve_tlv_opt_0_data;
1424}
1425
1426static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
1427                                  struct mlx5e_tc_flow *flow)
1428{
1429        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1430        struct mlx5_flow_attr *attr = flow->attr;
1431        int out_index;
1432
1433        mlx5e_put_flow_tunnel_id(flow);
1434
1435        if (flow_flag_test(flow, NOT_READY))
1436                remove_unready_flow(flow);
1437
1438        if (mlx5e_is_offloaded_flow(flow)) {
1439                if (flow_flag_test(flow, SLOW))
1440                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
1441                else
1442                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1443        }
1444
1445        if (mlx5_flow_has_geneve_opt(flow))
1446                mlx5_geneve_tlv_option_del(priv->mdev->geneve);
1447
1448        mlx5_eswitch_del_vlan_action(esw, attr);
1449
1450        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
1451                if (attr->esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
1452                        mlx5e_detach_encap(priv, flow, out_index);
1453                        kfree(attr->parse_attr->tun_info[out_index]);
1454                }
1455        kvfree(attr->parse_attr);
1456
1457        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
1458
1459        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1460                mlx5e_detach_mod_hdr(priv, flow);
1461
1462        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1463                mlx5_fc_destroy(attr->esw_attr->counter_dev, attr->counter);
1464
1465        if (flow_flag_test(flow, L3_TO_L2_DECAP))
1466                mlx5e_detach_decap(priv, flow);
1467
1468        kfree(flow->attr);
1469}
1470
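/* Called when the neighbour of an encap entry became valid: offload the
 * cached encap header to the device and move the flows attached to the
 * entry from their slow path rules to encap (fast path) rules.
 */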
1471void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
1472                              struct mlx5e_encap_entry *e,
1473                              struct list_head *flow_list)
1474{
1475        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1476        struct mlx5_esw_flow_attr *esw_attr;
1477        struct mlx5_flow_handle *rule;
1478        struct mlx5_flow_attr *attr;
1479        struct mlx5_flow_spec *spec;
1480        struct mlx5e_tc_flow *flow;
1481        int err;
1482
1483        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
1484                                                     e->reformat_type,
1485                                                     e->encap_size, e->encap_header,
1486                                                     MLX5_FLOW_NAMESPACE_FDB);
1487        if (IS_ERR(e->pkt_reformat)) {
1488                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
1489                               PTR_ERR(e->pkt_reformat));
1490                return;
1491        }
1492        e->flags |= MLX5_ENCAP_ENTRY_VALID;
1493        mlx5e_rep_queue_neigh_stats_work(priv);
1494
1495        list_for_each_entry(flow, flow_list, tmp_list) {
1496                bool all_flow_encaps_valid = true;
1497                int i;
1498
1499                if (!mlx5e_is_offloaded_flow(flow))
1500                        continue;
1501                attr = flow->attr;
1502                esw_attr = attr->esw_attr;
1503                spec = &attr->parse_attr->spec;
1504
1505                esw_attr->dests[flow->tmp_efi_index].pkt_reformat = e->pkt_reformat;
1506                esw_attr->dests[flow->tmp_efi_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1507                /* Flow can be associated with multiple encap entries.
1508                 * Before offloading the flow verify that all of them have
1509                 * a valid neighbour.
1510                 */
1511                for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
1512                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
1513                                continue;
1514                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
1515                                all_flow_encaps_valid = false;
1516                                break;
1517                        }
1518                }
1519                /* Do not offload flows with unresolved neighbors */
1520                if (!all_flow_encaps_valid)
1521                        continue;
1522                /* update from slow path rule to encap rule */
1523                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1524                if (IS_ERR(rule)) {
1525                        err = PTR_ERR(rule);
1526                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1527                                       err);
1528                        continue;
1529                }
1530
1531                mlx5e_tc_unoffload_from_slow_path(esw, flow);
1532                flow->rule[0] = rule;
1533                /* was unset when slow path rule removed */
1534                flow_flag_set(flow, OFFLOADED);
1535        }
1536}
1537
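/* Called when the neighbour of an encap entry became invalid: move the
 * flows attached to the entry back to slow path rules and release the
 * device encap header object.
 */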
1538void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
1539                              struct mlx5e_encap_entry *e,
1540                              struct list_head *flow_list)
1541{
1542        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1543        struct mlx5_esw_flow_attr *esw_attr;
1544        struct mlx5_flow_handle *rule;
1545        struct mlx5_flow_attr *attr;
1546        struct mlx5_flow_spec *spec;
1547        struct mlx5e_tc_flow *flow;
1548        int err;
1549
1550        list_for_each_entry(flow, flow_list, tmp_list) {
1551                if (!mlx5e_is_offloaded_flow(flow))
1552                        continue;
1553                attr = flow->attr;
1554                esw_attr = attr->esw_attr;
1555                spec = &attr->parse_attr->spec;
1556
1557                /* update from encap rule to slow path rule */
1558                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1559                /* mark the flow's encap dest as non-valid */
1560                esw_attr->dests[flow->tmp_efi_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
1561
1562                if (IS_ERR(rule)) {
1563                        err = PTR_ERR(rule);
1564                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1565                                       err);
1566                        continue;
1567                }
1568
1569                mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
1570                flow->rule[0] = rule;
1571                /* was unset when fast path rule removed */
1572                flow_flag_set(flow, OFFLOADED);
1573        }
1574
1575        /* the encap was valid up to now; mark it invalid and free the HW object */
1576        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1577        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1578}
1579
1580static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
1581{
1582        return flow->attr->counter;
1583}
1584
1585/* Takes reference to all flows attached to encap and adds the flows to
1586 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
1587 */
1588void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
1589{
1590        struct encap_flow_item *efi;
1591        struct mlx5e_tc_flow *flow;
1592
1593        list_for_each_entry(efi, &e->flows, list) {
1594                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
1595                if (IS_ERR(mlx5e_flow_get(flow)))
1596                        continue;
1597                wait_for_completion(&flow->init_done);
1598
1599                flow->tmp_efi_index = efi->index;
1600                list_add(&flow->tmp_list, flow_list);
1601        }
1602}
1603
1604/* Release the flow references taken via 'tmp_list' for all flows on flow_list. */
1605void mlx5e_put_encap_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
1606{
1607        struct mlx5e_tc_flow *flow, *tmp;
1608
1609        list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
1610                mlx5e_flow_put(priv, flow);
1611}
1612
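/* Return the next encap entry on nhe's list that could be referenced
 * and reached a valid state, releasing the entry the search started
 * from. Returns NULL when the end of the list is reached.
 */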
1613static struct mlx5e_encap_entry *
1614mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
1615                           struct mlx5e_encap_entry *e)
1616{
1617        struct mlx5e_encap_entry *next = NULL;
1618
1619retry:
1620        rcu_read_lock();
1621
1622        /* find encap with non-zero reference counter value */
1623        for (next = e ?
1624                     list_next_or_null_rcu(&nhe->encap_list,
1625                                           &e->encap_list,
1626                                           struct mlx5e_encap_entry,
1627                                           encap_list) :
1628                     list_first_or_null_rcu(&nhe->encap_list,
1629                                            struct mlx5e_encap_entry,
1630                                            encap_list);
1631             next;
1632             next = list_next_or_null_rcu(&nhe->encap_list,
1633                                          &next->encap_list,
1634                                          struct mlx5e_encap_entry,
1635                                          encap_list))
1636                if (mlx5e_encap_take(next))
1637                        break;
1638
1639        rcu_read_unlock();
1640
1641        /* release starting encap */
1642        if (e)
1643                mlx5e_encap_put(netdev_priv(e->out_dev), e);
1644        if (!next)
1645                return next;
1646
1647        /* wait for encap to be fully initialized */
1648        wait_for_completion(&next->res_ready);
1649        /* continue searching if encap entry is not in valid state after completion */
1650        if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
1651                e = next;
1652                goto retry;
1653        }
1654
1655        return next;
1656}
1657
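/* Check whether any offloaded flow going through one of nhe's encap
 * entries saw traffic since the last report; if so, update
 * reported_lastuse and send an event on the corresponding neigh entry.
 */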
1658void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
1659{
1660        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
1661        struct mlx5e_encap_entry *e = NULL;
1662        struct mlx5e_tc_flow *flow;
1663        struct mlx5_fc *counter;
1664        struct neigh_table *tbl;
1665        bool neigh_used = false;
1666        struct neighbour *n;
1667        u64 lastuse;
1668
1669        if (m_neigh->family == AF_INET)
1670                tbl = &arp_tbl;
1671#if IS_ENABLED(CONFIG_IPV6)
1672        else if (m_neigh->family == AF_INET6)
1673                tbl = ipv6_stub->nd_tbl;
1674#endif
1675        else
1676                return;
1677
1678        /* mlx5e_get_next_valid_encap() releases previous encap before returning
1679         * next one.
1680         */
1681        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
1682                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
1683                struct encap_flow_item *efi, *tmp;
1684                struct mlx5_eswitch *esw;
1685                LIST_HEAD(flow_list);
1686
1687                esw = priv->mdev->priv.eswitch;
1688                mutex_lock(&esw->offloads.encap_tbl_lock);
1689                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
1690                        flow = container_of(efi, struct mlx5e_tc_flow,
1691                                            encaps[efi->index]);
1692                        if (IS_ERR(mlx5e_flow_get(flow)))
1693                                continue;
1694                        list_add(&flow->tmp_list, &flow_list);
1695
1696                        if (mlx5e_is_offloaded_flow(flow)) {
1697                                counter = mlx5e_tc_get_counter(flow);
1698                                lastuse = mlx5_fc_query_lastuse(counter);
1699                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
1700                                        neigh_used = true;
1701                                        break;
1702                                }
1703                        }
1704                }
1705                mutex_unlock(&esw->offloads.encap_tbl_lock);
1706
1707                mlx5e_put_encap_flow_list(priv, &flow_list);
1708                if (neigh_used) {
1709                        /* release current encap before breaking the loop */
1710                        mlx5e_encap_put(priv, e);
1711                        break;
1712                }
1713        }
1714
1715        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
1716
1717        if (neigh_used) {
1718                nhe->reported_lastuse = jiffies;
1719
1720                /* find the relevant neigh according to the cached device and
1721                 * dst ip pair
1722                 */
1723                n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
1724                if (!n)
1725                        return;
1726
1727                neigh_event_send(n, NULL);
1728                neigh_release(n);
1729        }
1730}
1731
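/* Free an encap entry that has no flows left: if it was successfully
 * initialized, detach it from the representor and, when marked valid,
 * release the device encap header object.
 */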
1732static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1733{
1734        WARN_ON(!list_empty(&e->flows));
1735
1736        if (e->compl_result > 0) {
1737                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1738
1739                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1740                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1741        }
1742
1743        kfree(e->tun_info);
1744        kfree(e->encap_header);
1745        kfree_rcu(e, rcu);
1746}
1747
1748static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
1749                                struct mlx5e_decap_entry *d)
1750{
1751        WARN_ON(!list_empty(&d->flows));
1752
1753        if (!d->compl_result)
1754                mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
1755
1756        kfree_rcu(d, rcu);
1757}
1758
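/* Release a reference to an encap entry; the last put removes the entry
 * from the encap table and frees it.
 */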
1759void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
1760{
1761        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1762
1763        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
1764                return;
1765        hash_del_rcu(&e->encap_hlist);
1766        mutex_unlock(&esw->offloads.encap_tbl_lock);
1767
1768        mlx5e_encap_dealloc(priv, e);
1769}
1770
1771static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
1772{
1773        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1774
1775        if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
1776                return;
1777        hash_del_rcu(&d->hlist);
1778        mutex_unlock(&esw->offloads.decap_tbl_lock);
1779
1780        mlx5e_decap_dealloc(priv, d);
1781}
1782
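/* Unlink the flow from the encap entry it uses at out_index; the entry
 * itself is freed when this was its last reference.
 */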
1783static void mlx5e_detach_encap(struct mlx5e_priv *priv,
1784                               struct mlx5e_tc_flow *flow, int out_index)
1785{
1786        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
1787        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1788
1789        /* flow wasn't fully initialized */
1790        if (!e)
1791                return;
1792
1793        mutex_lock(&esw->offloads.encap_tbl_lock);
1794        list_del(&flow->encaps[out_index].list);
1795        flow->encaps[out_index].e = NULL;
1796        if (!refcount_dec_and_test(&e->refcnt)) {
1797                mutex_unlock(&esw->offloads.encap_tbl_lock);
1798                return;
1799        }
1800        hash_del_rcu(&e->encap_hlist);
1801        mutex_unlock(&esw->offloads.encap_tbl_lock);
1802
1803        mlx5e_encap_dealloc(priv, e);
1804}
1805
1806static void mlx5e_detach_decap(struct mlx5e_priv *priv,
1807                               struct mlx5e_tc_flow *flow)
1808{
1809        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1810        struct mlx5e_decap_entry *d = flow->decap_reformat;
1811
1812        if (!d)
1813                return;
1814
1815        mutex_lock(&esw->offloads.decap_tbl_lock);
1816        list_del(&flow->l3_to_l2_reformat);
1817        flow->decap_reformat = NULL;
1818
1819        if (!refcount_dec_and_test(&d->refcnt)) {
1820                mutex_unlock(&esw->offloads.decap_tbl_lock);
1821                return;
1822        }
1823        hash_del_rcu(&d->hlist);
1824        mutex_unlock(&esw->offloads.decap_tbl_lock);
1825
1826        mlx5e_decap_dealloc(priv, d);
1827}
1828
1829static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1830{
1831        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
1832
1833        if (!flow_flag_test(flow, ESWITCH) ||
1834            !flow_flag_test(flow, DUP))
1835                return;
1836
1837        mutex_lock(&esw->offloads.peer_mutex);
1838        list_del(&flow->peer);
1839        mutex_unlock(&esw->offloads.peer_mutex);
1840
1841        flow_flag_clear(flow, DUP);
1842
1843        if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
1844                mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
1845                kfree(flow->peer_flow);
1846        }
1847
1848        flow->peer_flow = NULL;
1849}
1850
1851static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
1852{
1853        struct mlx5_core_dev *dev = flow->priv->mdev;
1854        struct mlx5_devcom *devcom = dev->priv.devcom;
1855        struct mlx5_eswitch *peer_esw;
1856
1857        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1858        if (!peer_esw)
1859                return;
1860
1861        __mlx5e_tc_del_fdb_peer_flow(flow);
1862        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1863}
1864
1865static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
1866                              struct mlx5e_tc_flow *flow)
1867{
1868        if (mlx5e_is_eswitch_flow(flow)) {
1869                mlx5e_tc_del_fdb_peer_flow(flow);
1870                mlx5e_tc_del_fdb_flow(priv, flow);
1871        } else {
1872                mlx5e_tc_del_nic_flow(priv, flow);
1873        }
1874}
1875
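/* true when the rule forwards to another chain via a goto action */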
1876static bool flow_has_tc_fwd_action(struct flow_cls_offload *f)
1877{
1878        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1879        struct flow_action *flow_action = &rule->action;
1880        const struct flow_action_entry *act;
1881        int i;
1882
1883        flow_action_for_each(i, act, flow_action) {
1884                switch (act->id) {
1885                case FLOW_ACTION_GOTO:
1886                        return true;
1887                default:
1888                        continue;
1889                }
1890        }
1891
1892        return false;
1893}
1894
1895static int
1896enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
1897                                    struct flow_dissector_key_enc_opts *opts,
1898                                    struct netlink_ext_ack *extack,
1899                                    bool *dont_care)
1900{
1901        struct geneve_opt *opt;
1902        int off = 0;
1903
1904        *dont_care = true;
1905
1906        while (opts->len > off) {
1907                opt = (struct geneve_opt *)&opts->data[off];
1908
1909                if (!(*dont_care) || opt->opt_class || opt->type ||
1910                    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
1911                        *dont_care = false;
1912
1913                        if (opt->opt_class != htons(U16_MAX) ||
1914                            opt->type != U8_MAX) {
1915                                NL_SET_ERR_MSG(extack,
1916                                               "Partial match of tunnel options in chain > 0 isn't supported");
1917                                netdev_warn(priv->netdev,
1918                                            "Partial match of tunnel options in chain > 0 isn't supported");
1919                                return -EOPNOTSUPP;
1920                        }
1921                }
1922
1923                off += sizeof(struct geneve_opt) + opt->length * 4;
1924        }
1925
1926        return 0;
1927}
1928
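/* Copy the match key identified by diss_key from the rule's flow
 * dissector into dst.
 */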
1929#define COPY_DISSECTOR(rule, diss_key, dst)\
1930({ \
1931        struct flow_rule *__rule = (rule);\
1932        typeof(dst) __dst = dst;\
1933\
1934        memcpy(__dst,\
1935               skb_flow_dissector_target(__rule->match.dissector,\
1936                                         diss_key,\
1937                                         __rule->match.key),\
1938               sizeof(*__dst));\
1939})
1940
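/* Map the flow's tunnel match (headers and encap options) to a unique
 * tunnel id. On chain 0 the id is written to a register through a header
 * rewrite so that rules on higher chains can match on it; on chain > 0
 * the flow matches on the register value instead of the tunnel headers.
 */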
1941static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
1942                                    struct mlx5e_tc_flow *flow,
1943                                    struct flow_cls_offload *f,
1944                                    struct net_device *filter_dev)
1945{
1946        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1947        struct netlink_ext_ack *extack = f->common.extack;
1948        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
1949        struct flow_match_enc_opts enc_opts_match;
1950        struct tunnel_match_enc_opts tun_enc_opts;
1951        struct mlx5_rep_uplink_priv *uplink_priv;
1952        struct mlx5_flow_attr *attr = flow->attr;
1953        struct mlx5e_rep_priv *uplink_rpriv;
1954        struct tunnel_match_key tunnel_key;
1955        bool enc_opts_is_dont_care = true;
1956        u32 tun_id, enc_opts_id = 0;
1957        struct mlx5_eswitch *esw;
1958        u32 value, mask;
1959        int err;
1960
1961        esw = priv->mdev->priv.eswitch;
1962        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1963        uplink_priv = &uplink_rpriv->uplink_priv;
1964
1965        memset(&tunnel_key, 0, sizeof(tunnel_key));
1966        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
1967                       &tunnel_key.enc_control);
1968        if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
1969                COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
1970                               &tunnel_key.enc_ipv4);
1971        else
1972                COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
1973                               &tunnel_key.enc_ipv6);
1974        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
1975        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
1976                       &tunnel_key.enc_tp);
1977        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
1978                       &tunnel_key.enc_key_id);
1979        tunnel_key.filter_ifindex = filter_dev->ifindex;
1980
1981        err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
1982        if (err)
1983                return err;
1984
1985        flow_rule_match_enc_opts(rule, &enc_opts_match);
1986        err = enc_opts_is_dont_care_or_full_match(priv,
1987                                                  enc_opts_match.mask,
1988                                                  extack,
1989                                                  &enc_opts_is_dont_care);
1990        if (err)
1991                goto err_enc_opts;
1992
1993        if (!enc_opts_is_dont_care) {
1994                memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
1995                memcpy(&tun_enc_opts.key, enc_opts_match.key,
1996                       sizeof(*enc_opts_match.key));
1997                memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
1998                       sizeof(*enc_opts_match.mask));
1999
2000                err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2001                                  &tun_enc_opts, &enc_opts_id);
2002                if (err)
2003                        goto err_enc_opts;
2004        }
2005
2006        value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2007        mask = enc_opts_id ? TUNNEL_ID_MASK :
2008                             (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2009
2010        if (attr->chain) {
2011                mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2012                                            TUNNEL_TO_REG, value, mask);
2013        } else {
2014                mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2015                err = mlx5e_tc_match_to_reg_set(priv->mdev,
2016                                                mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2017                                                TUNNEL_TO_REG, value);
2018                if (err)
2019                        goto err_set;
2020
2021                attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2022        }
2023
2024        flow->tunnel_id = value;
2025        return 0;
2026
2027err_set:
2028        if (enc_opts_id)
2029                mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2030                               enc_opts_id);
2031err_enc_opts:
2032        mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2033        return err;
2034}
2035
2036static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2037{
2038        u32 enc_opts_id = flow->tunnel_id & ENC_OPTS_BITS_MASK;
2039        u32 tun_id = flow->tunnel_id >> ENC_OPTS_BITS;
2040        struct mlx5_rep_uplink_priv *uplink_priv;
2041        struct mlx5e_rep_priv *uplink_rpriv;
2042        struct mlx5_eswitch *esw;
2043
2044        esw = flow->priv->mdev->priv.eswitch;
2045        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2046        uplink_priv = &uplink_rpriv->uplink_priv;
2047
2048        if (tun_id)
2049                mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2050        if (enc_opts_id)
2051                mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2052                               enc_opts_id);
2053}
2054
2055u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
2056{
2057        return flow->tunnel_id;
2058}
2059
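/* Match on the device ip_version field instead of the ethertype when
 * the capability is present and the filter fully masks n_proto as
 * IPv4/IPv6; otherwise fall back to an exact ethertype match.
 */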
2060void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2061                            struct flow_match_basic *match, bool outer,
2062                            void *headers_c, void *headers_v)
2063{
2064        bool ip_version_cap;
2065
2066        ip_version_cap = outer ?
2067                MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2068                                          ft_field_support.outer_ip_version) :
2069                MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2070                                          ft_field_support.inner_ip_version);
2071
2072        if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2073            (match->key->n_proto == htons(ETH_P_IP) ||
2074             match->key->n_proto == htons(ETH_P_IPV6))) {
2075                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2076                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2077                         match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2078        } else {
2079                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2080                         ntohs(match->mask->n_proto));
2081                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2082                         ntohs(match->key->n_proto));
2083        }
2084}
2085
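/* Parse the tunnel (outer header) part of the match. On chain 0 the
 * tunnel headers are matched directly and decap is requested; flows on
 * higher chains match on a tunnel id register instead, which chain 0
 * flows with a goto action set up via mlx5e_get_flow_tunnel_id().
 */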
2086static int parse_tunnel_attr(struct mlx5e_priv *priv,
2087                             struct mlx5e_tc_flow *flow,
2088                             struct mlx5_flow_spec *spec,
2089                             struct flow_cls_offload *f,
2090                             struct net_device *filter_dev,
2091                             u8 *match_level,
2092                             bool *match_inner)
2093{
2094        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2095        struct netlink_ext_ack *extack = f->common.extack;
2096        bool needs_mapping, sets_mapping;
2097        int err;
2098
2099        if (!mlx5e_is_eswitch_flow(flow))
2100                return -EOPNOTSUPP;
2101
2102        needs_mapping = !!flow->attr->chain;
2103        sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
2104        *match_inner = !needs_mapping;
2105
2106        if ((needs_mapping || sets_mapping) &&
2107            !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2108                NL_SET_ERR_MSG(extack,
2109                               "Chains on tunnel devices aren't supported without register loopback support");
2110                netdev_warn(priv->netdev,
2111                            "Chains on tunnel devices aren't supported without register loopback support");
2112                return -EOPNOTSUPP;
2113        }
2114
2115        if (!flow->attr->chain) {
2116                err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2117                                         match_level);
2118                if (err) {
2119                        NL_SET_ERR_MSG_MOD(extack,
2120                                           "Failed to parse tunnel attributes");
2121                        netdev_warn(priv->netdev,
2122                                    "Failed to parse tunnel attributes");
2123                        return err;
2124                }
2125
2126                /* With mpls over udp we decapsulate using packet reformat
2127                 * object
2128                 */
2129                if (!netif_is_bareudp(filter_dev))
2130                        flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2131        }
2132
2133        if (!needs_mapping && !sets_mapping)
2134                return 0;
2135
2136        return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2137}
2138
2139static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2140{
2141        return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2142                            inner_headers);
2143}
2144
2145static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2146{
2147        return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2148                            inner_headers);
2149}
2150
2151static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2152{
2153        return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2154                            outer_headers);
2155}
2156
2157static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2158{
2159        return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2160                            outer_headers);
2161}
2162
2163static void *get_match_headers_value(u32 flags,
2164                                     struct mlx5_flow_spec *spec)
2165{
2166        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2167                get_match_inner_headers_value(spec) :
2168                get_match_outer_headers_value(spec);
2169}
2170
2171static void *get_match_headers_criteria(u32 flags,
2172                                        struct mlx5_flow_spec *spec)
2173{
2174        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2175                get_match_inner_headers_criteria(spec) :
2176                get_match_outer_headers_criteria(spec);
2177}
2178
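/* A meta (ingress ifindex) match can only be offloaded as an exact
 * match on the filter device itself.
 */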
2179static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2180                                   struct flow_cls_offload *f)
2181{
2182        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2183        struct netlink_ext_ack *extack = f->common.extack;
2184        struct net_device *ingress_dev;
2185        struct flow_match_meta match;
2186
2187        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2188                return 0;
2189
2190        flow_rule_match_meta(rule, &match);
2191        if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2192                NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2193                return -EOPNOTSUPP;
2194        }
2195
2196        ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2197                                         match.key->ingress_ifindex);
2198        if (!ingress_dev) {
2199                NL_SET_ERR_MSG_MOD(extack,
2200                                   "Can't find the ingress port to match on");
2201                return -ENOENT;
2202        }
2203
2204        if (ingress_dev != filter_dev) {
2205                NL_SET_ERR_MSG_MOD(extack,
2206                                   "Can't match on the ingress filter port");
2207                return -EOPNOTSUPP;
2208        }
2209
2210        return 0;
2211}
2212
2213static bool skip_key_basic(struct net_device *filter_dev,
2214                           struct flow_cls_offload *f)
2215{
2216        /* When doing mpls over udp decap, the user needs to provide
2217         * MPLS_UC as the protocol in order to be able to match on mpls
2218         * label fields.  However, the actual ethertype is IP so we want to
2219         * avoid matching on this, otherwise we'll fail the match.
2220         */
2221        if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2222                return true;
2223
2224        return false;
2225}
2226
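/* Translate a flower classifier match into the mlx5 flow spec, recording
 * the deepest header layer (L2/L3/L4) matched for both the inner and the
 * outer headers.
 */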
2227static int __parse_cls_flower(struct mlx5e_priv *priv,
2228                              struct mlx5e_tc_flow *flow,
2229                              struct mlx5_flow_spec *spec,
2230                              struct flow_cls_offload *f,
2231                              struct net_device *filter_dev,
2232                              u8 *inner_match_level, u8 *outer_match_level)
2233{
2234        struct netlink_ext_ack *extack = f->common.extack;
2235        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2236                                       outer_headers);
2237        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2238                                       outer_headers);
2239        void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2240                                    misc_parameters);
2241        void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2242                                    misc_parameters);
2243        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2244        struct flow_dissector *dissector = rule->match.dissector;
2245        u16 addr_type = 0;
2246        u8 ip_proto = 0;
2247        u8 *match_level;
2248        int err;
2249
2250        match_level = outer_match_level;
2251
2252        if (dissector->used_keys &
2253            ~(BIT(FLOW_DISSECTOR_KEY_META) |
2254              BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2255              BIT(FLOW_DISSECTOR_KEY_BASIC) |
2256              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2257              BIT(FLOW_DISSECTOR_KEY_VLAN) |
2258              BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2259              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2260              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2261              BIT(FLOW_DISSECTOR_KEY_PORTS) |
2262              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2263              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2264              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2265              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2266              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2267              BIT(FLOW_DISSECTOR_KEY_TCP) |
2268              BIT(FLOW_DISSECTOR_KEY_IP)  |
2269              BIT(FLOW_DISSECTOR_KEY_CT) |
2270              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2271              BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2272              BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2273                NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2274                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
2275                            dissector->used_keys);
2276                return -EOPNOTSUPP;
2277        }
2278
2279        if (mlx5e_get_tc_tun(filter_dev)) {
2280                bool match_inner = false;
2281
2282                err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2283                                        outer_match_level, &match_inner);
2284                if (err)
2285                        return err;
2286
2287                if (match_inner) {
2288                        /* header pointers should point to the inner headers
2289                         * if the packet was decapsulated already.
2290                         * outer headers are set by parse_tunnel_attr.
2291                         */
2292                        match_level = inner_match_level;
2293                        headers_c = get_match_inner_headers_criteria(spec);
2294                        headers_v = get_match_inner_headers_value(spec);
2295                }
2296        }
2297
2298        err = mlx5e_flower_parse_meta(filter_dev, f);
2299        if (err)
2300                return err;
2301
2302        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2303            !skip_key_basic(filter_dev, f)) {
2304                struct flow_match_basic match;
2305
2306                flow_rule_match_basic(rule, &match);
2307                mlx5e_tc_set_ethertype(priv->mdev, &match,
2308                                       match_level == outer_match_level,
2309                                       headers_c, headers_v);
2310
2311                if (match.mask->n_proto)
2312                        *match_level = MLX5_MATCH_L2;
2313        }
2314        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2315            is_vlan_dev(filter_dev)) {
2316                struct flow_dissector_key_vlan filter_dev_mask;
2317                struct flow_dissector_key_vlan filter_dev_key;
2318                struct flow_match_vlan match;
2319
2320                if (is_vlan_dev(filter_dev)) {
2321                        match.key = &filter_dev_key;
2322                        match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2323                        match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2324                        match.key->vlan_priority = 0;
2325                        match.mask = &filter_dev_mask;
2326                        memset(match.mask, 0xff, sizeof(*match.mask));
2327                        match.mask->vlan_priority = 0;
2328                } else {
2329                        flow_rule_match_vlan(rule, &match);
2330                }
2331                if (match.mask->vlan_id ||
2332                    match.mask->vlan_priority ||
2333                    match.mask->vlan_tpid) {
2334                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2335                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2336                                         svlan_tag, 1);
2337                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2338                                         svlan_tag, 1);
2339                        } else {
2340                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2341                                         cvlan_tag, 1);
2342                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2343                                         cvlan_tag, 1);
2344                        }
2345
2346                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2347                                 match.mask->vlan_id);
2348                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2349                                 match.key->vlan_id);
2350
2351                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2352                                 match.mask->vlan_priority);
2353                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2354                                 match.key->vlan_priority);
2355
2356                        *match_level = MLX5_MATCH_L2;
2357                }
2358        } else if (*match_level != MLX5_MATCH_NONE) {
2359                /* cvlan_tag enabled in match criteria and
2360                 * disabled in match value means both S & C tags
2361                 * don't exist (untagged for both)
2362                 */
2363                MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2364                *match_level = MLX5_MATCH_L2;
2365        }
2366
2367        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2368                struct flow_match_vlan match;
2369
2370                flow_rule_match_cvlan(rule, &match);
2371                if (match.mask->vlan_id ||
2372                    match.mask->vlan_priority ||
2373                    match.mask->vlan_tpid) {
2374                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2375                                MLX5_SET(fte_match_set_misc, misc_c,
2376                                         outer_second_svlan_tag, 1);
2377                                MLX5_SET(fte_match_set_misc, misc_v,
2378                                         outer_second_svlan_tag, 1);
2379                        } else {
2380                                MLX5_SET(fte_match_set_misc, misc_c,
2381                                         outer_second_cvlan_tag, 1);
2382                                MLX5_SET(fte_match_set_misc, misc_v,
2383                                         outer_second_cvlan_tag, 1);
2384                        }
2385
2386                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2387                                 match.mask->vlan_id);
2388                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2389                                 match.key->vlan_id);
2390                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2391                                 match.mask->vlan_priority);
2392                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2393                                 match.key->vlan_priority);
2394
2395                        *match_level = MLX5_MATCH_L2;
2396                        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2397                }
2398        }
2399
2400        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2401                struct flow_match_eth_addrs match;
2402
2403                flow_rule_match_eth_addrs(rule, &match);
2404                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2405                                             dmac_47_16),
2406                                match.mask->dst);
2407                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2408                                             dmac_47_16),
2409                                match.key->dst);
2410
2411                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2412                                             smac_47_16),
2413                                match.mask->src);
2414                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2415                                             smac_47_16),
2416                                match.key->src);
2417
2418                if (!is_zero_ether_addr(match.mask->src) ||
2419                    !is_zero_ether_addr(match.mask->dst))
2420                        *match_level = MLX5_MATCH_L2;
2421        }
2422
2423        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2424                struct flow_match_control match;
2425
2426                flow_rule_match_control(rule, &match);
2427                addr_type = match.key->addr_type;
2428
2429                /* the HW doesn't support frag first/later */
2430                if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
2431                        return -EOPNOTSUPP;
2432
2433                if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2434                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2435                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2436                                 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2437
2438                        /* the HW doesn't need L3 inline to match on frag=no */
2439                        if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2440                                *match_level = MLX5_MATCH_L2;
2441                        else
2442                                *match_level = MLX5_MATCH_L3;
2443                }
2444        }
2445        /* ***  L2 attributes parsing up to here *** */
2446
2447        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2448                struct flow_match_basic match;
2449
2450                flow_rule_match_basic(rule, &match);
2451                ip_proto = match.key->ip_proto;
2452
2453                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2454                         match.mask->ip_proto);
2455                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2456                         match.key->ip_proto);
2457
2458                if (match.mask->ip_proto)
2459                        *match_level = MLX5_MATCH_L3;
2460        }
2461
2462        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2463                struct flow_match_ipv4_addrs match;
2464
2465                flow_rule_match_ipv4_addrs(rule, &match);
2466                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2467                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2468                       &match.mask->src, sizeof(match.mask->src));
2469                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2470                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
2471                       &match.key->src, sizeof(match.key->src));
2472                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2473                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2474                       &match.mask->dst, sizeof(match.mask->dst));
2475                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2476                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2477                       &match.key->dst, sizeof(match.key->dst));
2478
2479                if (match.mask->src || match.mask->dst)
2480                        *match_level = MLX5_MATCH_L3;
2481        }
2482
2483        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2484                struct flow_match_ipv6_addrs match;
2485
2486                flow_rule_match_ipv6_addrs(rule, &match);
2487                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2488                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2489                       &match.mask->src, sizeof(match.mask->src));
2490                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2491                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
2492                       &match.key->src, sizeof(match.key->src));
2493
2494                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2495                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2496                       &match.mask->dst, sizeof(match.mask->dst));
2497                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2498                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2499                       &match.key->dst, sizeof(match.key->dst));
2500
2501                if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2502                    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2503                        *match_level = MLX5_MATCH_L3;
2504        }
2505
2506        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2507                struct flow_match_ip match;
2508
2509                flow_rule_match_ip(rule, &match);
2510                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2511                         match.mask->tos & 0x3);
2512                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2513                         match.key->tos & 0x3);
2514
2515                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2516                         match.mask->tos >> 2);
2517                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2518                         match.key->tos  >> 2);
2519
2520                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2521                         match.mask->ttl);
2522                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2523                         match.key->ttl);
2524
2525                if (match.mask->ttl &&
2526                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2527                                                ft_field_support.outer_ipv4_ttl)) {
2528                        NL_SET_ERR_MSG_MOD(extack,
2529                                           "Matching on TTL is not supported");
2530                        return -EOPNOTSUPP;
2531                }
2532
2533                if (match.mask->tos || match.mask->ttl)
2534                        *match_level = MLX5_MATCH_L3;
2535        }
2536
2537        /* ***  L3 attributes parsing up to here *** */
2538
2539        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2540                struct flow_match_ports match;
2541
2542                flow_rule_match_ports(rule, &match);
2543                switch (ip_proto) {
2544                case IPPROTO_TCP:
2545                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2546                                 tcp_sport, ntohs(match.mask->src));
2547                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2548                                 tcp_sport, ntohs(match.key->src));
2549
2550                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2551                                 tcp_dport, ntohs(match.mask->dst));
2552                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2553                                 tcp_dport, ntohs(match.key->dst));
2554                        break;
2555
2556                case IPPROTO_UDP:
2557                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2558                                 udp_sport, ntohs(match.mask->src));
2559                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2560                                 udp_sport, ntohs(match.key->src));
2561
2562                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2563                                 udp_dport, ntohs(match.mask->dst));
2564                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2565                                 udp_dport, ntohs(match.key->dst));
2566                        break;
2567                default:
2568                        NL_SET_ERR_MSG_MOD(extack,
2569                                           "Only UDP and TCP transports are supported for L4 matching");
2570                        netdev_err(priv->netdev,
2571                                   "Only UDP and TCP transport are supported\n");
2572                        return -EINVAL;
2573                }
2574
2575                if (match.mask->src || match.mask->dst)
2576                        *match_level = MLX5_MATCH_L4;
2577        }
2578
2579        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2580                struct flow_match_tcp match;
2581
2582                flow_rule_match_tcp(rule, &match);
2583                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2584                         ntohs(match.mask->flags));
2585                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2586                         ntohs(match.key->flags));
2587
2588                if (match.mask->flags)
2589                        *match_level = MLX5_MATCH_L4;
2590        }
2591
2592        return 0;
2593}
2594
2595static int parse_cls_flower(struct mlx5e_priv *priv,
2596                            struct mlx5e_tc_flow *flow,
2597                            struct mlx5_flow_spec *spec,
2598                            struct flow_cls_offload *f,
2599                            struct net_device *filter_dev)
2600{
2601        u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2602        struct netlink_ext_ack *extack = f->common.extack;
2603        struct mlx5_core_dev *dev = priv->mdev;
2604        struct mlx5_eswitch *esw = dev->priv.eswitch;
2605        struct mlx5e_rep_priv *rpriv = priv->ppriv;
2606        struct mlx5_eswitch_rep *rep;
2607        bool is_eswitch_flow;
2608        int err;
2609
2610        inner_match_level = MLX5_MATCH_NONE;
2611        outer_match_level = MLX5_MATCH_NONE;
2612
2613        err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2614                                 &inner_match_level, &outer_match_level);
2615        non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2616                                 outer_match_level : inner_match_level;
2617
2618        is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2619        if (!err && is_eswitch_flow) {
2620                rep = rpriv->rep;
2621                if (rep->vport != MLX5_VPORT_UPLINK &&
2622                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2623                    esw->offloads.inline_mode < non_tunnel_match_level)) {
2624                        NL_SET_ERR_MSG_MOD(extack,
2625                                           "Flow is not offloaded due to min inline setting");
2626                        netdev_warn(priv->netdev,
2627                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2628                                    non_tunnel_match_level, esw->offloads.inline_mode);
2629                        return -EOPNOTSUPP;
2630                }
2631        }
2632
2633        flow->attr->inner_match_level = inner_match_level;
2634        flow->attr->outer_match_level = outer_match_level;
2635
2637        return err;
2638}
2639
2640struct pedit_headers {
2641        struct ethhdr  eth;
2642        struct vlan_hdr vlan;
2643        struct iphdr   ip4;
2644        struct ipv6hdr ip6;
2645        struct tcphdr  tcp;
2646        struct udphdr  udp;
2647};
2648
2649struct pedit_headers_action {
2650        struct pedit_headers    vals;
2651        struct pedit_headers    masks;
2652        u32                     pedits;
2653};
2654
2655static int pedit_header_offsets[] = {
2656        [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
2657        [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
2658        [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
2659        [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
2660        [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
2661};
2662
2663#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
2664
2665static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
2666                         struct pedit_headers_action *hdrs)
2667{
2668        u32 *curr_pmask, *curr_pval;
2669
2670        curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
2671        curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
2672
2673        if (*curr_pmask & mask)  /* disallow acting twice on the same location */
2674                goto out_err;
2675
2676        *curr_pmask |= mask;
2677        *curr_pval  |= (val & mask);
2678
2679        return 0;
2680
2681out_err:
2682        return -EOPNOTSUPP;
2683}
2684
2685struct mlx5_fields {
2686        u8  field;
2687        u8  field_bsize;
2688        u32 field_mask;
2689        u32 offset;
2690        u32 match_offset;
2691};
2692
2693#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2694                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2695                 offsetof(struct pedit_headers, field) + (off), \
2696                 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
2697
2698/* true if the masked values are equal and there is no rewritten bit
2699 * that is not also matched on.
2700 */
2701#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2702        type matchmaskx = *(type *)(matchmaskp); \
2703        type matchvalx = *(type *)(matchvalp); \
2704        type maskx = *(type *)(maskp); \
2705        type valx = *(type *)(valp); \
2706        \
2707        (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2708                                                                 matchmaskx)); \
2709})
2710
2711static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2712                         void *matchmaskp, u8 bsize)
2713{
2714        bool same = false;
2715
2716        switch (bsize) {
2717        case 8:
2718                same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2719                break;
2720        case 16:
2721                same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2722                break;
2723        case 32:
2724                same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
2725                break;
2726        }
2727
2728        return same;
2729}
2730
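/* All header fields a pedit action may rewrite, each with its mlx5
 * modify-header field id, size, mask and the offset of the matching
 * field in the flow match (used to validate rewrites against it).
 */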
2731static struct mlx5_fields fields[] = {
2732        OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2733        OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2734        OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2735        OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2736        OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
2737        OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2738
2739        OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
2740        OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
2741        OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2742        OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2743
2744        OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2745                src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2746        OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2747                src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2748        OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2749                src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2750        OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2751                src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2752        OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2753                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2754        OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2755                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2756        OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2757                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2758        OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2759                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2760        OFFLOAD(IPV6_HOPLIMIT, 8,  U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
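            /* DSCP is the top 6 bits of the IPv6 traffic class, straddling
             * the first two bytes of the header - hence a 16-bit 0xc00f mask
             * rather than a byte-wide one.
             */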
2761        OFFLOAD(IP_DSCP, 16,  0xc00f, ip6, 0, ip_dscp),
2762
2763        OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source,  0, tcp_sport),
2764        OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,    0, tcp_dport),
2765        /* in linux tcphdr tcp_flags is 8 bits long; it sits at byte ack_seq + 5 */
2766        OFFLOAD(TCP_FLAGS,  8,  U8_MAX, tcp.ack_seq, 5, tcp_flags),
2767
2768        OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2769        OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
2770};
2771
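/*
 * The shadow headers hold bytes in network order, but find_first_bit() and
 * friends below scan a host-order unsigned long.  Reinterpret a 16/32-bit
 * big-endian mask as its little-endian image so the bit offsets computed by
 * the scans line up with the offset/length the HW expects.
 */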
2772static unsigned long mask_to_le(unsigned long mask, int size)
2773{
2774        __be32 mask_be32;
2775        __be16 mask_be16;
2776
2777        if (size == 32) {
2778                mask_be32 = (__force __be32)(mask);
2779                mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2780        } else if (size == 16) {
2781                mask_be32 = (__force __be32)(mask);
2782                mask_be16 = *(__be16 *)&mask_be32;
2783                mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2784        }
2785
2786        return mask;
2787}
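
/*
 * Convert the accumulated SET/ADD shadows into HW modify-header actions.
 * Per field: writing it with both SET and ADD is rejected; a SET that only
 * restates an exact match on the same bits, or an ADD of zero, is skipped;
 * otherwise one set_action_in entry is emitted.  The HW rewrites a single
 * contiguous span of bits per field, so non-contiguous masks are refused.
 */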
2788static int offload_pedit_fields(struct mlx5e_priv *priv,
2789                                int namespace,
2790                                struct pedit_headers_action *hdrs,
2791                                struct mlx5e_tc_flow_parse_attr *parse_attr,
2792                                u32 *action_flags,
2793                                struct netlink_ext_ack *extack)
2794{
2795        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2796        int i, action_size, first, last, next_z;
2797        void *headers_c, *headers_v, *action, *vals_p;
2798        u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2799        struct mlx5e_tc_mod_hdr_acts *mod_acts;
2800        struct mlx5_fields *f;
2801        unsigned long mask, field_mask;
2802        int err;
2803        u8 cmd;
2804
2805        mod_acts = &parse_attr->mod_hdr_acts;
2806        headers_c = get_match_headers_criteria(*action_flags, &parse_attr->spec);
2807        headers_v = get_match_headers_value(*action_flags, &parse_attr->spec);
2808
2809        set_masks = &hdrs[0].masks;
2810        add_masks = &hdrs[1].masks;
2811        set_vals = &hdrs[0].vals;
2812        add_vals = &hdrs[1].vals;
2813
2814        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2815
2816        for (i = 0; i < ARRAY_SIZE(fields); i++) {
2817                bool skip;
2818
2819                f = &fields[i];
2820                /* avoid seeing bits set from previous iterations */
2821                s_mask = 0;
2822                a_mask = 0;
2823
2824                s_masks_p = (void *)set_masks + f->offset;
2825                a_masks_p = (void *)add_masks + f->offset;
2826
2827                s_mask = *s_masks_p & f->field_mask;
2828                a_mask = *a_masks_p & f->field_mask;
2829
2830                if (!s_mask && !a_mask) /* nothing to offload here */
2831                        continue;
2832
2833                if (s_mask && a_mask) {
2834                        NL_SET_ERR_MSG_MOD(extack,
2835                                           "can't set and add to the same HW field");
2836                        printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2837                        return -EOPNOTSUPP;
2838                }
2839
2840                skip = false;
2841                if (s_mask) {
2842                        void *match_mask = headers_c + f->match_offset;
2843                        void *match_val = headers_v + f->match_offset;
2844
2845                        cmd  = MLX5_ACTION_TYPE_SET;
2846                        mask = s_mask;
2847                        vals_p = (void *)set_vals + f->offset;
2848                        /* don't rewrite if we have a match on the same value */
2849                        if (cmp_val_mask(vals_p, s_masks_p, match_val,
2850                                         match_mask, f->field_bsize))
2851                                skip = true;
2852                        /* clear to denote we consumed this field */
2853                        *s_masks_p &= ~f->field_mask;
2854                } else {
2855                        cmd  = MLX5_ACTION_TYPE_ADD;
2856                        mask = a_mask;
2857                        vals_p = (void *)add_vals + f->offset;
2858                        /* add 0 is no change */
2859                        if ((*(u32 *)vals_p & f->field_mask) == 0)
2860                                skip = true;
2861                        /* clear to denote we consumed this field */
2862                        *a_masks_p &= ~f->field_mask;
2863                }
2864                if (skip)
2865                        continue;
2866
2867                mask = mask_to_le(mask, f->field_bsize);
2868
2869                first = find_first_bit(&mask, f->field_bsize);
2870                next_z = find_next_zero_bit(&mask, f->field_bsize, first);
2871                last  = find_last_bit(&mask, f->field_bsize);
2872                if (first < next_z && next_z < last) {
2873                        NL_SET_ERR_MSG_MOD(extack,
2874                                           "rewrite of non-contiguous sub-fields isn't supported");
2875                        printk(KERN_WARNING "mlx5: rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n",
2876                               mask);
2877                        return -EOPNOTSUPP;
2878                }
2879
2880                err = alloc_mod_hdr_actions(priv->mdev, namespace, mod_acts);
2881                if (err) {
2882                        NL_SET_ERR_MSG_MOD(extack,
2883                                           "too many pedit actions, can't offload");
2884                        mlx5_core_warn(priv->mdev,
2885                                       "mlx5: parsed %d pedit actions, can't do more\n",
2886                                       mod_acts->num_actions);
2887                        return err;
2888                }
2889
2890                action = mod_acts->actions +
2891                         (mod_acts->num_actions * action_size);
2892                MLX5_SET(set_action_in, action, action_type, cmd);
2893                MLX5_SET(set_action_in, action, field, f->field);
2894
2895                if (cmd == MLX5_ACTION_TYPE_SET) {
2896                        int start;
2897
2898                        field_mask = mask_to_le(f->field_mask, f->field_bsize);
2899
2900                        /* a field narrower than its container need not start at bit 0 */
2901                        start = find_first_bit(&field_mask, f->field_bsize);
2902
2903                        MLX5_SET(set_action_in, action, offset, first - start);
2904                        /* length is num of bits to be written, zero means length of 32 */
2905                        MLX5_SET(set_action_in, action, length, (last - first + 1));
2906                }
2907
2908                if (f->field_bsize == 32)
2909                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2910                else if (f->field_bsize == 16)
2911                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2912                else if (f->field_bsize == 8)
2913                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2914
2915                ++mod_acts->num_actions;
2916        }
2917
2918        return 0;
2919}
2920
2921static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2922                                                  int namespace)
2923{
2924        if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2925                return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2926        else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2927                return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2928}
2929
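/*
 * Make sure there is room for at least one more modify-header action.
 * The array starts at one entry and doubles on each krealloc(), capped at
 * the namespace's max_modify_header_actions; asking for more once the cap
 * is reached returns -ENOSPC.  Grown space is zeroed.
 */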
2930int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
2931                          int namespace,
2932                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2933{
2934        int action_size, new_num_actions, max_hw_actions;
2935        size_t new_sz, old_sz;
2936        void *ret;
2937
2938        if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
2939                return 0;
2940
2941        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
2942
2943        max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
2944                                                                namespace);
2945        new_num_actions = min(max_hw_actions,
2946                              mod_hdr_acts->actions ?
2947                              mod_hdr_acts->max_actions * 2 : 1);
2948        if (mod_hdr_acts->max_actions == new_num_actions)
2949                return -ENOSPC;
2950
2951        new_sz = action_size * new_num_actions;
2952        old_sz = mod_hdr_acts->max_actions * action_size;
2953        ret = krealloc(mod_hdr_acts->actions, new_sz, GFP_KERNEL);
2954        if (!ret)
2955                return -ENOMEM;
2956
2957        memset(ret + old_sz, 0, new_sz - old_sz);
2958        mod_hdr_acts->actions = ret;
2959        mod_hdr_acts->max_actions = new_num_actions;
2960
2961        return 0;
2962}
2963
2964void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
2965{
2966        kfree(mod_hdr_acts->actions);
2967        mod_hdr_acts->actions = NULL;
2968        mod_hdr_acts->num_actions = 0;
2969        mod_hdr_acts->max_actions = 0;
2970}
2971
2972static const struct pedit_headers zero_masks = {};
2973
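/* hdrs[] is indexed by pedit command: hdrs[0] (TCA_PEDIT_KEY_EX_CMD_SET)
 * collects FLOW_ACTION_MANGLE keys, hdrs[1] (TCA_PEDIT_KEY_EX_CMD_ADD)
 * collects FLOW_ACTION_ADD keys.
 */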
2974static int
2975parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
2976                          const struct flow_action_entry *act, int namespace,
2977                          struct mlx5e_tc_flow_parse_attr *parse_attr,
2978                          struct pedit_headers_action *hdrs,
2979                          struct netlink_ext_ack *extack)
2980{
2981        u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2982        int err = -EOPNOTSUPP; /* can't be all optimistic */
2983        u32 mask, val, offset;
2984        u8 htype;
2985
2986        htype = act->mangle.htype;
2988
2989        if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2990                NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2991                goto out_err;
2992        }
2993
2994        if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2995                NL_SET_ERR_MSG_MOD(extack,
2996                                   "The pedit offload action is not supported");
2997                goto out_err;
2998        }
2999
3000        mask = act->mangle.mask;
3001        val = act->mangle.val;
3002        offset = act->mangle.offset;
3003
3004        err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
3005        if (err)
3006                goto out_err;
3007
3008        hdrs[cmd].pedits++;
3009
3010        return 0;
3011out_err:
3012        return err;
3013}
3014
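/*
 * For flows decapped from an L3 tunnel to L2 (L3_TO_L2_DECAP), an ethernet
 * pedit is folded straight into the ethernet header template used by the
 * packet-reformat action instead of becoming a modify-header action.
 */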
3015static int
3016parse_pedit_to_reformat(struct mlx5e_priv *priv,
3017                        const struct flow_action_entry *act,
3018                        struct mlx5e_tc_flow_parse_attr *parse_attr,
3019                        struct netlink_ext_ack *extack)
3020{
3021        u32 mask, val, offset;
3022        u32 *p;
3023
3024        if (act->id != FLOW_ACTION_MANGLE)
3025                return -EOPNOTSUPP;
3026
3027        if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
3028                NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
3029                return -EOPNOTSUPP;
3030        }
3031
3032        mask = ~act->mangle.mask;
3033        val = act->mangle.val;
3034        offset = act->mangle.offset;
3035        p = (u32 *)&parse_attr->eth;
3036        *(p + (offset >> 2)) |= (val & mask);
3037
3038        return 0;
3039}
3040
3041static int parse_tc_pedit_action(struct mlx5e_priv *priv,
3042                                 const struct flow_action_entry *act, int namespace,
3043                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3044                                 struct pedit_headers_action *hdrs,
3045                                 struct mlx5e_tc_flow *flow,
3046                                 struct netlink_ext_ack *extack)
3047{
3048        if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
3049                return parse_pedit_to_reformat(priv, act, parse_attr, extack);
3050
3051        return parse_pedit_to_modify_hdr(priv, act, namespace,
3052                                         parse_attr, hdrs, extack);
3053}
3054
3055static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3056                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3057                                 struct pedit_headers_action *hdrs,
3058                                 u32 *action_flags,
3059                                 struct netlink_ext_ack *extack)
3060{
3061        struct pedit_headers *cmd_masks;
3062        int err;
3063        u8 cmd;
3064
3065        err = offload_pedit_fields(priv, namespace, hdrs, parse_attr,
3066                                   action_flags, extack);
3067        if (err < 0)
3068                goto out_dealloc_parsed_actions;
3069
3070        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3071                cmd_masks = &hdrs[cmd].masks;
3072                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3073                        NL_SET_ERR_MSG_MOD(extack,
3074                                           "attempt to offload an unsupported field");
3075                        netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3076                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3077                                       16, 1, cmd_masks, sizeof(zero_masks), true);
3078                        err = -EOPNOTSUPP;
3079                        goto out_dealloc_parsed_actions;
3080                }
3081        }
3082
3083        return 0;
3084
3085out_dealloc_parsed_actions:
3086        dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3087        return err;
3088}
3089
3090static bool csum_offload_supported(struct mlx5e_priv *priv,
3091                                   u32 action,
3092                                   u32 update_flags,
3093                                   struct netlink_ext_ack *extack)
3094{
3095        u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
3096                         TCA_CSUM_UPDATE_FLAG_UDP;
3097
3098        /* The HW recalculates checksums only when headers are rewritten */
3099        if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
3100                NL_SET_ERR_MSG_MOD(extack,
3101                                   "TC csum action is only offloaded with pedit");
3102                netdev_warn(priv->netdev,
3103                            "TC csum action is only offloaded with pedit\n");
3104                return false;
3105        }
3106
3107        if (update_flags & ~prot_flags) {
3108                NL_SET_ERR_MSG_MOD(extack,
3109                                   "can't offload TC csum action for some header/s");
3110                netdev_warn(priv->netdev,
3111                            "can't offload TC csum action for some header/s - flags %#x\n",
3112                            update_flags);
3113                return false;
3114        }
3115
3116        return true;
3117}
3118
3119struct ip_ttl_word {
3120        __u8    ttl;
3121        __u8    protocol;
3122        __sum16 check;
3123};
3124
3125struct ipv6_hoplimit_word {
3126        __be16  payload_len;
3127        __u8    nexthdr;
3128        __u8    hop_limit;
3129};
3130
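/*
 * Classify one pedit key: any IPv4/IPv6 rewrite other than a pure
 * ttl/hop_limit change sets *modify_ip_header, and any address or L4 port
 * rewrite sets *modify_tuple.  Tuple rewrites are refused together with
 * action ct, since the ct state could not be restored afterwards.
 */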
3131static int is_action_keys_supported(const struct flow_action_entry *act,
3132                                    bool ct_flow, bool *modify_ip_header,
3133                                    bool *modify_tuple,
3134                                    struct netlink_ext_ack *extack)
3135{
3136        u32 mask, offset;
3137        u8 htype;
3138
3139        htype = act->mangle.htype;
3140        offset = act->mangle.offset;
3141        mask = ~act->mangle.mask;
3142        /* For the IPv4 and IPv6 headers, check the whole 4-byte word
3143         * around ttl/hop_limit to determine whether fields other than
3144         * ttl/hop_limit are being modified.
3145         */
3146        if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3147                struct ip_ttl_word *ttl_word =
3148                        (struct ip_ttl_word *)&mask;
3149
3150                if (offset != offsetof(struct iphdr, ttl) ||
3151                    ttl_word->protocol ||
3152                    ttl_word->check) {
3153                        *modify_ip_header = true;
3154                }
3155
3156                if (offset >= offsetof(struct iphdr, saddr))
3157                        *modify_tuple = true;
3158
3159                if (ct_flow && *modify_tuple) {
3160                        NL_SET_ERR_MSG_MOD(extack,
3161                                           "can't offload re-write of ipv4 address with action ct");
3162                        return -EOPNOTSUPP;
3163                }
3164        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3165                struct ipv6_hoplimit_word *hoplimit_word =
3166                        (struct ipv6_hoplimit_word *)&mask;
3167
3168                if (offset != offsetof(struct ipv6hdr, payload_len) ||
3169                    hoplimit_word->payload_len ||
3170                    hoplimit_word->nexthdr) {
3171                        *modify_ip_header = true;
3172                }
3173
3174                if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3175                        *modify_tuple = true;
3176
3177                if (ct_flow && *modify_tuple) {
3178                        NL_SET_ERR_MSG_MOD(extack,
3179                                           "can't offload re-write of ipv6 address with action ct");
3180                        return -EOPNOTSUPP;
3181                }
3182        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3183                   htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3184                *modify_tuple = true;
3185                if (ct_flow) {
3186                        NL_SET_ERR_MSG_MOD(extack,
3187                                           "can't offload re-write of transport header ports with action ct");
3188                        return -EOPNOTSUPP;
3189                }
3190        }
3191
3192        return 0;
3193}
3194
3195static bool modify_header_match_supported(struct mlx5e_priv *priv,
3196                                          struct mlx5_flow_spec *spec,
3197                                          struct flow_action *flow_action,
3198                                          u32 actions, bool ct_flow,
3199                                          bool ct_clear,
3200                                          struct netlink_ext_ack *extack)
3201{
3202        const struct flow_action_entry *act;
3203        bool modify_ip_header, modify_tuple;
3204        void *headers_c;
3205        void *headers_v;
3206        u16 ethertype;
3207        u8 ip_proto;
3208        int i, err;
3209
3210        headers_c = get_match_headers_criteria(actions, spec);
3211        headers_v = get_match_headers_value(actions, spec);
3212        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3213
3214        /* for non-IP we only re-write MACs, so we're okay */
3215        if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3216            ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3217                goto out_ok;
3218
3219        modify_ip_header = false;
3220        modify_tuple = false;
3221        flow_action_for_each(i, act, flow_action) {
3222                if (act->id != FLOW_ACTION_MANGLE &&
3223                    act->id != FLOW_ACTION_ADD)
3224                        continue;
3225
3226                err = is_action_keys_supported(act, ct_flow,
3227                                               &modify_ip_header,
3228                                               &modify_tuple, extack);
3229                if (err)
3230                        return false;
3231        }
3232
3233        /* Add a ct_state=-trk match so the rule is offloaded only for
3234         * untracked (or post ct_clear) traffic: once the tuple is
3235         * rewritten, the ct state can no longer be restored.
3236         */
3237        if (!ct_clear && modify_tuple &&
3238            mlx5_tc_ct_add_no_trk_match(spec)) {
3239                NL_SET_ERR_MSG_MOD(extack,
3240                                   "can't offload tuple modify header with ct matches");
3241                netdev_info(priv->netdev,
3242                            "can't offload tuple modify header with ct matches\n");
3243                return false;
3244        }
3245
3246        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3247        if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3248            ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3249                NL_SET_ERR_MSG_MOD(extack,
3250                                   "can't offload re-write of non TCP/UDP/ICMP");
3251                netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3252                            ip_proto);
3253                return false;
3254        }
3255
3256out_ok:
3257        return true;
3258}
3259
3260static bool actions_match_supported(struct mlx5e_priv *priv,
3261                                    struct flow_action *flow_action,
3262                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
3263                                    struct mlx5e_tc_flow *flow,
3264                                    struct netlink_ext_ack *extack)
3265{
3266        bool ct_flow = false, ct_clear = false;
3267        u32 actions;
3268
3269        ct_clear = flow->attr->ct_attr.ct_action &
3270                TCA_CT_ACT_CLEAR;
3271        ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3272        actions = flow->attr->action;
3273
3274        if (mlx5e_is_eswitch_flow(flow)) {
3275                if (flow->attr->esw_attr->split_count && ct_flow) {
3276                        /* All registers used by ct are cleared when using
3277                         * split rules.
3278                         */
3279                        NL_SET_ERR_MSG_MOD(extack,
3280                                           "Can't offload mirroring with action ct");
3281                        return false;
3282                }
3283        }
3284
3285        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3286                return modify_header_match_supported(priv, &parse_attr->spec,
3287                                                     flow_action, actions,
3288                                                     ct_flow, ct_clear,
3289                                                     extack);
3290
3291        return true;
3292}
3293
3294static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3295{
3296        return priv->mdev == peer_priv->mdev;
3297}
3298
3299static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3300{
3301        struct mlx5_core_dev *fmdev, *pmdev;
3302        u64 fsystem_guid, psystem_guid;
3303
3304        fmdev = priv->mdev;
3305        pmdev = peer_priv->mdev;
3306
3307        fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3308        psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3309
3310        return (fsystem_guid == psystem_guid);
3311}
3312
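/*
 * VLAN "mangle" is offloaded as a pedit of the VID bits in h_vlan_TCI.
 * This is only valid when the rule explicitly matches on cvlan_tag, and
 * the PCP bits must stay as matched since only the VID is rewritten.
 */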
3313static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
3314                                   const struct flow_action_entry *act,
3315                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
3316                                   struct pedit_headers_action *hdrs,
3317                                   u32 *action, struct netlink_ext_ack *extack)
3318{
3319        u16 mask16 = VLAN_VID_MASK;
3320        u16 val16 = act->vlan.vid & VLAN_VID_MASK;
3321        const struct flow_action_entry pedit_act = {
3322                .id = FLOW_ACTION_MANGLE,
3323                .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
3324                .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
3325                .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
3326                .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
3327        };
3328        u8 match_prio_mask, match_prio_val;
3329        void *headers_c, *headers_v;
3330        int err;
3331
3332        headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
3333        headers_v = get_match_headers_value(*action, &parse_attr->spec);
3334
3335        if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
3336              MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
3337                NL_SET_ERR_MSG_MOD(extack,
3338                                   "VLAN rewrite action must have VLAN protocol match");
3339                return -EOPNOTSUPP;
3340        }
3341
3342        match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
3343        match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
3344        if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
3345                NL_SET_ERR_MSG_MOD(extack,
3346                                   "Changing VLAN prio is not supported");
3347                return -EOPNOTSUPP;
3348        }
3349
3350        err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
3351        *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3352
3353        return err;
3354}
3355
3356static int
3357add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
3358                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
3359                                 struct pedit_headers_action *hdrs,
3360                                 u32 *action, struct netlink_ext_ack *extack)
3361{
3362        const struct flow_action_entry prio_tag_act = {
3363                .vlan.vid = 0,
3364                .vlan.prio =
3365                        MLX5_GET(fte_match_set_lyr_2_4,
3366                                 get_match_headers_value(*action,
3367                                                         &parse_attr->spec),
3368                                 first_prio) &
3369                        MLX5_GET(fte_match_set_lyr_2_4,
3370                                 get_match_headers_criteria(*action,
3371                                                            &parse_attr->spec),
3372                                 first_prio),
3373        };
3374
3375        return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
3376                                       &prio_tag_act, parse_attr, hdrs, action,
3377                                       extack);
3378}
3379
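/*
 * A goto chain_index is honoured only when it stays inside the supported
 * chain range, does not target the same or a lower chain unless backwards
 * goto is supported, is not used from an FT (bypass) flow, and is not
 * combined with reformat/decap unless the device has
 * reformat_and_fwd_to_table.
 */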
3380static int validate_goto_chain(struct mlx5e_priv *priv,
3381                               struct mlx5e_tc_flow *flow,
3382                               const struct flow_action_entry *act,
3383                               u32 actions,
3384                               struct netlink_ext_ack *extack)
3385{
3386        bool is_esw = mlx5e_is_eswitch_flow(flow);
3387        struct mlx5_flow_attr *attr = flow->attr;
3388        bool ft_flow = mlx5e_is_ft_flow(flow);
3389        u32 dest_chain = act->chain_index;
3390        struct mlx5_fs_chains *chains;
3391        struct mlx5_eswitch *esw;
3392        u32 reformat_and_fwd;
3393        u32 max_chain;
3394
3395        esw = priv->mdev->priv.eswitch;
3396        chains = is_esw ? esw_chains(esw) : nic_chains(priv);
3397        max_chain = mlx5_chains_get_chain_range(chains);
3398        reformat_and_fwd = is_esw ?
3399                           MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_and_fwd_to_table) :
3400                           MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, reformat_and_fwd_to_table);
3401
3402        if (ft_flow) {
3403                NL_SET_ERR_MSG_MOD(extack, "Goto action is not supported");
3404                return -EOPNOTSUPP;
3405        }
3406
3407        if (!mlx5_chains_backwards_supported(chains) &&
3408            dest_chain <= attr->chain) {
3409                NL_SET_ERR_MSG_MOD(extack,
3410                                   "Goto lower numbered chain isn't supported");
3411                return -EOPNOTSUPP;
3412        }
3413
3414        if (dest_chain > max_chain) {
3415                NL_SET_ERR_MSG_MOD(extack,
3416                                   "Requested destination chain is out of supported range");
3417                return -EOPNOTSUPP;
3418        }
3419
3420        if (actions & (MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
3421                       MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
3422            !reformat_and_fwd) {
3423                NL_SET_ERR_MSG_MOD(extack,
3424                                   "Goto chain is not allowed if action has reformat or decap");
3425                return -EOPNOTSUPP;
3426        }
3427
3428        return 0;
3429}
3430
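/*
 * Walk the TC actions of a NIC (non-eswitch) flow and fold them into
 * MLX5_FLOW_CONTEXT_ACTION_* flags plus parsed attributes: pedits become
 * modify-header actions, mark becomes the flow tag, redirect is honoured
 * only as hairpin between devices on the same HW, and goto sets the
 * destination chain.
 */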
3431static int parse_tc_nic_actions(struct mlx5e_priv *priv,
3432                                struct flow_action *flow_action,
3433                                struct mlx5e_tc_flow_parse_attr *parse_attr,
3434                                struct mlx5e_tc_flow *flow,
3435                                struct netlink_ext_ack *extack)
3436{
3437        struct mlx5_flow_attr *attr = flow->attr;
3438        struct pedit_headers_action hdrs[2] = {};
3439        const struct flow_action_entry *act;
3440        struct mlx5_nic_flow_attr *nic_attr;
3441        u32 action = 0;
3442        int err, i;
3443
3444        if (!flow_action_has_entries(flow_action))
3445                return -EINVAL;
3446
3447        if (!flow_action_hw_stats_check(flow_action, extack,
3448                                        FLOW_ACTION_HW_STATS_DELAYED_BIT))
3449                return -EOPNOTSUPP;
3450
3451        nic_attr = attr->nic_attr;
3452
3453        nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3454
3455        flow_action_for_each(i, act, flow_action) {
3456                switch (act->id) {
3457                case FLOW_ACTION_ACCEPT:
3458                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3459                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
3460                        break;
3461                case FLOW_ACTION_DROP:
3462                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3463                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
3464                                               flow_table_properties_nic_receive.flow_counter))
3465                                action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3466                        break;
3467                case FLOW_ACTION_MANGLE:
3468                case FLOW_ACTION_ADD:
3469                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
3470                                                    parse_attr, hdrs, NULL, extack);
3471                        if (err)
3472                                return err;
3473
3474                        action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3475                        break;
3476                case FLOW_ACTION_VLAN_MANGLE:
3477                        err = add_vlan_rewrite_action(priv,
3478                                                      MLX5_FLOW_NAMESPACE_KERNEL,
3479                                                      act, parse_attr, hdrs,
3480                                                      &action, extack);
3481                        if (err)
3482                                return err;
3483
3484                        break;
3485                case FLOW_ACTION_CSUM:
3486                        if (csum_offload_supported(priv, action,
3487                                                   act->csum_flags,
3488                                                   extack))
3489                                break;
3490
3491                        return -EOPNOTSUPP;
3492                case FLOW_ACTION_REDIRECT: {
3493                        struct net_device *peer_dev = act->dev;
3494
3495                        if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
3496                            same_hw_devs(priv, netdev_priv(peer_dev))) {
3497                                parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
3498                                flow_flag_set(flow, HAIRPIN);
3499                                action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
3500                                          MLX5_FLOW_CONTEXT_ACTION_COUNT;
3501                        } else {
3502                                NL_SET_ERR_MSG_MOD(extack,
3503                                                   "device is not on same HW, can't offload");
3504                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
3505                                            peer_dev->name);
3506                                return -EINVAL;
3507                        }
3508                        }
3509                        break;
3510                case FLOW_ACTION_MARK: {
3511                        u32 mark = act->mark;
3512
3513                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
3514                                NL_SET_ERR_MSG_MOD(extack,
3515                                                   "Bad flow mark - only 16 bit is supported");
3516                                return -EINVAL;
3517                        }
3518
3519                        nic_attr->flow_tag = mark;
3520                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3521                        }
3522                        break;
3523                case FLOW_ACTION_GOTO:
3524                        err = validate_goto_chain(priv, flow, act, action,
3525                                                  extack);
3526                        if (err)
3527                                return err;
3528
3529                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3530                        attr->dest_chain = act->chain_index;
3531                        break;
3532                case FLOW_ACTION_CT:
3533                        err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
3534                        if (err)
3535                                return err;
3536
3537                        flow_flag_set(flow, CT);
3538                        break;
3539                default:
3540                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
3541                        return -EOPNOTSUPP;
3542                }
3543        }
3544
3545        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
3546            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
3547                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
3548                                            parse_attr, hdrs, &action, extack);
3549                if (err)
3550                        return err;
3551                /* in case all pedit actions are skipped, remove the MOD_HDR
3552                 * flag.
3553                 */
3554                if (parse_attr->mod_hdr_acts.num_actions == 0) {
3555                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3556                        dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
3557                }
3558        }
3559
3560        attr->action = action;
3561
3562        if (attr->dest_chain) {
3563                if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
3564                        NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
3565                        return -EOPNOTSUPP;
3566                }
3567                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3568        }
3569
3570        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
3571                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3572
3573        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
3574                return -EOPNOTSUPP;
3575
3576        return 0;
3577}
3578
3579struct encap_key {
3580        const struct ip_tunnel_key *ip_tun_key;
3581        struct mlx5e_tc_tunnel *tc_tunnel;
3582};
3583
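/* Encap entries are keyed by the full ip_tunnel_key plus tunnel type,
 * decap entries by the rebuilt L2 header; both hash with jhash and compare
 * with memcmp on lookup.
 */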
3584static inline int cmp_encap_info(struct encap_key *a,
3585                                 struct encap_key *b)
3586{
3587        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
3588               a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
3589}
3590
3591static inline int cmp_decap_info(struct mlx5e_decap_key *a,
3592                                 struct mlx5e_decap_key *b)
3593{
3594        return memcmp(&a->key, &b->key, sizeof(b->key));
3595}
3596
3597static inline int hash_encap_info(struct encap_key *key)
3598{
3599        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
3600                     key->tc_tunnel->tunnel_type);
3601}
3602
3603static inline int hash_decap_info(struct mlx5e_decap_key *key)
3604{
3605        return jhash(&key->key, sizeof(key->key), 0);
3606}
3607
3608static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3609                                  struct net_device *peer_netdev)
3610{
3611        struct mlx5e_priv *peer_priv;
3612
3613        peer_priv = netdev_priv(peer_netdev);
3614
3615        return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3616                mlx5e_eswitch_vf_rep(priv->netdev) &&
3617                mlx5e_eswitch_vf_rep(peer_netdev) &&
3618                same_hw_devs(priv, peer_priv));
3619}
3620
3621bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
3622{
3623        return refcount_inc_not_zero(&e->refcnt);
3624}
3625
3626static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
3627{
3628        return refcount_inc_not_zero(&e->refcnt);
3629}
3630
3631static struct mlx5e_encap_entry *
3632mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
3633                uintptr_t hash_key)
3634{
3635        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3636        struct mlx5e_encap_entry *e;
3637        struct encap_key e_key;
3638
3639        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
3640                                   encap_hlist, hash_key) {
3641                e_key.ip_tun_key = &e->tun_info->key;
3642                e_key.tc_tunnel = e->tunnel;
3643                if (!cmp_encap_info(&e_key, key) &&
3644                    mlx5e_encap_take(e))
3645                        return e;
3646        }
3647
3648        return NULL;
3649}
3650
3651static struct mlx5e_decap_entry *
3652mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
3653                uintptr_t hash_key)
3654{
3655        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3656        struct mlx5e_decap_key r_key;
3657        struct mlx5e_decap_entry *e;
3658
3659        hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
3660                                   hlist, hash_key) {
3661                r_key = e->key;
3662                if (!cmp_decap_info(&r_key, key) &&
3663                    mlx5e_decap_take(e))
3664                        return e;
3665        }
3666        return NULL;
3667}
3668
3669static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
3670{
3671        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
3672
3673        return kmemdup(tun_info, tun_size, GFP_KERNEL);
3674}
3675
3676static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
3677                                      struct mlx5e_tc_flow *flow,
3678                                      int out_index,
3679                                      struct mlx5e_encap_entry *e,
3680                                      struct netlink_ext_ack *extack)
3681{
3682        int i;
3683
3684        for (i = 0; i < out_index; i++) {
3685                if (flow->encaps[i].e != e)
3686                        continue;
3687                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
3688                netdev_err(priv->netdev, "can't duplicate encap action\n");
3689                return true;
3690        }
3691
3692        return false;
3693}
3694
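/*
 * Find or create the encap entry for one mirred output.  Lookups take a
 * reference under encap_tbl_lock; a new entry is hashed before its
 * neighbour resolution completes, so concurrent flows wait on res_ready
 * and read compl_result instead of racing.  Only an entry with a resolved
 * neighbour (MLX5_ENCAP_ENTRY_VALID) yields a pkt_reformat right away;
 * otherwise the flow is attached but reported via *encap_valid as not yet
 * offloadable.
 */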
3695static int mlx5e_attach_encap(struct mlx5e_priv *priv,
3696                              struct mlx5e_tc_flow *flow,
3697                              struct net_device *mirred_dev,
3698                              int out_index,
3699                              struct netlink_ext_ack *extack,
3700                              struct net_device **encap_dev,
3701                              bool *encap_valid)
3702{
3703        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3704        struct mlx5e_tc_flow_parse_attr *parse_attr;
3705        struct mlx5_flow_attr *attr = flow->attr;
3706        const struct ip_tunnel_info *tun_info;
3707        struct encap_key key;
3708        struct mlx5e_encap_entry *e;
3709        unsigned short family;
3710        uintptr_t hash_key;
3711        int err = 0;
3712
3713        parse_attr = attr->parse_attr;
3714        tun_info = parse_attr->tun_info[out_index];
3715        family = ip_tunnel_info_af(tun_info);
3716        key.ip_tun_key = &tun_info->key;
3717        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
3718        if (!key.tc_tunnel) {
3719                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
3720                return -EOPNOTSUPP;
3721        }
3722
3723        hash_key = hash_encap_info(&key);
3724
3725        mutex_lock(&esw->offloads.encap_tbl_lock);
3726        e = mlx5e_encap_get(priv, &key, hash_key);
3727
3728        /* an existing entry may still be resolving; wait for it and check the result */
3729        if (e) {
3730                /* Check that entry was not already attached to this flow */
3731                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
3732                        err = -EOPNOTSUPP;
3733                        goto out_err;
3734                }
3735
3736                mutex_unlock(&esw->offloads.encap_tbl_lock);
3737                wait_for_completion(&e->res_ready);
3738
3739                /* Protect against concurrent neigh update. */
3740                mutex_lock(&esw->offloads.encap_tbl_lock);
3741                if (e->compl_result < 0) {
3742                        err = -EREMOTEIO;
3743                        goto out_err;
3744                }
3745                goto attach_flow;
3746        }
3747
3748        e = kzalloc(sizeof(*e), GFP_KERNEL);
3749        if (!e) {
3750                err = -ENOMEM;
3751                goto out_err;
3752        }
3753
3754        refcount_set(&e->refcnt, 1);
3755        init_completion(&e->res_ready);
3756
3757        tun_info = dup_tun_info(tun_info);
3758        if (!tun_info) {
3759                err = -ENOMEM;
3760                goto out_err_init;
3761        }
3762        e->tun_info = tun_info;
3763        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
3764        if (err)
3765                goto out_err_init;
3766
3767        INIT_LIST_HEAD(&e->flows);
3768        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
3769        mutex_unlock(&esw->offloads.encap_tbl_lock);
3770
3771        if (family == AF_INET)
3772                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
3773        else if (family == AF_INET6)
3774                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
3775
3776        /* Protect against concurrent neigh update. */
3777        mutex_lock(&esw->offloads.encap_tbl_lock);
3778        complete_all(&e->res_ready);
3779        if (err) {
3780                e->compl_result = err;
3781                goto out_err;
3782        }
3783        e->compl_result = 1;
3784
3785attach_flow:
3786        flow->encaps[out_index].e = e;
3787        list_add(&flow->encaps[out_index].list, &e->flows);
3788        flow->encaps[out_index].index = out_index;
3789        *encap_dev = e->out_dev;
3790        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
3791                attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
3792                attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
3793                *encap_valid = true;
3794        } else {
3795                *encap_valid = false;
3796        }
3797        mutex_unlock(&esw->offloads.encap_tbl_lock);
3798
3799        return err;
3800
3801out_err:
3802        mutex_unlock(&esw->offloads.encap_tbl_lock);
3803        if (e)
3804                mlx5e_encap_put(priv, e);
3805        return err;
3806
3807out_err_init:
3808        mutex_unlock(&esw->offloads.encap_tbl_lock);
3809        kfree(tun_info);
3810        kfree(e);
3811        return err;
3812}
3813
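/*
 * Find or create the decap entry holding the L2 header to restore after an
 * L3-tunnel decap, allocating the MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2
 * packet-reformat with the same lookup/res_ready scheme as encap entries.
 */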
3814static int mlx5e_attach_decap(struct mlx5e_priv *priv,
3815                              struct mlx5e_tc_flow *flow,
3816                              struct netlink_ext_ack *extack)
3817{
3818        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
3819        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
3820        struct mlx5e_tc_flow_parse_attr *parse_attr;
3821        struct mlx5e_decap_entry *d;
3822        struct mlx5e_decap_key key;
3823        uintptr_t hash_key;
3824        int err = 0;
3825
3826        parse_attr = flow->attr->parse_attr;
3827        if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
3828                NL_SET_ERR_MSG_MOD(extack,
3829                                   "encap header larger than max supported");
3830                return -EOPNOTSUPP;
3831        }
3832
3833        key.key = parse_attr->eth;
3834        hash_key = hash_decap_info(&key);
3835        mutex_lock(&esw->offloads.decap_tbl_lock);
3836        d = mlx5e_decap_get(priv, &key, hash_key);
3837        if (d) {
3838                mutex_unlock(&esw->offloads.decap_tbl_lock);
3839                wait_for_completion(&d->res_ready);
3840                mutex_lock(&esw->offloads.decap_tbl_lock);
3841                if (d->compl_result) {
3842                        err = -EREMOTEIO;
3843                        goto out_free;
3844                }
3845                goto found;
3846        }
3847
3848        d = kzalloc(sizeof(*d), GFP_KERNEL);
3849        if (!d) {
3850                err = -ENOMEM;
3851                goto out_err;
3852        }
3853
3854        d->key = key;
3855        refcount_set(&d->refcnt, 1);
3856        init_completion(&d->res_ready);
3857        INIT_LIST_HEAD(&d->flows);
3858        hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
3859        mutex_unlock(&esw->offloads.decap_tbl_lock);
3860
3861        d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
3862                                                     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
3863                                                     sizeof(parse_attr->eth),
3864                                                     &parse_attr->eth,
3865                                                     MLX5_FLOW_NAMESPACE_FDB);
3866        if (IS_ERR(d->pkt_reformat)) {
3867                err = PTR_ERR(d->pkt_reformat);
3868                d->compl_result = err;
3869        }
3870        mutex_lock(&esw->offloads.decap_tbl_lock);
3871        complete_all(&d->res_ready);
3872        if (err)
3873                goto out_free;
3874
3875found:
3876        flow->decap_reformat = d;
3877        attr->decap_pkt_reformat = d->pkt_reformat;
3878        list_add(&flow->l3_to_l2_reformat, &d->flows);
3879        mutex_unlock(&esw->offloads.decap_tbl_lock);
3880        return 0;
3881
3882out_free:
3883        mutex_unlock(&esw->offloads.decap_tbl_lock);
3884        mlx5e_decap_put(priv, d);
3885        return err;
3886
3887out_err:
3888        mutex_unlock(&esw->offloads.decap_tbl_lock);
3889        return err;
3890}
3891
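/*
 * Map a TC vlan push/pop to eswitch VLAN actions.  The HW supports at most
 * MLX5_FS_VLAN_DEPTH (two) tags; the second push/pop level needs explicit
 * device support, and without VLAN action support a push is accepted only
 * for plain 802.1Q with priority 0.
 */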
3892static int parse_tc_vlan_action(struct mlx5e_priv *priv,
3893                                const struct flow_action_entry *act,
3894                                struct mlx5_esw_flow_attr *attr,
3895                                u32 *action)
3896{
3897        u8 vlan_idx = attr->total_vlan;
3898
3899        if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
3900                return -EOPNOTSUPP;
3901
3902        switch (act->id) {
3903        case FLOW_ACTION_VLAN_POP:
3904                if (vlan_idx) {
3905                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3906                                                                 MLX5_FS_VLAN_DEPTH))
3907                                return -EOPNOTSUPP;
3908
3909                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
3910                } else {
3911                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
3912                }
3913                break;
3914        case FLOW_ACTION_VLAN_PUSH:
3915                attr->vlan_vid[vlan_idx] = act->vlan.vid;
3916                attr->vlan_prio[vlan_idx] = act->vlan.prio;
3917                attr->vlan_proto[vlan_idx] = act->vlan.proto;
3918                if (!attr->vlan_proto[vlan_idx])
3919                        attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
3920
3921                if (vlan_idx) {
3922                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
3923                                                                 MLX5_FS_VLAN_DEPTH))
3924                                return -EOPNOTSUPP;
3925
3926                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
3927                } else {
3928                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
3929                            (act->vlan.proto != htons(ETH_P_8021Q) ||
3930                             act->vlan.prio))
3931                                return -EOPNOTSUPP;
3932
3933                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
3934                }
3935                break;
3936        default:
3937                return -EINVAL;
3938        }
3939
3940        attr->total_vlan = vlan_idx + 1;
3941
3942        return 0;
3943}
3944
3945static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
3946                                          struct net_device *out_dev)
3947{
3948        struct net_device *fdb_out_dev = out_dev;
3949        struct net_device *uplink_upper;
3950
3951        rcu_read_lock();
3952        uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
3953        if (uplink_upper && netif_is_lag_master(uplink_upper) &&
3954            uplink_upper == out_dev) {
3955                fdb_out_dev = uplink_dev;
3956        } else if (netif_is_lag_master(out_dev)) {
3957                fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
3958                if (fdb_out_dev &&
3959                    (!mlx5e_eswitch_rep(fdb_out_dev) ||
3960                     !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
3961                        fdb_out_dev = NULL;
3962        }
3963        rcu_read_unlock();
3964        return fdb_out_dev;
3965}
3966
3967static int add_vlan_push_action(struct mlx5e_priv *priv,
3968                                struct mlx5_flow_attr *attr,
3969                                struct net_device **out_dev,
3970                                u32 *action)
3971{
3972        struct net_device *vlan_dev = *out_dev;
3973        struct flow_action_entry vlan_act = {
3974                .id = FLOW_ACTION_VLAN_PUSH,
3975                .vlan.vid = vlan_dev_vlan_id(vlan_dev),
3976                .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
3977                .vlan.prio = 0,
3978        };
3979        int err;
3980
3981        err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
3982        if (err)
3983                return err;
3984
3985        *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
3986                                        dev_get_iflink(vlan_dev));
            if (!*out_dev)
                    return -ENODEV;

3987        if (is_vlan_dev(*out_dev))
3988                err = add_vlan_push_action(priv, attr, out_dev, action);
3989
3990        return err;
3991}
3992
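/*
 * Pop one tag per stacking level between the filter device and this port:
 * the lower_level difference equals the number of VLAN devices stacked on
 * top of the netdev.
 */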
3993static int add_vlan_pop_action(struct mlx5e_priv *priv,
3994                               struct mlx5_flow_attr *attr,
3995                               u32 *action)
3996{
3997        struct flow_action_entry vlan_act = {
3998                .id = FLOW_ACTION_VLAN_POP,
3999        };
4000        int nest_level, err = 0;
4001
4002        nest_level = attr->parse_attr->filter_dev->lower_level -
4003                                                priv->netdev->lower_level;
4004        while (nest_level--) {
4005                err = parse_tc_vlan_action(priv, &vlan_act, attr->esw_attr, action);
4006                if (err)
4007                        return err;
4008        }
4009
4010        return err;
4011}
4012
4013static bool same_hw_reps(struct mlx5e_priv *priv,
4014                         struct net_device *peer_netdev)
4015{
4016        struct mlx5e_priv *peer_priv;
4017
4018        peer_priv = netdev_priv(peer_netdev);
4019
4020        return mlx5e_eswitch_rep(priv->netdev) &&
4021               mlx5e_eswitch_rep(peer_netdev) &&
4022               same_hw_devs(priv, peer_priv);
4023}
4024
4025static bool is_lag_dev(struct mlx5e_priv *priv,
4026                       struct net_device *peer_netdev)
4027{
4028        return ((mlx5_lag_is_sriov(priv->mdev) ||
4029                 mlx5_lag_is_multipath(priv->mdev)) &&
4030                 same_hw_reps(priv, peer_netdev));
4031}
4032
4033bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4034                                    struct net_device *out_dev)
4035{
4036        if (is_merged_eswitch_vfs(priv, out_dev))
4037                return true;
4038
4039        if (is_lag_dev(priv, out_dev))
4040                return true;
4041
4042        return mlx5e_eswitch_rep(out_dev) &&
4043               same_port_devs(priv, netdev_priv(out_dev));
4044}
4045
4046static bool is_duplicated_output_device(struct net_device *dev,
4047                                        struct net_device *out_dev,
4048                                        int *ifindexes, int if_count,
4049                                        struct netlink_ext_ack *extack)
4050{
4051        int i;
4052
4053        for (i = 0; i < if_count; i++) {
4054                if (ifindexes[i] == out_dev->ifindex) {
4055                        NL_SET_ERR_MSG_MOD(extack,
4056                                           "can't duplicate output to same device");
4057                        netdev_err(dev, "can't duplicate output to same device: %s\n",
4058                                   out_dev->name);
4059                        return true;
4060                }
4061        }
4062
4063        return false;
4064}
4065
4066static int verify_uplink_forwarding(struct mlx5e_priv *priv,
4067                                    struct mlx5e_tc_flow *flow,
4068                                    struct net_device *out_dev,
4069                                    struct netlink_ext_ack *extack)
4070{
4071        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4072        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4073        struct mlx5e_rep_priv *rep_priv;
4074
4075        /* Forwarding non encapsulated traffic between
4076         * uplink ports is allowed only if
4077         * termination_table_raw_traffic cap is set.
4078         *
4079         * The input vport was stored in attr->in_rep.
4080         * In LAG case, *priv* is the private data of
4081         * uplink which may be not the input vport.
4082         */
4083        rep_priv = mlx5e_rep_to_rep_priv(attr->in_rep);
4084
4085        if (!(mlx5e_eswitch_uplink_rep(rep_priv->netdev) &&
4086              mlx5e_eswitch_uplink_rep(out_dev)))
4087                return 0;
4088
4089        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev,
4090                                        termination_table_raw_traffic)) {
4091                NL_SET_ERR_MSG_MOD(extack,
4092                                   "devices are both uplink, can't offload forwarding");
4093                pr_err("devices %s %s are both uplink, can't offload forwarding\n",
4094                       priv->netdev->name, out_dev->name);
4095                return -EOPNOTSUPP;
4096        } else if (out_dev != rep_priv->netdev) {
4097                NL_SET_ERR_MSG_MOD(extack,
4098                                   "devices are not the same uplink, can't offload forwarding");
4099                pr_err("devices %s %s are both uplink but not the same, can't offload forwarding\n",
4100                       priv->netdev->name, out_dev->name);
4101                return -EOPNOTSUPP;
4102        }
4103        return 0;
4104}
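
    /* Illustration, assuming a VF LAG setup: a rule may be replayed
     * through uplink rep A while attr->in_rep belongs to uplink B, in
     * which case rep_priv above resolves to B's netdev. Forwarding of
     * non-encapsulated traffic is then allowed only back to B itself,
     * and only when the termination_table_raw_traffic FDB cap is set.
     */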
4105
4106static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
4107                                struct flow_action *flow_action,
4108                                struct mlx5e_tc_flow *flow,
4109                                struct netlink_ext_ack *extack,
4110                                struct net_device *filter_dev)
4111{
4112        struct pedit_headers_action hdrs[2] = {};
4113        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4114        struct mlx5e_tc_flow_parse_attr *parse_attr;
4115        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4116        const struct ip_tunnel_info *info = NULL;
4117        struct mlx5_flow_attr *attr = flow->attr;
4118        int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
4119        bool ft_flow = mlx5e_is_ft_flow(flow);
4120        const struct flow_action_entry *act;
4121        struct mlx5_esw_flow_attr *esw_attr;
4122        bool encap = false, decap = false;
4123        u32 action = attr->action;
4124        int err, i, if_count = 0;
4125        bool mpls_push = false;
4126
4127        if (!flow_action_has_entries(flow_action))
4128                return -EINVAL;
4129
4130        if (!flow_action_hw_stats_check(flow_action, extack,
4131                                        FLOW_ACTION_HW_STATS_DELAYED_BIT))
4132                return -EOPNOTSUPP;
4133
4134        esw_attr = attr->esw_attr;
4135        parse_attr = attr->parse_attr;
4136
4137        flow_action_for_each(i, act, flow_action) {
4138                switch (act->id) {
4139                case FLOW_ACTION_DROP:
4140                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
4141                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
4142                        break;
4143                case FLOW_ACTION_TRAP:
4144                        if (!flow_offload_has_one_action(flow_action)) {
4145                                NL_SET_ERR_MSG_MOD(extack,
4146                                                   "action trap is supported as a sole action only");
4147                                return -EOPNOTSUPP;
4148                        }
4149                        action |= (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4150                                   MLX5_FLOW_CONTEXT_ACTION_COUNT);
4151                        attr->flags |= MLX5_ESW_ATTR_FLAG_SLOW_PATH;
4152                        break;
4153                case FLOW_ACTION_MPLS_PUSH:
4154                        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
4155                                                        reformat_l2_to_l3_tunnel) ||
4156                            act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
4157                                NL_SET_ERR_MSG_MOD(extack,
4158                                                   "mpls push is supported only for mpls_uc protocol");
4159                                return -EOPNOTSUPP;
4160                        }
4161                        mpls_push = true;
4162                        break;
4163                case FLOW_ACTION_MPLS_POP:
4164                        /* we only support mpls pop if it is the first action
4165                         * and the filter net device is bareudp. Subsequent
4166                         * actions can be pedit and the last can be mirred
4167                         * egress redirect.
4168                         */
4169                        if (i) {
4170                                NL_SET_ERR_MSG_MOD(extack,
4171                                                   "mpls pop supported only as first action");
4172                                return -EOPNOTSUPP;
4173                        }
4174                        if (!netif_is_bareudp(filter_dev)) {
4175                                NL_SET_ERR_MSG_MOD(extack,
4176                                                   "mpls pop supported only on bareudp devices");
4177                                return -EOPNOTSUPP;
4178                        }
4179
4180                        parse_attr->eth.h_proto = act->mpls_pop.proto;
4181                        action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
4182                        flow_flag_set(flow, L3_TO_L2_DECAP);
4183                        break;
4184                case FLOW_ACTION_MANGLE:
4185                case FLOW_ACTION_ADD:
4186                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
4187                                                    parse_attr, hdrs, flow, extack);
4188                        if (err)
4189                                return err;
4190
4191                        if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
4192                                action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4193                                esw_attr->split_count = esw_attr->out_count;
4194                        }
4195                        break;
4196                case FLOW_ACTION_CSUM:
4197                        if (csum_offload_supported(priv, action,
4198                                                   act->csum_flags, extack))
4199                                break;
4200
4201                        return -EOPNOTSUPP;
4202                case FLOW_ACTION_REDIRECT:
4203                case FLOW_ACTION_MIRRED: {
4204                        struct mlx5e_priv *out_priv;
4205                        struct net_device *out_dev;
4206
4207                        out_dev = act->dev;
4208                        if (!out_dev) {
4209                                /* out_dev is NULL when filters with
4210                                 * non-existing mirred device are replayed to
4211                                 * the driver.
4212                                 */
4213                                return -EINVAL;
4214                        }
4215
4216                        if (mpls_push && !netif_is_bareudp(out_dev)) {
4217                                NL_SET_ERR_MSG_MOD(extack,
4218                                                   "mpls is supported only through a bareudp device");
4219                                return -EOPNOTSUPP;
4220                        }
4221
4222                        if (ft_flow && out_dev == priv->netdev) {
4223                                /* Ignore forward to self rules generated
4224                                 * by adding both mlx5 devs to the flow table
4225                                 * block on a normal nft offload setup.
4226                                 */
4227                                return -EOPNOTSUPP;
4228                        }
4229
4230                        if (esw_attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
4231                                NL_SET_ERR_MSG_MOD(extack,
4232                                                   "can't support more output ports, can't offload forwarding");
4233                                netdev_warn(priv->netdev,
4234                                            "can't support more than %d output ports, can't offload forwarding\n",
4235                                            esw_attr->out_count);
4236                                return -EOPNOTSUPP;
4237                        }
4238
4239                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
4240                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
4241                        if (encap) {
4242                                parse_attr->mirred_ifindex[esw_attr->out_count] =
4243                                        out_dev->ifindex;
4244                                parse_attr->tun_info[esw_attr->out_count] = dup_tun_info(info);
4245                                if (!parse_attr->tun_info[esw_attr->out_count])
4246                                        return -ENOMEM;
4247                                encap = false;
4248                                esw_attr->dests[esw_attr->out_count].flags |=
4249                                        MLX5_ESW_DEST_ENCAP;
4250                                esw_attr->out_count++;
4251                                /* attr->dests[].rep is resolved when we
4252                                 * handle encap
4253                                 */
4254                        } else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
4255                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4256                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
4257
4258                                if (is_duplicated_output_device(priv->netdev,
4259                                                                out_dev,
4260                                                                ifindexes,
4261                                                                if_count,
4262                                                                extack))
4263                                        return -EOPNOTSUPP;
4264
4265                                ifindexes[if_count] = out_dev->ifindex;
4266                                if_count++;
4267
4268                                out_dev = get_fdb_out_dev(uplink_dev, out_dev);
4269                                if (!out_dev)
4270                                        return -ENODEV;
4271
4272                                if (is_vlan_dev(out_dev)) {
4273                                        err = add_vlan_push_action(priv, attr,
4274                                                                   &out_dev,
4275                                                                   &action);
4276                                        if (err)
4277                                                return err;
4278                                }
4279
4280                                if (is_vlan_dev(parse_attr->filter_dev)) {
4281                                        err = add_vlan_pop_action(priv, attr,
4282                                                                  &action);
4283                                        if (err)
4284                                                return err;
4285                                }
4286
4287                                err = verify_uplink_forwarding(priv, flow, out_dev, extack);
4288                                if (err)
4289                                        return err;
4290
4291                                if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
4292                                        NL_SET_ERR_MSG_MOD(extack,
4293                                                           "devices are not on same switch HW, can't offload forwarding");
4294                                        return -EOPNOTSUPP;
4295                                }
4296
4297                                out_priv = netdev_priv(out_dev);
4298                                rpriv = out_priv->ppriv;
4299                                esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
4300                                esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
4301                                esw_attr->out_count++;
4302                        } else if (parse_attr->filter_dev != priv->netdev) {
4303                                /* All mlx5 devices are called to configure
4304                                 * high level device filters. Therefore, the
4305                                 * *attempt* to install a filter on an invalid
4306                                 * eswitch should not trigger an explicit error.
4307                                 */
4308                                return -EINVAL;
4309                        } else {
4310                                NL_SET_ERR_MSG_MOD(extack,
4311                                                   "devices are not on same switch HW, can't offload forwarding");
4312                                netdev_warn(priv->netdev,
4313                                            "devices %s %s not on same switch HW, can't offload forwarding\n",
4314                                            priv->netdev->name,
4315                                            out_dev->name);
4316                                return -EINVAL;
4317                        }
4318                        }
4319                        break;
4320                case FLOW_ACTION_TUNNEL_ENCAP:
4321                        info = act->tunnel;
4322                        if (info)
4323                                encap = true;
4324                        else
4325                                return -EOPNOTSUPP;
4326
4327                        break;
4328                case FLOW_ACTION_VLAN_PUSH:
4329                case FLOW_ACTION_VLAN_POP:
4330                        if (act->id == FLOW_ACTION_VLAN_PUSH &&
4331                            (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
4332                                /* Replace vlan pop+push with vlan modify */
4333                                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4334                                err = add_vlan_rewrite_action(priv,
4335                                                              MLX5_FLOW_NAMESPACE_FDB,
4336                                                              act, parse_attr, hdrs,
4337                                                              &action, extack);
4338                        } else {
4339                                err = parse_tc_vlan_action(priv, act, esw_attr, &action);
4340                        }
4341                        if (err)
4342                                return err;
4343
4344                        esw_attr->split_count = esw_attr->out_count;
4345                        break;
4346                case FLOW_ACTION_VLAN_MANGLE:
4347                        err = add_vlan_rewrite_action(priv,
4348                                                      MLX5_FLOW_NAMESPACE_FDB,
4349                                                      act, parse_attr, hdrs,
4350                                                      &action, extack);
4351                        if (err)
4352                                return err;
4353
4354                        esw_attr->split_count = esw_attr->out_count;
4355                        break;
4356                case FLOW_ACTION_TUNNEL_DECAP:
4357                        decap = true;
4358                        break;
4359                case FLOW_ACTION_GOTO:
4360                        err = validate_goto_chain(priv, flow, act, action,
4361                                                  extack);
4362                        if (err)
4363                                return err;
4364
4365                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4366                        attr->dest_chain = act->chain_index;
4367                        break;
4368                case FLOW_ACTION_CT:
4369                        err = mlx5_tc_ct_parse_action(get_ct_priv(priv), attr, act, extack);
4370                        if (err)
4371                                return err;
4372
4373                        flow_flag_set(flow, CT);
4374                        break;
4375                default:
4376                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
4377                        return -EOPNOTSUPP;
4378                }
4379        }
4380
4381        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
4382            action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
4383                /* For prio tag mode, replace vlan pop with a vlan prio
4384                 * tag rewrite.
4385                 */
4386                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
4387                err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
4388                                                       &action, extack);
4389                if (err)
4390                        return err;
4391        }
4392
4393        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
4394            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
4395                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
4396                                            parse_attr, hdrs, &action, extack);
4397                if (err)
4398                        return err;
4399                /* In case all pedit actions are skipped, remove the MOD_HDR
4400                 * flag. We might have set split_count by either pedit or
4401                 * pop/push. If there is no pop/push either, reset it too.
4402                 */
4403                if (parse_attr->mod_hdr_acts.num_actions == 0) {
4404                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4405                        dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4406                        if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
4407                              (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
4408                                esw_attr->split_count = 0;
4409                }
4410        }
4411
4412        attr->action = action;
4413        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
4414                return -EOPNOTSUPP;
4415
4416        if (attr->dest_chain) {
4417                if (decap) {
4418                        /* It could be supported by creating a mapping for
4419                         * the tunnel device only (without the tunnel), and
4420                         * setting that tunnel id on this decap flow.
4421                         *
4422                         * On restore (miss), we would then just set the
4423                         * saved tunnel device.
4424                         */
4425
4426                        NL_SET_ERR_MSG(extack,
4427                                       "Decap with goto isn't supported");
4428                        netdev_warn(priv->netdev,
4429                                    "Decap with goto isn't supported");
4430                        return -EOPNOTSUPP;
4431                }
4432
4433                if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
4434                        NL_SET_ERR_MSG_MOD(extack,
4435                                           "Mirroring goto chain rules isn't supported");
4436                        return -EOPNOTSUPP;
4437                }
4438                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
4439        }
4440
4441        if (!(attr->action &
4442              (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
4443                NL_SET_ERR_MSG_MOD(extack,
4444                                   "Rule must have at least one forward/drop action");
4445                return -EOPNOTSUPP;
4446        }
4447
4448        if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
4449                NL_SET_ERR_MSG_MOD(extack,
4450                                   "current firmware doesn't support split rule for port mirroring");
4451                netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
4452                return -EOPNOTSUPP;
4453        }
4454
4455        return 0;
4456}
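
    /* A sketch of one filter this parser accepts, with hypothetical
     * netdev names, combining decap, a header rewrite and a redirect:
     *
     *   tc filter add dev vxlan0 ingress protocol ip flower skip_sw \
     *           action tunnel_key unset \
     *           action pedit ex munge eth dst set aa:bb:cc:dd:ee:ff pipe \
     *           action mirred egress redirect dev enp8s0f0_0
     *
     * It is replayed here as FLOW_ACTION_TUNNEL_DECAP,
     * FLOW_ACTION_MANGLE and FLOW_ACTION_REDIRECT entries: the mangle
     * becomes a MOD_HDR action, the redirect sets FWD_DEST | COUNT with
     * one esw_attr->dests[] entry, and the decap itself is handled by
     * the tunnel offload path.
     */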
4457
4458static void get_flags(int flags, unsigned long *flow_flags)
4459{
4460        unsigned long __flow_flags = 0;
4461
4462        if (flags & MLX5_TC_FLAG(INGRESS))
4463                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4464        if (flags & MLX5_TC_FLAG(EGRESS))
4465                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4466
4467        if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4468                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4469        if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4470                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4471        if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4472                __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4473
4474        *flow_flags = __flow_flags;
4475}
4476
4477static const struct rhashtable_params tc_ht_params = {
4478        .head_offset = offsetof(struct mlx5e_tc_flow, node),
4479        .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4480        .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4481        .automatic_shrinking = true,
4482};
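
    /* Flows are keyed by the TC filter cookie, which the stack keeps
     * unique per filter instance, so a command such as
     *
     *   flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
     *
     * (see mlx5e_configure_flower() below) maps a flow_cls_offload
     * request back to the driver's flow state in one lookup.
     */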
4483
4484static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4485                                    unsigned long flags)
4486{
4487        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4488        struct mlx5e_rep_priv *uplink_rpriv;
4489
4490        if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4491                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
4492                return &uplink_rpriv->uplink_priv.tc_ht;
4493        } else /* NIC offload */
4494                return &priv->fs.tc.ht;
4495}
4496
4497static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4498{
4499        struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4500        struct mlx5_flow_attr *attr = flow->attr;
4501        bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4502                flow_flag_test(flow, INGRESS);
4503        bool act_is_encap = !!(attr->action &
4504                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4505        bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4506                                                MLX5_DEVCOM_ESW_OFFLOADS);
4507
4508        if (!esw_paired)
4509                return false;
4510
4511        if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4512             mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4513            (is_rep_ingress || act_is_encap))
4514                return true;
4515
4516        return false;
4517}
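
    /* Example of when a peer flow is needed: under VF LAG or multipath,
     * traffic matching a rule added on a VF rep ingress may arrive via
     * either physical port after failover or rerouting, so the rule is
     * mirrored to the paired eswitch by mlx5e_tc_add_fdb_peer_flow()
     * below.
     */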
4518
4519struct mlx5_flow_attr *
4520mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4521{
4522        u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
4523                                sizeof(struct mlx5_esw_flow_attr) :
4524                                sizeof(struct mlx5_nic_flow_attr);
4525        struct mlx5_flow_attr *attr;
4526
4527        return kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4528}
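
    /* The namespace-specific attr (esw or nic) is carved from the tail
     * of this single allocation -- note the sizeof(*attr) + ex_attr_size
     * above -- so one kfree() of the returned attr releases both parts.
     */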
4529
4530static int
4531mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4532                 struct flow_cls_offload *f, unsigned long flow_flags,
4533                 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4534                 struct mlx5e_tc_flow **__flow)
4535{
4536        struct mlx5e_tc_flow_parse_attr *parse_attr;
4537        struct mlx5_flow_attr *attr;
4538        struct mlx5e_tc_flow *flow;
4539        int err = -ENOMEM;
4540        int out_index;
4541
4542        flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4543        parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4544        if (!parse_attr || !flow)
4545                goto err_free;
4546
4547        flow->flags = flow_flags;
4548        flow->cookie = f->cookie;
4549        flow->priv = priv;
4550
4551        attr = mlx5_alloc_flow_attr(get_flow_name_space(flow));
4552        if (!attr)
4553                goto err_free;
4554
4555        flow->attr = attr;
4556
4557        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4558                INIT_LIST_HEAD(&flow->encaps[out_index].list);
4559        INIT_LIST_HEAD(&flow->hairpin);
4560        INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4561        refcount_set(&flow->refcnt, 1);
4562        init_completion(&flow->init_done);
4563
4564        *__flow = flow;
4565        *__parse_attr = parse_attr;
4566
4567        return 0;
4568
4569err_free:
4570        kfree(flow);
4571        kvfree(parse_attr);
4572        return err;
4573}
4574
4575static void
4576mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4577                     struct mlx5e_tc_flow_parse_attr *parse_attr,
4578                     struct flow_cls_offload *f)
4579{
4580        attr->parse_attr = parse_attr;
4581        attr->chain = f->common.chain_index;
4582        attr->prio = f->common.prio;
4583}
4584
4585static void
4586mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4587                         struct mlx5e_priv *priv,
4588                         struct mlx5e_tc_flow_parse_attr *parse_attr,
4589                         struct flow_cls_offload *f,
4590                         struct mlx5_eswitch_rep *in_rep,
4591                         struct mlx5_core_dev *in_mdev)
4592{
4593        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4594        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4595
4596        mlx5e_flow_attr_init(attr, parse_attr, f);
4597
4598        esw_attr->in_rep = in_rep;
4599        esw_attr->in_mdev = in_mdev;
4600
4601        if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4602            MLX5_COUNTER_SOURCE_ESWITCH)
4603                esw_attr->counter_dev = in_mdev;
4604        else
4605                esw_attr->counter_dev = priv->mdev;
4606}
4607
4608static struct mlx5e_tc_flow *
4609__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4610                     struct flow_cls_offload *f,
4611                     unsigned long flow_flags,
4612                     struct net_device *filter_dev,
4613                     struct mlx5_eswitch_rep *in_rep,
4614                     struct mlx5_core_dev *in_mdev)
4615{
4616        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4617        struct netlink_ext_ack *extack = f->common.extack;
4618        struct mlx5e_tc_flow_parse_attr *parse_attr;
4619        struct mlx5e_tc_flow *flow;
4620        int attr_size, err;
4621
4622        flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4623        attr_size  = sizeof(struct mlx5_esw_flow_attr);
4624        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4625                               &parse_attr, &flow);
4626        if (err)
4627                goto out;
4628
4629        parse_attr->filter_dev = filter_dev;
4630        mlx5e_flow_esw_attr_init(flow->attr,
4631                                 priv, parse_attr,
4632                                 f, in_rep, in_mdev);
4633
4634        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4635                               f, filter_dev);
4636        if (err)
4637                goto err_free;
4638
4639        /* actions validation depends on parsing the ct matches first */
4640        err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4641                                   &flow->attr->ct_attr, extack);
4642        if (err)
4643                goto err_free;
4644
4645        err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
4646        if (err)
4647                goto err_free;
4648
4649        err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4650        complete_all(&flow->init_done);
4651        if (err) {
4652                if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4653                        goto err_free;
4654
4655                add_unready_flow(flow);
4656        }
4657
4658        return flow;
4659
4660err_free:
4661        dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4662        mlx5e_flow_put(priv, flow);
4663out:
4664        return ERR_PTR(err);
4665}
4666
4667static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4668                                      struct mlx5e_tc_flow *flow,
4669                                      unsigned long flow_flags)
4670{
4671        struct mlx5e_priv *priv = flow->priv, *peer_priv;
4672        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4673        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4674        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4675        struct mlx5e_tc_flow_parse_attr *parse_attr;
4676        struct mlx5e_rep_priv *peer_urpriv;
4677        struct mlx5e_tc_flow *peer_flow;
4678        struct mlx5_core_dev *in_mdev;
4679        int err = 0;
4680
4681        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4682        if (!peer_esw)
4683                return -ENODEV;
4684
4685        peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4686        peer_priv = netdev_priv(peer_urpriv->netdev);
4687
4688        /* in_mdev is assigned the mdev from which the packet originated.
4689         * Packets redirected to the uplink use the same mdev as the
4690         * original flow, and packets redirected from the uplink use the
4691         * peer mdev.
4692         */
4693        if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4694                in_mdev = peer_priv->mdev;
4695        else
4696                in_mdev = priv->mdev;
4697
4698        parse_attr = flow->attr->parse_attr;
4699        peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4700                                         parse_attr->filter_dev,
4701                                         attr->in_rep, in_mdev);
4702        if (IS_ERR(peer_flow)) {
4703                err = PTR_ERR(peer_flow);
4704                goto out;
4705        }
4706
4707        flow->peer_flow = peer_flow;
4708        flow_flag_set(flow, DUP);
4709        mutex_lock(&esw->offloads.peer_mutex);
4710        list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4711        mutex_unlock(&esw->offloads.peer_mutex);
4712
4713out:
4714        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4715        return err;
4716}
4717
4718static int
4719mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4720                   struct flow_cls_offload *f,
4721                   unsigned long flow_flags,
4722                   struct net_device *filter_dev,
4723                   struct mlx5e_tc_flow **__flow)
4724{
4725        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4726        struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4727        struct mlx5_core_dev *in_mdev = priv->mdev;
4728        struct mlx5e_tc_flow *flow;
4729        int err;
4730
4731        flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4732                                    in_mdev);
4733        if (IS_ERR(flow))
4734                return PTR_ERR(flow);
4735
4736        if (is_peer_flow_needed(flow)) {
4737                err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4738                if (err) {
4739                        mlx5e_tc_del_fdb_flow(priv, flow);
4740                        goto out;
4741                }
4742        }
4743
4744        *__flow = flow;
4745
4746        return 0;
4747
4748out:
4749        return err;
4750}
4751
4752static int
4753mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4754                   struct flow_cls_offload *f,
4755                   unsigned long flow_flags,
4756                   struct net_device *filter_dev,
4757                   struct mlx5e_tc_flow **__flow)
4758{
4759        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4760        struct netlink_ext_ack *extack = f->common.extack;
4761        struct mlx5e_tc_flow_parse_attr *parse_attr;
4762        struct mlx5e_tc_flow *flow;
4763        int attr_size, err;
4764
4765        if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4766                if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4767                        return -EOPNOTSUPP;
4768        } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4769                return -EOPNOTSUPP;
4770        }
4771
4772        flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4773        attr_size  = sizeof(struct mlx5_nic_flow_attr);
4774        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4775                               &parse_attr, &flow);
4776        if (err)
4777                goto out;
4778
4779        parse_attr->filter_dev = filter_dev;
4780        mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4781
4782        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4783                               f, filter_dev);
4784        if (err)
4785                goto err_free;
4786
4787        err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4788                                   &flow->attr->ct_attr, extack);
4789        if (err)
4790                goto err_free;
4791
4792        err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
4793        if (err)
4794                goto err_free;
4795
4796        err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
4797        if (err)
4798                goto err_free;
4799
4800        flow_flag_set(flow, OFFLOADED);
4801        *__flow = flow;
4802
4803        return 0;
4804
4805err_free:
4806        dealloc_mod_hdr_actions(&parse_attr->mod_hdr_acts);
4807        mlx5e_flow_put(priv, flow);
4808out:
4809        return err;
4810}
4811
4812static int
4813mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4814                  struct flow_cls_offload *f,
4815                  unsigned long flags,
4816                  struct net_device *filter_dev,
4817                  struct mlx5e_tc_flow **flow)
4818{
4819        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4820        unsigned long flow_flags;
4821        int err;
4822
4823        get_flags(flags, &flow_flags);
4824
4825        if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4826                return -EOPNOTSUPP;
4827
4828        if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4829                err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4830                                         filter_dev, flow);
4831        else
4832                err = mlx5e_add_nic_flow(priv, f, flow_flags,
4833                                         filter_dev, flow);
4834
4835        return err;
4836}
4837
4838static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4839                                           struct mlx5e_rep_priv *rpriv)
4840{
4841        /* An offloaded flow rule may be duplicated on a non-uplink representor
4842         * sharing a tc block with other slaves of a lag device. rpriv can be NULL
4843         * if this function is called from NIC mode.
4844         */
4845        return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4846}
4847
4848int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4849                           struct flow_cls_offload *f, unsigned long flags)
4850{
4851        struct netlink_ext_ack *extack = f->common.extack;
4852        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4853        struct mlx5e_rep_priv *rpriv = priv->ppriv;
4854        struct mlx5e_tc_flow *flow;
4855        int err = 0;
4856
4857        rcu_read_lock();
4858        flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4859        if (flow) {
4860                /* The same flow rule was already offloaded to a non-uplink
4861                 * representor sharing the tc block; just return 0.
4862                 */
4863                if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4864                        goto rcu_unlock;
4865
4866                NL_SET_ERR_MSG_MOD(extack,
4867                                   "flow cookie already exists, ignoring");
4868                netdev_warn_once(priv->netdev,
4869                                 "flow cookie %lx already exists, ignoring\n",
4870                                 f->cookie);
4871                err = -EEXIST;
4872                goto rcu_unlock;
4873        }
4874rcu_unlock:
4875        rcu_read_unlock();
4876        if (flow)
4877                goto out;
4878
4879        trace_mlx5e_configure_flower(f);
4880        err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4881        if (err)
4882                goto out;
4883
4884        /* The flow rule is offloaded to a non-uplink representor sharing
4885         * the tc block; set the flow's owner dev.
4886         */
4887        if (is_flow_rule_duplicate_allowed(dev, rpriv))
4888                flow->orig_dev = dev;
4889
4890        err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4891        if (err)
4892                goto err_free;
4893
4894        return 0;
4895
4896err_free:
4897        mlx5e_flow_put(priv, flow);
4898out:
4899        return err;
4900}
4901
4902static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4903{
4904        bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4905        bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4906
4907        return flow_flag_test(flow, INGRESS) == dir_ingress &&
4908                flow_flag_test(flow, EGRESS) == dir_egress;
4909}
4910
4911int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4912                        struct flow_cls_offload *f, unsigned long flags)
4913{
4914        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4915        struct mlx5e_tc_flow *flow;
4916        int err;
4917
4918        rcu_read_lock();
4919        flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4920        if (!flow || !same_flow_direction(flow, flags)) {
4921                err = -EINVAL;
4922                goto errout;
4923        }
4924
4925        /* Only delete the flow if it doesn't have the MLX5E_TC_FLOW_DELETED
4926         * flag set.
4927         */
4928        if (flow_flag_test_and_set(flow, DELETED)) {
4929                err = -EINVAL;
4930                goto errout;
4931        }
4932        rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4933        rcu_read_unlock();
4934
4935        trace_mlx5e_delete_flower(f);
4936        mlx5e_flow_put(priv, flow);
4937
4938        return 0;
4939
4940errout:
4941        rcu_read_unlock();
4942        return err;
4943}
4944
4945int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4946                       struct flow_cls_offload *f, unsigned long flags)
4947{
4948        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4949        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4950        struct mlx5_eswitch *peer_esw;
4951        struct mlx5e_tc_flow *flow;
4952        struct mlx5_fc *counter;
4953        u64 lastuse = 0;
4954        u64 packets = 0;
4955        u64 bytes = 0;
4956        int err = 0;
4957
4958        rcu_read_lock();
4959        flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4960                                                tc_ht_params));
4961        rcu_read_unlock();
4962        if (IS_ERR(flow))
4963                return PTR_ERR(flow);
4964
4965        if (!same_flow_direction(flow, flags)) {
4966                err = -EINVAL;
4967                goto errout;
4968        }
4969
4970        if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
4971                counter = mlx5e_tc_get_counter(flow);
4972                if (!counter)
4973                        goto errout;
4974
4975                mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4976        }
4977
4978        /* Under multipath it's possible for one rule to be currently
4979         * un-offloaded while the other rule is offloaded.
4980         */
4981        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4982        if (!peer_esw)
4983                goto out;
4984
4985        if (flow_flag_test(flow, DUP) &&
4986            flow_flag_test(flow->peer_flow, OFFLOADED)) {
4987                u64 bytes2;
4988                u64 packets2;
4989                u64 lastuse2;
4990
4991                counter = mlx5e_tc_get_counter(flow->peer_flow);
4992                if (!counter)
4993                        goto no_peer_counter;
4994                mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4995
4996                bytes += bytes2;
4997                packets += packets2;
4998                lastuse = max_t(u64, lastuse, lastuse2);
4999        }
5000
5001no_peer_counter:
5002        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
5003out:
5004        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
5005                          FLOW_ACTION_HW_STATS_DELAYED);
5006        trace_mlx5e_stats_flower(f);
5007errout:
5008        mlx5e_flow_put(priv, flow);
5009        return err;
5010}
5011
5012static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
5013                               struct netlink_ext_ack *extack)
5014{
5015        struct mlx5e_rep_priv *rpriv = priv->ppriv;
5016        struct mlx5_eswitch *esw;
5017        u16 vport_num;
5018        u32 rate_mbps;
5019        int err;
5020
5021        vport_num = rpriv->rep->vport;
5022        if (vport_num >= MLX5_VPORT_ECPF) {
5023                NL_SET_ERR_MSG_MOD(extack,
5024                                   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
5025                return -EOPNOTSUPP;
5026        }
5027
5028        esw = priv->mdev->priv.eswitch;
5029        /* rate is given in bytes/sec.
5030         * First convert to bits/sec and then round to the nearest Mbit/sec
5031         * (Mbit means million bits). The 64-bit math avoids overflowing the
5032         * intermediate bits/sec value for high rates. Moreover, if rate is
5033         * non-zero we choose to configure a minimum of 1 Mbit/sec.
5034         */
5035        rate_mbps = rate ? max_t(u32, div_u64(rate * 8 + 500000, 1000000), 1) : 0;
5036        err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
5037        if (err)
5038                NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
5039
5040        return err;
5041}
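
    /* Worked example for the conversion above: a police rate of
     * 10 Mbyte/s (rate == 10000000) gives
     * (10000000 * 8 + 500000) / 1000000 = 80 Mbit/s, while a tiny rate
     * of 10000 bytes/s rounds to 0 and is clamped to the 1 Mbit/s
     * minimum by max_t().
     */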
5042
5043static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
5044                                        struct flow_action *flow_action,
5045                                        struct netlink_ext_ack *extack)
5046{
5047        struct mlx5e_rep_priv *rpriv = priv->ppriv;
5048        const struct flow_action_entry *act;
5049        int err;
5050        int i;
5051
5052        if (!flow_action_has_entries(flow_action)) {
5053                NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
5054                return -EINVAL;
5055        }
5056
5057        if (!flow_offload_has_one_action(flow_action)) {
5058                NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
5059                return -EOPNOTSUPP;
5060        }
5061
5062        if (!flow_action_basic_hw_stats_check(flow_action, extack))
5063                return -EOPNOTSUPP;
5064
5065        flow_action_for_each(i, act, flow_action) {
5066                switch (act->id) {
5067                case FLOW_ACTION_POLICE:
5068                        err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
5069                        if (err)
5070                                return err;
5071
5072                        rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
5073                        break;
5074                default:
5075                        NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
5076                        return -EOPNOTSUPP;
5077                }
5078        }
5079
5080        return 0;
5081}
5082
5083int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
5084                                struct tc_cls_matchall_offload *ma)
5085{
5086        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5087        struct netlink_ext_ack *extack = ma->common.extack;
5088
5089        if (!mlx5_esw_qos_enabled(esw)) {
5090                NL_SET_ERR_MSG_MOD(extack, "QoS is not supported on this device");
5091                return -EOPNOTSUPP;
5092        }
5093
5094        if (ma->common.prio != 1) {
5095                NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
5096                return -EINVAL;
5097        }
5098
5099        return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
5100}
5101
5102int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
5103                             struct tc_cls_matchall_offload *ma)
5104{
5105        struct netlink_ext_ack *extack = ma->common.extack;
5106
5107        return apply_police_params(priv, 0, extack);
5108}
5109
5110void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
5111                             struct tc_cls_matchall_offload *ma)
5112{
5113        struct mlx5e_rep_priv *rpriv = priv->ppriv;
5114        struct rtnl_link_stats64 cur_stats;
5115        u64 dbytes;
5116        u64 dpkts;
5117
5118        cur_stats = priv->stats.vf_vport;
5119        dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
5120        dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
5121        rpriv->prev_vf_vport_stats = cur_stats;
5122        flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5123                          FLOW_ACTION_HW_STATS_DELAYED);
5124}
5125
5126static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5127                                              struct mlx5e_priv *peer_priv)
5128{
5129        struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5130        struct mlx5e_hairpin_entry *hpe, *tmp;
5131        LIST_HEAD(init_wait_list);
5132        u16 peer_vhca_id;
5133        int bkt;
5134
5135        if (!same_hw_devs(priv, peer_priv))
5136                return;
5137
5138        peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5139
5140        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
5141        hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
5142                if (refcount_inc_not_zero(&hpe->refcnt))
5143                        list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5144        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
5145
5146        list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5147                wait_for_completion(&hpe->res_ready);
5148                if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5149                        hpe->hp->pair->peer_gone = true;
5150
5151                mlx5e_hairpin_put(priv, hpe);
5152        }
5153}
5154
5155static int mlx5e_tc_netdev_event(struct notifier_block *this,
5156                                 unsigned long event, void *ptr)
5157{
5158        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5159        struct mlx5e_flow_steering *fs;
5160        struct mlx5e_priv *peer_priv;
5161        struct mlx5e_tc_table *tc;
5162        struct mlx5e_priv *priv;
5163
5164        if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5165            event != NETDEV_UNREGISTER ||
5166            ndev->reg_state == NETREG_REGISTERED)
5167                return NOTIFY_DONE;
5168
5169        tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5170        fs = container_of(tc, struct mlx5e_flow_steering, tc);
5171        priv = container_of(fs, struct mlx5e_priv, fs);
5172        peer_priv = netdev_priv(ndev);
5173        if (priv == peer_priv ||
5174            !(priv->netdev->features & NETIF_F_HW_TC))
5175                return NOTIFY_DONE;
5176
5177        mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5178
5179        return NOTIFY_DONE;
5180}
5181
5182static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
5183{
5184        int tc_grp_size, tc_tbl_size;
5185        u32 max_flow_counter;
5186
5187        max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
5188                            MLX5_CAP_GEN(dev, max_flow_counter_15_0);
5189
5190        tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);
5191
5192        tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
5193                            BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));
5194
5195        return tc_tbl_size;
5196}
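
    /* Sizing sketch with assumed cap values: if the device reports
     * max_flow_counter = 32768 and log_max_ft_size = 16, the group size
     * is min(32768, MLX5E_TC_TABLE_MAX_GROUP_SIZE) and the table size
     * is additionally clamped to BIT(16) = 65536 entries, so the tc
     * table can never exceed what the NIC RX flow table supports.
     */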
5197
5198int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5199{
5200        struct mlx5e_tc_table *tc = &priv->fs.tc;
5201        struct mlx5_core_dev *dev = priv->mdev;
5202        struct mlx5_chains_attr attr = {};
5203        int err;
5204
5205        mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5206        mutex_init(&tc->t_lock);
5207        mutex_init(&tc->hairpin_tbl_lock);
5208        hash_init(tc->hairpin_tbl);
5209
5210        err = rhashtable_init(&tc->ht, &tc_ht_params);
5211        if (err)
5212                return err;
5213
5214        if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
5215                attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5216                        MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5217                attr.max_restore_tag = MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5218        }
5219        attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5220        attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
5221        attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5222        attr.default_ft = priv->fs.vlan.ft.t;
5223
5224        tc->chains = mlx5_chains_create(dev, &attr);
5225        if (IS_ERR(tc->chains)) {
5226                err = PTR_ERR(tc->chains);
5227                goto err_chains;
5228        }
5229
5230        tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
5231                                 MLX5_FLOW_NAMESPACE_KERNEL);
5232        if (IS_ERR(tc->ct)) {
5233                err = PTR_ERR(tc->ct);
5234                goto err_ct;
5235        }
5236
5237        tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5238        err = register_netdevice_notifier_dev_net(priv->netdev,
5239                                                  &tc->netdevice_nb,
5240                                                  &tc->netdevice_nn);
5241        if (err) {
5242                tc->netdevice_nb.notifier_call = NULL;
5243                mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5244                goto err_reg;
5245        }
5246
5247        return 0;
5248
5249err_reg:
5250        mlx5_tc_ct_clean(tc->ct);
5251err_ct:
5252        mlx5_chains_destroy(tc->chains);
5253err_chains:
5254        rhashtable_destroy(&tc->ht);
5255        return err;
5256}
5257
5258static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5259{
5260        struct mlx5e_tc_flow *flow = ptr;
5261        struct mlx5e_priv *priv = flow->priv;
5262
5263        mlx5e_tc_del_flow(priv, flow);
5264        kfree(flow);
5265}
5266
5267void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5268{
5269        struct mlx5e_tc_table *tc = &priv->fs.tc;
5270
5271        if (tc->netdevice_nb.notifier_call)
5272                unregister_netdevice_notifier_dev_net(priv->netdev,
5273                                                      &tc->netdevice_nb,
5274                                                      &tc->netdevice_nn);
5275
5276        mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5277        mutex_destroy(&tc->hairpin_tbl_lock);
5278
5279        rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5280
5281        if (!IS_ERR_OR_NULL(tc->t)) {
5282                mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5283                tc->t = NULL;
5284        }
5285        mutex_destroy(&tc->t_lock);
5286
5287        mlx5_tc_ct_clean(tc->ct);
5288        mlx5_chains_destroy(tc->chains);
5289}
5290
5291int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
5292{
5293        const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5294        struct mlx5_rep_uplink_priv *uplink_priv;
5295        struct mlx5e_rep_priv *rpriv;
5296        struct mapping_ctx *mapping;
5297        struct mlx5_eswitch *esw;
5298        struct mlx5e_priv *priv;
5299        int err = 0;
5300
5301        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5302        rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5303        priv = netdev_priv(rpriv->netdev);
5304        esw = priv->mdev->priv.eswitch;
5305
5306        uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5307                                               esw_chains(esw),
5308                                               &esw->offloads.mod_hdr,
5309                                               MLX5_FLOW_NAMESPACE_FDB);
5310        if (IS_ERR(uplink_priv->ct_priv)) {
5311                err = PTR_ERR(uplink_priv->ct_priv);
                    goto err_ct;
            }
5312
5313        mapping = mapping_create(sizeof(struct tunnel_match_key),
5314                                 TUNNEL_INFO_BITS_MASK, true);
5315        if (IS_ERR(mapping)) {
5316                err = PTR_ERR(mapping);
5317                goto err_tun_mapping;
5318        }
5319        uplink_priv->tunnel_mapping = mapping;
5320
5321        mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK, true);
5322        if (IS_ERR(mapping)) {
5323                err = PTR_ERR(mapping);
5324                goto err_enc_opts_mapping;
5325        }
5326        uplink_priv->tunnel_enc_opts_mapping = mapping;
5327
5328        err = rhashtable_init(tc_ht, &tc_ht_params);
5329        if (err)
5330                goto err_ht_init;
5331
5332        return err;
5333
5334err_ht_init:
5335        mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5336err_enc_opts_mapping:
5337        mapping_destroy(uplink_priv->tunnel_mapping);
5338err_tun_mapping:
5339        mlx5_tc_ct_clean(uplink_priv->ct_priv);
5340err_ct:
5341        netdev_warn(priv->netdev,
5342                    "Failed to initialize tc (eswitch), err: %d\n", err);
5343        return err;
5344}
5345
5346void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
5347{
5348        struct mlx5_rep_uplink_priv *uplink_priv;
5349
5350        rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5351
5352        uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht);
5353
5354        mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5355        mapping_destroy(uplink_priv->tunnel_mapping);
5356
5357        mlx5_tc_ct_clean(uplink_priv->ct_priv);
5358}
5359
5360int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5361{
5362        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5363
5364        return atomic_read(&tc_ht->nelems);
5365}
5366
5367void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5368{
5369        struct mlx5e_tc_flow *flow, *tmp;
5370
5371        list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5372                __mlx5e_tc_del_fdb_peer_flow(flow);
5373}
5374
5375void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5376{
5377        struct mlx5_rep_uplink_priv *rpriv =
5378                container_of(work, struct mlx5_rep_uplink_priv,
5379                             reoffload_flows_work);
5380        struct mlx5e_tc_flow *flow, *tmp;
5381
5382        mutex_lock(&rpriv->unready_flows_lock);
5383        list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5384                if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5385                        unready_flow_del(flow);
5386        }
5387        mutex_unlock(&rpriv->unready_flows_lock);
5388}
5389
5390static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5391                                     struct flow_cls_offload *cls_flower,
5392                                     unsigned long flags)
5393{
5394        switch (cls_flower->command) {
5395        case FLOW_CLS_REPLACE:
5396                return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5397                                              flags);
5398        case FLOW_CLS_DESTROY:
5399                return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5400                                           flags);
5401        case FLOW_CLS_STATS:
5402                return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5403                                          flags);
5404        default:
5405                return -EOPNOTSUPP;
5406        }
5407}
5408
5409int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5410                            void *cb_priv)
5411{
5412        unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
5413        struct mlx5e_priv *priv = cb_priv;
5414
5415        switch (type) {
5416        case TC_SETUP_CLSFLOWER:
5417                return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5418        default:
5419                return -EOPNOTSUPP;
5420        }
5421}
5422
5423bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
5424                         struct sk_buff *skb)
5425{
5426#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
5427        u32 chain = 0, chain_tag, reg_b, zone_restore_id;
5428        struct mlx5e_priv *priv = netdev_priv(skb->dev);
5429        struct mlx5e_tc_table *tc = &priv->fs.tc;
5430        struct tc_skb_ext *tc_skb_ext;
5431        int err;
5432
5433        reg_b = be32_to_cpu(cqe->ft_metadata);
5434
5435        chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5436
5437        err = mlx5_get_chain_for_tag(nic_chains(priv), chain_tag, &chain);
5438        if (err) {
5439                netdev_dbg(priv->netdev,
5440                           "Couldn't find chain for chain tag: %d, err: %d\n",
5441                           chain_tag, err);
5442                return false;
5443        }
5444
5445        if (chain) {
5446                tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
5447                if (WARN_ON(!tc_skb_ext))
5448                        return false;
5449
5450                tc_skb_ext->chain = chain;
5451
5452                zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) &
5453                                  ZONE_RESTORE_MAX;
5454
5455                if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
5456                                              zone_restore_id))
5457                        return false;
5458        }
5459#endif /* CONFIG_NET_TC_SKB_EXT */
5460
5461        return true;
5462}
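
    /* Layout of reg_b as consumed above (sketch): the low bits carry
     * the chain restore tag (MLX5E_TC_TABLE_CHAIN_TAG_MASK) and the
     * bits from REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG) upward carry
     * the CT zone restore id, both written by the offloaded rule's
     * mod_hdr action before the packet missed to software.
     */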
5463