linux/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "lib/devcom.h"
#include "lib/geneve.h"

struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        u32 mod_hdr_id;
        u32 hairpin_tirn;
        u8 match_level;
        struct mlx5_flow_table  *hairpin_ft;
        struct mlx5_fc          *counter;
};

#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)

enum {
        MLX5E_TC_FLOW_INGRESS   = MLX5E_TC_INGRESS,
        MLX5E_TC_FLOW_EGRESS    = MLX5E_TC_EGRESS,
        MLX5E_TC_FLOW_ESWITCH   = MLX5E_TC_ESW_OFFLOAD,
        MLX5E_TC_FLOW_NIC       = MLX5E_TC_NIC_OFFLOAD,
        MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE),
        MLX5E_TC_FLOW_HAIRPIN   = BIT(MLX5E_TC_FLOW_BASE + 1),
        MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2),
        MLX5E_TC_FLOW_SLOW        = BIT(MLX5E_TC_FLOW_BASE + 3),
        MLX5E_TC_FLOW_DUP         = BIT(MLX5E_TC_FLOW_BASE + 4),
        MLX5E_TC_FLOW_NOT_READY   = BIT(MLX5E_TC_FLOW_BASE + 5),
};

#define MLX5E_TC_MAX_SPLITS 1
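
/* A flow keeps its primary rule in rule[0] and, when attr->split_count is
 * set, one extra forwarding rule in rule[1]; see
 * mlx5e_tc_offload_fdb_rules() below.
 */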

/* Helper struct for accessing a struct containing a list_head array.
 * Containing struct
 *   |- Helper array
 *      [0] Helper item 0
 *          |- list_head item 0
 *          |- index (0)
 *      [1] Helper item 1
 *          |- list_head item 1
 *          |- index (1)
 * To access the containing struct from one of the list_head items:
 * 1. Get the helper item from the list_head item using
 *    helper item =
 *        container_of(list_head item, helper struct type, list_head field)
 * 2. Get the containing struct from the helper item and its index in the array:
 *    containing struct =
 *        container_of(helper item, containing struct type, helper field[index])
 */
struct encap_flow_item {
        struct list_head list;
        int index;
};
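
/* For illustration: given an "efi" taken off an encap entry's flow list,
 * the two steps above collapse into a single call, as done in
 * mlx5e_tc_encap_flows_add() below:
 *
 *    flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
 */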

struct mlx5e_tc_flow {
        struct rhash_head       node;
        struct mlx5e_priv       *priv;
        u64                     cookie;
        u16                     flags;
        struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
        /* Flow can be associated with multiple encap IDs.
         * The number of encaps is bounded by the number of supported
         * destinations.
         */
        struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
        struct mlx5e_tc_flow    *peer_flow;
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
        struct list_head        hairpin; /* flows sharing the same hairpin */
        struct list_head        peer;    /* flows with peer flow */
        struct list_head        unready; /* flows not ready to be offloaded (e.g. due to missing route) */
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
        };
};

struct mlx5e_tc_flow_parse_attr {
        const struct ip_tunnel_info *tun_info[MLX5_MAX_FLOW_FWD_VPORTS];
        struct net_device *filter_dev;
        struct mlx5_flow_spec spec;
        int num_mod_hdr_actions;
        int max_mod_hdr_actions;
        void *mod_hdr_actions;
        int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
};

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)

struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        u32 tirn;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        u32 indir_tirn[MLX5E_NUM_INDIR_TIRS];
        struct mlx5e_ttc_table ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* flows sharing the same hairpin */
        struct list_head flows;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
};

struct mod_hdr_key {
        int num_actions;
        void *actions;
};

struct mlx5e_mod_hdr_entry {
        /* a node of a hash table which keeps all the mod_hdr entries */
        struct hlist_node mod_hdr_hlist;

        /* flows sharing the same mod_hdr entry */
        struct list_head flows;

        struct mod_hdr_key key;

        u32 mod_hdr_id;
};

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)

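/* The mod_hdr key is hashed and compared over the raw action array;
 * cmp_mod_hdr_info() follows the memcmp() convention and returns 0 only
 * when the two keys are equal.
 */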
static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
        return jhash(key->actions,
                     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
                                   struct mod_hdr_key *b)
{
        if (a->num_actions != b->num_actions)
                return 1;

        return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}

static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int num_actions, actions_size, namespace, err;
        struct mlx5e_mod_hdr_entry *mh;
        struct mod_hdr_key key;
        bool found = false;
        u32 hash_key;

        num_actions  = parse_attr->num_mod_hdr_actions;
        actions_size = MLX5_MH_ACT_SZ * num_actions;

        key.actions = parse_attr->mod_hdr_actions;
        key.num_actions = num_actions;

        hash_key = hash_mod_hdr_info(&key);

        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                namespace = MLX5_FLOW_NAMESPACE_FDB;
                hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
                                       mod_hdr_hlist, hash_key) {
                        if (!cmp_mod_hdr_info(&mh->key, &key)) {
                                found = true;
                                break;
                        }
                }
        } else {
                namespace = MLX5_FLOW_NAMESPACE_KERNEL;
                hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
                                       mod_hdr_hlist, hash_key) {
                        if (!cmp_mod_hdr_info(&mh->key, &key)) {
                                found = true;
                                break;
                        }
                }
        }

        if (found)
                goto attach_flow;

        mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
        if (!mh)
                return -ENOMEM;

        mh->key.actions = (void *)mh + sizeof(*mh);
        memcpy(mh->key.actions, key.actions, actions_size);
        mh->key.num_actions = num_actions;
        INIT_LIST_HEAD(&mh->flows);

        err = mlx5_modify_header_alloc(priv->mdev, namespace,
                                       mh->key.num_actions,
                                       mh->key.actions,
                                       &mh->mod_hdr_id);
        if (err)
                goto out_err;

        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
        else
                hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);

attach_flow:
        list_add(&flow->mod_hdr, &mh->flows);
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
        else
                flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;

        return 0;

out_err:
        kfree(mh);
        return err;
}

static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        struct list_head *next = flow->mod_hdr.next;

        list_del(&flow->mod_hdr);

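        /* no more flows sharing this mod_hdr entry, release the mod_hdr ID */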
        if (list_empty(next)) {
                struct mlx5e_mod_hdr_entry *mh;

                mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);

                mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
                hash_del(&mh->mod_hdr_hlist);
                kfree(mh);
        }
}

static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = __dev_get_by_index(net, ifindex);
        priv = netdev_priv(netdev);
        return priv->mdev;
}

static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
        void *tirc;
        int err;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto alloc_tdn_err;

        tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

        MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT);
        MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
        MLX5_SET(tirc, tirc, transport_domain, hp->tdn);

        err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
        if (err)
                goto create_tir_err;

        return 0;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
alloc_tdn_err:
        return err;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5_core_destroy_tir(hp->func_mdev, hp->tirn);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc)
{
        u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn;
        struct mlx5e_priv *priv = hp->func_priv;
        int i, ix, sz = MLX5E_INDIR_RQT_SIZE;

        mlx5e_build_default_indir_rqt(indirection_rqt, sz,
                                      hp->num_channels);

        for (i = 0; i < sz; i++) {
                ix = i;
                if (priv->rss_params.hfunc == ETH_RSS_HASH_XOR)
                        ix = mlx5e_bits_invert(i, ilog2(sz));
                ix = indirection_rqt[ix];
                rqn = hp->pair->rqn[ix];
                MLX5_SET(rqtc, rqtc, rq_num[i], rqn);
        }
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        int inlen, err, sz = MLX5E_INDIR_RQT_SIZE;
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        void *rqtc;
        u32 *in;

        inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;

        rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

        MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
        MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

        mlx5e_hairpin_fill_rqt_rqns(hp, rqtc);

        err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn);
        if (!err)
                hp->indir_rqt.enabled = true;

        kvfree(in);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        u32 in[MLX5_ST_SZ_DW(create_tir_in)];
        int tt, i, err;
        void *tirc;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_tirc_config ttconfig = mlx5e_tirc_get_default_config(tt);

                memset(in, 0, MLX5_ST_SZ_BYTES(create_tir_in));
                tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

                MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
                MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
                MLX5_SET(tirc, tirc, indirect_table, hp->indir_rqt.rqtn);
                mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);

                err = mlx5_core_create_tir(hp->func_mdev, in,
                                           MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }
        }
        return 0;

err_destroy_tirs:
        for (i = 0; i < tt; i++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[i]);
        return err;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5_core_destroy_tir(hp->func_mdev, hp->indir_tirn[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->any_tt_tirn = hp->tirn;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];

        ft_attr->max_fte = MLX5E_NUM_TT;
        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc);
        if (err)
                goto err_create_ttc_table;

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels, hp->ttc.ft.t->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;

        mlx5e_destroy_ttc_table(priv, &hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_destroy_rqt(priv, &hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

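/* The hairpin hash key packs the peer's vhca id into the high 16 bits and
 * the matched PCP prio into the low bits; mlx5e_hairpin_get() relies on
 * the same packing for lookup.
 */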
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio)
                        return hpe;
        }

        return NULL;
}

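/* Valid PCP prio values span 0..7 (three bits), so 8 safely marks a flow
 * that does not match on a specific prio.
 */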
#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe)
                goto attach_flow;

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe)
                return -ENOMEM;

        INIT_LIST_HEAD(&hpe->flows);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;

        params.log_data_size = 15;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* allocate one hairpin channel per each 50 Gbps share of the link speed */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;
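        /* e.g. a 100 Gbps port gets two hairpin channels, while links at or
         * below 50 Gbps get a single channel
         */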

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto create_hairpin_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   hp->tirn, hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

        hpe->hp = hp;
        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS;
                flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t;
        } else {
                flow->nic_attr->hairpin_tirn = hpe->hp->tirn;
        }
        list_add(&flow->hairpin, &hpe->flows);

        return 0;

create_hairpin_err:
        kfree(hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        struct list_head *next = flow->hairpin.next;

        list_del(&flow->hairpin);

        /* no more hairpin flows for us, release the hairpin pair */
        if (list_empty(next)) {
                struct mlx5e_hairpin_entry *hpe;

                hpe = list_entry(next, struct mlx5e_hairpin_entry, flows);

                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
                hash_del(&hpe->hairpin_hlist);
                kfree(hpe);
        }
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_flow_context *flow_context = &parse_attr->spec.flow_context;
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .reformat_id = 0,
                .flags    = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_fc *counter = NULL;
        bool table_created = false;
        int err, dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = attr->flow_tag;

        if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        goto err_add_hairpin_flow;
                if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                        dest[dest_ix].ft = attr->hairpin_ft;
                } else {
                        dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                        dest[dest_ix].tir_num = attr->hairpin_tirn;
                }
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = priv->fs.vlan.ft.t;
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter)) {
                        err = PTR_ERR(counter);
                        goto err_fc_create;
                }
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(counter);
                dest_ix++;
                attr->counter = counter;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                flow_act.modify_id = attr->mod_hdr_id;
                kfree(parse_attr->mod_hdr_actions);
                if (err)
                        goto err_create_mod_hdr_id;
        }

        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                int tc_grp_size, tc_tbl_size;
                u32 max_flow_counter;

                max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
                                    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

                tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

                tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
                                    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
                                                            MLX5E_TC_PRIO,
                                                            tc_tbl_size,
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
                                                            MLX5E_TC_FT_LEVEL, 0);
                if (IS_ERR(priv->fs.tc.t)) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Failed to create tc offload table");
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        err = PTR_ERR(priv->fs.tc.t);
                        goto err_create_ft;
                }

                table_created = true;
        }

        if (attr->match_level != MLX5_MATCH_NONE)
                parse_attr->spec.match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                            &flow_act, dest, dest_ix);

        if (IS_ERR(flow->rule[0])) {
                err = PTR_ERR(flow->rule[0]);
                goto err_add_rule;
        }

        return 0;

err_add_rule:
        if (table_created) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
err_create_ft:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
err_create_mod_hdr_id:
        mlx5_fc_destroy(dev, counter);
err_fc_create:
        if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
                mlx5e_hairpin_flow_del(priv, flow);
err_add_hairpin_flow:
        return err;
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_fc *counter = NULL;

        counter = attr->counter;
        mlx5_del_flow_rules(flow->rule[0]);
        mlx5_fc_destroy(priv->mdev, counter);

        if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
                mlx5e_hairpin_flow_del(priv, flow);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow, int out_index);

static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
                              int out_index,
                              struct netlink_ext_ack *extack,
                              struct net_device **encap_dev,
                              bool *encap_valid);

static struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_esw_flow_attr *attr)
{
        struct mlx5_flow_handle *rule;

        rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
        if (IS_ERR(rule))
                return rule;

        if (attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1])) {
                        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                        return flow->rule[1];
                }
        }

        flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
        return rule;
}

static void
mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_esw_flow_attr *attr)
{
        flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;

        if (attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

        mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
}

static struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec,
                              struct mlx5_esw_flow_attr *slow_attr)
{
        struct mlx5_flow_handle *rule;

        memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->split_count = 0;
        slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;

        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
                flow->flags |= MLX5E_TC_FLOW_SLOW;

        return rule;
}

static void
mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_esw_flow_attr *slow_attr)
{
        memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->split_count = 0;
        slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        flow->flags &= ~MLX5E_TC_FLOW_SLOW;
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        flow->flags |= MLX5E_TC_FLOW_NOT_READY;
        list_add_tail(&flow->unready, &uplink_priv->unready_flows);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow->flags &= ~MLX5E_TC_FLOW_NOT_READY;
}

static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        u32 max_chain = mlx5_eswitch_get_chain_range(esw);
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
        u16 max_prio = mlx5_eswitch_get_prio_range(esw);
        struct net_device *out_dev, *encap_dev = NULL;
        struct mlx5_fc *counter = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        bool encap_valid = true;
        int err = 0;
        int out_index;

        if (!mlx5_eswitch_prios_supported(esw) && attr->prio != 1) {
                NL_SET_ERR_MSG(extack, "E-switch priorities unsupported, upgrade FW");
                return -EOPNOTSUPP;
        }

        if (attr->chain > max_chain) {
                NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
                err = -EOPNOTSUPP;
                goto err_max_prio_chain;
        }

        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
                err = -EOPNOTSUPP;
                goto err_max_prio_chain;
        }

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                int mirred_ifindex;

                if (!(attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = __dev_get_by_index(dev_net(priv->netdev),
                                             mirred_ifindex);
                err = mlx5e_attach_encap(priv, flow, out_dev, out_index,
                                         extack, &encap_dev, &encap_valid);
                if (err)
                        goto err_attach_encap;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                attr->dests[out_index].rep = rpriv->rep;
                attr->dests[out_index].mdev = out_priv->mdev;
        }

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                goto err_add_vlan;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                kfree(parse_attr->mod_hdr_actions);
                if (err)
                        goto err_mod_hdr;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(attr->counter_dev, true);
                if (IS_ERR(counter)) {
                        err = PTR_ERR(counter);
                        goto err_create_counter;
                }

                attr->counter = counter;
        }

        /* we get here if one of the following takes place:
         * (1) there's no error
         * (2) there's an encap action and we don't have valid neigh
         */
        if (!encap_valid) {
                /* fall back to a rule that forwards to the slow path chain */
                struct mlx5_esw_flow_attr slow_attr;

                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
        } else {
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
        }

        if (IS_ERR(flow->rule[0])) {
                err = PTR_ERR(flow->rule[0]);
                goto err_add_rule;
        }

        return 0;

err_add_rule:
        mlx5_fc_destroy(attr->counter_dev, counter);
err_create_counter:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
        mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
                if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
                        mlx5e_detach_encap(priv, flow, out_index);
err_attach_encap:
err_max_prio_chain:
        return err;
}

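/* Matching on geneve TLV option data implies the flow took a reference on
 * the geneve TLV option object; mlx5e_tc_del_fdb_flow() uses this check to
 * know when to release it via mlx5_geneve_tlv_option_del().
 */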
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_spec *spec = &flow->esw_attr->parse_attr->spec;
        void *headers_v = MLX5_ADDR_OF(fte_match_param,
                                       spec->match_value,
                                       misc_parameters_3);
        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
                                             headers_v,
                                             geneve_tlv_option_0_data);

        return !!geneve_tlv_opt_0_data;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5_esw_flow_attr slow_attr;
        int out_index;

        if (flow->flags & MLX5E_TC_FLOW_NOT_READY) {
                remove_unready_flow(flow);
                kvfree(attr->parse_attr);
                return;
        }

        if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                if (flow->flags & MLX5E_TC_FLOW_SLOW)
                        mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }

        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);

        mlx5_eswitch_del_vlan_action(esw, attr);

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
                if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
                        mlx5e_detach_encap(priv, flow, out_index);
        kvfree(attr->parse_attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(attr->counter_dev, attr->counter);
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr slow_attr, *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;
        int err;

        err = mlx5_packet_reformat_alloc(priv->mdev,
                                         e->reformat_type,
                                         e->encap_size, e->encap_header,
                                         MLX5_FLOW_NAMESPACE_FDB,
                                         &e->encap_id);
        if (err) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
                               err);
                return;
        }
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);

        list_for_each_entry(efi, &e->flows, list) {
                bool all_flow_encaps_valid = true;
                int i;

                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
                esw_attr = flow->esw_attr;
                spec = &esw_attr->parse_attr->spec;

                esw_attr->dests[efi->index].encap_id = e->encap_id;
                esw_attr->dests[efi->index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
                /* Flow can be associated with multiple encap entries.
                 * Before offloading the flow verify that all of them have
                 * a valid neighbour.
                 */
                for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
                                continue;
                        if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
                                all_flow_encaps_valid = false;
                                break;
                        }
                }
                /* Do not offload flows with unresolved neighbors */
                if (!all_flow_encaps_valid)
                        continue;
                /* update from slow path rule to encap rule */
                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
                flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
                flow->rule[0] = rule;
        }
}

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr slow_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;
        int err;

        list_for_each_entry(efi, &e->flows, list) {
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
                spec = &flow->esw_attr->parse_attr->spec;

                /* update from encap rule to slow path rule */
                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
                /* mark the flow's encap dest as non-valid */
                flow->esw_attr->dests[efi->index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
                flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
                flow->rule[0] = rule;
        }

        /* we know that the encap is valid */
        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
        mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
}

static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                return flow->esw_attr->counter;
        else
                return flow->nic_attr->counter;
}

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
        struct mlx5e_tc_flow *flow;
        struct mlx5e_encap_entry *e;
        struct mlx5_fc *counter;
        struct neigh_table *tbl;
        bool neigh_used = false;
        struct neighbour *n;
        u64 lastuse;

        if (m_neigh->family == AF_INET)
                tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
        else if (m_neigh->family == AF_INET6)
                tbl = &nd_tbl;
#endif
        else
                return;

        list_for_each_entry(e, &nhe->encap_list, encap_list) {
                struct encap_flow_item *efi;

                if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
                        continue;
                list_for_each_entry(efi, &e->flows, list) {
                        flow = container_of(efi, struct mlx5e_tc_flow,
                                            encaps[efi->index]);
                        if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                                counter = mlx5e_tc_get_counter(flow);
                                lastuse = mlx5_fc_query_lastuse(counter);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
                                        neigh_used = true;
                                        break;
                                }
                        }
                }
                if (neigh_used)
                        break;
        }

        if (neigh_used) {
                nhe->reported_lastuse = jiffies;

                /* find the relevant neigh according to the cached device and
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
                if (!n)
                        return;

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow, int out_index)
{
        struct list_head *next = flow->encaps[out_index].list.next;

        list_del(&flow->encaps[out_index].list);
        if (list_empty(next)) {
                struct mlx5e_encap_entry *e;

                e = list_entry(next, struct mlx5e_encap_entry, flows);
                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
                        mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);

                hash_del_rcu(&e->encap_hlist);
                kfree(e->encap_header);
                kfree(e);
        }
}

static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

        if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
            !(flow->flags & MLX5E_TC_FLOW_DUP))
                return;

        mutex_lock(&esw->offloads.peer_mutex);
        list_del(&flow->peer);
        mutex_unlock(&esw->offloads.peer_mutex);

        flow->flags &= ~MLX5E_TC_FLOW_DUP;

        mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
        kvfree(flow->peer_flow);
        flow->peer_flow = NULL;
}

static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_core_dev *dev = flow->priv->mdev;
        struct mlx5_devcom *devcom = dev->priv.devcom;
        struct mlx5_eswitch *peer_esw;

        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        if (!peer_esw)
                return;

        __mlx5e_tc_del_fdb_peer_flow(flow);
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                mlx5e_tc_del_fdb_peer_flow(flow);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
                mlx5e_tc_del_nic_flow(priv, flow);
        }
}
1350
1351static int parse_tunnel_attr(struct mlx5e_priv *priv,
1352                             struct mlx5_flow_spec *spec,
1353                             struct flow_cls_offload *f,
1354                             struct net_device *filter_dev, u8 *match_level)
1355{
1356        struct netlink_ext_ack *extack = f->common.extack;
1357        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1358                                       outer_headers);
1359        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1360                                       outer_headers);
1361        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1362        int err;
1363
1364        err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
1365                                 headers_c, headers_v, match_level);
1366        if (err) {
1367                NL_SET_ERR_MSG_MOD(extack,
1368                                   "failed to parse tunnel attributes");
1369                return err;
1370        }
1371
1372        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
1373                struct flow_match_ipv4_addrs match;
1374
1375                flow_rule_match_enc_ipv4_addrs(rule, &match);
1376                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1377                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
1378                         ntohl(match.mask->src));
1379                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1380                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
1381                         ntohl(match.key->src));
1382
1383                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1384                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1385                         ntohl(match.mask->dst));
1386                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1387                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
1388                         ntohl(match.key->dst));
1389
1390                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1391                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
1392        } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
1393                struct flow_match_ipv6_addrs match;
1394
1395                flow_rule_match_enc_ipv6_addrs(rule, &match);
1396                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1397                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
1398                       &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1399                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1400                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
1401                       &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1402
1403                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1404                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1405                       &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1406                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1407                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1408                       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
1409
1410                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
1411                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
1412        }
1413
1414        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
1415                struct flow_match_ip match;
1416
1417                flow_rule_match_enc_ip(rule, &match);
1418                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
1419                         match.mask->tos & 0x3);
1420                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
1421                         match.key->tos & 0x3);
1422
1423                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
1424                         match.mask->tos >> 2);
1425                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
1426                         match.key->tos  >> 2);
1427
1428                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
1429                         match.mask->ttl);
1430                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
1431                         match.key->ttl);
1432
1433                if (match.mask->ttl &&
1434                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
1435                                                ft_field_support.outer_ipv4_ttl)) {
1436                        NL_SET_ERR_MSG_MOD(extack,
1437                                           "Matching on TTL is not supported");
1438                        return -EOPNOTSUPP;
1439                }
1440        }
1443
1444        /* Enforce DMAC when offloading incoming tunneled flows.
1445         * Flow counters require a match on the DMAC.
1446         */
1447        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
1448        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
1449        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1450                                     dmac_47_16), priv->netdev->dev_addr);
1451
1452        /* let software handle IP fragments */
1453        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1454        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
1455
1456        return 0;
1457}
1458
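/* When a flow is decapsulated (MLX5_FLOW_CONTEXT_ACTION_DECAP), the L2-L4
 * match must be programmed against the inner packet headers; otherwise it
 * goes on the outer ones. The two helpers below return the right offset
 * into the match criteria/value blobs so callers don't have to care which
 * case they are in.
 */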
1459static void *get_match_headers_criteria(u32 flags,
1460                                        struct mlx5_flow_spec *spec)
1461{
1462        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1463                MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1464                             inner_headers) :
1465                MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1466                             outer_headers);
1467}
1468
1469static void *get_match_headers_value(u32 flags,
1470                                     struct mlx5_flow_spec *spec)
1471{
1472        return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
1473                MLX5_ADDR_OF(fte_match_param, spec->match_value,
1474                             inner_headers) :
1475                MLX5_ADDR_OF(fte_match_param, spec->match_value,
1476                             outer_headers);
1477}
1478
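/* Translate a flower match into an mlx5 flow spec. For tunneled flows the
 * outer headers are handled first by parse_tunnel_attr() and the remaining
 * dissector keys are applied to the inner headers, so two match levels are
 * reported: one for the inner and one for the outer headers.
 *
 * Illustrative tc(8) rule (device name, addresses and key id are examples
 * only):
 *   tc filter add dev vxlan0 ingress protocol ip flower \
 *       enc_dst_ip 10.0.0.1 enc_key_id 100 ip_proto tcp dst_port 80 \
 *       action drop
 * yields an outer match on the tunnel headers plus an inner match up to L4.
 */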
1479static int __parse_cls_flower(struct mlx5e_priv *priv,
1480                              struct mlx5_flow_spec *spec,
1481                              struct flow_cls_offload *f,
1482                              struct net_device *filter_dev,
1483                              u8 *inner_match_level, u8 *outer_match_level)
1484{
1485        struct netlink_ext_ack *extack = f->common.extack;
1486        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1487                                       outer_headers);
1488        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1489                                       outer_headers);
1490        void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
1491                                    misc_parameters);
1492        void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
1493                                    misc_parameters);
1494        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1495        struct flow_dissector *dissector = rule->match.dissector;
1496        u16 addr_type = 0;
1497        u8 ip_proto = 0;
1498        u8 *match_level;
1499
1500        match_level = outer_match_level;
1501
1502        if (dissector->used_keys &
1503            ~(BIT(FLOW_DISSECTOR_KEY_META) |
1504              BIT(FLOW_DISSECTOR_KEY_CONTROL) |
1505              BIT(FLOW_DISSECTOR_KEY_BASIC) |
1506              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
1507              BIT(FLOW_DISSECTOR_KEY_VLAN) |
1508              BIT(FLOW_DISSECTOR_KEY_CVLAN) |
1509              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
1510              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
1511              BIT(FLOW_DISSECTOR_KEY_PORTS) |
1512              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
1513              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
1514              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
1515              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
1516              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
1517              BIT(FLOW_DISSECTOR_KEY_TCP) |
1518              BIT(FLOW_DISSECTOR_KEY_IP)  |
1519              BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
1520              BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
1521                NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
1522                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
1523                            dissector->used_keys);
1524                return -EOPNOTSUPP;
1525        }
1526
1527        if (mlx5e_get_tc_tun(filter_dev)) {
1528                if (parse_tunnel_attr(priv, spec, f, filter_dev,
1529                                      outer_match_level))
1530                        return -EOPNOTSUPP;
1531
1532                /* At this point, header pointers should point to the inner
1533                 * headers; the outer headers were already set by parse_tunnel_attr().
1534                 */
1535                match_level = inner_match_level;
1536                headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1537                                                       spec);
1538                headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP,
1539                                                    spec);
1540        }
1541
1542        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1543                struct flow_match_basic match;
1544
1545                flow_rule_match_basic(rule, &match);
1546                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
1547                         ntohs(match.mask->n_proto));
1548                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
1549                         ntohs(match.key->n_proto));
1550
1551                if (match.mask->n_proto)
1552                        *match_level = MLX5_MATCH_L2;
1553        }
1554        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
1555            is_vlan_dev(filter_dev)) {
1556                struct flow_dissector_key_vlan filter_dev_mask;
1557                struct flow_dissector_key_vlan filter_dev_key;
1558                struct flow_match_vlan match;
1559
1560                if (is_vlan_dev(filter_dev)) {
1561                        match.key = &filter_dev_key;
1562                        match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
1563                        match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
1564                        match.key->vlan_priority = 0;
1565                        match.mask = &filter_dev_mask;
1566                        memset(match.mask, 0xff, sizeof(*match.mask));
1567                        match.mask->vlan_priority = 0;
1568                } else {
1569                        flow_rule_match_vlan(rule, &match);
1570                }
1571                if (match.mask->vlan_id ||
1572                    match.mask->vlan_priority ||
1573                    match.mask->vlan_tpid) {
1574                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1575                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1576                                         svlan_tag, 1);
1577                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1578                                         svlan_tag, 1);
1579                        } else {
1580                                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1581                                         cvlan_tag, 1);
1582                                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1583                                         cvlan_tag, 1);
1584                        }
1585
1586                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
1587                                 match.mask->vlan_id);
1588                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
1589                                 match.key->vlan_id);
1590
1591                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
1592                                 match.mask->vlan_priority);
1593                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
1594                                 match.key->vlan_priority);
1595
1596                        *match_level = MLX5_MATCH_L2;
1597                }
1598        } else if (*match_level != MLX5_MATCH_NONE) {
1599                MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1);
1600                MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1601                *match_level = MLX5_MATCH_L2;
1602        }
1603
1604        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
1605                struct flow_match_vlan match;
1606
1607                flow_rule_match_cvlan(rule, &match);
1608                if (match.mask->vlan_id ||
1609                    match.mask->vlan_priority ||
1610                    match.mask->vlan_tpid) {
1611                        if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
1612                                MLX5_SET(fte_match_set_misc, misc_c,
1613                                         outer_second_svlan_tag, 1);
1614                                MLX5_SET(fte_match_set_misc, misc_v,
1615                                         outer_second_svlan_tag, 1);
1616                        } else {
1617                                MLX5_SET(fte_match_set_misc, misc_c,
1618                                         outer_second_cvlan_tag, 1);
1619                                MLX5_SET(fte_match_set_misc, misc_v,
1620                                         outer_second_cvlan_tag, 1);
1621                        }
1622
1623                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
1624                                 match.mask->vlan_id);
1625                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
1626                                 match.key->vlan_id);
1627                        MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
1628                                 match.mask->vlan_priority);
1629                        MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
1630                                 match.key->vlan_priority);
1631
1632                        *match_level = MLX5_MATCH_L2;
1633                }
1634        }
1635
1636        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
1637                struct flow_match_eth_addrs match;
1638
1639                flow_rule_match_eth_addrs(rule, &match);
1640                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1641                                             dmac_47_16),
1642                                match.mask->dst);
1643                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1644                                             dmac_47_16),
1645                                match.key->dst);
1646
1647                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1648                                             smac_47_16),
1649                                match.mask->src);
1650                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1651                                             smac_47_16),
1652                                match.key->src);
1653
1654                if (!is_zero_ether_addr(match.mask->src) ||
1655                    !is_zero_ether_addr(match.mask->dst))
1656                        *match_level = MLX5_MATCH_L2;
1657        }
1658
1659        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1660                struct flow_match_control match;
1661
1662                flow_rule_match_control(rule, &match);
1663                addr_type = match.key->addr_type;
1664
1665                /* the HW doesn't support frag first/later */
1666                if (match.mask->flags & FLOW_DIS_FIRST_FRAG)
1667                        return -EOPNOTSUPP;
1668
1669                if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
1670                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
1671                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
1672                                 match.key->flags & FLOW_DIS_IS_FRAGMENT);
1673
1674                        /* the HW doesn't need L3 inline to match on frag=no */
1675                        if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
1676                                *match_level = MLX5_MATCH_L2;
1677                        else
1678                                *match_level = MLX5_MATCH_L3;
1679                }
1680        }
1681
1682        /* ***  L2 attributes parsing up to here *** */
1683        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1684                struct flow_match_basic match;
1685
1686                flow_rule_match_basic(rule, &match);
1687                ip_proto = match.key->ip_proto;
1688
1689                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
1690                         match.mask->ip_proto);
1691                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
1692                         match.key->ip_proto);
1693
1694                if (match.mask->ip_proto)
1695                        *match_level = MLX5_MATCH_L3;
1696        }
1697
1698        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1699                struct flow_match_ipv4_addrs match;
1700
1701                flow_rule_match_ipv4_addrs(rule, &match);
1702                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1703                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
1704                       &match.mask->src, sizeof(match.mask->src));
1705                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1706                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
1707                       &match.key->src, sizeof(match.key->src));
1708                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1709                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1710                       &match.mask->dst, sizeof(match.mask->dst));
1711                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1712                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1713                       &match.key->dst, sizeof(match.key->dst));
1714
1715                if (match.mask->src || match.mask->dst)
1716                        *match_level = MLX5_MATCH_L3;
1717        }
1718
1719        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1720                struct flow_match_ipv6_addrs match;
1721
1722                flow_rule_match_ipv6_addrs(rule, &match);
1723                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1724                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
1725                       &match.mask->src, sizeof(match.mask->src));
1726                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1727                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
1728                       &match.key->src, sizeof(match.key->src));
1729
1730                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
1731                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1732                       &match.mask->dst, sizeof(match.mask->dst));
1733                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
1734                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
1735                       &match.key->dst, sizeof(match.key->dst));
1736
1737                if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
1738                    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
1739                        *match_level = MLX5_MATCH_L3;
1740        }
1741
1742        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
1743                struct flow_match_ip match;
1744
1745                flow_rule_match_ip(rule, &match);
1746                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
1747                         match.mask->tos & 0x3);
1748                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
1749                         match.key->tos & 0x3);
1750
1751                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
1752                         match.mask->tos >> 2);
1753                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
1754                         match.key->tos  >> 2);
1755
1756                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
1757                         match.mask->ttl);
1758                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
1759                         match.key->ttl);
1760
1761                if (match.mask->ttl &&
1762                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
1763                                                ft_field_support.outer_ipv4_ttl)) {
1764                        NL_SET_ERR_MSG_MOD(extack,
1765                                           "Matching on TTL is not supported");
1766                        return -EOPNOTSUPP;
1767                }
1768
1769                if (match.mask->tos || match.mask->ttl)
1770                        *match_level = MLX5_MATCH_L3;
1771        }
1772
1773        /* ***  L3 attributes parsing up to here *** */
1774
1775        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1776                struct flow_match_ports match;
1777
1778                flow_rule_match_ports(rule, &match);
1779                switch (ip_proto) {
1780                case IPPROTO_TCP:
1781                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1782                                 tcp_sport, ntohs(match.mask->src));
1783                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1784                                 tcp_sport, ntohs(match.key->src));
1785
1786                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1787                                 tcp_dport, ntohs(match.mask->dst));
1788                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1789                                 tcp_dport, ntohs(match.key->dst));
1790                        break;
1791
1792                case IPPROTO_UDP:
1793                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1794                                 udp_sport, ntohs(match.mask->src));
1795                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1796                                 udp_sport, ntohs(match.key->src));
1797
1798                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
1799                                 udp_dport, ntohs(match.mask->dst));
1800                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
1801                                 udp_dport, ntohs(match.key->dst));
1802                        break;
1803                default:
1804                        NL_SET_ERR_MSG_MOD(extack,
1805                                           "Only UDP and TCP transports are supported for L4 matching");
1806                        netdev_err(priv->netdev,
1807                                   "Only UDP and TCP transports are supported\n");
1808                        return -EINVAL;
1809                }
1810
1811                if (match.mask->src || match.mask->dst)
1812                        *match_level = MLX5_MATCH_L4;
1813        }
1814
1815        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
1816                struct flow_match_tcp match;
1817
1818                flow_rule_match_tcp(rule, &match);
1819                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
1820                         ntohs(match.mask->flags));
1821                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
1822                         ntohs(match.key->flags));
1823
1824                if (match.mask->flags)
1825                        *match_level = MLX5_MATCH_L4;
1826        }
1827
1828        return 0;
1829}
1830
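/* Wrapper around __parse_cls_flower() that also enforces the eswitch
 * min-inline constraint: for non-uplink representors the configured inline
 * mode must cover the deepest non-tunnel header layer being matched,
 * otherwise the flow is rejected instead of being silently mis-offloaded.
 */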
1831static int parse_cls_flower(struct mlx5e_priv *priv,
1832                            struct mlx5e_tc_flow *flow,
1833                            struct mlx5_flow_spec *spec,
1834                            struct flow_cls_offload *f,
1835                            struct net_device *filter_dev)
1836{
1837        u8 inner_match_level, outer_match_level, non_tunnel_match_level;
1838        struct netlink_ext_ack *extack = f->common.extack;
1839        struct mlx5_core_dev *dev = priv->mdev;
1840        struct mlx5_eswitch *esw = dev->priv.eswitch;
1841        struct mlx5e_rep_priv *rpriv = priv->ppriv;
1842        struct mlx5_eswitch_rep *rep;
1843        int err;
1844
1845        inner_match_level = MLX5_MATCH_NONE;
1846        outer_match_level = MLX5_MATCH_NONE;
1847
1848        err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level,
1849                                 &outer_match_level);
1850        non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
1851                                 outer_match_level : inner_match_level;
1852
1853        if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
1854                rep = rpriv->rep;
1855                if (rep->vport != MLX5_VPORT_UPLINK &&
1856                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
1857                    esw->offloads.inline_mode < non_tunnel_match_level)) {
1858                        NL_SET_ERR_MSG_MOD(extack,
1859                                           "Flow is not offloaded due to min inline setting");
1860                        netdev_warn(priv->netdev,
1861                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
1862                                    non_tunnel_match_level, esw->offloads.inline_mode);
1863                        return -EOPNOTSUPP;
1864                }
1865        }
1866
1867        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
1868                flow->esw_attr->inner_match_level = inner_match_level;
1869                flow->esw_attr->outer_match_level = outer_match_level;
1870        } else {
1871                flow->nic_attr->match_level = non_tunnel_match_level;
1872        }
1873
1874        return err;
1875}
1876
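/* Pedit (header rewrite) offload: SW pedit keys are first accumulated into
 * the shadow header layouts below, one copy per command (set/add), and only
 * then translated into HW modify-header actions by offload_pedit_fields().
 */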
1877struct pedit_headers {
1878        struct ethhdr  eth;
1879        struct vlan_hdr vlan;
1880        struct iphdr   ip4;
1881        struct ipv6hdr ip6;
1882        struct tcphdr  tcp;
1883        struct udphdr  udp;
1884};
1885
1886struct pedit_headers_action {
1887        struct pedit_headers    vals;
1888        struct pedit_headers    masks;
1889        u32                     pedits;
1890};
1891
1892static int pedit_header_offsets[] = {
1893        [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
1894        [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
1895        [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
1896        [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
1897        [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
1898};
1899
1900#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
1901
1902static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
1903                         struct pedit_headers_action *hdrs)
1904{
1905        u32 *curr_pmask, *curr_pval;
1906
1907        curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset);
1908        curr_pval  = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
1909
1910        if (*curr_pmask & mask)  /* disallow acting twice on the same location */
1911                goto out_err;
1912
1913        *curr_pmask |= mask;
1914        *curr_pval  |= (val & mask);
1915
1916        return 0;
1917
1918out_err:
1919        return -EOPNOTSUPP;
1920}
1921
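/* Maps one contiguous chunk of the pedit shadow headers (offset/size) to
 * the FW modify-header field id and to the field's offset inside the
 * fte_match_set_lyr_2_4 layout, so a rewrite can be compared against what
 * the rule already matches on.
 */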
1922struct mlx5_fields {
1923        u8  field;
1924        u8  size;
1925        u32 offset;
1926        u32 match_offset;
1927};
1928
1929#define OFFLOAD(fw_field, size, field, off, match_field) \
1930                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, \
1931                 offsetof(struct pedit_headers, field) + (off), \
1932                 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
1933
1934/* true if the masked values are equal and there is no rewritten bit that
1935 * the rule does not also match on.
1936 */
1937#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
1938        type matchmaskx = *(type *)(matchmaskp); \
1939        type matchvalx = *(type *)(matchvalp); \
1940        type maskx = *(type *)(maskp); \
1941        type valx = *(type *)(valp); \
1942        \
1943        (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
1944                                                                 matchmaskx)); \
1945})
1946
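/* Illustrative example: if the rule matches tcp_dport == 80 with a full
 * 16-bit mask and a pedit sets tcp.dest to 80 as well, cmp_val_mask()
 * returns true and offload_pedit_fields() skips the rewrite as a no-op.
 */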
1947static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
1948                         void *matchmaskp, int size)
1949{
1950        bool same = false;
1951
1952        switch (size) {
1953        case sizeof(u8):
1954                same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
1955                break;
1956        case sizeof(u16):
1957                same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
1958                break;
1959        case sizeof(u32):
1960                same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
1961                break;
1962        }
1963
1964        return same;
1965}
1966
1967static struct mlx5_fields fields[] = {
1968        OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0, dmac_47_16),
1969        OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0, dmac_15_0),
1970        OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0, smac_47_16),
1971        OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0, smac_15_0),
1972        OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0, ethertype),
1973        OFFLOAD(FIRST_VID,  2, vlan.h_vlan_TCI, 0, first_vid),
1974
1975        OFFLOAD(IP_TTL, 1, ip4.ttl,   0, ttl_hoplimit),
1976        OFFLOAD(SIPV4,  4, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
1977        OFFLOAD(DIPV4,  4, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
1978
1979        OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0,
1980                src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
1981        OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0,
1982                src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
1983        OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0,
1984                src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
1985        OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0,
1986                src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
1987        OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0,
1988                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
1989        OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0,
1990                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
1991        OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0,
1992                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
1993        OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0,
1994                dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
1995        OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0, ttl_hoplimit),
1996
1997        OFFLOAD(TCP_SPORT, 2, tcp.source,  0, tcp_sport),
1998        OFFLOAD(TCP_DPORT, 2, tcp.dest,    0, tcp_dport),
1999        OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5, tcp_flags),
2000
2001        OFFLOAD(UDP_SPORT, 2, udp.source, 0, udp_sport),
2002        OFFLOAD(UDP_DPORT, 2, udp.dest,   0, udp_dport),
2003};
2004
2005/* On input, parse_attr->max_mod_hdr_actions gives the maximum number of HW
2006 * actions that may be parsed from the SW pedit actions. On success,
2007 * parse_attr->num_mod_hdr_actions says how many HW actions were actually parsed.
2008 */
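/* Illustrative example of the encoding done below: a SET of the full
 * 16-bit TCP dport gives first = 0, last = 15, so the HW action carries
 * offset = 0, length = 16 and data = ntohs(new_port). Masks whose set bits
 * are not consecutive (e.g. 0xf0f0) are rejected as unsupported.
 */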
2009static int offload_pedit_fields(struct pedit_headers_action *hdrs,
2010                                struct mlx5e_tc_flow_parse_attr *parse_attr,
2011                                u32 *action_flags,
2012                                struct netlink_ext_ack *extack)
2013{
2014        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2015        void *headers_c = get_match_headers_criteria(*action_flags,
2016                                                     &parse_attr->spec);
2017        void *headers_v = get_match_headers_value(*action_flags,
2018                                                  &parse_attr->spec);
2019        int i, action_size, nactions, max_actions, first, last, next_z;
2020        void *s_masks_p, *a_masks_p, *vals_p;
2021        struct mlx5_fields *f;
2022        u8 cmd, field_bsize;
2023        u32 s_mask, a_mask;
2024        unsigned long mask;
2025        __be32 mask_be32;
2026        __be16 mask_be16;
2027        void *action;
2028
2029        set_masks = &hdrs[0].masks;
2030        add_masks = &hdrs[1].masks;
2031        set_vals = &hdrs[0].vals;
2032        add_vals = &hdrs[1].vals;
2033
2034        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2035        action = parse_attr->mod_hdr_actions +
2036                 parse_attr->num_mod_hdr_actions * action_size;
2037
2038        max_actions = parse_attr->max_mod_hdr_actions;
2039        nactions = parse_attr->num_mod_hdr_actions;
2040
2041        for (i = 0; i < ARRAY_SIZE(fields); i++) {
2042                bool skip;
2043
2044                f = &fields[i];
2045                /* avoid seeing bits set from previous iterations */
2046                s_mask = 0;
2047                a_mask = 0;
2048
2049                s_masks_p = (void *)set_masks + f->offset;
2050                a_masks_p = (void *)add_masks + f->offset;
2051
2052                memcpy(&s_mask, s_masks_p, f->size);
2053                memcpy(&a_mask, a_masks_p, f->size);
2054
2055                if (!s_mask && !a_mask) /* nothing to offload here */
2056                        continue;
2057
2058                if (s_mask && a_mask) {
2059                        NL_SET_ERR_MSG_MOD(extack,
2060                                           "can't set and add to the same HW field");
2061                        printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
2062                        return -EOPNOTSUPP;
2063                }
2064
2065                if (nactions == max_actions) {
2066                        NL_SET_ERR_MSG_MOD(extack,
2067                                           "too many pedit actions, can't offload");
2068                        printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
2069                        return -EOPNOTSUPP;
2070                }
2071
2072                skip = false;
2073                if (s_mask) {
2074                        void *match_mask = headers_c + f->match_offset;
2075                        void *match_val = headers_v + f->match_offset;
2076
2077                        cmd  = MLX5_ACTION_TYPE_SET;
2078                        mask = s_mask;
2079                        vals_p = (void *)set_vals + f->offset;
2080                        /* don't rewrite if we have a match on the same value */
2081                        if (cmp_val_mask(vals_p, s_masks_p, match_val,
2082                                         match_mask, f->size))
2083                                skip = true;
2084                        /* clear to denote we consumed this field */
2085                        memset(s_masks_p, 0, f->size);
2086                } else {
2087                        u32 zero = 0;
2088
2089                        cmd  = MLX5_ACTION_TYPE_ADD;
2090                        mask = a_mask;
2091                        vals_p = (void *)add_vals + f->offset;
2092                        /* add 0 is no change */
2093                        if (!memcmp(vals_p, &zero, f->size))
2094                                skip = true;
2095                        /* clear to denote we consumed this field */
2096                        memset(a_masks_p, 0, f->size);
2097                }
2098                if (skip)
2099                        continue;
2100
2101                field_bsize = f->size * BITS_PER_BYTE;
2102
2103                if (field_bsize == 32) {
2104                        mask_be32 = *(__be32 *)&mask;
2105                        mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2106                } else if (field_bsize == 16) {
2107                        mask_be16 = *(__be16 *)&mask;
2108                        mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
2109                }
2110
2111                first = find_first_bit(&mask, field_bsize);
2112                next_z = find_next_zero_bit(&mask, field_bsize, first);
2113                last  = find_last_bit(&mask, field_bsize);
2114                if (first < next_z && next_z < last) {
2115                        NL_SET_ERR_MSG_MOD(extack,
2116                                           "rewrite of non-consecutive sub-fields isn't supported");
2117                        printk(KERN_WARNING "mlx5: rewrite of non-consecutive sub-fields (mask %lx) isn't offloaded\n",
2118                               mask);
2119                        return -EOPNOTSUPP;
2120                }
2121
2122                MLX5_SET(set_action_in, action, action_type, cmd);
2123                MLX5_SET(set_action_in, action, field, f->field);
2124
2125                if (cmd == MLX5_ACTION_TYPE_SET) {
2126                        MLX5_SET(set_action_in, action, offset, first);
2127                        /* length is the number of bits to be written; zero means a length of 32 */
2128                        MLX5_SET(set_action_in, action, length, (last - first + 1));
2129                }
2130
2131                if (field_bsize == 32)
2132                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
2133                else if (field_bsize == 16)
2134                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
2135                else if (field_bsize == 8)
2136                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
2137
2138                action += action_size;
2139                nactions++;
2140        }
2141
2142        parse_attr->num_mod_hdr_actions = nactions;
2143        return 0;
2144}
2145
2146static int mlx5e_flow_namespace_max_modify_action(struct mlx5_core_dev *mdev,
2147                                                  int namespace)
2148{
2149        if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
2150                return MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, max_modify_header_actions);
2151        else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
2152                return MLX5_CAP_FLOWTABLE_NIC_RX(mdev, max_modify_header_actions);
2153}
2154
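/* Size the HW modify-header action array up front: bounded by the device
 * capability for the target namespace and by the worst case of 16 HW
 * actions per 32-bit SW pedit key.
 */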
2155static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
2156                                 struct pedit_headers_action *hdrs,
2157                                 int namespace,
2158                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
2159{
2160        int nkeys, action_size, max_actions;
2161
2162        nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits +
2163                hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits;
2164        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
2165
2166        max_actions = mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace);
2167        /* each 32-bit SW pedit key can expand to as many as 16 HW actions */
2168        max_actions = min(max_actions, nkeys * 16);
2169
2170        parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
2171        if (!parse_attr->mod_hdr_actions)
2172                return -ENOMEM;
2173
2174        parse_attr->max_mod_hdr_actions = max_actions;
2175        return 0;
2176}
2177
2178static const struct pedit_headers zero_masks = {};
2179
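/* Record one SW pedit key into the set/add shadow headers. The mask is
 * inverted on the way in because a TC pedit mask marks the bits to
 * preserve, while the shadow headers track the bits to rewrite.
 */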
2180static int parse_tc_pedit_action(struct mlx5e_priv *priv,
2181                                 const struct flow_action_entry *act, int namespace,
2182                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2183                                 struct pedit_headers_action *hdrs,
2184                                 struct netlink_ext_ack *extack)
2185{
2186        u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
2187        int err = -EOPNOTSUPP; /* can't be all optimistic */
2188        u32 mask, val, offset;
2189        u8 htype;
2190
2191        htype = act->mangle.htype;
2193
2194        if (htype == FLOW_ACT_MANGLE_UNSPEC) {
2195                NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded");
2196                goto out_err;
2197        }
2198
2199        if (!mlx5e_flow_namespace_max_modify_action(priv->mdev, namespace)) {
2200                NL_SET_ERR_MSG_MOD(extack,
2201                                   "The pedit offload action is not supported");
2202                goto out_err;
2203        }
2204
2205        mask = act->mangle.mask;
2206        val = act->mangle.val;
2207        offset = act->mangle.offset;
2208
2209        err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]);
2210        if (err)
2211                goto out_err;
2212
2213        hdrs[cmd].pedits++;
2214
2215        return 0;
2216out_err:
2217        return err;
2218}
2219
2220static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
2221                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2222                                 struct pedit_headers_action *hdrs,
2223                                 u32 *action_flags,
2224                                 struct netlink_ext_ack *extack)
2225{
2226        struct pedit_headers *cmd_masks;
2227        int err;
2228        u8 cmd;
2229
2230        if (!parse_attr->mod_hdr_actions) {
2231                err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr);
2232                if (err)
2233                        goto out_err;
2234        }
2235
2236        err = offload_pedit_fields(hdrs, parse_attr, action_flags, extack);
2237        if (err < 0)
2238                goto out_dealloc_parsed_actions;
2239
2240        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
2241                cmd_masks = &hdrs[cmd].masks;
2242                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
2243                        NL_SET_ERR_MSG_MOD(extack,
2244                                           "attempt to offload an unsupported field");
2245                        netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
2246                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
2247                                       16, 1, cmd_masks, sizeof(zero_masks), true);
2248                        err = -EOPNOTSUPP;
2249                        goto out_dealloc_parsed_actions;
2250                }
2251        }
2252
2253        return 0;
2254
2255out_dealloc_parsed_actions:
2256        kfree(parse_attr->mod_hdr_actions);
2257out_err:
2258        return err;
2259}
2260
2261static bool csum_offload_supported(struct mlx5e_priv *priv,
2262                                   u32 action,
2263                                   u32 update_flags,
2264                                   struct netlink_ext_ack *extack)
2265{
2266        u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
2267                         TCA_CSUM_UPDATE_FLAG_UDP;
2268
2269        /* The HW recalculates checksums only when rewriting headers */
2270        if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
2271                NL_SET_ERR_MSG_MOD(extack,
2272                                   "TC csum action is only offloaded with pedit");
2273                netdev_warn(priv->netdev,
2274                            "TC csum action is only offloaded with pedit\n");
2275                return false;
2276        }
2277
2278        if (update_flags & ~prot_flags) {
2279                NL_SET_ERR_MSG_MOD(extack,
2280                                   "can't offload TC csum action for some headers");
2281                netdev_warn(priv->netdev,
2282                            "can't offload TC csum action for some headers - flags %#x\n",
2283                            update_flags);
2284                return false;
2285        }
2286
2287        return true;
2288}
2289
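/* Overlays for the 4-byte pedit words that contain ttl (IPv4) and
 * hop_limit (IPv6). is_action_keys_supported() uses them to check that a
 * rewrite of that word touches nothing but the ttl/hop_limit field itself.
 */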
2290struct ip_ttl_word {
2291        __u8    ttl;
2292        __u8    protocol;
2293        __sum16 check;
2294};
2295
2296struct ipv6_hoplimit_word {
2297        __be16  payload_len;
2298        __u8    nexthdr;
2299        __u8    hop_limit;
2300};
2301
2302static bool is_action_keys_supported(const struct flow_action_entry *act)
2303{
2304        u32 mask, offset;
2305        u8 htype;
2306
2307        htype = act->mangle.htype;
2308        offset = act->mangle.offset;
2309        mask = ~act->mangle.mask;
2310        /* For the IPv4 and IPv6 headers, check the 4-byte word containing
2311         * ttl/hop_limit to determine whether any field other than
2312         * ttl/hop_limit is being modified.
2313         */
2314        if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
2315                struct ip_ttl_word *ttl_word =
2316                        (struct ip_ttl_word *)&mask;
2317
2318                if (offset != offsetof(struct iphdr, ttl) ||
2319                    ttl_word->protocol ||
2320                    ttl_word->check) {
2321                        return true;
2322                }
2323        } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
2324                struct ipv6_hoplimit_word *hoplimit_word =
2325                        (struct ipv6_hoplimit_word *)&mask;
2326
2327                if (offset != offsetof(struct ipv6hdr, payload_len) ||
2328                    hoplimit_word->payload_len ||
2329                    hoplimit_word->nexthdr) {
2330                        return true;
2331                }
2332        }
2333        return false;
2334}
2335
2336static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
2337                                          struct flow_action *flow_action,
2338                                          u32 actions,
2339                                          struct netlink_ext_ack *extack)
2340{
2341        const struct flow_action_entry *act;
2342        bool modify_ip_header;
2343        void *headers_v;
2344        u16 ethertype;
2345        u8 ip_proto;
2346        int i;
2347
2348        headers_v = get_match_headers_value(actions, spec);
2349        ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2350
2351        /* for non-IP we only re-write MACs, so we're okay */
2352        if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
2353                goto out_ok;
2354
2355        modify_ip_header = false;
2356        flow_action_for_each(i, act, flow_action) {
2357                if (act->id != FLOW_ACTION_MANGLE &&
2358                    act->id != FLOW_ACTION_ADD)
2359                        continue;
2360
2361                if (is_action_keys_supported(act)) {
2362                        modify_ip_header = true;
2363                        break;
2364                }
2365        }
2366
2367        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
2368        if (modify_ip_header && ip_proto != IPPROTO_TCP &&
2369            ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
2370                NL_SET_ERR_MSG_MOD(extack,
2371                                   "can't offload re-write of non TCP/UDP/ICMP");
2372                pr_info("can't offload re-write of ip proto %d\n", ip_proto);
2373                return false;
2374        }
2375
2376out_ok:
2377        return true;
2378}
2379
2380static bool actions_match_supported(struct mlx5e_priv *priv,
2381                                    struct flow_action *flow_action,
2382                                    struct mlx5e_tc_flow_parse_attr *parse_attr,
2383                                    struct mlx5e_tc_flow *flow,
2384                                    struct netlink_ext_ack *extack)
2385{
2386        u32 actions;
2387
2388        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
2389                actions = flow->esw_attr->action;
2390        else
2391                actions = flow->nic_attr->action;
2392
2393        if (flow->flags & MLX5E_TC_FLOW_EGRESS &&
2394            !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) ||
2395              (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)))
2396                return false;
2397
2398        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2399                return modify_header_match_supported(&parse_attr->spec,
2400                                                     flow_action, actions,
2401                                                     extack);
2402
2403        return true;
2404}
2405
2406static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
2407{
2408        struct mlx5_core_dev *fmdev, *pmdev;
2409        u64 fsystem_guid, psystem_guid;
2410
2411        fmdev = priv->mdev;
2412        pmdev = peer_priv->mdev;
2413
2414        fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
2415        psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
2416
2417        return (fsystem_guid == psystem_guid);
2418}
2419
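/* VLAN "modify" has no dedicated HW action, so it is lowered to a pedit of
 * the VID bits inside the VLAN TCI. That only works when the rule already
 * matches on a cvlan tag and the priority bits are left unchanged, which
 * is verified below.
 */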
2420static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
2421                                   const struct flow_action_entry *act,
2422                                   struct mlx5e_tc_flow_parse_attr *parse_attr,
2423                                   struct pedit_headers_action *hdrs,
2424                                   u32 *action, struct netlink_ext_ack *extack)
2425{
2426        u16 mask16 = VLAN_VID_MASK;
2427        u16 val16 = act->vlan.vid & VLAN_VID_MASK;
2428        const struct flow_action_entry pedit_act = {
2429                .id = FLOW_ACTION_MANGLE,
2430                .mangle.htype = FLOW_ACT_MANGLE_HDR_TYPE_ETH,
2431                .mangle.offset = offsetof(struct vlan_ethhdr, h_vlan_TCI),
2432                .mangle.mask = ~(u32)be16_to_cpu(*(__be16 *)&mask16),
2433                .mangle.val = (u32)be16_to_cpu(*(__be16 *)&val16),
2434        };
2435        u8 match_prio_mask, match_prio_val;
2436        void *headers_c, *headers_v;
2437        int err;
2438
2439        headers_c = get_match_headers_criteria(*action, &parse_attr->spec);
2440        headers_v = get_match_headers_value(*action, &parse_attr->spec);
2441
2442        if (!(MLX5_GET(fte_match_set_lyr_2_4, headers_c, cvlan_tag) &&
2443              MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag))) {
2444                NL_SET_ERR_MSG_MOD(extack,
2445                                   "VLAN rewrite action must have VLAN protocol match");
2446                return -EOPNOTSUPP;
2447        }
2448
2449        match_prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
2450        match_prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
2451        if (act->vlan.prio != (match_prio_val & match_prio_mask)) {
2452                NL_SET_ERR_MSG_MOD(extack,
2453                                   "Changing VLAN prio is not supported");
2454                return -EOPNOTSUPP;
2455        }
2456
2457        err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr,
2458                                    hdrs, NULL);
2459        *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2460
2461        return err;
2462}
2463
2464static int
2465add_vlan_prio_tag_rewrite_action(struct mlx5e_priv *priv,
2466                                 struct mlx5e_tc_flow_parse_attr *parse_attr,
2467                                 struct pedit_headers_action *hdrs,
2468                                 u32 *action, struct netlink_ext_ack *extack)
2469{
2470        const struct flow_action_entry prio_tag_act = {
2471                .vlan.vid = 0,
2472                .vlan.prio =
2473                        MLX5_GET(fte_match_set_lyr_2_4,
2474                                 get_match_headers_value(*action,
2475                                                         &parse_attr->spec),
2476                                 first_prio) &
2477                        MLX5_GET(fte_match_set_lyr_2_4,
2478                                 get_match_headers_criteria(*action,
2479                                                            &parse_attr->spec),
2480                                 first_prio),
2481        };
2482
2483        return add_vlan_rewrite_action(priv, MLX5_FLOW_NAMESPACE_FDB,
2484                                       &prio_tag_act, parse_attr, hdrs, action,
2485                                       extack);
2486}
2487
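/* Parse the TC action list for NIC (non-eswitch) offload. Illustrative
 * tc(8) rule (device name and mark value are examples only):
 *   tc filter add dev eth0 ingress flower ip_proto udp \
 *       action skbedit mark 0x1234
 * becomes MLX5_FLOW_CONTEXT_ACTION_FWD_DEST with flow_tag 0x1234.
 */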
2488static int parse_tc_nic_actions(struct mlx5e_priv *priv,
2489                                struct flow_action *flow_action,
2490                                struct mlx5e_tc_flow_parse_attr *parse_attr,
2491                                struct mlx5e_tc_flow *flow,
2492                                struct netlink_ext_ack *extack)
2493{
2494        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
2495        struct pedit_headers_action hdrs[2] = {};
2496        const struct flow_action_entry *act;
2497        u32 action = 0;
2498        int err, i;
2499
2500        if (!flow_action_has_entries(flow_action))
2501                return -EINVAL;
2502
2503        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
2504
2505        flow_action_for_each(i, act, flow_action) {
2506                switch (act->id) {
2507                case FLOW_ACTION_DROP:
2508                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
2509                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
2510                                               flow_table_properties_nic_receive.flow_counter))
2511                                action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
2512                        break;
2513                case FLOW_ACTION_MANGLE:
2514                case FLOW_ACTION_ADD:
2515                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
2516                                                    parse_attr, hdrs, extack);
2517                        if (err)
2518                                return err;
2519
2520                        action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
2521                                  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2522                        break;
2523                case FLOW_ACTION_VLAN_MANGLE:
2524                        err = add_vlan_rewrite_action(priv,
2525                                                      MLX5_FLOW_NAMESPACE_KERNEL,
2526                                                      act, parse_attr, hdrs,
2527                                                      &action, extack);
2528                        if (err)
2529                                return err;
2530
2531                        break;
2532                case FLOW_ACTION_CSUM:
2533                        if (csum_offload_supported(priv, action,
2534                                                   act->csum_flags,
2535                                                   extack))
2536                                break;
2537
2538                        return -EOPNOTSUPP;
2539                case FLOW_ACTION_REDIRECT: {
2540                        struct net_device *peer_dev = act->dev;
2541
2542                        if (priv->netdev->netdev_ops == peer_dev->netdev_ops &&
2543                            same_hw_devs(priv, netdev_priv(peer_dev))) {
2544                                parse_attr->mirred_ifindex[0] = peer_dev->ifindex;
2545                                flow->flags |= MLX5E_TC_FLOW_HAIRPIN;
2546                                action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
2547                                          MLX5_FLOW_CONTEXT_ACTION_COUNT;
2548                        } else {
2549                                NL_SET_ERR_MSG_MOD(extack,
2550                                                   "device is not on same HW, can't offload");
2551                                netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
2552                                            peer_dev->name);
2553                                return -EINVAL;
2554                        }
2555                        }
2556                        break;
2557                case FLOW_ACTION_MARK: {
2558                        u32 mark = act->mark;
2559
2560                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
2561                                NL_SET_ERR_MSG_MOD(extack,
2562                                                   "Bad flow mark - only 16 bits are supported");
2563                                return -EINVAL;
2564                        }
2565
2566                        attr->flow_tag = mark;
2567                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
2568                        }
2569                        break;
2570                default:
2571                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
2572                        return -EOPNOTSUPP;
2573                }
2574        }
2575
2576        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
2577            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
2578                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL,
2579                                            parse_attr, hdrs, &action, extack);
2580                if (err)
2581                        return err;
2582                /* in case all pedit actions are skipped, remove the MOD_HDR
2583                 * flag.
2584                 */
2585                if (parse_attr->num_mod_hdr_actions == 0) {
2586                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2587                        kfree(parse_attr->mod_hdr_actions);
2588                }
2589        }
2590
2591        attr->action = action;
2592        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
2593                return -EOPNOTSUPP;
2594
2595        return 0;
2596}
2597
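/* Encap entries are keyed by the full ip_tunnel_key plus the tunnel type,
 * so flows encapsulating to the same remote endpoint share one entry.
 */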
2598struct encap_key {
2599        const struct ip_tunnel_key *ip_tun_key;
2600        struct mlx5e_tc_tunnel *tc_tunnel;
2601};
2602
2603static inline int cmp_encap_info(struct encap_key *a,
2604                                 struct encap_key *b)
2605{
2606        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
2607               a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
2608}
2609
2610static inline int hash_encap_info(struct encap_key *key)
2611{
2612        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
2613                     key->tc_tunnel->tunnel_type);
2614}
2615
2617static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
2618                                  struct net_device *peer_netdev)
2619{
2620        struct mlx5e_priv *peer_priv;
2621
2622        peer_priv = netdev_priv(peer_netdev);
2623
2624        return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
2625                mlx5e_eswitch_rep(priv->netdev) &&
2626                mlx5e_eswitch_rep(peer_netdev) &&
2627                same_hw_devs(priv, peer_priv));
2628}
2629
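/* Find or create the encap entry (encap header + neighbour state) for one
 * mirred destination of an eswitch flow. Entries are deduplicated via a
 * hash over the tunnel key, so flows sharing a destination reuse a single
 * encap_id. If the neighbour is not resolved yet the entry exists but is
 * not valid, and the caller must keep the flow on the slow path until the
 * encap becomes valid.
 */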
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct net_device *mirred_dev,
                              int out_index,
                              struct netlink_ext_ack *extack,
                              struct net_device **encap_dev,
                              bool *encap_valid)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        const struct ip_tunnel_info *tun_info;
        struct encap_key key, e_key;
        struct mlx5e_encap_entry *e;
        unsigned short family;
        uintptr_t hash_key;
        bool found = false;
        int err = 0;

        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);
        key.ip_tun_key = &tun_info->key;
        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
        if (!key.tc_tunnel) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
                return -EOPNOTSUPP;
        }

        hash_key = hash_encap_info(&key);

        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
                                   encap_hlist, hash_key) {
                e_key.ip_tun_key = &e->tun_info->key;
                e_key.tc_tunnel = e->tunnel;
                if (!cmp_encap_info(&e_key, &key)) {
                        found = true;
                        break;
                }
        }

        /* An existing encap entry may still be invalid (neighbour not
         * resolved yet); its state is reported via encap_valid below.
         */
        if (found)
                goto attach_flow;

        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (!e)
                return -ENOMEM;

        e->tun_info = tun_info;
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
        if (err)
                goto out_err;

        INIT_LIST_HEAD(&e->flows);

        if (family == AF_INET)
                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
        else if (family == AF_INET6)
                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

        if (err)
                goto out_err;

        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);

attach_flow:
        list_add(&flow->encaps[out_index].list, &e->flows);
        flow->encaps[out_index].index = out_index;
        *encap_dev = e->out_dev;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                attr->dests[out_index].encap_id = e->encap_id;
                attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
                *encap_valid = true;
        } else {
                *encap_valid = false;
        }

        return err;

out_err:
        kfree(e);
        return err;
}

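/* Translate a single tc vlan push/pop action into the matching
 * MLX5_FLOW_CONTEXT_ACTION_VLAN_* bit.  Up to MLX5_FS_VLAN_DEPTH vlan
 * headers are supported; the second level needs device support, hence
 * the extra capability checks.
 */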
static int parse_tc_vlan_action(struct mlx5e_priv *priv,
                                const struct flow_action_entry *act,
                                struct mlx5_esw_flow_attr *attr,
                                u32 *action)
{
        u8 vlan_idx = attr->total_vlan;

        if (vlan_idx >= MLX5_FS_VLAN_DEPTH)
                return -EOPNOTSUPP;

        switch (act->id) {
        case FLOW_ACTION_VLAN_POP:
                if (vlan_idx) {
                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
                                                                 MLX5_FS_VLAN_DEPTH))
                                return -EOPNOTSUPP;

                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2;
                } else {
                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                }
                break;
        case FLOW_ACTION_VLAN_PUSH:
                attr->vlan_vid[vlan_idx] = act->vlan.vid;
                attr->vlan_prio[vlan_idx] = act->vlan.prio;
                attr->vlan_proto[vlan_idx] = act->vlan.proto;
                if (!attr->vlan_proto[vlan_idx])
                        attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);

                if (vlan_idx) {
                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev,
                                                                 MLX5_FS_VLAN_DEPTH))
                                return -EOPNOTSUPP;

                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2;
                } else {
                        if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) &&
                            (act->vlan.proto != htons(ETH_P_8021Q) ||
                             act->vlan.prio))
                                return -EOPNOTSUPP;

                        *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
                }
                break;
        default:
                return -EINVAL;
        }

        attr->total_vlan = vlan_idx + 1;

        return 0;
}

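/* When redirecting to a vlan device, push the vlan header(s) the device
 * would have added and resolve the output device to the underlying real
 * device, recursing through stacked vlan devices.
 */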
static int add_vlan_push_action(struct mlx5e_priv *priv,
                                struct mlx5_esw_flow_attr *attr,
                                struct net_device **out_dev,
                                u32 *action)
{
        struct net_device *vlan_dev = *out_dev;
        struct flow_action_entry vlan_act = {
                .id = FLOW_ACTION_VLAN_PUSH,
                .vlan.vid = vlan_dev_vlan_id(vlan_dev),
                .vlan.proto = vlan_dev_vlan_proto(vlan_dev),
                .vlan.prio = 0,
        };
        int err;

        err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
        if (err)
                return err;

        /* The lower device may already be gone, e.g. when filters are
         * replayed; guard against a NULL dereference.
         */
        *out_dev = dev_get_by_index_rcu(dev_net(vlan_dev),
                                        dev_get_iflink(vlan_dev));
        if (!*out_dev)
                return -ENODEV;

        if (is_vlan_dev(*out_dev))
                err = add_vlan_push_action(priv, attr, out_dev, action);

        return err;
}

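/* When the filter device is a vlan device, pop as many vlan headers as
 * its encapsulation level, mirroring what the vlan stack would strip in
 * software.
 */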
static int add_vlan_pop_action(struct mlx5e_priv *priv,
                               struct mlx5_esw_flow_attr *attr,
                               u32 *action)
{
        int nest_level = vlan_get_encap_level(attr->parse_attr->filter_dev);
        struct flow_action_entry vlan_act = {
                .id = FLOW_ACTION_VLAN_POP,
        };
        int err = 0;

        while (nest_level--) {
                err = parse_tc_vlan_action(priv, &vlan_act, attr, action);
                if (err)
                        return err;
        }

        return err;
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
                                    struct net_device *out_dev)
{
        if (is_merged_eswitch_dev(priv, out_dev))
                return true;

        return mlx5e_eswitch_rep(out_dev) &&
               same_hw_devs(priv, netdev_priv(out_dev));
}

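/* Translate the flower action list into an eswitch (FDB) flow attribute:
 * drop/forward flags, pedit rewrites, vlan push/pop, tunnel encap/decap,
 * goto chain and the forward destinations.
 *
 * Purely for illustration (hypothetical interface names), a rule that
 * would exercise the encap path could look roughly like:
 *
 *   tc filter add dev mlx5_rep0 ingress protocol ip flower \
 *       action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 \
 *              id 42 dst_port 4789 \
 *       action mirred egress redirect dev vxlan0
 */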
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
                                struct flow_action *flow_action,
                                struct mlx5e_tc_flow *flow,
                                struct netlink_ext_ack *extack)
{
        struct pedit_headers_action hdrs[2] = {};
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        const struct ip_tunnel_info *info = NULL;
        const struct flow_action_entry *act;
        bool encap = false;
        u32 action = 0;
        int err, i;

        if (!flow_action_has_entries(flow_action))
                return -EINVAL;

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_DROP:
                        action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        break;
                case FLOW_ACTION_MANGLE:
                case FLOW_ACTION_ADD:
                        err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
                                                    parse_attr, hdrs, extack);
                        if (err)
                                return err;

                        action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                        attr->split_count = attr->out_count;
                        break;
                case FLOW_ACTION_CSUM:
                        if (csum_offload_supported(priv, action,
                                                   act->csum_flags, extack))
                                break;

                        return -EOPNOTSUPP;
                case FLOW_ACTION_REDIRECT:
                case FLOW_ACTION_MIRRED: {
                        struct mlx5e_priv *out_priv;
                        struct net_device *out_dev;

                        out_dev = act->dev;
                        if (!out_dev) {
                                /* out_dev is NULL when filters with a
                                 * non-existing mirred device are replayed
                                 * to the driver.
                                 */
                                return -EINVAL;
                        }

                        if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "can't support more output ports, can't offload forwarding");
                                pr_err("can't support more than %d output ports, can't offload forwarding\n",
                                       attr->out_count);
                                return -EOPNOTSUPP;
                        }

                        action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                                  MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
                                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                                struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
                                struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev);

                                if (uplink_upper &&
                                    netif_is_lag_master(uplink_upper) &&
                                    uplink_upper == out_dev)
                                        out_dev = uplink_dev;

                                if (is_vlan_dev(out_dev)) {
                                        err = add_vlan_push_action(priv, attr,
                                                                   &out_dev,
                                                                   &action);
                                        if (err)
                                                return err;
                                }

                                if (is_vlan_dev(parse_attr->filter_dev)) {
                                        err = add_vlan_pop_action(priv, attr,
                                                                  &action);
                                        if (err)
                                                return err;
                                }

                                if (!mlx5e_is_valid_eswitch_fwd_dev(priv, out_dev)) {
                                        NL_SET_ERR_MSG_MOD(extack,
                                                           "devices are not on same switch HW, can't offload forwarding");
                                        pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
                                               priv->netdev->name, out_dev->name);
                                        return -EOPNOTSUPP;
                                }

                                out_priv = netdev_priv(out_dev);
                                rpriv = out_priv->ppriv;
                                attr->dests[attr->out_count].rep = rpriv->rep;
                                attr->dests[attr->out_count].mdev = out_priv->mdev;
                                attr->out_count++;
                        } else if (encap) {
                                parse_attr->mirred_ifindex[attr->out_count] =
                                        out_dev->ifindex;
                                parse_attr->tun_info[attr->out_count] = info;
                                encap = false;
                                attr->dests[attr->out_count].flags |=
                                        MLX5_ESW_DEST_ENCAP;
                                attr->out_count++;
                                /* attr->dests[].rep is resolved when we
                                 * handle encap
                                 */
                        } else if (parse_attr->filter_dev != priv->netdev) {
                                /* All mlx5 devices are called to configure
                                 * high-level device filters. Therefore, the
                                 * *attempt* to install a filter on an invalid
                                 * eswitch should not trigger an explicit error.
                                 */
                                return -EINVAL;
                        } else {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "devices are not on same switch HW, can't offload forwarding");
                                pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
                                       priv->netdev->name, out_dev->name);
                                return -EINVAL;
                        }
                        }
                        break;
                case FLOW_ACTION_TUNNEL_ENCAP:
                        info = act->tunnel;
                        if (info)
                                encap = true;
                        else
                                return -EOPNOTSUPP;

                        break;
                case FLOW_ACTION_VLAN_PUSH:
                case FLOW_ACTION_VLAN_POP:
                        if (act->id == FLOW_ACTION_VLAN_PUSH &&
                            (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP)) {
                                /* Replace vlan pop+push with vlan modify */
                                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                                err = add_vlan_rewrite_action(priv,
                                                              MLX5_FLOW_NAMESPACE_FDB,
                                                              act, parse_attr, hdrs,
                                                              &action, extack);
                        } else {
                                err = parse_tc_vlan_action(priv, act, attr, &action);
                        }
                        if (err)
                                return err;

                        attr->split_count = attr->out_count;
                        break;
                case FLOW_ACTION_VLAN_MANGLE:
                        err = add_vlan_rewrite_action(priv,
                                                      MLX5_FLOW_NAMESPACE_FDB,
                                                      act, parse_attr, hdrs,
                                                      &action, extack);
                        if (err)
                                return err;

                        attr->split_count = attr->out_count;
                        break;
                case FLOW_ACTION_TUNNEL_DECAP:
                        action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
                        break;
                case FLOW_ACTION_GOTO: {
                        u32 dest_chain = act->chain_index;
                        u32 max_chain = mlx5_eswitch_get_chain_range(esw);

                        if (dest_chain <= attr->chain) {
                                NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
                                return -EOPNOTSUPP;
                        }
                        if (dest_chain > max_chain) {
                                NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
                                return -EOPNOTSUPP;
                        }
                        action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
                        attr->dest_chain = dest_chain;
                        break;
                        }
                default:
                        NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported");
                        return -EOPNOTSUPP;
                }
        }

        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
            action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
                /* For prio tag mode, replace vlan pop with vlan prio tag
                 * rewrite.
                 */
                action &= ~MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
                err = add_vlan_prio_tag_rewrite_action(priv, parse_attr, hdrs,
                                                       &action, extack);
                if (err)
                        return err;
        }

        if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits ||
            hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) {
                err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_FDB,
                                            parse_attr, hdrs, &action, extack);
                if (err)
                        return err;
                /* In case all pedit actions are skipped, remove the MOD_HDR
                 * flag. We might have set split_count either by pedit or
                 * pop/push. If there is no pop/push either, reset it too.
                 */
                if (parse_attr->num_mod_hdr_actions == 0) {
                        action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                        kfree(parse_attr->mod_hdr_actions);
                        if (!((action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
                              (action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
                                attr->split_count = 0;
                }
        }

        attr->action = action;
        if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack))
                return -EOPNOTSUPP;

        if (attr->dest_chain) {
                if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                        NL_SET_ERR_MSG(extack, "Mirroring goto chain rules isn't supported");
                        return -EOPNOTSUPP;
                }
                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        }

        if (attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "current firmware doesn't support split rule for port mirroring");
                netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
                return -EOPNOTSUPP;
        }

        return 0;
}

static void get_flags(int flags, u16 *flow_flags)
{
        u16 __flow_flags = 0;

        if (flags & MLX5E_TC_INGRESS)
                __flow_flags |= MLX5E_TC_FLOW_INGRESS;
        if (flags & MLX5E_TC_EGRESS)
                __flow_flags |= MLX5E_TC_FLOW_EGRESS;

        if (flags & MLX5E_TC_ESW_OFFLOAD)
                __flow_flags |= MLX5E_TC_FLOW_ESWITCH;
        if (flags & MLX5E_TC_NIC_OFFLOAD)
                __flow_flags |= MLX5E_TC_FLOW_NIC;

        *flow_flags = __flow_flags;
}

static const struct rhashtable_params tc_ht_params = {
        .head_offset = offsetof(struct mlx5e_tc_flow, node),
        .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
        .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
        .automatic_shrinking = true,
};

static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (flags & MLX5E_TC_ESW_OFFLOAD) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                return &uplink_rpriv->uplink_priv.tc_ht;
        } else /* NIC offload */
                return &priv->fs.tc.ht;
}

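/* Decide whether the rule must be duplicated to the peer eswitch: under
 * sriov lag or multipath, ingress rules from a non-uplink rep (or encap
 * rules) may see traffic on either port, so both devices need the rule.
 */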
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK &&
                              flow->flags & MLX5E_TC_FLOW_INGRESS;
        bool act_is_encap = !!(attr->action &
                               MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
        bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom,
                                                MLX5_DEVCOM_ESW_OFFLOADS);

        if (!esw_paired)
                return false;

        if ((mlx5_lag_is_sriov(attr->in_mdev) ||
             mlx5_lag_is_multipath(attr->in_mdev)) &&
            (is_rep_ingress || act_is_encap))
                return true;

        return false;
}

static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
                 struct flow_cls_offload *f, u16 flow_flags,
                 struct mlx5e_tc_flow_parse_attr **__parse_attr,
                 struct mlx5e_tc_flow **__flow)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_tc_flow *flow;
        int err;

        flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
        parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
        if (!parse_attr || !flow) {
                err = -ENOMEM;
                goto err_free;
        }

        flow->cookie = f->cookie;
        flow->flags = flow_flags;
        flow->priv = priv;

        *__flow = flow;
        *__parse_attr = parse_attr;

        return 0;

err_free:
        kfree(flow);
        kvfree(parse_attr);
        return err;
}

static void
mlx5e_flow_esw_attr_init(struct mlx5_esw_flow_attr *esw_attr,
                         struct mlx5e_priv *priv,
                         struct mlx5e_tc_flow_parse_attr *parse_attr,
                         struct flow_cls_offload *f,
                         struct mlx5_eswitch_rep *in_rep,
                         struct mlx5_core_dev *in_mdev)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        esw_attr->parse_attr = parse_attr;
        esw_attr->chain = f->common.chain_index;
        esw_attr->prio = f->common.prio;

        esw_attr->in_rep = in_rep;
        esw_attr->in_mdev = in_mdev;

        if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
            MLX5_COUNTER_SOURCE_ESWITCH)
                esw_attr->counter_dev = in_mdev;
        else
                esw_attr->counter_dev = priv->mdev;
}

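/* Allocate an eswitch flow, parse the flower match and actions, and
 * offload the rule to the FDB.  -ENETUNREACH under multipath is not
 * fatal: the flow is queued as "unready" and re-offloaded once the route
 * becomes reachable (see mlx5e_tc_reoffload_flows_work()).
 */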
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                     struct flow_cls_offload *f,
                     u16 flow_flags,
                     struct net_device *filter_dev,
                     struct mlx5_eswitch_rep *in_rep,
                     struct mlx5_core_dev *in_mdev)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct netlink_ext_ack *extack = f->common.extack;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_tc_flow *flow;
        int attr_size, err;

        flow_flags |= MLX5E_TC_FLOW_ESWITCH;
        attr_size  = sizeof(struct mlx5_esw_flow_attr);
        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
                               &parse_attr, &flow);
        if (err)
                goto out;

        parse_attr->filter_dev = filter_dev;
        mlx5e_flow_esw_attr_init(flow->esw_attr,
                                 priv, parse_attr,
                                 f, in_rep, in_mdev);

        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
                               f, filter_dev);
        if (err)
                goto err_free;

        err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
        if (err)
                goto err_free;

        err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
        if (err) {
                if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
                        goto err_free;

                add_unready_flow(flow);
        }

        return flow;

err_free:
        kfree(flow);
        kvfree(parse_attr);
out:
        return ERR_PTR(err);
}

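/* Install a duplicate of the flow on the peer eswitch and link the two
 * with MLX5E_TC_FLOW_DUP so stats and deletion cover both copies.
 */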
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
                                      struct mlx5e_tc_flow *flow,
                                      u16 flow_flags)
{
        struct mlx5e_priv *priv = flow->priv, *peer_priv;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_rep_priv *peer_urpriv;
        struct mlx5e_tc_flow *peer_flow;
        struct mlx5_core_dev *in_mdev;
        int err = 0;

        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        if (!peer_esw)
                return -ENODEV;

        peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
        peer_priv = netdev_priv(peer_urpriv->netdev);

        /* in_mdev is assigned the mdev from which the packet originated.
         * Packets redirected to the uplink use the same mdev as the
         * original flow, and packets redirected from the uplink use the
         * peer mdev.
         */
        if (flow->esw_attr->in_rep->vport == MLX5_VPORT_UPLINK)
                in_mdev = peer_priv->mdev;
        else
                in_mdev = priv->mdev;

        parse_attr = flow->esw_attr->parse_attr;
        peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
                                         parse_attr->filter_dev,
                                         flow->esw_attr->in_rep, in_mdev);
        if (IS_ERR(peer_flow)) {
                err = PTR_ERR(peer_flow);
                goto out;
        }

        flow->peer_flow = peer_flow;
        flow->flags |= MLX5E_TC_FLOW_DUP;
        mutex_lock(&esw->offloads.peer_mutex);
        list_add_tail(&flow->peer, &esw->offloads.peer_flows);
        mutex_unlock(&esw->offloads.peer_mutex);

out:
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return err;
}

static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
                   struct flow_cls_offload *f,
                   u16 flow_flags,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
{
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *in_rep = rpriv->rep;
        struct mlx5_core_dev *in_mdev = priv->mdev;
        struct mlx5e_tc_flow *flow;
        int err;

        flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
                                    in_mdev);
        if (IS_ERR(flow))
                return PTR_ERR(flow);

        if (is_peer_flow_needed(flow)) {
                err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
                if (err) {
                        mlx5e_tc_del_fdb_flow(priv, flow);
                        goto out;
                }
        }

        *__flow = flow;

        return 0;

out:
        return err;
}

static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
                   struct flow_cls_offload *f,
                   u16 flow_flags,
                   struct net_device *filter_dev,
                   struct mlx5e_tc_flow **__flow)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct netlink_ext_ack *extack = f->common.extack;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_tc_flow *flow;
        int attr_size, err;

        /* multi-chain not supported for NIC rules */
        if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
                return -EOPNOTSUPP;

        flow_flags |= MLX5E_TC_FLOW_NIC;
        attr_size  = sizeof(struct mlx5_nic_flow_attr);
        err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
                               &parse_attr, &flow);
        if (err)
                goto out;

        parse_attr->filter_dev = filter_dev;
        err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
                               f, filter_dev);
        if (err)
                goto err_free;

        err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack);
        if (err)
                goto err_free;

        err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
        if (err)
                goto err_free;

        flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
        kvfree(parse_attr);
        *__flow = flow;

        return 0;

err_free:
        kfree(flow);
        kvfree(parse_attr);
out:
        return err;
}

static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
                  struct flow_cls_offload *f,
                  int flags,
                  struct net_device *filter_dev,
                  struct mlx5e_tc_flow **flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        u16 flow_flags;
        int err;

        get_flags(flags, &flow_flags);

        if (!tc_can_offload_extack(priv->netdev, f->common.extack))
                return -EOPNOTSUPP;

        if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
                err = mlx5e_add_fdb_flow(priv, f, flow_flags,
                                         filter_dev, flow);
        else
                err = mlx5e_add_nic_flow(priv, f, flow_flags,
                                         filter_dev, flow);

        return err;
}

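/* Entry point for FLOW_CLS_REPLACE: reject duplicate cookies, build and
 * offload the flow, then index it by cookie for later stats/delete
 * requests.
 */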
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
                           struct flow_cls_offload *f, int flags)
{
        struct netlink_ext_ack *extack = f->common.extack;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        struct mlx5e_tc_flow *flow;
        int err = 0;

        flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
        if (flow) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "flow cookie already exists, ignoring");
                netdev_warn_once(priv->netdev,
                                 "flow cookie %lx already exists, ignoring\n",
                                 f->cookie);
                err = -EEXIST;
                goto out;
        }

        err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
        if (err)
                goto out;

        err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
        if (err)
                goto err_free;

        return 0;

err_free:
        mlx5e_tc_del_flow(priv, flow);
        kfree(flow);
out:
        return err;
}

#define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS)
#define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS)

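/* A cookie lookup can hit a flow that was added in the other direction;
 * only treat it as a match if the direction bits agree.
 */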
static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
        if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK))
                return true;

        return false;
}

int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
                        struct flow_cls_offload *f, int flags)
{
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        struct mlx5e_tc_flow *flow;

        flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
        if (!flow || !same_flow_direction(flow, flags))
                return -EINVAL;

        rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);

        mlx5e_tc_del_flow(priv, flow);

        kfree(flow);

        return 0;
}

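/* FLOW_CLS_STATS: report the cached hardware counter.  Under multipath
 * the peer rule carries its own counter, so the two are summed and the
 * most recent lastuse is reported.
 */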
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
                       struct flow_cls_offload *f, int flags)
{
        struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);
        struct mlx5_eswitch *peer_esw;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        u64 lastuse = 0;
        u64 packets = 0;
        u64 bytes = 0;

        flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
        if (!flow || !same_flow_direction(flow, flags))
                return -EINVAL;

        if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                counter = mlx5e_tc_get_counter(flow);
                if (!counter)
                        return 0;

                mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
        }

        /* Under multipath it's possible for one rule to be currently
         * un-offloaded while the other rule is offloaded.
         */
        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        if (!peer_esw)
                goto out;

        if ((flow->flags & MLX5E_TC_FLOW_DUP) &&
            (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) {
                u64 bytes2;
                u64 packets2;
                u64 lastuse2;

                counter = mlx5e_tc_get_counter(flow->peer_flow);
                if (!counter)
                        goto no_peer_counter;
                mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

                bytes += bytes2;
                packets += packets2;
                lastuse = max_t(u64, lastuse, lastuse2);
        }

no_peer_counter:
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
        flow_stats_update(&f->stats, bytes, packets, lastuse);

        return 0;
}

static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
                                              struct mlx5e_priv *peer_priv)
{
        struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
        struct mlx5e_hairpin_entry *hpe;
        u16 peer_vhca_id;
        int bkt;

        if (!same_hw_devs(priv, peer_priv))
                return;

        peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

        hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
                if (hpe->peer_vhca_id == peer_vhca_id)
                        hpe->hp->pair->peer_gone = true;
        }
}

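/* Netdevice notifier: when a mlx5e netdev that acts as a hairpin peer is
 * unregistered, mark the matching hairpin entries so teardown accounts
 * for the vanished peer.
 */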
static int mlx5e_tc_netdev_event(struct notifier_block *this,
                                 unsigned long event, void *ptr)
{
        struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
        struct mlx5e_flow_steering *fs;
        struct mlx5e_priv *peer_priv;
        struct mlx5e_tc_table *tc;
        struct mlx5e_priv *priv;

        if (ndev->netdev_ops != &mlx5e_netdev_ops ||
            event != NETDEV_UNREGISTER ||
            ndev->reg_state == NETREG_REGISTERED)
                return NOTIFY_DONE;

        tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
        fs = container_of(tc, struct mlx5e_flow_steering, tc);
        priv = container_of(fs, struct mlx5e_priv, fs);
        peer_priv = netdev_priv(ndev);
        if (priv == peer_priv ||
            !(priv->netdev->features & NETIF_F_HW_TC))
                return NOTIFY_DONE;

        mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

        return NOTIFY_DONE;
}

int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        int err;

        hash_init(tc->mod_hdr_tbl);
        hash_init(tc->hairpin_tbl);

        err = rhashtable_init(&tc->ht, &tc_ht_params);
        if (err)
                return err;

        tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
        if (register_netdevice_notifier(&tc->netdevice_nb)) {
                tc->netdevice_nb.notifier_call = NULL;
                mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
        }

        return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
        struct mlx5e_tc_flow *flow = ptr;
        struct mlx5e_priv *priv = flow->priv;

        mlx5e_tc_del_flow(priv, flow);
        kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_table *tc = &priv->fs.tc;

        if (tc->netdevice_nb.notifier_call)
                unregister_netdevice_notifier(&tc->netdevice_nb);

        rhashtable_destroy(&tc->ht);

        if (!IS_ERR_OR_NULL(tc->t)) {
                mlx5_destroy_flow_table(tc->t);
                tc->t = NULL;
        }
}

int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
{
        return rhashtable_init(tc_ht, &tc_ht_params);
}

void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
{
        rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags)
{
        struct rhashtable *tc_ht = get_tc_ht(priv, flags);

        return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
        struct mlx5e_tc_flow *flow, *tmp;

        list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
                __mlx5e_tc_del_fdb_peer_flow(flow);
}

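/* Work item: retry offloading flows that previously failed with an
 * unreachable route (multipath).  Runs under rtnl_lock so the netdev
 * state stays stable while rules are re-added.
 */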
void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
        struct mlx5_rep_uplink_priv *rpriv =
                container_of(work, struct mlx5_rep_uplink_priv,
                             reoffload_flows_work);
        struct mlx5e_tc_flow *flow, *tmp;

        rtnl_lock();
        list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
                if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
                        remove_unready_flow(flow);
        }
        rtnl_unlock();
}
