linux/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_skbedit.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <net/switchdev.h>
#include <net/tc_act/tc_mirred.h>
#include <net/tc_act/tc_vlan.h>
#include <net/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_csum.h>
#include <net/vxlan.h>
#include <net/arp.h>
#include "en.h"
#include "en_rep.h"
#include "en_tc.h"
#include "eswitch.h"
#include "vxlan.h"

struct mlx5_nic_flow_attr {
        u32 action;
        u32 flow_tag;
        u32 mod_hdr_id;
};

enum {
        MLX5E_TC_FLOW_ESWITCH   = BIT(0),
        MLX5E_TC_FLOW_NIC       = BIT(1),
        MLX5E_TC_FLOW_OFFLOADED = BIT(2),
};

struct mlx5e_tc_flow {
        struct rhash_head       node;
        u64                     cookie;
        u8                      flags;
        struct mlx5_flow_handle *rule;
        struct list_head        encap;   /* flows sharing the same encap ID */
        struct list_head        mod_hdr; /* flows sharing the same mod hdr ID */
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
                struct mlx5_nic_flow_attr nic_attr[0];
        };
};

struct mlx5e_tc_flow_parse_attr {
        struct mlx5_flow_spec spec;
        int num_mod_hdr_actions;
        void *mod_hdr_actions;
};

enum {
        MLX5_HEADER_TYPE_VXLAN = 0x0,
        MLX5_HEADER_TYPE_NVGRE = 0x1,
};

#define MLX5E_TC_TABLE_NUM_ENTRIES 1024
#define MLX5E_TC_TABLE_NUM_GROUPS 4

struct mod_hdr_key {
        int num_actions;
        void *actions;
};

struct mlx5e_mod_hdr_entry {
        /* a node of a hash table which keeps all the mod_hdr entries */
        struct hlist_node mod_hdr_hlist;

        /* flows sharing the same mod_hdr entry */
        struct list_head flows;

        struct mod_hdr_key key;

        u32 mod_hdr_id;
};

#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)

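/* Modify-header (pedit) contexts are deduplicated: the packed array of HW
 * actions is hashed, and flows carrying an identical action list share a
 * single firmware mod_hdr_id.  A minimal sketch of the key computation used
 * by the helpers below:
 *
 *	struct mod_hdr_key key = {
 *		.actions     = parse_attr->mod_hdr_actions,
 *		.num_actions = parse_attr->num_mod_hdr_actions,
 *	};
 *	u32 hash = jhash(key.actions, key.num_actions * MLX5_MH_ACT_SZ, 0);
 */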
static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key)
{
        return jhash(key->actions,
                     key->num_actions * MLX5_MH_ACT_SZ, 0);
}

static inline int cmp_mod_hdr_info(struct mod_hdr_key *a,
                                   struct mod_hdr_key *b)
{
        if (a->num_actions != b->num_actions)
                return 1;

        return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ);
}

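/* Attach a flow to a possibly shared modify-header context.  The context is
 * looked up in the per-namespace table (FDB for eswitch flows, KERNEL for
 * NIC flows); on a miss a new entry is allocated with the HW action array
 * stored inline right after the struct, and a firmware modify-header
 * context is created for it.
 */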
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int num_actions, actions_size, namespace, err;
        struct mlx5e_mod_hdr_entry *mh;
        struct mod_hdr_key key;
        bool found = false;
        u32 hash_key;

        num_actions  = parse_attr->num_mod_hdr_actions;
        actions_size = MLX5_MH_ACT_SZ * num_actions;

        key.actions = parse_attr->mod_hdr_actions;
        key.num_actions = num_actions;

        hash_key = hash_mod_hdr_info(&key);

        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
                namespace = MLX5_FLOW_NAMESPACE_FDB;
                hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh,
                                       mod_hdr_hlist, hash_key) {
                        if (!cmp_mod_hdr_info(&mh->key, &key)) {
                                found = true;
                                break;
                        }
                }
        } else {
                namespace = MLX5_FLOW_NAMESPACE_KERNEL;
                hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh,
                                       mod_hdr_hlist, hash_key) {
                        if (!cmp_mod_hdr_info(&mh->key, &key)) {
                                found = true;
                                break;
                        }
                }
        }

        if (found)
                goto attach_flow;

        mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL);
        if (!mh)
                return -ENOMEM;

        mh->key.actions = (void *)mh + sizeof(*mh);
        memcpy(mh->key.actions, key.actions, actions_size);
        mh->key.num_actions = num_actions;
        INIT_LIST_HEAD(&mh->flows);

        err = mlx5_modify_header_alloc(priv->mdev, namespace,
                                       mh->key.num_actions,
                                       mh->key.actions,
                                       &mh->mod_hdr_id);
        if (err)
                goto out_err;

        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);
        else
                hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key);

attach_flow:
        list_add(&flow->mod_hdr, &mh->flows);
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                flow->esw_attr->mod_hdr_id = mh->mod_hdr_id;
        else
                flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;

        return 0;

out_err:
        kfree(mh);
        return err;
}

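/* The flows list doubles as a reference count: after unlinking this flow,
 * an empty successor means the remaining node is the entry's own list head,
 * so the last user is gone and the firmware context can be released.
 */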
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        struct list_head *next = flow->mod_hdr.next;

        list_del(&flow->mod_hdr);

        if (list_empty(next)) {
                struct mlx5e_mod_hdr_entry *mh;

                mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);

                mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
                hash_del(&mh->mod_hdr_hlist);
                kfree(mh);
        }
}

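/* Install a NIC (non-eswitch) flow: pick the destination (the vlan flow
 * table for forwarding, or a freshly created counter), attach an optional
 * modify-header context, lazily create the tc offload table on first use,
 * and add a rule matching on outer headers only.
 */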
static struct mlx5_flow_handle *
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flow_tag = attr->flow_tag,
                .encap_id = 0,
        };
        struct mlx5_fc *counter = NULL;
        struct mlx5_flow_handle *rule;
        bool table_created = false;
        int err;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest.ft = priv->fs.vlan.ft.t;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                counter = mlx5_fc_create(dev, true);
                if (IS_ERR(counter))
                        return ERR_CAST(counter);

                dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest.counter = counter;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                flow_act.modify_id = attr->mod_hdr_id;
                kfree(parse_attr->mod_hdr_actions);
                if (err) {
                        rule = ERR_PTR(err);
                        goto err_create_mod_hdr_id;
                }
        }

        if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
                priv->fs.tc.t =
                        mlx5_create_auto_grouped_flow_table(priv->fs.ns,
                                                            MLX5E_TC_PRIO,
                                                            MLX5E_TC_TABLE_NUM_ENTRIES,
                                                            MLX5E_TC_TABLE_NUM_GROUPS,
                                                            0, 0);
                if (IS_ERR(priv->fs.tc.t)) {
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        rule = ERR_CAST(priv->fs.tc.t);
                        goto err_create_ft;
                }

                table_created = true;
        }

        parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
        rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
                                   &flow_act, &dest, 1);

        if (IS_ERR(rule))
                goto err_add_rule;

        return rule;

err_add_rule:
        if (table_created) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }
err_create_ft:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
err_create_mod_hdr_id:
        mlx5_fc_destroy(dev, counter);

        return rule;
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
        struct mlx5_fc *counter = NULL;

        counter = mlx5_flow_rule_counter(flow->rule);
        mlx5_del_flow_rules(flow->rule);
        mlx5_fc_destroy(priv->mdev, counter);

        if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {
                mlx5_destroy_flow_table(priv->fs.tc.t);
                priv->fs.tc.t = NULL;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
}

static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow);

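/* Install an eswitch (FDB) flow: vlan push/pop actions are set up first,
 * then an optional modify-header context is attached, and the rule is
 * offloaded through the eswitch API.  The error path unwinds in reverse
 * order.
 */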
static struct mlx5_flow_handle *
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow_parse_attr *parse_attr,
                      struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
        struct mlx5_flow_handle *rule;
        int err;

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err) {
                rule = ERR_PTR(err);
                goto err_add_vlan;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                kfree(parse_attr->mod_hdr_actions);
                if (err) {
                        rule = ERR_PTR(err);
                        goto err_mod_hdr;
                }
        }

        rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
        if (IS_ERR(rule))
                goto err_add_rule;

        return rule;

err_add_rule:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
        mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                mlx5e_detach_encap(priv, flow);
        return rule;
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->esw_attr;

        if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
                mlx5_eswitch_del_offloaded_rule(esw, flow->rule, attr);
        }

        mlx5_eswitch_del_vlan_action(esw, attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
                mlx5e_detach_encap(priv, flow);
                kvfree(attr->parse_attr);
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);
}

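/* Called from the neighbour update path once the tunnel destination
 * becomes reachable: offload the cached encapsulation header, mark the
 * entry valid and re-offload every flow parked on this encap entry.
 */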
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e)
{
        struct mlx5e_tc_flow *flow;
        int err;

        err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
                               e->encap_size, e->encap_header,
                               &e->encap_id);
        if (err) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
                               err);
                return;
        }
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);

        list_for_each_entry(flow, &e->flows, encap) {
                flow->esw_attr->encap_id = e->encap_id;
                flow->rule = mlx5e_tc_add_fdb_flow(priv,
                                                   flow->esw_attr->parse_attr,
                                                   flow);
                if (IS_ERR(flow->rule)) {
                        err = PTR_ERR(flow->rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
                        continue;
                }
                flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
        }
}

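/* Mirror of mlx5e_tc_encap_flows_add(): when the neighbour turns invalid,
 * un-offload all flows sharing the encap entry and release the firmware
 * encap context, keeping the software state so the flows can be offloaded
 * again later.
 */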
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e)
{
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;

        list_for_each_entry(flow, &e->flows, encap) {
                if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                        flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
                        counter = mlx5_flow_rule_counter(flow->rule);
                        mlx5_del_flow_rules(flow->rule);
                        mlx5_fc_destroy(priv->mdev, counter);
                }
        }

        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
                mlx5_encap_dealloc(priv->mdev, e->encap_id);
        }
}

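/* Report whether any offloaded flow behind this neighbour saw traffic
 * since the last check; if so, poke the neighbour so the stack keeps its
 * reachability state fresh.
 */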
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
        u64 bytes, packets, lastuse = 0;
        struct mlx5e_tc_flow *flow;
        struct mlx5e_encap_entry *e;
        struct mlx5_fc *counter;
        struct neigh_table *tbl;
        bool neigh_used = false;
        struct neighbour *n;

        if (m_neigh->family == AF_INET)
                tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
        else if (m_neigh->family == AF_INET6)
                tbl = ipv6_stub->nd_tbl;
#endif
        else
                return;

        list_for_each_entry(e, &nhe->encap_list, encap_list) {
                if (!(e->flags & MLX5_ENCAP_ENTRY_VALID))
                        continue;
                list_for_each_entry(flow, &e->flows, encap) {
                        if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
                                counter = mlx5_flow_rule_counter(flow->rule);
                                mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
                                        neigh_used = true;
                                        break;
                                }
                        }
                }
        }

        if (neigh_used) {
                nhe->reported_lastuse = jiffies;

                /* find the relevant neigh according to the cached device and
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev);
                if (!n) {
                        WARN(1, "The neighbour was already freed\n");
                        return;
                }

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
}

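/* Encap entries use the same list-as-refcount scheme as mod_hdr entries:
 * the last flow leaving the entry tears down the firmware context and
 * frees the cached header.
 */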
static void mlx5e_detach_encap(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow)
{
        struct list_head *next = flow->encap.next;

        list_del(&flow->encap);
        if (list_empty(next)) {
                struct mlx5e_encap_entry *e;

                e = list_entry(next, struct mlx5e_encap_entry, flows);
                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
                        mlx5_encap_dealloc(priv->mdev, e->encap_id);

                hash_del_rcu(&e->encap_hlist);
                kfree(e->encap_header);
                kfree(e);
        }
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
                mlx5e_tc_del_fdb_flow(priv, flow);
        else
                mlx5e_tc_del_nic_flow(priv, flow);
}

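/* Fill the VXLAN-specific parts of the match: UDP as the outer IP protocol
 * and, when given, the VNI taken from the flower enc_key_id.
 */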
static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                    misc_parameters);
        void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                    misc_parameters);

        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
                struct flow_dissector_key_keyid *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_KEYID,
                                                  f->key);
                struct flow_dissector_key_keyid *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_KEYID,
                                                  f->mask);
                MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,
                         be32_to_cpu(mask->keyid));
                MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,
                         be32_to_cpu(key->keyid));
        }
}

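/* Parse the tunnel (decap) side of a flower match.  Only VXLAN over a
 * fully specified UDP destination port is offloadable; the outer IPv4/IPv6
 * addresses, the DMAC and the frag bit are folded into the match as the HW
 * requires.
 */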
static int parse_tunnel_attr(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct tc_cls_flower_offload *f)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);

        struct flow_dissector_key_control *enc_control =
                skb_flow_dissector_target(f->dissector,
                                          FLOW_DISSECTOR_KEY_ENC_CONTROL,
                                          f->key);

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
                struct flow_dissector_key_ports *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->key);
                struct flow_dissector_key_ports *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_PORTS,
                                                  f->mask);
                struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
                struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
                struct mlx5e_priv *up_priv = netdev_priv(up_dev);

                /* Full udp dst port must be given */
                if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
                        goto vxlan_match_offload_err;

                if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->dst)) &&
                    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
                        parse_vxlan_attr(spec, f);
                else {
                        netdev_warn(priv->netdev,
                                    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
                        return -EOPNOTSUPP;
                }

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         udp_dport, ntohs(mask->dst));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         udp_dport, ntohs(key->dst));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         udp_sport, ntohs(mask->src));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         udp_sport, ntohs(key->src));
        } else { /* udp dst port must be given */
vxlan_match_offload_err:
                netdev_warn(priv->netdev,
                            "IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
                return -EOPNOTSUPP;
        }

        if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv4_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                                                  f->mask);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
                         ntohl(mask->src));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         src_ipv4_src_ipv6.ipv4_layout.ipv4,
                         ntohl(key->src));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
                         ntohl(mask->dst));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                         dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
                         ntohl(key->dst));

                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
        } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_dissector_key_ipv6_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv6_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));

                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6);
        }

        /* Enforce DMAC when offloading incoming tunneled flows.
         * Flow counters require a match on the DMAC.
         */
        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
        MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
        ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                     dmac_47_16), priv->netdev->dev_addr);

        /* let software handle IP fragments */
        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);

        return 0;
}

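/* Translate a flower match into an mlx5 flow spec and report the minimal
 * inline mode the match requires.  As an illustrative example (device name
 * hypothetical, not taken from this file), a rule like
 *
 *	tc filter add dev ens1f0 protocol ip parent ffff: \
 *		flower ip_proto tcp dst_port 80 skip_sw \
 *		action drop
 *
 * exercises the L4 branches below and needs MLX5_INLINE_MODE_TCP_UDP.
 */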
static int __parse_cls_flower(struct mlx5e_priv *priv,
                              struct mlx5_flow_spec *spec,
                              struct tc_cls_flower_offload *f,
                              u8 *min_inline)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        *min_inline = MLX5_INLINE_MODE_L2;

        if (f->dissector->used_keys &
            ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_BASIC) |
              BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_VLAN) |
              BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
              BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
              BIT(FLOW_DISSECTOR_KEY_TCP) |
              BIT(FLOW_DISSECTOR_KEY_IP))) {
                netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
                            f->dissector->used_keys);
                return -EOPNOTSUPP;
        }

        if ((dissector_uses_key(f->dissector,
                                FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
             dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
             dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
            dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ENC_CONTROL,
                                                  f->key);
                switch (key->addr_type) {
                case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
                case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
                        if (parse_tunnel_attr(priv, spec, f))
                                return -EOPNOTSUPP;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
                /* In decap flows the header pointers should point at the
                 * inner headers; the outer headers were already set by
                 * parse_tunnel_attr
                 */
                headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                         inner_headers);
                headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                         inner_headers);
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_dissector_key_control *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->key);

                struct flow_dissector_key_control *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_CONTROL,
                                                  f->mask);
                addr_type = key->addr_type;

                if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
                                 key->flags & FLOW_DIS_IS_FRAGMENT);

                        /* the HW doesn't need L3 inline to match on frag=no */
                        if (key->flags & FLOW_DIS_IS_FRAGMENT)
                                *min_inline = MLX5_INLINE_MODE_IP;
                }
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_dissector_key_basic *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_BASIC,
                                                  f->key);
                struct flow_dissector_key_basic *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_BASIC,
                                                  f->mask);
                ip_proto = key->ip_proto;

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
                         ntohs(mask->n_proto));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
                         ntohs(key->n_proto));

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         key->ip_proto);

                if (mask->ip_proto)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
                struct flow_dissector_key_eth_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ETH_ADDRS,
                                                  f->key);
                struct flow_dissector_key_eth_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_ETH_ADDRS,
                                                  f->mask);

                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             dmac_47_16),
                                mask->dst);
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                             dmac_47_16),
                                key->dst);

                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             smac_47_16),
                                mask->src);
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                             smac_47_16),
                                key->src);
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
                struct flow_dissector_key_vlan *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->key);
                struct flow_dissector_key_vlan *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_VLAN,
                                                  f->mask);
                if (mask->vlan_id || mask->vlan_priority) {
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id);

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority);
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority);
                }
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_dissector_key_ipv4_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv4_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV4_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &mask->src, sizeof(mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &key->src, sizeof(key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &mask->dst, sizeof(mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &key->dst, sizeof(key->dst));

                if (mask->src || mask->dst)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_dissector_key_ipv6_addrs *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                                  f->key);
                struct flow_dissector_key_ipv6_addrs *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IPV6_ADDRS,
                                                  f->mask);

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &mask->src, sizeof(mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &key->src, sizeof(key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &mask->dst, sizeof(mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &key->dst, sizeof(key->dst));

                if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
                    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) {
                struct flow_dissector_key_ip *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IP,
                                                  f->key);
                struct flow_dissector_key_ip *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_IP,
                                                  f->mask);

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3);

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2);

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);

                if (mask->ttl &&
                    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
                                                ft_field_support.outer_ipv4_ttl))
                        return -EOPNOTSUPP;

                if (mask->tos || mask->ttl)
                        *min_inline = MLX5_INLINE_MODE_IP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_dissector_key_ports *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_PORTS,
                                                  f->key);
                struct flow_dissector_key_ports *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_PORTS,
                                                  f->mask);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(key->dst));
                        break;
                default:
                        netdev_err(priv->netdev,
                                   "Only UDP and TCP transport are supported\n");
                        return -EINVAL;
                }

                if (mask->src || mask->dst)
                        *min_inline = MLX5_INLINE_MODE_TCP_UDP;
        }

        if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_dissector_key_tcp *key =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_TCP,
                                                  f->key);
                struct flow_dissector_key_tcp *mask =
                        skb_flow_dissector_target(f->dissector,
                                                  FLOW_DISSECTOR_KEY_TCP,
                                                  f->mask);

                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(key->flags));

                if (mask->flags)
                        *min_inline = MLX5_INLINE_MODE_TCP_UDP;
        }

        return 0;
}

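/* Wrapper applying the eswitch min-inline policy: a VF representor flow is
 * rejected when the configured inline mode is weaker than what the match
 * requires.
 */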
static int parse_cls_flower(struct mlx5e_priv *priv,
                            struct mlx5e_tc_flow *flow,
                            struct mlx5_flow_spec *spec,
                            struct tc_cls_flower_offload *f)
{
        struct mlx5_core_dev *dev = priv->mdev;
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_eswitch_rep *rep;
        u8 min_inline;
        int err;

        err = __parse_cls_flower(priv, spec, f, &min_inline);

        if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) {
                rep = rpriv->rep;
                if (rep->vport != FDB_UPLINK_VPORT &&
                    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
                    esw->offloads.inline_mode < min_inline)) {
                        netdev_warn(priv->netdev,
                                    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
                                    min_inline, esw->offloads.inline_mode);
                        return -EOPNOTSUPP;
                }
        }

        return err;
}

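/* Pedit offload: the SW pedit keys are first accumulated per header type
 * into the masks/vals scratch structs below, and only then translated,
 * field by field, into HW modify-header actions.
 */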
struct pedit_headers {
        struct ethhdr  eth;
        struct iphdr   ip4;
        struct ipv6hdr ip6;
        struct tcphdr  tcp;
        struct udphdr  udp;
};

static int pedit_header_offsets[] = {
        [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp),
        [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp),
};

#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])

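/* Record one 32-bit pedit key in the scratch structs; acting twice on the
 * same bits of the same header location is rejected.
 */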
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset,
                         struct pedit_headers *masks,
                         struct pedit_headers *vals)
{
        u32 *curr_pmask, *curr_pval;

        if (hdr_type >= __PEDIT_HDR_TYPE_MAX)
                goto out_err;

        curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset);
        curr_pval  = (u32 *)(pedit_header(vals, hdr_type) + offset);

        if (*curr_pmask & mask)  /* disallow acting twice on the same location */
                goto out_err;

        *curr_pmask |= mask;
        *curr_pval  |= (val & mask);

        return 0;

out_err:
        return -EOPNOTSUPP;
}

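/* Map each HW modify-header field to its location inside pedit_headers.
 * Note TCP_FLAGS: the flags byte sits 13 bytes into the TCP header, i.e.
 * at offset 5 from ack_seq.
 */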
struct mlx5_fields {
        u8  field;
        u8  size;
        u32 offset;
};

#define OFFLOAD(fw_field, size, field, off) \
                {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, size, offsetof(struct pedit_headers, field) + (off)}

static struct mlx5_fields fields[] = {
        OFFLOAD(DMAC_47_16, 4, eth.h_dest[0], 0),
        OFFLOAD(DMAC_15_0,  2, eth.h_dest[4], 0),
        OFFLOAD(SMAC_47_16, 4, eth.h_source[0], 0),
        OFFLOAD(SMAC_15_0,  2, eth.h_source[4], 0),
        OFFLOAD(ETHERTYPE,  2, eth.h_proto, 0),

        OFFLOAD(IP_TTL, 1, ip4.ttl,   0),
        OFFLOAD(SIPV4,  4, ip4.saddr, 0),
        OFFLOAD(DIPV4,  4, ip4.daddr, 0),

        OFFLOAD(SIPV6_127_96, 4, ip6.saddr.s6_addr32[0], 0),
        OFFLOAD(SIPV6_95_64,  4, ip6.saddr.s6_addr32[1], 0),
        OFFLOAD(SIPV6_63_32,  4, ip6.saddr.s6_addr32[2], 0),
        OFFLOAD(SIPV6_31_0,   4, ip6.saddr.s6_addr32[3], 0),
        OFFLOAD(DIPV6_127_96, 4, ip6.daddr.s6_addr32[0], 0),
        OFFLOAD(DIPV6_95_64,  4, ip6.daddr.s6_addr32[1], 0),
        OFFLOAD(DIPV6_63_32,  4, ip6.daddr.s6_addr32[2], 0),
        OFFLOAD(DIPV6_31_0,   4, ip6.daddr.s6_addr32[3], 0),
        OFFLOAD(IPV6_HOPLIMIT, 1, ip6.hop_limit, 0),

        OFFLOAD(TCP_SPORT, 2, tcp.source,  0),
        OFFLOAD(TCP_DPORT, 2, tcp.dest,    0),
        OFFLOAD(TCP_FLAGS, 1, tcp.ack_seq, 5),

        OFFLOAD(UDP_SPORT, 2, udp.source, 0),
        OFFLOAD(UDP_DPORT, 2, udp.dest,   0),
};

/* On input, parse_attr->num_mod_hdr_actions holds the maximum number of HW
 * actions that can be parsed from the SW pedit action; on success it holds
 * the number of HW actions that were actually parsed.
 */
static int offload_pedit_fields(struct pedit_headers *masks,
                                struct pedit_headers *vals,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
        int i, action_size, nactions, max_actions, first, last, next_z;
        void *s_masks_p, *a_masks_p, *vals_p;
        struct mlx5_fields *f;
        u8 cmd, field_bsize;
        u32 s_mask, a_mask;
        unsigned long mask;
        __be32 mask_be32;
        __be16 mask_be16;
        void *action;

        set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET];
        add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD];
        set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET];
        add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD];

        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
        action = parse_attr->mod_hdr_actions;
        max_actions = parse_attr->num_mod_hdr_actions;
        nactions = 0;

        for (i = 0; i < ARRAY_SIZE(fields); i++) {
                f = &fields[i];
                /* avoid seeing bits set from previous iterations */
                s_mask = 0;
                a_mask = 0;

                s_masks_p = (void *)set_masks + f->offset;
                a_masks_p = (void *)add_masks + f->offset;

                memcpy(&s_mask, s_masks_p, f->size);
                memcpy(&a_mask, a_masks_p, f->size);

                if (!s_mask && !a_mask) /* nothing to offload here */
                        continue;

                if (s_mask && a_mask) {
                        printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
                        return -EOPNOTSUPP;
                }

                if (nactions == max_actions) {
                        printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
                        return -EOPNOTSUPP;
                }

                if (s_mask) {
                        cmd  = MLX5_ACTION_TYPE_SET;
                        mask = s_mask;
                        vals_p = (void *)set_vals + f->offset;
                        /* clear to denote we consumed this field */
                        memset(s_masks_p, 0, f->size);
                } else {
                        cmd  = MLX5_ACTION_TYPE_ADD;
                        mask = a_mask;
                        vals_p = (void *)add_vals + f->offset;
                        /* clear to denote we consumed this field */
                        memset(a_masks_p, 0, f->size);
                }

                field_bsize = f->size * BITS_PER_BYTE;

                if (field_bsize == 32) {
                        mask_be32 = *(__be32 *)&mask;
                        mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
                } else if (field_bsize == 16) {
                        mask_be16 = *(__be16 *)&mask;
                        mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
                }

                first = find_first_bit(&mask, field_bsize);
                next_z = find_next_zero_bit(&mask, field_bsize, first);
                last  = find_last_bit(&mask, field_bsize);
                if (first < next_z && next_z < last) {
                        printk(KERN_WARNING "mlx5: rewrite of a few sub-fields (mask %lx) isn't offloaded\n",
                               mask);
                        return -EOPNOTSUPP;
                }

                MLX5_SET(set_action_in, action, action_type, cmd);
                MLX5_SET(set_action_in, action, field, f->field);

                if (cmd == MLX5_ACTION_TYPE_SET) {
                        MLX5_SET(set_action_in, action, offset, first);
                        /* length is num of bits to be written, zero means length of 32 */
                        MLX5_SET(set_action_in, action, length, (last - first + 1));
                }

                if (field_bsize == 32)
                        MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
                else if (field_bsize == 16)
                        MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
                else if (field_bsize == 8)
                        MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

                action += action_size;
                nactions++;
        }

        parse_attr->num_mod_hdr_actions = nactions;
        return 0;
}

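/* Upper-bound the HW action array: the firmware caps the number of
 * modify-header actions per context, and each pedit key can expand to
 * several HW actions, so allocate for the smaller of the two limits.
 */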
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv,
                                 const struct tc_action *a, int namespace,
                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        int nkeys, action_size, max_actions;

        nkeys = tcf_pedit_nkeys(a);
        action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);

        if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */
                max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions);
        else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */
                max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions);

        /* each 32-bit pedit SW key can expand to as many as 16 HW actions */
        max_actions = min(max_actions, nkeys * 16);

        parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL);
        if (!parse_attr->mod_hdr_actions)
                return -ENOMEM;

        parse_attr->num_mod_hdr_actions = max_actions;
        return 0;
}

static const struct pedit_headers zero_masks = {};

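/* Parse a full tc pedit action: accumulate all SW keys, allocate and fill
 * the HW action array, then verify no mask bits were left behind (any
 * residue means an unsupported field was requested).
 */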
1232static int parse_tc_pedit_action(struct mlx5e_priv *priv,
1233                                 const struct tc_action *a, int namespace,
1234                                 struct mlx5e_tc_flow_parse_attr *parse_attr)
1235{
1236        struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
1237        int nkeys, i, err = -EOPNOTSUPP;
1238        u32 mask, val, offset;
1239        u8 cmd, htype;
1240
1241        nkeys = tcf_pedit_nkeys(a);
1242
1243        memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1244        memset(vals,  0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX);
1245
1246        for (i = 0; i < nkeys; i++) {
1247                htype = tcf_pedit_htype(a, i);
1248                cmd = tcf_pedit_cmd(a, i);
1249                err = -EOPNOTSUPP; /* assume unsupported until the key checks pass */
1250
1251                if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
1252                        printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n");
1253                        goto out_err;
1254                }
1255
1256                if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
1257                        printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd);
1258                        goto out_err;
1259                }
1260
1261                mask = tcf_pedit_mask(a, i);
1262                val = tcf_pedit_val(a, i);
1263                offset = tcf_pedit_offset(a, i);
1264
1265                err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]);
1266                if (err)
1267                        goto out_err;
1268        }
1269
1270        err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr);
1271        if (err)
1272                goto out_err;
1273
1274        err = offload_pedit_fields(masks, vals, parse_attr);
1275        if (err < 0)
1276                goto out_dealloc_parsed_actions;
1277
1278        for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
1279                cmd_masks = &masks[cmd];
1280                if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
1281                        printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n",
1282                               cmd);
1283                        print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
1284                                       16, 1, cmd_masks, sizeof(zero_masks), true);
1285                        err = -EOPNOTSUPP;
1286                        goto out_dealloc_parsed_actions;
1287                }
1288        }
1289
1290        return 0;
1291
1292out_dealloc_parsed_actions:
1293        kfree(parse_attr->mod_hdr_actions);
1294out_err:
1295        return err;
1296}
1297
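    /* csum rewrite is done by the HW as a by-product of header rewrite,
     * so the csum action can only be offloaded together with pedit and
     * only for the IPv4/TCP/UDP checksums.
     */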
1298static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
1299{
1300        u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
1301                         TCA_CSUM_UPDATE_FLAG_UDP;
1302
1303        /* The HW recalculates checksums only when rewriting headers */
1304        if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
1305                netdev_warn(priv->netdev,
1306                            "TC csum action is only offloaded with pedit\n");
1307                return false;
1308        }
1309
1310        if (update_flags & ~prot_flags) {
1311                netdev_warn(priv->netdev,
1312                            "can't offload TC csum action for some headers - flags %#x\n",
1313                            update_flags);
1314                return false;
1315        }
1316
1317        return true;
1318}
1319
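    /* parse the TC actions of a NIC (non-eswitch) flow into the mlx5
     * flow attributes; drop, pedit, csum and skbedit mark are the
     * supported actions here.
     */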
1320static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
1321                                struct mlx5e_tc_flow_parse_attr *parse_attr,
1322                                struct mlx5e_tc_flow *flow)
1323{
1324        struct mlx5_nic_flow_attr *attr = flow->nic_attr;
1325        const struct tc_action *a;
1326        LIST_HEAD(actions);
1327        int err;
1328
1329        if (tc_no_actions(exts))
1330                return -EINVAL;
1331
1332        attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
1333        attr->action = 0;
1334
1335        tcf_exts_to_list(exts, &actions);
1336        list_for_each_entry(a, &actions, list) {
1337                if (is_tcf_gact_shot(a)) {
1338                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
1339                        if (MLX5_CAP_FLOWTABLE(priv->mdev,
1340                                               flow_table_properties_nic_receive.flow_counter))
1341                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1342                        continue;
1343                }
1344
1345                if (is_tcf_pedit(a)) {
1346                        err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
1347                                                    parse_attr);
1348                        if (err)
1349                                return err;
1350
1351                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
1352                                        MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1353                        continue;
1354                }
1355
1356                if (is_tcf_csum(a)) {
1357                        if (csum_offload_supported(priv, attr->action,
1358                                                   tcf_csum_update_flags(a)))
1359                                continue;
1360
1361                        return -EOPNOTSUPP;
1362                }
1363
1364                if (is_tcf_skbedit_mark(a)) {
1365                        u32 mark = tcf_skbedit_mark(a);
1366
1367                        if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
1368                                netdev_warn(priv->netdev, "Bad flow mark - only 16 bits are supported: 0x%x\n",
1369                                            mark);
1370                                return -EINVAL;
1371                        }
1372
1373                        attr->flow_tag = mark;
1374                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1375                        continue;
1376                }
1377
1378                return -EINVAL;
1379        }
1380
1381        return 0;
1382}
1383
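    /* encap entries are cached in a hash table keyed by the full
     * ip_tunnel_key, so flows with identical tunnel parameters share
     * one encap ID.
     */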
1384static inline int cmp_encap_info(struct ip_tunnel_key *a,
1385                                 struct ip_tunnel_key *b)
1386{
1387        return memcmp(a, b, sizeof(*a));
1388}
1389
1390static inline int hash_encap_info(struct ip_tunnel_key *key)
1391{
1392        return jhash(key, sizeof(*key), 0);
1393}
1394
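    /* resolve the route and neighbour for the tunnel destination; if
     * the egress device is not on our e-switch the uplink netdev is
     * used instead.
     */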
1395static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
1396                                   struct net_device *mirred_dev,
1397                                   struct net_device **out_dev,
1398                                   struct flowi4 *fl4,
1399                                   struct neighbour **out_n,
1400                                   int *out_ttl)
1401{
1402        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1403        struct rtable *rt;
1404        struct neighbour *n = NULL;
1405
1406#if IS_ENABLED(CONFIG_INET)
1407        int ret;
1408
1409        rt = ip_route_output_key(dev_net(mirred_dev), fl4);
1410        ret = PTR_ERR_OR_ZERO(rt);
1411        if (ret)
1412                return ret;
1413#else
1414        return -EOPNOTSUPP;
1415#endif
1416        /* if the egress device isn't on the same HW e-switch, we use the uplink */
1417        if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
1418                *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
1419        else
1420                *out_dev = rt->dst.dev;
1421
1422        *out_ttl = ip4_dst_hoplimit(&rt->dst);
1423        n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
1424        ip_rt_put(rt);
1425        if (!n)
1426                return -ENOMEM;
1427
1428        *out_n = n;
1429        return 0;
1430}
1431
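    /* IPv6 counterpart of mlx5e_route_lookup_ipv4() */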
1432static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
1433                                   struct net_device *mirred_dev,
1434                                   struct net_device **out_dev,
1435                                   struct flowi6 *fl6,
1436                                   struct neighbour **out_n,
1437                                   int *out_ttl)
1438{
1439        struct neighbour *n = NULL;
1440        struct dst_entry *dst;
1441
1442#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
1443        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1444        int ret;
1445
1446        ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
1447                                         fl6);
1448        if (ret < 0)
1449                return ret;
1450
1451        *out_ttl = ip6_dst_hoplimit(dst);
1452
1453        /* if the egress device isn't on the same HW e-switch, we use the uplink */
1454        if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
1455                *out_dev = mlx5_eswitch_get_uplink_netdev(esw);
1456        else
1457                *out_dev = dst->dev;
1458#else
1459        return -EOPNOTSUPP;
1460#endif
1461
1462        n = dst_neigh_lookup(dst, &fl6->daddr);
1463        dst_release(dst);
1464        if (!n)
1465                return -ENOMEM;
1466
1467        *out_n = n;
1468        return 0;
1469}
1470
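    /* build the Ethernet/IPv4/UDP/VXLAN encapsulation header in buf;
     * fields not set here (e.g. lengths and checksums) are left zeroed
     * for the HW to fill in.
     */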
1471static void gen_vxlan_header_ipv4(struct net_device *out_dev,
1472                                  char buf[], int encap_size,
1473                                  unsigned char h_dest[ETH_ALEN],
1474                                  int ttl,
1475                                  __be32 daddr,
1476                                  __be32 saddr,
1477                                  __be16 udp_dst_port,
1478                                  __be32 vx_vni)
1479{
1480        struct ethhdr *eth = (struct ethhdr *)buf;
1481        struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
1482        struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
1483        struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
1484
1485        memset(buf, 0, encap_size);
1486
1487        ether_addr_copy(eth->h_dest, h_dest);
1488        ether_addr_copy(eth->h_source, out_dev->dev_addr);
1489        eth->h_proto = htons(ETH_P_IP);
1490
1491        ip->daddr = daddr;
1492        ip->saddr = saddr;
1493
1494        ip->ttl = ttl;
1495        ip->protocol = IPPROTO_UDP;
1496        ip->version = 0x4;
1497        ip->ihl = 0x5;
1498
1499        udp->dest = udp_dst_port;
1500        vxh->vx_flags = VXLAN_HF_VNI;
1501        vxh->vx_vni = vxlan_vni_field(vx_vni);
1502}
1503
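    /* IPv6 counterpart of gen_vxlan_header_ipv4() */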
1504static void gen_vxlan_header_ipv6(struct net_device *out_dev,
1505                                  char buf[], int encap_size,
1506                                  unsigned char h_dest[ETH_ALEN],
1507                                  int ttl,
1508                                  struct in6_addr *daddr,
1509                                  struct in6_addr *saddr,
1510                                  __be16 udp_dst_port,
1511                                  __be32 vx_vni)
1512{
1513        struct ethhdr *eth = (struct ethhdr *)buf;
1514        struct ipv6hdr *ip6h = (struct ipv6hdr *)((char *)eth + sizeof(struct ethhdr));
1515        struct udphdr *udp = (struct udphdr *)((char *)ip6h + sizeof(struct ipv6hdr));
1516        struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
1517
1518        memset(buf, 0, encap_size);
1519
1520        ether_addr_copy(eth->h_dest, h_dest);
1521        ether_addr_copy(eth->h_source, out_dev->dev_addr);
1522        eth->h_proto = htons(ETH_P_IPV6);
1523
1524        ip6_flow_hdr(ip6h, 0, 0);
1525        /* the HW fills in the IPv6 payload length */
1526        ip6h->nexthdr     = IPPROTO_UDP;
1527        ip6h->hop_limit   = ttl;
1528        ip6h->daddr       = *daddr;
1529        ip6h->saddr       = *saddr;
1530
1531        udp->dest = udp_dst_port;
1532        vxh->vx_flags = VXLAN_HF_VNI;
1533        vxh->vx_vni = vxlan_vni_field(vx_vni);
1534}
1535
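    /* create an IPv4 VXLAN encap header: resolve the route and
     * neighbour, attach the encap entry to the egress representor,
     * build the header and, if the neighbour is valid, allocate the HW
     * encap ID; returns -EAGAIN while the neighbour is unresolved.
     */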
1536static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
1537                                          struct net_device *mirred_dev,
1538                                          struct mlx5e_encap_entry *e)
1539{
1540        int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1541        int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN;
1542        struct ip_tunnel_key *tun_key = &e->tun_info.key;
1543        struct net_device *out_dev;
1544        struct neighbour *n = NULL;
1545        struct flowi4 fl4 = {};
1546        char *encap_header;
1547        int ttl, err;
1548        u8 nud_state;
1549
1550        if (max_encap_size < ipv4_encap_size) {
1551                mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
1552                               ipv4_encap_size, max_encap_size);
1553                return -EOPNOTSUPP;
1554        }
1555
1556        encap_header = kzalloc(ipv4_encap_size, GFP_KERNEL);
1557        if (!encap_header)
1558                return -ENOMEM;
1559
1560        switch (e->tunnel_type) {
1561        case MLX5_HEADER_TYPE_VXLAN:
1562                fl4.flowi4_proto = IPPROTO_UDP;
1563                fl4.fl4_dport = tun_key->tp_dst;
1564                break;
1565        default:
1566                err = -EOPNOTSUPP;
1567                goto out;
1568        }
1569        fl4.flowi4_tos = tun_key->tos;
1570        fl4.daddr = tun_key->u.ipv4.dst;
1571        fl4.saddr = tun_key->u.ipv4.src;
1572
1573        err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev,
1574                                      &fl4, &n, &ttl);
1575        if (err)
1576                goto out;
1577
1578        /* used by mlx5e_detach_encap to look up the neigh hash table
1579         * entry when a user deletes a rule
1580         */
1581        e->m_neigh.dev = n->dev;
1582        e->m_neigh.family = n->ops->family;
1583        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
1584        e->out_dev = out_dev;
1585
1586        /* It's important to add the neigh to the hash table before checking
1587         * its validity state, so that if a notification arrives because the
1588         * neigh changed its validity state, we find the relevant neigh in
1589         * the hash.
1590         */
1591        err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
1592        if (err)
1593                goto out;
1594
1595        read_lock_bh(&n->lock);
1596        nud_state = n->nud_state;
1597        ether_addr_copy(e->h_dest, n->ha);
1598        read_unlock_bh(&n->lock);
1599
1600        switch (e->tunnel_type) {
1601        case MLX5_HEADER_TYPE_VXLAN:
1602                gen_vxlan_header_ipv4(out_dev, encap_header,
1603                                      ipv4_encap_size, e->h_dest, ttl,
1604                                      fl4.daddr,
1605                                      fl4.saddr, tun_key->tp_dst,
1606                                      tunnel_id_to_key32(tun_key->tun_id));
1607                break;
1608        default:
1609                err = -EOPNOTSUPP;
1610                goto destroy_neigh_entry;
1611        }
1612        e->encap_size = ipv4_encap_size;
1613        e->encap_header = encap_header;
1614
1615        if (!(nud_state & NUD_VALID)) {
1616                neigh_event_send(n, NULL);
1617                err = -EAGAIN;
1618                goto out;
1619        }
1620
1621        err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1622                               ipv4_encap_size, encap_header, &e->encap_id);
1623        if (err)
1624                goto destroy_neigh_entry;
1625
1626        e->flags |= MLX5_ENCAP_ENTRY_VALID;
1627        mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
1628        neigh_release(n);
1629        return err;
1630
1631destroy_neigh_entry:
1632        mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1633out:
1634        kfree(encap_header);
1635        if (n)
1636                neigh_release(n);
1637        return err;
1638}
1639
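    /* IPv6 counterpart of mlx5e_create_encap_header_ipv4() */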
1640static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
1641                                          struct net_device *mirred_dev,
1642                                          struct mlx5e_encap_entry *e)
1643{
1644        int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
1645        int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN;
1646        struct ip_tunnel_key *tun_key = &e->tun_info.key;
1647        struct net_device *out_dev;
1648        struct neighbour *n = NULL;
1649        struct flowi6 fl6 = {};
1650        char *encap_header;
1651        int err, ttl = 0;
1652        u8 nud_state;
1653
1654        if (max_encap_size < ipv6_encap_size) {
1655                mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n",
1656                               ipv6_encap_size, max_encap_size);
1657                return -EOPNOTSUPP;
1658        }
1659
1660        encap_header = kzalloc(ipv6_encap_size, GFP_KERNEL);
1661        if (!encap_header)
1662                return -ENOMEM;
1663
1664        switch (e->tunnel_type) {
1665        case MLX5_HEADER_TYPE_VXLAN:
1666                fl6.flowi6_proto = IPPROTO_UDP;
1667                fl6.fl6_dport = tun_key->tp_dst;
1668                break;
1669        default:
1670                err = -EOPNOTSUPP;
1671                goto out;
1672        }
1673
1674        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tun_key->tos), tun_key->label);
1675        fl6.daddr = tun_key->u.ipv6.dst;
1676        fl6.saddr = tun_key->u.ipv6.src;
1677
1678        err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev,
1679                                      &fl6, &n, &ttl);
1680        if (err)
1681                goto out;
1682
1683        /* used by mlx5e_detach_encap to look up the neigh hash table
1684         * entry when a user deletes a rule
1685         */
1686        e->m_neigh.dev = n->dev;
1687        e->m_neigh.family = n->ops->family;
1688        memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
1689        e->out_dev = out_dev;
1690
1691        /* It's important to add the neigh to the hash table before checking
1692         * its validity state, so that if a notification arrives because the
1693         * neigh changed its validity state, we find the relevant neigh in
1694         * the hash.
1695         */
1696        err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e);
1697        if (err)
1698                goto out;
1699
1700        read_lock_bh(&n->lock);
1701        nud_state = n->nud_state;
1702        ether_addr_copy(e->h_dest, n->ha);
1703        read_unlock_bh(&n->lock);
1704
1705        switch (e->tunnel_type) {
1706        case MLX5_HEADER_TYPE_VXLAN:
1707                gen_vxlan_header_ipv6(out_dev, encap_header,
1708                                      ipv6_encap_size, e->h_dest, ttl,
1709                                      &fl6.daddr,
1710                                      &fl6.saddr, tun_key->tp_dst,
1711                                      tunnel_id_to_key32(tun_key->tun_id));
1712                break;
1713        default:
1714                err = -EOPNOTSUPP;
1715                goto destroy_neigh_entry;
1716        }
1717
1718        e->encap_size = ipv6_encap_size;
1719        e->encap_header = encap_header;
1720
1721        if (!(nud_state & NUD_VALID)) {
1722                neigh_event_send(n, NULL);
1723                err = -EAGAIN;
1724                goto out;
1725        }
1726
1727        err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
1728                               ipv6_encap_size, encap_header, &e->encap_id);
1729        if (err)
1730                goto destroy_neigh_entry;
1731
1732        e->flags |= MLX5_ENCAP_ENTRY_VALID;
1733        mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev));
1734        neigh_release(n);
1735        return err;
1736
1737destroy_neigh_entry:
1738        mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
1739out:
1740        kfree(encap_header);
1741        if (n)
1742                neigh_release(n);
1743        return err;
1744}
1745
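    /* attach a flow to an encap entry: reuse a cached entry with the
     * same tunnel key when one exists, otherwise allocate one and
     * generate its encap header.
     */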
1746static int mlx5e_attach_encap(struct mlx5e_priv *priv,
1747                              struct ip_tunnel_info *tun_info,
1748                              struct net_device *mirred_dev,
1749                              struct net_device **encap_dev,
1750                              struct mlx5e_tc_flow *flow)
1751{
1752        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1753        struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw);
1754        unsigned short family = ip_tunnel_info_af(tun_info);
1755        struct mlx5e_priv *up_priv = netdev_priv(up_dev);
1756        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1757        struct ip_tunnel_key *key = &tun_info->key;
1758        struct mlx5e_encap_entry *e;
1759        int tunnel_type, err = 0;
1760        uintptr_t hash_key;
1761        bool found = false;
1762
1763        /* udp dst port must be set */
1764        if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
1765                goto vxlan_encap_offload_err;
1766
1767        /* setting udp src port isn't supported */
1768        if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
1769vxlan_encap_offload_err:
1770                netdev_warn(priv->netdev,
1771                            "must set udp dst port and not set udp src port\n");
1772                return -EOPNOTSUPP;
1773        }
1774
1775        if (mlx5e_vxlan_lookup_port(up_priv, be16_to_cpu(key->tp_dst)) &&
1776            MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
1777                tunnel_type = MLX5_HEADER_TYPE_VXLAN;
1778        } else {
1779                netdev_warn(priv->netdev,
1780                            "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
1781                return -EOPNOTSUPP;
1782        }
1783
1784        hash_key = hash_encap_info(key);
1785
1786        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
1787                                   encap_hlist, hash_key) {
1788                if (!cmp_encap_info(&e->tun_info.key, key)) {
1789                        found = true;
1790                        break;
1791                }
1792        }
1793
1794        if (found)
1795                goto attach_flow;
1796
1797        e = kzalloc(sizeof(*e), GFP_KERNEL);
1798        if (!e)
1799                return -ENOMEM;
1800
1801        e->tun_info = *tun_info;
1802        e->tunnel_type = tunnel_type;
1803        INIT_LIST_HEAD(&e->flows);
1804
1805        if (family == AF_INET)
1806                err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e);
1807        else if (family == AF_INET6)
1808                err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e);
1809
1810        if (err && err != -EAGAIN)
1811                goto out_err;
1812
1813        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
1814
1815attach_flow:
1816        list_add(&flow->encap, &e->flows);
1817        *encap_dev = e->out_dev;
1818        if (e->flags & MLX5_ENCAP_ENTRY_VALID)
1819                attr->encap_id = e->encap_id;
1820
1821        return err;
1822
1823out_err:
1824        kfree(e);
1825        return err;
1826}
1827
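    /* parse the TC actions of an e-switch (FDB) flow into the mlx5
     * flow attributes; drop, pedit, csum, mirred redirect (with
     * optional tunnel encap), vlan push/pop and tunnel decap are the
     * supported actions here.
     */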
1828static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
1829                                struct mlx5e_tc_flow_parse_attr *parse_attr,
1830                                struct mlx5e_tc_flow *flow)
1831{
1832        struct mlx5_esw_flow_attr *attr = flow->esw_attr;
1833        struct mlx5e_rep_priv *rpriv = priv->ppriv;
1834        struct ip_tunnel_info *info = NULL;
1835        const struct tc_action *a;
1836        LIST_HEAD(actions);
1837        bool encap = false;
1838        int err = 0;
1839
1840        if (tc_no_actions(exts))
1841                return -EINVAL;
1842
1843        memset(attr, 0, sizeof(*attr));
1844        attr->in_rep = rpriv->rep;
1845
1846        tcf_exts_to_list(exts, &actions);
1847        list_for_each_entry(a, &actions, list) {
1848                if (is_tcf_gact_shot(a)) {
1849                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
1850                                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
1851                        continue;
1852                }
1853
1854                if (is_tcf_pedit(a)) {
1855                        err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
1856                                                    parse_attr);
1857                        if (err)
1858                                return err;
1859
1860                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1861                        continue;
1862                }
1863
1864                if (is_tcf_csum(a)) {
1865                        if (csum_offload_supported(priv, attr->action,
1866                                                   tcf_csum_update_flags(a)))
1867                                continue;
1868
1869                        return -EOPNOTSUPP;
1870                }
1871
1872                if (is_tcf_mirred_egress_redirect(a)) {
1873                        int ifindex = tcf_mirred_ifindex(a);
1874                        struct net_device *out_dev, *encap_dev = NULL;
1875                        struct mlx5e_priv *out_priv;
1876
1877                        out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
1878
1879                        if (switchdev_port_same_parent_id(priv->netdev,
1880                                                          out_dev)) {
1881                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1882                                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
1883                                out_priv = netdev_priv(out_dev);
1884                                rpriv = out_priv->ppriv;
1885                                attr->out_rep = rpriv->rep;
1886                        } else if (encap) {
1887                                err = mlx5e_attach_encap(priv, info,
1888                                                         out_dev, &encap_dev, flow);
1889                                if (err && err != -EAGAIN)
1890                                        return err;
1891                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
1892                                        MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1893                                        MLX5_FLOW_CONTEXT_ACTION_COUNT;
1894                                out_priv = netdev_priv(encap_dev);
1895                                rpriv = out_priv->ppriv;
1896                                attr->out_rep = rpriv->rep;
1897                                attr->parse_attr = parse_attr;
1898                        } else {
1899                                pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
1900                                       priv->netdev->name, out_dev->name);
1901                                return -EINVAL;
1902                        }
1903                        continue;
1904                }
1905
1906                if (is_tcf_tunnel_set(a)) {
1907                        info = tcf_tunnel_info(a);
1908                        if (info)
1909                                encap = true;
1910                        else
1911                                return -EOPNOTSUPP;
1912                        continue;
1913                }
1914
1915                if (is_tcf_vlan(a)) {
1916                        if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) {
1917                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
1918                        } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) {
1919                                if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q))
1920                                        return -EOPNOTSUPP;
1921
1922                                attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
1923                                attr->vlan = tcf_vlan_push_vid(a);
1924                        } else { /* action is TCA_VLAN_ACT_MODIFY */
1925                                return -EOPNOTSUPP;
1926                        }
1927                        continue;
1928                }
1929
1930                if (is_tcf_tunnel_release(a)) {
1931                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
1932                        continue;
1933                }
1934
1935                return -EINVAL;
1936        }
1937        return err;
1938}
1939
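    /* offload a flower classifier rule: parse the match and the
     * actions, add the rule to the FDB or the NIC flow tables and
     * track it in the flow hash table; an -EAGAIN from the FDB path
     * means the encap neighbour isn't resolved yet, in which case the
     * flow is kept and offloaded once the neighbour becomes valid.
     */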
1940int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
1941                           struct tc_cls_flower_offload *f)
1942{
1943        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1944        struct mlx5e_tc_flow_parse_attr *parse_attr;
1945        struct mlx5e_tc_table *tc = &priv->fs.tc;
1946        struct mlx5e_tc_flow *flow;
1947        int attr_size, err = 0;
1948        u8 flow_flags = 0;
1949
1950        if (esw && esw->mode == SRIOV_OFFLOADS) {
1951                flow_flags = MLX5E_TC_FLOW_ESWITCH;
1952                attr_size  = sizeof(struct mlx5_esw_flow_attr);
1953        } else {
1954                flow_flags = MLX5E_TC_FLOW_NIC;
1955                attr_size  = sizeof(struct mlx5_nic_flow_attr);
1956        }
1957
1958        flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
1959        parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
1960        if (!parse_attr || !flow) {
1961                err = -ENOMEM;
1962                goto err_free;
1963        }
1964
1965        flow->cookie = f->cookie;
1966        flow->flags = flow_flags;
1967
1968        err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
1969        if (err < 0)
1970                goto err_free;
1971
1972        if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
1973                err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
1974                if (err < 0)
1975                        goto err_handle_encap_flow;
1976                flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
1977        } else {
1978                err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
1979                if (err < 0)
1980                        goto err_free;
1981                flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
1982        }
1983
1984        if (IS_ERR(flow->rule)) {
1985                err = PTR_ERR(flow->rule);
1986                goto err_free;
1987        }
1988
1989        flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
1990        err = rhashtable_insert_fast(&tc->ht, &flow->node,
1991                                     tc->ht_params);
1992        if (err)
1993                goto err_del_rule;
1994
1995        if (flow->flags & MLX5E_TC_FLOW_ESWITCH &&
1996            !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
1997                kvfree(parse_attr);
1998        return err;
1999
2000err_del_rule:
2001        mlx5e_tc_del_flow(priv, flow);
2002
2003err_handle_encap_flow:
2004        if (err == -EAGAIN) {
2005                err = rhashtable_insert_fast(&tc->ht, &flow->node,
2006                                             tc->ht_params);
2007                if (err)
2008                        mlx5e_tc_del_flow(priv, flow);
2009                else
2010                        return 0;
2011        }
2012
2013err_free:
2014        kvfree(parse_attr);
2015        kfree(flow);
2016        return err;
2017}
2018
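    /* un-offload a flower rule: remove it from the flow hash table,
     * tear down its HW resources and free it.
     */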
2019int mlx5e_delete_flower(struct mlx5e_priv *priv,
2020                        struct tc_cls_flower_offload *f)
2021{
2022        struct mlx5e_tc_flow *flow;
2023        struct mlx5e_tc_table *tc = &priv->fs.tc;
2024
2025        flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
2026                                      tc->ht_params);
2027        if (!flow)
2028                return -EINVAL;
2029
2030        rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
2031
2032        mlx5e_tc_del_flow(priv, flow);
2033
2034        kfree(flow);
2035
2036        return 0;
2037}
2038
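    /* report the cached HW counters (bytes, packets, last use) of an
     * offloaded flower rule back to the TC layer.
     */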
2039int mlx5e_stats_flower(struct mlx5e_priv *priv,
2040                       struct tc_cls_flower_offload *f)
2041{
2042        struct mlx5e_tc_table *tc = &priv->fs.tc;
2043        struct mlx5e_tc_flow *flow;
2044        struct mlx5_fc *counter;
2045        u64 bytes;
2046        u64 packets;
2047        u64 lastuse;
2048
2049        flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
2050                                      tc->ht_params);
2051        if (!flow)
2052                return -EINVAL;
2053
2054        if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
2055                return 0;
2056
2057        counter = mlx5_flow_rule_counter(flow->rule);
2058        if (!counter)
2059                return 0;
2060
2061        mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
2062
2063        tcf_exts_stats_update(f->exts, bytes, packets, lastuse);
2064
2065        return 0;
2066}
2067
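    /* offloaded flows are hashed by their TC flower cookie */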
2068static const struct rhashtable_params mlx5e_tc_flow_ht_params = {
2069        .head_offset = offsetof(struct mlx5e_tc_flow, node),
2070        .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
2071        .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
2072        .automatic_shrinking = true,
2073};
2074
2075int mlx5e_tc_init(struct mlx5e_priv *priv)
2076{
2077        struct mlx5e_tc_table *tc = &priv->fs.tc;
2078
2079        hash_init(tc->mod_hdr_tbl);
2080
2081        tc->ht_params = mlx5e_tc_flow_ht_params;
2082        return rhashtable_init(&tc->ht, &tc->ht_params);
2083}
2084
2085static void _mlx5e_tc_del_flow(void *ptr, void *arg)
2086{
2087        struct mlx5e_tc_flow *flow = ptr;
2088        struct mlx5e_priv *priv = arg;
2089
2090        mlx5e_tc_del_flow(priv, flow);
2091        kfree(flow);
2092}
2093
2094void mlx5e_tc_cleanup(struct mlx5e_priv *priv)
2095{
2096        struct mlx5e_tc_table *tc = &priv->fs.tc;
2097
2098        rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv);
2099
2100        if (!IS_ERR_OR_NULL(tc->t)) {
2101                mlx5_destroy_flow_table(tc->t);
2102                tc->t = NULL;
2103        }
2104}
2105