linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5_flow_table *post_ct;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mutex shared_counter_lock;
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5_flow_handle *post_ct_rule;
        struct mlx5_ct_ft *ft;
        u32 fte_id;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_shared_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
};

struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_shared_counter *shared_counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

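/* Extract the conntrack 5-tuple (l3/l4 protocols, addresses and ports) from
 * an nf flow offload rule match. Only TCP or UDP over IPv4/IPv6 can be
 * offloaded; anything else is rejected with -EOPNOTSUPP.
 */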
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}

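/* Apply the rule's NAT mangle actions to @tuple (the caller passes a copy of
 * the original tuple) to derive the post-NAT tuple. Mangles of unsupported
 * headers or offsets fail the offload.
 */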
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

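/* Translate the flow_rule tuple match into hardware match criteria on the
 * outer headers of the flow spec: ethertype, ip_protocol, addresses, l4
 * ports and tcp flags.
 */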
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

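/* Drop a reference on the counter shared by both directions of a
 * connection; free the counter once the last direction is gone.
 */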
static void
mlx5_tc_ct_shared_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (!refcount_dec_and_test(&entry->shared_counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->shared_counter->counter);
        kfree(entry->shared_counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

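/* Write the conntrack metadata (ct_state bits, mark, label mapping id and
 * zone restore id) into registers, so software can restore it when a packet
 * misses to the host after being processed in hardware.
 */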
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}

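/* Convert a single pedit mangle action into a hardware set_action_in
 * modify-header action on the corresponding packet field.
 */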
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

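/* Build the modify-header of a ct entry rule: the NAT rewrites (if any)
 * followed by the register sets that carry the ct metadata.
 */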
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
                          &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;

        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                   ct_priv->mod_hdr_tbl,
                                   ct_priv->ns_type,
                                   &mod_acts);
        if (IS_ERR(*mh)) {
                err = PTR_ERR(*mh);
                goto err_mapping;
        }
        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        return err;
}

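/* Offload one direction of a ct entry: match the tuple and zone in the ct
 * (or ct_nat) table, rewrite headers and metadata registers, count, and
 * forward to the post_ct table.
 */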
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = ct_priv->post_ct;
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->shared_counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    entry->tuple.zone & MLX5_CT_ZONE_MASK,
                                    MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kfree(spec);
        return err;
}

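/* Get the counter for a ct entry. If the reverse direction of the
 * connection was already offloaded, reuse its counter so that both
 * directions share one; otherwise allocate a fresh counter with aging
 * enabled.
 */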
static struct mlx5_ct_shared_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_shared_counter *shared_counter;
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;
        int ret;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        mutex_lock(&ct_priv->shared_counter_lock);
        rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
                                           tuples_ht_params);
        if (rev_entry &&
            refcount_inc_not_zero(&rev_entry->shared_counter->refcount)) {
                mutex_unlock(&ct_priv->shared_counter_lock);
                return rev_entry->shared_counter;
        }
        mutex_unlock(&ct_priv->shared_counter_lock);

        shared_counter = kzalloc(sizeof(*shared_counter), GFP_KERNEL);
        if (!shared_counter)
                return ERR_PTR(-ENOMEM);

        shared_counter->counter = mlx5_fc_create(dev, true);
        if (IS_ERR(shared_counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(shared_counter->counter);
                kfree(shared_counter);
                return ERR_PTR(ret);
        }

        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        entry->shared_counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);
        if (IS_ERR(entry->shared_counter)) {
                err = PTR_ERR(entry->shared_counter);
                ct_dbg("Failed to get shared counter for ct entry");
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_shared_counter_put(ct_priv, entry);
        return err;
}

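/* FLOW_CLS_REPLACE handler: translate a conntrack entry pushed by the nf
 * flow table into tuple/NAT-tuple hashtable entries and offloaded rules for
 * both directions.
 */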
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (entry)
                return 0;

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
                                     &entry->tuple_node,
                                     tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                             &entry->tuple_nat_node,
                                             tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
                                     cts_ht_params);
        if (err)
                goto err_insert;

        return 0;

err_insert:
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
        /* The nat tuple is only hashed when it differs from the original */
        if (entry->tuple_nat_node.next)
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node,
                                       tuples_nat_ht_params);
err_tuple_nat:
        if (entry->tuple_node.next)
                rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                                       &entry->tuple_node,
                                       tuples_ht_params);
err_tuple:
err_set:
        kfree(entry);
        netdev_warn(ct_priv->netdev,
                    "Failed to offload ct entry, err: %d\n", err);
        return err;
}

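/* Tear down a ct entry: delete its rules, unhash its tuples and drop its
 * reference on the shared counter.
 */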
static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
                        struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
        mutex_lock(&ct_priv->shared_counter_lock);
        if (entry->tuple_nat_node.next)
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node,
                                       tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
        mutex_unlock(&ct_priv->shared_counter_lock);
        mlx5_tc_ct_shared_counter_put(ct_priv, entry);
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
        WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
                                       &entry->node,
                                       cts_ht_params));
        kfree(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_fc_query_cached(entry->shared_counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        return 0;
}

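/* Callback registered on the nf flow table; dispatches add/del/stats
 * requests for conntrack entries in this zone.
 */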
static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

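/* Dissect an skb into a ct tuple for restore lookup. Returns false if the
 * packet is not TCP or UDP over IPv4/IPv6.
 */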
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                return false;
        }

        return true;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);
        if (ctstate_mask)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
}

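/* Translate a tc flower ct_state/ct_zone/ct_mark/ct_labels match into
 * register matches. The 128-bit labels don't fit in a register, so they are
 * matched through an id allocated from the labels mapping. Matching on
 * ct_state +new is rejected.
 */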
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_dissector_key_ct *mask, *key;
        bool trk, est, untrk, unest, new;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est and new are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

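/* Set up the two rules of a per-zone pre_ct table. Both rewrite the zone
 * register: packets whose registers show they were already tracked (and
 * NATed, for the nat instance) in this zone skip straight to post_ct, while
 * everything else misses to the ct/ct_nat table.
 */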
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
                                  struct mlx5_tc_ct_pre *pre_ct,
                                  bool nat)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table *ft = pre_ct->ft;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        u32 ctstate;
        u16 zone;
        int err;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
        err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
                                        ZONE_TO_REG, zone);
        if (err) {
                ct_dbg("Failed to set zone register mapping");
                goto err_mapping;
        }

        mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
                                           pre_mod_acts.num_actions,
                                           pre_mod_acts.actions);
        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create pre ct mod hdr");
                goto err_mapping;
        }
        pre_ct->modify_hdr = mod_hdr;

        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                          MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        flow_act.modify_hdr = mod_hdr;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

        /* add flow rule */
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    zone, MLX5_CT_ZONE_MASK);
        ctstate = MLX5_CT_STATE_TRK_BIT;
        if (nat)
                ctstate |= MLX5_CT_STATE_NAT_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

        dest.ft = ct_priv->post_ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct flow rule zone %d", zone);
                goto err_flow_rule;
        }
        pre_ct->flow_rule = rule;

        /* add miss rule */
        memset(spec, 0, sizeof(*spec));
        dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct miss rule zone %d", zone);
                goto err_miss_rule;
        }
        pre_ct->miss_rule = rule;

        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return 0;

err_miss_rule:
        mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;

        mlx5_del_flow_rules(pre_ct->flow_rule);
        mlx5_del_flow_rules(pre_ct->miss_rule);
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}

static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
                        struct mlx5_tc_ct_pre *pre_ct,
                        bool nat)
{
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_flow_namespace *ns;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *g;
        u32 metadata_reg_c_2_mask;
        u32 *flow_group_in;
        void *misc;
        int err;

        ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
        if (!ns) {
                err = -EOPNOTSUPP;
                ct_dbg("Failed to get flow namespace");
                return err;
        }

        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
                return -ENOMEM;

        ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
        ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
                       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
        ft_attr.max_fte = 2;
        ft_attr.level = 1;
        ft = mlx5_create_flow_table(ns, &ft_attr);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                ct_dbg("Failed to create pre ct table");
                goto out_free;
        }
        pre_ct->ft = ft;

        /* create flow group */
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
                 MLX5_MATCH_MISC_PARAMETERS_2);

        misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
                            match_criteria.misc_parameters_2);

        metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
        metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
        if (nat)
                metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
                 metadata_reg_c_2_mask);

        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct group");
                goto err_flow_grp;
        }
        pre_ct->flow_grp = g;

        /* create miss group */
        memset(flow_group_in, 0, inlen);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct miss group");
                goto err_miss_grp;
        }
        pre_ct->miss_grp = g;

        err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
        if (err)
                goto err_add_rules;

        kvfree(flow_group_in);
        return 0;

err_add_rules:
        mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
        mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
        mlx5_destroy_flow_table(ft);
out_free:
        kvfree(flow_group_in);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
        mlx5_destroy_flow_group(pre_ct->miss_grp);
        mlx5_destroy_flow_group(pre_ct->flow_grp);
        mlx5_destroy_flow_table(pre_ct->ft);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        int err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
        if (err)
                return err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
        if (err)
                goto err_pre_ct_nat;

        return 0;

err_pre_ct_nat:
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}

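/* Get (or create) the per-zone ct flow table and register the offload
 * callback on the nf flow table backing it.
 */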
1392static struct mlx5_ct_ft *
1393mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1394                     struct nf_flowtable *nf_ft)
1395{
1396        struct mlx5_ct_ft *ft;
1397        int err;
1398
1399        ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1400        if (ft) {
1401                refcount_inc(&ft->refcount);
1402                return ft;
1403        }
1404
1405        ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1406        if (!ft)
1407                return ERR_PTR(-ENOMEM);
1408
1409        err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1410        if (err)
1411                goto err_mapping;
1412
1413        ft->zone = zone;
1414        ft->nf_ft = nf_ft;
1415        ft->ct_priv = ct_priv;
1416        refcount_set(&ft->refcount, 1);
1417
1418        err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1419        if (err)
1420                goto err_alloc_pre_ct;
1421
1422        err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1423        if (err)
1424                goto err_init;
1425
1426        err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1427                                     zone_params);
1428        if (err)
1429                goto err_insert;
1430
1431        err = nf_flow_table_offload_add_cb(ft->nf_ft,
1432                                           mlx5_tc_ct_block_flow_offload, ft);
1433        if (err)
1434                goto err_add_cb;
1435
1436        return ft;
1437
1438err_add_cb:
1439        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1440err_insert:
1441        rhashtable_destroy(&ft->ct_entries_ht);
1442err_init:
1443        mlx5_tc_ct_free_pre_ct_tables(ft);
1444err_alloc_pre_ct:
1445        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1446err_mapping:
1447        kfree(ft);
1448        return ERR_PTR(err);
1449}
1450
1451static void
1452mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1453{
1454        struct mlx5_tc_ct_priv *ct_priv = arg;
1455        struct mlx5_ct_entry *entry = ptr;
1456
1457        mlx5_tc_ct_del_ft_entry(ct_priv, entry);
1458        kfree(entry);
1459}
1460
1461static void
1462mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1463{
1464        if (!refcount_dec_and_test(&ft->refcount))
1465                return;
1466
1467        nf_flow_table_offload_del_cb(ft->nf_ft,
1468                                     mlx5_tc_ct_block_flow_offload, ft);
1469        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
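        /* Entries still in ct_entries_ht at this point were never
         * individually removed by netfilter; flush them so their HW
         * rules and counters are released before the zone is freed.
         */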
1470        rhashtable_free_and_destroy(&ft->ct_entries_ht,
1471                                    mlx5_tc_ct_flush_ft_entry,
1472                                    ct_priv);
1473        mlx5_tc_ct_free_pre_ct_tables(ft);
1474        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1475        kfree(ft);
1476}
1477
1478/* We translate the tc filter with CT action to the following HW model:
1479 *
1480 * +---------------------+
1481 * + ft prio (tc chain)  +
1482 * + original match      +
1483 * +---------------------+
1484 *      | set chain miss mapping
1485 *      | set fte_id
1486 *      | set tunnel_id
1487 *      | do decap
1488 *      v
1489 * +---------------------+
1490 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1491 * + zone+nat match      +---------------->+ post_ct (see below) +
1492 * +---------------------+  set zone       +---------------------+
1493 *      | set zone
1494 *      v
1495 * +--------------------+
1496 * + CT (nat or no nat) +
1497 * + tuple + zone match +
1498 * +--------------------+
1499 *      | set mark
1500 *      | set labels_id
1501 *      | set established
1502 *      | set zone_restore
1503 *      | do nat (if needed)
1504 *      v
1505 * +--------------+
1506 * + post_ct      + original filter actions
1507 * + fte_id match +------------------------>
1508 * +--------------+
1509 */
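/* For illustration (not driver code), a filter of roughly this shape maps
 * onto the model above; the device, zone and chain numbers are
 * hypothetical:
 *
 *        tc filter add dev $REP ingress protocol ip chain 0 flower \
 *                ip_proto tcp ct_state -trk \
 *                action ct zone 5 pipe \
 *                action goto chain 1
 *
 * The flower keys become the "original match", "action ct" expands into
 * the pre_ct -> CT -> post_ct hops, and "goto chain 1" runs from post_ct
 * as the original filter action.
 */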
1510static struct mlx5_flow_handle *
1511__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1512                          struct mlx5e_tc_flow *flow,
1513                          struct mlx5_flow_spec *orig_spec,
1514                          struct mlx5_flow_attr *attr)
1515{
1516        bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1517        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1518        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1519        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1520        struct mlx5_flow_spec *post_ct_spec = NULL;
1521        struct mlx5_flow_attr *pre_ct_attr;
1522        struct mlx5_modify_hdr *mod_hdr;
1523        struct mlx5_flow_handle *rule;
1524        struct mlx5_ct_flow *ct_flow;
1525        int chain_mapping = 0, err;
1526        struct mlx5_ct_ft *ft;
1527        u32 fte_id = 1;
1528
1529        post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1530        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1531        if (!post_ct_spec || !ct_flow) {
1532                kfree(post_ct_spec);
1533                kfree(ct_flow);
1534                return ERR_PTR(-ENOMEM);
1535        }
1536
1537        /* Register for CT established events */
1538        ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1539                                  attr->ct_attr.nf_ft);
1540        if (IS_ERR(ft)) {
1541                err = PTR_ERR(ft);
1542                ct_dbg("Failed to register ft callback");
1543                goto err_ft;
1544        }
1545        ct_flow->ft = ft;
1546
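        /* idr allocation starts searching at 1 (fte_id's initializer above),
         * so id 0 is never handed out - presumably to keep a zeroed FTEID
         * register from ever matching a post_ct rule.
         */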
1547        err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1548                            MLX5_FTE_ID_MAX, GFP_KERNEL);
1549        if (err) {
1550                netdev_warn(priv->netdev,
1551                            "Failed to allocate fte id, err: %d\n", err);
1552                goto err_idr;
1553        }
1554        ct_flow->fte_id = fte_id;
1555
1556        /* Base both rules' flow attributes on the original rule's attribute */
1557        ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1558        if (!ct_flow->pre_ct_attr) {
1559                err = -ENOMEM;
1560                goto err_alloc_pre;
1561        }
1562
1563        ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1564        if (!ct_flow->post_ct_attr) {
1565                err = -ENOMEM;
1566                goto err_alloc_post;
1567        }
1568
1569        pre_ct_attr = ct_flow->pre_ct_attr;
1570        memcpy(pre_ct_attr, attr, attr_sz);
1571        memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1572
1573        /* Modify the original rule's action to fwd and modify, leave decap */
1574        pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1575        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1576                               MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1577
1578        /* Write a chain miss tag for misses in the ct table, as we
1579         * don't go through all prios of this chain the way normal
1580         * tc rules do on a miss.
1581         */
1582        err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1583                                            &chain_mapping);
1584        if (err) {
1585                ct_dbg("Failed to get chain register mapping");
1586                goto err_get_chain;
1587        }
1588        ct_flow->chain_mapping = chain_mapping;
1589
1590        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1591                                        CHAIN_TO_REG, chain_mapping);
1592        if (err) {
1593                ct_dbg("Failed to set chain register mapping");
1594                goto err_mapping;
1595        }
1596
1597        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1598                                        FTEID_TO_REG, fte_id);
1599        if (err) {
1600                ct_dbg("Failed to set fte_id register mapping");
1601                goto err_mapping;
1602        }
1603
1604        /* If the original flow does decap, the decap happens before
1605         * entering the ct table, so add a rewrite for the tunnel match_id.
1606         */
1607        if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1608            attr->chain == 0) {
1609                u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1610
1611                err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1612                                                ct_priv->ns_type,
1613                                                TUNNEL_TO_REG,
1614                                                tun_id);
1615                if (err) {
1616                        ct_dbg("Failed to set tunnel register mapping");
1617                        goto err_mapping;
1618                }
1619        }
1620
1621        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1622                                           pre_mod_acts.num_actions,
1623                                           pre_mod_acts.actions);
1624        if (IS_ERR(mod_hdr)) {
1625                err = PTR_ERR(mod_hdr);
1626                ct_dbg("Failed to create pre ct mod hdr");
1627                goto err_mapping;
1628        }
1629        pre_ct_attr->modify_hdr = mod_hdr;
1630
1631        /* The post_ct rule matches on fte_id and executes the original
1632         * rule's tc actions.
1633         */
1634        mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1635                                    fte_id, MLX5_FTE_ID_MASK);
1636
1637        /* Put post_ct rule on post_ct flow table */
1638        ct_flow->post_ct_attr->chain = 0;
1639        ct_flow->post_ct_attr->prio = 0;
1640        ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1641
1642        ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1643        ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1644        ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1645        rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1646                                   ct_flow->post_ct_attr);
1647        ct_flow->post_ct_rule = rule;
1648        if (IS_ERR(ct_flow->post_ct_rule)) {
1649                err = PTR_ERR(ct_flow->post_ct_rule);
1650                ct_dbg("Failed to add post ct rule");
1651                goto err_insert_post_ct;
1652        }
1653
1654        /* Change the original rule to point to the ct table */
1655        pre_ct_attr->dest_chain = 0;
1656        pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1657        ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1658                                                   pre_ct_attr);
1659        if (IS_ERR(ct_flow->pre_ct_rule)) {
1660                err = PTR_ERR(ct_flow->pre_ct_rule);
1661                ct_dbg("Failed to add pre ct rule");
1662                goto err_insert_orig;
1663        }
1664
1665        attr->ct_attr.ct_flow = ct_flow;
1666        dealloc_mod_hdr_actions(&pre_mod_acts);
1667        kfree(post_ct_spec);
1668
1669        return rule;
1670
1671err_insert_orig:
1672        mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1673                            ct_flow->post_ct_attr);
1674err_insert_post_ct:
1675        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1676err_mapping:
1677        dealloc_mod_hdr_actions(&pre_mod_acts);
1678        mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1679err_get_chain:
1680        kfree(ct_flow->post_ct_attr);
1681err_alloc_post:
1682        kfree(ct_flow->pre_ct_attr);
1683err_alloc_pre:
1684        idr_remove(&ct_priv->fte_ids, fte_id);
1685err_idr:
1686        mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1687err_ft:
1688        kfree(post_ct_spec);
1689        kfree(ct_flow);
1690        netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1691        return ERR_PTR(err);
1692}
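
/* Note: the rule returned above is the post_ct rule; the pre_ct rule and
 * the remaining state are kept in ct_flow, reachable through
 * attr->ct_attr.ct_flow for teardown in mlx5_tc_ct_delete_flow().
 */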
1693
1694static struct mlx5_flow_handle *
1695__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1696                                struct mlx5_flow_spec *orig_spec,
1697                                struct mlx5_flow_attr *attr,
1698                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
1699{
1700        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1701        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1702        struct mlx5_flow_attr *pre_ct_attr;
1703        struct mlx5_modify_hdr *mod_hdr;
1704        struct mlx5_flow_handle *rule;
1705        struct mlx5_ct_flow *ct_flow;
1706        int err;
1707
1708        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1709        if (!ct_flow)
1710                return ERR_PTR(-ENOMEM);
1711
1712        /* Base the flow attributes on the original rule's attributes */
1713        pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1714        if (!pre_ct_attr) {
1715                err = -ENOMEM;
1716                goto err_attr;
1717        }
1718
1719        memcpy(pre_ct_attr, attr, attr_sz);
1720
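        /* "ct clear" is implemented by zeroing the ctstate, mark, labels
         * and zone-restore registers, so later matches see the packet as
         * untracked.
         */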
1721        err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1722        if (err) {
1723                ct_dbg("Failed to set register for ct clear");
1724                goto err_set_registers;
1725        }
1726
1727        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1728                                           mod_acts->num_actions,
1729                                           mod_acts->actions);
1730        if (IS_ERR(mod_hdr)) {
1731                err = PTR_ERR(mod_hdr);
1732                ct_dbg("Failed to create ct clear mod hdr");
1733                goto err_set_registers;
1734        }
1735
1736        dealloc_mod_hdr_actions(mod_acts);
1737        pre_ct_attr->modify_hdr = mod_hdr;
1738        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1739
1740        rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1741        if (IS_ERR(rule)) {
1742                err = PTR_ERR(rule);
1743                ct_dbg("Failed to add ct clear rule");
1744                goto err_insert;
1745        }
1746
1747        attr->ct_attr.ct_flow = ct_flow;
1748        ct_flow->pre_ct_attr = pre_ct_attr;
1749        ct_flow->pre_ct_rule = rule;
1750        return rule;
1751
1752err_insert:
1753        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1754err_set_registers:
1755        netdev_warn(priv->netdev,
1756                    "Failed to offload ct clear flow, err %d\n", err);
1757        kfree(pre_ct_attr);
1758err_attr:
1759        kfree(ct_flow);
1760
1761        return ERR_PTR(err);
1762}
1763
1764struct mlx5_flow_handle *
1765mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1766                        struct mlx5e_tc_flow *flow,
1767                        struct mlx5_flow_spec *spec,
1768                        struct mlx5_flow_attr *attr,
1769                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1770{
1771        bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1772        struct mlx5_flow_handle *rule;
1773
1774        if (!priv)
1775                return ERR_PTR(-EOPNOTSUPP);
1776
1777        mutex_lock(&priv->control_lock);
1778
1779        if (clear_action)
1780                rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1781        else
1782                rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1783        mutex_unlock(&priv->control_lock);
1784
1785        return rule;
1786}
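
/* A minimal caller sketch (assuming a flow already parsed with a ct
 * action; the parse_attr field names are illustrative):
 *
 *        rule = mlx5_tc_ct_flow_offload(ct_priv, flow, &parse_attr->spec,
 *                                       attr, &parse_attr->mod_hdr_acts);
 *        if (IS_ERR(rule))
 *                return PTR_ERR(rule);
 */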
1787
1788static void
1789__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1790                         struct mlx5e_tc_flow *flow,
1791                         struct mlx5_ct_flow *ct_flow)
1792{
1793        struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1794        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1795
1796        mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1797                            pre_ct_attr);
1798        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1799
1800        if (ct_flow->post_ct_rule) {
1801                mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1802                                    ct_flow->post_ct_attr);
1803                mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1804                idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1805                mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1806        }
1807
1808        kfree(ct_flow->pre_ct_attr);
1809        kfree(ct_flow->post_ct_attr);
1810        kfree(ct_flow);
1811}
1812
1813void
1814mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1815                       struct mlx5e_tc_flow *flow,
1816                       struct mlx5_flow_attr *attr)
1817{
1818        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1819
1820        /* We may also be called on the error path to undo anything done
1821         * during parsing, but there is nothing to undo yet.
1822         */
1823        if (!ct_flow)
1824                return;
1825
1826        mutex_lock(&priv->control_lock);
1827        __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1828        mutex_unlock(&priv->control_lock);
1829}
1830
1831static int
1832mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
1833                                  const char **err_msg)
1834{
1835        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
1836                *err_msg = "firmware level support is missing";
1837                return -EOPNOTSUPP;
1838        }
1839
1840        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
1841                /* The vlan workaround must be avoided for multi-chain rules.
1842                 * This is just a sanity check, as the pop vlan action should
1843                 * be supported by any FW that supports ignore_flow_level.
1844                 */
1845
1846                *err_msg = "firmware vlan actions support is missing";
1847                return -EOPNOTSUPP;
1848        }
1849
1850        if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
1851                                    fdb_modify_header_fwd_to_table)) {
1852                /* CT always writes to registers, which are mod header actions.
1853                 * Therefore, both mod header and goto are required.
1854                 */
1855
1856                *err_msg = "firmware fwd and modify support is missing";
1857                return -EOPNOTSUPP;
1858        }
1859
1860        if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1861                *err_msg = "register loopback isn't supported";
1862                return -EOPNOTSUPP;
1863        }
1864
1865        return 0;
1866}
1867
1868static int
1869mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
1870                                  const char **err_msg)
1871{
1872        if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
1873                *err_msg = "firmware level support is missing";
1874                return -EOPNOTSUPP;
1875        }
1876
1877        return 0;
1878}
1879
1880static int
1881mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
1882                              enum mlx5_flow_namespace_type ns_type,
1883                              const char **err_msg)
1884{
1885        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1886
1887        if (!IS_ENABLED(CONFIG_NET_TC_SKB_EXT)) {
1888                /* cannot restore chain ID on HW miss */
1889                *err_msg = "tc skb extension missing";
1890                return -EOPNOTSUPP;
1891        }
1892
1893        if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
1894                return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
1895        else
1896                return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
1897}
1898
1899#define INIT_ERR_PREFIX "tc ct offload init failed"
1900
1901struct mlx5_tc_ct_priv *
1902mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
1903                struct mod_hdr_tbl *mod_hdr,
1904                enum mlx5_flow_namespace_type ns_type)
1905{
1906        struct mlx5_tc_ct_priv *ct_priv;
1907        struct mlx5_core_dev *dev;
1908        const char *msg;
1909        int err;
1910
1911        dev = priv->mdev;
1912        err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
1913        if (err) {
1914                mlx5_core_warn(dev,
1915                               "tc ct offload not supported, %s\n",
1916                               msg);
1917                goto err_support;
1918        }
1919
1920        ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
1921        if (!ct_priv)
1922                goto err_alloc;
1923
1924        ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
1925        if (IS_ERR(ct_priv->zone_mapping)) {
1926                err = PTR_ERR(ct_priv->zone_mapping);
1927                goto err_mapping_zone;
1928        }
1929
1930        ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
1931        if (IS_ERR(ct_priv->labels_mapping)) {
1932                err = PTR_ERR(ct_priv->labels_mapping);
1933                goto err_mapping_labels;
1934        }
1935
1936        ct_priv->ns_type = ns_type;
1937        ct_priv->chains = chains;
1938        ct_priv->netdev = priv->netdev;
1939        ct_priv->dev = priv->mdev;
1940        ct_priv->mod_hdr_tbl = mod_hdr;
1941        ct_priv->ct = mlx5_chains_create_global_table(chains);
1942        if (IS_ERR(ct_priv->ct)) {
1943                err = PTR_ERR(ct_priv->ct);
1944                mlx5_core_warn(dev,
1945                               "%s, failed to create ct table err: %d\n",
1946                               INIT_ERR_PREFIX, err);
1947                goto err_ct_tbl;
1948        }
1949
1950        ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
1951        if (IS_ERR(ct_priv->ct_nat)) {
1952                err = PTR_ERR(ct_priv->ct_nat);
1953                mlx5_core_warn(dev,
1954                               "%s, failed to create ct nat table err: %d\n",
1955                               INIT_ERR_PREFIX, err);
1956                goto err_ct_nat_tbl;
1957        }
1958
1959        ct_priv->post_ct = mlx5_chains_create_global_table(chains);
1960        if (IS_ERR(ct_priv->post_ct)) {
1961                err = PTR_ERR(ct_priv->post_ct);
1962                mlx5_core_warn(dev,
1963                               "%s, failed to create post ct table err: %d\n",
1964                               INIT_ERR_PREFIX, err);
1965                goto err_post_ct_tbl;
1966        }
1967
1968        idr_init(&ct_priv->fte_ids);
1969        mutex_init(&ct_priv->control_lock);
1970        mutex_init(&ct_priv->shared_counter_lock);
1971        rhashtable_init(&ct_priv->zone_ht, &zone_params);
1972        rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
1973        rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
1974
1975        return ct_priv;
1976
1977err_post_ct_tbl:
1978        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
1979err_ct_nat_tbl:
1980        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
1981err_ct_tbl:
1982        mapping_destroy(ct_priv->labels_mapping);
1983err_mapping_labels:
1984        mapping_destroy(ct_priv->zone_mapping);
1985err_mapping_zone:
1986        kfree(ct_priv);
1987err_alloc:
1988err_support:
1989
1990        return NULL;
1991}
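
/* mlx5_tc_ct_init() returns NULL rather than an ERR_PTR on failure;
 * callers treat a NULL ct_priv as "ct offload unsupported" (see the
 * -EOPNOTSUPP check in mlx5_tc_ct_flow_offload() above) and
 * mlx5_tc_ct_clean() below is NULL-safe. A usage sketch:
 *
 *        ct_priv = mlx5_tc_ct_init(priv, chains, mod_hdr_tbl,
 *                                  MLX5_FLOW_NAMESPACE_FDB);
 *        ...
 *        mlx5_tc_ct_clean(ct_priv);
 */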
1992
1993void
1994mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
1995{
1996        struct mlx5_fs_chains *chains;
1997
1998        if (!ct_priv)
1999                return;
2000
2001        chains = ct_priv->chains;
2002
2003        mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2004        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2005        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2006        mapping_destroy(ct_priv->zone_mapping);
2007        mapping_destroy(ct_priv->labels_mapping);
2008
2009        rhashtable_destroy(&ct_priv->ct_tuples_ht);
2010        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2011        rhashtable_destroy(&ct_priv->zone_ht);
2012        mutex_destroy(&ct_priv->control_lock);
2013        mutex_destroy(&ct_priv->shared_counter_lock);
2014        idr_destroy(&ct_priv->fte_ids);
2015        kfree(ct_priv);
2016}
2017
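/* Called when a HW miss is handled in software: map the zone_restore_id
 * that HW wrote into the packet metadata back to a zone, rebuild the
 * tuple from the skb, look the connection up and restore its ct state to
 * the skb via the saved restore_cookie.
 */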
2018bool
2019mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2020                         struct sk_buff *skb, u8 zone_restore_id)
2021{
2022        struct mlx5_ct_tuple tuple = {};
2023        struct mlx5_ct_entry *entry;
2024        u16 zone;
2025
2026        if (!ct_priv || !zone_restore_id)
2027                return true;
2028
2029        if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2030                return false;
2031
2032        if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2033                return false;
2034
2035        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
2036                                       tuples_ht_params);
2037        if (!entry)
2038                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
2039                                               &tuple, tuples_nat_ht_params);
2040        if (!entry)
2041                return false;
2042
2043        tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2044        return true;
2045}
2046