// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5_flow_table *post_ct;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mutex shared_counter_lock;
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5_flow_handle *post_ct_rule;
        struct mlx5_ct_ft *ft;
        u32 fte_id;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
        bool is_shared;
};

struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_counter *counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];
};

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

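/* An entry is hashed into ct_tuples_nat_ht only when its NAT tuple
 * differs from the original tuple; rhashtable insertion leaves
 * tuple_nat_node.next non-NULL, so it doubles as a "has NAT" flag.
 */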
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
        return !!(entry->tuple_nat_node.next);
}

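/* Extract the connection tuple (L3 addresses, L4 ports and protocols)
 * from a conntrack flow_rule match. Only IPv4/IPv6 over TCP/UDP with
 * an explicit ports match is offloadable; anything else is rejected
 * with -EOPNOTSUPP.
 */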
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}

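/* Derive the post-NAT tuple by replaying the rule's mangle actions on
 * top of a copy of the original tuple. Mangles of any field outside
 * the IPv4/IPv6 addresses and TCP/UDP ports fail the offload.
 */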
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

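/* Translate the software flow_rule match (ethertype, IP addresses,
 * L4 ports, TCP flags) into the outer-headers section of an mlx5
 * flow spec, filling both match criteria and match value.
 */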
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (entry->counter->is_shared &&
            !refcount_dec_and_test(&entry->counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
        kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

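/* Emit the modify-header actions that store the conntrack metadata
 * (ct state bits, mark, labels mapping id and zone restore id) in
 * the registers used to carry CT state between tables and to restore
 * it after a miss to software.
 */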
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}

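/* Convert a single pedit mangle action into an mlx5 set_action_in
 * modify-header entry, mapping the header offset to the matching
 * hardware field id.
 */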
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

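/* Build the modify-header for a CT entry rule: map the conntrack
 * labels to a compact id, append the NAT rewrites (if any), set the
 * CT metadata registers, and attach the resulting modify-header
 * context to the flow attr.
 */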
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
                          &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;
        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                   ct_priv->mod_hdr_tbl,
                                   ct_priv->ns_type,
                                   &mod_acts);
        if (IS_ERR(*mh)) {
                err = PTR_ERR(*mh);
                goto err_mapping;
        }
        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        return err;
}

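/* Offload one rule of a conntrack entry: match the tuple + zone in
 * the CT (or, for nat, the CT NAT) table, rewrite headers/registers
 * via the modify-header built above, count, and forward to post_ct.
 */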
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = ct_priv->post_ct;
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    entry->tuple.zone & MLX5_CT_ZONE_MASK,
                                    MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kfree(spec);
        return err;
}

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_ct_counter *counter;
        int ret;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return ERR_PTR(-ENOMEM);

        counter->is_shared = false;
        counter->counter = mlx5_fc_create(ct_priv->dev, true);
        if (IS_ERR(counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(counter->counter);
                kfree(counter);
                return ERR_PTR(ret);
        }

        return counter;
}

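/* Look up the entry of the reverse direction (swapped addresses and
 * ports) and share its counter so both directions of a connection
 * report aggregated stats; fall back to a fresh shared counter when
 * the reverse entry doesn't exist yet.
 */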
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_counter *shared_counter;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;
        int ret;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        mutex_lock(&ct_priv->shared_counter_lock);
        rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
                                           tuples_ht_params);
        if (rev_entry) {
                if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
                        mutex_unlock(&ct_priv->shared_counter_lock);
                        return rev_entry->counter;
                }
        }
        mutex_unlock(&ct_priv->shared_counter_lock);

        shared_counter = mlx5_tc_ct_counter_create(ct_priv);
        if (IS_ERR(shared_counter)) {
                ret = PTR_ERR(shared_counter);
                return ERR_PTR(ret);
        }

        shared_counter->is_shared = true;
        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
                entry->counter = mlx5_tc_ct_counter_create(ct_priv);
        else
                entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

        if (IS_ERR(entry->counter)) {
                err = PTR_ERR(entry->counter);
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_counter_put(ct_priv, entry);
        return err;
}

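/* FLOW_CLS_REPLACE handler: allocate a CT entry for the connection,
 * compute its original and post-NAT tuples, hash both, and offload
 * the plain and NAT rules for the entry.
 */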
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (entry)
                return 0;

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
                                     &entry->tuple_node,
                                     tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                             &entry->tuple_nat_node,
                                             tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
                                     cts_ht_params);
        if (err)
                goto err_insert;

        return 0;

err_insert:
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                               &entry->tuple_node,
                               tuples_ht_params);
err_tuple:
err_set:
        kfree(entry);
        netdev_warn(ct_priv->netdev,
                    "Failed to offload ct entry, err: %d\n", err);
        return err;
}

static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
                        struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
        mutex_lock(&ct_priv->shared_counter_lock);
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node,
                                       tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
        mutex_unlock(&ct_priv->shared_counter_lock);
        mlx5_tc_ct_counter_put(ct_priv, entry);
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
        WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
                                       &entry->node,
                                       cts_ht_params));
        kfree(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
                                       cts_ht_params);
        if (!entry)
                return -ENOENT;

        mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        return 0;
}

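/* Callback registered on the netfilter flow table; dispatches
 * conntrack entry add/del/stats requests for this zone's ct flow
 * table.
 */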
static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                goto out;
        }

        return true;

out:
        return false;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);
        if (ctstate_mask)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
}

int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_dissector_key_ct *mask, *key;
        bool trk, est, untrk, unest, new;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est and new are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

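/* Populate a pre-CT table: a flow rule that sends packets whose zone
 * and ct state registers already match (+trk, and +nat for the NAT
 * variant) straight to post_ct, and a miss rule that sends everything
 * else to the CT (or CT NAT) table. Both rules set the zone register
 * on the way.
 */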
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
                                  struct mlx5_tc_ct_pre *pre_ct,
                                  bool nat)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table *ft = pre_ct->ft;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        u32 ctstate;
        u16 zone;
        int err;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
        err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
                                        ZONE_TO_REG, zone);
        if (err) {
                ct_dbg("Failed to set zone register mapping");
                goto err_mapping;
        }

        mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
                                           pre_mod_acts.num_actions,
                                           pre_mod_acts.actions);

        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create pre ct mod hdr");
                goto err_mapping;
        }
        pre_ct->modify_hdr = mod_hdr;

        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                          MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        flow_act.modify_hdr = mod_hdr;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

        /* add flow rule */
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    zone, MLX5_CT_ZONE_MASK);
        ctstate = MLX5_CT_STATE_TRK_BIT;
        if (nat)
                ctstate |= MLX5_CT_STATE_NAT_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

        dest.ft = ct_priv->post_ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct flow rule zone %d", zone);
                goto err_flow_rule;
        }
        pre_ct->flow_rule = rule;

        /* add miss rule */
        memset(spec, 0, sizeof(*spec));
        dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct miss rule zone %d", zone);
                goto err_miss_rule;
        }
        pre_ct->miss_rule = rule;

        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return 0;

err_miss_rule:
        mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return err;
}

static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;

        mlx5_del_flow_rules(pre_ct->flow_rule);
        mlx5_del_flow_rules(pre_ct->miss_rule);
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}

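/* Create a two-entry pre-CT flow table: group 0 matches the zone and
 * ct state bits in metadata reg_c_2, group 1 catches the miss; then
 * install the two rules above.
 */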
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
                        struct mlx5_tc_ct_pre *pre_ct,
                        bool nat)
{
        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table_attr ft_attr = {};
        struct mlx5_flow_namespace *ns;
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *g;
        u32 metadata_reg_c_2_mask;
        u32 *flow_group_in;
        void *misc;
        int err;

        ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
        if (!ns) {
                err = -EOPNOTSUPP;
                ct_dbg("Failed to get flow namespace");
                return err;
        }

        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
        if (!flow_group_in)
                return -ENOMEM;

        ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
        ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
                       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
        ft_attr.max_fte = 2;
        ft_attr.level = 1;
        ft = mlx5_create_flow_table(ns, &ft_attr);
        if (IS_ERR(ft)) {
                err = PTR_ERR(ft);
                ct_dbg("Failed to create pre ct table");
                goto out_free;
        }
        pre_ct->ft = ft;

        /* create flow group */
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
                 MLX5_MATCH_MISC_PARAMETERS_2);

        misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
                            match_criteria.misc_parameters_2);

        metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
        metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
        if (nat)
                metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
                 metadata_reg_c_2_mask);

        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct group");
                goto err_flow_grp;
        }
        pre_ct->flow_grp = g;

        /* create miss group */
        memset(flow_group_in, 0, inlen);
        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
        g = mlx5_create_flow_group(ft, flow_group_in);
        if (IS_ERR(g)) {
                err = PTR_ERR(g);
                ct_dbg("Failed to create pre ct miss group");
                goto err_miss_grp;
        }
        pre_ct->miss_grp = g;

        err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
        if (err)
                goto err_add_rules;

        kvfree(flow_group_in);
        return 0;

err_add_rules:
        mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
        mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
        mlx5_destroy_flow_table(ft);
out_free:
        kvfree(flow_group_in);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
                       struct mlx5_tc_ct_pre *pre_ct)
{
        tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
        mlx5_destroy_flow_group(pre_ct->miss_grp);
        mlx5_destroy_flow_group(pre_ct->flow_grp);
        mlx5_destroy_flow_table(pre_ct->ft);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        int err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
        if (err)
                return err;

        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
        if (err)
                goto err_pre_ct_nat;

        return 0;

err_pre_ct_nat:
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
        return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}

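/* Get (or create) the per-zone ct flow table: allocate a zone restore
 * id, build the pre_ct/pre_ct_nat tables, and register the offload
 * callback on the netfilter flow table so conntrack entries of this
 * zone are pushed down to hardware.
 */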
1419static struct mlx5_ct_ft *
1420mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1421                     struct nf_flowtable *nf_ft)
1422{
1423        struct mlx5_ct_ft *ft;
1424        int err;
1425
1426        ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1427        if (ft) {
1428                refcount_inc(&ft->refcount);
1429                return ft;
1430        }
1431
1432        ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1433        if (!ft)
1434                return ERR_PTR(-ENOMEM);
1435
1436        err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1437        if (err)
1438                goto err_mapping;
1439
1440        ft->zone = zone;
1441        ft->nf_ft = nf_ft;
1442        ft->ct_priv = ct_priv;
1443        refcount_set(&ft->refcount, 1);
1444
1445        err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1446        if (err)
1447                goto err_alloc_pre_ct;
1448
1449        err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1450        if (err)
1451                goto err_init;
1452
1453        err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1454                                     zone_params);
1455        if (err)
1456                goto err_insert;
1457
1458        err = nf_flow_table_offload_add_cb(ft->nf_ft,
1459                                           mlx5_tc_ct_block_flow_offload, ft);
1460        if (err)
1461                goto err_add_cb;
1462
1463        return ft;
1464
1465err_add_cb:
1466        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1467err_insert:
1468        rhashtable_destroy(&ft->ct_entries_ht);
1469err_init:
1470        mlx5_tc_ct_free_pre_ct_tables(ft);
1471err_alloc_pre_ct:
1472        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1473err_mapping:
1474        kfree(ft);
1475        return ERR_PTR(err);
1476}
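
    /* Note: the zone_ht lookup/insert above is not serialized here;
     * callers reach this through mlx5_tc_ct_flow_offload() and
     * mlx5_tc_ct_delete_flow(), which hold ct_priv->control_lock, so
     * concurrent add/del of a zone's ft cannot race.
     */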
1477
1478static void
1479mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1480{
1481        struct mlx5_tc_ct_priv *ct_priv = arg;
1482        struct mlx5_ct_entry *entry = ptr;
1483
1484        mlx5_tc_ct_del_ft_entry(ct_priv, entry);
1485        kfree(entry);
1486}
1487
1488static void
1489mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1490{
1491        if (!refcount_dec_and_test(&ft->refcount))
1492                return;
1493
1494        nf_flow_table_offload_del_cb(ft->nf_ft,
1495                                     mlx5_tc_ct_block_flow_offload, ft);
1496        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1497        rhashtable_free_and_destroy(&ft->ct_entries_ht,
1498                                    mlx5_tc_ct_flush_ft_entry,
1499                                    ct_priv);
1500        mlx5_tc_ct_free_pre_ct_tables(ft);
1501        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1502        kfree(ft);
1503}
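
    /* Dropping the last reference flushes any entries still offloaded in
     * this zone via mlx5_tc_ct_flush_ft_entry() above, so teardown does
     * not depend on netfilter having deleted every flow first.
     */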
1504
1505/* We translate the tc filter with CT action to the following HW model:
1506 *
1507 * +---------------------+
1508 * + ft prio (tc chain)  +
1509 * + original match      +
1510 * +---------------------+
1511 *      | set chain miss mapping
1512 *      | set fte_id
1513 *      | set tunnel_id
1514 *      | do decap
1515 *      v
1516 * +---------------------+
1517 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1518 * + zone+nat match      +---------------->+ post_ct (see below) +
1519 * +---------------------+  set zone       +---------------------+
1520 *      | set zone
1521 *      v
1522 * +--------------------+
1523 * + CT (nat or no nat) +
1524 * + tuple + zone match +
1525 * +--------------------+
1526 *      | set mark
1527 *      | set labels_id
1528 *      | set established
1529 *      | set zone_restore
1530 *      | do nat (if needed)
1531 *      v
1532 * +--------------+
1533 * + post_ct      + original filter actions
1534 * + fte_id match +------------------------>
1535 * +--------------+
1536 */
1537static struct mlx5_flow_handle *
1538__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1539                          struct mlx5e_tc_flow *flow,
1540                          struct mlx5_flow_spec *orig_spec,
1541                          struct mlx5_flow_attr *attr)
1542{
1543        bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1544        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1545        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1546        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1547        struct mlx5_flow_spec *post_ct_spec = NULL;
1548        struct mlx5_flow_attr *pre_ct_attr;
1549        struct mlx5_modify_hdr *mod_hdr;
1550        struct mlx5_flow_handle *rule;
1551        struct mlx5_ct_flow *ct_flow;
1552        int chain_mapping = 0, err;
1553        struct mlx5_ct_ft *ft;
1554        u32 fte_id = 1;
1555
1556        post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1557        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1558        if (!post_ct_spec || !ct_flow) {
1559                kfree(post_ct_spec);
1560                kfree(ct_flow);
1561                return ERR_PTR(-ENOMEM);
1562        }
1563
1564        /* Register for CT established events */
1565        ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1566                                  attr->ct_attr.nf_ft);
1567        if (IS_ERR(ft)) {
1568                err = PTR_ERR(ft);
1569                ct_dbg("Failed to register ft callback");
1570                goto err_ft;
1571        }
1572        ct_flow->ft = ft;
1573
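            /* Allocate a unique fte_id to match on in post_ct; ids start
             * at 1 (see fte_id's initializer above), presumably so a
             * zeroed FTEID register never aliases a real flow.
             */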
1574        err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1575                            MLX5_FTE_ID_MAX, GFP_KERNEL);
1576        if (err) {
1577                netdev_warn(priv->netdev,
1578                            "Failed to allocate fte id, err: %d\n", err);
1579                goto err_idr;
1580        }
1581        ct_flow->fte_id = fte_id;
1582
1583        /* Base both rules' flow attributes on the original rule's attributes */
1584        ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1585        if (!ct_flow->pre_ct_attr) {
1586                err = -ENOMEM;
1587                goto err_alloc_pre;
1588        }
1589
1590        ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1591        if (!ct_flow->post_ct_attr) {
1592                err = -ENOMEM;
1593                goto err_alloc_post;
1594        }
1595
1596        pre_ct_attr = ct_flow->pre_ct_attr;
1597        memcpy(pre_ct_attr, attr, attr_sz);
1598        memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1599
1600        /* Modify the original rule's action to fwd and modify hdr, keeping decap if set */
1601        pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1602        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1603                               MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1604
1605        /* Write the chain miss tag for misses in the ct tables, since
1606         * we don't go through all the prios of this chain the way a
1607         * normal tc rule miss would.
1608         */
1609        err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1610                                            &chain_mapping);
1611        if (err) {
1612                ct_dbg("Failed to get chain register mapping for chain");
1613                goto err_get_chain;
1614        }
1615        ct_flow->chain_mapping = chain_mapping;
1616
1617        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1618                                        CHAIN_TO_REG, chain_mapping);
1619        if (err) {
1620                ct_dbg("Failed to set chain register mapping");
1621                goto err_mapping;
1622        }
1623
1624        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1625                                        FTEID_TO_REG, fte_id);
1626        if (err) {
1627                ct_dbg("Failed to set fte_id register mapping");
1628                goto err_mapping;
1629        }
1630
1631        /* If the original flow is decap, we do the decap before going
1632         * into the ct table, so add a rewrite of the tunnel match_id.
1633         */
1634        if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1635            attr->chain == 0) {
1636                u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1637
1638                err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1639                                                ct_priv->ns_type,
1640                                                TUNNEL_TO_REG,
1641                                                tun_id);
1642                if (err) {
1643                        ct_dbg("Failed to set tunnel register mapping");
1644                        goto err_mapping;
1645                }
1646        }
1647
1648        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1649                                           pre_mod_acts.num_actions,
1650                                           pre_mod_acts.actions);
1651        if (IS_ERR(mod_hdr)) {
1652                err = PTR_ERR(mod_hdr);
1653                ct_dbg("Failed to create pre ct mod hdr");
1654                goto err_mapping;
1655        }
1656        pre_ct_attr->modify_hdr = mod_hdr;
1657
1658        /* The post_ct rule matches on fte_id and executes the original
1659         * tc rule's actions
1660         */
1661        mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1662                                    fte_id, MLX5_FTE_ID_MASK);
1663
1664        /* Put post_ct rule on post_ct flow table */
1665        ct_flow->post_ct_attr->chain = 0;
1666        ct_flow->post_ct_attr->prio = 0;
1667        ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1668
1669        ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1670        ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1671        ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1672        rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1673                                   ct_flow->post_ct_attr);
1674        ct_flow->post_ct_rule = rule;
1675        if (IS_ERR(ct_flow->post_ct_rule)) {
1676                err = PTR_ERR(ct_flow->post_ct_rule);
1677                ct_dbg("Failed to add post ct rule");
1678                goto err_insert_post_ct;
1679        }
1680
1681        /* Change the original rule to point at the pre_ct (or pre_ct_nat) table */
1682        pre_ct_attr->dest_chain = 0;
1683        pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1684        ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1685                                                   pre_ct_attr);
1686        if (IS_ERR(ct_flow->pre_ct_rule)) {
1687                err = PTR_ERR(ct_flow->pre_ct_rule);
1688                ct_dbg("Failed to add pre ct rule");
1689                goto err_insert_orig;
1690        }
1691
1692        attr->ct_attr.ct_flow = ct_flow;
1693        dealloc_mod_hdr_actions(&pre_mod_acts);
1694        kfree(post_ct_spec);
1695
1696        return rule;
1697
1698err_insert_orig:
1699        mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1700                            ct_flow->post_ct_attr);
1701err_insert_post_ct:
1702        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1703err_mapping:
1704        dealloc_mod_hdr_actions(&pre_mod_acts);
1705        mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1706err_get_chain:
1707        kfree(ct_flow->post_ct_attr);
1708err_alloc_post:
1709        kfree(ct_flow->pre_ct_attr);
1710err_alloc_pre:
1711        idr_remove(&ct_priv->fte_ids, fte_id);
1712err_idr:
1713        mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1714err_ft:
1715        kfree(post_ct_spec);
1716        kfree(ct_flow);
1717        netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1718        return ERR_PTR(err);
1719}
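
    /* For illustration only (a hypothetical rule, not from this file), a
     * filter such as
     *
     *     tc filter add dev $DEV ingress chain 0 proto ip flower \
     *        ct_state -trk \
     *        action ct zone 1 nat pipe \
     *        action goto chain 1
     *
     * takes this path: the flower match becomes the pre_ct rule (nat is
     * set, so it points at pre_ct_nat), and the "goto chain 1"
     * continuation executes from post_ct once the fte_id match
     * identifies the flow.
     */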
1720
1721static struct mlx5_flow_handle *
1722__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1723                                struct mlx5_flow_spec *orig_spec,
1724                                struct mlx5_flow_attr *attr,
1725                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
1726{
1727        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1728        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1729        struct mlx5_flow_attr *pre_ct_attr;
1730        struct mlx5_modify_hdr *mod_hdr;
1731        struct mlx5_flow_handle *rule;
1732        struct mlx5_ct_flow *ct_flow;
1733        int err;
1734
1735        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1736        if (!ct_flow)
1737                return ERR_PTR(-ENOMEM);
1738
1739        /* Base the flow attributes on the original rule's attributes */
1740        pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1741        if (!pre_ct_attr) {
1742                err = -ENOMEM;
1743                goto err_attr;
1744        }
1745
1746        memcpy(pre_ct_attr, attr, attr_sz);
1747
1748        err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1749        if (err) {
1750                ct_dbg("Failed to set register for ct clear");
1751                goto err_set_registers;
1752        }
1753
1754        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1755                                           mod_acts->num_actions,
1756                                           mod_acts->actions);
1757        if (IS_ERR(mod_hdr)) {
1758                err = PTR_ERR(mod_hdr);
1759                ct_dbg("Failed to create ct clear mod hdr");
1760                goto err_set_registers;
1761        }
1762
1763        dealloc_mod_hdr_actions(mod_acts);
1764        pre_ct_attr->modify_hdr = mod_hdr;
1765        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1766
1767        rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1768        if (IS_ERR(rule)) {
1769                err = PTR_ERR(rule);
1770                ct_dbg("Failed to add ct clear rule");
1771                goto err_insert;
1772        }
1773
1774        attr->ct_attr.ct_flow = ct_flow;
1775        ct_flow->pre_ct_attr = pre_ct_attr;
1776        ct_flow->pre_ct_rule = rule;
1777        return rule;
1778
1779err_insert:
1780        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1781err_set_registers:
1782        netdev_warn(priv->netdev,
1783                    "Failed to offload ct clear flow, err %d\n", err);
1784        kfree(pre_ct_attr);
1785err_attr:
1786        kfree(ct_flow);
1787
1788        return ERR_PTR(err);
1789}
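
    /* A hedged example of a rule taking the clear path (hypothetical):
     *
     *     tc filter add dev $DEV ingress chain 1 proto ip flower \
     *        ct_state +trk+est \
     *        action ct clear pipe \
     *        action mirred egress redirect dev $DEV2
     *
     * No zone ft, fte_id or post_ct rule is needed: a single rule resets
     * the CT match registers to 0 and then runs the original actions.
     */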
1790
1791struct mlx5_flow_handle *
1792mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1793                        struct mlx5e_tc_flow *flow,
1794                        struct mlx5_flow_spec *spec,
1795                        struct mlx5_flow_attr *attr,
1796                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1797{
1798        bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1799        struct mlx5_flow_handle *rule;
1800
1801        if (!priv)
1802                return ERR_PTR(-EOPNOTSUPP);
1803
1804        mutex_lock(&priv->control_lock);
1805
1806        if (clear_action)
1807                rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1808        else
1809                rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1810        mutex_unlock(&priv->control_lock);
1811
1812        return rule;
1813}
1814
1815static void
1816__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1817                         struct mlx5e_tc_flow *flow,
1818                         struct mlx5_ct_flow *ct_flow)
1819{
1820        struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1821        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1822
1823        mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1824                            pre_ct_attr);
1825        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1826
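            /* ct clear flows (__mlx5_tc_ct_flow_offload_clear()) never set
             * post_ct_rule and allocated no fte_id, chain mapping or zone
             * ft, so those teardown steps are skipped for them.
             */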
1827        if (ct_flow->post_ct_rule) {
1828                mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1829                                    ct_flow->post_ct_attr);
1830                mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1831                idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1832                mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1833        }
1834
1835        kfree(ct_flow->pre_ct_attr);
1836        kfree(ct_flow->post_ct_attr);
1837        kfree(ct_flow);
1838}
1839
1840void
1841mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1842                       struct mlx5e_tc_flow *flow,
1843                       struct mlx5_flow_attr *attr)
1844{
1845        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1846
1847        /* We may also be called to clean up after a parsing error,
1848         * before any rule was offloaded; nothing to delete then.
1849         */
1850        if (!ct_flow)
1851                return;
1852
1853        mutex_lock(&priv->control_lock);
1854        __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1855        mutex_unlock(&priv->control_lock);
1856}
1857
1858static int
1859mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
1860                                  const char **err_msg)
1861{
1862        if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
1863                *err_msg = "firmware level support is missing";
1864                return -EOPNOTSUPP;
1865        }
1866
1867        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
1868                /* The vlan workaround must be avoided for multi-chain rules.
1869                 * This is just a sanity check, as the pop vlan action should
1870                 * be supported by any FW that supports ignore_flow_level.
1871                 */
1872
1873                *err_msg = "firmware vlan actions support is missing";
1874                return -EOPNOTSUPP;
1875        }
1876
1877        if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
1878                                    fdb_modify_header_fwd_to_table)) {
1879                /* CT always writes to registers, which requires mod header
1880                 * actions; therefore, mod header combined with goto is required.
1881                 */
1882
1883                *err_msg = "firmware fwd and modify support is missing";
1884                return -EOPNOTSUPP;
1885        }
1886
1887        if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
1888                *err_msg = "register loopback isn't supported";
1889                return -EOPNOTSUPP;
1890        }
1891
1892        return 0;
1893}
1894
1895static int
1896mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
1897                                  const char **err_msg)
1898{
1899        if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
1900                *err_msg = "firmware level support is missing";
1901                return -EOPNOTSUPP;
1902        }
1903
1904        return 0;
1905}
1906
1907static int
1908mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
1909                              enum mlx5_flow_namespace_type ns_type,
1910                              const char **err_msg)
1911{
1912        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1913
1914        if (!IS_ENABLED(CONFIG_NET_TC_SKB_EXT)) {
1915                /* cannot restore chain ID on HW miss */
1916                *err_msg = "tc skb extension missing";
1917                return -EOPNOTSUPP;
1918        }
1919
1920        if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
1921                return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
1922        else
1923                return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
1924}
1925
1926#define INIT_ERR_PREFIX "tc ct offload init failed"
1927
1928struct mlx5_tc_ct_priv *
1929mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
1930                struct mod_hdr_tbl *mod_hdr,
1931                enum mlx5_flow_namespace_type ns_type)
1932{
1933        struct mlx5_tc_ct_priv *ct_priv;
1934        struct mlx5_core_dev *dev;
1935        const char *msg;
1936        int err;
1937
1938        dev = priv->mdev;
1939        err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
1940        if (err) {
1941                mlx5_core_warn(dev,
1942                               "tc ct offload not supported, %s\n",
1943                               msg);
1944                goto err_support;
1945        }
1946
1947        ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
1948        if (!ct_priv)
1949                goto err_alloc;
1950
1951        ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
1952        if (IS_ERR(ct_priv->zone_mapping)) {
1953                err = PTR_ERR(ct_priv->zone_mapping);
1954                goto err_mapping_zone;
1955        }
1956
1957        ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
1958        if (IS_ERR(ct_priv->labels_mapping)) {
1959                err = PTR_ERR(ct_priv->labels_mapping);
1960                goto err_mapping_labels;
1961        }
1962
1963        ct_priv->ns_type = ns_type;
1964        ct_priv->chains = chains;
1965        ct_priv->netdev = priv->netdev;
1966        ct_priv->dev = priv->mdev;
1967        ct_priv->mod_hdr_tbl = mod_hdr;
1968        ct_priv->ct = mlx5_chains_create_global_table(chains);
1969        if (IS_ERR(ct_priv->ct)) {
1970                err = PTR_ERR(ct_priv->ct);
1971                mlx5_core_warn(dev,
1972                               "%s, failed to create ct table err: %d\n",
1973                               INIT_ERR_PREFIX, err);
1974                goto err_ct_tbl;
1975        }
1976
1977        ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
1978        if (IS_ERR(ct_priv->ct_nat)) {
1979                err = PTR_ERR(ct_priv->ct_nat);
1980                mlx5_core_warn(dev,
1981                               "%s, failed to create ct nat table err: %d\n",
1982                               INIT_ERR_PREFIX, err);
1983                goto err_ct_nat_tbl;
1984        }
1985
1986        ct_priv->post_ct = mlx5_chains_create_global_table(chains);
1987        if (IS_ERR(ct_priv->post_ct)) {
1988                err = PTR_ERR(ct_priv->post_ct);
1989                mlx5_core_warn(dev,
1990                               "%s, failed to create post ct table err: %d\n",
1991                               INIT_ERR_PREFIX, err);
1992                goto err_post_ct_tbl;
1993        }
1994
1995        idr_init(&ct_priv->fte_ids);
1996        mutex_init(&ct_priv->control_lock);
1997        mutex_init(&ct_priv->shared_counter_lock);
1998        rhashtable_init(&ct_priv->zone_ht, &zone_params);
1999        rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2000        rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
2001
2002        return ct_priv;
2003
2004err_post_ct_tbl:
2005        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2006err_ct_nat_tbl:
2007        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2008err_ct_tbl:
2009        mapping_destroy(ct_priv->labels_mapping);
2010err_mapping_labels:
2011        mapping_destroy(ct_priv->zone_mapping);
2012err_mapping_zone:
2013        kfree(ct_priv);
2014err_alloc:
2015err_support:
2016
2017        return NULL;
2018}
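
    /* A minimal usage sketch (hedged; the real call sites live in the
     * driver's tc setup code):
     *
     *     ct_priv = mlx5_tc_ct_init(priv, chains, mod_hdr_tbl,
     *                               MLX5_FLOW_NAMESPACE_FDB);
     *     if (!ct_priv)
     *             ...continue without CT offload; ct() rules won't offload...
     *     ...
     *     mlx5_tc_ct_clean(ct_priv);  // NULL-safe teardown, see below
     */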
2019
2020void
2021mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2022{
2023        struct mlx5_fs_chains *chains;
2024
2025        if (!ct_priv)
2026                return;
2027
2028        chains = ct_priv->chains;
2029
2030        mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2031        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2032        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2033        mapping_destroy(ct_priv->zone_mapping);
2034        mapping_destroy(ct_priv->labels_mapping);
2035
2036        rhashtable_destroy(&ct_priv->ct_tuples_ht);
2037        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2038        rhashtable_destroy(&ct_priv->zone_ht);
2039        mutex_destroy(&ct_priv->control_lock);
2040        mutex_destroy(&ct_priv->shared_counter_lock);
2041        idr_destroy(&ct_priv->fte_ids);
2042        kfree(ct_priv);
2043}
2044
2045bool
2046mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2047                         struct sk_buff *skb, u8 zone_restore_id)
2048{
2049        struct mlx5_ct_tuple tuple = {};
2050        struct mlx5_ct_entry *entry;
2051        u16 zone;
2052
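            /* A zero zone_restore_id means the packet did not go through an
             * offloaded CT zone in hardware, so there is no state to restore.
             */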
2053        if (!ct_priv || !zone_restore_id)
2054                return true;
2055
2056        if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2057                return false;
2058
2059        if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2060                return false;
2061
2062        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
2063                                       tuples_ht_params);
2064        if (!entry)
2065                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
2066                                               &tuple, tuples_nat_ht_params);
2067        if (!entry)
2068                return false;
2069
2070        tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2071        return true;
2072}
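
    /* A hedged sketch of the caller side: on a HW miss, the RX path is
     * expected to recover zone_restore_id from the packet metadata and do
     * roughly
     *
     *     if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
     *             ...drop the skb: its CT state cannot be rebuilt...
     *
     * On success the skb is re-associated with its conntrack entry via
     * the restore cookie, as if it had traversed the software CT action.
     */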
2073