/* linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c */
   1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
   2/* Copyright (c) 2019 Mellanox Technologies. */
   3
   4#include <net/netfilter/nf_conntrack.h>
   5#include <net/netfilter/nf_conntrack_core.h>
   6#include <net/netfilter/nf_conntrack_zones.h>
   7#include <net/netfilter/nf_conntrack_labels.h>
   8#include <net/netfilter/nf_conntrack_helper.h>
   9#include <net/netfilter/nf_conntrack_acct.h>
  10#include <uapi/linux/tc_act/tc_pedit.h>
  11#include <net/tc_act/tc_ct.h>
  12#include <net/flow_offload.h>
  13#include <net/netfilter/nf_flow_table.h>
  14#include <linux/workqueue.h>
  15#include <linux/xarray.h>
  16
  17#include "esw/chains.h"
  18#include "en/tc_ct.h"
  19#include "en/mod_hdr.h"
  20#include "en/mapping.h"
  21#include "en.h"
  22#include "en_tc.h"
  23#include "en_rep.h"
  24
/* Widths/masks of the metadata registers used by CT offload; mlen is in
 * bytes and comes from the register mapping table declared elsewhere
 * (mlx5e_tc_attr_to_reg_mappings).
 */
#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
/* Flag bits ORed into the value written to CTSTATE_TO_REG. */
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

/* Debug print helper: expects a variable/parameter named ct_priv in scope. */
#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
  40
/* Global (per-uplink) CT offload state, reached through the uplink
 * representor's private data (see mlx5_tc_ct_get_ct_priv()).
 */
struct mlx5_tc_ct_priv {
	struct mlx5_eswitch *esw;
	const struct net_device *netdev;
	struct idr fte_ids;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;		/* zone -> struct mlx5_ct_ft */
	struct rhashtable ct_tuples_ht;		/* pre-NAT tuples of offloaded entries */
	struct rhashtable ct_tuples_nat_ht;	/* post-NAT tuples (NATing entries only) */
	struct mlx5_flow_table *ct;		/* holds plain (non-NAT) ct entry rules */
	struct mlx5_flow_table *ct_nat;		/* holds NAT ct entry rules */
	struct mlx5_flow_table *post_ct;	/* forwarding destination after a ct hit */
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;	/* ct_labels <-> compact register id */
};
  56
/* Per-TC-flow CT offload state: the pre/post CT rules and attributes for
 * one offloaded tc flow.  NOTE(review): these fields are managed by code
 * outside this chunk; roles inferred from names — verify against the
 * add/del flow helpers.
 */
struct mlx5_ct_flow {
	struct mlx5_esw_flow_attr pre_ct_attr;
	struct mlx5_esw_flow_attr post_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_flow_handle *post_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 fte_id;
	u32 chain_mapping;
};
  66
/* One offloaded conntrack-entry rule.  Each entry owns two of these,
 * indexed by @nat (see mlx5_ct_entry.zone_rules).
 */
struct mlx5_ct_zone_rule {
	struct mlx5_flow_handle *rule;
	struct mlx5e_mod_hdr_handle *mh;	/* attached (shared) modify-header */
	struct mlx5_esw_flow_attr attr;
	bool nat;
};
  73
/* Per-zone "pre CT" table: one flow group/rule pair plus a miss path and
 * the modify header applied on the hit path.  NOTE(review): populated by
 * code outside this chunk — confirm against the pre_ct setup helpers.
 */
struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *fdb;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};
  82
/* Per-conntrack-zone state, hashed in ct_priv->zone_ht by @zone. */
struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;	/* compact id programmed into ZONE_RESTORE_TO_REG */
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;	/* cookie -> struct mlx5_ct_entry */
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};
  94
/* 5-tuple (+ zone) identifying a connection.  Used verbatim as an
 * rhashtable key (key_len = sizeof(struct), i.e. memcmp over the whole
 * struct), so padding must stay zeroed — entries are kzalloc'ed.
 */
struct mlx5_ct_tuple {
	u16 addr_type;		/* FLOW_DISSECTOR_KEY_IPV{4,6}_ADDRS */
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};
 116
/* One offloaded conntrack entry.  Lives in up to three hashtables at
 * once: the per-zone ct_entries_ht (keyed by cookie) and the global
 * tuple tables (the NAT node only when tuple != tuple_nat).
 */
struct mlx5_ct_entry {
	struct rhash_head node;			/* in ft->ct_entries_ht */
	struct rhash_head tuple_node;		/* in ct_priv->ct_tuples_ht */
	struct rhash_head tuple_nat_node;	/* in ct_priv->ct_tuples_nat_ht */
	struct mlx5_fc *counter;		/* shared by both zone_rules */
	unsigned long cookie;			/* flow_cls_offload cookie */
	unsigned long restore_cookie;		/* ct_metadata cookie for SW restore */
	struct mlx5_ct_tuple tuple;		/* pre-NAT tuple */
	struct mlx5_ct_tuple tuple_nat;		/* post-NAT tuple */
	struct mlx5_ct_zone_rule zone_rules[2];	/* [false]=plain, [true]=NAT */
};
 128
/* ft->ct_entries_ht: entries keyed by the flow_cls_offload cookie. */
static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

/* ct_priv->zone_ht: per-zone tables keyed by the 16-bit zone id. */
static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

/* ct_priv->ct_tuples_ht: entries keyed by the whole pre-NAT tuple. */
static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

/* ct_priv->ct_tuples_nat_ht: entries keyed by the whole post-NAT tuple. */
static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};
 159
 160static struct mlx5_tc_ct_priv *
 161mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
 162{
 163        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 164        struct mlx5_rep_uplink_priv *uplink_priv;
 165        struct mlx5e_rep_priv *uplink_rpriv;
 166
 167        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
 168        uplink_priv = &uplink_rpriv->uplink_priv;
 169        return uplink_priv->ct_priv;
 170}
 171
 172static int
 173mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
 174{
 175        struct flow_match_control control;
 176        struct flow_match_basic basic;
 177
 178        flow_rule_match_basic(rule, &basic);
 179        flow_rule_match_control(rule, &control);
 180
 181        tuple->n_proto = basic.key->n_proto;
 182        tuple->ip_proto = basic.key->ip_proto;
 183        tuple->addr_type = control.key->addr_type;
 184
 185        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 186                struct flow_match_ipv4_addrs match;
 187
 188                flow_rule_match_ipv4_addrs(rule, &match);
 189                tuple->ip.src_v4 = match.key->src;
 190                tuple->ip.dst_v4 = match.key->dst;
 191        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 192                struct flow_match_ipv6_addrs match;
 193
 194                flow_rule_match_ipv6_addrs(rule, &match);
 195                tuple->ip.src_v6 = match.key->src;
 196                tuple->ip.dst_v6 = match.key->dst;
 197        } else {
 198                return -EOPNOTSUPP;
 199        }
 200
 201        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
 202                struct flow_match_ports match;
 203
 204                flow_rule_match_ports(rule, &match);
 205                switch (tuple->ip_proto) {
 206                case IPPROTO_TCP:
 207                case IPPROTO_UDP:
 208                        tuple->port.src = match.key->src;
 209                        tuple->port.dst = match.key->dst;
 210                        break;
 211                default:
 212                        return -EOPNOTSUPP;
 213                }
 214        } else {
 215                return -EOPNOTSUPP;
 216        }
 217
 218        return 0;
 219}
 220
/* Derive the post-NAT tuple from @rule's mangle (pedit) actions.
 *
 * @tuple must be pre-initialized with the pre-NAT tuple (the caller
 * memcpy's it in); each mangle action overwrites just the field it
 * rewrites.  Returns -EOPNOTSUPP for a mangle at a header/offset that
 * cannot be represented in the tuple.
 */
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			/* Mangles rewrite 32-bit words; map the byte offset
			 * to a word index over the contiguous saddr+daddr
			 * range: words 0-3 are saddr, 4-7 are daddr.
			 * NOTE(review): assumes offset >= offsetof(saddr);
			 * a smaller offset would wrap the unsigned math.
			 */
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
 282
/* Translate @rule's match keys (basic, control, IPv4/IPv6 addrs, ports,
 * tcp flags) into the mlx5 fte match @spec (outer headers), copying both
 * masks and values.  Always returns 0; an L4 protocol other than TCP/UDP
 * simply contributes no port match.
 */
static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
				       headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		/* Remember the protocol to pick tcp_ vs udp_ port fields. */
		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	return 0;
}
 398
 399static void
 400mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
 401                          struct mlx5_ct_entry *entry,
 402                          bool nat)
 403{
 404        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
 405        struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
 406        struct mlx5_eswitch *esw = ct_priv->esw;
 407
 408        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);
 409
 410        mlx5_eswitch_del_offloaded_rule(esw, zone_rule->rule, attr);
 411        mlx5e_mod_hdr_detach(ct_priv->esw->dev,
 412                             &esw->offloads.mod_hdr, zone_rule->mh);
 413        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
 414}
 415
 416static void
 417mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
 418                           struct mlx5_ct_entry *entry)
 419{
 420        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
 421        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
 422
 423        mlx5_fc_destroy(ct_priv->esw->dev, entry->counter);
 424}
 425
 426static struct flow_action_entry *
 427mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
 428{
 429        struct flow_action *flow_action = &flow_rule->action;
 430        struct flow_action_entry *act;
 431        int i;
 432
 433        flow_action_for_each(i, act, flow_action) {
 434                if (act->id == FLOW_ACTION_CT_METADATA)
 435                        return act;
 436        }
 437
 438        return NULL;
 439}
 440
 441static int
 442mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
 443                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
 444                               u8 ct_state,
 445                               u32 mark,
 446                               u32 labels_id,
 447                               u8 zone_restore_id)
 448{
 449        struct mlx5_eswitch *esw = ct_priv->esw;
 450        int err;
 451
 452        err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
 453                                        CTSTATE_TO_REG, ct_state);
 454        if (err)
 455                return err;
 456
 457        err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
 458                                        MARK_TO_REG, mark);
 459        if (err)
 460                return err;
 461
 462        err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
 463                                        LABELS_TO_REG, labels_id);
 464        if (err)
 465                return err;
 466
 467        err = mlx5e_tc_match_to_reg_set(esw->dev, mod_acts,
 468                                        ZONE_RESTORE_TO_REG, zone_restore_id);
 469        if (err)
 470                return err;
 471
 472        return 0;
 473}
 474
/* Translate one pedit (mangle) action into a single mlx5 set_action_in
 * modify-header action written at @modact.  Returns -EOPNOTSUPP for a
 * header/offset the rewrite engine does not cover.
 */
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		/* length 0: full-width (32-bit) field write — NOTE(review):
		 * per set_action_in encoding, confirm against the PRM.
		 */
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		/* Each pedit covers one 32-bit word; map the byte offset
		 * within saddr/daddr to the matching SIPV6_/DIPV6_ field.
		 */
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		/* Ports are 16-bit writes. */
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}
 545
 546static int
 547mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
 548                            struct flow_rule *flow_rule,
 549                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
 550{
 551        struct flow_action *flow_action = &flow_rule->action;
 552        struct mlx5_core_dev *mdev = ct_priv->esw->dev;
 553        struct flow_action_entry *act;
 554        size_t action_size;
 555        char *modact;
 556        int err, i;
 557
 558        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
 559
 560        flow_action_for_each(i, act, flow_action) {
 561                switch (act->id) {
 562                case FLOW_ACTION_MANGLE: {
 563                        err = alloc_mod_hdr_actions(mdev,
 564                                                    MLX5_FLOW_NAMESPACE_FDB,
 565                                                    mod_acts);
 566                        if (err)
 567                                return err;
 568
 569                        modact = mod_acts->actions +
 570                                 mod_acts->num_actions * action_size;
 571
 572                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
 573                        if (err)
 574                                return err;
 575
 576                        mod_acts->num_actions++;
 577                }
 578                break;
 579
 580                case FLOW_ACTION_CT_METADATA:
 581                        /* Handled earlier */
 582                        continue;
 583                default:
 584                        return -EOPNOTSUPP;
 585                }
 586        }
 587
 588        return 0;
 589}
 590
 591static int
 592mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
 593                                struct mlx5_esw_flow_attr *attr,
 594                                struct flow_rule *flow_rule,
 595                                struct mlx5e_mod_hdr_handle **mh,
 596                                u8 zone_restore_id, bool nat)
 597{
 598        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
 599        struct flow_action_entry *meta;
 600        u16 ct_state = 0;
 601        int err;
 602
 603        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
 604        if (!meta)
 605                return -EOPNOTSUPP;
 606
 607        err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
 608                          &attr->ct_attr.ct_labels_id);
 609        if (err)
 610                return -EOPNOTSUPP;
 611        if (nat) {
 612                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
 613                                                  &mod_acts);
 614                if (err)
 615                        goto err_mapping;
 616
 617                ct_state |= MLX5_CT_STATE_NAT_BIT;
 618        }
 619
 620        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
 621        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
 622                                             ct_state,
 623                                             meta->ct_metadata.mark,
 624                                             attr->ct_attr.ct_labels_id,
 625                                             zone_restore_id);
 626        if (err)
 627                goto err_mapping;
 628
 629        *mh = mlx5e_mod_hdr_attach(ct_priv->esw->dev,
 630                                   &ct_priv->esw->offloads.mod_hdr,
 631                                   MLX5_FLOW_NAMESPACE_FDB,
 632                                   &mod_acts);
 633        if (IS_ERR(*mh)) {
 634                err = PTR_ERR(*mh);
 635                goto err_mapping;
 636        }
 637        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
 638
 639        dealloc_mod_hdr_actions(&mod_acts);
 640        return 0;
 641
 642err_mapping:
 643        dealloc_mod_hdr_actions(&mod_acts);
 644        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
 645        return err;
 646}
 647
/* Offload one flavor (@nat) of a conntrack entry: build its modify
 * header, then install a rule in the ct (or ct_nat) table that matches
 * the entry's tuple and zone register, counts, and forwards to post_ct.
 */
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_esw_flow_attr *attr = &zone_rule->attr;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_flow_spec *spec = NULL;
	int err;

	zone_rule->nat = nat;

	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id, nat);
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = ct_priv->post_ct;
	/* NAT entries live in the ct_nat table, plain ones in ct. */
	attr->fdb = nat ? ct_priv->ct_nat : ct_priv->ct;
	attr->outer_match_level = MLX5_MATCH_L4;
	attr->counter = entry->counter;
	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;

	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
	/* Also match the zone register so identical tuples in different
	 * zones stay distinct.
	 */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    entry->tuple.zone & MLX5_CT_ZONE_MASK,
				    MLX5_CT_ZONE_MASK);

	zone_rule->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	kfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	/* Unwind mirrors mlx5_tc_ct_entry_del_rule(). */
	mlx5e_mod_hdr_detach(ct_priv->esw->dev,
			     &esw->offloads.mod_hdr, zone_rule->mh);
	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	kfree(spec);
	return err;
}
 709
 710static int
 711mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
 712                           struct flow_rule *flow_rule,
 713                           struct mlx5_ct_entry *entry,
 714                           u8 zone_restore_id)
 715{
 716        struct mlx5_eswitch *esw = ct_priv->esw;
 717        int err;
 718
 719        entry->counter = mlx5_fc_create(esw->dev, true);
 720        if (IS_ERR(entry->counter)) {
 721                err = PTR_ERR(entry->counter);
 722                ct_dbg("Failed to create counter for ct entry");
 723                return err;
 724        }
 725
 726        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
 727                                        zone_restore_id);
 728        if (err)
 729                goto err_orig;
 730
 731        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
 732                                        zone_restore_id);
 733        if (err)
 734                goto err_nat;
 735
 736        return 0;
 737
 738err_nat:
 739        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
 740err_orig:
 741        mlx5_fc_destroy(esw->dev, entry->counter);
 742        return err;
 743}
 744
 745static int
 746mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
 747                                  struct flow_cls_offload *flow)
 748{
 749        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
 750        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
 751        struct flow_action_entry *meta_action;
 752        unsigned long cookie = flow->cookie;
 753        struct mlx5_ct_entry *entry;
 754        int err;
 755
 756        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
 757        if (!meta_action)
 758                return -EOPNOTSUPP;
 759
 760        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
 761                                       cts_ht_params);
 762        if (entry)
 763                return 0;
 764
 765        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 766        if (!entry)
 767                return -ENOMEM;
 768
 769        entry->tuple.zone = ft->zone;
 770        entry->cookie = flow->cookie;
 771        entry->restore_cookie = meta_action->ct_metadata.cookie;
 772
 773        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
 774        if (err)
 775                goto err_set;
 776
 777        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
 778        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
 779        if (err)
 780                goto err_set;
 781
 782        err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
 783                                     &entry->tuple_node,
 784                                     tuples_ht_params);
 785        if (err)
 786                goto err_tuple;
 787
 788        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
 789                err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
 790                                             &entry->tuple_nat_node,
 791                                             tuples_nat_ht_params);
 792                if (err)
 793                        goto err_tuple_nat;
 794        }
 795
 796        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
 797                                         ft->zone_restore_id);
 798        if (err)
 799                goto err_rules;
 800
 801        err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
 802                                     cts_ht_params);
 803        if (err)
 804                goto err_insert;
 805
 806        return 0;
 807
 808err_insert:
 809        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
 810err_rules:
 811        rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
 812                               &entry->tuple_nat_node, tuples_nat_ht_params);
 813err_tuple_nat:
 814        if (entry->tuple_node.next)
 815                rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
 816                                       &entry->tuple_node,
 817                                       tuples_ht_params);
 818err_tuple:
 819err_set:
 820        kfree(entry);
 821        netdev_warn(ct_priv->netdev,
 822                    "Failed to offload ct entry, err: %d\n", err);
 823        return err;
 824}
 825
 826static void
 827mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
 828                        struct mlx5_ct_entry *entry)
 829{
 830        mlx5_tc_ct_entry_del_rules(ct_priv, entry);
 831        if (entry->tuple_node.next)
 832                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
 833                                       &entry->tuple_nat_node,
 834                                       tuples_nat_ht_params);
 835        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
 836                               tuples_ht_params);
 837}
 838
 839static int
 840mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
 841                                  struct flow_cls_offload *flow)
 842{
 843        unsigned long cookie = flow->cookie;
 844        struct mlx5_ct_entry *entry;
 845
 846        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
 847                                       cts_ht_params);
 848        if (!entry)
 849                return -ENOENT;
 850
 851        mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
 852        WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
 853                                       &entry->node,
 854                                       cts_ht_params));
 855        kfree(entry);
 856
 857        return 0;
 858}
 859
 860static int
 861mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
 862                                    struct flow_cls_offload *f)
 863{
 864        unsigned long cookie = f->cookie;
 865        struct mlx5_ct_entry *entry;
 866        u64 lastuse, packets, bytes;
 867
 868        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
 869                                       cts_ht_params);
 870        if (!entry)
 871                return -ENOENT;
 872
 873        mlx5_fc_query_cached(entry->counter, &bytes, &packets, &lastuse);
 874        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
 875                          FLOW_ACTION_HW_STATS_DELAYED);
 876
 877        return 0;
 878}
 879
 880static int
 881mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
 882                              void *cb_priv)
 883{
 884        struct flow_cls_offload *f = type_data;
 885        struct mlx5_ct_ft *ft = cb_priv;
 886
 887        if (type != TC_SETUP_CLSFLOWER)
 888                return -EOPNOTSUPP;
 889
 890        switch (f->command) {
 891        case FLOW_CLS_REPLACE:
 892                return mlx5_tc_ct_block_flow_offload_add(ft, f);
 893        case FLOW_CLS_DESTROY:
 894                return mlx5_tc_ct_block_flow_offload_del(ft, f);
 895        case FLOW_CLS_STATS:
 896                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
 897        default:
 898                break;
 899        }
 900
 901        return -EOPNOTSUPP;
 902}
 903
 904static bool
 905mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
 906                        u16 zone)
 907{
 908        struct flow_keys flow_keys;
 909
 910        skb_reset_network_header(skb);
 911        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
 912
 913        tuple->zone = zone;
 914
 915        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
 916            flow_keys.basic.ip_proto != IPPROTO_UDP)
 917                return false;
 918
 919        tuple->port.src = flow_keys.ports.src;
 920        tuple->port.dst = flow_keys.ports.dst;
 921        tuple->n_proto = flow_keys.basic.n_proto;
 922        tuple->ip_proto = flow_keys.basic.ip_proto;
 923
 924        switch (flow_keys.basic.n_proto) {
 925        case htons(ETH_P_IP):
 926                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
 927                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
 928                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
 929                break;
 930
 931        case htons(ETH_P_IPV6):
 932                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
 933                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
 934                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
 935                break;
 936        default:
 937                goto out;
 938        }
 939
 940        return true;
 941
 942out:
 943        return false;
 944}
 945
 946int
 947mlx5_tc_ct_add_no_trk_match(struct mlx5e_priv *priv,
 948                            struct mlx5_flow_spec *spec)
 949{
 950        u32 ctstate = 0, ctstate_mask = 0;
 951
 952        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
 953                                        &ctstate, &ctstate_mask);
 954        if (ctstate_mask)
 955                return -EOPNOTSUPP;
 956
 957        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
 958        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
 959                                    ctstate, ctstate_mask);
 960
 961        return 0;
 962}
 963
 964void mlx5_tc_ct_match_del(struct mlx5e_priv *priv, struct mlx5_ct_attr *ct_attr)
 965{
 966        struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
 967
 968        if (!ct_priv || !ct_attr->ct_labels_id)
 969                return;
 970
 971        mapping_remove(ct_priv->labels_mapping, ct_attr->ct_labels_id);
 972}
 973
/* Translate a tc flower ct_state/ct_zone/ct_mark/ct_labels match into
 * matches on the metadata registers that the CT tables write.
 *
 * Supported ct_state bits are trk and est (new is recognized but
 * rejected since established-only offload can't match it).  On success
 * a ct_labels mapping id may be allocated into ct_attr->ct_labels_id;
 * it is released by mlx5_tc_ct_match_del().
 *
 * Returns 0 (also when the rule has no CT match at all), -EOPNOTSUPP
 * for unsupported matches or when CT offload isn't available.
 */
int
mlx5_tc_ct_match_add(struct mlx5e_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	bool trk, est, untrk, unest, new;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	/* No CT key in the rule - nothing to translate. */
	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!ct_priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	/* Reject any ct_state flag other than trk/est/new up front. */
	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est and new are supported for offload");
		return -EOPNOTSUPP;
	}

	/* Split the masked state into flags matched as set (+flag) and as
	 * cleared (-flag).
	 */
	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;

	/* +new can't be matched: only established connections are offloaded
	 * to the CT tables, so the register never carries a "new" state.
	 */
	if (new) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +new isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	/* The 128-bit label doesn't fit in a register; map the masked value
	 * to a compact id and match on that instead.
	 */
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mapping_add(ct_priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}
1058
1059int
1060mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
1061                        struct mlx5_esw_flow_attr *attr,
1062                        const struct flow_action_entry *act,
1063                        struct netlink_ext_ack *extack)
1064{
1065        struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
1066
1067        if (!ct_priv) {
1068                NL_SET_ERR_MSG_MOD(extack,
1069                                   "offload of ct action isn't available");
1070                return -EOPNOTSUPP;
1071        }
1072
1073        attr->ct_attr.zone = act->ct.zone;
1074        attr->ct_attr.ct_action = act->ct.action;
1075        attr->ct_attr.nf_ft = act->ct.flow_table;
1076
1077        return 0;
1078}
1079
/* Populate a pre_ct (or pre_ct_nat) table with its two rules:
 *  - flow rule: packets already tracked in this zone (+trk, and +nat for
 *    the nat table) skip the CT table and go straight to post_ct;
 *  - miss rule: everything else is sent to the CT (or CT-NAT) table,
 *    after the zone is written to its register.
 *
 * Both rules share one modify header that sets the zone register.
 * Returns 0 on success, negative errno on failure (nothing is left
 * allocated on error).
 */
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table *fdb = pre_ct->fdb;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	/* Zone register action: stamp every packet entering this table. */
	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	/* Destinations may sit at a lower table level than this table. */
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule: already-tracked (+nat, if nat table) packets of
	 * this zone bypass CT and continue to post_ct.
	 */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = ct_priv->post_ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule: match-all, forward to the CT (or CT-NAT) table. */
	memset(spec, 0, sizeof(*spec));
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	/* The mod header actions buffer is only needed for allocation. */
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kvfree(spec);
	return err;
}
1167
1168static void
1169tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1170                       struct mlx5_tc_ct_pre *pre_ct)
1171{
1172        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1173        struct mlx5_core_dev *dev = ct_priv->esw->dev;
1174
1175        mlx5_del_flow_rules(pre_ct->flow_rule);
1176        mlx5_del_flow_rules(pre_ct->miss_rule);
1177        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1178}
1179
/* Create one pre_ct FDB table for a zone: a 2-entry unmanaged table with
 * a flow group (index 0) matching zone+ctstate on metadata reg_c_2 and a
 * match-all miss group (index 1), then install the rules via
 * tc_ct_pre_ct_add_rules().
 *
 * Returns 0 on success; on error everything created here is torn down.
 */
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->esw->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get FDB flow namespace");
		return err;
	}

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	/* Unmanaged: this table is chained from rules directly, not part of
	 * the auto-managed table hierarchy.  Two FTEs: flow rule + miss rule.
	 */
	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = FDB_TC_OFFLOAD;
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to create pre ct table");
		goto out_free;
	}
	pre_ct->fdb = ft;

	/* create flow group: single entry at index 0, matching on the
	 * metadata register that carries zone (low 16 bits) and ctstate
	 * (high bits).
	 */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group: single match-all entry at index 1. */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(flow_group_in);
	return err;
}
1274
1275static void
1276mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1277                       struct mlx5_tc_ct_pre *pre_ct)
1278{
1279        tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1280        mlx5_destroy_flow_group(pre_ct->miss_grp);
1281        mlx5_destroy_flow_group(pre_ct->flow_grp);
1282        mlx5_destroy_flow_table(pre_ct->fdb);
1283}
1284
1285static int
1286mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1287{
1288        int err;
1289
1290        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1291        if (err)
1292                return err;
1293
1294        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1295        if (err)
1296                goto err_pre_ct_nat;
1297
1298        return 0;
1299
1300err_pre_ct_nat:
1301        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1302        return err;
1303}
1304
1305static void
1306mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1307{
1308        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1309        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1310}
1311
/* Get (or create) the per-zone ft object and register for conntrack
 * offload events on its nf flowtable.
 *
 * A ft already in zone_ht is reused with an extra reference.  A new ft
 * allocates a zone restore mapping id, the pre_ct tables, the entries
 * hashtable, and finally registers mlx5_tc_ct_block_flow_offload() as
 * the flowtable callback.  Balanced by mlx5_tc_ct_del_ft_cb().
 *
 * Returns the ft or an ERR_PTR.
 */
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	/* Compact id used to restore the zone on the miss path. */
	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	/* From here on nf flowtable may start delivering add/del/stats
	 * callbacks for this zone.
	 */
	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}
1370
/* rhashtable_free_and_destroy() callback: tear down and free one
 * remaining ct entry (@ptr) using the ct_priv passed as @arg.
 */
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_del_ft_entry(arg, entry);
	kfree(entry);
}
1380
1381static void
1382mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1383{
1384        if (!refcount_dec_and_test(&ft->refcount))
1385                return;
1386
1387        nf_flow_table_offload_del_cb(ft->nf_ft,
1388                                     mlx5_tc_ct_block_flow_offload, ft);
1389        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1390        rhashtable_free_and_destroy(&ft->ct_entries_ht,
1391                                    mlx5_tc_ct_flush_ft_entry,
1392                                    ct_priv);
1393        mlx5_tc_ct_free_pre_ct_tables(ft);
1394        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1395        kfree(ft);
1396}
1397
1398/* We translate the tc filter with CT action to the following HW model:
1399 *
1400 * +---------------------+
1401 * + fdb prio (tc chain) +
1402 * + original match      +
1403 * +---------------------+
1404 *      | set chain miss mapping
1405 *      | set fte_id
1406 *      | set tunnel_id
1407 *      | do decap
1408 *      v
1409 * +---------------------+
1410 * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1411 * + zone+nat match      +---------------->+ post_ct (see below) +
1412 * +---------------------+  set zone       +---------------------+
1413 *      | set zone
1414 *      v
1415 * +--------------------+
1416 * + CT (nat or no nat) +
1417 * + tuple + zone match +
1418 * +--------------------+
1419 *      | set mark
1420 *      | set labels_id
1421 *      | set established
1422 *      | set zone_restore
1423 *      | do nat (if needed)
1424 *      v
1425 * +--------------+
1426 * + post_ct      + original filter actions
1427 * + fte_id match +------------------------>
1428 * +--------------+
1429 */
/* Offload a tc flow with a CT action per the HW model described above:
 * split the original rule into a pre_ct rule (original match, jumps to
 * the zone's pre_ct/pre_ct_nat table) and a post_ct rule (matches the
 * allocated fte_id, runs the original actions).
 *
 * On success returns the post_ct rule handle and stores the ct_flow in
 * attr->ct_attr.ct_flow for later teardown; on failure returns ERR_PTR.
 */
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
			  struct mlx5e_tc_flow *flow,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_esw_flow_attr *attr)
{
	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_flow_spec *post_ct_spec = NULL;
	struct mlx5_eswitch *esw = ct_priv->esw;
	struct mlx5_esw_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;
	u32 fte_id = 1;

	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!post_ct_spec || !ct_flow) {
		kfree(post_ct_spec);
		kfree(ct_flow);
		return ERR_PTR(-ENOMEM);
	}

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	/* fte_id links the pre_ct rule to its post_ct rule; 0 is reserved
	 * (fte_id starts from 1).
	 */
	err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
			    MLX5_FTE_ID_MAX, GFP_KERNEL);
	if (err) {
		netdev_warn(priv->netdev,
			    "Failed to allocate fte id, err: %d\n", err);
		goto err_idr;
	}
	ct_flow->fte_id = fte_id;

	/* Base esw attributes of both rules on original rule attribute */
	pre_ct_attr = &ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, sizeof(*attr));
	memcpy(&ct_flow->post_ct_attr, attr, sizeof(*attr));

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write chain miss tag for miss in ct table as we
	 * don't go through all prios of this chain as normal tc rules
	 * miss.
	 */
	err = mlx5_esw_chains_get_chain_mapping(esw, attr->chain,
						&chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
					FTEID_TO_REG, fte_id);
	if (err) {
		ct_dbg("Failed to set fte_id register mapping");
		goto err_mapping;
	}

	/* If original flow is decap, we do it before going into ct table
	 * so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);

		err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
						TUNNEL_TO_REG,
						tun_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(esw->dev,
					   MLX5_FLOW_NAMESPACE_FDB,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Post ct rule matches on fte_id and executes original rule's
	 * tc rule action
	 */
	mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
				    fte_id, MLX5_FTE_ID_MASK);

	/* Put post_ct rule on post_ct fdb */
	ct_flow->post_ct_attr.chain = 0;
	ct_flow->post_ct_attr.prio = 0;
	ct_flow->post_ct_attr.fdb = ct_priv->post_ct;

	/* Packet headers were already matched in pre_ct; skip L2/L3
	 * matching and decap for the post_ct rule.
	 */
	ct_flow->post_ct_attr.inner_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.outer_match_level = MLX5_MATCH_NONE;
	ct_flow->post_ct_attr.action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
	rule = mlx5_eswitch_add_offloaded_rule(esw, post_ct_spec,
					       &ct_flow->post_ct_attr);
	ct_flow->post_ct_rule = rule;
	if (IS_ERR(ct_flow->post_ct_rule)) {
		err = PTR_ERR(ct_flow->post_ct_rule);
		ct_dbg("Failed to add post ct rule");
		goto err_insert_post_ct;
	}

	/* Change original rule point to ct table; installed last so the
	 * datapath only sees a fully wired pipeline.
	 */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
	ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
							       orig_spec,
							       pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	dealloc_mod_hdr_actions(&pre_mod_acts);
	kfree(post_ct_spec);

	return rule;

err_insert_orig:
	mlx5_eswitch_del_offloaded_rule(ct_priv->esw, ct_flow->post_ct_rule,
					&ct_flow->post_ct_attr);
err_insert_post_ct:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	dealloc_mod_hdr_actions(&pre_mod_acts);
	mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
err_get_chain:
	idr_remove(&ct_priv->fte_ids, fte_id);
err_idr:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(post_ct_spec);
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}
1598
1599static struct mlx5_flow_handle *
1600__mlx5_tc_ct_flow_offload_clear(struct mlx5e_priv *priv,
1601                                struct mlx5_flow_spec *orig_spec,
1602                                struct mlx5_esw_flow_attr *attr,
1603                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
1604{
1605        struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
1606        struct mlx5_eswitch *esw = ct_priv->esw;
1607        struct mlx5_esw_flow_attr *pre_ct_attr;
1608        struct mlx5_modify_hdr *mod_hdr;
1609        struct mlx5_flow_handle *rule;
1610        struct mlx5_ct_flow *ct_flow;
1611        int err;
1612
1613        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1614        if (!ct_flow)
1615                return ERR_PTR(-ENOMEM);
1616
1617        /* Base esw attributes on original rule attribute */
1618        pre_ct_attr = &ct_flow->pre_ct_attr;
1619        memcpy(pre_ct_attr, attr, sizeof(*attr));
1620
1621        err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1622        if (err) {
1623                ct_dbg("Failed to set register for ct clear");
1624                goto err_set_registers;
1625        }
1626
1627        mod_hdr = mlx5_modify_header_alloc(esw->dev,
1628                                           MLX5_FLOW_NAMESPACE_FDB,
1629                                           mod_acts->num_actions,
1630                                           mod_acts->actions);
1631        if (IS_ERR(mod_hdr)) {
1632                err = PTR_ERR(mod_hdr);
1633                ct_dbg("Failed to add create ct clear mod hdr");
1634                goto err_set_registers;
1635        }
1636
1637        dealloc_mod_hdr_actions(mod_acts);
1638        pre_ct_attr->modify_hdr = mod_hdr;
1639        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1640
1641        rule = mlx5_eswitch_add_offloaded_rule(esw, orig_spec, pre_ct_attr);
1642        if (IS_ERR(rule)) {
1643                err = PTR_ERR(rule);
1644                ct_dbg("Failed to add ct clear rule");
1645                goto err_insert;
1646        }
1647
1648        attr->ct_attr.ct_flow = ct_flow;
1649        ct_flow->pre_ct_rule = rule;
1650        return rule;
1651
1652err_insert:
1653        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1654err_set_registers:
1655        netdev_warn(priv->netdev,
1656                    "Failed to offload ct clear flow, err %d\n", err);
1657        return ERR_PTR(err);
1658}
1659
1660struct mlx5_flow_handle *
1661mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
1662                        struct mlx5e_tc_flow *flow,
1663                        struct mlx5_flow_spec *spec,
1664                        struct mlx5_esw_flow_attr *attr,
1665                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1666{
1667        bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1668        struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
1669        struct mlx5_flow_handle *rule;
1670
1671        if (!ct_priv)
1672                return ERR_PTR(-EOPNOTSUPP);
1673
1674        mutex_lock(&ct_priv->control_lock);
1675
1676        if (clear_action)
1677                rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1678        else
1679                rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1680        mutex_unlock(&ct_priv->control_lock);
1681
1682        return rule;
1683}
1684
/* Tear down an offloaded CT flow. Must be called with control_lock held
 * (see mlx5_tc_ct_delete_flow).
 *
 * The pre_ct rule and its modify header always exist. post_ct_rule is
 * only set by the full offload path (__mlx5_tc_ct_flow_offload); a
 * "ct clear" flow leaves it NULL, so the resources that only the full
 * path allocates (chain mapping, fte id, per-zone ft reference) are
 * released conditionally.
 */
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow)
{
	struct mlx5_esw_flow_attr *pre_ct_attr = &ct_flow->pre_ct_attr;
	struct mlx5_eswitch *esw = ct_priv->esw;

	/* Remove the rule before freeing the modify header it references. */
	mlx5_eswitch_del_offloaded_rule(esw, ct_flow->pre_ct_rule,
					pre_ct_attr);
	mlx5_modify_header_dealloc(esw->dev, pre_ct_attr->modify_hdr);

	if (ct_flow->post_ct_rule) {
		mlx5_eswitch_del_offloaded_rule(esw, ct_flow->post_ct_rule,
						&ct_flow->post_ct_attr);
		mlx5_esw_chains_put_chain_mapping(esw, ct_flow->chain_mapping);
		idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
		mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
	}

	kfree(ct_flow);
}
1706
1707void
1708mlx5_tc_ct_delete_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow,
1709                       struct mlx5_esw_flow_attr *attr)
1710{
1711        struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
1712        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1713
1714        /* We are called on error to clean up stuff from parsing
1715         * but we don't have anything for now
1716         */
1717        if (!ct_flow)
1718                return;
1719
1720        mutex_lock(&ct_priv->control_lock);
1721        __mlx5_tc_ct_delete_flow(ct_priv, ct_flow);
1722        mutex_unlock(&ct_priv->control_lock);
1723}
1724
/* Probe kernel config and device capabilities required for CT offload.
 *
 * On failure returns -EOPNOTSUPP and points *err_msg at a static string
 * describing the first missing prerequisite (check order is therefore
 * user-visible in the warning message). Returns 0 when all checks pass.
 */
static int
mlx5_tc_ct_init_check_support(struct mlx5_eswitch *esw,
			      const char **err_msg)
{
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	*err_msg = "tc skb extension missing";
	return -EOPNOTSUPP;
#endif

	/* CT tables are jumped to from higher-numbered tables, which needs
	 * the FW to ignore flow-table level ordering.
	 */
	if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
		*err_msg = "firmware level support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* vlan workaround should be avoided for multi chain rules.
		 * This is just a sanity check as pop vlan action should
		 * be supported by any FW that supports ignore_flow_level
		 */

		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers which are mod header actions.
		 * Therefore, mod header and goto is required
		 */

		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	/* CT state written in reg_c1 must survive hairpin/loopback so it
	 * can be matched after the CT tables.
	 */
	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}
1768
1769static void
1770mlx5_tc_ct_init_err(struct mlx5e_rep_priv *rpriv, const char *msg, int err)
1771{
1772        if (msg)
1773                netdev_warn(rpriv->netdev,
1774                            "tc ct offload not supported, %s, err: %d\n",
1775                            msg, err);
1776        else
1777                netdev_warn(rpriv->netdev,
1778                            "tc ct offload not supported, err: %d\n",
1779                            err);
1780}
1781
/* Initialize CT offload state for the uplink representor: capability
 * checks, zone/labels mappings, the ct / ct_nat / post_ct global tables,
 * and the lookup structures.
 *
 * NOTE: deliberately returns 0 even when initialization fails — CT
 * offload is optional. On failure uplink_priv->ct_priv stays NULL and
 * mlx5_tc_ct_flow_offload() will reject ct rules with -EOPNOTSUPP.
 */
int
mlx5_tc_ct_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	const char *msg;
	int err;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	err = mlx5_tc_ct_init_check_support(esw, &msg);
	if (err) {
		mlx5_tc_ct_init_err(rpriv, msg, err);
		goto err_support;
	}

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv) {
		mlx5_tc_ct_init_err(rpriv, NULL, -ENOMEM);
		goto err_alloc;
	}

	/* Compresses 16-bit zones into the narrow zone restore register. */
	ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	/* Compresses 128-bit ct labels into the labels register. */
	ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	ct_priv->esw = esw;
	ct_priv->netdev = rpriv->netdev;
	ct_priv->ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct table", err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_tc_ct_init_err(rpriv, "failed to create ct nat table",
				    err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_ct = mlx5_esw_chains_create_global_table(esw);
	if (IS_ERR(ct_priv->post_ct)) {
		err = PTR_ERR(ct_priv->post_ct);
		mlx5_tc_ct_init_err(rpriv, "failed to create post ct table",
				    err);
		goto err_post_ct_tbl;
	}

	idr_init(&ct_priv->fte_ids);
	mutex_init(&ct_priv->control_lock);
	rhashtable_init(&ct_priv->zone_ht, &zone_params);
	rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
	rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);

	/* Done, set ct_priv to know it initialized */
	uplink_priv->ct_priv = ct_priv;

	return 0;

	/* Unwind in reverse order of acquisition. */
err_post_ct_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_esw_chains_destroy_global_table(esw, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	/* Intentional: failure here only disables CT offload, it does not
	 * fail the caller's initialization.
	 */
	return 0;
}
1871
1872void
1873mlx5_tc_ct_clean(struct mlx5_rep_uplink_priv *uplink_priv)
1874{
1875        struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
1876
1877        if (!ct_priv)
1878                return;
1879
1880        mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->post_ct);
1881        mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct_nat);
1882        mlx5_esw_chains_destroy_global_table(ct_priv->esw, ct_priv->ct);
1883        mapping_destroy(ct_priv->zone_mapping);
1884        mapping_destroy(ct_priv->labels_mapping);
1885
1886        rhashtable_destroy(&ct_priv->ct_tuples_ht);
1887        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
1888        rhashtable_destroy(&ct_priv->zone_ht);
1889        mutex_destroy(&ct_priv->control_lock);
1890        idr_destroy(&ct_priv->fte_ids);
1891        kfree(ct_priv);
1892
1893        uplink_priv->ct_priv = NULL;
1894}
1895
1896bool
1897mlx5e_tc_ct_restore_flow(struct mlx5_rep_uplink_priv *uplink_priv,
1898                         struct sk_buff *skb, u8 zone_restore_id)
1899{
1900        struct mlx5_tc_ct_priv *ct_priv = uplink_priv->ct_priv;
1901        struct mlx5_ct_tuple tuple = {};
1902        struct mlx5_ct_entry *entry;
1903        u16 zone;
1904
1905        if (!ct_priv || !zone_restore_id)
1906                return true;
1907
1908        if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
1909                return false;
1910
1911        if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
1912                return false;
1913
1914        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
1915                                       tuples_ht_params);
1916        if (!entry)
1917                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
1918                                               &tuple, tuples_nat_ht_params);
1919        if (!entry)
1920                return false;
1921
1922        tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
1923        return true;
1924}
1925