linux/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

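/* Top-level conntrack offload state for a device: the CT and CT-NAT
 * hardware tables, software hashtables for zones, entries and tuples,
 * and the mapping contexts that compress zone ids and 128-bit ct
 * labels into the register space used to restore state after a miss.
 */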
struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5e_post_act *post_act;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
        spinlock_t ht_lock; /* protects ft entries */
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5e_post_act_handle *post_act_handle;
        struct mlx5_ct_ft *ft;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
        bool is_shared;
};

enum {
        MLX5_CT_ENTRY_FLAG_VALID,
};

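/* One offloaded conntrack connection. Hashed by cookie in the
 * per-zone ct_entries_ht and by pre-/post-NAT tuple in the global
 * tuple tables; holds the plain and NAT zone rules plus the flow
 * counter. The last reference may be dropped from a workqueue.
 */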
struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_counter *counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];

        struct mlx5_tc_ct_priv *ct_priv;
        struct work_struct work;

        refcount_t refcnt;
        unsigned long flags;
};

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                 struct mlx5_flow_attr *attr,
                                 struct mlx5e_mod_hdr_handle *mh);

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

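/* An entry is hashed into ct_tuples_nat_ht only when its NAT tuple
 * differs from the original tuple, so a linked tuple_nat_node means
 * the connection is NATed.
 */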
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
        return !!(entry->tuple_nat_node.next);
}

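/* Map the 128-bit conntrack label into a compact id that fits the
 * LABELS_TO_REG register. An all-zero label maps to id 0 without
 * consuming a mapping entry.
 */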
static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
                       u32 *labels, u32 *id)
{
        if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
                *id = 0;
                return 0;
        }

        if (mapping_add(ct_priv->labels_mapping, labels, id))
                return -EOPNOTSUPP;

        return 0;
}

static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
        if (id)
                mapping_remove(ct_priv->labels_mapping, id);
}

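/* Extract the connection 5-tuple from the flow_rule match of a
 * conntrack offload request. Only TCP/UDP over IPv4/IPv6 is
 * offloadable.
 */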
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}

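/* Build the post-NAT tuple by replaying the rule's mangle actions
 * (address and port rewrites) on a copy of the original tuple.
 */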
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (entry->counter->is_shared &&
            !refcount_dec_and_test(&entry->counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
        kfree(entry->counter);
}

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

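/* Add the modify-header actions that save ct state, mark, label id
 * and zone restore id into registers, so the conntrack metadata can
 * be restored to the skb when a packet misses to software.
 */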
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}

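/* Translate a single pedit mangle action into an MLX5 set_action_in
 * modify-header entry on the corresponding packet field.
 */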
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

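/* Build the modify header for a ct entry rule: the NAT rewrites (if
 * any) followed by the register writes encoding the ct metadata. The
 * plain header is attached through the shared mod_hdr table; the NAT
 * one is allocated per entry since its rewrite actions are unique.
 */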
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
                                     &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;
        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        if (nat) {
                attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
                                                            mod_acts.num_actions,
                                                            mod_acts.actions);
                if (IS_ERR(attr->modify_hdr)) {
                        err = PTR_ERR(attr->modify_hdr);
                        goto err_mapping;
                }

                *mh = NULL;
        } else {
                *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                           ct_priv->mod_hdr_tbl,
                                           ct_priv->ns_type,
                                           &mod_acts);
                if (IS_ERR(*mh)) {
                        err = PTR_ERR(*mh);
                        goto err_mapping;
                }
                attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
        }

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
        return err;
}

static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                 struct mlx5_flow_attr *attr,
                                 struct mlx5e_mod_hdr_handle *mh)
{
        if (mh)
                mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
        else
                mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
}

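/* Offload a ct entry into the CT or CT-NAT table: a rule matching the
 * tuple and zone that applies the mod header, counts, and forwards to
 * the post-action table.
 */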
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
        if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
                attr->esw_attr->in_mdev = priv->mdev;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kvfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
        mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kvfree(spec);
        return err;
}

static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
        return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
                                       tuples_ht_params);
        if (entry && mlx5_tc_ct_entry_valid(entry) &&
            refcount_inc_not_zero(&entry->refcnt)) {
                return entry;
        } else if (!entry) {
                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
                                               tuple, tuples_nat_ht_params);
                if (entry && mlx5_tc_ct_entry_valid(entry) &&
                    refcount_inc_not_zero(&entry->refcnt))
                        return entry;
        }

        return entry ? ERR_PTR(-EINVAL) : NULL;
}

static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                               &entry->tuple_nat_node,
                               tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        mlx5_tc_ct_entry_del_rules(ct_priv, entry);

        spin_lock_bh(&ct_priv->ht_lock);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_counter_put(ct_priv, entry);
        kfree(entry);
}

static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
        struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

        mlx5_tc_ct_entry_del(entry);
}

static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        struct mlx5e_priv *priv;

        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        priv = netdev_priv(entry->ct_priv->netdev);
        INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
        queue_work(priv->wq, &entry->work);
}

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_ct_counter *counter;
        int ret;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return ERR_PTR(-ENOMEM);

        counter->is_shared = false;
        counter->counter = mlx5_fc_create(ct_priv->dev, true);
        if (IS_ERR(counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(counter->counter);
                kfree(counter);
                return ERR_PTR(ret);
        }

        return counter;
}

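/* Without per-flow accounting, both directions of a connection share
 * one counter: reuse the counter of the reverse-tuple entry when one
 * exists, otherwise allocate a fresh shared counter.
 */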
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_counter *shared_counter;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        spin_lock_bh(&ct_priv->ht_lock);
        rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

        if (IS_ERR(rev_entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                goto create_counter;
        }

        if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
                ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
                shared_counter = rev_entry->counter;
                spin_unlock_bh(&ct_priv->ht_lock);

                mlx5_tc_ct_entry_put(rev_entry);
                return shared_counter;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

        shared_counter = mlx5_tc_ct_counter_create(ct_priv);
        if (IS_ERR(shared_counter))
                return shared_counter;

        shared_counter->is_shared = true;
        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
                entry->counter = mlx5_tc_ct_counter_create(ct_priv);
        else
                entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

        if (IS_ERR(entry->counter)) {
                err = PTR_ERR(entry->counter);
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_counter_put(ct_priv, entry);
        return err;
}

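/* FLOW_CLS_REPLACE handler: allocate a ct entry for this connection,
 * hash it by cookie and by pre-/post-NAT tuple, then offload the
 * plain and NAT rules. The entry becomes visible to restore lookups
 * only once the VALID flag is set.
 */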
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (entry && refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                mlx5_tc_ct_entry_put(entry);
                return -EEXIST;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;
        refcount_set(&entry->refcnt, 2);
        entry->ct_priv = ct_priv;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        spin_lock_bh(&ct_priv->ht_lock);

        err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
                                            cts_ht_params);
        if (err)
                goto err_entries;

        err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
                                            &entry->tuple_node,
                                            tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                                    &entry->tuple_nat_node,
                                                    tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
        mlx5_tc_ct_entry_put(entry); /* this function reference */

        return 0;

err_rules:
        spin_lock_bh(&ct_priv->ht_lock);
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                               &entry->tuple_node,
                               tuples_ht_params);
err_tuple:
        rhashtable_remove_fast(&ft->ct_entries_ht,
                               &entry->node,
                               cts_ht_params);
err_entries:
        spin_unlock_bh(&ct_priv->ht_lock);
err_set:
        kfree(entry);
        if (err != -EEXIST)
                netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
        return err;
}

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_entry_put(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        mlx5_tc_ct_entry_put(entry);
        return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

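/* Dissect an skb into a ct tuple for the restore lookup; returns
 * false for traffic (non-IP or non-TCP/UDP) that cannot have been
 * offloaded.
 */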
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                goto out;
        }

        return true;

out:
        return false;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);

        if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
}

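/* Translate a flower ct_state/ct_zone/ct_mark/ct_label match into
 * register matches, rejecting ct_state combinations (+new, +rel,
 * +inv) that offloaded state cannot represent.
 */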
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_dissector_key_ct *mask, *key;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW |
                              TCA_FLOWER_KEY_CT_FLAGS_REPLY |
                              TCA_FLOWER_KEY_CT_FLAGS_RELATED |
                              TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est, new and rpl are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
        rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
        inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
        unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
        uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
        ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
        ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;

        if (rel) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +rel isn't supported");
                return -EOPNOTSUPP;
        }

        if (inv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +inv isn't supported");
                return -EOPNOTSUPP;
        }

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

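/* Fill a per-zone pre-ct table with two rules: packets already
 * tracked in this zone (and NATed, for the NAT flavor) jump straight
 * to the post-action table, while everything else misses to the CT
 * (or CT-NAT) table; both paths write the zone to its register.
 */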
1375static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
1376                                  struct mlx5_tc_ct_pre *pre_ct,
1377                                  bool nat)
1378{
1379        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1380        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1381        struct mlx5_core_dev *dev = ct_priv->dev;
1382        struct mlx5_flow_table *ft = pre_ct->ft;
1383        struct mlx5_flow_destination dest = {};
1384        struct mlx5_flow_act flow_act = {};
1385        struct mlx5_modify_hdr *mod_hdr;
1386        struct mlx5_flow_handle *rule;
1387        struct mlx5_flow_spec *spec;
1388        u32 ctstate;
1389        u16 zone;
1390        int err;
1391
1392        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1393        if (!spec)
1394                return -ENOMEM;
1395
1396        zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
1397        err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
1398                                        ZONE_TO_REG, zone);
1399        if (err) {
1400                ct_dbg("Failed to set zone register mapping");
1401                goto err_mapping;
1402        }
1403
1404        mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
1405                                           pre_mod_acts.num_actions,
1406                                           pre_mod_acts.actions);
1407
1408        if (IS_ERR(mod_hdr)) {
1409                err = PTR_ERR(mod_hdr);
1410                ct_dbg("Failed to create pre ct mod hdr");
1411                goto err_mapping;
1412        }
1413        pre_ct->modify_hdr = mod_hdr;
1414
1415        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1416                          MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1417        flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1418        flow_act.modify_hdr = mod_hdr;
1419        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1420
1421        /* add flow rule */
1422        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
1423                                    zone, MLX5_CT_ZONE_MASK);
1424        ctstate = MLX5_CT_STATE_TRK_BIT;
1425        if (nat)
1426                ctstate |= MLX5_CT_STATE_NAT_BIT;
1427        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
1428
1429        dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
1430        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
1431        if (IS_ERR(rule)) {
1432                err = PTR_ERR(rule);
1433                ct_dbg("Failed to add pre ct flow rule zone %d", zone);
1434                goto err_flow_rule;
1435        }
1436        pre_ct->flow_rule = rule;
1437
1438        /* add miss rule */
1439        dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
1440        rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
1441        if (IS_ERR(rule)) {
1442                err = PTR_ERR(rule);
1443                ct_dbg("Failed to add pre ct miss rule zone %d", zone);
1444                goto err_miss_rule;
1445        }
1446        pre_ct->miss_rule = rule;
1447
1448        dealloc_mod_hdr_actions(&pre_mod_acts);
1449        kvfree(spec);
1450        return 0;
1451
1452err_miss_rule:
1453        mlx5_del_flow_rules(pre_ct->flow_rule);
1454err_flow_rule:
1455        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1456err_mapping:
1457        dealloc_mod_hdr_actions(&pre_mod_acts);
1458        kvfree(spec);
1459        return err;
1460}
1461
1462static void
1463tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1464                       struct mlx5_tc_ct_pre *pre_ct)
1465{
1466        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1467        struct mlx5_core_dev *dev = ct_priv->dev;
1468
1469        mlx5_del_flow_rules(pre_ct->flow_rule);
1470        mlx5_del_flow_rules(pre_ct->miss_rule);
1471        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1472}
1473
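    /* Create a pre ct table of two ftes: flow group 0 holds the rule that
     * matches the zone and ct state in reg_c_2, and flow group 1 holds the
     * catch-all miss rule.
     */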
1474static int
1475mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1476                        struct mlx5_tc_ct_pre *pre_ct,
1477                        bool nat)
1478{
1479        int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1480        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1481        struct mlx5_core_dev *dev = ct_priv->dev;
1482        struct mlx5_flow_table_attr ft_attr = {};
1483        struct mlx5_flow_namespace *ns;
1484        struct mlx5_flow_table *ft;
1485        struct mlx5_flow_group *g;
1486        u32 metadata_reg_c_2_mask;
1487        u32 *flow_group_in;
1488        void *misc;
1489        int err;
1490
1491        ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1492        if (!ns) {
1493                err = -EOPNOTSUPP;
1494                ct_dbg("Failed to get flow namespace");
1495                return err;
1496        }
1497
1498        flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1499        if (!flow_group_in)
1500                return -ENOMEM;
1501
1502        ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1503        ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1504                       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1505        ft_attr.max_fte = 2;
1506        ft_attr.level = 1;
1507        ft = mlx5_create_flow_table(ns, &ft_attr);
1508        if (IS_ERR(ft)) {
1509                err = PTR_ERR(ft);
1510                ct_dbg("Failed to create pre ct table");
1511                goto out_free;
1512        }
1513        pre_ct->ft = ft;
1514
1515        /* create flow group */
1516        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1517        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1518        MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1519                 MLX5_MATCH_MISC_PARAMETERS_2);
1520
1521        misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1522                            match_criteria.misc_parameters_2);
1523
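    	/* reg_c_2 carries the ct state bits in its upper 16 bits and the
    	 * zone in its lower bits, hence the shifts below.
    	 */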
1524        metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1525        metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1526        if (nat)
1527                metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1528
1529        MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1530                 metadata_reg_c_2_mask);
1531
1532        g = mlx5_create_flow_group(ft, flow_group_in);
1533        if (IS_ERR(g)) {
1534                err = PTR_ERR(g);
1535                ct_dbg("Failed to create pre ct group");
1536                goto err_flow_grp;
1537        }
1538        pre_ct->flow_grp = g;
1539
1540        /* create miss group */
1541        memset(flow_group_in, 0, inlen);
1542        MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1543        MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1544        g = mlx5_create_flow_group(ft, flow_group_in);
1545        if (IS_ERR(g)) {
1546                err = PTR_ERR(g);
1547                ct_dbg("Failed to create pre ct miss group");
1548                goto err_miss_grp;
1549        }
1550        pre_ct->miss_grp = g;
1551
1552        err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1553        if (err)
1554                goto err_add_rules;
1555
1556        kvfree(flow_group_in);
1557        return 0;
1558
1559err_add_rules:
1560        mlx5_destroy_flow_group(pre_ct->miss_grp);
1561err_miss_grp:
1562        mlx5_destroy_flow_group(pre_ct->flow_grp);
1563err_flow_grp:
1564        mlx5_destroy_flow_table(ft);
1565out_free:
1566        kvfree(flow_group_in);
1567        return err;
1568}
1569
1570static void
1571mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1572                       struct mlx5_tc_ct_pre *pre_ct)
1573{
1574        tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1575        mlx5_destroy_flow_group(pre_ct->miss_grp);
1576        mlx5_destroy_flow_group(pre_ct->flow_grp);
1577        mlx5_destroy_flow_table(pre_ct->ft);
1578}
1579
1580static int
1581mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1582{
1583        int err;
1584
1585        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1586        if (err)
1587                return err;
1588
1589        err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1590        if (err)
1591                goto err_pre_ct_nat;
1592
1593        return 0;
1594
1595err_pre_ct_nat:
1596        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1597        return err;
1598}
1599
1600static void
1601mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1602{
1603        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1604        mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1605}
1606
1607/* To avoid a false lockdep dependency warning, give ct_entries_ht a lock
1608 * class different from that of the other hashtables: when the last flow
1609 * in a group is deleted and then the group itself is deleted, we get into
1610 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash.
1611 * That takes its ht->mutex, which is a different mutex than the one here.
1612 */
1613static struct lock_class_key ct_entries_ht_lock_key;
1614
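    /* Get (or create) the flow table handling a given zone: allocate its
     * zone restore mapping and pre ct tables, and register to receive flow
     * offload events from the conntrack flowtable.
     */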
1615static struct mlx5_ct_ft *
1616mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1617                     struct nf_flowtable *nf_ft)
1618{
1619        struct mlx5_ct_ft *ft;
1620        int err;
1621
1622        ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1623        if (ft) {
1624                refcount_inc(&ft->refcount);
1625                return ft;
1626        }
1627
1628        ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1629        if (!ft)
1630                return ERR_PTR(-ENOMEM);
1631
1632        err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1633        if (err)
1634                goto err_mapping;
1635
1636        ft->zone = zone;
1637        ft->nf_ft = nf_ft;
1638        ft->ct_priv = ct_priv;
1639        refcount_set(&ft->refcount, 1);
1640
1641        err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1642        if (err)
1643                goto err_alloc_pre_ct;
1644
1645        err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1646        if (err)
1647                goto err_init;
1648
1649        lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
1650
1651        err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1652                                     zone_params);
1653        if (err)
1654                goto err_insert;
1655
1656        err = nf_flow_table_offload_add_cb(ft->nf_ft,
1657                                           mlx5_tc_ct_block_flow_offload, ft);
1658        if (err)
1659                goto err_add_cb;
1660
1661        return ft;
1662
1663err_add_cb:
1664        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1665err_insert:
1666        rhashtable_destroy(&ft->ct_entries_ht);
1667err_init:
1668        mlx5_tc_ct_free_pre_ct_tables(ft);
1669err_alloc_pre_ct:
1670        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1671err_mapping:
1672        kfree(ft);
1673        return ERR_PTR(err);
1674}
1675
1676static void
1677mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1678{
1679        struct mlx5_ct_entry *entry = ptr;
1680
1681        mlx5_tc_ct_entry_put(entry);
1682}
1683
1684static void
1685mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1686{
1687        if (!refcount_dec_and_test(&ft->refcount))
1688                return;
1689
1690        nf_flow_table_offload_del_cb(ft->nf_ft,
1691                                     mlx5_tc_ct_block_flow_offload, ft);
1692        rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1693        rhashtable_free_and_destroy(&ft->ct_entries_ht,
1694                                    mlx5_tc_ct_flush_ft_entry,
1695                                    ct_priv);
1696        mlx5_tc_ct_free_pre_ct_tables(ft);
1697        mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1698        kfree(ft);
1699}
1700
1701/* We translate the tc filter with CT action to the following HW model:
1702 *
1703 * +---------------------+
1704 * + ft prio (tc chain)  +
1705 * + original match      +
1706 * +---------------------+
1707 *      | set chain miss mapping
1708 *      | set fte_id
1709 *      | set tunnel_id
1710 *      | do decap
1711 *      v
1712 * +---------------------+
1713 * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
1714 * + zone+nat match      +---------------->+ post_act (see below)    +
1715 * +---------------------+  set zone       +-------------------------+
1716 *      | set zone
1717 *      v
1718 * +--------------------+
1719 * + CT (nat or no nat) +
1720 * + tuple + zone match +
1721 * +--------------------+
1722 *      | set mark
1723 *      | set labels_id
1724 *      | set established
1725 *      | set zone_restore
1726 *      | do nat (if needed)
1727 *      v
1728 * +--------------+
1729 * + post_act     + original filter actions
1730 * + fte_id match +------------------------>
1731 * +--------------+
1732 */
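    /* For example, a tc rule of roughly this shape (illustrative only) is
     * offloaded through the model above:
     *
     *   tc filter add dev $DEV ingress prio 1 chain 0 proto ip flower \
     *       ct_state -trk action ct action goto chain 1
     */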
1733static struct mlx5_flow_handle *
1734__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1735                          struct mlx5e_tc_flow *flow,
1736                          struct mlx5_flow_spec *orig_spec,
1737                          struct mlx5_flow_attr *attr)
1738{
1739        bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1740        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1741        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1742        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1743        struct mlx5e_post_act_handle *handle;
1744        struct mlx5_flow_attr *pre_ct_attr;
1745        struct mlx5_modify_hdr *mod_hdr;
1746        struct mlx5_ct_flow *ct_flow;
1747        int chain_mapping = 0, err;
1748        struct mlx5_ct_ft *ft;
1749
1750        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1751        if (!ct_flow)
1752                return ERR_PTR(-ENOMEM);
1755
1756        /* Register for CT established events */
1757        ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1758                                  attr->ct_attr.nf_ft);
1759        if (IS_ERR(ft)) {
1760                err = PTR_ERR(ft);
1761                ct_dbg("Failed to register ft callback");
1762                goto err_ft;
1763        }
1764        ct_flow->ft = ft;
1765
1766        handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
1767        if (IS_ERR(handle)) {
1768                err = PTR_ERR(handle);
1769                ct_dbg("Failed to allocate post action handle");
1770                goto err_post_act_handle;
1771        }
1772        ct_flow->post_act_handle = handle;
1773
1774        /* Base flow attributes of both rules on original rule attribute */
1775        ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1776        if (!ct_flow->pre_ct_attr) {
1777                err = -ENOMEM;
1778                goto err_alloc_pre;
1779        }
1780
1781        pre_ct_attr = ct_flow->pre_ct_attr;
1782        memcpy(pre_ct_attr, attr, attr_sz);
1783
1784        /* Modify the original rule's action to fwd and modify, leave decap */
1785        pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1786        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1787                               MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1788
1789        /* Write the chain miss tag for misses in the ct table, as we
1790         * don't go through all the prios of this chain the way normal
1791         * tc rule misses do.
1792         */
1793        err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1794                                            &chain_mapping);
1795        if (err) {
1796                ct_dbg("Failed to get chain register mapping");
1797                goto err_get_chain;
1798        }
1799        ct_flow->chain_mapping = chain_mapping;
1800
1801        err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1802                                        CHAIN_TO_REG, chain_mapping);
1803        if (err) {
1804                ct_dbg("Failed to set chain register mapping");
1805                goto err_mapping;
1806        }
1807
1808        err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
1809        if (err) {
1810                ct_dbg("Failed to set post action handle");
1811                goto err_mapping;
1812        }
1813
1814        /* If the original flow does decap, the decap happens before
1815         * entering the ct table, so add a rewrite for the tunnel match_id.
1816         */
1817        if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1818            attr->chain == 0) {
1819                u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1820
1821                err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1822                                                ct_priv->ns_type,
1823                                                TUNNEL_TO_REG,
1824                                                tun_id);
1825                if (err) {
1826                        ct_dbg("Failed to set tunnel register mapping");
1827                        goto err_mapping;
1828                }
1829        }
1830
1831        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1832                                           pre_mod_acts.num_actions,
1833                                           pre_mod_acts.actions);
1834        if (IS_ERR(mod_hdr)) {
1835                err = PTR_ERR(mod_hdr);
1836                ct_dbg("Failed to create pre ct mod hdr");
1837                goto err_mapping;
1838        }
1839        pre_ct_attr->modify_hdr = mod_hdr;
1840
1841        /* Change the original rule to point at the pre ct table */
1842        pre_ct_attr->dest_chain = 0;
1843        pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1844        ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1845                                                   pre_ct_attr);
1846        if (IS_ERR(ct_flow->pre_ct_rule)) {
1847                err = PTR_ERR(ct_flow->pre_ct_rule);
1848                ct_dbg("Failed to add pre ct rule");
1849                goto err_insert_orig;
1850        }
1851
1852        attr->ct_attr.ct_flow = ct_flow;
1853        dealloc_mod_hdr_actions(&pre_mod_acts);
1854
1855        return ct_flow->pre_ct_rule;
1856
1857err_insert_orig:
1858        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1859err_mapping:
1860        dealloc_mod_hdr_actions(&pre_mod_acts);
1861        mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1862err_get_chain:
1863        kfree(ct_flow->pre_ct_attr);
1864err_alloc_pre:
1865        mlx5e_tc_post_act_del(ct_priv->post_act, handle);
1866err_post_act_handle:
1867        mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1868err_ft:
1869        kfree(ct_flow);
1870        netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1871        return ERR_PTR(err);
1872}
1873
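    /* Offload a ct clear action as a single rule that resets the ct related
     * registers with a modify header and then executes the original rule
     * actions, without going through the ct tables.
     */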
1874static struct mlx5_flow_handle *
1875__mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1876                                struct mlx5_flow_spec *orig_spec,
1877                                struct mlx5_flow_attr *attr,
1878                                struct mlx5e_tc_mod_hdr_acts *mod_acts)
1879{
1880        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1881        u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1882        struct mlx5_flow_attr *pre_ct_attr;
1883        struct mlx5_modify_hdr *mod_hdr;
1884        struct mlx5_flow_handle *rule;
1885        struct mlx5_ct_flow *ct_flow;
1886        int err;
1887
1888        ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1889        if (!ct_flow)
1890                return ERR_PTR(-ENOMEM);
1891
1892        /* Base flow attributes on the original rule's attributes */
1893        pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1894        if (!pre_ct_attr) {
1895                err = -ENOMEM;
1896                goto err_attr;
1897        }
1898
1899        memcpy(pre_ct_attr, attr, attr_sz);
1900
1901        err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1902        if (err) {
1903                ct_dbg("Failed to set register for ct clear");
1904                goto err_set_registers;
1905        }
1906
1907        mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1908                                           mod_acts->num_actions,
1909                                           mod_acts->actions);
1910        if (IS_ERR(mod_hdr)) {
1911                err = PTR_ERR(mod_hdr);
1912                ct_dbg("Failed to create ct clear mod hdr");
1913                goto err_set_registers;
1914        }
1915
1916        pre_ct_attr->modify_hdr = mod_hdr;
1917        pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1918
1919        rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1920        if (IS_ERR(rule)) {
1921                err = PTR_ERR(rule);
1922                ct_dbg("Failed to add ct clear rule");
1923                goto err_insert;
1924        }
1925
1926        attr->ct_attr.ct_flow = ct_flow;
1927        ct_flow->pre_ct_attr = pre_ct_attr;
1928        ct_flow->pre_ct_rule = rule;
1929        return rule;
1930
1931err_insert:
1932        mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1933err_set_registers:
1934        netdev_warn(priv->netdev,
1935                    "Failed to offload ct clear flow, err %d\n", err);
1936        kfree(pre_ct_attr);
1937err_attr:
1938        kfree(ct_flow);
1939
1940        return ERR_PTR(err);
1941}
1942
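    /* Entry point for offloading a flow with a ct action: ct clear flows
     * are offloaded as a single rule that resets the ct registers; all
     * other ct flows are split across the pre_ct/ct/post_act tables as
     * described in the model above.
     */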
1943struct mlx5_flow_handle *
1944mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1945                        struct mlx5e_tc_flow *flow,
1946                        struct mlx5_flow_spec *spec,
1947                        struct mlx5_flow_attr *attr,
1948                        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1949{
1950        bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1951        struct mlx5_flow_handle *rule;
1952
1953        if (!priv)
1954                return ERR_PTR(-EOPNOTSUPP);
1955
1956        mutex_lock(&priv->control_lock);
1957
1958        if (clear_action)
1959                rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1960        else
1961                rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1962        mutex_unlock(&priv->control_lock);
1963
1964        return rule;
1965}
1966
1967static void
1968__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1969                         struct mlx5e_tc_flow *flow,
1970                         struct mlx5_ct_flow *ct_flow)
1971{
1972        struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1973        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1974
1975        mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1976                            pre_ct_attr);
1977        mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1978
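    	/* ct clear flows have no post action handle, chain mapping or
    	 * per-zone ft to release.
    	 */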
1979        if (ct_flow->post_act_handle) {
1980                mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1981                mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
1982                mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1983        }
1984
1985        kfree(ct_flow->pre_ct_attr);
1986        kfree(ct_flow);
1987}
1988
1989void
1990mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1991                       struct mlx5e_tc_flow *flow,
1992                       struct mlx5_flow_attr *attr)
1993{
1994        struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1995
1996        /* We may be called from the error path of parsing, before any
1997         * rule was offloaded; in that case there is nothing to clean up.
1998         */
1999        if (!ct_flow)
2000                return;
2001
2002        mutex_lock(&priv->control_lock);
2003        __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
2004        mutex_unlock(&priv->control_lock);
2005}
2006
2007static int
2008mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
2009                                  const char **err_msg)
2010{
2011        if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
2012                /* The vlan workaround must be avoided for multi-chain rules.
2013                 * This is just a sanity check, as the pop vlan action should
2014                 * be supported by any FW that supports ignore_flow_level.
2015                 */
2016
2017                *err_msg = "firmware vlan actions support is missing";
2018                return -EOPNOTSUPP;
2019        }
2020
2021        if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
2022                                    fdb_modify_header_fwd_to_table)) {
2023                /* CT always writes to registers, which are mod header actions.
2024                 * Therefore, mod header and goto are required.
2025                 */
2026
2027                *err_msg = "firmware fwd and modify support is missing";
2028                return -EOPNOTSUPP;
2029        }
2030
2031        if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2032                *err_msg = "register loopback isn't supported";
2033                return -EOPNOTSUPP;
2034        }
2035
2036        return 0;
2037}
2038
2039static int
2040mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2041                              enum mlx5_flow_namespace_type ns_type,
2042                              struct mlx5e_post_act *post_act,
2043                              const char **err_msg)
2044{
2045        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2046
2047#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
2048        /* cannot restore chain ID on HW miss */
2049
2050        *err_msg = "tc skb extension missing";
2051        return -EOPNOTSUPP;
2052#endif
2053        if (IS_ERR_OR_NULL(post_act)) {
2054                *err_msg = "post action is missing";
2055                return -EOPNOTSUPP;
2056        }
2057
2058        if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2059                return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
2060        return 0;
2061}
2062
2063#define INIT_ERR_PREFIX "tc ct offload init failed"
2064
2065struct mlx5_tc_ct_priv *
2066mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2067                struct mod_hdr_tbl *mod_hdr,
2068                enum mlx5_flow_namespace_type ns_type,
2069                struct mlx5e_post_act *post_act)
2070{
2071        struct mlx5_tc_ct_priv *ct_priv;
2072        struct mlx5_core_dev *dev;
2073        const char *msg;
2074        u64 mapping_id;
2075        int err;
2076
2077        dev = priv->mdev;
2078        err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg);
2079        if (err) {
2080                mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg);
2081                goto err_support;
2082        }
2083
2084        ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2085        if (!ct_priv)
2086                goto err_alloc;
2087
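    	/* Key the zone and labels mappings by the system image guid so
    	 * that devices sharing the same physical HW (e.g. in a lag) can
    	 * share them.
    	 */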
2088        mapping_id = mlx5_query_nic_system_image_guid(dev);
2089
2090        ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
2091                                                      sizeof(u16), 0, true);
2092        if (IS_ERR(ct_priv->zone_mapping)) {
2093                err = PTR_ERR(ct_priv->zone_mapping);
2094                goto err_mapping_zone;
2095        }
2096
2097        ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
2098                                                        sizeof(u32) * 4, 0, true);
2099        if (IS_ERR(ct_priv->labels_mapping)) {
2100                err = PTR_ERR(ct_priv->labels_mapping);
2101                goto err_mapping_labels;
2102        }
2103
2104        spin_lock_init(&ct_priv->ht_lock);
2105        ct_priv->ns_type = ns_type;
2106        ct_priv->chains = chains;
2107        ct_priv->netdev = priv->netdev;
2108        ct_priv->dev = priv->mdev;
2109        ct_priv->mod_hdr_tbl = mod_hdr;
2110        ct_priv->ct = mlx5_chains_create_global_table(chains);
2111        if (IS_ERR(ct_priv->ct)) {
2112                err = PTR_ERR(ct_priv->ct);
2113                mlx5_core_warn(dev,
2114                               "%s, failed to create ct table err: %d\n",
2115                               INIT_ERR_PREFIX, err);
2116                goto err_ct_tbl;
2117        }
2118
2119        ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2120        if (IS_ERR(ct_priv->ct_nat)) {
2121                err = PTR_ERR(ct_priv->ct_nat);
2122                mlx5_core_warn(dev,
2123                               "%s, failed to create ct nat table err: %d\n",
2124                               INIT_ERR_PREFIX, err);
2125                goto err_ct_nat_tbl;
2126        }
2127
2128        ct_priv->post_act = post_act;
2129        mutex_init(&ct_priv->control_lock);
2130        rhashtable_init(&ct_priv->zone_ht, &zone_params);
2131        rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2132        rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
2133
2134        return ct_priv;
2135
2136err_ct_nat_tbl:
2137        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2138err_ct_tbl:
2139        mapping_destroy(ct_priv->labels_mapping);
2140err_mapping_labels:
2141        mapping_destroy(ct_priv->zone_mapping);
2142err_mapping_zone:
2143        kfree(ct_priv);
2144err_alloc:
2145err_support:
2146        return NULL;
2148}
2149
2150void
2151mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2152{
2153        struct mlx5_fs_chains *chains;
2154
2155        if (!ct_priv)
2156                return;
2157
2158        chains = ct_priv->chains;
2159
2160        mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2161        mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2162        mapping_destroy(ct_priv->zone_mapping);
2163        mapping_destroy(ct_priv->labels_mapping);
2164
2165        rhashtable_destroy(&ct_priv->ct_tuples_ht);
2166        rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2167        rhashtable_destroy(&ct_priv->zone_ht);
2168        mutex_destroy(&ct_priv->control_lock);
2169        kfree(ct_priv);
2170}
2171
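    /* Restore the conntrack state of an skb that missed in HW after going
     * through the ct tables: map the zone restore id back to a zone,
     * rebuild the tuple from the skb, and if a matching offloaded entry
     * exists, write its conntrack info back onto the skb.
     */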
2172bool
2173mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2174                         struct sk_buff *skb, u8 zone_restore_id)
2175{
2176        struct mlx5_ct_tuple tuple = {};
2177        struct mlx5_ct_entry *entry;
2178        u16 zone;
2179
2180        if (!ct_priv || !zone_restore_id)
2181                return true;
2182
2183        if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2184                return false;
2185
2186        if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2187                return false;
2188
2189        spin_lock(&ct_priv->ht_lock);
2190
2191        entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2192        if (IS_ERR_OR_NULL(entry)) {
2193                spin_unlock(&ct_priv->ht_lock);
2194                return false;
2195        }
2196        spin_unlock(&ct_priv->ht_lock);
2202
2203        tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2204        __mlx5_tc_ct_entry_put(entry);
2205
2206        return true;
2207}
2208