linux/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
   1/* Broadcom NetXtreme-C/E network driver.
   2 *
   3 * Copyright (c) 2017 Broadcom Limited
   4 *
   5 * This program is free software; you can redistribute it and/or modify
   6 * it under the terms of the GNU General Public License as published by
   7 * the Free Software Foundation.
   8 */
   9
  10#include <linux/netdevice.h>
  11#include <linux/inetdevice.h>
  12#include <linux/if_vlan.h>
  13#include <net/flow_dissector.h>
  14#include <net/pkt_cls.h>
  15#include <net/tc_act/tc_gact.h>
  16#include <net/tc_act/tc_skbedit.h>
  17#include <net/tc_act/tc_mirred.h>
  18#include <net/tc_act/tc_vlan.h>
  19#include <net/tc_act/tc_pedit.h>
  20#include <net/tc_act/tc_tunnel_key.h>
  21#include <net/vxlan.h>
  22
  23#include "bnxt_hsi.h"
  24#include "bnxt.h"
  25#include "bnxt_sriov.h"
  26#include "bnxt_tc.h"
  27#include "bnxt_vfr.h"
  28
  29#define BNXT_FID_INVALID                        0xffff
  30#define VLAN_TCI(vid, prio)     ((vid) | ((prio) << VLAN_PRIO_SHIFT))
  31
  32#define is_vlan_pcp_wildcarded(vlan_tci_mask)   \
  33        ((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == 0x0000)
  34#define is_vlan_pcp_exactmatch(vlan_tci_mask)   \
  35        ((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
  36#define is_vlan_pcp_zero(vlan_tci)      \
  37        ((ntohs(vlan_tci) & VLAN_PRIO_MASK) == 0x0000)
  38#define is_vid_exactmatch(vlan_tci_mask)        \
  39        ((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)
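/* Worked example (illustrative values, not taken from the original source):
 * with VLAN_PRIO_SHIFT == 13, VLAN_PRIO_MASK == 0xe000 and
 * VLAN_VID_MASK == 0x0fff (from <linux/if_vlan.h>), VLAN_TCI(5, 3) is
 * 0x6005: PCP 3 in bits 15:13 and VID 5 in bits 11:0. A TCI mask of
 * htons(0x0fff) wildcards the PCP bits but exact-matches the VID, while
 * htons(0xefff) exact-matches both PCP and VID.
 */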
  40
  41static bool is_wildcard(void *mask, int len);
  42static bool is_exactmatch(void *mask, int len);
  43/* Return the dst fid of the func for flow forwarding
  44 * For PFs: src_fid is the fid of the PF
   45 * For VF-reps: src_fid is the fid of the VF
  46 */
  47static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
  48{
  49        struct bnxt *bp;
  50
  51        /* check if dev belongs to the same switch */
  52        if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
  53                netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
  54                            dev->ifindex);
  55                return BNXT_FID_INVALID;
  56        }
  57
  58        /* Is dev a VF-rep? */
  59        if (bnxt_dev_is_vf_rep(dev))
  60                return bnxt_vf_rep_get_fid(dev);
  61
  62        bp = netdev_priv(dev);
  63        return bp->pf.fw_fid;
  64}
  65
  66static int bnxt_tc_parse_redir(struct bnxt *bp,
  67                               struct bnxt_tc_actions *actions,
  68                               const struct flow_action_entry *act)
  69{
  70        struct net_device *dev = act->dev;
  71
  72        if (!dev) {
  73                netdev_info(bp->dev, "no dev in mirred action\n");
  74                return -EINVAL;
  75        }
  76
  77        actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
  78        actions->dst_dev = dev;
  79        return 0;
  80}
  81
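/* Parse a tc vlan action. Only "pop" and "push" are translated into
 * BNXT_TC_ACTION_FLAG_POP_VLAN/PUSH_VLAN; a vlan "modify" action
 * (FLOW_ACTION_VLAN_MANGLE) falls through to the default case and is
 * rejected with -EOPNOTSUPP.
 */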
  82static int bnxt_tc_parse_vlan(struct bnxt *bp,
  83                              struct bnxt_tc_actions *actions,
  84                              const struct flow_action_entry *act)
  85{
  86        switch (act->id) {
  87        case FLOW_ACTION_VLAN_POP:
  88                actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
  89                break;
  90        case FLOW_ACTION_VLAN_PUSH:
  91                actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
  92                actions->push_vlan_tci = htons(act->vlan.vid);
  93                actions->push_vlan_tpid = act->vlan.proto;
  94                break;
  95        default:
  96                return -EOPNOTSUPP;
  97        }
  98        return 0;
  99}
 100
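/* Parse a tunnel_key "set" (encap) action. Only IPv4 tunnel metadata is
 * accepted; the outer L2 header (next-hop MAC, VLAN) is resolved later
 * through a route/neighbour lookup in bnxt_tc_resolve_tunnel_hdrs().
 */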
 101static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
 102                                    struct bnxt_tc_actions *actions,
 103                                    const struct flow_action_entry *act)
 104{
 105        const struct ip_tunnel_info *tun_info = act->tunnel;
 106        const struct ip_tunnel_key *tun_key = &tun_info->key;
 107
 108        if (ip_tunnel_info_af(tun_info) != AF_INET) {
 109                netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
 110                return -EOPNOTSUPP;
 111        }
 112
 113        actions->tun_encap_key = *tun_key;
 114        actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
 115        return 0;
 116}
 117
  118/* Key & Mask from the stack come unaligned in multiple iterations of 4 bytes
  119 * each (u32).
 120 * This routine consolidates such multiple unaligned values into one
 121 * field each for Key & Mask (for src and dst macs separately)
 122 * For example,
 123 *                      Mask/Key        Offset  Iteration
 124 *                      ==========      ======  =========
 125 *      dst mac         0xffffffff      0       1
 126 *      dst mac         0x0000ffff      4       2
 127 *
 128 *      src mac         0xffff0000      4       1
 129 *      src mac         0xffffffff      8       2
 130 *
 131 * The above combination coming from the stack will be consolidated as
 132 *                      Mask/Key
 133 *                      ==============
 134 *      src mac:        0xffffffffffff
 135 *      dst mac:        0xffffffffffff
 136 */
 137static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
 138                                 u8 *actual_key, u8 *actual_mask)
 139{
 140        u32 key = get_unaligned((u32 *)actual_key);
 141        u32 mask = get_unaligned((u32 *)actual_mask);
 142
 143        part_key &= part_mask;
 144        part_key |= key & ~part_mask;
 145
 146        put_unaligned(mask | part_mask, (u32 *)actual_mask);
 147        put_unaligned(part_key, (u32 *)actual_key);
 148}
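/* Example (illustrative values): if the key/mask accumulated so far are
 * key = 0x11223344, mask = 0xffff0000 and the next pedit chunk carries
 * part_key = 0xaabbccdd with part_mask = 0x0000ffff, the result is
 * key = 0x1122ccdd, mask = 0xffffffff: only the bytes covered by
 * part_mask are overwritten, the rest are preserved.
 */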
 149
 150static int
 151bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
 152                            u16 *eth_addr, u16 *eth_addr_mask)
 153{
 154        u16 *p;
 155        int j;
 156
 157        if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
 158                return -EINVAL;
 159
 160        if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
 161                if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
 162                        return -EINVAL;
 163                /* FW expects dmac to be in u16 array format */
 164                p = eth_addr;
 165                for (j = 0; j < 3; j++)
 166                        actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
 167        }
 168
 169        if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
 170                if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
 171                        return -EINVAL;
 172                /* FW expects smac to be in u16 array format */
 173                p = &eth_addr[ETH_ALEN / 2];
 174                for (j = 0; j < 3; j++)
 175                        actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
 176        }
 177
 178        return 0;
 179}
 180
 181static int
 182bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
 183                    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
 184                    u8 *eth_addr_mask)
 185{
 186        size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
 187        size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
 188        u32 mask, val, offset, idx;
 189        u8 htype;
 190
 191        offset = act->mangle.offset;
 192        htype = act->mangle.htype;
 193        mask = ~act->mangle.mask;
 194        val = act->mangle.val;
 195
 196        switch (htype) {
 197        case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
 198                if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
 199                        netdev_err(bp->dev,
 200                                   "%s: eth_hdr: Invalid pedit field\n",
 201                                   __func__);
 202                        return -EINVAL;
 203                }
 204                actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
 205
 206                bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
 207                                     &eth_addr_mask[offset]);
 208                break;
 209        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
 210                actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
 211                actions->nat.l3_is_ipv4 = true;
 212                if (offset ==  offsetof(struct iphdr, saddr)) {
 213                        actions->nat.src_xlate = true;
 214                        actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
 215                } else if (offset ==  offsetof(struct iphdr, daddr)) {
 216                        actions->nat.src_xlate = false;
 217                        actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
 218                } else {
 219                        netdev_err(bp->dev,
 220                                   "%s: IPv4_hdr: Invalid pedit field\n",
 221                                   __func__);
 222                        return -EINVAL;
 223                }
 224
 225                netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
 226                           actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
 227                           &actions->nat.l3.ipv4.daddr);
 228                break;
 229
 230        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
 231                actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
 232                actions->nat.l3_is_ipv4 = false;
 233                if (offset >= offsetof(struct ipv6hdr, saddr) &&
 234                    offset < offset_of_ip6_daddr) {
 235                        /* 16 byte IPv6 address comes in 4 iterations of
 236                         * 4byte chunks each
 237                         */
 238                        actions->nat.src_xlate = true;
 239                        idx = (offset - offset_of_ip6_saddr) / 4;
 240                        /* First 4bytes will be copied to idx 0 and so on */
 241                        actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
 242                } else if (offset >= offset_of_ip6_daddr &&
 243                           offset < offset_of_ip6_daddr + 16) {
 244                        actions->nat.src_xlate = false;
 245                        idx = (offset - offset_of_ip6_daddr) / 4;
  246                        actions->nat.l3.ipv6.daddr.s6_addr32[idx] = htonl(val);
 247                } else {
 248                        netdev_err(bp->dev,
 249                                   "%s: IPv6_hdr: Invalid pedit field\n",
 250                                   __func__);
 251                        return -EINVAL;
 252                }
 253                break;
 254        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
 255        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
 256                /* HW does not support L4 rewrite alone without L3
 257                 * rewrite
 258                 */
 259                if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
 260                        netdev_err(bp->dev,
 261                                   "Need to specify L3 rewrite as well\n");
 262                        return -EINVAL;
 263                }
 264                if (actions->nat.src_xlate)
 265                        actions->nat.l4.ports.sport = htons(val);
 266                else
 267                        actions->nat.l4.ports.dport = htons(val);
 268                netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
 269                           actions->nat.l4.ports.sport,
 270                           actions->nat.l4.ports.dport);
 271                break;
 272        default:
 273                netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
 274                           __func__);
 275                return -EINVAL;
 276        }
 277        return 0;
 278}
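/* Offset-to-field mapping used above, for reference: the stack hands a
 * pedit rewrite to the driver as 4-byte chunks addressed by header offset.
 * An IPv6 source-address rewrite, for example, arrives as four
 * FLOW_ACTION_MANGLE entries with offsets 8, 12, 16 and 20 (saddr starts
 * at offset 8 of struct ipv6hdr) and lands in s6_addr32[0..3] above.
 * An illustrative rule (device names are placeholders):
 *   tc filter add dev <vf-rep> ingress protocol ip flower ip_proto tcp \
 *       action pedit ex munge ip src set 10.0.0.1 pipe \
 *       action mirred egress redirect dev <uplink>
 * is parsed here as an IPv4 source NAT.
 */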
 279
 280static int bnxt_tc_parse_actions(struct bnxt *bp,
 281                                 struct bnxt_tc_actions *actions,
 282                                 struct flow_action *flow_action,
 283                                 struct netlink_ext_ack *extack)
 284{
 285        /* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
 286         * smac (6 bytes) if rewrite of both is specified, otherwise either
 287         * dmac or smac
 288         */
 289        u16 eth_addr_mask[ETH_ALEN] = { 0 };
 290        /* Used to store the L2 rewrite key for dmac (6 bytes) followed by
 291         * smac (6 bytes) if rewrite of both is specified, otherwise either
 292         * dmac or smac
 293         */
 294        u16 eth_addr[ETH_ALEN] = { 0 };
 295        struct flow_action_entry *act;
 296        int i, rc;
 297
 298        if (!flow_action_has_entries(flow_action)) {
 299                netdev_info(bp->dev, "no actions\n");
 300                return -EINVAL;
 301        }
 302
 303        if (!flow_action_basic_hw_stats_check(flow_action, extack))
 304                return -EOPNOTSUPP;
 305
 306        flow_action_for_each(i, act, flow_action) {
 307                switch (act->id) {
 308                case FLOW_ACTION_DROP:
 309                        actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
 310                        return 0; /* don't bother with other actions */
 311                case FLOW_ACTION_REDIRECT:
 312                        rc = bnxt_tc_parse_redir(bp, actions, act);
 313                        if (rc)
 314                                return rc;
 315                        break;
 316                case FLOW_ACTION_VLAN_POP:
 317                case FLOW_ACTION_VLAN_PUSH:
 318                case FLOW_ACTION_VLAN_MANGLE:
 319                        rc = bnxt_tc_parse_vlan(bp, actions, act);
 320                        if (rc)
 321                                return rc;
 322                        break;
 323                case FLOW_ACTION_TUNNEL_ENCAP:
 324                        rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
 325                        if (rc)
 326                                return rc;
 327                        break;
 328                case FLOW_ACTION_TUNNEL_DECAP:
 329                        actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
 330                        break;
 331                /* Packet edit: L2 rewrite, NAT, NAPT */
 332                case FLOW_ACTION_MANGLE:
 333                        rc = bnxt_tc_parse_pedit(bp, actions, act, i,
 334                                                 (u8 *)eth_addr,
 335                                                 (u8 *)eth_addr_mask);
 336                        if (rc)
 337                                return rc;
 338                        break;
 339                default:
 340                        break;
 341                }
 342        }
 343
 344        if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
 345                rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
 346                                                 eth_addr_mask);
 347                if (rc)
 348                        return rc;
 349        }
 350
 351        if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
 352                if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
 353                        /* dst_fid is PF's fid */
 354                        actions->dst_fid = bp->pf.fw_fid;
 355                } else {
 356                        /* find the FID from dst_dev */
 357                        actions->dst_fid =
 358                                bnxt_flow_get_dst_fid(bp, actions->dst_dev);
 359                        if (actions->dst_fid == BNXT_FID_INVALID)
 360                                return -EINVAL;
 361                }
 362        }
 363
 364        return 0;
 365}
 366
 367static int bnxt_tc_parse_flow(struct bnxt *bp,
 368                              struct flow_cls_offload *tc_flow_cmd,
 369                              struct bnxt_tc_flow *flow)
 370{
 371        struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
 372        struct flow_dissector *dissector = rule->match.dissector;
 373
 374        /* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
 375        if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
 376            (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
 377                netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
 378                            dissector->used_keys);
 379                return -EOPNOTSUPP;
 380        }
 381
 382        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
 383                struct flow_match_basic match;
 384
 385                flow_rule_match_basic(rule, &match);
 386                flow->l2_key.ether_type = match.key->n_proto;
 387                flow->l2_mask.ether_type = match.mask->n_proto;
 388
 389                if (match.key->n_proto == htons(ETH_P_IP) ||
 390                    match.key->n_proto == htons(ETH_P_IPV6)) {
 391                        flow->l4_key.ip_proto = match.key->ip_proto;
 392                        flow->l4_mask.ip_proto = match.mask->ip_proto;
 393                }
 394        }
 395
 396        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 397                struct flow_match_eth_addrs match;
 398
 399                flow_rule_match_eth_addrs(rule, &match);
 400                flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
 401                ether_addr_copy(flow->l2_key.dmac, match.key->dst);
 402                ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
 403                ether_addr_copy(flow->l2_key.smac, match.key->src);
 404                ether_addr_copy(flow->l2_mask.smac, match.mask->src);
 405        }
 406
 407        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
 408                struct flow_match_vlan match;
 409
 410                flow_rule_match_vlan(rule, &match);
 411                flow->l2_key.inner_vlan_tci =
 412                        cpu_to_be16(VLAN_TCI(match.key->vlan_id,
 413                                             match.key->vlan_priority));
 414                flow->l2_mask.inner_vlan_tci =
 415                        cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
 416                                              match.mask->vlan_priority)));
 417                flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
 418                flow->l2_mask.inner_vlan_tpid = htons(0xffff);
 419                flow->l2_key.num_vlans = 1;
 420        }
 421
 422        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
 423                struct flow_match_ipv4_addrs match;
 424
 425                flow_rule_match_ipv4_addrs(rule, &match);
 426                flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
 427                flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
 428                flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
 429                flow->l3_key.ipv4.saddr.s_addr = match.key->src;
 430                flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
 431        } else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
 432                struct flow_match_ipv6_addrs match;
 433
 434                flow_rule_match_ipv6_addrs(rule, &match);
 435                flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
 436                flow->l3_key.ipv6.daddr = match.key->dst;
 437                flow->l3_mask.ipv6.daddr = match.mask->dst;
 438                flow->l3_key.ipv6.saddr = match.key->src;
 439                flow->l3_mask.ipv6.saddr = match.mask->src;
 440        }
 441
 442        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
 443                struct flow_match_ports match;
 444
 445                flow_rule_match_ports(rule, &match);
 446                flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
 447                flow->l4_key.ports.dport = match.key->dst;
 448                flow->l4_mask.ports.dport = match.mask->dst;
 449                flow->l4_key.ports.sport = match.key->src;
 450                flow->l4_mask.ports.sport = match.mask->src;
 451        }
 452
 453        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
 454                struct flow_match_icmp match;
 455
 456                flow_rule_match_icmp(rule, &match);
 457                flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
 458                flow->l4_key.icmp.type = match.key->type;
 459                flow->l4_key.icmp.code = match.key->code;
 460                flow->l4_mask.icmp.type = match.mask->type;
 461                flow->l4_mask.icmp.code = match.mask->code;
 462        }
 463
 464        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
 465                struct flow_match_ipv4_addrs match;
 466
 467                flow_rule_match_enc_ipv4_addrs(rule, &match);
 468                flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
 469                flow->tun_key.u.ipv4.dst = match.key->dst;
 470                flow->tun_mask.u.ipv4.dst = match.mask->dst;
 471                flow->tun_key.u.ipv4.src = match.key->src;
 472                flow->tun_mask.u.ipv4.src = match.mask->src;
 473        } else if (flow_rule_match_key(rule,
 474                                      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
 475                return -EOPNOTSUPP;
 476        }
 477
 478        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
 479                struct flow_match_enc_keyid match;
 480
 481                flow_rule_match_enc_keyid(rule, &match);
 482                flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
 483                flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
 484                flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
 485        }
 486
 487        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
 488                struct flow_match_ports match;
 489
 490                flow_rule_match_enc_ports(rule, &match);
 491                flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
 492                flow->tun_key.tp_dst = match.key->dst;
 493                flow->tun_mask.tp_dst = match.mask->dst;
 494                flow->tun_key.tp_src = match.key->src;
 495                flow->tun_mask.tp_src = match.mask->src;
 496        }
 497
 498        return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
 499                                     tc_flow_cmd->common.extack);
 500}
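/* Illustrative example (interface names and addresses are placeholders):
 * a rule such as
 *   tc filter add dev <pf-uplink> ingress protocol ip flower \
 *       dst_mac 52:54:00:aa:bb:cc dst_ip 10.1.1.1 ip_proto tcp dst_port 80 \
 *       skip_sw action mirred egress redirect dev <vf-rep>
 * populates the BASIC, ETH_ADDRS, IPV4_ADDRS and PORTS matches above and
 * is then handed to bnxt_tc_parse_actions() for the redirect.
 */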
 501
 502static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
 503                                   struct bnxt_tc_flow_node *flow_node)
 504{
 505        struct hwrm_cfa_flow_free_input req = { 0 };
 506        int rc;
 507
 508        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
 509        if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
 510                req.ext_flow_handle = flow_node->ext_flow_handle;
 511        else
 512                req.flow_handle = flow_node->flow_handle;
 513
 514        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 515        if (rc)
 516                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 517
 518        return rc;
 519}
 520
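/* Sum of the per-word prefix lengths of an IPv6 netmask. Like
 * inet_mask_len(), this assumes a contiguous prefix mask; e.g.
 * ffff:ffff:ffff:ff00:: yields 56.
 */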
 521static int ipv6_mask_len(struct in6_addr *mask)
 522{
 523        int mask_len = 0, i;
 524
 525        for (i = 0; i < 4; i++)
 526                mask_len += inet_mask_len(mask->s6_addr32[i]);
 527
 528        return mask_len;
 529}
 530
 531static bool is_wildcard(void *mask, int len)
 532{
 533        const u8 *p = mask;
 534        int i;
 535
 536        for (i = 0; i < len; i++) {
 537                if (p[i] != 0)
 538                        return false;
 539        }
 540        return true;
 541}
 542
 543static bool is_exactmatch(void *mask, int len)
 544{
 545        const u8 *p = mask;
 546        int i;
 547
 548        for (i = 0; i < len; i++)
 549                if (p[i] != 0xff)
 550                        return false;
 551
 552        return true;
 553}
 554
 555static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
 556                                __be16  vlan_tci)
 557{
 558        /* VLAN priority must be either exactly zero or fully wildcarded and
 559         * VLAN id must be exact match.
 560         */
 561        if (is_vid_exactmatch(vlan_tci_mask) &&
 562            ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
 563              is_vlan_pcp_zero(vlan_tci)) ||
 564             is_vlan_pcp_wildcarded(vlan_tci_mask)))
 565                return true;
 566
 567        return false;
 568}
 569
 570static bool bits_set(void *key, int len)
 571{
 572        const u8 *p = key;
 573        int i;
 574
 575        for (i = 0; i < len; i++)
 576                if (p[i] != 0)
 577                        return true;
 578
 579        return false;
 580}
 581
 582static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
 583                                    __le16 ref_flow_handle,
 584                                    __le32 tunnel_handle,
 585                                    struct bnxt_tc_flow_node *flow_node)
 586{
 587        struct bnxt_tc_actions *actions = &flow->actions;
 588        struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
 589        struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
 590        struct hwrm_cfa_flow_alloc_input req = { 0 };
 591        struct hwrm_cfa_flow_alloc_output *resp;
 592        u16 flow_flags = 0, action_flags = 0;
 593        int rc;
 594
 595        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
 596
 597        req.src_fid = cpu_to_le16(flow->src_fid);
 598        req.ref_flow_handle = ref_flow_handle;
 599
 600        if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
 601                memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
 602                       ETH_ALEN);
 603                memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
 604                       ETH_ALEN);
 605                action_flags |=
 606                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
 607        }
 608
 609        if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
 610                if (actions->nat.l3_is_ipv4) {
 611                        action_flags |=
 612                                CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
 613
 614                        if (actions->nat.src_xlate) {
 615                                action_flags |=
 616                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
 617                                /* L3 source rewrite */
 618                                req.nat_ip_address[0] =
 619                                        actions->nat.l3.ipv4.saddr.s_addr;
 620                                /* L4 source port */
 621                                if (actions->nat.l4.ports.sport)
 622                                        req.nat_port =
 623                                                actions->nat.l4.ports.sport;
 624                        } else {
 625                                action_flags |=
 626                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
 627                                /* L3 destination rewrite */
 628                                req.nat_ip_address[0] =
 629                                        actions->nat.l3.ipv4.daddr.s_addr;
 630                                /* L4 destination port */
 631                                if (actions->nat.l4.ports.dport)
 632                                        req.nat_port =
 633                                                actions->nat.l4.ports.dport;
 634                        }
 635                        netdev_dbg(bp->dev,
 636                                   "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
 637                                   req.nat_ip_address, actions->nat.src_xlate,
 638                                   req.nat_port);
 639                } else {
 640                        if (actions->nat.src_xlate) {
 641                                action_flags |=
 642                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
 643                                /* L3 source rewrite */
 644                                memcpy(req.nat_ip_address,
 645                                       actions->nat.l3.ipv6.saddr.s6_addr32,
 646                                       sizeof(req.nat_ip_address));
 647                                /* L4 source port */
 648                                if (actions->nat.l4.ports.sport)
 649                                        req.nat_port =
 650                                                actions->nat.l4.ports.sport;
 651                        } else {
 652                                action_flags |=
 653                                        CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
 654                                /* L3 destination rewrite */
 655                                memcpy(req.nat_ip_address,
 656                                       actions->nat.l3.ipv6.daddr.s6_addr32,
 657                                       sizeof(req.nat_ip_address));
 658                                /* L4 destination port */
 659                                if (actions->nat.l4.ports.dport)
 660                                        req.nat_port =
 661                                                actions->nat.l4.ports.dport;
 662                        }
 663                        netdev_dbg(bp->dev,
 664                                   "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
 665                                   req.nat_ip_address, actions->nat.src_xlate,
 666                                   req.nat_port);
 667                }
 668        }
 669
 670        if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
 671            actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
 672                req.tunnel_handle = tunnel_handle;
 673                flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
 674                action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
 675        }
 676
 677        req.ethertype = flow->l2_key.ether_type;
 678        req.ip_proto = flow->l4_key.ip_proto;
 679
 680        if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
 681                memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
 682                memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
 683        }
 684
 685        if (flow->l2_key.num_vlans > 0) {
 686                flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
 687                /* FW expects the inner_vlan_tci value to be set
 688                 * in outer_vlan_tci when num_vlans is 1 (which is
 689                 * always the case in TC.)
 690                 */
 691                req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
 692        }
 693
 694        /* If all IP and L4 fields are wildcarded then this is an L2 flow */
 695        if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
 696            is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
 697                flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
 698        } else {
 699                flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
 700                                CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
 701                                CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
 702
 703                if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
 704                        req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
 705                        req.ip_dst_mask_len =
 706                                inet_mask_len(l3_mask->ipv4.daddr.s_addr);
 707                        req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
 708                        req.ip_src_mask_len =
 709                                inet_mask_len(l3_mask->ipv4.saddr.s_addr);
 710                } else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
 711                        memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
 712                               sizeof(req.ip_dst));
 713                        req.ip_dst_mask_len =
 714                                        ipv6_mask_len(&l3_mask->ipv6.daddr);
 715                        memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
 716                               sizeof(req.ip_src));
 717                        req.ip_src_mask_len =
 718                                        ipv6_mask_len(&l3_mask->ipv6.saddr);
 719                }
 720        }
 721
 722        if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
 723                req.l4_src_port = flow->l4_key.ports.sport;
 724                req.l4_src_port_mask = flow->l4_mask.ports.sport;
 725                req.l4_dst_port = flow->l4_key.ports.dport;
 726                req.l4_dst_port_mask = flow->l4_mask.ports.dport;
 727        } else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
 728                /* l4 ports serve as type/code when ip_proto is ICMP */
 729                req.l4_src_port = htons(flow->l4_key.icmp.type);
 730                req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
 731                req.l4_dst_port = htons(flow->l4_key.icmp.code);
 732                req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
 733        }
 734        req.flags = cpu_to_le16(flow_flags);
 735
 736        if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
 737                action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
 738        } else {
 739                if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
 740                        action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
 741                        req.dst_fid = cpu_to_le16(actions->dst_fid);
 742                }
 743                if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
 744                        action_flags |=
 745                            CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
 746                        req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
 747                        req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
 748                        memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
 749                        memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
 750                }
 751                if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
 752                        action_flags |=
 753                            CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
 754                        /* Rewrite config with tpid = 0 implies vlan pop */
 755                        req.l2_rewrite_vlan_tpid = 0;
 756                        memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
 757                        memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
 758                }
 759        }
 760        req.action_flags = cpu_to_le16(action_flags);
 761
 762        mutex_lock(&bp->hwrm_cmd_lock);
 763        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 764        if (!rc) {
 765                resp = bnxt_get_hwrm_resp_addr(bp, &req);
 766                /* CFA_FLOW_ALLOC response interpretation:
 767                 *                  fw with          fw with
 768                 *                  16-bit           64-bit
 769                 *                  flow handle      flow handle
 770                 *                  ===========      ===========
 771                 * flow_handle      flow handle      flow context id
 772                 * ext_flow_handle  INVALID          flow handle
 773                 * flow_id          INVALID          flow counter id
 774                 */
 775                flow_node->flow_handle = resp->flow_handle;
 776                if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
 777                        flow_node->ext_flow_handle = resp->ext_flow_handle;
 778                        flow_node->flow_id = resp->flow_id;
 779                }
 780        }
 781        mutex_unlock(&bp->hwrm_cmd_lock);
 782        return rc;
 783}
 784
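/* Program a VXLAN decap filter in the CFA: the filter matches the outer
 * headers of the tunnelled packet (tunnel dmac/VLAN from l2_info, outer
 * IPv4 addresses, UDP destination port and VNI from the flow's tun_key)
 * and returns a decap_filter_handle to be referenced by the flow-alloc
 * request.
 */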
 785static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
 786                                       struct bnxt_tc_flow *flow,
 787                                       struct bnxt_tc_l2_key *l2_info,
 788                                       __le32 ref_decap_handle,
 789                                       __le32 *decap_filter_handle)
 790{
 791        struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
 792        struct hwrm_cfa_decap_filter_alloc_output *resp;
 793        struct ip_tunnel_key *tun_key = &flow->tun_key;
 794        u32 enables = 0;
 795        int rc;
 796
 797        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
 798
 799        req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
 800        enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
 801                   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
 802        req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
 803        req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
 804
 805        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
 806                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
 807                /* tunnel_id is wrongly defined in hsi defn. as __le32 */
 808                req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
 809        }
 810
 811        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
 812                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
 813                ether_addr_copy(req.dst_macaddr, l2_info->dmac);
 814        }
 815        if (l2_info->num_vlans) {
 816                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
 817                req.t_ivlan_vid = l2_info->inner_vlan_tci;
 818        }
 819
 820        enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
 821        req.ethertype = htons(ETH_P_IP);
 822
 823        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
 824                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
 825                           CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
 826                           CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
 827                req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
 828                req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
 829                req.src_ipaddr[0] = tun_key->u.ipv4.src;
 830        }
 831
 832        if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
 833                enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
 834                req.dst_port = tun_key->tp_dst;
 835        }
 836
  837        /* Even though the decap_handle returned by hwrm_cfa_decap_filter_alloc
 838         * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
 839         */
 840        req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
 841        req.enables = cpu_to_le32(enables);
 842
 843        mutex_lock(&bp->hwrm_cmd_lock);
 844        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 845        if (!rc) {
 846                resp = bnxt_get_hwrm_resp_addr(bp, &req);
 847                *decap_filter_handle = resp->decap_filter_id;
 848        } else {
 849                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 850        }
 851        mutex_unlock(&bp->hwrm_cmd_lock);
 852
 853        return rc;
 854}
 855
 856static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
 857                                      __le32 decap_filter_handle)
 858{
 859        struct hwrm_cfa_decap_filter_free_input req = { 0 };
 860        int rc;
 861
 862        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
 863        req.decap_filter_id = decap_filter_handle;
 864
 865        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 866        if (rc)
 867                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 868
 869        return rc;
 870}
 871
 872static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
 873                                       struct ip_tunnel_key *encap_key,
 874                                       struct bnxt_tc_l2_key *l2_info,
 875                                       __le32 *encap_record_handle)
 876{
 877        struct hwrm_cfa_encap_record_alloc_input req = { 0 };
 878        struct hwrm_cfa_encap_record_alloc_output *resp;
 879        struct hwrm_cfa_encap_data_vxlan *encap =
 880                        (struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
 881        struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
 882                                (struct hwrm_vxlan_ipv4_hdr *)encap->l3;
 883        int rc;
 884
 885        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
 886
 887        req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
 888
 889        ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
 890        ether_addr_copy(encap->src_mac_addr, l2_info->smac);
 891        if (l2_info->num_vlans) {
 892                encap->num_vlan_tags = l2_info->num_vlans;
 893                encap->ovlan_tci = l2_info->inner_vlan_tci;
 894                encap->ovlan_tpid = l2_info->inner_vlan_tpid;
 895        }
 896
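        /* Outer IPv4 version/IHL byte: version 4, header length 5 words
         * (20 bytes, no options), i.e. the standard 0x45.
         */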
 897        encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
 898        encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
 899        encap_ipv4->ttl = encap_key->ttl;
 900
 901        encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
 902        encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
 903        encap_ipv4->protocol = IPPROTO_UDP;
 904
 905        encap->dst_port = encap_key->tp_dst;
 906        encap->vni = tunnel_id_to_key32(encap_key->tun_id);
 907
 908        mutex_lock(&bp->hwrm_cmd_lock);
 909        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 910        if (!rc) {
 911                resp = bnxt_get_hwrm_resp_addr(bp, &req);
 912                *encap_record_handle = resp->encap_record_id;
 913        } else {
 914                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 915        }
 916        mutex_unlock(&bp->hwrm_cmd_lock);
 917
 918        return rc;
 919}
 920
 921static int hwrm_cfa_encap_record_free(struct bnxt *bp,
 922                                      __le32 encap_record_handle)
 923{
 924        struct hwrm_cfa_encap_record_free_input req = { 0 };
 925        int rc;
 926
 927        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
 928        req.encap_record_id = encap_record_handle;
 929
 930        rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 931        if (rc)
 932                netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
 933
 934        return rc;
 935}
 936
 937static int bnxt_tc_put_l2_node(struct bnxt *bp,
 938                               struct bnxt_tc_flow_node *flow_node)
 939{
 940        struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
 941        struct bnxt_tc_info *tc_info = bp->tc_info;
 942        int rc;
 943
 944        /* remove flow_node from the L2 shared flow list */
 945        list_del(&flow_node->l2_list_node);
 946        if (--l2_node->refcount == 0) {
 947                rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
 948                                             tc_info->l2_ht_params);
 949                if (rc)
 950                        netdev_err(bp->dev,
 951                                   "Error: %s: rhashtable_remove_fast: %d\n",
 952                                   __func__, rc);
 953                kfree_rcu(l2_node, rcu);
 954        }
 955        return 0;
 956}
 957
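/* Look up the L2 node for l2_key in the given hash table, allocating and
 * inserting a new node if none exists. The node's refcount is not taken
 * here; callers bump it once the flow has been linked onto
 * common_l2_flows.
 */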
 958static struct bnxt_tc_l2_node *
 959bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
 960                    struct rhashtable_params ht_params,
 961                    struct bnxt_tc_l2_key *l2_key)
 962{
 963        struct bnxt_tc_l2_node *l2_node;
 964        int rc;
 965
 966        l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
 967        if (!l2_node) {
 968                l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
 969                if (!l2_node) {
 970                        rc = -ENOMEM;
 971                        return NULL;
 972                }
 973
 974                l2_node->key = *l2_key;
 975                rc = rhashtable_insert_fast(l2_table, &l2_node->node,
 976                                            ht_params);
 977                if (rc) {
 978                        kfree_rcu(l2_node, rcu);
 979                        netdev_err(bp->dev,
 980                                   "Error: %s: rhashtable_insert_fast: %d\n",
 981                                   __func__, rc);
 982                        return NULL;
 983                }
 984                INIT_LIST_HEAD(&l2_node->common_l2_flows);
 985        }
 986        return l2_node;
 987}
 988
 989/* Get the ref_flow_handle for a flow by checking if there are any other
 990 * flows that share the same L2 key as this flow.
 991 */
 992static int
 993bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
 994                            struct bnxt_tc_flow_node *flow_node,
 995                            __le16 *ref_flow_handle)
 996{
 997        struct bnxt_tc_info *tc_info = bp->tc_info;
 998        struct bnxt_tc_flow_node *ref_flow_node;
 999        struct bnxt_tc_l2_node *l2_node;
1000
1001        l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
1002                                      tc_info->l2_ht_params,
1003                                      &flow->l2_key);
1004        if (!l2_node)
1005                return -1;
1006
 1007        /* If any other flow is using this l2_node, use its flow_handle
1008         * as the ref_flow_handle
1009         */
1010        if (l2_node->refcount > 0) {
1011                ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1012                                                 struct bnxt_tc_flow_node,
1013                                                 l2_list_node);
1014                *ref_flow_handle = ref_flow_node->flow_handle;
1015        } else {
1016                *ref_flow_handle = cpu_to_le16(0xffff);
1017        }
1018
1019        /* Insert the l2_node into the flow_node so that subsequent flows
1020         * with a matching l2 key can use the flow_handle of this flow
1021         * as their ref_flow_handle
1022         */
1023        flow_node->l2_node = l2_node;
1024        list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1025        l2_node->refcount++;
1026        return 0;
1027}
1028
1029/* After the flow parsing is done, this routine is used for checking
1030 * if there are any aspects of the flow that prevent it from being
1031 * offloaded.
1032 */
1033static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1034{
1035        /* If L4 ports are specified then ip_proto must be TCP or UDP */
1036        if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1037            (flow->l4_key.ip_proto != IPPROTO_TCP &&
1038             flow->l4_key.ip_proto != IPPROTO_UDP)) {
1039                netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
1040                            flow->l4_key.ip_proto);
1041                return false;
1042        }
1043
1044        /* Currently source/dest MAC cannot be partial wildcard  */
1045        if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1046            !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1047                netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1048                return false;
1049        }
1050        if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1051            !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1052                netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1053                return false;
1054        }
1055
1056        /* Currently VLAN fields cannot be partial wildcard */
1057        if (bits_set(&flow->l2_key.inner_vlan_tci,
1058                     sizeof(flow->l2_key.inner_vlan_tci)) &&
1059            !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1060                                 flow->l2_key.inner_vlan_tci)) {
1061                netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1062                return false;
1063        }
1064        if (bits_set(&flow->l2_key.inner_vlan_tpid,
1065                     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1066            !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1067                           sizeof(flow->l2_mask.inner_vlan_tpid))) {
1068                netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1069                return false;
1070        }
1071
1072        /* Currently Ethertype must be set */
1073        if (!is_exactmatch(&flow->l2_mask.ether_type,
1074                           sizeof(flow->l2_mask.ether_type))) {
1075                netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1076                return false;
1077        }
1078
1079        return true;
1080}
1081
1082/* Returns the final refcount of the node on success
1083 * or a -ve error code on failure
1084 */
1085static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1086                                   struct rhashtable *tunnel_table,
1087                                   struct rhashtable_params *ht_params,
1088                                   struct bnxt_tc_tunnel_node *tunnel_node)
1089{
1090        int rc;
1091
1092        if (--tunnel_node->refcount == 0) {
1093                rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1094                                             *ht_params);
1095                if (rc) {
1096                        netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1097                        rc = -1;
1098                }
1099                kfree_rcu(tunnel_node, rcu);
1100                return rc;
1101        } else {
1102                return tunnel_node->refcount;
1103        }
1104}
1105
1106/* Get (or add) either encap or decap tunnel node from/to the supplied
1107 * hash table.
1108 */
1109static struct bnxt_tc_tunnel_node *
1110bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1111                        struct rhashtable_params *ht_params,
1112                        struct ip_tunnel_key *tun_key)
1113{
1114        struct bnxt_tc_tunnel_node *tunnel_node;
1115        int rc;
1116
1117        tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1118        if (!tunnel_node) {
1119                tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1120                if (!tunnel_node) {
1121                        rc = -ENOMEM;
1122                        goto err;
1123                }
1124
1125                tunnel_node->key = *tun_key;
1126                tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1127                rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1128                                            *ht_params);
1129                if (rc) {
1130                        kfree_rcu(tunnel_node, rcu);
1131                        goto err;
1132                }
1133        }
1134        tunnel_node->refcount++;
1135        return tunnel_node;
1136err:
1137        netdev_info(bp->dev, "error rc=%d\n", rc);
1138        return NULL;
1139}
1140
1141static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1142                                        struct bnxt_tc_flow *flow,
1143                                        struct bnxt_tc_l2_key *l2_key,
1144                                        struct bnxt_tc_flow_node *flow_node,
1145                                        __le32 *ref_decap_handle)
1146{
1147        struct bnxt_tc_info *tc_info = bp->tc_info;
1148        struct bnxt_tc_flow_node *ref_flow_node;
1149        struct bnxt_tc_l2_node *decap_l2_node;
1150
1151        decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1152                                            tc_info->decap_l2_ht_params,
1153                                            l2_key);
1154        if (!decap_l2_node)
1155                return -1;
1156
 1157        /* If any other flow is using this decap_l2_node, use its decap_handle
1158         * as the ref_decap_handle
1159         */
1160        if (decap_l2_node->refcount > 0) {
1161                ref_flow_node =
1162                        list_first_entry(&decap_l2_node->common_l2_flows,
1163                                         struct bnxt_tc_flow_node,
1164                                         decap_l2_list_node);
1165                *ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1166        } else {
1167                *ref_decap_handle = INVALID_TUNNEL_HANDLE;
1168        }
1169
1170        /* Insert the l2_node into the flow_node so that subsequent flows
1171         * with a matching decap l2 key can use the decap_filter_handle of
1172         * this flow as their ref_decap_handle
1173         */
1174        flow_node->decap_l2_node = decap_l2_node;
1175        list_add(&flow_node->decap_l2_list_node,
1176                 &decap_l2_node->common_l2_flows);
1177        decap_l2_node->refcount++;
1178        return 0;
1179}
1180
1181static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1182                                      struct bnxt_tc_flow_node *flow_node)
1183{
1184        struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1185        struct bnxt_tc_info *tc_info = bp->tc_info;
1186        int rc;
1187
1188        /* remove flow_node from the decap L2 sharing flow list */
1189        list_del(&flow_node->decap_l2_list_node);
1190        if (--decap_l2_node->refcount == 0) {
1191                rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1192                                             &decap_l2_node->node,
1193                                             tc_info->decap_l2_ht_params);
1194                if (rc)
1195                        netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1196                kfree_rcu(decap_l2_node, rcu);
1197        }
1198}
1199
1200static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1201                                     struct bnxt_tc_flow_node *flow_node)
1202{
1203        __le32 decap_handle = flow_node->decap_node->tunnel_handle;
1204        struct bnxt_tc_info *tc_info = bp->tc_info;
1205        int rc;
1206
1207        if (flow_node->decap_l2_node)
1208                bnxt_tc_put_decap_l2_node(bp, flow_node);
1209
1210        rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1211                                     &tc_info->decap_ht_params,
1212                                     flow_node->decap_node);
1213        if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1214                hwrm_cfa_decap_filter_free(bp, decap_handle);
1215}
1216
1217static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1218                                       struct ip_tunnel_key *tun_key,
1219                                       struct bnxt_tc_l2_key *l2_info)
1220{
1221#ifdef CONFIG_INET
1222        struct net_device *real_dst_dev = bp->dev;
1223        struct flowi4 flow = { {0} };
1224        struct net_device *dst_dev;
1225        struct neighbour *nbr;
1226        struct rtable *rt;
1227        int rc;
1228
1229        flow.flowi4_proto = IPPROTO_UDP;
1230        flow.fl4_dport = tun_key->tp_dst;
1231        flow.daddr = tun_key->u.ipv4.dst;
1232
1233        rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1234        if (IS_ERR(rt)) {
1235                netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
1236                return -EOPNOTSUPP;
1237        }
1238
1239        /* The route must either point to the real_dst_dev or a dst_dev that
1240         * uses the real_dst_dev.
1241         */
1242        dst_dev = rt->dst.dev;
1243        if (is_vlan_dev(dst_dev)) {
1244#if IS_ENABLED(CONFIG_VLAN_8021Q)
1245                struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1246
1247                if (vlan->real_dev != real_dst_dev) {
1248                        netdev_info(bp->dev,
1249                                    "dst_dev(%s) doesn't use PF-if(%s)\n",
1250                                    netdev_name(dst_dev),
1251                                    netdev_name(real_dst_dev));
1252                        rc = -EOPNOTSUPP;
1253                        goto put_rt;
1254                }
1255                l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1256                l2_info->inner_vlan_tpid = vlan->vlan_proto;
1257                l2_info->num_vlans = 1;
1258#endif
1259        } else if (dst_dev != real_dst_dev) {
1260                netdev_info(bp->dev,
1261                            "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
1262                            netdev_name(dst_dev), &flow.daddr,
1263                            netdev_name(real_dst_dev));
1264                rc = -EOPNOTSUPP;
1265                goto put_rt;
1266        }
1267
1268        nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1269        if (!nbr) {
1270                netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
1271                            &flow.daddr);
1272                rc = -EOPNOTSUPP;
1273                goto put_rt;
1274        }
1275
1276        tun_key->u.ipv4.src = flow.saddr;
1277        tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1278        neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1279        ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1280        neigh_release(nbr);
1281        ip_rt_put(rt);
1282
1283        return 0;
1284put_rt:
1285        ip_rt_put(rt);
1286        return rc;
1287#else
1288        return -EOPNOTSUPP;
1289#endif
1290}
1291
1292static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1293                                    struct bnxt_tc_flow_node *flow_node,
1294                                    __le32 *decap_filter_handle)
1295{
1296        struct ip_tunnel_key *decap_key = &flow->tun_key;
1297        struct bnxt_tc_info *tc_info = bp->tc_info;
1298        struct bnxt_tc_l2_key l2_info = { {0} };
1299        struct bnxt_tc_tunnel_node *decap_node;
1300        struct ip_tunnel_key tun_key = { 0 };
1301        struct bnxt_tc_l2_key *decap_l2_info;
1302        __le32 ref_decap_handle;
1303        int rc;
1304
1305        /* Check if there's another flow using the same tunnel decap.
1306         * If not, add this tunnel to the table and resolve the other
1307         * tunnel header fields. Ignore src_port in the tunnel_key,
1308         * since it is not required for decap filters.
1309         */
1310        decap_key->tp_src = 0;
1311        decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1312                                             &tc_info->decap_ht_params,
1313                                             decap_key);
1314        if (!decap_node)
1315                return -ENOMEM;
1316
1317        flow_node->decap_node = decap_node;
1318
1319        if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1320                goto done;
1321
1322        /* Resolve the L2 fields for tunnel decap:
1323         * resolve the route to the remote VTEP (the saddr of the decap key)
1324         * and find its next-hop MAC address.
1325         */
1326        tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1327        tun_key.tp_dst = flow->tun_key.tp_dst;
1328        rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1329        if (rc)
1330                goto put_decap;
1331
1332        decap_l2_info = &decap_node->l2_info;
1333        /* decap smac is wildcarded */
1334        ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1335        if (l2_info.num_vlans) {
1336                decap_l2_info->num_vlans = l2_info.num_vlans;
1337                decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1338                decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1339        }
1340        flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1341
1342        /* To get a decap_filter_handle we first need to check if
1343         * there are any other decap flows that share the same tunnel L2
1344         * key and if so, pass that flow's decap_filter_handle as the
1345         * ref_decap_handle for this flow.
1346         */
1347        rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1348                                          &ref_decap_handle);
1349        if (rc)
1350                goto put_decap;
1351
1352        /* Issue the hwrm cmd to allocate a decap filter handle */
1353        rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1354                                         ref_decap_handle,
1355                                         &decap_node->tunnel_handle);
1356        if (rc)
1357                goto put_decap_l2;
1358
1359done:
1360        *decap_filter_handle = decap_node->tunnel_handle;
1361        return 0;
1362
1363put_decap_l2:
1364        bnxt_tc_put_decap_l2_node(bp, flow_node);
1365put_decap:
1366        bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1367                                &tc_info->decap_ht_params,
1368                                flow_node->decap_node);
1369        return rc;
1370}
1371
1372static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1373                                     struct bnxt_tc_tunnel_node *encap_node)
1374{
1375        __le32 encap_handle = encap_node->tunnel_handle;
1376        struct bnxt_tc_info *tc_info = bp->tc_info;
1377        int rc;
1378
1379        rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1380                                     &tc_info->encap_ht_params, encap_node);
1381        if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1382                hwrm_cfa_encap_record_free(bp, encap_handle);
1383}
1384
1385/* Look up the tunnel encap table and check if there's an encap_handle
1386 * alloc'd already.
1387 * If not, query L2 info via a route lookup and issue an encap_record_alloc
1388 * cmd to FW.
1389 */
1390static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1391                                    struct bnxt_tc_flow_node *flow_node,
1392                                    __le32 *encap_handle)
1393{
1394        struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1395        struct bnxt_tc_info *tc_info = bp->tc_info;
1396        struct bnxt_tc_tunnel_node *encap_node;
1397        int rc;
1398
1399        /* Check if there's another flow using the same tunnel encap.
1400         * If not, add this tunnel to the table and resolve the other
1401         * tunnel header fields.
1402         */
1403        encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1404                                             &tc_info->encap_ht_params,
1405                                             encap_key);
1406        if (!encap_node)
1407                return -ENOMEM;
1408
1409        flow_node->encap_node = encap_node;
1410
1411        if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1412                goto done;
1413
1414        rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1415        if (rc)
1416                goto put_encap;
1417
1418        /* Allocate a new tunnel encap record */
1419        rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1420                                         &encap_node->tunnel_handle);
1421        if (rc)
1422                goto put_encap;
1423
1424done:
1425        *encap_handle = encap_node->tunnel_handle;
1426        return 0;
1427
1428put_encap:
1429        bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1430                                &tc_info->encap_ht_params, encap_node);
1431        return rc;
1432}
1433
1434static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1435                                      struct bnxt_tc_flow *flow,
1436                                      struct bnxt_tc_flow_node *flow_node)
1437{
1438        if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1439                bnxt_tc_put_decap_handle(bp, flow_node);
1440        else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1441                bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1442}
1443
1444static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1445                                     struct bnxt_tc_flow *flow,
1446                                     struct bnxt_tc_flow_node *flow_node,
1447                                     __le32 *tunnel_handle)
1448{
1449        if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1450                return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1451                                                tunnel_handle);
1452        else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1453                return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1454                                                tunnel_handle);
1455        else
1456                return 0;
1457}

1458static int __bnxt_tc_del_flow(struct bnxt *bp,
1459                              struct bnxt_tc_flow_node *flow_node)
1460{
1461        struct bnxt_tc_info *tc_info = bp->tc_info;
1462        int rc;
1463
1464        /* send HWRM cmd to free the flow-id */
1465        bnxt_hwrm_cfa_flow_free(bp, flow_node);
1466
1467        mutex_lock(&tc_info->lock);
1468
1469        /* release references to any tunnel encap/decap nodes */
1470        bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1471
1472        /* release reference to l2 node */
1473        bnxt_tc_put_l2_node(bp, flow_node);
1474
1475        mutex_unlock(&tc_info->lock);
1476
1477        rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1478                                    tc_info->flow_ht_params);
1479        if (rc)
1480                netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
1481                           __func__, rc);
1482
1483        kfree_rcu(flow_node, rcu);
1484        return 0;
1485}
1486
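/* Set the flow direction as seen by the adapter: a flow whose source fid
 * is the PF's own fid matches traffic ingressing from the uplink (RX);
 * flows sourced from a VF (via its VF-rep) are TX. The direction is later
 * needed when encoding the flow handle for stats queries
 * (see bnxt_fill_cfa_stats_req()).
 */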
1487static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1488                                 u16 src_fid)
1489{
1490        flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1491}
1492
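/* For decap flows the tunnelled packets always ingress on the PF (uplink),
 * so use the PF's fid as the flow's src_fid regardless of which port the
 * filter was configured on; otherwise keep the caller-supplied src_fid.
 */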
1493static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1494                                u16 src_fid)
1495{
1496        if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1497                flow->src_fid = bp->pf.fw_fid;
1498        else
1499                flow->src_fid = src_fid;
1500}
1501
1502/* Add a new flow or replace an existing flow.
1503 * Notes on locking:
1504 * There are essentially two critical sections here.
1505 * 1. while adding a new flow
1506 *    a) lookup l2-key
1507 *    b) issue HWRM cmd and get flow_handle
1508 *    c) link l2-key with flow
1509 * 2. while deleting a flow
1510 *    a) unlinking l2-key from flow
1511 * A lock is needed to protect these two critical sections.
1512 *
1513 * The hash-tables are already protected by the rhashtable API.
1514 */
1515static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1516                            struct flow_cls_offload *tc_flow_cmd)
1517{
1518        struct bnxt_tc_flow_node *new_node, *old_node;
1519        struct bnxt_tc_info *tc_info = bp->tc_info;
1520        struct bnxt_tc_flow *flow;
1521        __le32 tunnel_handle = 0;
1522        __le16 ref_flow_handle;
1523        int rc;
1524
1525        /* allocate memory for the new flow and its node */
1526        new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1527        if (!new_node) {
1528                rc = -ENOMEM;
1529                goto done;
1530        }
1531        new_node->cookie = tc_flow_cmd->cookie;
1532        flow = &new_node->flow;
1533
1534        rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1535        if (rc)
1536                goto free_node;
1537
1538        bnxt_tc_set_src_fid(bp, flow, src_fid);
1539        bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1540
1541        if (!bnxt_tc_can_offload(bp, flow)) {
1542                rc = -EOPNOTSUPP;
1543                kfree_rcu(new_node, rcu);
1544                return rc;
1545        }
1546
1547        /* If a flow exists with the same cookie, delete it */
1548        old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1549                                          &tc_flow_cmd->cookie,
1550                                          tc_info->flow_ht_params);
1551        if (old_node)
1552                __bnxt_tc_del_flow(bp, old_node);
1553
1554        /* Check if the L2 part of the flow has been offloaded already.
1555         * If so, bump up its refcnt and get its reference handle.
1556         */
1557        mutex_lock(&tc_info->lock);
1558        rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1559        if (rc)
1560                goto unlock;
1561
1562        /* If the flow involves tunnel encap/decap, get tunnel_handle */
1563        rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1564        if (rc)
1565                goto put_l2;
1566
1567        /* send HWRM cmd to alloc the flow */
1568        rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1569                                      tunnel_handle, new_node);
1570        if (rc)
1571                goto put_tunnel;
1572
1573        flow->lastused = jiffies;
1574        spin_lock_init(&flow->stats_lock);
1575        /* add new flow to flow-table */
1576        rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1577                                    tc_info->flow_ht_params);
1578        if (rc)
1579                goto hwrm_flow_free;
1580
1581        mutex_unlock(&tc_info->lock);
1582        return 0;
1583
1584hwrm_flow_free:
1585        bnxt_hwrm_cfa_flow_free(bp, new_node);
1586put_tunnel:
1587        bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1588put_l2:
1589        bnxt_tc_put_l2_node(bp, new_node);
1590unlock:
1591        mutex_unlock(&tc_info->lock);
1592free_node:
1593        kfree_rcu(new_node, rcu);
1594done:
1595        netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
1596                   __func__, tc_flow_cmd->cookie, rc);
1597        return rc;
1598}
1599
1600static int bnxt_tc_del_flow(struct bnxt *bp,
1601                            struct flow_cls_offload *tc_flow_cmd)
1602{
1603        struct bnxt_tc_info *tc_info = bp->tc_info;
1604        struct bnxt_tc_flow_node *flow_node;
1605
1606        flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1607                                           &tc_flow_cmd->cookie,
1608                                           tc_info->flow_ht_params);
1609        if (!flow_node)
1610                return -EINVAL;
1611
1612        return __bnxt_tc_del_flow(bp, flow_node);
1613}
1614
1615static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1616                                  struct flow_cls_offload *tc_flow_cmd)
1617{
1618        struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1619        struct bnxt_tc_info *tc_info = bp->tc_info;
1620        struct bnxt_tc_flow_node *flow_node;
1621        struct bnxt_tc_flow *flow;
1622        unsigned long lastused;
1623
1624        flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1625                                           &tc_flow_cmd->cookie,
1626                                           tc_info->flow_ht_params);
1627        if (!flow_node)
1628                return -1;
1629
1630        flow = &flow_node->flow;
1631        curr_stats = &flow->stats;
1632        prev_stats = &flow->prev_stats;
1633
1634        spin_lock(&flow->stats_lock);
1635        stats.packets = curr_stats->packets - prev_stats->packets;
1636        stats.bytes = curr_stats->bytes - prev_stats->bytes;
1637        *prev_stats = *curr_stats;
1638        lastused = flow->lastused;
1639        spin_unlock(&flow->stats_lock);
1640
1641        flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, 0,
1642                          lastused, FLOW_ACTION_HW_STATS_DELAYED);
1643        return 0;
1644}
1645
1646static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1647                                    struct bnxt_tc_flow_node *flow_node,
1648                                    __le16 *flow_handle, __le32 *flow_id)
1649{
1650        u16 handle;
1651
1652        if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1653                *flow_id = flow_node->flow_id;
1654
1655                /* If flow_id is used to fetch flow stats then:
1656                 * 1. lower 12 bits of flow_handle must be set to all 1s.
1657                 * 2. 15th bit of flow_handle must specify the flow
1658                 *    direction (TX/RX).
1659                 */
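                /* Illustrative example, assuming the usual HSI values
                 * (DIR_RX == bit 15, MAX_MASK == 0xfff): the resulting
                 * flow_handle is 0x8fff for an RX flow and 0x0fff for TX.
                 */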
1660                if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1661                        handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1662                                 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1663                else
1664                        handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1665
1666                *flow_handle = cpu_to_le16(handle);
1667        } else {
1668                *flow_handle = flow_node->flow_handle;
1669        }
1670}
1671
1672static int
1673bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1674                             struct bnxt_tc_stats_batch stats_batch[])
1675{
1676        struct hwrm_cfa_flow_stats_input req = { 0 };
1677        struct hwrm_cfa_flow_stats_output *resp;
1678        __le16 *req_flow_handles = &req.flow_handle_0;
1679        __le32 *req_flow_ids = &req.flow_id_0;
1680        int rc, i;
1681
1682        bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
1683        req.num_flows = cpu_to_le16(num_flows);
1684        for (i = 0; i < num_flows; i++) {
1685                struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1686
1687                bnxt_fill_cfa_stats_req(bp, flow_node,
1688                                        &req_flow_handles[i], &req_flow_ids[i]);
1689        }
1690
1691        mutex_lock(&bp->hwrm_cmd_lock);
1692        rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
1693        if (!rc) {
1694                __le64 *resp_packets;
1695                __le64 *resp_bytes;
1696
1697                resp = bnxt_get_hwrm_resp_addr(bp, &req);
1698                resp_packets = &resp->packet_0;
1699                resp_bytes = &resp->byte_0;
1700
1701                for (i = 0; i < num_flows; i++) {
1702                        stats_batch[i].hw_stats.packets =
1703                                                le64_to_cpu(resp_packets[i]);
1704                        stats_batch[i].hw_stats.bytes =
1705                                                le64_to_cpu(resp_bytes[i]);
1706                }
1707        } else {
1708                netdev_info(bp->dev, "error rc=%d\n", rc);
1709        }
1710        mutex_unlock(&bp->hwrm_cmd_lock);
1711
1712        return rc;
1713}
1714
1715/* Add val to accum while handling a possible wraparound
1716 * of val. Even though val is of type u64, its actual width
1717 * is denoted by mask and it wraps around beyond that width.
1718 */
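/* Worked example (illustrative): with a 4-bit counter, mask == 0xf.
 * If *accum == 14 and the HW counter has since wrapped around to val == 2,
 * then val < low_bits(*accum, mask), so wrapped is true and *accum becomes
 * high_bits(14) + 2 + (mask + 1) == 0 + 2 + 16 == 18, correctly accounting
 * for the four increments 14 -> 15 -> 0 -> 1 -> 2.
 */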
1719static void accumulate_val(u64 *accum, u64 val, u64 mask)
1720{
1721#define low_bits(x, mask)               ((x) & (mask))
1722#define high_bits(x, mask)              ((x) & ~(mask))
1723        bool wrapped = val < low_bits(*accum, mask);
1724
1725        *accum = high_bits(*accum, mask) + val;
1726        if (wrapped)
1727                *accum += (mask + 1);
1728}
1729
1730/* The HW counters' width is much less than 64 bits.
1731 * Handle a possible wrap-around while updating the stat counters.
1732 */
1733static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1734                                  struct bnxt_tc_flow_stats *acc_stats,
1735                                  struct bnxt_tc_flow_stats *hw_stats)
1736{
1737        accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1738        accumulate_val(&acc_stats->packets, hw_stats->packets,
1739                       tc_info->packets_mask);
1740}
1741
1742static int
1743bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1744                                struct bnxt_tc_stats_batch stats_batch[])
1745{
1746        struct bnxt_tc_info *tc_info = bp->tc_info;
1747        int rc, i;
1748
1749        rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1750        if (rc)
1751                return rc;
1752
1753        for (i = 0; i < num_flows; i++) {
1754                struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1755                struct bnxt_tc_flow *flow = &flow_node->flow;
1756
1757                spin_lock(&flow->stats_lock);
1758                bnxt_flow_stats_accum(tc_info, &flow->stats,
1759                                      &stats_batch[i].hw_stats);
1760                if (flow->stats.packets != flow->prev_stats.packets)
1761                        flow->lastused = jiffies;
1762                spin_unlock(&flow->stats_lock);
1763        }
1764
1765        return 0;
1766}
1767
1768static int
1769bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1770                              struct bnxt_tc_stats_batch stats_batch[],
1771                              int *num_flows)
1772{
1773        struct bnxt_tc_info *tc_info = bp->tc_info;
1774        struct rhashtable_iter *iter = &tc_info->iter;
1775        void *flow_node;
1776        int rc, i;
1777
1778        rhashtable_walk_start(iter);
1779
1780        rc = 0;
1781        for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1782                flow_node = rhashtable_walk_next(iter);
1783                if (IS_ERR(flow_node)) {
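                        /* The walk was interrupted (e.g. by a table resize)
                         * or failed; discard the partially filled batch.
                         */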
1784                        i = 0;
1785                        if (PTR_ERR(flow_node) == -EAGAIN) {
1786                                continue;
1787                        } else {
1788                                rc = PTR_ERR(flow_node);
1789                                goto done;
1790                        }
1791                }
1792
1793                /* No more flows */
1794                if (!flow_node)
1795                        goto done;
1796
1797                stats_batch[i].flow_node = flow_node;
1798        }
1799done:
1800        rhashtable_walk_stop(iter);
1801        *num_flows = i;
1802        return rc;
1803}
1804
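/* Called periodically by the driver to poll HW counters for all offloaded
 * flows, walking the flow table in batches of BNXT_FLOW_STATS_BATCH_MAX.
 */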
1805void bnxt_tc_flow_stats_work(struct bnxt *bp)
1806{
1807        struct bnxt_tc_info *tc_info = bp->tc_info;
1808        int num_flows, rc;
1809
1810        num_flows = atomic_read(&tc_info->flow_table.nelems);
1811        if (!num_flows)
1812                return;
1813
1814        rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1815
1816        for (;;) {
1817                rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1818                                                   &num_flows);
1819                if (rc) {
1820                        if (rc == -EAGAIN)
1821                                continue;
1822                        break;
1823                }
1824
1825                if (!num_flows)
1826                        break;
1827
1828                bnxt_tc_flow_stats_batch_update(bp, num_flows,
1829                                                tc_info->stats_batch);
1830        }
1831
1832        rhashtable_walk_exit(&tc_info->iter);
1833}
1834
1835int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1836                         struct flow_cls_offload *cls_flower)
1837{
1838        switch (cls_flower->command) {
1839        case FLOW_CLS_REPLACE:
1840                return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1841        case FLOW_CLS_DESTROY:
1842                return bnxt_tc_del_flow(bp, cls_flower);
1843        case FLOW_CLS_STATS:
1844                return bnxt_tc_get_flow_stats(bp, cls_flower);
1845        default:
1846                return -EOPNOTSUPP;
1847        }
1848}
1849
1850static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1851                                       void *type_data, void *cb_priv)
1852{
1853        struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1854        struct flow_cls_offload *flower = type_data;
1855        struct bnxt *bp = priv->bp;
1856
1857        if (flower->common.chain_index)
1858                return -EOPNOTSUPP;
1859
1860        switch (type) {
1861        case TC_SETUP_CLSFLOWER:
1862                return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1863        default:
1864                return -EOPNOTSUPP;
1865        }
1866}
1867
1868static struct bnxt_flower_indr_block_cb_priv *
1869bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1870{
1871        struct bnxt_flower_indr_block_cb_priv *cb_priv;
1872
1873        /* All callback list access should be protected by RTNL. */
1874        ASSERT_RTNL();
1875
1876        list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1877                if (cb_priv->tunnel_netdev == netdev)
1878                        return cb_priv;
1879
1880        return NULL;
1881}
1882
1883static void bnxt_tc_setup_indr_rel(void *cb_priv)
1884{
1885        struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1886
1887        list_del(&priv->list);
1888        kfree(priv);
1889}
1890
1891static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
1892                                    struct flow_block_offload *f, void *data,
1893                                    void (*cleanup)(struct flow_block_cb *block_cb))
1894{
1895        struct bnxt_flower_indr_block_cb_priv *cb_priv;
1896        struct flow_block_cb *block_cb;
1897
1898        if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1899                return -EOPNOTSUPP;
1900
1901        switch (f->command) {
1902        case FLOW_BLOCK_BIND:
1903                cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1904                if (!cb_priv)
1905                        return -ENOMEM;
1906
1907                cb_priv->tunnel_netdev = netdev;
1908                cb_priv->bp = bp;
1909                list_add(&cb_priv->list, &bp->tc_indr_block_list);
1910
1911                block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1912                                                    cb_priv, cb_priv,
1913                                                    bnxt_tc_setup_indr_rel, f,
1914                                                    netdev, sch, data, bp, cleanup);
1915                if (IS_ERR(block_cb)) {
1916                        list_del(&cb_priv->list);
1917                        kfree(cb_priv);
1918                        return PTR_ERR(block_cb);
1919                }
1920
1921                flow_block_cb_add(block_cb, f);
1922                list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1923                break;
1924        case FLOW_BLOCK_UNBIND:
1925                cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1926                if (!cb_priv)
1927                        return -ENOENT;
1928
1929                block_cb = flow_block_cb_lookup(f->block,
1930                                                bnxt_tc_setup_indr_block_cb,
1931                                                cb_priv);
1932                if (!block_cb)
1933                        return -ENOENT;
1934
1935                flow_indr_block_cb_remove(block_cb, f);
1936                list_del(&block_cb->driver_list);
1937                break;
1938        default:
1939                return -EOPNOTSUPP;
1940        }
1941        return 0;
1942}
1943
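/* Indirect block offload is only supported for VXLAN tunnel netdevs. */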
1944static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1945{
1946        return netif_is_vxlan(netdev);
1947}
1948
1949static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
1950                                 enum tc_setup_type type, void *type_data,
1951                                 void *data,
1952                                 void (*cleanup)(struct flow_block_cb *block_cb))
1953{
1954        if (!bnxt_is_netdev_indr_offload(netdev))
1955                return -EOPNOTSUPP;
1956
1957        switch (type) {
1958        case TC_SETUP_BLOCK:
1959                return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
1960        default:
1961                break;
1962        }
1963
1964        return -EOPNOTSUPP;
1965}
1966
1967static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1968        .head_offset = offsetof(struct bnxt_tc_flow_node, node),
1969        .key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1970        .key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1971        .automatic_shrinking = true
1972};
1973
1974static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1975        .head_offset = offsetof(struct bnxt_tc_l2_node, node),
1976        .key_offset = offsetof(struct bnxt_tc_l2_node, key),
1977        .key_len = BNXT_TC_L2_KEY_LEN,
1978        .automatic_shrinking = true
1979};
1980
1981static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
1982        .head_offset = offsetof(struct bnxt_tc_l2_node, node),
1983        .key_offset = offsetof(struct bnxt_tc_l2_node, key),
1984        .key_len = BNXT_TC_L2_KEY_LEN,
1985        .automatic_shrinking = true
1986};
1987
1988static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
1989        .head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
1990        .key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
1991        .key_len = sizeof(struct ip_tunnel_key),
1992        .automatic_shrinking = true
1993};
1994
1995/* convert counter width in bits to a mask */
1996#define mask(width)             ((u64)~0 >> (64 - (width)))
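/* e.g. mask(36) == 0xfffffffffULL (36 one-bits), mask(28) == 0xfffffffULL */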
1997
1998int bnxt_init_tc(struct bnxt *bp)
1999{
2000        struct bnxt_tc_info *tc_info;
2001        int rc;
2002
2003        if (bp->hwrm_spec_code < 0x10803)
2004                return 0;
2005
2006        tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2007        if (!tc_info)
2008                return -ENOMEM;
2009        mutex_init(&tc_info->lock);
2010
2011        /* Counter widths are programmed by FW */
2012        tc_info->bytes_mask = mask(36);
2013        tc_info->packets_mask = mask(28);
2014
2015        tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2016        rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2017        if (rc)
2018                goto free_tc_info;
2019
2020        tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2021        rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2022        if (rc)
2023                goto destroy_flow_table;
2024
2025        tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2026        rc = rhashtable_init(&tc_info->decap_l2_table,
2027                             &tc_info->decap_l2_ht_params);
2028        if (rc)
2029                goto destroy_l2_table;
2030
2031        tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2032        rc = rhashtable_init(&tc_info->decap_table,
2033                             &tc_info->decap_ht_params);
2034        if (rc)
2035                goto destroy_decap_l2_table;
2036
2037        tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2038        rc = rhashtable_init(&tc_info->encap_table,
2039                             &tc_info->encap_ht_params);
2040        if (rc)
2041                goto destroy_decap_table;
2042
2043        tc_info->enabled = true;
2044        bp->dev->hw_features |= NETIF_F_HW_TC;
2045        bp->dev->features |= NETIF_F_HW_TC;
2046        bp->tc_info = tc_info;
2047
2048        /* init indirect block notifications */
2049        INIT_LIST_HEAD(&bp->tc_indr_block_list);
2050
2051        rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
2052        if (!rc)
2053                return 0;
2054
2055        rhashtable_destroy(&tc_info->encap_table);
2056
2057destroy_decap_table:
2058        rhashtable_destroy(&tc_info->decap_table);
2059destroy_decap_l2_table:
2060        rhashtable_destroy(&tc_info->decap_l2_table);
2061destroy_l2_table:
2062        rhashtable_destroy(&tc_info->l2_table);
2063destroy_flow_table:
2064        rhashtable_destroy(&tc_info->flow_table);
2065free_tc_info:
2066        kfree(tc_info);
2067        return rc;
2068}
2069
2070void bnxt_shutdown_tc(struct bnxt *bp)
2071{
2072        struct bnxt_tc_info *tc_info = bp->tc_info;
2073
2074        if (!bnxt_tc_flower_enabled(bp))
2075                return;
2076
2077        flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
2078                                 bnxt_tc_setup_indr_rel);
2079        rhashtable_destroy(&tc_info->flow_table);
2080        rhashtable_destroy(&tc_info->l2_table);
2081        rhashtable_destroy(&tc_info->decap_l2_table);
2082        rhashtable_destroy(&tc_info->decap_table);
2083        rhashtable_destroy(&tc_info->encap_table);
2084        kfree(tc_info);
2085        bp->tc_info = NULL;
2086}
2087