dpdk/drivers/net/mlx4/mlx4_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Flow API operations for mlx4 driver.
 */

#include <arpa/inet.h>
#include <errno.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_byteorder.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_ether.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

/* PMD headers. */
#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/** Static initializer for a list of subsequent item types. */
#define NEXT_ITEM(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Processor structure associated with a flow item. */
struct mlx4_flow_proc_item {
        /** Bit-mask for fields supported by this PMD. */
        const void *mask_support;
        /** Bit-mask to use when @p item->mask is not provided. */
        const void *mask_default;
        /** Size in bytes for @p mask_support and @p mask_default. */
        const unsigned int mask_sz;
        /** Merge a pattern item into a flow rule handle. */
        int (*merge)(struct rte_flow *flow,
                     const struct rte_flow_item *item,
                     const struct mlx4_flow_proc_item *proc,
                     struct rte_flow_error *error);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible subsequent items. */
        const enum rte_flow_item_type *const next_item;
};

/** Shared resources for drop flow rules. */
struct mlx4_drop {
        struct ibv_qp *qp; /**< QP target. */
        struct ibv_cq *cq; /**< CQ associated with above QP. */
        struct mlx4_priv *priv; /**< Back pointer to private data. */
        uint32_t refcnt; /**< Reference count. */
};

/**
 * Convert supported RSS hash field types between DPDK and Verbs formats.
 *
 * This function returns the supported (default) set when @p types has
 * special value 0.
 *
 * @param priv
 *   Pointer to private structure.
 * @param types
 *   Depending on @p verbs_to_dpdk, hash types in either DPDK (see struct
 *   rte_eth_rss_conf) or Verbs format.
 * @param verbs_to_dpdk
 *   A zero value converts @p types from DPDK to Verbs, a nonzero value
 *   performs the reverse operation.
 *
 * @return
 *   Converted RSS hash fields on success, (uint64_t)-1 otherwise and
 *   rte_errno is set.
 */
uint64_t
mlx4_conv_rss_types(struct mlx4_priv *priv, uint64_t types, int verbs_to_dpdk)
{
        enum {
                INNER,
                IPV4, IPV4_1, IPV4_2, IPV6, IPV6_1, IPV6_2, IPV6_3,
                TCP, UDP,
                IPV4_TCP, IPV4_UDP, IPV6_TCP, IPV6_TCP_1, IPV6_UDP, IPV6_UDP_1,
        };
        enum {
                VERBS_IPV4 = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
                VERBS_IPV6 = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6,
                VERBS_TCP = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP,
                VERBS_UDP = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP,
        };
        static const uint64_t dpdk[] = {
                [INNER] = 0,
                [IPV4] = ETH_RSS_IPV4,
                [IPV4_1] = ETH_RSS_FRAG_IPV4,
                [IPV4_2] = ETH_RSS_NONFRAG_IPV4_OTHER,
                [IPV6] = ETH_RSS_IPV6,
                [IPV6_1] = ETH_RSS_FRAG_IPV6,
                [IPV6_2] = ETH_RSS_NONFRAG_IPV6_OTHER,
                [IPV6_3] = ETH_RSS_IPV6_EX,
                [TCP] = 0,
                [UDP] = 0,
                [IPV4_TCP] = ETH_RSS_NONFRAG_IPV4_TCP,
                [IPV4_UDP] = ETH_RSS_NONFRAG_IPV4_UDP,
                [IPV6_TCP] = ETH_RSS_NONFRAG_IPV6_TCP,
                [IPV6_TCP_1] = ETH_RSS_IPV6_TCP_EX,
                [IPV6_UDP] = ETH_RSS_NONFRAG_IPV6_UDP,
                [IPV6_UDP_1] = ETH_RSS_IPV6_UDP_EX,
        };
        static const uint64_t verbs[RTE_DIM(dpdk)] = {
                [INNER] = IBV_RX_HASH_INNER,
                [IPV4] = VERBS_IPV4,
                [IPV4_1] = VERBS_IPV4,
                [IPV4_2] = VERBS_IPV4,
                [IPV6] = VERBS_IPV6,
                [IPV6_1] = VERBS_IPV6,
                [IPV6_2] = VERBS_IPV6,
                [IPV6_3] = VERBS_IPV6,
                [TCP] = VERBS_TCP,
                [UDP] = VERBS_UDP,
                [IPV4_TCP] = VERBS_IPV4 | VERBS_TCP,
                [IPV4_UDP] = VERBS_IPV4 | VERBS_UDP,
                [IPV6_TCP] = VERBS_IPV6 | VERBS_TCP,
                [IPV6_TCP_1] = VERBS_IPV6 | VERBS_TCP,
                [IPV6_UDP] = VERBS_IPV6 | VERBS_UDP,
                [IPV6_UDP_1] = VERBS_IPV6 | VERBS_UDP,
        };
        const uint64_t *in = verbs_to_dpdk ? verbs : dpdk;
        const uint64_t *out = verbs_to_dpdk ? dpdk : verbs;
        uint64_t seen = 0;
        uint64_t conv = 0;
        unsigned int i;

        if (!types) {
                if (!verbs_to_dpdk)
                        return priv->hw_rss_sup;
                types = priv->hw_rss_sup;
        }
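        /*
         * Keep table entries whose input bits are all present in @p types;
         * "seen" accumulates the bits actually covered so that leftover
         * unsupported bits can be detected afterward. The mapping is not
         * one-to-one: several DPDK types share the same Verbs fields.
         */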
        for (i = 0; i != RTE_DIM(dpdk); ++i)
                if (in[i] && (types & in[i]) == in[i]) {
                        seen |= types & in[i];
                        conv |= out[i];
                }
        if ((verbs_to_dpdk || (conv & priv->hw_rss_sup) == conv) &&
            !(types & ~seen))
                return conv;
        rte_errno = ENOTSUP;
        return (uint64_t)-1;
}
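/*
 * Example (illustrative, not part of the driver): converting
 * ETH_RSS_NONFRAG_IPV4_UDP to Verbs yields VERBS_IPV4 | VERBS_UDP, while
 * the reverse direction expands those Verbs bits into every DPDK type
 * they cover (ETH_RSS_IPV4, ETH_RSS_FRAG_IPV4, ETH_RSS_NONFRAG_IPV4_OTHER,
 * ETH_RSS_NONFRAG_IPV4_UDP), subject to priv->hw_rss_sup.
 */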

/**
 * Merge Ethernet pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks, except in the specific case of matching
 *   all multicast traffic (@p spec->dst and @p mask->dst equal to
 *   01:00:00:00:00:00).
 * - Not providing @p item->spec or providing an empty @p mask->dst is
 *   *only* supported if the rule doesn't specify additional matching
 *   criteria (i.e. rule is promiscuous-like).
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_eth(struct rte_flow *flow,
                    const struct rte_flow_item *item,
                    const struct mlx4_flow_proc_item *proc,
                    struct rte_flow_error *error)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask =
                spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
        struct ibv_flow_spec_eth *eth;
        const char *msg;
        unsigned int i;

        if (mask) {
                uint32_t sum_dst = 0;
                uint32_t sum_src = 0;

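                /*
                 * Sum all mask bytes to classify the destination mask:
                 * 0 means an empty mask (promiscuous-like), a lone 0x01
                 * in the first byte selects the multicast group bit (all
                 * multicast), 6 * 0xff is a full match; anything else is
                 * an unsupported partial mask.
                 */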
                for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
                        sum_dst += mask->dst.addr_bytes[i];
                        sum_src += mask->src.addr_bytes[i];
                }
                if (sum_src) {
                        msg = "mlx4 does not support source MAC matching";
                        goto error;
                } else if (!sum_dst) {
                        flow->promisc = 1;
                } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) {
                        if (!(spec->dst.addr_bytes[0] & 1)) {
                                msg = "mlx4 does not support the explicit"
                                        " exclusion of all multicast traffic";
                                goto error;
                        }
                        flow->allmulti = 1;
                } else if (sum_dst != (UINT8_C(0xff) * RTE_ETHER_ADDR_LEN)) {
                        msg = "mlx4 does not support matching partial"
                                " Ethernet fields";
                        goto error;
                }
        }
        if (!flow->ibv_attr)
                return 0;
        if (flow->promisc) {
                flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
                return 0;
        }
        if (flow->allmulti) {
                flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT;
                return 0;
        }
        ++flow->ibv_attr->num_of_specs;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
        *eth = (struct ibv_flow_spec_eth) {
                .type = IBV_FLOW_SPEC_ETH,
                .size = sizeof(*eth),
        };
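        /* No spec/mask at all: emit a promiscuous-like catch-all rule. */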
        if (!mask) {
                eth->val.dst_mac[0] = 0xff;
                flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
                flow->promisc = 1;
                return 0;
        }
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, RTE_ETHER_ADDR_LEN);
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, RTE_ETHER_ADDR_LEN);
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_ETHER_ADDR_LEN; ++i)
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];

        return 0;
error:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg);
}

/**
 * Merge VLAN pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
 *   empty @p item->mask would also include non-VLAN traffic. Doing so is
 *   therefore unsupported.
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_vlan(struct rte_flow *flow,
                     const struct rte_flow_item *item,
                     const struct mlx4_flow_proc_item *proc,
                     struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask =
                spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
        struct ibv_flow_spec_eth *eth;
        const char *msg;

        if (!mask || !mask->tci) {
                msg = "mlx4 cannot match all VLAN traffic while excluding"
                        " non-VLAN traffic, TCI VID must be specified";
                goto error;
        }
        if (mask->tci != RTE_BE16(0x0fff)) {
                msg = "mlx4 does not support partial TCI VID matching";
                goto error;
        }
        if (!flow->ibv_attr)
                return 0;
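        /*
         * VLAN has no dedicated Verbs spec (its dst_sz is 0): the TCI is
         * back-patched into the Ethernet spec emitted just before it.
         */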
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
                       sizeof(*eth));
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        if (flow->ibv_attr->type == IBV_FLOW_ATTR_ALL_DEFAULT)
                flow->ibv_attr->type = IBV_FLOW_ATTR_NORMAL;
        return 0;
error:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg);
}

/**
 * Merge IPv4 pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_ipv4(struct rte_flow *flow,
                     const struct rte_flow_item *item,
                     const struct mlx4_flow_proc_item *proc,
                     struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask =
                spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
        struct ibv_flow_spec_ipv4 *ipv4;
        const char *msg;

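        /*
         * (x + 1) > 1 on an unsigned field is true for every value except
         * 0 (empty mask) and all-ones (full mask, which wraps to 0), i.e.
         * it singles out unsupported partial masks.
         */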
        if (mask &&
            ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
             (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
                msg = "mlx4 does not support matching partial IPv4 fields";
                goto error;
        }
        if (!flow->ibv_attr)
                return 0;
        ++flow->ibv_attr->num_of_specs;
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
        *ipv4 = (struct ibv_flow_spec_ipv4) {
                .type = IBV_FLOW_SPEC_IPV4,
                .size = sizeof(*ipv4),
        };
        if (!spec)
                return 0;
        ipv4->val = (struct ibv_flow_ipv4_filter) {
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
        };
        ipv4->mask = (struct ibv_flow_ipv4_filter) {
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        return 0;
error:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg);
}

/**
 * Merge UDP pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 * - Due to a HW/FW limitation, flow rule priority is not taken into account
 *   when matching UDP destination ports; doing so is therefore only
 *   supported at the highest priority level (0).
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_udp(struct rte_flow *flow,
                    const struct rte_flow_item *item,
                    const struct mlx4_flow_proc_item *proc,
                    struct rte_flow_error *error)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask =
                spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
        struct ibv_flow_spec_tcp_udp *udp;
        const char *msg;

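        /* Same trick as IPv4 above: only empty or full port masks pass. */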
        if (mask &&
            ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
             (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
                msg = "mlx4 does not support matching partial UDP fields";
                goto error;
        }
        if (mask && mask->hdr.dst_port && flow->priority) {
                msg = "combining UDP destination port matching with a nonzero"
                        " priority level is not supported";
                goto error;
        }
        if (!flow->ibv_attr)
                return 0;
        ++flow->ibv_attr->num_of_specs;
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
        *udp = (struct ibv_flow_spec_tcp_udp) {
                .type = IBV_FLOW_SPEC_UDP,
                .size = sizeof(*udp),
        };
        if (!spec)
                return 0;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
error:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg);
}

/**
 * Merge TCP pattern item into flow rule handle.
 *
 * Additional mlx4-specific constraints on supported fields:
 *
 * - No support for partial masks.
 *
 * @param[in, out] flow
 *   Flow rule handle to update.
 * @param[in] item
 *   Pattern item to merge.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_merge_tcp(struct rte_flow *flow,
                    const struct rte_flow_item *item,
                    const struct mlx4_flow_proc_item *proc,
                    struct rte_flow_error *error)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask =
                spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
        struct ibv_flow_spec_tcp_udp *tcp;
        const char *msg;

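        /* As with UDP, reject partial port masks (only 0 or 0xffff). */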
        if (mask &&
            ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
             (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
                msg = "mlx4 does not support matching partial TCP fields";
                goto error;
        }
        if (!flow->ibv_attr)
                return 0;
        ++flow->ibv_attr->num_of_specs;
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
        *tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = IBV_FLOW_SPEC_TCP,
                .size = sizeof(*tcp),
        };
        if (!spec)
                return 0;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
error:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg);
}

/**
 * Perform basic sanity checks on a pattern item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] proc
 *   Associated item-processing object.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_item_check(const struct rte_flow_item *item,
                     const struct mlx4_flow_proc_item *proc,
                     struct rte_flow_error *error)
{
        const uint8_t *mask;
        unsigned int i;

        /* item->last and item->mask cannot exist without item->spec. */
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set
                        (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
                         "\"mask\" or \"last\" field provided without a"
                         " corresponding \"spec\"");
        /* No spec, no mask, no problem. */
        if (!item->spec)
                return 0;
        mask = item->mask ?
                (const uint8_t *)item->mask :
                (const uint8_t *)proc->mask_default;
        MLX4_ASSERT(mask);
        /*
         * Single-pass check to make sure that:
         * - Mask is supported, no bits are set outside proc->mask_support.
         * - Both item->spec and item->last are included in mask.
         */
        for (i = 0; i != proc->mask_sz; ++i) {
                if (!mask[i])
                        continue;
                if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
                    ((const uint8_t *)proc->mask_support)[i])
                        return rte_flow_error_set
                                (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                 item, "unsupported field found in \"mask\"");
                if (item->last &&
                    (((const uint8_t *)item->spec)[i] & mask[i]) !=
                    (((const uint8_t *)item->last)[i] & mask[i]))
                        return rte_flow_error_set
                                (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                 item,
                                 "range between \"spec\" and \"last\""
                                 " is larger than \"mask\"");
        }
        return 0;
}

/** Graph of supported items and associated actions. */
static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
                                       RTE_FLOW_ITEM_TYPE_IPV4),
                .mask_support = &(const struct rte_flow_item_eth){
                        /* Only destination MAC can be matched. */
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                },
                .mask_default = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .merge = mlx4_flow_merge_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
                .mask_support = &(const struct rte_flow_item_vlan){
                        /* Only TCI VID matching is supported. */
                        .tci = RTE_BE16(0x0fff),
                },
                .mask_default = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .merge = mlx4_flow_merge_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
                                       RTE_FLOW_ITEM_TYPE_TCP),
                .mask_support = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = RTE_BE32(0xffffffff),
                                .dst_addr = RTE_BE32(0xffffffff),
                        },
                },
                .mask_default = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .merge = mlx4_flow_merge_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .mask_support = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = RTE_BE16(0xffff),
                                .dst_port = RTE_BE16(0xffff),
                        },
                },
                .mask_default = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .merge = mlx4_flow_merge_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .mask_support = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = RTE_BE16(0xffff),
                                .dst_port = RTE_BE16(0xffff),
                        },
                },
                .mask_default = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .merge = mlx4_flow_merge_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
};
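/*
 * Resulting pattern graph: END -> ETH -> { VLAN, IPV4 }, VLAN -> IPV4,
 * IPV4 -> { UDP, TCP }; UDP and TCP are terminal items.
 */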

/**
 * Make sure a flow rule is supported and initialize associated structure.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] addr
 *   Buffer where the resulting flow rule handle pointer must be stored.
 *   If NULL, stop processing after validation stage.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_prepare(struct mlx4_priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item pattern[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct rte_flow **addr)
{
        const struct rte_flow_item *item;
        const struct rte_flow_action *action;
        const struct mlx4_flow_proc_item *proc;
        struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
        struct rte_flow *flow = &temp;
        const char *msg = NULL;
        int overlap;

        if (attr->group)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                         NULL, "groups are not supported");
        if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                         NULL, "maximum priority level is "
                         MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
        if (attr->egress)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                         NULL, "egress is not supported");
        if (attr->transfer)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                         NULL, "transfer is not supported");
        if (!attr->ingress)
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                         NULL, "only ingress is supported");
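/*
 * Two-pass operation: the first pass runs on the stack-allocated "temp"
 * handle to validate the pattern and total the required Verbs attribute
 * size; once a properly sized handle has been allocated, control jumps
 * back here to fill it in for real.
 */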
fill:
        overlap = 0;
        proc = mlx4_flow_proc_item_list;
        flow->priority = attr->priority;
        /* Go over pattern. */
        for (item = pattern; item->type; ++item) {
                const struct mlx4_flow_proc_item *next = NULL;
                unsigned int i;
                int err;

                if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
                        flow->internal = 1;
                        continue;
                }
                if (flow->promisc || flow->allmulti) {
                        msg = "mlx4 does not support additional matching"
                                " criteria combined with indiscriminate"
                                " matching on Ethernet headers";
                        goto exit_item_not_supported;
                }
                for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
                        if (proc->next_item[i] == item->type) {
                                next = &mlx4_flow_proc_item_list[item->type];
                                break;
                        }
                }
                if (!next)
                        goto exit_item_not_supported;
                proc = next;
                /*
                 * Perform basic sanity checks only once, while handle is
                 * not allocated.
                 */
                if (flow == &temp) {
                        err = mlx4_flow_item_check(item, proc, error);
                        if (err)
                                return err;
                }
                if (proc->merge) {
                        err = proc->merge(flow, item, proc, error);
                        if (err)
                                return err;
                }
                flow->ibv_attr_size += proc->dst_sz;
        }
        /* Go over actions list. */
        for (action = actions; action->type; ++action) {
                /* This one may appear anywhere multiple times. */
                if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
                        continue;
                /* Fate-deciding actions may appear exactly once. */
                if (overlap) {
                        msg = "cannot combine several fate-deciding actions,"
                                " choose between DROP, QUEUE or RSS";
                        goto exit_action_not_supported;
                }
                overlap = 1;
                switch (action->type) {
                        const struct rte_flow_action_queue *queue;
                        const struct rte_flow_action_rss *rss;
                        const uint8_t *rss_key;
                        uint32_t rss_key_len;
                        uint64_t fields;
                        unsigned int i;

                case RTE_FLOW_ACTION_TYPE_DROP:
                        flow->drop = 1;
                        break;
                case RTE_FLOW_ACTION_TYPE_QUEUE:
                        if (flow->rss)
                                break;
                        queue = action->conf;
                        if (queue->index >= ETH_DEV(priv)->data->nb_rx_queues) {
                                msg = "queue target index beyond number of"
                                        " configured Rx queues";
                                goto exit_action_not_supported;
                        }
                        flow->rss = mlx4_rss_get
                                (priv, 0, mlx4_rss_hash_key_default, 1,
                                 &queue->index);
                        if (!flow->rss) {
                                msg = "not enough resources for additional"
                                        " single-queue RSS context";
                                goto exit_action_not_supported;
                        }
                        break;
                case RTE_FLOW_ACTION_TYPE_RSS:
                        if (flow->rss)
                                break;
                        rss = action->conf;
                        /* Default RSS configuration if none is provided. */
                        if (rss->key_len) {
                                rss_key = rss->key ?
                                          rss->key : mlx4_rss_hash_key_default;
                                rss_key_len = rss->key_len;
                        } else {
                                rss_key = mlx4_rss_hash_key_default;
                                rss_key_len = MLX4_RSS_HASH_KEY_SIZE;
                        }
                        /* Sanity checks. */
                        for (i = 0; i < rss->queue_num; ++i)
                                if (rss->queue[i] >=
                                    ETH_DEV(priv)->data->nb_rx_queues)
                                        break;
                        if (i != rss->queue_num) {
                                msg = "queue target index beyond number of"
                                        " configured Rx queues";
                                goto exit_action_not_supported;
                        }
                        if (!rte_is_power_of_2(rss->queue_num)) {
                                msg = "for RSS, mlx4 requires the number of"
                                        " queues to be a power of two";
                                goto exit_action_not_supported;
                        }
                        if (rss_key_len != sizeof(flow->rss->key)) {
                                msg = "mlx4 supports exactly one RSS hash key"
                                        " length: "
                                        MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE);
                                goto exit_action_not_supported;
                        }
                        for (i = 1; i < rss->queue_num; ++i)
                                if (rss->queue[i] - rss->queue[i - 1] != 1)
                                        break;
                        if (i != rss->queue_num) {
                                msg = "mlx4 requires RSS contexts to use"
                                        " consecutive queue indices only";
                                goto exit_action_not_supported;
                        }
                        if (rss->queue[0] % rss->queue_num) {
                                msg = "mlx4 requires the first queue of a RSS"
                                        " context to be aligned on a multiple"
                                        " of the context size";
                                goto exit_action_not_supported;
                        }
                        if (rss->func &&
                            rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ) {
                                msg = "the only supported RSS hash function"
                                        " is Toeplitz";
                                goto exit_action_not_supported;
                        }
                        if (rss->level) {
                                msg = "a nonzero RSS encapsulation level is"
                                        " not supported";
                                goto exit_action_not_supported;
                        }
                        rte_errno = 0;
                        fields = mlx4_conv_rss_types(priv, rss->types, 0);
                        if (fields == (uint64_t)-1 && rte_errno) {
                                msg = "unsupported RSS hash type requested";
                                goto exit_action_not_supported;
                        }
                        flow->rss = mlx4_rss_get
                                (priv, fields, rss_key, rss->queue_num,
                                 rss->queue);
                        if (!flow->rss) {
                                msg = "either invalid parameters or not enough"
                                        " resources for additional multi-queue"
                                        " RSS context";
                                goto exit_action_not_supported;
                        }
                        break;
                default:
                        goto exit_action_not_supported;
                }
        }
        /* When fate is unknown, drop traffic. */
        if (!overlap)
                flow->drop = 1;
        /* Validation ends here. */
        if (!addr) {
                if (flow->rss)
                        mlx4_rss_put(flow->rss);
                return 0;
        }
        if (flow == &temp) {
                /* Allocate proper handle based on collected data. */
                const struct mlx4_malloc_vec vec[] = {
                        {
                                .align = alignof(struct rte_flow),
                                .size = sizeof(*flow),
                                .addr = (void **)&flow,
                        },
                        {
                                .align = alignof(struct ibv_flow_attr),
                                .size = temp.ibv_attr_size,
                                .addr = (void **)&temp.ibv_attr,
                        },
                };

                if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) {
                        if (temp.rss)
                                mlx4_rss_put(temp.rss);
                        return rte_flow_error_set
                                (error, -rte_errno,
                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                                 "flow rule handle allocation failure");
                }
                /* Most fields will be updated by second pass. */
                *flow = (struct rte_flow){
                        .ibv_attr = temp.ibv_attr,
                        .ibv_attr_size = sizeof(*flow->ibv_attr),
                        .rss = temp.rss,
                };
                *flow->ibv_attr = (struct ibv_flow_attr){
                        .type = IBV_FLOW_ATTR_NORMAL,
                        .size = sizeof(*flow->ibv_attr),
                        .priority = attr->priority,
                        .port = priv->port,
                };
                goto fill;
        }
        *addr = flow;
        return 0;
exit_item_not_supported:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                                  item, msg ? msg : "item not supported");
exit_action_not_supported:
        return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                                  action, msg ? msg : "action not supported");
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item pattern[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct mlx4_priv *priv = dev->data->dev_private;

        return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
}

/**
 * Get a drop flow rule resources instance.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
 *   is set.
 */
static struct mlx4_drop *
mlx4_drop_get(struct mlx4_priv *priv)
{
        struct mlx4_drop *drop = priv->drop;

        if (drop) {
                MLX4_ASSERT(drop->refcnt);
                MLX4_ASSERT(drop->priv == priv);
                ++drop->refcnt;
                return drop;
        }
        drop = rte_malloc(__func__, sizeof(*drop), 0);
        if (!drop)
                goto error;
        *drop = (struct mlx4_drop){
                .priv = priv,
                .refcnt = 1,
        };
        drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
        if (!drop->cq)
                goto error;
        drop->qp = mlx4_glue->create_qp
                (priv->pd,
                 &(struct ibv_qp_init_attr){
                        .send_cq = drop->cq,
                        .recv_cq = drop->cq,
                        .qp_type = IBV_QPT_RAW_PACKET,
                 });
        if (!drop->qp)
                goto error;
        priv->drop = drop;
        return drop;
error:
        if (drop) {
                if (drop->qp)
                        claim_zero(mlx4_glue->destroy_qp(drop->qp));
                if (drop->cq)
                        claim_zero(mlx4_glue->destroy_cq(drop->cq));
                rte_free(drop);
        }
        rte_errno = ENOMEM;
        return NULL;
}

/**
 * Give back a drop flow rule resources instance.
 *
 * @param drop
 *   Pointer to drop flow rule resources.
 */
static void
mlx4_drop_put(struct mlx4_drop *drop)
{
        MLX4_ASSERT(drop->refcnt);
        if (--drop->refcnt)
                return;
        drop->priv->drop = NULL;
        claim_zero(mlx4_glue->destroy_qp(drop->qp));
        claim_zero(mlx4_glue->destroy_cq(drop->cq));
        rte_free(drop);
}

/**
 * Toggle a configured flow rule.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   Flow rule handle to toggle.
 * @param enable
 *   Whether associated Verbs flow must be created or removed.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_toggle(struct mlx4_priv *priv,
                 struct rte_flow *flow,
                 int enable,
                 struct rte_flow_error *error)
{
        struct ibv_qp *qp = NULL;
        const char *msg;
        int err;

        if (!enable) {
                if (!flow->ibv_flow)
                        return 0;
                claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                if (flow->drop)
                        mlx4_drop_put(priv->drop);
                else if (flow->rss)
                        mlx4_rss_detach(flow->rss);
                return 0;
        }
        MLX4_ASSERT(flow->ibv_attr);
        if (!flow->internal &&
            !priv->isolated &&
            flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
                if (flow->ibv_flow) {
                        claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
                        flow->ibv_flow = NULL;
                        if (flow->drop)
                                mlx4_drop_put(priv->drop);
                        else if (flow->rss)
                                mlx4_rss_detach(flow->rss);
                }
                err = EACCES;
                msg = ("priority level "
                       MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
                       " is reserved when not in isolated mode");
                goto error;
        }
        if (flow->rss) {
                struct mlx4_rss *rss = flow->rss;
                int missing = 0;
                unsigned int i;

                /* Stop at the first nonexistent target queue. */
                for (i = 0; i != rss->queues; ++i)
                        if (rss->queue_id[i] >=
                            ETH_DEV(priv)->data->nb_rx_queues ||
                            !ETH_DEV(priv)->data->rx_queues[rss->queue_id[i]]) {
                                missing = 1;
                                break;
                        }
                if (flow->ibv_flow) {
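                        /*
                         * Nothing to do when the rule already behaves as
                         * required: missing queue with a drop rule, or no
                         * missing queue with a non-drop rule.
                         */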
                        if (missing ^ !flow->drop)
                                return 0;
                        /* Verbs flow needs updating. */
                        claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow));
                        flow->ibv_flow = NULL;
                        if (flow->drop)
                                mlx4_drop_put(priv->drop);
                        else
                                mlx4_rss_detach(rss);
                }
                if (!missing) {
                        err = mlx4_rss_attach(rss);
                        if (err) {
                                err = -err;
                                msg = "cannot create indirection table or hash"
                                        " QP to associate flow rule with";
                                goto error;
                        }
                        qp = rss->qp;
                }
                /* A missing target queue drops traffic implicitly. */
                flow->drop = missing;
        }
        if (flow->drop) {
                if (flow->ibv_flow)
                        return 0;
                mlx4_drop_get(priv);
                if (!priv->drop) {
                        err = rte_errno;
                        msg = "resources for drop flow rule cannot be created";
                        goto error;
                }
                qp = priv->drop->qp;
        }
        MLX4_ASSERT(qp);
        if (flow->ibv_flow)
                return 0;
        flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr);
        if (flow->ibv_flow)
                return 0;
        if (flow->drop)
                mlx4_drop_put(priv->drop);
        else if (flow->rss)
                mlx4_rss_detach(flow->rss);
        err = errno;
        msg = "flow rule rejected by device";
error:
        return rte_flow_error_set
                (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
mlx4_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item pattern[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct mlx4_priv *priv = dev->data->dev_private;
        struct rte_flow *flow;
        int err;

        err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
        if (err)
                return NULL;
        err = mlx4_flow_toggle(priv, flow, priv->started, error);
        if (!err) {
                struct rte_flow *curr = LIST_FIRST(&priv->flows);

                /* New rules are inserted after internal ones. */
                if (!curr || !curr->internal) {
                        LIST_INSERT_HEAD(&priv->flows, flow, next);
                } else {
                        while (LIST_NEXT(curr, next) &&
                               LIST_NEXT(curr, next)->internal)
                                curr = LIST_NEXT(curr, next);
                        LIST_INSERT_AFTER(curr, flow, next);
                }
                return flow;
        }
        if (flow->rss)
                mlx4_rss_put(flow->rss);
        rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
                           error->message);
        rte_free(flow);
        return NULL;
}

/**
 * Configure isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
        struct mlx4_priv *priv = dev->data->dev_private;

        if (!!enable == !!priv->isolated)
                return 0;
        priv->isolated = !!enable;
        if (mlx4_flow_sync(priv, error)) {
                priv->isolated = !enable;
                return -rte_errno;
        }
        return 0;
}

/**
 * Destroy a flow rule.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
mlx4_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        struct mlx4_priv *priv = dev->data->dev_private;
        int err = mlx4_flow_toggle(priv, flow, 0, error);

        if (err)
                return err;
        LIST_REMOVE(flow, next);
        if (flow->rss)
                mlx4_rss_put(flow->rss);
        rte_free(flow);
        return 0;
}

/**
 * Destroy user-configured flow rules.
 *
 * This function skips internal flow rules.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
static int
mlx4_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct mlx4_priv *priv = dev->data->dev_private;
        struct rte_flow *flow = LIST_FIRST(&priv->flows);

        while (flow) {
                struct rte_flow *next = LIST_NEXT(flow, next);

                if (!flow->internal)
                        mlx4_flow_destroy(dev, flow, error);
                flow = next;
        }
        return 0;
}

/**
 * Helper function to determine the next configured VLAN filter.
 *
 * @param priv
 *   Pointer to private structure.
 * @param vlan
 *   VLAN ID to use as a starting point.
 *
 * @return
 *   Next configured VLAN ID or a high value (>= 4096) if there is none.
 */
static uint16_t
mlx4_flow_internal_next_vlan(struct mlx4_priv *priv, uint16_t vlan)
{
        while (vlan < 4096) {
                if (ETH_DEV(priv)->data->vlan_filter_conf.ids[vlan / 64] &
                    (UINT64_C(1) << (vlan % 64)))
                        return vlan;
                ++vlan;
        }
        return vlan;
}

/**
 * Generate internal flow rules.
 *
 * Various flow rules are created depending on the mode the device is in:
 *
 * 1. Promiscuous:
 *       port MAC + broadcast + catch-all (VLAN filtering is ignored).
 * 2. All multicast:
 *       port MAC/VLAN + broadcast + catch-all multicast.
 * 3. Otherwise:
 *       port MAC/VLAN + broadcast MAC/VLAN.
 *
 * About MAC flow rules:
 *
 * - MAC flow rules are generated from @p dev->data->mac_addrs
 *   (@p priv->mac array).
 * - An additional flow rule for Ethernet broadcasts is also generated.
 * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER
 *   is enabled and VLAN filters are configured.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_internal(struct mlx4_priv *priv, struct rte_flow_error *error)
{
        struct rte_flow_attr attr = {
                .priority = MLX4_FLOW_PRIORITY_LAST,
                .ingress = 1,
        };
        struct rte_flow_item_eth eth_spec;
        const struct rte_flow_item_eth eth_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const struct rte_flow_item_eth eth_allmulti = {
                .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
        };
        struct rte_flow_item_vlan vlan_spec;
        const struct rte_flow_item_vlan vlan_mask = {
                .tci = RTE_BE16(0x0fff),
        };
        struct rte_flow_item pattern[] = {
                {
                        .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
                },
                {
                        .type = RTE_FLOW_ITEM_TYPE_ETH,
                        .spec = &eth_spec,
                        .mask = &eth_mask,
                },
                {
                        /* Replaced with VLAN if filtering is enabled. */
                        .type = RTE_FLOW_ITEM_TYPE_END,
                },
                {
                        .type = RTE_FLOW_ITEM_TYPE_END,
                },
        };
        /*
         * Round number of queues down to their previous power of 2 to
         * comply with RSS context limitations. Extra queues silently do not
         * get RSS by default.
         */
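        /* E.g. 6 Rx queues: rte_align32pow2(6 + 1) >> 1 == 4. */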
1337        uint32_t queues =
1338                rte_align32pow2(ETH_DEV(priv)->data->nb_rx_queues + 1) >> 1;
1339        uint16_t queue[queues];
1340        struct rte_flow_action_rss action_rss = {
1341                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1342                .level = 0,
1343                .types = 0,
1344                .key_len = MLX4_RSS_HASH_KEY_SIZE,
1345                .queue_num = queues,
1346                .key = mlx4_rss_hash_key_default,
1347                .queue = queue,
1348        };
1349        struct rte_flow_action actions[] = {
1350                {
1351                        .type = RTE_FLOW_ACTION_TYPE_RSS,
1352                        .conf = &action_rss,
1353                },
1354                {
1355                        .type = RTE_FLOW_ACTION_TYPE_END,
1356                },
1357        };
1358        struct rte_ether_addr *rule_mac = &eth_spec.dst;
1359        rte_be16_t *rule_vlan =
1360                (ETH_DEV(priv)->data->dev_conf.rxmode.offloads &
1361                 DEV_RX_OFFLOAD_VLAN_FILTER) &&
1362                !ETH_DEV(priv)->data->promiscuous ?
1363                &vlan_spec.tci :
1364                NULL;
1365        uint16_t vlan = 0;
1366        struct rte_flow *flow;
1367        unsigned int i;
1368        int err = 0;
1369
1370        /* Nothing to be done if there are no Rx queues. */
1371        if (!queues)
1372                goto error;
1373        /* Prepare default RSS configuration. */
1374        for (i = 0; i != queues; ++i)
1375                queue[i] = i;
1376        /*
1377         * Set up VLAN item if filtering is enabled and at least one VLAN
1378         * filter is configured.
1379         */
1380        if (rule_vlan) {
1381                vlan = mlx4_flow_internal_next_vlan(priv, 0);
1382                if (vlan < 4096) {
1383                        pattern[2] = (struct rte_flow_item){
1384                                .type = RTE_FLOW_ITEM_TYPE_VLAN,
1385                                .spec = &vlan_spec,
1386                                .mask = &vlan_mask,
1387                        };
1388next_vlan:
1389                        *rule_vlan = rte_cpu_to_be_16(vlan);
1390                } else {
1391                        rule_vlan = NULL;
1392                }
1393        }
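            /*
             * The MAC loop below runs once per configured VLAN filter:
             * after each pass, control jumps back to next_vlan above with
             * the next VLAN ID until none remain.
             */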
1394        for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) {
1395                const struct rte_ether_addr *mac;
1396
1397                /* Broadcasts are handled by an extra iteration. */
1398                if (i < RTE_DIM(priv->mac))
1399                        mac = &priv->mac[i];
1400                else
1401                        mac = &eth_mask.dst;
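                    /* eth_mask.dst conveniently provides ff:ff:ff:ff:ff:ff. */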
1402                if (rte_is_zero_ether_addr(mac))
1403                        continue;
1404                /* Check if MAC flow rule is already present. */
1405                for (flow = LIST_FIRST(&priv->flows);
1406                     flow && flow->internal;
1407                     flow = LIST_NEXT(flow, next)) {
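                            /*
                             * The single Ethernet spec is laid out right
                             * after the ibv_flow_attr header, as asserted
                             * below.
                             */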
1408                        const struct ibv_flow_spec_eth *eth =
1409                                (const void *)((uintptr_t)flow->ibv_attr +
1410                                               sizeof(*flow->ibv_attr));
1411                        unsigned int j;
1412
1413                        if (!flow->mac)
1414                                continue;
1415                        MLX4_ASSERT(flow->ibv_attr->type ==
1416                                    IBV_FLOW_ATTR_NORMAL);
1417                        MLX4_ASSERT(flow->ibv_attr->num_of_specs == 1);
1418                        MLX4_ASSERT(eth->type == IBV_FLOW_SPEC_ETH);
1419                        MLX4_ASSERT(flow->rss);
1420                        if (rule_vlan &&
1421                            (eth->val.vlan_tag != *rule_vlan ||
1422                             eth->mask.vlan_tag != RTE_BE16(0x0fff)))
1423                                continue;
1424                        if (!rule_vlan && eth->mask.vlan_tag)
1425                                continue;
1426                        for (j = 0; j != sizeof(mac->addr_bytes); ++j)
1427                                if (eth->val.dst_mac[j] != mac->addr_bytes[j] ||
1428                                    eth->mask.dst_mac[j] != UINT8_C(0xff) ||
1429                                    eth->val.src_mac[j] != UINT8_C(0x00) ||
1430                                    eth->mask.src_mac[j] != UINT8_C(0x00))
1431                                        break;
1432                        if (j != sizeof(mac->addr_bytes))
1433                                continue;
1434                        if (flow->rss->queues != queues ||
1435                            memcmp(flow->rss->queue_id, action_rss.queue,
1436                                   queues * sizeof(flow->rss->queue_id[0])))
1437                                continue;
1438                        break;
1439                }
1440                if (!flow || !flow->internal) {
1441                        /* Not found, create a new flow rule. */
1442                        memcpy(rule_mac, mac, sizeof(*mac));
1443                        flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
1444                                                actions, error);
1445                        if (!flow) {
1446                                err = -rte_errno;
1447                                goto error;
1448                        }
1449                }
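                    /* Selected rules survive the cleanup pass below. */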
1450                flow->select = 1;
1451                flow->mac = 1;
1452        }
1453        if (rule_vlan) {
1454                vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1);
1455                if (vlan < 4096)
1456                        goto next_vlan;
1457        }
1458        /* Take care of promiscuous and all multicast flow rules. */
1459        if (ETH_DEV(priv)->data->promiscuous ||
1460            ETH_DEV(priv)->data->all_multicast) {
1461                for (flow = LIST_FIRST(&priv->flows);
1462                     flow && flow->internal;
1463                     flow = LIST_NEXT(flow, next)) {
1464                        if (ETH_DEV(priv)->data->promiscuous) {
1465                                if (flow->promisc)
1466                                        break;
1467                        } else {
1468                                MLX4_ASSERT(ETH_DEV(priv)->data->all_multicast);
1469                                if (flow->allmulti)
1470                                        break;
1471                        }
1472                }
1473                if (flow && flow->internal) {
1474                        MLX4_ASSERT(flow->rss);
1475                        if (flow->rss->queues != queues ||
1476                            memcmp(flow->rss->queue_id, action_rss.queue,
1477                                   queues * sizeof(flow->rss->queue_id[0])))
1478                                flow = NULL;
1479                }
1480                if (!flow || !flow->internal) {
1481                        /* Not found, create a new flow rule. */
1482                        if (ETH_DEV(priv)->data->promiscuous) {
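                                    /* NULL spec/mask: match any Ethernet frame. */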
1483                                pattern[1].spec = NULL;
1484                                pattern[1].mask = NULL;
1485                        } else {
1486                                MLX4_ASSERT(ETH_DEV(priv)->data->all_multicast);
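                                    /* Spec == mask: only the group bit must match. */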
1487                                pattern[1].spec = &eth_allmulti;
1488                                pattern[1].mask = &eth_allmulti;
1489                        }
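                            /* Drop the VLAN item by overwriting slot 2 with END. */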
1490                        pattern[2] = pattern[3];
1491                        flow = mlx4_flow_create(ETH_DEV(priv), &attr, pattern,
1492                                                actions, error);
1493                        if (!flow) {
1494                                err = -rte_errno;
1495                                goto error;
1496                        }
1497                }
1498                MLX4_ASSERT(flow->promisc || flow->allmulti);
1499                flow->select = 1;
1500        }
1501error:
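            /* Also reached on success, with err still 0. */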
1502        /* Clear selection and clean up stale internal flow rules. */
1503        flow = LIST_FIRST(&priv->flows);
1504        while (flow && flow->internal) {
1505                struct rte_flow *next = LIST_NEXT(flow, next);
1506
1507                if (!flow->select)
1508                        claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
1509                                                     error));
1510                else
1511                        flow->select = 0;
1512                flow = next;
1513        }
1514        return err;
1515}
1516
1517/**
1518 * Synchronize flow rules.
1519 *
1520 * This function synchronizes flow rules with the state of the device by
1521 * taking into account isolated mode and whether target queues are
1522 * configured.
1523 *
1524 * @param priv
1525 *   Pointer to private structure.
1526 * @param[out] error
1527 *   Perform verbose error reporting if not NULL.
1528 *
1529 * @return
1530 *   0 on success, a negative errno value otherwise and rte_errno is set.
1531 */
1532int
1533mlx4_flow_sync(struct mlx4_priv *priv, struct rte_flow_error *error)
1534{
1535        struct rte_flow *flow;
1536        int ret;
1537
1538        /* Internal flow rules are guaranteed to come first in the list. */
1539        if (priv->isolated) {
1540                /*
1541                 * Get rid of them in isolated mode, stopping at the first
1542                 * non-internal rule found.
1543                 */
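                    /*
                     * LIST_FIRST() is re-read on purpose: destroying a rule
                     * unlinks it from the list.
                     */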
1544                for (flow = LIST_FIRST(&priv->flows);
1545                     flow && flow->internal;
1546                     flow = LIST_FIRST(&priv->flows))
1547                        claim_zero(mlx4_flow_destroy(ETH_DEV(priv), flow,
1548                                                     error));
1549        } else {
1550                /* Refresh internal rules. */
1551                ret = mlx4_flow_internal(priv, error);
1552                if (ret)
1553                        return ret;
1554        }
1555        /* Toggle the remaining flow rules according to device state. */
1556        LIST_FOREACH(flow, &priv->flows, next) {
1557                ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1558                if (ret)
1559                        return ret;
1560        }
1561        if (!priv->started)
1562                MLX4_ASSERT(!priv->drop);
1563        return 0;
1564}
1565
1566/**
1567 * Clean up all flow rules.
1568 *
1569 * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1570 * rules regardless of whether they are internal or user-configured.
1571 *
1572 * @param priv
1573 *   Pointer to private structure.
1574 */
1575void
1576mlx4_flow_clean(struct mlx4_priv *priv)
1577{
1578        struct rte_flow *flow;
1579
1580        while ((flow = LIST_FIRST(&priv->flows)))
1581                mlx4_flow_destroy(ETH_DEV(priv), flow, NULL);
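            /* With every rule destroyed, no RSS context should remain. */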
1582        MLX4_ASSERT(LIST_EMPTY(&priv->rss));
1583}
1584
1585static const struct rte_flow_ops mlx4_flow_ops = {
1586        .validate = mlx4_flow_validate,
1587        .create = mlx4_flow_create,
1588        .destroy = mlx4_flow_destroy,
1589        .flush = mlx4_flow_flush,
1590        .isolate = mlx4_flow_isolate,
1591};
1592
1593/**
1594 * Get rte_flow callbacks.
1595 *
1596 * @param dev
1597 *   Pointer to Ethernet device structure.
1598 * @param ops
1599 *   Pointer to operation-specific structure.
1600 *
1601 * @return 0
1602 */
1603int
1604mlx4_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
1605                  const struct rte_flow_ops **ops)
1606{
1607        *ops = &mlx4_flow_ops;
1608        return 0;
1609}
1610
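    /*
     * Illustration only (not part of this driver): the generic rte_flow API
     * reaches these callbacks through the ethdev layer, roughly as follows
     * for an already configured port:
     *
     *     const struct rte_flow_ops *ops = NULL;
     *     struct rte_eth_dev *dev = &rte_eth_devices[port_id];
     *
     *     if (dev->dev_ops->flow_ops_get(dev, &ops) == 0 && ops != NULL)
     *             ops->validate(dev, attr, pattern, actions, error);
     */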