dpdk/drivers/net/mlx4/mlx4_rxq.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Rx queues configuration for mlx4 driver.
 */

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/mlx4dv.h>
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_flow.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/**
 * Historical RSS hash key.
 *
 * This used to be the default for mlx4 in Linux before v3.19 switched to
 * generating random hash keys through netdev_rss_key_fill().
 *
 * It is used in this PMD for consistency with past DPDK releases but can
 * now be overridden through user configuration.
 *
 * Note: this is not const to work around API quirks.
 */
uint8_t
mlx4_rss_hash_key_default[MLX4_RSS_HASH_KEY_SIZE] = {
        0x2c, 0xc6, 0x81, 0xd1,
        0x5b, 0xdb, 0xf4, 0xf7,
        0xfc, 0xa2, 0x83, 0x19,
        0xdb, 0x1a, 0x3e, 0x94,
        0x6b, 0x9e, 0x38, 0xd9,
        0x2c, 0x9c, 0x03, 0xd1,
        0xad, 0x99, 0x44, 0xa7,
        0xd9, 0x56, 0x3d, 0x59,
        0x06, 0x3c, 0x25, 0xf3,
        0xfc, 0x1f, 0xdc, 0x2a,
};

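/*
 * The historical key above is only a fallback: applications may supply
 * their own 40-byte key at device configuration time. Hedged, illustrative
 * sketch of such application-side code (not part of this driver); my_key
 * stands for any caller-provided array of MLX4_RSS_HASH_KEY_SIZE bytes:
 *
 *        static uint8_t my_key[MLX4_RSS_HASH_KEY_SIZE] = { ... };
 *        struct rte_eth_conf conf = {
 *                .rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
 *                .rx_adv_conf.rss_conf = {
 *                        .rss_key = my_key,
 *                        .rss_key_len = MLX4_RSS_HASH_KEY_SIZE,
 *                        .rss_hf = RTE_ETH_RSS_IP,
 *                },
 *        };
 *        ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
 */
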
/**
 * Obtain an RSS context with specified properties.
 *
 * Used when creating a flow rule targeting one or several Rx queues.
 *
 * If a matching RSS context already exists, it is returned with its
 * reference count incremented.
 *
 * @param priv
 *   Pointer to private structure.
 * @param fields
 *   Fields for RSS processing (Verbs format).
 * @param[in] key
 *   Hash key to use (whose size is exactly MLX4_RSS_HASH_KEY_SIZE).
 * @param queues
 *   Number of target queues.
 * @param[in] queue_id
 *   Target queues.
 *
 * @return
 *   Pointer to RSS context on success, NULL otherwise and rte_errno is set.
 */
struct mlx4_rss *
mlx4_rss_get(struct mlx4_priv *priv, uint64_t fields,
             const uint8_t key[MLX4_RSS_HASH_KEY_SIZE],
             uint16_t queues, const uint16_t queue_id[])
{
        struct mlx4_rss *rss;
        size_t queue_id_size = sizeof(queue_id[0]) * queues;

        LIST_FOREACH(rss, &priv->rss, next)
                if (fields == rss->fields &&
                    queues == rss->queues &&
                    !memcmp(key, rss->key, MLX4_RSS_HASH_KEY_SIZE) &&
                    !memcmp(queue_id, rss->queue_id, queue_id_size)) {
                        ++rss->refcnt;
                        return rss;
                }
        rss = rte_malloc(__func__, offsetof(struct mlx4_rss, queue_id) +
                         queue_id_size, 0);
        if (!rss)
                goto error;
        *rss = (struct mlx4_rss){
                .priv = priv,
                .refcnt = 1,
                .usecnt = 0,
                .qp = NULL,
                .ind = NULL,
                .fields = fields,
                .queues = queues,
        };
        memcpy(rss->key, key, MLX4_RSS_HASH_KEY_SIZE);
        memcpy(rss->queue_id, queue_id, queue_id_size);
        LIST_INSERT_HEAD(&priv->rss, rss, next);
        return rss;
error:
        rte_errno = ENOMEM;
        return NULL;
}

/**
 * Release an RSS context instance.
 *
 * Used when destroying a flow rule targeting one or several Rx queues.
 *
 * This function decrements the reference count of the context and destroys
 * it after reaching 0. The context must have no users at this point; all
 * prior calls to mlx4_rss_attach() must have been followed by matching
 * calls to mlx4_rss_detach().
 *
 * @param rss
 *   RSS context to release.
 */
void
mlx4_rss_put(struct mlx4_rss *rss)
{
        MLX4_ASSERT(rss->refcnt);
        if (--rss->refcnt)
                return;
        MLX4_ASSERT(!rss->usecnt);
        MLX4_ASSERT(!rss->qp);
        MLX4_ASSERT(!rss->ind);
        LIST_REMOVE(rss, next);
        rte_free(rss);
}

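/*
 * Minimal usage sketch for the pair above (hedged, caller-side code that is
 * not part of this file; nb_queues and queue_ids stand for the flow rule's
 * target queue list). Every successful mlx4_rss_get() must eventually be
 * balanced by exactly one mlx4_rss_put():
 *
 *        struct mlx4_rss *rss;
 *
 *        rss = mlx4_rss_get(priv, IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4,
 *                           mlx4_rss_hash_key_default, nb_queues, queue_ids);
 *        if (rss == NULL)
 *                return -rte_errno;
 *        ...
 *        mlx4_rss_put(rss);
 */
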
/**
 * Attach a user to an RSS context instance.
 *
 * Used when the RSS QP and indirection table objects must be instantiated,
 * that is, when a flow rule must be enabled.
 *
 * This function increments the usage count of the context.
 *
 * @param rss
 *   RSS context to attach to.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rss_attach(struct mlx4_rss *rss)
{
        MLX4_ASSERT(rss->refcnt);
        if (rss->usecnt++) {
                MLX4_ASSERT(rss->qp);
                MLX4_ASSERT(rss->ind);
                return 0;
        }

        struct ibv_wq *ind_tbl[rss->queues];
        struct mlx4_priv *priv = rss->priv;
        struct rte_eth_dev *dev = ETH_DEV(priv);
        const char *msg;
        unsigned int i = 0;
        int ret;

        if (!rte_is_power_of_2(RTE_DIM(ind_tbl))) {
                ret = EINVAL;
                msg = "number of RSS queues must be a power of two";
                goto error;
        }
        for (i = 0; i != RTE_DIM(ind_tbl); ++i) {
                uint16_t id = rss->queue_id[i];
                struct rxq *rxq = NULL;

                if (id < dev->data->nb_rx_queues)
                        rxq = dev->data->rx_queues[id];
                if (!rxq) {
                        ret = EINVAL;
                        msg = "RSS target queue is not configured";
                        goto error;
                }
                ret = mlx4_rxq_attach(rxq);
                if (ret) {
                        ret = -ret;
                        msg = "unable to attach RSS target queue";
                        goto error;
                }
                ind_tbl[i] = rxq->wq;
        }
        rss->ind = mlx4_glue->create_rwq_ind_table
                (priv->ctx,
                 &(struct ibv_rwq_ind_table_init_attr){
                        .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)),
                        .ind_tbl = ind_tbl,
                        .comp_mask = 0,
                 });
        if (!rss->ind) {
                ret = errno ? errno : EINVAL;
                msg = "RSS indirection table creation failure";
                goto error;
        }
        rss->qp = mlx4_glue->create_qp_ex
                (priv->ctx,
                 &(struct ibv_qp_init_attr_ex){
                        .comp_mask = (IBV_QP_INIT_ATTR_PD |
                                      IBV_QP_INIT_ATTR_RX_HASH |
                                      IBV_QP_INIT_ATTR_IND_TABLE),
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .pd = priv->pd,
                        .rwq_ind_tbl = rss->ind,
                        .rx_hash_conf = {
                                .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = MLX4_RSS_HASH_KEY_SIZE,
                                .rx_hash_key = rss->key,
                                .rx_hash_fields_mask = rss->fields,
                        },
                 });
        if (!rss->qp) {
                ret = errno ? errno : EINVAL;
                msg = "RSS hash QP creation failure";
                goto error;
        }
        ret = mlx4_glue->modify_qp
                (rss->qp,
                 &(struct ibv_qp_attr){
                        .qp_state = IBV_QPS_INIT,
                        .port_num = priv->port,
                 },
                 IBV_QP_STATE | IBV_QP_PORT);
        if (ret) {
                msg = "failed to switch RSS hash QP to INIT state";
                goto error;
        }
        ret = mlx4_glue->modify_qp
                (rss->qp,
                 &(struct ibv_qp_attr){
                        .qp_state = IBV_QPS_RTR,
                 },
                 IBV_QP_STATE);
        if (ret) {
                msg = "failed to switch RSS hash QP to RTR state";
                goto error;
        }
        return 0;
error:
        if (rss->qp) {
                claim_zero(mlx4_glue->destroy_qp(rss->qp));
                rss->qp = NULL;
        }
        if (rss->ind) {
                claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
                rss->ind = NULL;
        }
        while (i--)
                mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
        ERROR("mlx4: %s", msg);
        --rss->usecnt;
        rte_errno = ret;
        return -ret;
}

/**
 * Detach a user from an RSS context instance.
 *
 * Used when disabling (not destroying) a flow rule.
 *
 * This function decrements the usage count of the context and destroys
 * usage resources after reaching 0.
 *
 * @param rss
 *   RSS context to detach from.
 */
void
mlx4_rss_detach(struct mlx4_rss *rss)
{
        struct mlx4_priv *priv = rss->priv;
        struct rte_eth_dev *dev = ETH_DEV(priv);
        unsigned int i;

        MLX4_ASSERT(rss->refcnt);
        MLX4_ASSERT(rss->qp);
        MLX4_ASSERT(rss->ind);
        if (--rss->usecnt)
                return;
        claim_zero(mlx4_glue->destroy_qp(rss->qp));
        rss->qp = NULL;
        claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind));
        rss->ind = NULL;
        for (i = 0; i != rss->queues; ++i)
                mlx4_rxq_detach(dev->data->rx_queues[rss->queue_id[i]]);
}

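/*
 * mlx4_rss_attach() and mlx4_rss_detach() bracket the life of the Verbs
 * objects (hash QP and indirection table) while a flow rule is enabled.
 * Hedged caller-side sketch, assuming rss was obtained through
 * mlx4_rss_get():
 *
 *        if (mlx4_rss_attach(rss) < 0)
 *                return -rte_errno;
 *        ... flow rule enabled, Rx traffic spread over rss->queue_id[] ...
 *        mlx4_rss_detach(rss);
 *
 * Attach/detach calls may nest thanks to the usage count: only the first
 * attach creates the objects and only the last detach destroys them.
 */
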
/**
 * Initialize common RSS context resources.
 *
 * Because ConnectX-3 hardware limitations require a fixed order in the
 * indirection table, WQs must be allocated sequentially to be part of a
 * common RSS context.
 *
 * Since a newly created WQ cannot be moved to a different context, this
 * function allocates them all at once, one for each configured Rx queue,
 * as well as all related resources (CQs and mbufs).
 *
 * This must therefore be done before creating any Rx flow rules relying on
 * indirection tables.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rss_init(struct mlx4_priv *priv)
{
        struct rte_eth_dev *dev = ETH_DEV(priv);
        uint8_t log2_range = rte_log2_u32(dev->data->nb_rx_queues);
        uint32_t wq_num_prev = 0;
        const char *msg;
        unsigned int i;
        int ret;

        if (priv->rss_init)
                return 0;
        if (ETH_DEV(priv)->data->nb_rx_queues > priv->hw_rss_max_qps) {
                ERROR("RSS does not support more than %d queues",
                      priv->hw_rss_max_qps);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        /* Prepare range for RSS contexts before creating the first WQ. */
        ret = mlx4_glue->dv_set_context_attr
                (priv->ctx,
                 MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ,
                 &log2_range);
        if (ret) {
                ERROR("cannot set up range size for RSS context to %u"
                      " (for %u Rx queues), error: %s",
                      1 << log2_range, dev->data->nb_rx_queues, strerror(ret));
                rte_errno = ret;
                return -ret;
        }
        for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
                struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];
                struct ibv_cq *cq;
                struct ibv_wq *wq;
                uint32_t wq_num;

                /* Attach the configured Rx queues. */
                if (rxq) {
                        MLX4_ASSERT(!rxq->usecnt);
                        ret = mlx4_rxq_attach(rxq);
                        if (!ret) {
                                wq_num = rxq->wq->wq_num;
                                goto wq_num_check;
                        }
                        ret = -ret;
                        msg = "unable to create Rx queue resources";
                        goto error;
                }
                /*
                 * WQs are temporarily allocated for unconfigured Rx queues
                 * to maintain proper index alignment in indirection table
                 * by skipping unused WQ numbers.
                 *
                 * The reason this works at all even though these WQs are
                 * immediately destroyed is that WQNs are allocated
                 * sequentially and are guaranteed to never be reused in the
                 * same context by the underlying implementation.
                 */
                cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0);
                if (!cq) {
                        ret = ENOMEM;
                        msg = "placeholder CQ creation failure";
                        goto error;
                }
                wq = mlx4_glue->create_wq
                        (priv->ctx,
                         &(struct ibv_wq_init_attr){
                                .wq_type = IBV_WQT_RQ,
                                .max_wr = 1,
                                .max_sge = 1,
                                .pd = priv->pd,
                                .cq = cq,
                         });
                if (wq) {
                        wq_num = wq->wq_num;
                        claim_zero(mlx4_glue->destroy_wq(wq));
                } else {
                        wq_num = 0; /* Shut up GCC 4.8 warnings. */
                }
                claim_zero(mlx4_glue->destroy_cq(cq));
                if (!wq) {
                        ret = ENOMEM;
                        msg = "placeholder WQ creation failure";
                        goto error;
                }
wq_num_check:
                /*
                 * While guaranteed by the implementation, make sure WQ
                 * numbers are really sequential (as the saying goes,
                 * trust, but verify).
                 */
                if (i && wq_num - wq_num_prev != 1) {
                        if (rxq)
                                mlx4_rxq_detach(rxq);
                        ret = ERANGE;
                        msg = "WQ numbers are not sequential";
                        goto error;
                }
                wq_num_prev = wq_num;
        }
        priv->rss_init = 1;
        return 0;
error:
        ERROR("cannot initialize common RSS resources (queue %u): %s: %s",
              i, msg, strerror(ret));
        while (i--) {
                struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];

                if (rxq)
                        mlx4_rxq_detach(rxq);
        }
        rte_errno = ret;
        return -ret;
}

/**
 * Release common RSS context resources.
 *
 * As the reverse of mlx4_rss_init(), this must be done after removing all
 * flow rules relying on indirection tables.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_rss_deinit(struct mlx4_priv *priv)
{
        unsigned int i;

        if (!priv->rss_init)
                return;
        for (i = 0; i != ETH_DEV(priv)->data->nb_rx_queues; ++i) {
                struct rxq *rxq = ETH_DEV(priv)->data->rx_queues[i];

                if (rxq) {
                        MLX4_ASSERT(rxq->usecnt == 1);
                        mlx4_rxq_detach(rxq);
                }
        }
        priv->rss_init = 0;
}

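/*
 * Ordering illustrated with a hedged sketch of the surrounding start/stop
 * paths (not part of this file): mlx4_rss_init() must complete before any
 * flow rule relying on indirection tables is created, and mlx4_rss_deinit()
 * may only run once all such rules are gone.
 *
 *        ret = mlx4_rss_init(priv);
 *        if (ret < 0)
 *                return ret;
 *        ... create RSS flow rules, receive traffic ...
 *        ... destroy RSS flow rules ...
 *        mlx4_rss_deinit(priv);
 */
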
/**
 * Attach a user to an Rx queue.
 *
 * Used when the resources of an Rx queue must be instantiated for it to
 * become usable.
 *
 * This function increments the usage count of the Rx queue.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rxq_attach(struct rxq *rxq)
{
        if (rxq->usecnt++) {
                MLX4_ASSERT(rxq->cq);
                MLX4_ASSERT(rxq->wq);
                MLX4_ASSERT(rxq->wqes);
                MLX4_ASSERT(rxq->rq_db);
                return 0;
        }

        struct mlx4_priv *priv = rxq->priv;
        struct rte_eth_dev *dev = ETH_DEV(priv);
        const uint32_t elts_n = 1 << rxq->elts_n;
        const uint32_t sges_n = 1 << rxq->sges_n;
        struct rte_mbuf *(*elts)[elts_n] = rxq->elts;
        struct mlx4dv_obj mlxdv;
        struct mlx4dv_rwq dv_rwq;
        struct mlx4dv_cq dv_cq = { .comp_mask = MLX4DV_CQ_MASK_UAR, };
        const char *msg;
        struct ibv_cq *cq = NULL;
        struct ibv_wq *wq = NULL;
        uint32_t create_flags = 0;
        uint32_t comp_mask = 0;
        volatile struct mlx4_wqe_data_seg (*wqes)[];
        unsigned int i;
        int ret;

        MLX4_ASSERT(rte_is_power_of_2(elts_n));
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_RX_QUEUE;
        priv->verbs_alloc_ctx.obj = rxq;
        cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL,
                                  rxq->channel, 0);
        if (!cq) {
                ret = ENOMEM;
                msg = "CQ creation failure";
                goto error;
        }
        /* By default, FCS (CRC) is stripped by hardware. */
        if (rxq->crc_present) {
                create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
                comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
        }
        wq = mlx4_glue->create_wq
                (priv->ctx,
                 &(struct ibv_wq_init_attr){
                        .wq_type = IBV_WQT_RQ,
                        .max_wr = elts_n / sges_n,
                        .max_sge = sges_n,
                        .pd = priv->pd,
                        .cq = cq,
                        .comp_mask = comp_mask,
                        .create_flags = create_flags,
                 });
        if (!wq) {
                ret = errno ? errno : EINVAL;
                msg = "WQ creation failure";
                goto error;
        }
        ret = mlx4_glue->modify_wq
                (wq,
                 &(struct ibv_wq_attr){
                        .attr_mask = IBV_WQ_ATTR_STATE,
                        .wq_state = IBV_WQS_RDY,
                 });
        if (ret) {
                msg = "WQ state change to IBV_WQS_RDY failed";
                goto error;
        }
        /* Retrieve device queue information. */
        mlxdv.cq.in = cq;
        mlxdv.cq.out = &dv_cq;
        mlxdv.rwq.in = wq;
        mlxdv.rwq.out = &dv_rwq;
        ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ);
        if (ret) {
                msg = "failed to obtain device information from WQ/CQ objects";
                goto error;
        }
        /* Pre-register Rx mempool. */
        DEBUG("port %u Rx queue %u registering mp %s having %u chunks",
              ETH_DEV(priv)->data->port_id, rxq->stats.idx,
              rxq->mp->name, rxq->mp->nb_mem_chunks);
        mlx4_mr_update_mp(dev, &rxq->mr_ctrl, rxq->mp);
        wqes = (volatile struct mlx4_wqe_data_seg (*)[])
                ((uintptr_t)dv_rwq.buf.buf + dv_rwq.rq.offset);
        for (i = 0; i != RTE_DIM(*elts); ++i) {
                volatile struct mlx4_wqe_data_seg *scat = &(*wqes)[i];
                struct rte_mbuf *buf = rte_pktmbuf_alloc(rxq->mp);

                if (buf == NULL) {
                        while (i--) {
                                rte_pktmbuf_free_seg((*elts)[i]);
                                (*elts)[i] = NULL;
                        }
                        ret = ENOMEM;
                        msg = "cannot allocate mbuf";
                        goto error;
                }
                /* Headroom is reserved by rte_pktmbuf_alloc(). */
                MLX4_ASSERT(buf->data_off == RTE_PKTMBUF_HEADROOM);
                /* Buffer is supposed to be empty. */
                MLX4_ASSERT(rte_pktmbuf_data_len(buf) == 0);
                MLX4_ASSERT(rte_pktmbuf_pkt_len(buf) == 0);
                /* Only the first segment keeps headroom. */
                if (i % sges_n)
                        buf->data_off = 0;
                buf->port = rxq->port_id;
                buf->data_len = rte_pktmbuf_tailroom(buf);
                buf->pkt_len = rte_pktmbuf_tailroom(buf);
                buf->nb_segs = 1;
                *scat = (struct mlx4_wqe_data_seg){
                        .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
                                                                  uintptr_t)),
                        .byte_count = rte_cpu_to_be_32(buf->data_len),
                        .lkey = mlx4_rx_mb2mr(rxq, buf),
                };
                (*elts)[i] = buf;
        }
        DEBUG("%p: allocated and configured %u segments (max %u packets)",
              (void *)rxq, elts_n, elts_n / sges_n);
        rxq->cq = cq;
        rxq->wq = wq;
        rxq->wqes = wqes;
        rxq->rq_db = dv_rwq.rdb;
        rxq->mcq.buf = dv_cq.buf.buf;
        rxq->mcq.cqe_cnt = dv_cq.cqe_cnt;
        rxq->mcq.set_ci_db = dv_cq.set_ci_db;
        rxq->mcq.cqe_64 = (dv_cq.cqe_size & 64) ? 1 : 0;
        rxq->mcq.arm_db = dv_cq.arm_db;
        rxq->mcq.arm_sn = dv_cq.arm_sn;
        rxq->mcq.cqn = dv_cq.cqn;
        rxq->mcq.cq_uar = dv_cq.cq_uar;
        rxq->mcq.cq_db_reg = (uint8_t *)dv_cq.cq_uar + MLX4_CQ_DOORBELL;
        /* Update doorbell counter. */
        rxq->rq_ci = elts_n / sges_n;
        rte_wmb();
        *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
        return 0;
error:
        if (wq)
                claim_zero(mlx4_glue->destroy_wq(wq));
        if (cq)
                claim_zero(mlx4_glue->destroy_cq(cq));
        --rxq->usecnt;
        rte_errno = ret;
        ERROR("error while attaching Rx queue %p: %s: %s",
              (void *)rxq, msg, strerror(ret));
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
        return -ret;
}

/**
 * Detach a user from an Rx queue.
 *
 * This function decrements the usage count of the Rx queue and destroys
 * usage resources after reaching 0.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 */
void
mlx4_rxq_detach(struct rxq *rxq)
{
        unsigned int i;
        struct rte_mbuf *(*elts)[1 << rxq->elts_n] = rxq->elts;

        if (--rxq->usecnt)
                return;
        rxq->rq_ci = 0;
        memset(&rxq->mcq, 0, sizeof(rxq->mcq));
        rxq->rq_db = NULL;
        rxq->wqes = NULL;
        claim_zero(mlx4_glue->destroy_wq(rxq->wq));
        rxq->wq = NULL;
        claim_zero(mlx4_glue->destroy_cq(rxq->cq));
        rxq->cq = NULL;
        DEBUG("%p: freeing Rx queue elements", (void *)rxq);
        for (i = 0; (i != RTE_DIM(*elts)); ++i) {
                if (!(*elts)[i])
                        continue;
                rte_pktmbuf_free_seg((*elts)[i]);
                (*elts)[i] = NULL;
        }
}

/**
 * Returns the per-queue supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx4_get_rx_queue_offloads(struct mlx4_priv *priv)
{
        uint64_t offloads = RTE_ETH_RX_OFFLOAD_SCATTER |
                            RTE_ETH_RX_OFFLOAD_KEEP_CRC |
                            RTE_ETH_RX_OFFLOAD_RSS_HASH;

        if (priv->hw_csum)
                offloads |= RTE_ETH_RX_OFFLOAD_CHECKSUM;
        return offloads;
}

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Rx offloads.
 */
uint64_t
mlx4_get_rx_port_offloads(struct mlx4_priv *priv)
{
        uint64_t offloads = RTE_ETH_RX_OFFLOAD_VLAN_FILTER;

        (void)priv;
        return offloads;
}

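/*
 * Hedged sketch of how an application can query these capabilities through
 * the ethdev API (illustrative, not part of this file):
 *
 *        struct rte_eth_dev_info info;
 *
 *        ret = rte_eth_dev_info_get(port_id, &info);
 *        ... info.rx_queue_offload_capa and info.rx_offload_capa are
 *            expected to reflect the two helpers above ...
 */
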
/**
 * DPDK callback to configure an Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Rx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Thresholds parameters.
 * @param mp
 *   Memory pool for buffer allocations.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                    unsigned int socket, const struct rte_eth_rxconf *conf,
                    struct rte_mempool *mp)
{
        struct mlx4_priv *priv = dev->data->dev_private;
        uint32_t mb_len = rte_pktmbuf_data_room_size(mp);
        struct rte_mbuf *(*elts)[rte_align32pow2(desc)];
        struct rxq *rxq;
        struct mlx4_malloc_vec vec[] = {
                {
                        .align = RTE_CACHE_LINE_SIZE,
                        .size = sizeof(*rxq),
                        .addr = (void **)&rxq,
                },
                {
                        .align = RTE_CACHE_LINE_SIZE,
                        .size = sizeof(*elts),
                        .addr = (void **)&elts,
                },
        };
        int ret;
        uint32_t crc_present;
        uint64_t offloads;
        uint32_t max_rx_pktlen;

        offloads = conf->offloads | dev->data->dev_conf.rxmode.offloads;

        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);

        if (idx >= dev->data->nb_rx_queues) {
                rte_errno = EOVERFLOW;
                ERROR("%p: queue index out of range (%u >= %u)",
                      (void *)dev, idx, dev->data->nb_rx_queues);
                return -rte_errno;
        }
        rxq = dev->data->rx_queues[idx];
        if (rxq) {
                rte_errno = EEXIST;
                ERROR("%p: Rx queue %u already configured, release it first",
                      (void *)dev, idx);
                return -rte_errno;
        }
        if (!desc) {
                rte_errno = EINVAL;
                ERROR("%p: invalid number of Rx descriptors", (void *)dev);
                return -rte_errno;
        }
        if (desc != RTE_DIM(*elts)) {
                desc = RTE_DIM(*elts);
                WARN("%p: increased number of descriptors in Rx queue %u"
                     " to the next power of two (%u)",
                     (void *)dev, idx, desc);
        }
        /* By default, FCS (CRC) is stripped by hardware. */
        crc_present = 0;
        if (offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) {
                if (priv->hw_fcs_strip) {
                        crc_present = 1;
                } else {
                        WARN("%p: CRC stripping has been disabled but will still"
                             " be performed by hardware, make sure MLNX_OFED and"
                             " firmware are up to date",
                             (void *)dev);
                }
        }
        DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
              " incoming frames to hide it",
              (void *)dev,
              crc_present ? "disabled" : "enabled",
              crc_present << 2);
        /* Allocate and initialize Rx queue. */
        mlx4_zmallocv_socket("RXQ", vec, RTE_DIM(vec), socket);
        if (!rxq) {
                ERROR("%p: unable to allocate queue index %u",
                      (void *)dev, idx);
                return -rte_errno;
        }
        *rxq = (struct rxq){
                .priv = priv,
                .mp = mp,
                .port_id = dev->data->port_id,
                .sges_n = 0,
                .elts_n = rte_log2_u32(desc),
                .elts = elts,
                /* Toggle Rx checksum offload if hardware supports it. */
                .csum = priv->hw_csum &&
                        (offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM),
                .csum_l2tun = priv->hw_csum_l2tun &&
                              (offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM),
                .crc_present = crc_present,
                .l2tun_offload = priv->hw_csum_l2tun,
                .stats = {
                        .idx = idx,
                },
                .socket = socket,
        };
        dev->data->rx_queues[idx] = rxq;
        /* Enable scattered packets support for this queue if necessary. */
        MLX4_ASSERT(mb_len >= RTE_PKTMBUF_HEADROOM);
        max_rx_pktlen = dev->data->mtu + RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN;
        if (max_rx_pktlen <= (mb_len - RTE_PKTMBUF_HEADROOM)) {
                ;
        } else if (offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
                uint32_t size = RTE_PKTMBUF_HEADROOM + max_rx_pktlen;
                uint32_t sges_n;

                /*
                 * Determine the number of SGEs needed for a full packet
                 * and round it to the next power of two.
                 */
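                /*
                 * Worked example with assumed values (for illustration
                 * only): mb_len = 2048 and max_rx_pktlen = 9000 give
                 * size = 128 + 9000 = 9128 with the default 128-byte
                 * headroom, size / mb_len = 4 with a remainder, so 5 SGEs
                 * are required and rte_log2_u32(5) rounds this up to
                 * sges_n = 3, i.e. 8 SGEs per packet.
                 */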
                sges_n = rte_log2_u32((size / mb_len) + !!(size % mb_len));
                rxq->sges_n = sges_n;
                /* Make sure sges_n did not overflow. */
                size = mb_len * (1 << rxq->sges_n);
                size -= RTE_PKTMBUF_HEADROOM;
                if (size < max_rx_pktlen) {
                        rte_errno = EOVERFLOW;
                        ERROR("%p: too many SGEs (%u) needed to handle"
                              " requested maximum packet size %u",
                              (void *)dev,
                              1 << sges_n, max_rx_pktlen);
                        goto error;
                }
        } else {
                WARN("%p: the requested maximum Rx packet size (%u) is"
                     " larger than a single mbuf (%u) and scattered"
                     " mode has not been requested",
                     (void *)dev, max_rx_pktlen,
                     mb_len - RTE_PKTMBUF_HEADROOM);
        }
        DEBUG("%p: maximum number of segments per packet: %u",
              (void *)dev, 1 << rxq->sges_n);
        if (desc % (1 << rxq->sges_n)) {
                rte_errno = EINVAL;
                ERROR("%p: number of Rx queue descriptors (%u) is not a"
                      " multiple of maximum segments per packet (%u)",
                      (void *)dev,
                      desc,
                      1 << rxq->sges_n);
                goto error;
        }
        if (mlx4_mr_btree_init(&rxq->mr_ctrl.cache_bh,
                               MLX4_MR_BTREE_CACHE_N, socket)) {
                /* rte_errno is already set. */
                goto error;
        }
        if (dev->data->dev_conf.intr_conf.rxq) {
                rxq->channel = mlx4_glue->create_comp_channel(priv->ctx);
                if (rxq->channel == NULL) {
                        rte_errno = ENOMEM;
                        ERROR("%p: Rx interrupt completion channel creation"
                              " failure: %s",
                              (void *)dev, strerror(rte_errno));
                        goto error;
                }
                if (mlx4_fd_set_non_blocking(rxq->channel->fd) < 0) {
                        ERROR("%p: unable to make Rx interrupt completion"
                              " channel non-blocking: %s",
                              (void *)dev, strerror(rte_errno));
                        goto error;
                }
        }
        DEBUG("%p: adding Rx queue %p to list", (void *)dev, (void *)rxq);
        return 0;
error:
        ret = rte_errno;
        mlx4_rx_queue_release(dev, idx);
        rte_errno = ret;
        MLX4_ASSERT(rte_errno > 0);
        return -rte_errno;
}

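/*
 * Hedged application-side sketch of how this callback is typically reached
 * through the ethdev API (values are illustrative only):
 *
 *        struct rte_eth_rxconf rxconf = {
 *                .offloads = RTE_ETH_RX_OFFLOAD_SCATTER,
 *        };
 *
 *        ret = rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
 *                                     &rxconf, mbuf_pool);
 *
 * Descriptor counts that are not a power of two are rounded up above, and
 * the requested per-queue offloads are validated by the ethdev layer
 * against the capabilities reported by mlx4_get_rx_queue_offloads().
 */
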
/**
 * DPDK callback to release an Rx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Receive queue index.
 */
void
mlx4_rx_queue_release(struct rte_eth_dev *dev, uint16_t idx)
{
        struct rxq *rxq = dev->data->rx_queues[idx];

        if (rxq == NULL)
                return;
        dev->data->rx_queues[idx] = NULL;
        DEBUG("%p: removing Rx queue %hu from list", (void *)dev, idx);
        MLX4_ASSERT(!rxq->cq);
        MLX4_ASSERT(!rxq->wq);
        MLX4_ASSERT(!rxq->wqes);
        MLX4_ASSERT(!rxq->rq_db);
        if (rxq->channel)
                claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel));
        mlx4_mr_btree_free(&rxq->mr_ctrl.cache_bh);
        rte_free(rxq);
}
