dpdk/drivers/net/mlx4/mlx4_txq.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

/**
 * @file
 * Tx queues configuration for mlx4 driver.
 */

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <inttypes.h>
#include <unistd.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_errno.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#include "mlx4.h"
#include "mlx4_glue.h"
#include "mlx4_prm.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/**
 * Initialize Tx UAR registers for primary process.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
txq_uar_init(struct txq *txq)
{
        struct mlx4_priv *priv = txq->priv;
        struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));

        MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
        MLX4_ASSERT(ppriv);
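        /*
         * The doorbell register is already mapped by Verbs in the primary
         * process; publish its address in the per-process UAR table so the
         * Tx burst path can ring the doorbell for this queue.
         */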
        ppriv->uar_table[txq->stats.idx] = txq->msq.db;
}

#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
/**
 * Remap UAR register of a Tx queue for secondary process.
 *
 * The remapped address is stored in the UAR table of the device's
 * process-private structure, indexed by queue index.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
txq_uar_init_secondary(struct txq *txq, int fd)
{
        struct mlx4_priv *priv = txq->priv;
        struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(priv));
        void *addr;
        uintptr_t uar_va;
        uintptr_t offset;
        const size_t page_size = sysconf(_SC_PAGESIZE);

        MLX4_ASSERT(ppriv);
        /*
         * As in rdma-core, UARs are mapped at OS page size granularity.
         * See the libmlx4 function mlx4_init_context().
         */
        uar_va = (uintptr_t)txq->msq.db;
        offset = uar_va & (page_size - 1); /* Offset in page. */
        addr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, fd,
                        txq->msq.uar_mmap_offset);
        if (addr == MAP_FAILED) {
                ERROR("port %u mmap failed for BF reg of txq %u",
                      txq->port_id, txq->stats.idx);
                rte_errno = ENXIO;
                return -rte_errno;
        }
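        /*
         * The mapping covers a whole page; re-apply the doorbell's offset
         * within that page to obtain the usable register address.
         */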
        addr = RTE_PTR_ADD(addr, offset);
        ppriv->uar_table[txq->stats.idx] = addr;
        return 0;
}

/**
 * Unmap UAR register of a Tx queue for secondary process.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
txq_uar_uninit_secondary(struct txq *txq)
{
        struct mlx4_proc_priv *ppriv = MLX4_PROC_PRIV(PORT_ID(txq->priv));
        const size_t page_size = sysconf(_SC_PAGESIZE);
        void *addr;

        addr = ppriv->uar_table[txq->stats.idx];
        munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
}

/**
 * Initialize Tx UAR registers for secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fd
 *   Verbs file descriptor to map UAR pages.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd)
{
        const unsigned int txqs_n = dev->data->nb_tx_queues;
        struct txq *txq;
        unsigned int i;
        int ret;

        MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
        for (i = 0; i != txqs_n; ++i) {
                txq = dev->data->tx_queues[i];
                if (!txq)
                        continue;
                MLX4_ASSERT(txq->stats.idx == (uint16_t)i);
                ret = txq_uar_init_secondary(txq, fd);
                if (ret)
                        goto error;
        }
        return 0;
error:
        /* Rollback. */
        do {
                txq = dev->data->tx_queues[i];
                if (!txq)
                        continue;
                txq_uar_uninit_secondary(txq);
        } while (i--);
        return -rte_errno;
}

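/**
 * Unmap all Tx UAR registers previously mapped for a secondary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */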
void
mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev)
{
        struct mlx4_proc_priv *ppriv =
                        (struct mlx4_proc_priv *)dev->process_private;
        const size_t page_size = sysconf(_SC_PAGESIZE);
        void *addr;
        size_t i;

        if (page_size == (size_t)-1) {
                ERROR("Failed to get mem page size");
                return;
        }
        for (i = 0; i < ppriv->uar_table_sz; i++) {
                addr = ppriv->uar_table[i];
                if (addr)
                        munmap(RTE_PTR_ALIGN_FLOOR(addr, page_size), page_size);
        }
}

#else
int
mlx4_tx_uar_init_secondary(struct rte_eth_dev *dev __rte_unused,
                           int fd __rte_unused)
{
        MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
        ERROR("UAR remap is not supported");
        rte_errno = ENOTSUP;
        return -rte_errno;
}

void
mlx4_tx_uar_uninit_secondary(struct rte_eth_dev *dev __rte_unused)
{
        MLX4_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);
        ERROR("UAR remap is not supported");
}
#endif

/**
 * Free Tx queue elements.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 */
static void
mlx4_txq_free_elts(struct txq *txq)
{
        struct txq_elt (*elts)[txq->elts_n] = txq->elts;
        unsigned int n = txq->elts_n;

        DEBUG("%p: freeing WRs, %u", (void *)txq, n);
        while (n--) {
                struct txq_elt *elt = &(*elts)[n];

                if (elt->buf) {
                        rte_pktmbuf_free(elt->buf);
                        elt->buf = NULL;
                        elt->wqe = NULL;
                }
        }
        txq->elts_tail = txq->elts_head;
}

/**
 * Retrieves information needed in order to directly access the Tx queue.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param mlxdv
 *   Pointer to device information for this Tx queue.
 */
static void
mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv)
{
        struct mlx4_sq *sq = &txq->msq;
        struct mlx4_cq *cq = &txq->mcq;
        struct mlx4dv_qp *dqp = mlxdv->qp.out;
        struct mlx4dv_cq *dcq = mlxdv->cq.out;

        /* Total length, including headroom and spare WQEs. */
        sq->size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset;
        sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset;
        sq->eob = sq->buf + sq->size;
        uint32_t headroom_size = 2048 + (1 << dqp->sq.wqe_shift);
        /* A contiguous headroom of this many bytes must always remain free. */
        sq->remain_size = sq->size - headroom_size;
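        /*
         * Initial WQE ownership and stamping values; the Tx path flips the
         * owner bit each time the send queue wraps around.
         */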
        sq->owner_opcode = MLX4_OPCODE_SEND | (0u << MLX4_SQ_OWNER_BIT);
        sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL |
                                     (0u << MLX4_SQ_OWNER_BIT));
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
        sq->uar_mmap_offset = dqp->uar_mmap_offset;
#else
        sq->uar_mmap_offset = -1; /* Make mmap() fail. */
#endif
        sq->db = dqp->sdb;
        sq->doorbell_qpn = dqp->doorbell_qpn;
        cq->buf = dcq->buf.buf;
        cq->cqe_cnt = dcq->cqe_cnt;
        cq->set_ci_db = dcq->set_ci_db;
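        /*
         * CQEs are either 32 or 64 bytes; record whether the 64-byte
         * layout is in use.
         */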
        cq->cqe_64 = (dcq->cqe_size & 64) ? 1 : 0;
}

/**
 * Returns the per-port supported offloads.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   Supported Tx offloads.
 */
uint64_t
mlx4_get_tx_port_offloads(struct mlx4_priv *priv)
{
        uint64_t offloads = DEV_TX_OFFLOAD_MULTI_SEGS;

        if (priv->hw_csum) {
                offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM |
                             DEV_TX_OFFLOAD_UDP_CKSUM |
                             DEV_TX_OFFLOAD_TCP_CKSUM);
        }
        if (priv->tso)
                offloads |= DEV_TX_OFFLOAD_TCP_TSO;
        if (priv->hw_csum_l2tun) {
                offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
                if (priv->tso)
                        offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
                                     DEV_TX_OFFLOAD_GRE_TNL_TSO);
        }
        return offloads;
}

/**
 * DPDK callback to configure a Tx queue.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param idx
 *   Tx queue index.
 * @param desc
 *   Number of descriptors to configure in queue.
 * @param socket
 *   NUMA socket on which memory must be allocated.
 * @param[in] conf
 *   Threshold parameters.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                    unsigned int socket, const struct rte_eth_txconf *conf)
{
        struct mlx4_priv *priv = dev->data->dev_private;
        struct mlx4dv_obj mlxdv;
        struct mlx4dv_qp dv_qp;
        struct mlx4dv_cq dv_cq;
        struct txq_elt (*elts)[rte_align32pow2(desc)];
        struct ibv_qp_init_attr qp_init_attr;
        struct txq *txq;
        uint8_t *bounce_buf;
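        /*
         * Descriptors for the allocations performed together by
         * mlx4_zmallocv_socket() below: the queue structure, its element
         * array and the bounce buffer.
         */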
        struct mlx4_malloc_vec vec[] = {
                {
                        .align = RTE_CACHE_LINE_SIZE,
                        .size = sizeof(*txq),
                        .addr = (void **)&txq,
                },
                {
                        .align = RTE_CACHE_LINE_SIZE,
                        .size = sizeof(*elts),
                        .addr = (void **)&elts,
                },
                {
                        .align = RTE_CACHE_LINE_SIZE,
                        .size = MLX4_MAX_WQE_SIZE,
                        .addr = (void **)&bounce_buf,
                },
        };
        int ret;
        uint64_t offloads;

        offloads = conf->offloads | dev->data->dev_conf.txmode.offloads;
        DEBUG("%p: configuring queue %u for %u descriptors",
              (void *)dev, idx, desc);
        if (idx >= dev->data->nb_tx_queues) {
                rte_errno = EOVERFLOW;
                ERROR("%p: queue index out of range (%u >= %u)",
                      (void *)dev, idx, dev->data->nb_tx_queues);
                return -rte_errno;
        }
        txq = dev->data->tx_queues[idx];
        if (txq) {
                rte_errno = EEXIST;
                DEBUG("%p: Tx queue %u already configured, release it first",
                      (void *)dev, idx);
                return -rte_errno;
        }
        if (!desc) {
                rte_errno = EINVAL;
                ERROR("%p: invalid number of Tx descriptors", (void *)dev);
                return -rte_errno;
        }
        if (desc != RTE_DIM(*elts)) {
                desc = RTE_DIM(*elts);
                WARN("%p: increased number of descriptors in Tx queue %u"
                     " to the next power of two (%u)",
                     (void *)dev, idx, desc);
        }
        /* Allocate and initialize Tx queue. */
        mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket);
        if (!txq) {
                ERROR("%p: unable to allocate queue index %u",
                      (void *)dev, idx);
                return -rte_errno;
        }
        *txq = (struct txq){
                .priv = priv,
                .port_id = dev->data->port_id,
                .stats = {
                        .idx = idx,
                },
                .socket = socket,
                .elts_n = desc,
                .elts = elts,
                .elts_head = 0,
                .elts_tail = 0,
                /*
                 * Request send completion every MLX4_PMD_TX_PER_COMP_REQ
                 * packets or at least 4 times per ring.
                 */
                .elts_comp_cd =
                        RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
                .elts_comp_cd_init =
                        RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4),
                .csum = priv->hw_csum &&
                        (offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
                                     DEV_TX_OFFLOAD_UDP_CKSUM |
                                     DEV_TX_OFFLOAD_TCP_CKSUM)),
                .csum_l2tun = priv->hw_csum_l2tun &&
                              (offloads &
                               DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM),
                /* Enable Tx loopback for VF devices. */
                .lb = !!priv->vf,
                .bounce_buf = bounce_buf,
        };
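        /*
         * Subsequent Verbs buffer allocations are attributed to this Tx
         * queue so they are placed on its NUMA socket.
         */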
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_TX_QUEUE;
        priv->verbs_alloc_ctx.obj = txq;
        txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0);
        if (!txq->cq) {
                rte_errno = ENOMEM;
                ERROR("%p: CQ creation failure: %s",
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
        qp_init_attr = (struct ibv_qp_init_attr){
                .send_cq = txq->cq,
                .recv_cq = txq->cq,
                .cap = {
                        .max_send_wr =
                                RTE_MIN(priv->device_attr.max_qp_wr, desc),
                        .max_send_sge = 1,
                        .max_inline_data = MLX4_PMD_MAX_INLINE,
                },
                .qp_type = IBV_QPT_RAW_PACKET,
                /* Do not request a completion for every send WR by default. */
                .sq_sig_all = 0,
        };
        txq->qp = mlx4_glue->create_qp(priv->pd, &qp_init_attr);
        if (!txq->qp) {
                rte_errno = errno ? errno : EINVAL;
                ERROR("%p: QP creation failure: %s",
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
        txq->max_inline = qp_init_attr.cap.max_inline_data;
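        /*
         * Move the QP through the INIT, RTR and RTS states so it becomes
         * able to post send requests.
         */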
        ret = mlx4_glue->modify_qp
                (txq->qp,
                 &(struct ibv_qp_attr){
                        .qp_state = IBV_QPS_INIT,
                        .port_num = priv->port,
                 },
                 IBV_QP_STATE | IBV_QP_PORT);
        if (ret) {
                rte_errno = ret;
                ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
        ret = mlx4_glue->modify_qp
                (txq->qp,
                 &(struct ibv_qp_attr){
                        .qp_state = IBV_QPS_RTR,
                 },
                 IBV_QP_STATE);
        if (ret) {
                rte_errno = ret;
                ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
        ret = mlx4_glue->modify_qp
                (txq->qp,
                 &(struct ibv_qp_attr){
                        .qp_state = IBV_QPS_RTS,
                 },
                 IBV_QP_STATE);
        if (ret) {
                rte_errno = ret;
                ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
                      (void *)dev, strerror(rte_errno));
                goto error;
        }
        /* Retrieve device queue information. */
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
        dv_qp = (struct mlx4dv_qp){
                .comp_mask = MLX4DV_QP_MASK_UAR_MMAP_OFFSET,
        };
#endif
        mlxdv.cq.in = txq->cq;
        mlxdv.cq.out = &dv_cq;
        mlxdv.qp.in = txq->qp;
        mlxdv.qp.out = &dv_qp;
        ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ);
        if (ret) {
                rte_errno = EINVAL;
                ERROR("%p: failed to obtain information needed for"
                      " accessing the device queues", (void *)dev);
                goto error;
        }
#ifdef HAVE_IBV_MLX4_UAR_MMAP_OFFSET
        if (!(dv_qp.comp_mask & MLX4DV_QP_MASK_UAR_MMAP_OFFSET)) {
                WARN("%p: failed to obtain UAR mmap offset", (void *)dev);
                dv_qp.uar_mmap_offset = -1; /* Make mmap() fail. */
        }
#endif
        mlx4_txq_fill_dv_obj_info(txq, &mlxdv);
        txq_uar_init(txq);
        /* Save first wqe pointer in the first element. */
        (&(*txq->elts)[0])->wqe =
                (volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf;
        if (mlx4_mr_btree_init(&txq->mr_ctrl.cache_bh,
                               MLX4_MR_BTREE_CACHE_N, socket)) {
                /* rte_errno is already set. */
                goto error;
        }
        /* Save the global generation number pointer to detect memory events. */
        txq->mr_ctrl.dev_gen_ptr = &priv->mr.dev_gen;
        DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq);
        dev->data->tx_queues[idx] = txq;
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
        return 0;
error:
        dev->data->tx_queues[idx] = NULL;
        ret = rte_errno;
        mlx4_tx_queue_release(txq);
        rte_errno = ret;
        MLX4_ASSERT(rte_errno > 0);
        priv->verbs_alloc_ctx.type = MLX4_VERBS_ALLOC_TYPE_NONE;
        return -rte_errno;
}

/**
 * DPDK callback to release a Tx queue.
 *
 * @param dpdk_txq
 *   Generic Tx queue pointer.
 */
void
mlx4_tx_queue_release(void *dpdk_txq)
{
        struct txq *txq = (struct txq *)dpdk_txq;
        struct mlx4_priv *priv;
        unsigned int i;

        if (txq == NULL)
                return;
        priv = txq->priv;
        for (i = 0; i != ETH_DEV(priv)->data->nb_tx_queues; ++i)
                if (ETH_DEV(priv)->data->tx_queues[i] == txq) {
                        DEBUG("%p: removing Tx queue %p from list",
                              (void *)ETH_DEV(priv), (void *)txq);
                        ETH_DEV(priv)->data->tx_queues[i] = NULL;
                        break;
                }
        mlx4_txq_free_elts(txq);
        if (txq->qp)
                claim_zero(mlx4_glue->destroy_qp(txq->qp));
        if (txq->cq)
                claim_zero(mlx4_glue->destroy_cq(txq->cq));
        mlx4_mr_btree_free(&txq->mr_ctrl.cache_bh);
        rte_free(txq);
}
