dpdk/drivers/net/mlx5/mlx5.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright 2015 6WIND S.A.
   3 * Copyright 2015 Mellanox Technologies, Ltd
   4 */
   5
   6#include <stddef.h>
   7#include <unistd.h>
   8#include <string.h>
   9#include <stdint.h>
  10#include <stdlib.h>
  11#include <errno.h>
  12#include <fcntl.h>
  13
  14#include <rte_malloc.h>
  15#include <ethdev_driver.h>
  16#include <rte_pci.h>
  17#include <rte_bus_pci.h>
  18#include <rte_common.h>
  19#include <rte_kvargs.h>
  20#include <rte_rwlock.h>
  21#include <rte_spinlock.h>
  22#include <rte_string_fns.h>
  23#include <rte_eal_paging.h>
  24#include <rte_alarm.h>
  25#include <rte_cycles.h>
  26#include <rte_interrupts.h>
  27
  28#include <mlx5_glue.h>
  29#include <mlx5_devx_cmds.h>
  30#include <mlx5_common.h>
  31#include <mlx5_common_os.h>
  32#include <mlx5_common_mp.h>
  33#include <mlx5_malloc.h>
  34
  35#include "mlx5_defs.h"
  36#include "mlx5.h"
  37#include "mlx5_utils.h"
  38#include "mlx5_rxtx.h"
  39#include "mlx5_rx.h"
  40#include "mlx5_tx.h"
  41#include "mlx5_autoconf.h"
  42#include "mlx5_flow.h"
  43#include "mlx5_flow_os.h"
  44#include "rte_pmd_mlx5.h"
  45
  46#define MLX5_ETH_DRIVER_NAME mlx5_eth
  47
  48/* Device parameter to enable RX completion queue compression. */
  49#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
  50
  51/* Device parameter to enable padding Rx packet to cacheline size. */
  52#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"
  53
  54/* Device parameter to enable Multi-Packet Rx queue. */
  55#define MLX5_RX_MPRQ_EN "mprq_en"
  56
  57/* Device parameter to configure log 2 of the number of strides for MPRQ. */
  58#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"
  59
  60/* Device parameter to configure log 2 of the stride size for MPRQ. */
  61#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"
  62
  63/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
  64#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"
  65
  66/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
  67#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"
  68
   69/* Device parameter to configure inline send. Deprecated, ignored. */
  70#define MLX5_TXQ_INLINE "txq_inline"
  71
  72/* Device parameter to limit packet size to inline with ordinary SEND. */
  73#define MLX5_TXQ_INLINE_MAX "txq_inline_max"
  74
  75/* Device parameter to configure minimal data size to inline. */
  76#define MLX5_TXQ_INLINE_MIN "txq_inline_min"
  77
  78/* Device parameter to limit packet size to inline with Enhanced MPW. */
  79#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"
  80
  81/*
  82 * Device parameter to configure the number of TX queues threshold for
  83 * enabling inline send.
  84 */
  85#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
  86
  87/*
  88 * Device parameter to configure the number of TX queues threshold for
  89 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
  90 */
  91#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
  92
  93/* Device parameter to enable multi-packet send WQEs. */
  94#define MLX5_TXQ_MPW_EN "txq_mpw_en"
  95
  96/*
  97 * Device parameter to include 2 dsegs in the title WQEBB.
  98 * Deprecated, ignored.
  99 */
 100#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"
 101
 102/*
 103 * Device parameter to limit the size of inlining packet.
 104 * Deprecated, ignored.
 105 */
 106#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"
 107
 108/*
 109 * Device parameter to enable Tx scheduling on timestamps
 110 * and specify the packet pacing granularity in nanoseconds.
 111 */
 112#define MLX5_TX_PP "tx_pp"
 113
 114/*
  115 * Device parameter to specify the skew in nanoseconds on the Tx datapath;
  116 * it represents the time between the start of WQE processing in the SQ
  117 * and the actual packet data appearing on the wire.
 118 */
 119#define MLX5_TX_SKEW "tx_skew"
 120
 121/*
 122 * Device parameter to enable hardware Tx vector.
 123 * Deprecated, ignored (no vectorized Tx routines anymore).
 124 */
 125#define MLX5_TX_VEC_EN "tx_vec_en"
 126
 127/* Device parameter to enable hardware Rx vector. */
 128#define MLX5_RX_VEC_EN "rx_vec_en"
 129
 130/* Allow L3 VXLAN flow creation. */
 131#define MLX5_L3_VXLAN_EN "l3_vxlan_en"
 132
 133/* Activate DV E-Switch flow steering. */
 134#define MLX5_DV_ESW_EN "dv_esw_en"
 135
 136/* Activate DV flow steering. */
 137#define MLX5_DV_FLOW_EN "dv_flow_en"
 138
 139/* Enable extensive flow metadata support. */
 140#define MLX5_DV_XMETA_EN "dv_xmeta_en"
 141
  142/* Device parameter to let the user manage the LACP traffic of a bonded device. */
 143#define MLX5_LACP_BY_USER "lacp_by_user"
 144
 145/* Activate Netlink support in VF mode. */
 146#define MLX5_VF_NL_EN "vf_nl_en"
 147
 148/* Select port representors to instantiate. */
 149#define MLX5_REPRESENTOR "representor"
 150
 151/* Device parameter to configure the maximum number of dump files per queue. */
 152#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"
 153
 154/* Configure timeout of LRO session (in microseconds). */
 155#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"
 156
 157/*
 158 * Device parameter to configure the total data buffer size for a single
 159 * hairpin queue (logarithm value).
 160 */
 161#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"
 162
 163/* Flow memory reclaim mode. */
 164#define MLX5_RECLAIM_MEM "reclaim_mem_mode"
 165
  166/* Device parameter to enable or disable packet decapsulation offload. */
 167#define MLX5_DECAP_EN "decap_en"
 168
  169/* Device parameter to allow or prevent duplicate flow rule patterns. */
 170#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"
 171
 172/* Device parameter to configure the delay drop when creating Rxqs. */
 173#define MLX5_DELAY_DROP "delay_drop"
 174
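/*
 * Example usage (illustrative only): the parameters above are passed as
 * comma-separated device arguments on the EAL command line, for instance:
 *
 *   dpdk-testpmd -a 0000:03:00.0,dv_flow_en=1,mprq_en=1,tx_pp=500 -- -i
 *
 * The PCI address and values here are examples; each key is parsed and
 * validated by the driver's kvargs handlers (see
 * mlx5_dev_args_check_handler() below for the shared-device parameters).
 */
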
 175/* Shared memory between primary and secondary processes. */
 176struct mlx5_shared_data *mlx5_shared_data;
 177
 178/** Driver-specific log messages type. */
 179int mlx5_logtype;
 180
 181static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
 182                                                LIST_HEAD_INITIALIZER();
 183static pthread_mutex_t mlx5_dev_ctx_list_mutex;
 184static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
 185#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
 186        [MLX5_IPOOL_DECAP_ENCAP] = {
 187                .size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
 188                .trunk_size = 64,
 189                .grow_trunk = 3,
 190                .grow_shift = 2,
 191                .need_lock = 1,
 192                .release_mem_en = 1,
 193                .malloc = mlx5_malloc,
 194                .free = mlx5_free,
 195                .type = "mlx5_encap_decap_ipool",
 196        },
 197        [MLX5_IPOOL_PUSH_VLAN] = {
 198                .size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
 199                .trunk_size = 64,
 200                .grow_trunk = 3,
 201                .grow_shift = 2,
 202                .need_lock = 1,
 203                .release_mem_en = 1,
 204                .malloc = mlx5_malloc,
 205                .free = mlx5_free,
 206                .type = "mlx5_push_vlan_ipool",
 207        },
 208        [MLX5_IPOOL_TAG] = {
 209                .size = sizeof(struct mlx5_flow_dv_tag_resource),
 210                .trunk_size = 64,
 211                .grow_trunk = 3,
 212                .grow_shift = 2,
 213                .need_lock = 1,
 214                .release_mem_en = 0,
 215                .per_core_cache = (1 << 16),
 216                .malloc = mlx5_malloc,
 217                .free = mlx5_free,
 218                .type = "mlx5_tag_ipool",
 219        },
 220        [MLX5_IPOOL_PORT_ID] = {
 221                .size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
 222                .trunk_size = 64,
 223                .grow_trunk = 3,
 224                .grow_shift = 2,
 225                .need_lock = 1,
 226                .release_mem_en = 1,
 227                .malloc = mlx5_malloc,
 228                .free = mlx5_free,
 229                .type = "mlx5_port_id_ipool",
 230        },
 231        [MLX5_IPOOL_JUMP] = {
 232                .size = sizeof(struct mlx5_flow_tbl_data_entry),
 233                .trunk_size = 64,
 234                .grow_trunk = 3,
 235                .grow_shift = 2,
 236                .need_lock = 1,
 237                .release_mem_en = 1,
 238                .malloc = mlx5_malloc,
 239                .free = mlx5_free,
 240                .type = "mlx5_jump_ipool",
 241        },
 242        [MLX5_IPOOL_SAMPLE] = {
 243                .size = sizeof(struct mlx5_flow_dv_sample_resource),
 244                .trunk_size = 64,
 245                .grow_trunk = 3,
 246                .grow_shift = 2,
 247                .need_lock = 1,
 248                .release_mem_en = 1,
 249                .malloc = mlx5_malloc,
 250                .free = mlx5_free,
 251                .type = "mlx5_sample_ipool",
 252        },
 253        [MLX5_IPOOL_DEST_ARRAY] = {
 254                .size = sizeof(struct mlx5_flow_dv_dest_array_resource),
 255                .trunk_size = 64,
 256                .grow_trunk = 3,
 257                .grow_shift = 2,
 258                .need_lock = 1,
 259                .release_mem_en = 1,
 260                .malloc = mlx5_malloc,
 261                .free = mlx5_free,
 262                .type = "mlx5_dest_array_ipool",
 263        },
 264        [MLX5_IPOOL_TUNNEL_ID] = {
 265                .size = sizeof(struct mlx5_flow_tunnel),
 266                .trunk_size = MLX5_MAX_TUNNELS,
 267                .need_lock = 1,
 268                .release_mem_en = 1,
 269                .type = "mlx5_tunnel_offload",
 270        },
 271        [MLX5_IPOOL_TNL_TBL_ID] = {
 272                .size = 0,
 273                .need_lock = 1,
 274                .type = "mlx5_flow_tnl_tbl_ipool",
 275        },
 276#endif
 277        [MLX5_IPOOL_MTR] = {
 278                /**
  279                 * The meter index must grow continually from small to big;
  280                 * grow_trunk is therefore not set, so that the allocated
  281                 * meter indexes stay contiguous and do not jump.
 282                 */
 283                .size = sizeof(struct mlx5_legacy_flow_meter),
 284                .trunk_size = 64,
 285                .need_lock = 1,
 286                .release_mem_en = 1,
 287                .malloc = mlx5_malloc,
 288                .free = mlx5_free,
 289                .type = "mlx5_meter_ipool",
 290        },
 291        [MLX5_IPOOL_MCP] = {
 292                .size = sizeof(struct mlx5_flow_mreg_copy_resource),
 293                .trunk_size = 64,
 294                .grow_trunk = 3,
 295                .grow_shift = 2,
 296                .need_lock = 1,
 297                .release_mem_en = 1,
 298                .malloc = mlx5_malloc,
 299                .free = mlx5_free,
 300                .type = "mlx5_mcp_ipool",
 301        },
 302        [MLX5_IPOOL_HRXQ] = {
 303                .size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
 304                .trunk_size = 64,
 305                .grow_trunk = 3,
 306                .grow_shift = 2,
 307                .need_lock = 1,
 308                .release_mem_en = 1,
 309                .malloc = mlx5_malloc,
 310                .free = mlx5_free,
 311                .type = "mlx5_hrxq_ipool",
 312        },
 313        [MLX5_IPOOL_MLX5_FLOW] = {
 314                /*
 315                 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
  316                 * It is set at run time according to PCI function configuration.
 317                 */
 318                .size = 0,
 319                .trunk_size = 64,
 320                .grow_trunk = 3,
 321                .grow_shift = 2,
 322                .need_lock = 1,
 323                .release_mem_en = 0,
 324                .per_core_cache = 1 << 19,
 325                .malloc = mlx5_malloc,
 326                .free = mlx5_free,
 327                .type = "mlx5_flow_handle_ipool",
 328        },
 329        [MLX5_IPOOL_RTE_FLOW] = {
 330                .size = sizeof(struct rte_flow),
 331                .trunk_size = 4096,
 332                .need_lock = 1,
 333                .release_mem_en = 1,
 334                .malloc = mlx5_malloc,
 335                .free = mlx5_free,
 336                .type = "rte_flow_ipool",
 337        },
 338        [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = {
 339                .size = 0,
 340                .need_lock = 1,
 341                .type = "mlx5_flow_rss_id_ipool",
 342        },
 343        [MLX5_IPOOL_RSS_SHARED_ACTIONS] = {
 344                .size = sizeof(struct mlx5_shared_action_rss),
 345                .trunk_size = 64,
 346                .grow_trunk = 3,
 347                .grow_shift = 2,
 348                .need_lock = 1,
 349                .release_mem_en = 1,
 350                .malloc = mlx5_malloc,
 351                .free = mlx5_free,
 352                .type = "mlx5_shared_action_rss",
 353        },
 354        [MLX5_IPOOL_MTR_POLICY] = {
 355                /**
  356                 * The policy index must grow continually from small to big;
  357                 * grow_trunk is therefore not set, so that the allocated
  358                 * policy indexes stay contiguous and do not jump.
 359                 */
 360                .size = sizeof(struct mlx5_flow_meter_sub_policy),
 361                .trunk_size = 64,
 362                .need_lock = 1,
 363                .release_mem_en = 1,
 364                .malloc = mlx5_malloc,
 365                .free = mlx5_free,
 366                .type = "mlx5_meter_policy_ipool",
 367        },
 368};
 369
 370#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
 371#define MLX5_ID_GENERATION_ARRAY_FACTOR 16
 372
 373#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024
 374
 375/**
  376 * Decide whether representor ID is an HPF (host PF) port on BF2.
 377 *
 378 * @param dev
 379 *   Pointer to Ethernet device structure.
 380 *
 381 * @return
 382 *   Non-zero if HPF, otherwise 0.
 383 */
 384bool
 385mlx5_is_hpf(struct rte_eth_dev *dev)
 386{
 387        struct mlx5_priv *priv = dev->data->dev_private;
 388        uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
 389        int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
 390
 391        return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
 392               MLX5_REPRESENTOR_REPR(-1) == repr;
 393}
 394
 395/**
 396 * Decide whether representor ID is a SF port representor.
 397 *
 398 * @param dev
 399 *   Pointer to Ethernet device structure.
 400 *
 401 * @return
  402 *   Non-zero if SF port representor, otherwise 0.
 403 */
 404bool
 405mlx5_is_sf_repr(struct rte_eth_dev *dev)
 406{
 407        struct mlx5_priv *priv = dev->data->dev_private;
 408        int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
 409
 410        return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
 411}
 412
 413/**
 414 * Initialize the ASO aging management structure.
 415 *
 416 * @param[in] sh
  417 *   Pointer to mlx5_dev_ctx_shared object.
 418 *
 419 * @return
 420 *   0 on success, a negative errno value otherwise and rte_errno is set.
 421 */
 422int
 423mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
 424{
 425        int err;
 426
 427        if (sh->aso_age_mng)
 428                return 0;
 429        sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng),
 430                                      RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
 431        if (!sh->aso_age_mng) {
  432                DRV_LOG(ERR, "aso_age_mng allocation failed.");
 433                rte_errno = ENOMEM;
 434                return -ENOMEM;
 435        }
 436        err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT);
 437        if (err) {
 438                mlx5_free(sh->aso_age_mng);
 439                return -1;
 440        }
 441        rte_rwlock_init(&sh->aso_age_mng->resize_rwl);
 442        rte_spinlock_init(&sh->aso_age_mng->free_sl);
 443        LIST_INIT(&sh->aso_age_mng->free);
 444        return 0;
 445}
 446
 447/**
 448 * Close and release all the resources of the ASO aging management structure.
 449 *
 450 * @param[in] sh
 451 *   Pointer to mlx5_dev_ctx_shared object to free.
 452 */
 453static void
 454mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
 455{
 456        int i, j;
 457
 458        mlx5_aso_flow_hit_queue_poll_stop(sh);
 459        mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
 460        if (sh->aso_age_mng->pools) {
 461                struct mlx5_aso_age_pool *pool;
 462
 463                for (i = 0; i < sh->aso_age_mng->next; ++i) {
 464                        pool = sh->aso_age_mng->pools[i];
 465                        claim_zero(mlx5_devx_cmd_destroy
 466                                                (pool->flow_hit_aso_obj));
 467                        for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j)
 468                                if (pool->actions[j].dr_action)
 469                                        claim_zero
 470                                            (mlx5_flow_os_destroy_flow_action
 471                                              (pool->actions[j].dr_action));
 472                        mlx5_free(pool);
 473                }
 474                mlx5_free(sh->aso_age_mng->pools);
 475        }
 476        mlx5_free(sh->aso_age_mng);
 477}
 478
 479/**
 480 * Initialize the shared aging list information per port.
 481 *
 482 * @param[in] sh
 483 *   Pointer to mlx5_dev_ctx_shared object.
 484 */
 485static void
 486mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
 487{
 488        uint32_t i;
 489        struct mlx5_age_info *age_info;
 490
 491        for (i = 0; i < sh->max_port; i++) {
 492                age_info = &sh->port[i].age_info;
 493                age_info->flags = 0;
 494                TAILQ_INIT(&age_info->aged_counters);
 495                LIST_INIT(&age_info->aged_aso);
 496                rte_spinlock_init(&age_info->aged_sl);
 497                MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
 498        }
 499}
 500
 501/**
 502 * DV flow counter mode detect and config.
 503 *
 504 * @param dev
 505 *   Pointer to rte_eth_dev structure.
 506 *
 507 */
 508void
 509mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)
 510{
 511#ifdef HAVE_IBV_FLOW_DV_SUPPORT
 512        struct mlx5_priv *priv = dev->data->dev_private;
 513        struct mlx5_dev_ctx_shared *sh = priv->sh;
 514        struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
 515        bool fallback;
 516
 517#ifndef HAVE_IBV_DEVX_ASYNC
 518        fallback = true;
 519#else
 520        fallback = false;
 521        if (!sh->cdev->config.devx || !sh->config.dv_flow_en ||
 522            !hca_attr->flow_counters_dump ||
 523            !(hca_attr->flow_counter_bulk_alloc_bitmap & 0x4) ||
 524            (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))
 525                fallback = true;
 526#endif
 527        if (fallback)
 528                DRV_LOG(INFO, "Use fall-back DV counter management. Flow "
 529                        "counter dump:%d, bulk_alloc_bitmap:0x%hhx.",
 530                        hca_attr->flow_counters_dump,
 531                        hca_attr->flow_counter_bulk_alloc_bitmap);
  532        /* Initialize fallback mode only on the port that initializes sh. */
 533        if (sh->refcnt == 1)
 534                sh->cmng.counter_fallback = fallback;
 535        else if (fallback != sh->cmng.counter_fallback)
  536                DRV_LOG(WARNING, "Port %d in sh has a different fallback mode "
  537                        "than other ports: %d.", PORT_ID(priv), fallback);
 538#endif
 539}
 540
 541/**
 542 * Initialize the counters management structure.
 543 *
 544 * @param[in] sh
  545 *   Pointer to mlx5_dev_ctx_shared object.
 546 */
 547static void
 548mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
 549{
 550        int i;
 551
 552        memset(&sh->cmng, 0, sizeof(sh->cmng));
 553        TAILQ_INIT(&sh->cmng.flow_counters);
 554        sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
 555        sh->cmng.max_id = -1;
 556        sh->cmng.last_pool_idx = POOL_IDX_INVALID;
 557        rte_spinlock_init(&sh->cmng.pool_update_sl);
 558        for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
 559                TAILQ_INIT(&sh->cmng.counters[i]);
 560                rte_spinlock_init(&sh->cmng.csl[i]);
 561        }
 562}
 563
 564/**
  565 * Destroy all the resources allocated for counter memory management.
 566 *
 567 * @param[in] mng
 568 *   Pointer to the memory management structure.
 569 */
 570static void
 571mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
 572{
 573        uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;
 574
 575        LIST_REMOVE(mng, next);
 576        mlx5_os_wrapped_mkey_destroy(&mng->wm);
 577        mlx5_free(mem);
 578}
 579
 580/**
 581 * Close and release all the resources of the counters management.
 582 *
 583 * @param[in] sh
 584 *   Pointer to mlx5_dev_ctx_shared object to free.
 585 */
 586static void
 587mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
 588{
 589        struct mlx5_counter_stats_mem_mng *mng;
 590        int i, j;
 591        int retries = 1024;
 592
 593        rte_errno = 0;
 594        while (--retries) {
 595                rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
 596                if (rte_errno != EINPROGRESS)
 597                        break;
 598                rte_pause();
 599        }
 600
 601        if (sh->cmng.pools) {
 602                struct mlx5_flow_counter_pool *pool;
 603                uint16_t n_valid = sh->cmng.n_valid;
 604                bool fallback = sh->cmng.counter_fallback;
 605
 606                for (i = 0; i < n_valid; ++i) {
 607                        pool = sh->cmng.pools[i];
 608                        if (!fallback && pool->min_dcs)
 609                                claim_zero(mlx5_devx_cmd_destroy
 610                                                               (pool->min_dcs));
 611                        for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
 612                                struct mlx5_flow_counter *cnt =
 613                                                MLX5_POOL_GET_CNT(pool, j);
 614
 615                                if (cnt->action)
 616                                        claim_zero
 617                                         (mlx5_flow_os_destroy_flow_action
 618                                          (cnt->action));
 619                                if (fallback && MLX5_POOL_GET_CNT
 620                                    (pool, j)->dcs_when_free)
 621                                        claim_zero(mlx5_devx_cmd_destroy
 622                                                   (cnt->dcs_when_free));
 623                        }
 624                        mlx5_free(pool);
 625                }
 626                mlx5_free(sh->cmng.pools);
 627        }
 628        mng = LIST_FIRST(&sh->cmng.mem_mngs);
 629        while (mng) {
 630                mlx5_flow_destroy_counter_stat_mem_mng(mng);
 631                mng = LIST_FIRST(&sh->cmng.mem_mngs);
 632        }
 633        memset(&sh->cmng, 0, sizeof(sh->cmng));
 634}
 635
 636/**
  637 * Initialize the ASO flow meters management structure.
 638 *
 639 * @param[in] sh
  640 *   Pointer to mlx5_dev_ctx_shared object.
 641 */
 642int
 643mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
 644{
 645        if (!sh->mtrmng) {
 646                sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
 647                        sizeof(*sh->mtrmng),
 648                        RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
 649                if (!sh->mtrmng) {
 650                        DRV_LOG(ERR,
  651                        "Meter management allocation failed.");
 652                        rte_errno = ENOMEM;
 653                        return -ENOMEM;
 654                }
 655                if (sh->meter_aso_en) {
 656                        rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
 657                        rte_rwlock_init(&sh->mtrmng->pools_mng.resize_mtrwl);
 658                        LIST_INIT(&sh->mtrmng->pools_mng.meters);
 659                }
 660                sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
 661        }
 662        return 0;
 663}
 664
 665/**
 666 * Close and release all the resources of
 667 * the ASO flow meter management structure.
 668 *
 669 * @param[in] sh
 670 *   Pointer to mlx5_dev_ctx_shared object to free.
 671 */
 672static void
 673mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
 674{
 675        struct mlx5_aso_mtr_pool *mtr_pool;
 676        struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
 677        uint32_t idx;
 678#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
 679        struct mlx5_aso_mtr *aso_mtr;
 680        int i;
 681#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
 682
 683        if (sh->meter_aso_en) {
 684                mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
 685                idx = mtrmng->pools_mng.n_valid;
 686                while (idx--) {
 687                        mtr_pool = mtrmng->pools_mng.pools[idx];
 688#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
 689                        for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
 690                                aso_mtr = &mtr_pool->mtrs[i];
 691                                if (aso_mtr->fm.meter_action_g)
 692                                        claim_zero
 693                                        (mlx5_glue->destroy_flow_action
 694                                        (aso_mtr->fm.meter_action_g));
 695                                if (aso_mtr->fm.meter_action_y)
 696                                        claim_zero
 697                                        (mlx5_glue->destroy_flow_action
 698                                        (aso_mtr->fm.meter_action_y));
 699                        }
 700#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
 701                        claim_zero(mlx5_devx_cmd_destroy
 702                                                (mtr_pool->devx_obj));
 703                        mtrmng->pools_mng.n_valid--;
 704                        mlx5_free(mtr_pool);
 705                }
 706                mlx5_free(sh->mtrmng->pools_mng.pools);
 707        }
 708        mlx5_free(sh->mtrmng);
 709        sh->mtrmng = NULL;
 710}
 711
 712/* Send FLOW_AGED event if needed. */
 713void
 714mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
 715{
 716        struct mlx5_age_info *age_info;
 717        uint32_t i;
 718
 719        for (i = 0; i < sh->max_port; i++) {
 720                age_info = &sh->port[i].age_info;
 721                if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
 722                        continue;
 723                MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW);
 724                if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) {
 725                        MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER);
 726                        rte_eth_dev_callback_process
 727                                (&rte_eth_devices[sh->port[i].devx_ih_port_id],
 728                                RTE_ETH_EVENT_FLOW_AGED, NULL);
 729                }
 730        }
 731}
 732
 733/*
 734 * Initialize the ASO connection tracking structure.
 735 *
 736 * @param[in] sh
 737 *   Pointer to mlx5_dev_ctx_shared object.
 738 *
 739 * @return
 740 *   0 on success, a negative errno value otherwise and rte_errno is set.
 741 */
 742int
 743mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
 744{
 745        int err;
 746
 747        if (sh->ct_mng)
 748                return 0;
 749        sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
 750                                 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
 751        if (!sh->ct_mng) {
 752                DRV_LOG(ERR, "ASO CT management allocation failed.");
 753                rte_errno = ENOMEM;
 754                return -rte_errno;
 755        }
 756        err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
 757        if (err) {
 758                mlx5_free(sh->ct_mng);
 759                /* rte_errno should be extracted from the failure. */
 760                rte_errno = EINVAL;
 761                return -rte_errno;
 762        }
 763        rte_spinlock_init(&sh->ct_mng->ct_sl);
 764        rte_rwlock_init(&sh->ct_mng->resize_rwl);
 765        LIST_INIT(&sh->ct_mng->free_cts);
 766        return 0;
 767}
 768
 769/*
 770 * Close and release all the resources of the
 771 * ASO connection tracking management structure.
 772 *
 773 * @param[in] sh
 774 *   Pointer to mlx5_dev_ctx_shared object to free.
 775 */
 776static void
 777mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
 778{
 779        struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
 780        struct mlx5_aso_ct_pool *ct_pool;
 781        struct mlx5_aso_ct_action *ct;
 782        uint32_t idx;
 783        uint32_t val;
 784        uint32_t cnt;
 785        int i;
 786
 787        mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
 788        idx = mng->next;
 789        while (idx--) {
 790                cnt = 0;
 791                ct_pool = mng->pools[idx];
 792                for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
 793                        ct = &ct_pool->actions[i];
 794                        val = __atomic_fetch_sub(&ct->refcnt, 1,
 795                                                 __ATOMIC_RELAXED);
 796                        MLX5_ASSERT(val == 1);
 797                        if (val > 1)
 798                                cnt++;
 799#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
 800                        if (ct->dr_action_orig)
 801                                claim_zero(mlx5_glue->destroy_flow_action
 802                                                        (ct->dr_action_orig));
 803                        if (ct->dr_action_rply)
 804                                claim_zero(mlx5_glue->destroy_flow_action
 805                                                        (ct->dr_action_rply));
 806#endif
 807                }
 808                claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
 809                if (cnt) {
 810                        DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
  811                                cnt, idx);
 812                }
 813                mlx5_free(ct_pool);
 814                /* in case of failure. */
 815                mng->next--;
 816        }
 817        mlx5_free(mng->pools);
 818        mlx5_free(mng);
 819        /* Management structure must be cleared to 0s during allocation. */
 820        sh->ct_mng = NULL;
 821}
 822
 823/**
 824 * Initialize the flow resources' indexed mempool.
 825 *
 826 * @param[in] sh
 827 *   Pointer to mlx5_dev_ctx_shared object.
 828 */
 829static void
 830mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh)
 831{
 832        uint8_t i;
 833        struct mlx5_indexed_pool_config cfg;
 834
 835        for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
 836                cfg = mlx5_ipool_cfg[i];
 837                switch (i) {
 838                default:
 839                        break;
 840                /*
 841                 * Set MLX5_IPOOL_MLX5_FLOW ipool size
 842                 * according to PCI function flow configuration.
 843                 */
 844                case MLX5_IPOOL_MLX5_FLOW:
 845                        cfg.size = sh->config.dv_flow_en ?
 846                                sizeof(struct mlx5_flow_handle) :
 847                                MLX5_FLOW_HANDLE_VERBS_SIZE;
 848                        break;
 849                }
 850                if (sh->config.reclaim_mode) {
 851                        cfg.release_mem_en = 1;
 852                        cfg.per_core_cache = 0;
 853                } else {
 854                        cfg.release_mem_en = 0;
 855                }
 856                sh->ipool[i] = mlx5_ipool_create(&cfg);
 857        }
 858}
 859
 860
 861/**
 862 * Release the flow resources' indexed mempool.
 863 *
 864 * @param[in] sh
 865 *   Pointer to mlx5_dev_ctx_shared object.
 866 */
 867static void
 868mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
 869{
 870        uint8_t i;
 871
 872        for (i = 0; i < MLX5_IPOOL_MAX; ++i)
 873                mlx5_ipool_destroy(sh->ipool[i]);
 874        for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
 875                if (sh->mdh_ipools[i])
 876                        mlx5_ipool_destroy(sh->mdh_ipools[i]);
 877}
 878
 879/*
 880 * Check if dynamic flex parser for eCPRI already exists.
 881 *
 882 * @param dev
 883 *   Pointer to Ethernet device structure.
 884 *
 885 * @return
  886 *   True if it exists, false otherwise.
 887 */
 888bool
 889mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
 890{
 891        struct mlx5_priv *priv = dev->data->dev_private;
 892        struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
 893
 894        return !!prf->obj;
 895}
 896
 897/*
  898 * Allocation of a flex parser for eCPRI. Once created, the related parser
  899 * resources will be held until the device is closed.
 900 *
 901 * @param dev
 902 *   Pointer to Ethernet device structure.
 903 *
 904 * @return
 905 *   0 on success, a negative errno value otherwise and rte_errno is set.
 906 */
 907int
 908mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
 909{
 910        struct mlx5_priv *priv = dev->data->dev_private;
 911        struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
 912        struct mlx5_devx_graph_node_attr node = {
 913                .modify_field_select = 0,
 914        };
 915        uint32_t ids[8];
 916        int ret;
 917
 918        if (!priv->sh->cdev->config.hca_attr.parse_graph_flex_node) {
 919                DRV_LOG(ERR, "Dynamic flex parser is not supported "
 920                        "for device %s.", priv->dev_data->name);
 921                return -ENOTSUP;
 922        }
 923        node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
 924        /* 8 bytes now: 4B common header + 4B message body header. */
 925        node.header_length_base_value = 0x8;
 926        /* After MAC layer: Ether / VLAN. */
 927        node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
 928        /* Type of compared condition should be 0xAEFE in the L2 layer. */
 929        node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
 930        /* Sample #0: type in common header. */
 931        node.sample[0].flow_match_sample_en = 1;
 932        /* Fixed offset. */
 933        node.sample[0].flow_match_sample_offset_mode = 0x0;
 934        /* Only the 2nd byte will be used. */
 935        node.sample[0].flow_match_sample_field_base_offset = 0x0;
 936        /* Sample #1: message payload. */
 937        node.sample[1].flow_match_sample_en = 1;
 938        /* Fixed offset. */
 939        node.sample[1].flow_match_sample_offset_mode = 0x0;
 940        /*
  941         * Only the first two bytes will be used right now; their offset
  942         * starts right after the common header, which is one DW (u32) long.
 943         */
 944        node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
 945        prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->cdev->ctx, &node);
 946        if (!prf->obj) {
 947                DRV_LOG(ERR, "Failed to create flex parser node object.");
 948                return (rte_errno == 0) ? -ENODEV : -rte_errno;
 949        }
 950        prf->num = 2;
 951        ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
 952        if (ret) {
 953                DRV_LOG(ERR, "Failed to query sample IDs.");
 954                return (rte_errno == 0) ? -ENODEV : -rte_errno;
 955        }
 956        prf->offset[0] = 0x0;
 957        prf->offset[1] = sizeof(uint32_t);
 958        prf->ids[0] = ids[0];
 959        prf->ids[1] = ids[1];
 960        return 0;
 961}
 962
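/*
 * A brief sketch of the eCPRI header layout covered by the two DW samples
 * above (based on the eCPRI specification; the field names below are
 * descriptive only, not driver identifiers):
 *
 *   byte 0         byte 1          bytes 2-3       bytes 4-5 ...
 *   +--------------+---------------+---------------+------------------+
 *   | rev/resv/C   | message type  | payload size  | message payload  |
 *   +--------------+---------------+---------------+------------------+
 *
 * Sample #0 covers the 4-byte common header (its 2nd byte holds the message
 * type). Sample #1 starts right after the common header; only its first two
 * bytes are used, e.g. the PC_ID of IQ data messages.
 */
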
 963/*
 964 * Destroy the flex parser node, including the parser itself, input / output
 965 * arcs and DW samples. Resources could be reused then.
 966 *
 967 * @param dev
 968 *   Pointer to Ethernet device structure.
 969 */
 970static void
 971mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
 972{
 973        struct mlx5_priv *priv = dev->data->dev_private;
 974        struct mlx5_ecpri_parser_profile *prf = &priv->sh->ecpri_parser;
 975
 976        if (prf->obj)
 977                mlx5_devx_cmd_destroy(prf->obj);
 978        prf->obj = NULL;
 979}
 980
 981uint32_t
 982mlx5_get_supported_sw_parsing_offloads(const struct mlx5_hca_attr *attr)
 983{
 984        uint32_t sw_parsing_offloads = 0;
 985
 986        if (attr->swp) {
 987                sw_parsing_offloads |= MLX5_SW_PARSING_CAP;
 988                if (attr->swp_csum)
 989                        sw_parsing_offloads |= MLX5_SW_PARSING_CSUM_CAP;
 990
 991                if (attr->swp_lso)
 992                        sw_parsing_offloads |= MLX5_SW_PARSING_TSO_CAP;
 993        }
 994        return sw_parsing_offloads;
 995}
 996
 997uint32_t
 998mlx5_get_supported_tunneling_offloads(const struct mlx5_hca_attr *attr)
 999{
1000        uint32_t tn_offloads = 0;
1001
1002        if (attr->tunnel_stateless_vxlan)
1003                tn_offloads |= MLX5_TUNNELED_OFFLOADS_VXLAN_CAP;
1004        if (attr->tunnel_stateless_gre)
1005                tn_offloads |= MLX5_TUNNELED_OFFLOADS_GRE_CAP;
1006        if (attr->tunnel_stateless_geneve_rx)
1007                tn_offloads |= MLX5_TUNNELED_OFFLOADS_GENEVE_CAP;
1008        return tn_offloads;
1009}
1010
1011/* Fill all fields of UAR structure. */
1012static int
1013mlx5_rxtx_uars_prepare(struct mlx5_dev_ctx_shared *sh)
1014{
1015        int ret;
1016
1017        ret = mlx5_devx_uar_prepare(sh->cdev, &sh->tx_uar);
1018        if (ret) {
1019                DRV_LOG(ERR, "Failed to prepare Tx DevX UAR.");
1020                return -rte_errno;
1021        }
1022        MLX5_ASSERT(sh->tx_uar.obj);
1023        MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar.obj));
1024        ret = mlx5_devx_uar_prepare(sh->cdev, &sh->rx_uar);
1025        if (ret) {
1026                DRV_LOG(ERR, "Failed to prepare Rx DevX UAR.");
1027                mlx5_devx_uar_release(&sh->tx_uar);
1028                return -rte_errno;
1029        }
1030        MLX5_ASSERT(sh->rx_uar.obj);
1031        MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->rx_uar.obj));
1032        return 0;
1033}
1034
1035static void
1036mlx5_rxtx_uars_release(struct mlx5_dev_ctx_shared *sh)
1037{
1038        mlx5_devx_uar_release(&sh->rx_uar);
1039        mlx5_devx_uar_release(&sh->tx_uar);
1040}
1041
1042/**
1043 * rte_mempool_walk() callback to unregister Rx mempools.
 1044 * It is used when implicit mempool registration is disabled.
1045 *
1046 * @param mp
1047 *   The mempool being walked.
1048 * @param arg
1049 *   Pointer to the device shared context.
1050 */
1051static void
1052mlx5_dev_ctx_shared_rx_mempool_unregister_cb(struct rte_mempool *mp, void *arg)
1053{
1054        struct mlx5_dev_ctx_shared *sh = arg;
1055
1056        mlx5_dev_mempool_unregister(sh->cdev, mp);
1057}
1058
1059/**
1060 * Callback used when implicit mempool registration is disabled
1061 * in order to track Rx mempool destruction.
1062 *
1063 * @param event
1064 *   Mempool life cycle event.
1065 * @param mp
1066 *   An Rx mempool registered explicitly when the port is started.
1067 * @param arg
1068 *   Pointer to a device shared context.
1069 */
1070static void
1071mlx5_dev_ctx_shared_rx_mempool_event_cb(enum rte_mempool_event event,
1072                                        struct rte_mempool *mp, void *arg)
1073{
1074        struct mlx5_dev_ctx_shared *sh = arg;
1075
1076        if (event == RTE_MEMPOOL_EVENT_DESTROY)
1077                mlx5_dev_mempool_unregister(sh->cdev, mp);
1078}
1079
1080int
1081mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
1082{
1083        struct mlx5_priv *priv = dev->data->dev_private;
1084        struct mlx5_dev_ctx_shared *sh = priv->sh;
1085        int ret;
1086
1087        /* Check if we only need to track Rx mempool destruction. */
1088        if (!sh->cdev->config.mr_mempool_reg_en) {
1089                ret = rte_mempool_event_callback_register
1090                                (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
1091                return ret == 0 || rte_errno == EEXIST ? 0 : ret;
1092        }
1093        return mlx5_dev_mempool_subscribe(sh->cdev);
1094}
1095
1096/**
 1097 * Set up multiple TISs with different affinities, according to
 1098 * the number of bonding ports.
 1099 *
 1100 * @param sh
 1101 *   Pointer to the shared device context.
 1102 *
 1103 * @return
 1104 *   Zero on success, -1 otherwise.
1105 */
1106static int
1107mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
1108{
1109        int i;
1110        struct mlx5_devx_lag_context lag_ctx = { 0 };
1111        struct mlx5_devx_tis_attr tis_attr = { 0 };
1112
1113        tis_attr.transport_domain = sh->td->id;
1114        if (sh->bond.n_port) {
1115                if (!mlx5_devx_cmd_query_lag(sh->cdev->ctx, &lag_ctx)) {
1116                        sh->lag.tx_remap_affinity[0] =
1117                                lag_ctx.tx_remap_affinity_1;
1118                        sh->lag.tx_remap_affinity[1] =
1119                                lag_ctx.tx_remap_affinity_2;
1120                        sh->lag.affinity_mode = lag_ctx.port_select_mode;
1121                } else {
1122                        DRV_LOG(ERR, "Failed to query lag affinity.");
1123                        return -1;
1124                }
1125                if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
1126                        for (i = 0; i < sh->bond.n_port; i++) {
1127                                tis_attr.lag_tx_port_affinity =
1128                                        MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
1129                                                        sh->bond.n_port);
1130                                sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
1131                                                &tis_attr);
1132                                if (!sh->tis[i]) {
 1133                                        DRV_LOG(ERR, "Failed to create TIS %d/%d for bonding device"
1134                                                " %s.", i, sh->bond.n_port,
1135                                                sh->ibdev_name);
1136                                        return -1;
1137                                }
1138                        }
1139                        DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
1140                                sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
1141                                lag_ctx.tx_remap_affinity_2);
1142                        return 0;
1143                }
1144                if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
1145                        DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
1146                                        sh->ibdev_name);
1147        }
1148        tis_attr.lag_tx_port_affinity = 0;
1149        sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
1150        if (!sh->tis[0]) {
 1151                DRV_LOG(ERR, "Failed to create TIS 0 for bonding device"
1152                        " %s.", sh->ibdev_name);
1153                return -1;
1154        }
1155        return 0;
1156}
1157
1158/**
 1159 * Verify and store value for a shared device argument.
1160 *
1161 * @param[in] key
1162 *   Key argument to verify.
1163 * @param[in] val
1164 *   Value associated with key.
1165 * @param opaque
1166 *   User data.
1167 *
1168 * @return
1169 *   0 on success, a negative errno value otherwise and rte_errno is set.
1170 */
1171static int
1172mlx5_dev_args_check_handler(const char *key, const char *val, void *opaque)
1173{
1174        struct mlx5_sh_config *config = opaque;
1175        signed long tmp;
1176
1177        errno = 0;
1178        tmp = strtol(val, NULL, 0);
1179        if (errno) {
1180                rte_errno = errno;
1181                DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
1182                return -rte_errno;
1183        }
1184        if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
1185                /* Negative values are acceptable for some keys only. */
1186                rte_errno = EINVAL;
1187                DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
1188                return -rte_errno;
1189        }
1190        if (strcmp(MLX5_TX_PP, key) == 0) {
1191                unsigned long mod = tmp >= 0 ? tmp : -tmp;
1192
1193                if (!mod) {
1194                        DRV_LOG(ERR, "Zero Tx packet pacing parameter.");
1195                        rte_errno = EINVAL;
1196                        return -rte_errno;
1197                }
1198                config->tx_pp = tmp;
1199        } else if (strcmp(MLX5_TX_SKEW, key) == 0) {
1200                config->tx_skew = tmp;
1201        } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
1202                config->l3_vxlan_en = !!tmp;
1203        } else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
1204                config->vf_nl_en = !!tmp;
1205        } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
1206                config->dv_esw_en = !!tmp;
1207        } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
1208                if (tmp > 2) {
1209                        DRV_LOG(ERR, "Invalid %s parameter.", key);
1210                        rte_errno = EINVAL;
1211                        return -rte_errno;
1212                }
1213                config->dv_flow_en = tmp;
1214        } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) {
1215                if (tmp != MLX5_XMETA_MODE_LEGACY &&
1216                    tmp != MLX5_XMETA_MODE_META16 &&
1217                    tmp != MLX5_XMETA_MODE_META32 &&
1218                    tmp != MLX5_XMETA_MODE_MISS_INFO) {
1219                        DRV_LOG(ERR, "Invalid extensive metadata parameter.");
1220                        rte_errno = EINVAL;
1221                        return -rte_errno;
1222                }
1223                if (tmp != MLX5_XMETA_MODE_MISS_INFO)
1224                        config->dv_xmeta_en = tmp;
1225                else
1226                        config->dv_miss_info = 1;
1227        } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) {
1228                config->lacp_by_user = !!tmp;
1229        } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
1230                if (tmp != MLX5_RCM_NONE &&
1231                    tmp != MLX5_RCM_LIGHT &&
1232                    tmp != MLX5_RCM_AGGR) {
 1233                        DRV_LOG(ERR, "Unrecognized %s: \"%s\"", key, val);
1234                        rte_errno = EINVAL;
1235                        return -rte_errno;
1236                }
1237                config->reclaim_mode = tmp;
1238        } else if (strcmp(MLX5_DECAP_EN, key) == 0) {
1239                config->decap_en = !!tmp;
1240        } else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) {
1241                config->allow_duplicate_pattern = !!tmp;
1242        }
1243        return 0;
1244}
1245
1246/**
1247 * Parse user device parameters and adjust them according to device
1248 * capabilities.
1249 *
1250 * @param sh
1251 *   Pointer to shared device context.
1252 * @param mkvlist
1253 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
1254 * @param config
1255 *   Pointer to shared device configuration structure.
1256 *
1257 * @return
1258 *   0 on success, a negative errno value otherwise and rte_errno is set.
1259 */
1260static int
1261mlx5_shared_dev_ctx_args_config(struct mlx5_dev_ctx_shared *sh,
1262                                struct mlx5_kvargs_ctrl *mkvlist,
1263                                struct mlx5_sh_config *config)
1264{
1265        const char **params = (const char *[]){
1266                MLX5_TX_PP,
1267                MLX5_TX_SKEW,
1268                MLX5_L3_VXLAN_EN,
1269                MLX5_VF_NL_EN,
1270                MLX5_DV_ESW_EN,
1271                MLX5_DV_FLOW_EN,
1272                MLX5_DV_XMETA_EN,
1273                MLX5_LACP_BY_USER,
1274                MLX5_RECLAIM_MEM,
1275                MLX5_DECAP_EN,
1276                MLX5_ALLOW_DUPLICATE_PATTERN,
1277                NULL,
1278        };
1279        int ret = 0;
1280
1281        /* Default configuration. */
1282        memset(config, 0, sizeof(*config));
1283        config->vf_nl_en = 1;
1284        config->dv_esw_en = 1;
1285        config->dv_flow_en = 1;
1286        config->decap_en = 1;
1287        config->allow_duplicate_pattern = 1;
1288        if (mkvlist != NULL) {
1289                /* Process parameters. */
1290                ret = mlx5_kvargs_process(mkvlist, params,
1291                                          mlx5_dev_args_check_handler, config);
1292                if (ret) {
1293                        DRV_LOG(ERR, "Failed to process device arguments: %s",
1294                                strerror(rte_errno));
1295                        return -rte_errno;
1296                }
1297        }
1298        /* Adjust parameters according to device capabilities. */
1299        if (config->dv_flow_en && !sh->dev_cap.dv_flow_en) {
1300                DRV_LOG(WARNING, "DV flow is not supported.");
1301                config->dv_flow_en = 0;
1302        }
1303        if (config->dv_esw_en && !sh->dev_cap.dv_esw_en) {
1304                DRV_LOG(DEBUG, "E-Switch DV flow is not supported.");
1305                config->dv_esw_en = 0;
1306        }
1307        if (config->dv_esw_en && !config->dv_flow_en) {
1308                DRV_LOG(DEBUG,
1309                        "E-Switch DV flow is supported only when DV flow is enabled.");
1310                config->dv_esw_en = 0;
1311        }
1312        if (config->dv_miss_info && config->dv_esw_en)
1313                config->dv_xmeta_en = MLX5_XMETA_MODE_META16;
1314        if (!config->dv_esw_en &&
1315            config->dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {
1316                DRV_LOG(WARNING,
1317                        "Metadata mode %u is not supported (no E-Switch).",
1318                        config->dv_xmeta_en);
1319                config->dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;
1320        }
1321        if (config->tx_pp && !sh->dev_cap.txpp_en) {
1322                DRV_LOG(ERR, "Packet pacing is not supported.");
1323                rte_errno = ENODEV;
1324                return -rte_errno;
1325        }
1326        if (!config->tx_pp && config->tx_skew) {
 1327                DRV_LOG(WARNING,
 1328                        "\"tx_skew\" has no effect without \"tx_pp\".");
1329        }
1330        /*
 1331         * If the HW has a bug working with tunnel packet decapsulation and
 1332         * scatter FCS, and decapsulation is needed, clear the hw_fcs_strip bit.
 1333         * Then the RTE_ETH_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.
1334         */
1335        if (sh->dev_cap.scatter_fcs_w_decap_disable && sh->config.decap_en)
1336                config->hw_fcs_strip = 0;
1337        else
1338                config->hw_fcs_strip = sh->dev_cap.hw_fcs_strip;
1339        DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported",
1340                (config->hw_fcs_strip ? "" : "not "));
1341        DRV_LOG(DEBUG, "\"tx_pp\" is %d.", config->tx_pp);
1342        DRV_LOG(DEBUG, "\"tx_skew\" is %d.", config->tx_skew);
1343        DRV_LOG(DEBUG, "\"reclaim_mode\" is %u.", config->reclaim_mode);
1344        DRV_LOG(DEBUG, "\"dv_esw_en\" is %u.", config->dv_esw_en);
1345        DRV_LOG(DEBUG, "\"dv_flow_en\" is %u.", config->dv_flow_en);
1346        DRV_LOG(DEBUG, "\"dv_xmeta_en\" is %u.", config->dv_xmeta_en);
1347        DRV_LOG(DEBUG, "\"dv_miss_info\" is %u.", config->dv_miss_info);
1348        DRV_LOG(DEBUG, "\"l3_vxlan_en\" is %u.", config->l3_vxlan_en);
1349        DRV_LOG(DEBUG, "\"vf_nl_en\" is %u.", config->vf_nl_en);
1350        DRV_LOG(DEBUG, "\"lacp_by_user\" is %u.", config->lacp_by_user);
1351        DRV_LOG(DEBUG, "\"decap_en\" is %u.", config->decap_en);
1352        DRV_LOG(DEBUG, "\"allow_duplicate_pattern\" is %u.",
1353                config->allow_duplicate_pattern);
1354        return 0;
1355}
1356
1357/**
1358 * Configure realtime timestamp format.
1359 *
1360 * @param sh
1361 *   Pointer to mlx5_dev_ctx_shared object.
1362 * @param hca_attr
1363 *   Pointer to DevX HCA capabilities structure.
1364 */
1365void
1366mlx5_rt_timestamp_config(struct mlx5_dev_ctx_shared *sh,
1367                         struct mlx5_hca_attr *hca_attr)
1368{
1369        uint32_t dw_cnt = MLX5_ST_SZ_DW(register_mtutc);
1370        uint32_t reg[dw_cnt];
1371        int ret = ENOTSUP;
1372
1373        if (hca_attr->access_register_user)
1374                ret = mlx5_devx_cmd_register_read(sh->cdev->ctx,
1375                                                  MLX5_REGISTER_ID_MTUTC, 0,
1376                                                  reg, dw_cnt);
1377        if (!ret) {
1378                uint32_t ts_mode;
1379
1380                /* MTUTC register is read successfully. */
1381                ts_mode = MLX5_GET(register_mtutc, reg, time_stamp_mode);
1382                if (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)
1383                        sh->dev_cap.rt_timestamp = 1;
1384        } else {
1385                /* Kernel does not support register reading. */
1386                if (hca_attr->dev_freq_khz == (NS_PER_S / MS_PER_S))
1387                        sh->dev_cap.rt_timestamp = 1;
1388        }
1389}
1390
1391/**
 1392 * Allocate shared device context. If there is a multiport device, the
 1393 * master and representors will share this context; if there is a single
 1394 * port dedicated device, the context will be used only by the given
 1395 * port due to unification.
 1396 *
 1397 * The routine first searches for an existing context by the specified
 1398 * device name; if found, that shared context is reused and its reference
 1399 * counter is incremented. If no context is found, a new one is created
 1400 * and initialized with the specified device context and parameters.
1401 *
1402 * @param[in] spawn
1403 *   Pointer to the device attributes (name, port, etc).
1404 * @param mkvlist
1405 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
1406 *
1407 * @return
1408 *   Pointer to mlx5_dev_ctx_shared object on success,
1409 *   otherwise NULL and rte_errno is set.
1410 */
1411struct mlx5_dev_ctx_shared *
1412mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
1413                          struct mlx5_kvargs_ctrl *mkvlist)
1414{
1415        struct mlx5_dev_ctx_shared *sh;
1416        int err = 0;
1417        uint32_t i;
1418
1419        MLX5_ASSERT(spawn);
1420        /* Secondary process should not create the shared context. */
1421        MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
1422        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
1423        /* Search for IB context by device name. */
1424        LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
1425                if (!strcmp(sh->ibdev_name, spawn->phys_dev_name)) {
1426                        sh->refcnt++;
1427                        goto exit;
1428                }
1429        }
 1430        /* No device found, we have to create a new shared context. */
1431        MLX5_ASSERT(spawn->max_port);
1432        sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
1433                         sizeof(struct mlx5_dev_ctx_shared) +
1434                         spawn->max_port * sizeof(struct mlx5_dev_shared_port),
1435                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
1436        if (!sh) {
1437                DRV_LOG(ERR, "Shared context allocation failure.");
1438                rte_errno = ENOMEM;
1439                goto exit;
1440        }
1441        pthread_mutex_init(&sh->txpp.mutex, NULL);
1442        sh->numa_node = spawn->cdev->dev->numa_node;
1443        sh->cdev = spawn->cdev;
1444        sh->esw_mode = !!(spawn->info.master || spawn->info.representor);
1445        if (spawn->bond_info)
1446                sh->bond = *spawn->bond_info;
1447        err = mlx5_os_capabilities_prepare(sh);
1448        if (err) {
 1449                DRV_LOG(ERR, "Failed to configure device capabilities.");
1450                goto error;
1451        }
1452        err = mlx5_shared_dev_ctx_args_config(sh, mkvlist, &sh->config);
1453        if (err) {
 1454                DRV_LOG(ERR, "Failed to process device configuration: %s",
1455                        strerror(rte_errno));
1456                goto error;
1457        }
1458        sh->refcnt = 1;
1459        sh->max_port = spawn->max_port;
1460        strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->cdev->ctx),
1461                sizeof(sh->ibdev_name) - 1);
1462        strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->cdev->ctx),
1463                sizeof(sh->ibdev_path) - 1);
1464        /*
1465         * Setting port_id to the maximum disallowed value means there is no
1466         * interrupt subhandler installed for the given port index i.
1467         */
1468        for (i = 0; i < sh->max_port; i++) {
1469                sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
1470                sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
1471                sh->port[i].nl_ih_port_id = RTE_MAX_ETHPORTS;
1472        }
1473        if (sh->cdev->config.devx) {
1474                sh->td = mlx5_devx_cmd_create_td(sh->cdev->ctx);
1475                if (!sh->td) {
1476                        DRV_LOG(ERR, "TD allocation failure");
1477                        rte_errno = ENOMEM;
1478                        goto error;
1479                }
1480                if (mlx5_setup_tis(sh)) {
1481                        DRV_LOG(ERR, "TIS allocation failure");
1482                        rte_errno = ENOMEM;
1483                        goto error;
1484                }
1485                err = mlx5_rxtx_uars_prepare(sh);
1486                if (err)
1487                        goto error;
1488#ifndef RTE_ARCH_64
1489        } else {
1490                /* Initialize UAR access locks for 32bit implementations. */
1491                rte_spinlock_init(&sh->uar_lock_cq);
1492                for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
1493                        rte_spinlock_init(&sh->uar_lock[i]);
1494#endif
1495        }
1496        mlx5_os_dev_shared_handler_install(sh);
1497        if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
1498                err = mlx5_flow_os_init_workspace_once();
1499                if (err)
1500                        goto error;
1501        }
1502        mlx5_flow_aging_init(sh);
1503        mlx5_flow_counters_mng_init(sh);
1504        mlx5_flow_ipool_create(sh);
1505        /* Add context to the global device list. */
1506        LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
1507        rte_spinlock_init(&sh->geneve_tlv_opt_sl);
1508exit:
1509        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
1510        return sh;
1511error:
1512        err = rte_errno;
1513        pthread_mutex_destroy(&sh->txpp.mutex);
1514        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
1515        MLX5_ASSERT(sh);
1516        mlx5_rxtx_uars_release(sh);
1517        i = 0;
1518        do {
1519                if (sh->tis[i])
1520                        claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
1521        } while (++i < (uint32_t)sh->bond.n_port);
1522        if (sh->td)
1523                claim_zero(mlx5_devx_cmd_destroy(sh->td));
1524        mlx5_free(sh);
1525        rte_errno = err;
1526        return NULL;
1527}
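
/*
 * Illustrative pairing sketch (an assumption, not part of the original file):
 * each successful call above is expected to be balanced by a call to
 * mlx5_free_shared_dev_ctx(), so for a two-port device sharing one IB context
 * the flow is roughly:
 *
 *     sh = mlx5_alloc_shared_dev_ctx(spawn0, mkvlist);  // creates, refcnt == 1
 *     sh = mlx5_alloc_shared_dev_ctx(spawn1, mkvlist);  // reuses,  refcnt == 2
 *     ...
 *     mlx5_free_shared_dev_ctx(sh);  // refcnt == 1
 *     mlx5_free_shared_dev_ctx(sh);  // refcnt == 0, resources released
 */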
1528
1529/**
1530 * Create LWM event_channel and interrupt handle for shared device
1531 * context. All rxqs sharing the device context share the event_channel.
1532 * A callback is registered in the interrupt thread to receive the LWM event.
1533 *
1534 * @param[in] priv
1535 *   Pointer to mlx5_priv instance.
1536 *
1537 * @return
1538 *   0 on success, negative with rte_errno set.
1539 */
1540int
1541mlx5_lwm_setup(struct mlx5_priv *priv)
1542{
1543        int fd_lwm;
1544
1545        pthread_mutex_init(&priv->sh->lwm_config_lock, NULL);
1546        priv->sh->devx_channel_lwm = mlx5_os_devx_create_event_channel
1547                        (priv->sh->cdev->ctx,
1548                         MLX5DV_DEVX_CREATE_EVENT_CHANNEL_FLAGS_OMIT_EV_DATA);
1549        if (!priv->sh->devx_channel_lwm)
1550                goto err;
1551        fd_lwm = mlx5_os_get_devx_channel_fd(priv->sh->devx_channel_lwm);
1552        priv->sh->intr_handle_lwm = mlx5_os_interrupt_handler_create
1553                (RTE_INTR_INSTANCE_F_SHARED, true,
1554                 fd_lwm, mlx5_dev_interrupt_handler_lwm, priv);
1555        if (!priv->sh->intr_handle_lwm)
1556                goto err;
1557        return 0;
1558err:
1559        if (priv->sh->devx_channel_lwm) {
1560                mlx5_os_devx_destroy_event_channel
1561                        (priv->sh->devx_channel_lwm);
1562                priv->sh->devx_channel_lwm = NULL;
1563        }
1564        pthread_mutex_destroy(&priv->sh->lwm_config_lock);
1565        return -rte_errno;
1566}
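
/*
 * Illustrative usage sketch (an assumption, not part of the original file):
 * applications typically arm the LWM through the generic ethdev availability
 * threshold API, which lands in the .rx_queue_avail_thresh_set dev op of this
 * driver, e.g.:
 *
 *     // Request an LWM event once fewer than ~30% of the Rx descriptors
 *     // of the queue remain available.
 *     ret = rte_eth_rx_avail_thresh_set(port_id, rx_queue_id, 30);
 *     if (ret < 0)
 *             rte_exit(EXIT_FAILURE, "cannot set Rx avail threshold\n");
 */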
1567
1568/**
1569 * Destroy the LWM event_channel and interrupt handle for the shared device
1570 * context before freeing that context. The interrupt handler is also
1571 * unregistered.
1572 *
1573 * @param[in] sh
1574 *   Pointer to shared device context.
1575 */
1576void
1577mlx5_lwm_unset(struct mlx5_dev_ctx_shared *sh)
1578{
1579        if (sh->intr_handle_lwm) {
1580                mlx5_os_interrupt_handler_destroy(sh->intr_handle_lwm,
1581                        mlx5_dev_interrupt_handler_lwm, (void *)-1);
1582                sh->intr_handle_lwm = NULL;
1583        }
1584        if (sh->devx_channel_lwm) {
1585                mlx5_os_devx_destroy_event_channel
1586                        (sh->devx_channel_lwm);
1587                sh->devx_channel_lwm = NULL;
1588        }
1589        pthread_mutex_destroy(&sh->lwm_config_lock);
1590}
1591
1592/**
1593 * Free shared IB device context. Decrement the reference counter and,
1594 * if it reaches zero, free all allocated resources and close handles.
1595 *
1596 * @param[in] sh
1597 *   Pointer to mlx5_dev_ctx_shared object to free
1598 */
1599void
1600mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
1601{
1602        int ret;
1603        int i = 0;
1604
1605        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
1606#ifdef RTE_LIBRTE_MLX5_DEBUG
1607        /* Check the object presence in the list. */
1608        struct mlx5_dev_ctx_shared *lctx;
1609
1610        LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
1611                if (lctx == sh)
1612                        break;
1613        MLX5_ASSERT(lctx);
1614        if (lctx != sh) {
1615                DRV_LOG(ERR, "Freeing non-existing shared IB context");
1616                goto exit;
1617        }
1618#endif
1619        MLX5_ASSERT(sh);
1620        MLX5_ASSERT(sh->refcnt);
1621        /* Secondary process should not free the shared context. */
1622        MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
1623        if (--sh->refcnt)
1624                goto exit;
1625        /* Stop watching for mempool events and unregister all mempools. */
1626        if (!sh->cdev->config.mr_mempool_reg_en) {
1627                ret = rte_mempool_event_callback_unregister
1628                                (mlx5_dev_ctx_shared_rx_mempool_event_cb, sh);
1629                if (ret == 0)
1630                        rte_mempool_walk
1631                             (mlx5_dev_ctx_shared_rx_mempool_unregister_cb, sh);
1632        }
1633        /* Remove context from the global device list. */
1634        LIST_REMOVE(sh, next);
1635        /* Release resources on the last device removal. */
1636        if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
1637                mlx5_os_net_cleanup();
1638                mlx5_flow_os_release_workspace();
1639        }
1640        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
1641        if (sh->flex_parsers_dv) {
1642                mlx5_list_destroy(sh->flex_parsers_dv);
1643                sh->flex_parsers_dv = NULL;
1644        }
1645        /*
1646         * Ensure there is no async event handler installed.
1647         * Only the primary process handles async device events.
1648         */
1649        mlx5_flow_counters_mng_close(sh);
1650        if (sh->ct_mng)
1651                mlx5_flow_aso_ct_mng_close(sh);
1652        if (sh->aso_age_mng) {
1653                mlx5_flow_aso_age_mng_close(sh);
1654                sh->aso_age_mng = NULL;
1655        }
1656        if (sh->mtrmng)
1657                mlx5_aso_flow_mtrs_mng_close(sh);
1658        mlx5_flow_ipool_destroy(sh);
1659        mlx5_os_dev_shared_handler_uninstall(sh);
1660        mlx5_rxtx_uars_release(sh);
1661        do {
1662                if (sh->tis[i])
1663                        claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
1664        } while (++i < sh->bond.n_port);
1665        if (sh->td)
1666                claim_zero(mlx5_devx_cmd_destroy(sh->td));
1667        MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
1668        pthread_mutex_destroy(&sh->txpp.mutex);
1669        mlx5_lwm_unset(sh);
1670        mlx5_free(sh);
1671        return;
1672exit:
1673        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
1674}
1675
1676/**
1677 * Destroy table hash list.
1678 *
1679 * @param[in] priv
1680 *   Pointer to the private device data structure.
1681 */
1682void
1683mlx5_free_table_hash_list(struct mlx5_priv *priv)
1684{
1685        struct mlx5_dev_ctx_shared *sh = priv->sh;
1686        struct mlx5_hlist **tbls = (priv->sh->config.dv_flow_en == 2) ?
1687                                   &sh->groups : &sh->flow_tbls;
1688        if (*tbls == NULL)
1689                return;
1690        mlx5_hlist_destroy(*tbls);
1691        *tbls = NULL;
1692}
1693
1694#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
1695/**
1696 * Allocate HW steering group hash list.
1697 *
1698 * @param[in] priv
1699 *   Pointer to the private device data structure.
1700 */
1701static int
1702mlx5_alloc_hw_group_hash_list(struct mlx5_priv *priv)
1703{
1704        int err = 0;
1705        struct mlx5_dev_ctx_shared *sh = priv->sh;
1706        char s[MLX5_NAME_SIZE];
1707
1708        MLX5_ASSERT(sh);
1709        snprintf(s, sizeof(s), "%s_flow_groups", priv->sh->ibdev_name);
1710        sh->groups = mlx5_hlist_create
1711                        (s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
1712                         false, true, sh,
1713                         flow_hw_grp_create_cb,
1714                         flow_hw_grp_match_cb,
1715                         flow_hw_grp_remove_cb,
1716                         flow_hw_grp_clone_cb,
1717                         flow_hw_grp_clone_free_cb);
1718        if (!sh->groups) {
1719                DRV_LOG(ERR, "flow groups with hash creation failed.");
1720                err = ENOMEM;
1721        }
1722        return err;
1723}
1724#endif
1725
1726
1727/**
1728 * Initialize flow table hash list and create the root tables entry
1729 * for each domain.
1730 *
1731 * @param[in] priv
1732 *   Pointer to the private device data structure.
1733 *
1734 * @return
1735 *   Zero on success, positive error code otherwise.
1736 */
1737int
1738mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
1739{
1740        int err = 0;
1741
1742        /* Tables are only used in DV and DR modes. */
1743#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
1744        struct mlx5_dev_ctx_shared *sh = priv->sh;
1745        char s[MLX5_NAME_SIZE];
1746
1747        if (priv->sh->config.dv_flow_en == 2)
1748                return mlx5_alloc_hw_group_hash_list(priv);
1749        MLX5_ASSERT(sh);
1750        snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
1751        sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
1752                                          false, true, sh,
1753                                          flow_dv_tbl_create_cb,
1754                                          flow_dv_tbl_match_cb,
1755                                          flow_dv_tbl_remove_cb,
1756                                          flow_dv_tbl_clone_cb,
1757                                          flow_dv_tbl_clone_free_cb);
1758        if (!sh->flow_tbls) {
1759                DRV_LOG(ERR, "flow tables with hash creation failed.");
1760                err = ENOMEM;
1761                return err;
1762        }
1763#ifndef HAVE_MLX5DV_DR
1764        struct rte_flow_error error;
1765        struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];
1766
1767        /*
1768         * In case we do not have DR support, the zero tables should be
1769         * created because DV expects to see them even if they cannot be
1770         * created by RDMA-CORE.
1771         */
1772        if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0,
1773                NULL, 0, 1, 0, &error) ||
1774            !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0,
1775                NULL, 0, 1, 0, &error) ||
1776            !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0,
1777                NULL, 0, 1, 0, &error)) {
1778                err = ENOMEM;
1779                goto error;
1780        }
1781        return err;
1782error:
1783        mlx5_free_table_hash_list(priv);
1784#endif /* HAVE_MLX5DV_DR */
1785#endif
1786        return err;
1787}
1788
1789/**
1790 * Retrieve integer value from environment variable.
1791 *
1792 * @param[in] name
1793 *   Environment variable name.
1794 *
1795 * @return
1796 *   Integer value, 0 if the variable is not set.
1797 */
1798int
1799mlx5_getenv_int(const char *name)
1800{
1801        const char *val = getenv(name);
1802
1803        if (val == NULL)
1804                return 0;
1805        return atoi(val);
1806}
1807
1808/**
1809 * DPDK callback to add a UDP tunnel port.
1810 *
1811 * @param[in] dev
1812 *   A pointer to the Ethernet device.
1813 * @param[in] udp_tunnel
1814 *   A pointer to the UDP tunnel description.
1815 *
1816 * @return
1817 *   0 on valid UDP ports and tunnels, -ENOTSUP otherwise.
1818 */
1819int
1820mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
1821                         struct rte_eth_udp_tunnel *udp_tunnel)
1822{
1823        MLX5_ASSERT(udp_tunnel != NULL);
1824        if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN &&
1825            udp_tunnel->udp_port == 4789)
1826                return 0;
1827        if (udp_tunnel->prot_type == RTE_ETH_TUNNEL_TYPE_VXLAN_GPE &&
1828            udp_tunnel->udp_port == 4790)
1829                return 0;
1830        return -ENOTSUP;
1831}
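
/*
 * Illustrative usage sketch (an assumption, not part of the original file):
 * this callback is reached through the generic ethdev API and only the IANA
 * default VXLAN/VXLAN-GPE ports are accepted, e.g.:
 *
 *     struct rte_eth_udp_tunnel tunnel = {
 *             .udp_port = 4789,
 *             .prot_type = RTE_ETH_TUNNEL_TYPE_VXLAN,
 *     };
 *     ret = rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel);  // returns 0
 */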
1832
1833/**
1834 * Initialize process private data structure.
1835 *
1836 * @param dev
1837 *   Pointer to Ethernet device structure.
1838 *
1839 * @return
1840 *   0 on success, a negative errno value otherwise and rte_errno is set.
1841 */
1842int
1843mlx5_proc_priv_init(struct rte_eth_dev *dev)
1844{
1845        struct mlx5_priv *priv = dev->data->dev_private;
1846        struct mlx5_proc_priv *ppriv;
1847        size_t ppriv_size;
1848
1849        mlx5_proc_priv_uninit(dev);
1850        /*
1851         * UAR register table follows the process private structure. BlueFlame
1852         * registers for Tx queues are stored in the table.
1853         */
1854        ppriv_size = sizeof(struct mlx5_proc_priv) +
1855                     priv->txqs_n * sizeof(struct mlx5_uar_data);
1856        ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
1857                            RTE_CACHE_LINE_SIZE, dev->device->numa_node);
1858        if (!ppriv) {
1859                rte_errno = ENOMEM;
1860                return -rte_errno;
1861        }
1862        ppriv->uar_table_sz = priv->txqs_n;
1863        dev->process_private = ppriv;
1864        if (rte_eal_process_type() == RTE_PROC_PRIMARY)
1865                priv->sh->pppriv = ppriv;
1866        return 0;
1867}
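
/*
 * Illustrative sizing sketch (an assumption, not part of the original file):
 * for a port configured with 8 Tx queues the allocation above is
 *
 *     sizeof(struct mlx5_proc_priv) + 8 * sizeof(struct mlx5_uar_data)
 *
 * and ppriv->uar_table_sz is set to 8, i.e. one UAR/BlueFlame slot per
 * Tx queue for this process.
 */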
1868
1869/**
1870 * Un-initialize process private data structure.
1871 *
1872 * @param dev
1873 *   Pointer to Ethernet device structure.
1874 */
1875void
1876mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
1877{
1878        if (!dev->process_private)
1879                return;
1880        mlx5_free(dev->process_private);
1881        dev->process_private = NULL;
1882}
1883
1884/**
1885 * DPDK callback to close the device.
1886 *
1887 * Destroy all queues and objects, free memory.
1888 *
1889 * @param dev
1890 *   Pointer to Ethernet device structure.
1891 */
1892int
1893mlx5_dev_close(struct rte_eth_dev *dev)
1894{
1895        struct mlx5_priv *priv = dev->data->dev_private;
1896        unsigned int i;
1897        int ret;
1898
1899        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1900                /* Check if process_private released. */
1901                if (!dev->process_private)
1902                        return 0;
1903                mlx5_tx_uar_uninit_secondary(dev);
1904                mlx5_proc_priv_uninit(dev);
1905                rte_eth_dev_release_port(dev);
1906                return 0;
1907        }
1908        if (!priv->sh)
1909                return 0;
1910        DRV_LOG(DEBUG, "port %u closing device \"%s\"",
1911                dev->data->port_id,
1912                ((priv->sh->cdev->ctx != NULL) ?
1913                mlx5_os_get_ctx_device_name(priv->sh->cdev->ctx) : ""));
1914        /*
1915         * If the default mreg copy action was already removed at the stop stage,
1916         * the search will find nothing and no further action will be taken.
1917         */
1918        mlx5_flow_stop_default(dev);
1919        mlx5_traffic_disable(dev);
1920        /*
1921         * If all the flows are already flushed in the device stop stage,
1922         * then this will return directly without any action.
1923         */
1924        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1925        mlx5_action_handle_flush(dev);
1926        mlx5_flow_meter_flush(dev, NULL);
1927        /* Prevent crashes when queues are still in use. */
1928        dev->rx_pkt_burst = rte_eth_pkt_burst_dummy;
1929        dev->tx_pkt_burst = rte_eth_pkt_burst_dummy;
1930        rte_wmb();
1931        /* Disable datapath on secondary process. */
1932        mlx5_mp_os_req_stop_rxtx(dev);
1933        /* Free the eCPRI flex parser resource. */
1934        mlx5_flex_parser_ecpri_release(dev);
1935        mlx5_flex_item_port_cleanup(dev);
1936#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
1937        flow_hw_resource_release(dev);
1938#endif
1939        if (priv->rxq_privs != NULL) {
1940                /* XXX race condition if mlx5_rx_burst() is still running. */
1941                rte_delay_us_sleep(1000);
1942                for (i = 0; (i != priv->rxqs_n); ++i)
1943                        mlx5_rxq_release(dev, i);
1944                priv->rxqs_n = 0;
1945                mlx5_free(priv->rxq_privs);
1946                priv->rxq_privs = NULL;
1947        }
1948        if (priv->txqs != NULL) {
1949                /* XXX race condition if mlx5_tx_burst() is still running. */
1950                rte_delay_us_sleep(1000);
1951                for (i = 0; (i != priv->txqs_n); ++i)
1952                        mlx5_txq_release(dev, i);
1953                priv->txqs_n = 0;
1954                priv->txqs = NULL;
1955        }
1956        mlx5_proc_priv_uninit(dev);
1957        if (priv->q_counters) {
1958                mlx5_devx_cmd_destroy(priv->q_counters);
1959                priv->q_counters = NULL;
1960        }
1961        if (priv->drop_queue.hrxq)
1962                mlx5_drop_action_destroy(dev);
1963        if (priv->mreg_cp_tbl)
1964                mlx5_hlist_destroy(priv->mreg_cp_tbl);
1965        mlx5_mprq_free_mp(dev);
1966        mlx5_os_free_shared_dr(priv);
1967        if (priv->rss_conf.rss_key != NULL)
1968                mlx5_free(priv->rss_conf.rss_key);
1969        if (priv->reta_idx != NULL)
1970                mlx5_free(priv->reta_idx);
1971        if (priv->sh->dev_cap.vf)
1972                mlx5_os_mac_addr_flush(dev);
1973        if (priv->nl_socket_route >= 0)
1974                close(priv->nl_socket_route);
1975        if (priv->nl_socket_rdma >= 0)
1976                close(priv->nl_socket_rdma);
1977        if (priv->vmwa_context)
1978                mlx5_vlan_vmwa_exit(priv->vmwa_context);
1979        ret = mlx5_hrxq_verify(dev);
1980        if (ret)
1981                DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
1982                        dev->data->port_id);
1983        ret = mlx5_ind_table_obj_verify(dev);
1984        if (ret)
1985                DRV_LOG(WARNING, "port %u some indirection table still remain",
1986                        dev->data->port_id);
1987        ret = mlx5_rxq_obj_verify(dev);
1988        if (ret)
1989                DRV_LOG(WARNING, "port %u some Rx queue objects still remain",
1990                        dev->data->port_id);
1991        ret = mlx5_ext_rxq_verify(dev);
1992        if (ret)
1993                DRV_LOG(WARNING, "Port %u some external RxQ still remain.",
1994                        dev->data->port_id);
1995        ret = mlx5_rxq_verify(dev);
1996        if (ret)
1997                DRV_LOG(WARNING, "port %u some Rx queues still remain",
1998                        dev->data->port_id);
1999        ret = mlx5_txq_obj_verify(dev);
2000        if (ret)
2001                DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain",
2002                        dev->data->port_id);
2003        ret = mlx5_txq_verify(dev);
2004        if (ret)
2005                DRV_LOG(WARNING, "port %u some Tx queues still remain",
2006                        dev->data->port_id);
2007        ret = mlx5_flow_verify(dev);
2008        if (ret)
2009                DRV_LOG(WARNING, "port %u some flows still remain",
2010                        dev->data->port_id);
2011        if (priv->hrxqs)
2012                mlx5_list_destroy(priv->hrxqs);
2013        mlx5_free(priv->ext_rxqs);
2014        /*
2015         * Free the shared context last, because the cleanup
2016         * routines above may still use some shared fields, e.g.
2017         * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving
2018         * the ifindex if Netlink fails.
2019         */
2020        mlx5_free_shared_dev_ctx(priv->sh);
2021        if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
2022                unsigned int c = 0;
2023                uint16_t port_id;
2024
2025                MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
2026                        struct mlx5_priv *opriv =
2027                                rte_eth_devices[port_id].data->dev_private;
2028
2029                        if (!opriv ||
2030                            opriv->domain_id != priv->domain_id ||
2031                            &rte_eth_devices[port_id] == dev)
2032                                continue;
2033                        ++c;
2034                        break;
2035                }
2036                if (!c)
2037                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
2038        }
2039        memset(priv, 0, sizeof(*priv));
2040        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
2041        /*
2042         * Reset mac_addrs to NULL such that it is not freed as part of
2043         * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
2044         * it is freed when dev_private is freed.
2045         */
2046        dev->data->mac_addrs = NULL;
2047        return 0;
2048}
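
/*
 * Illustrative teardown sketch (an assumption, not part of the original file):
 * the callback above is reached through the generic ethdev shutdown sequence,
 * e.g.:
 *
 *     ret = rte_eth_dev_stop(port_id);
 *     if (ret == 0)
 *             ret = rte_eth_dev_close(port_id);  // ends up in mlx5_dev_close()
 */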
2049
2050const struct eth_dev_ops mlx5_dev_ops = {
2051        .dev_configure = mlx5_dev_configure,
2052        .dev_start = mlx5_dev_start,
2053        .dev_stop = mlx5_dev_stop,
2054        .dev_set_link_down = mlx5_set_link_down,
2055        .dev_set_link_up = mlx5_set_link_up,
2056        .dev_close = mlx5_dev_close,
2057        .promiscuous_enable = mlx5_promiscuous_enable,
2058        .promiscuous_disable = mlx5_promiscuous_disable,
2059        .allmulticast_enable = mlx5_allmulticast_enable,
2060        .allmulticast_disable = mlx5_allmulticast_disable,
2061        .link_update = mlx5_link_update,
2062        .stats_get = mlx5_stats_get,
2063        .stats_reset = mlx5_stats_reset,
2064        .xstats_get = mlx5_xstats_get,
2065        .xstats_reset = mlx5_xstats_reset,
2066        .xstats_get_names = mlx5_xstats_get_names,
2067        .fw_version_get = mlx5_fw_version_get,
2068        .dev_infos_get = mlx5_dev_infos_get,
2069        .representor_info_get = mlx5_representor_info_get,
2070        .read_clock = mlx5_txpp_read_clock,
2071        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
2072        .vlan_filter_set = mlx5_vlan_filter_set,
2073        .rx_queue_setup = mlx5_rx_queue_setup,
2074        .rx_queue_avail_thresh_set = mlx5_rx_queue_lwm_set,
2075        .rx_queue_avail_thresh_query = mlx5_rx_queue_lwm_query,
2076        .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
2077        .tx_queue_setup = mlx5_tx_queue_setup,
2078        .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
2079        .rx_queue_release = mlx5_rx_queue_release,
2080        .tx_queue_release = mlx5_tx_queue_release,
2081        .rx_queue_start = mlx5_rx_queue_start,
2082        .rx_queue_stop = mlx5_rx_queue_stop,
2083        .tx_queue_start = mlx5_tx_queue_start,
2084        .tx_queue_stop = mlx5_tx_queue_stop,
2085        .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
2086        .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
2087        .mac_addr_remove = mlx5_mac_addr_remove,
2088        .mac_addr_add = mlx5_mac_addr_add,
2089        .mac_addr_set = mlx5_mac_addr_set,
2090        .set_mc_addr_list = mlx5_set_mc_addr_list,
2091        .mtu_set = mlx5_dev_set_mtu,
2092        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
2093        .vlan_offload_set = mlx5_vlan_offload_set,
2094        .reta_update = mlx5_dev_rss_reta_update,
2095        .reta_query = mlx5_dev_rss_reta_query,
2096        .rss_hash_update = mlx5_rss_hash_update,
2097        .rss_hash_conf_get = mlx5_rss_hash_conf_get,
2098        .flow_ops_get = mlx5_flow_ops_get,
2099        .rxq_info_get = mlx5_rxq_info_get,
2100        .txq_info_get = mlx5_txq_info_get,
2101        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
2102        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
2103        .rx_queue_intr_enable = mlx5_rx_intr_enable,
2104        .rx_queue_intr_disable = mlx5_rx_intr_disable,
2105        .is_removed = mlx5_is_removed,
2106        .udp_tunnel_port_add  = mlx5_udp_tunnel_port_add,
2107        .get_module_info = mlx5_get_module_info,
2108        .get_module_eeprom = mlx5_get_module_eeprom,
2109        .hairpin_cap_get = mlx5_hairpin_cap_get,
2110        .mtr_ops_get = mlx5_flow_meter_ops_get,
2111        .hairpin_bind = mlx5_hairpin_bind,
2112        .hairpin_unbind = mlx5_hairpin_unbind,
2113        .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports,
2114        .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
2115        .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
2116        .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
2117        .get_monitor_addr = mlx5_get_monitor_addr,
2118};
2119
2120/* Available operations from secondary process. */
2121const struct eth_dev_ops mlx5_dev_sec_ops = {
2122        .stats_get = mlx5_stats_get,
2123        .stats_reset = mlx5_stats_reset,
2124        .xstats_get = mlx5_xstats_get,
2125        .xstats_reset = mlx5_xstats_reset,
2126        .xstats_get_names = mlx5_xstats_get_names,
2127        .fw_version_get = mlx5_fw_version_get,
2128        .dev_infos_get = mlx5_dev_infos_get,
2129        .representor_info_get = mlx5_representor_info_get,
2130        .read_clock = mlx5_txpp_read_clock,
2131        .rx_queue_start = mlx5_rx_queue_start,
2132        .rx_queue_stop = mlx5_rx_queue_stop,
2133        .tx_queue_start = mlx5_tx_queue_start,
2134        .tx_queue_stop = mlx5_tx_queue_stop,
2135        .rxq_info_get = mlx5_rxq_info_get,
2136        .txq_info_get = mlx5_txq_info_get,
2137        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
2138        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
2139        .get_module_info = mlx5_get_module_info,
2140        .get_module_eeprom = mlx5_get_module_eeprom,
2141};
2142
2143/* Available operations in flow isolated mode. */
2144const struct eth_dev_ops mlx5_dev_ops_isolate = {
2145        .dev_configure = mlx5_dev_configure,
2146        .dev_start = mlx5_dev_start,
2147        .dev_stop = mlx5_dev_stop,
2148        .dev_set_link_down = mlx5_set_link_down,
2149        .dev_set_link_up = mlx5_set_link_up,
2150        .dev_close = mlx5_dev_close,
2151        .promiscuous_enable = mlx5_promiscuous_enable,
2152        .promiscuous_disable = mlx5_promiscuous_disable,
2153        .allmulticast_enable = mlx5_allmulticast_enable,
2154        .allmulticast_disable = mlx5_allmulticast_disable,
2155        .link_update = mlx5_link_update,
2156        .stats_get = mlx5_stats_get,
2157        .stats_reset = mlx5_stats_reset,
2158        .xstats_get = mlx5_xstats_get,
2159        .xstats_reset = mlx5_xstats_reset,
2160        .xstats_get_names = mlx5_xstats_get_names,
2161        .fw_version_get = mlx5_fw_version_get,
2162        .dev_infos_get = mlx5_dev_infos_get,
2163        .representor_info_get = mlx5_representor_info_get,
2164        .read_clock = mlx5_txpp_read_clock,
2165        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
2166        .vlan_filter_set = mlx5_vlan_filter_set,
2167        .rx_queue_setup = mlx5_rx_queue_setup,
2168        .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
2169        .tx_queue_setup = mlx5_tx_queue_setup,
2170        .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
2171        .rx_queue_release = mlx5_rx_queue_release,
2172        .tx_queue_release = mlx5_tx_queue_release,
2173        .rx_queue_start = mlx5_rx_queue_start,
2174        .rx_queue_stop = mlx5_rx_queue_stop,
2175        .tx_queue_start = mlx5_tx_queue_start,
2176        .tx_queue_stop = mlx5_tx_queue_stop,
2177        .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
2178        .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
2179        .mac_addr_remove = mlx5_mac_addr_remove,
2180        .mac_addr_add = mlx5_mac_addr_add,
2181        .mac_addr_set = mlx5_mac_addr_set,
2182        .set_mc_addr_list = mlx5_set_mc_addr_list,
2183        .mtu_set = mlx5_dev_set_mtu,
2184        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
2185        .vlan_offload_set = mlx5_vlan_offload_set,
2186        .flow_ops_get = mlx5_flow_ops_get,
2187        .rxq_info_get = mlx5_rxq_info_get,
2188        .txq_info_get = mlx5_txq_info_get,
2189        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
2190        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
2191        .rx_queue_intr_enable = mlx5_rx_intr_enable,
2192        .rx_queue_intr_disable = mlx5_rx_intr_disable,
2193        .is_removed = mlx5_is_removed,
2194        .get_module_info = mlx5_get_module_info,
2195        .get_module_eeprom = mlx5_get_module_eeprom,
2196        .hairpin_cap_get = mlx5_hairpin_cap_get,
2197        .mtr_ops_get = mlx5_flow_meter_ops_get,
2198        .hairpin_bind = mlx5_hairpin_bind,
2199        .hairpin_unbind = mlx5_hairpin_unbind,
2200        .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports,
2201        .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
2202        .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
2203        .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
2204        .get_monitor_addr = mlx5_get_monitor_addr,
2205};
2206
2207/**
2208 * Verify and store value for device argument.
2209 *
2210 * @param[in] key
2211 *   Key argument to verify.
2212 * @param[in] val
2213 *   Value associated with key.
2214 * @param opaque
2215 *   User data.
2216 *
2217 * @return
2218 *   0 on success, a negative errno value otherwise and rte_errno is set.
2219 */
2220static int
2221mlx5_port_args_check_handler(const char *key, const char *val, void *opaque)
2222{
2223        struct mlx5_port_config *config = opaque;
2224        signed long tmp;
2225
2226        /* No-op, port representors are processed in mlx5_dev_spawn(). */
2227        if (!strcmp(MLX5_REPRESENTOR, key))
2228                return 0;
2229        errno = 0;
2230        tmp = strtol(val, NULL, 0);
2231        if (errno) {
2232                rte_errno = errno;
2233                DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
2234                return -rte_errno;
2235        }
2236        if (tmp < 0) {
2237                /* Negative values are acceptable for some keys only. */
2238                rte_errno = EINVAL;
2239                DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
2240                return -rte_errno;
2241        }
2242        if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
2243                if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
2244                        DRV_LOG(ERR, "invalid CQE compression "
2245                                     "format parameter");
2246                        rte_errno = EINVAL;
2247                        return -rte_errno;
2248                }
2249                config->cqe_comp = !!tmp;
2250                config->cqe_comp_fmt = tmp;
2251        } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
2252                config->hw_padding = !!tmp;
2253        } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
2254                config->mprq.enabled = !!tmp;
2255        } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) {
2256                config->mprq.log_stride_num = tmp;
2257        } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) {
2258                config->mprq.log_stride_size = tmp;
2259        } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) {
2260                config->mprq.max_memcpy_len = tmp;
2261        } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
2262                config->mprq.min_rxqs_num = tmp;
2263        } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
2264                DRV_LOG(WARNING, "%s: deprecated parameter,"
2265                                 " converted to txq_inline_max", key);
2266                config->txq_inline_max = tmp;
2267        } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) {
2268                config->txq_inline_max = tmp;
2269        } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) {
2270                config->txq_inline_min = tmp;
2271        } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) {
2272                config->txq_inline_mpw = tmp;
2273        } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
2274                config->txqs_inline = tmp;
2275        } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
2276                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
2277        } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
2278                config->mps = !!tmp;
2279        } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
2280                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
2281        } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
2282                DRV_LOG(WARNING, "%s: deprecated parameter,"
2283                                 " converted to txq_inline_mpw", key);
2284                config->txq_inline_mpw = tmp;
2285        } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
2286                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
2287        } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
2288                config->rx_vec_en = !!tmp;
2289        } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) {
2290                config->max_dump_files_num = tmp;
2291        } else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) {
2292                config->lro_timeout = tmp;
2293        } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) {
2294                config->log_hp_size = tmp;
2295        } else if (strcmp(MLX5_DELAY_DROP, key) == 0) {
2296                config->std_delay_drop = !!(tmp & MLX5_DELAY_DROP_STANDARD);
2297                config->hp_delay_drop = !!(tmp & MLX5_DELAY_DROP_HAIRPIN);
2298        }
2299        return 0;
2300}
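
/*
 * Illustrative devargs sketch (an assumption, not part of the original file):
 * the keys handled above are supplied per device on the EAL command line and
 * each key/value pair is fed to mlx5_port_args_check_handler() as strings,
 * e.g.:
 *
 *     -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,txq_inline_max=256
 *
 * which enables CQE compression and Multi-Packet RQ and sets the maximal
 * inline data size to 256 bytes.
 */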
2301
2302/**
2303 * Parse user port parameters and adjust them according to device capabilities.
2304 *
2305 * @param priv
2306 *   Pointer to the private device data structure.
2307 * @param mkvlist
2308 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
2309 * @param config
2310 *   Pointer to port configuration structure.
2311 *
2312 * @return
2313 *   0 on success, a negative errno value otherwise and rte_errno is set.
2314 */
2315int
2316mlx5_port_args_config(struct mlx5_priv *priv, struct mlx5_kvargs_ctrl *mkvlist,
2317                      struct mlx5_port_config *config)
2318{
2319        struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
2320        struct mlx5_dev_cap *dev_cap = &priv->sh->dev_cap;
2321        bool devx = priv->sh->cdev->config.devx;
2322        const char **params = (const char *[]){
2323                MLX5_RXQ_CQE_COMP_EN,
2324                MLX5_RXQ_PKT_PAD_EN,
2325                MLX5_RX_MPRQ_EN,
2326                MLX5_RX_MPRQ_LOG_STRIDE_NUM,
2327                MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
2328                MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
2329                MLX5_RXQS_MIN_MPRQ,
2330                MLX5_TXQ_INLINE,
2331                MLX5_TXQ_INLINE_MIN,
2332                MLX5_TXQ_INLINE_MAX,
2333                MLX5_TXQ_INLINE_MPW,
2334                MLX5_TXQS_MIN_INLINE,
2335                MLX5_TXQS_MAX_VEC,
2336                MLX5_TXQ_MPW_EN,
2337                MLX5_TXQ_MPW_HDR_DSEG_EN,
2338                MLX5_TXQ_MAX_INLINE_LEN,
2339                MLX5_TX_VEC_EN,
2340                MLX5_RX_VEC_EN,
2341                MLX5_REPRESENTOR,
2342                MLX5_MAX_DUMP_FILES_NUM,
2343                MLX5_LRO_TIMEOUT_USEC,
2344                MLX5_HP_BUF_SIZE,
2345                MLX5_DELAY_DROP,
2346                NULL,
2347        };
2348        int ret = 0;
2349
2350        /* Default configuration. */
2351        memset(config, 0, sizeof(*config));
2352        config->mps = MLX5_ARG_UNSET;
2353        config->cqe_comp = 1;
2354        config->rx_vec_en = 1;
2355        config->txq_inline_max = MLX5_ARG_UNSET;
2356        config->txq_inline_min = MLX5_ARG_UNSET;
2357        config->txq_inline_mpw = MLX5_ARG_UNSET;
2358        config->txqs_inline = MLX5_ARG_UNSET;
2359        config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
2360        config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
2361        config->mprq.log_stride_num = MLX5_MPRQ_DEFAULT_LOG_STRIDE_NUM;
2362        config->log_hp_size = MLX5_ARG_UNSET;
2363        config->std_delay_drop = 0;
2364        config->hp_delay_drop = 0;
2365        if (mkvlist != NULL) {
2366                /* Process parameters. */
2367                ret = mlx5_kvargs_process(mkvlist, params,
2368                                          mlx5_port_args_check_handler, config);
2369                if (ret) {
2370                        DRV_LOG(ERR, "Failed to process port arguments: %s",
2371                                strerror(rte_errno));
2372                        return -rte_errno;
2373                }
2374        }
2375        /* Adjust parameters according to device capabilities. */
2376        if (config->hw_padding && !dev_cap->hw_padding) {
2377                DRV_LOG(DEBUG, "Rx end alignment padding isn't supported.");
2378                config->hw_padding = 0;
2379        } else if (config->hw_padding) {
2380                DRV_LOG(DEBUG, "Rx end alignment padding is enabled.");
2381        }
2382        /*
2383         * MPW is disabled by default, while the Enhanced MPW is enabled
2384         * by default.
2385         */
2386        if (config->mps == MLX5_ARG_UNSET)
2387                config->mps = (dev_cap->mps == MLX5_MPW_ENHANCED) ?
2388                              MLX5_MPW_ENHANCED : MLX5_MPW_DISABLED;
2389        else
2390                config->mps = config->mps ? dev_cap->mps : MLX5_MPW_DISABLED;
2391        DRV_LOG(INFO, "%sMPS is %s",
2392                config->mps == MLX5_MPW_ENHANCED ? "enhanced " :
2393                config->mps == MLX5_MPW ? "legacy " : "",
2394                config->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
2395        /* LRO is supported only when DV flow is enabled. */
2396        if (dev_cap->lro_supported && !priv->sh->config.dv_flow_en)
2397                dev_cap->lro_supported = 0;
2398        if (dev_cap->lro_supported) {
2399                /*
2400                 * If LRO timeout is not configured by application,
2401                 * use the minimal supported value.
2402                 */
2403                if (!config->lro_timeout)
2404                        config->lro_timeout =
2405                                       hca_attr->lro_timer_supported_periods[0];
2406                DRV_LOG(DEBUG, "LRO session timeout set to %d usec.",
2407                        config->lro_timeout);
2408        }
2409        if (config->cqe_comp && !dev_cap->cqe_comp) {
2410                DRV_LOG(WARNING, "Rx CQE 128B compression is not supported.");
2411                config->cqe_comp = 0;
2412        }
2413        if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX &&
2414            (!devx || !hca_attr->mini_cqe_resp_flow_tag)) {
2415                DRV_LOG(WARNING,
2416                        "Flow Tag CQE compression format isn't supported.");
2417                config->cqe_comp = 0;
2418        }
2419        if (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX &&
2420            (!devx || !hca_attr->mini_cqe_resp_l3_l4_tag)) {
2421                DRV_LOG(WARNING,
2422                        "L3/L4 Header CQE compression format isn't supported.");
2423                config->cqe_comp = 0;
2424        }
2425        DRV_LOG(DEBUG, "Rx CQE compression is %ssupported.",
2426                config->cqe_comp ? "" : "not ");
2427        if ((config->std_delay_drop || config->hp_delay_drop) &&
2428            !dev_cap->rq_delay_drop_en) {
2429                config->std_delay_drop = 0;
2430                config->hp_delay_drop = 0;
2431                DRV_LOG(WARNING, "dev_port-%u: Rxq delay drop isn't supported.",
2432                        priv->dev_port);
2433        }
2434        if (config->mprq.enabled && !priv->sh->dev_cap.mprq.enabled) {
2435                DRV_LOG(WARNING, "Multi-Packet RQ isn't supported.");
2436                config->mprq.enabled = 0;
2437        }
2438        if (config->max_dump_files_num == 0)
2439                config->max_dump_files_num = 128;
2440        /* Detect minimal data bytes to inline. */
2441        mlx5_set_min_inline(priv);
2442        DRV_LOG(DEBUG, "VLAN insertion in WQE is %ssupported.",
2443                config->hw_vlan_insert ? "" : "not ");
2444        DRV_LOG(DEBUG, "\"rxq_pkt_pad_en\" is %u.", config->hw_padding);
2445        DRV_LOG(DEBUG, "\"rxq_cqe_comp_en\" is %u.", config->cqe_comp);
2446        DRV_LOG(DEBUG, "\"cqe_comp_fmt\" is %u.", config->cqe_comp_fmt);
2447        DRV_LOG(DEBUG, "\"rx_vec_en\" is %u.", config->rx_vec_en);
2448        DRV_LOG(DEBUG, "Standard \"delay_drop\" is %u.",
2449                config->std_delay_drop);
2450        DRV_LOG(DEBUG, "Hairpin \"delay_drop\" is %u.", config->hp_delay_drop);
2451        DRV_LOG(DEBUG, "\"max_dump_files_num\" is %u.",
2452                config->max_dump_files_num);
2453        DRV_LOG(DEBUG, "\"log_hp_size\" is %u.", config->log_hp_size);
2454        DRV_LOG(DEBUG, "\"mprq_en\" is %u.", config->mprq.enabled);
2455        DRV_LOG(DEBUG, "\"mprq_log_stride_num\" is %u.",
2456                config->mprq.log_stride_num);
2457        DRV_LOG(DEBUG, "\"mprq_log_stride_size\" is %u.",
2458                config->mprq.log_stride_size);
2459        DRV_LOG(DEBUG, "\"mprq_max_memcpy_len\" is %u.",
2460                config->mprq.max_memcpy_len);
2461        DRV_LOG(DEBUG, "\"rxqs_min_mprq\" is %u.", config->mprq.min_rxqs_num);
2462        DRV_LOG(DEBUG, "\"lro_timeout_usec\" is %u.", config->lro_timeout);
2463        DRV_LOG(DEBUG, "\"txq_mpw_en\" is %d.", config->mps);
2464        DRV_LOG(DEBUG, "\"txqs_min_inline\" is %d.", config->txqs_inline);
2465        DRV_LOG(DEBUG, "\"txq_inline_min\" is %d.", config->txq_inline_min);
2466        DRV_LOG(DEBUG, "\"txq_inline_max\" is %d.", config->txq_inline_max);
2467        DRV_LOG(DEBUG, "\"txq_inline_mpw\" is %d.", config->txq_inline_mpw);
2468        return 0;
2469}
2470
2471/**
2472 * Print the key for device argument.
2473 *
2474 * It is a "dummy" handler whose whole purpose is to enable using the
2475 * mlx5_kvargs_process() function, which marks the devargs as used.
2476 *
2477 * @param key
2478 *   Key argument.
2479 * @param val
2480 *   Value associated with key, unused.
2481 * @param opaque
2482 *   Unused, can be NULL.
2483 *
2484 * @return
2485 *   0 on success, function cannot fail.
2486 */
2487static int
2488mlx5_dummy_handler(const char *key, const char *val, void *opaque)
2489{
2490        DRV_LOG(DEBUG, "\tKey: \"%s\" is set as used.", key);
2491        RTE_SET_USED(opaque);
2492        RTE_SET_USED(val);
2493        return 0;
2494}
2495
2496/**
2497 * Set requested devargs as used when the device is already spawned.
2498 *
2499 * This is necessary since it is valid to probe again an existing device;
2500 * if its devargs are not marked as used, mlx5_kvargs_validate() will fail.
2501 *
2502 * @param name
2503 *   Name of the existing device.
2504 * @param port_id
2505 *   Port identifier of the device.
2506 * @param mkvlist
2507 *   Pointer to mlx5 kvargs control to mark as used.
2508 */
2509void
2510mlx5_port_args_set_used(const char *name, uint16_t port_id,
2511                        struct mlx5_kvargs_ctrl *mkvlist)
2512{
2513        const char **params = (const char *[]){
2514                MLX5_RXQ_CQE_COMP_EN,
2515                MLX5_RXQ_PKT_PAD_EN,
2516                MLX5_RX_MPRQ_EN,
2517                MLX5_RX_MPRQ_LOG_STRIDE_NUM,
2518                MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
2519                MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
2520                MLX5_RXQS_MIN_MPRQ,
2521                MLX5_TXQ_INLINE,
2522                MLX5_TXQ_INLINE_MIN,
2523                MLX5_TXQ_INLINE_MAX,
2524                MLX5_TXQ_INLINE_MPW,
2525                MLX5_TXQS_MIN_INLINE,
2526                MLX5_TXQS_MAX_VEC,
2527                MLX5_TXQ_MPW_EN,
2528                MLX5_TXQ_MPW_HDR_DSEG_EN,
2529                MLX5_TXQ_MAX_INLINE_LEN,
2530                MLX5_TX_VEC_EN,
2531                MLX5_RX_VEC_EN,
2532                MLX5_REPRESENTOR,
2533                MLX5_MAX_DUMP_FILES_NUM,
2534                MLX5_LRO_TIMEOUT_USEC,
2535                MLX5_HP_BUF_SIZE,
2536                MLX5_DELAY_DROP,
2537                NULL,
2538        };
2539
2540        /* Secondary process should not handle devargs. */
2541        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2542                return;
2543        MLX5_ASSERT(mkvlist != NULL);
2544        DRV_LOG(DEBUG, "Ethernet device \"%s\" for port %u "
2545                "already exists, set devargs as used:", name, port_id);
2546        /* This function cannot fail with this handler. */
2547        mlx5_kvargs_process(mkvlist, params, mlx5_dummy_handler, NULL);
2548}
2549
2550/**
2551 * Check sibling device configurations when probing again.
2552 *
2553 * Sibling devices sharing an Infiniband device context should have compatible
2554 * configurations. This applies to representors and bonding devices.
2555 *
2556 * @param cdev
2557 *   Pointer to mlx5 device structure.
2558 * @param mkvlist
2559 *   Pointer to mlx5 kvargs control, can be NULL if there is no devargs.
2560 *
2561 * @return
2562 *   0 on success, a negative errno value otherwise and rte_errno is set.
2563 */
2564int
2565mlx5_probe_again_args_validate(struct mlx5_common_device *cdev,
2566                               struct mlx5_kvargs_ctrl *mkvlist)
2567{
2568        struct mlx5_dev_ctx_shared *sh = NULL;
2569        struct mlx5_sh_config *config;
2570        int ret;
2571
2572        /* Secondary process should not handle devargs. */
2573        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
2574                return 0;
2575        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
2576        /* Search for IB context by common device pointer. */
2577        LIST_FOREACH(sh, &mlx5_dev_ctx_list, next)
2578                if (sh->cdev == cdev)
2579                        break;
2580        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
2581        /* There is no sh for this device -> it isn't a probe-again call. */
2582        if (sh == NULL)
2583                return 0;
2584        config = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
2585                             sizeof(struct mlx5_sh_config),
2586                             RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
2587        if (config == NULL) {
2588                rte_errno = ENOMEM;
2589                return -rte_errno;
2590        }
2591        /*
2592         * Create a temporary IB context configuration structure according to
2593         * the new devargs attached in this probe-again call.
2594         */
2595        ret = mlx5_shared_dev_ctx_args_config(sh, mkvlist, config);
2596        if (ret) {
2597                DRV_LOG(ERR, "Failed to process device configuration: %s",
2598                        strerror(rte_errno));
2599                mlx5_free(config);
2600                return ret;
2601        }
2602        /*
2603         * Check that the temporary structure matches the existing
2604         * IB context structure.
2605         */
2606        if (sh->config.dv_flow_en ^ config->dv_flow_en) {
2607                DRV_LOG(ERR, "\"dv_flow_en\" "
2608                        "configuration mismatch for shared %s context.",
2609                        sh->ibdev_name);
2610                goto error;
2611        }
2612        if ((sh->config.dv_xmeta_en ^ config->dv_xmeta_en) ||
2613            (sh->config.dv_miss_info ^ config->dv_miss_info)) {
2614                DRV_LOG(ERR, "\"dv_xmeta_en\" "
2615                        "configuration mismatch for shared %s context.",
2616                        sh->ibdev_name);
2617                goto error;
2618        }
2619        if (sh->config.dv_esw_en ^ config->dv_esw_en) {
2620                DRV_LOG(ERR, "\"dv_esw_en\" "
2621                        "configuration mismatch for shared %s context.",
2622                        sh->ibdev_name);
2623                goto error;
2624        }
2625        if (sh->config.reclaim_mode ^ config->reclaim_mode) {
2626                DRV_LOG(ERR, "\"reclaim_mode\" "
2627                        "configuration mismatch for shared %s context.",
2628                        sh->ibdev_name);
2629                goto error;
2630        }
2631        if (sh->config.allow_duplicate_pattern ^
2632            config->allow_duplicate_pattern) {
2633                DRV_LOG(ERR, "\"allow_duplicate_pattern\" "
2634                        "configuration mismatch for shared %s context.",
2635                        sh->ibdev_name);
2636                goto error;
2637        }
2638        if (sh->config.l3_vxlan_en ^ config->l3_vxlan_en) {
2639                DRV_LOG(ERR, "\"l3_vxlan_en\" "
2640                        "configuration mismatch for shared %s context.",
2641                        sh->ibdev_name);
2642                goto error;
2643        }
2644        if (sh->config.decap_en ^ config->decap_en) {
2645                DRV_LOG(ERR, "\"decap_en\" "
2646                        "configuration mismatch for shared %s context.",
2647                        sh->ibdev_name);
2648                goto error;
2649        }
2650        if (sh->config.lacp_by_user ^ config->lacp_by_user) {
2651                DRV_LOG(ERR, "\"lacp_by_user\" "
2652                        "configuration mismatch for shared %s context.",
2653                        sh->ibdev_name);
2654                goto error;
2655        }
2656        if (sh->config.tx_pp ^ config->tx_pp) {
2657                DRV_LOG(ERR, "\"tx_pp\" "
2658                        "configuration mismatch for shared %s context.",
2659                        sh->ibdev_name);
2660                goto error;
2661        }
2662        if (sh->config.tx_skew ^ config->tx_skew) {
2663                DRV_LOG(ERR, "\"tx_skew\" "
2664                        "configuration mismatch for shared %s context.",
2665                        sh->ibdev_name);
2666                goto error;
2667        }
2668        mlx5_free(config);
2669        return 0;
2670error:
2671        mlx5_free(config);
2672        rte_errno = EINVAL;
2673        return -rte_errno;
2674}
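
/*
 * Illustrative sketch (an assumption, not part of the original file): probing
 * the same device again with conflicting shared-context devargs is rejected
 * here, e.g. a first probe with
 *
 *     -a 0000:03:00.0,dv_flow_en=1
 *
 * followed by a hot-plug probe of the same device with dv_flow_en=0 fails
 * with EINVAL and a "dv_flow_en" configuration mismatch log.
 */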
2675
2676/**
2677 * Configures the minimal amount of data to inline into WQE
2678 * while sending packets.
2679 *
2680 * - txq_inline_min has the highest priority if this
2681 *   key is specified in devargs;
2682 * - if DevX is enabled, the inline mode is queried from the
2683 *   device (HCA attributes and NIC vport context if needed);
2684 * - otherwise L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
2685 *   and none (0 bytes) for other NICs.
2686 *
2687 * @param priv
2688 *   Pointer to the private device data structure.
2689 */
2690void
2691mlx5_set_min_inline(struct mlx5_priv *priv)
2692{
2693        struct mlx5_hca_attr *hca_attr = &priv->sh->cdev->config.hca_attr;
2694        struct mlx5_port_config *config = &priv->config;
2695
2696        if (config->txq_inline_min != MLX5_ARG_UNSET) {
2697                /* Application defines size of inlined data explicitly. */
2698                if (priv->pci_dev != NULL) {
2699                        switch (priv->pci_dev->id.device_id) {
2700                        case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2701                        case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2702                                if (config->txq_inline_min <
2703                                               (int)MLX5_INLINE_HSIZE_L2) {
2704                                        DRV_LOG(DEBUG,
2705                                                "txq_inline_min aligned to minimal ConnectX-4 required value %d",
2706                                                (int)MLX5_INLINE_HSIZE_L2);
2707                                        config->txq_inline_min =
2708                                                        MLX5_INLINE_HSIZE_L2;
2709                                }
2710                                break;
2711                        }
2712                }
2713                goto exit;
2714        }
2715        if (hca_attr->eth_net_offloads) {
2716                /* We have DevX enabled, inline mode queried successfully. */
2717                switch (hca_attr->wqe_inline_mode) {
2718                case MLX5_CAP_INLINE_MODE_L2:
2719                        /* outer L2 header must be inlined. */
2720                        config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2721                        goto exit;
2722                case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2723                        /* No inline data are required by NIC. */
2724                        config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2725                        config->hw_vlan_insert =
2726                                hca_attr->wqe_vlan_insert;
2727                        DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
2728                        goto exit;
2729                case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2730                        /* inline mode is defined by NIC vport context. */
2731                        if (!hca_attr->eth_virt)
2732                                break;
2733                        switch (hca_attr->vport_inline_mode) {
2734                        case MLX5_INLINE_MODE_NONE:
2735                                config->txq_inline_min =
2736                                        MLX5_INLINE_HSIZE_NONE;
2737                                goto exit;
2738                        case MLX5_INLINE_MODE_L2:
2739                                config->txq_inline_min =
2740                                        MLX5_INLINE_HSIZE_L2;
2741                                goto exit;
2742                        case MLX5_INLINE_MODE_IP:
2743                                config->txq_inline_min =
2744                                        MLX5_INLINE_HSIZE_L3;
2745                                goto exit;
2746                        case MLX5_INLINE_MODE_TCP_UDP:
2747                                config->txq_inline_min =
2748                                        MLX5_INLINE_HSIZE_L4;
2749                                goto exit;
2750                        case MLX5_INLINE_MODE_INNER_L2:
2751                                config->txq_inline_min =
2752                                        MLX5_INLINE_HSIZE_INNER_L2;
2753                                goto exit;
2754                        case MLX5_INLINE_MODE_INNER_IP:
2755                                config->txq_inline_min =
2756                                        MLX5_INLINE_HSIZE_INNER_L3;
2757                                goto exit;
2758                        case MLX5_INLINE_MODE_INNER_TCP_UDP:
2759                                config->txq_inline_min =
2760                                        MLX5_INLINE_HSIZE_INNER_L4;
2761                                goto exit;
2762                        }
2763                }
2764        }
2765        if (priv->pci_dev == NULL) {
2766                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2767                goto exit;
2768        }
2769        /*
2770         * We get here if we are unable to deduce the
2771         * inline data size with DevX. Fall back to the
2772         * PCI ID to recognize older NICs.
2773         */
2774        switch (priv->pci_dev->id.device_id) {
2775        case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2776        case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2777        case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
2778        case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
2779                config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2780                config->hw_vlan_insert = 0;
2781                break;
2782        case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
2783        case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
2784        case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
2785        case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
2786                /*
2787                 * These NICs support VLAN insertion from WQE and report
2788                 * the wqe_vlan_insert flag, but a bug may break PFC
2789                 * control, so the feature is disabled.
2790                 */
2791                config->hw_vlan_insert = 0;
2792                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2793                break;
2794        default:
2795                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2796                break;
2797        }
2798exit:
2799        DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
2800}
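
/*
 * Hedged usage note, not part of the upstream driver: the function above
 * only deduces the minimal inline header size the NIC requires. A specific
 * value can also be requested through the "txq_inline_min" device argument
 * declared earlier in this file; the PCI address and byte count below are
 * illustrative assumptions only:
 *
 *	dpdk-testpmd -a 0000:3b:00.0,txq_inline_min=18 -- -i
 *
 * where the value should be large enough to cover at least the outer L2
 * header on NICs that require it (e.g. ConnectX-4, as handled above).
 */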
2801
2802/**
2803 * Configures the metadata mask fields in the shared context.
2804 *
2805 * @param [in] dev
2806 *   Pointer to Ethernet device.
2807 */
2808void
2809mlx5_set_metadata_mask(struct rte_eth_dev *dev)
2810{
2811        struct mlx5_priv *priv = dev->data->dev_private;
2812        struct mlx5_dev_ctx_shared *sh = priv->sh;
2813        uint32_t meta, mark, reg_c0;
2814
2815        reg_c0 = ~priv->vport_meta_mask;
2816        switch (sh->config.dv_xmeta_en) {
2817        case MLX5_XMETA_MODE_LEGACY:
2818                meta = UINT32_MAX;
2819                mark = MLX5_FLOW_MARK_MASK;
2820                break;
2821        case MLX5_XMETA_MODE_META16:
2822                meta = reg_c0 >> rte_bsf32(reg_c0);
2823                mark = MLX5_FLOW_MARK_MASK;
2824                break;
2825        case MLX5_XMETA_MODE_META32:
2826                meta = UINT32_MAX;
2827                mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
2828                break;
2829        default:
2830                meta = 0;
2831                mark = 0;
2832                MLX5_ASSERT(false);
2833                break;
2834        }
2835        if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
2836                DRV_LOG(WARNING, "metadata MARK mask mismatch %08X:%08X",
2837                                 sh->dv_mark_mask, mark);
2838        else
2839                sh->dv_mark_mask = mark;
2840        if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
2841                DRV_LOG(WARNING, "metadata META mask mismatch %08X:%08X",
2842                                 sh->dv_meta_mask, meta);
2843        else
2844                sh->dv_meta_mask = meta;
2845        if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
2846                DRV_LOG(WARNING, "metadata reg_c0 mask mismatch %08X:%08X",
2847                                 sh->dv_regc0_mask, reg_c0);
2848        else
2849                sh->dv_regc0_mask = reg_c0;
2850        DRV_LOG(DEBUG, "metadata mode %u", sh->config.dv_xmeta_en);
2851        DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
2852        DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
2853        DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
2854}
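
/*
 * Worked example; the values are illustrative assumptions and not taken
 * from any particular firmware. With sh->config.dv_xmeta_en equal to
 * MLX5_XMETA_MODE_META16 and priv->vport_meta_mask == 0xFFFF0000 the
 * masks computed above become:
 *
 *	reg_c0 = ~0xFFFF0000                  = 0x0000FFFF
 *	meta   = reg_c0 >> rte_bsf32(reg_c0)  = 0x0000FFFF >> 0 = 0x0000FFFF
 *	mark   = MLX5_FLOW_MARK_MASK
 *
 * i.e. the 16 bits of REG_C_0 left free by the vport metadata are exposed
 * for the flow META item/action, while MARK keeps its legacy mask.
 */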
2855
2856int
2857rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
2858{
2859        static const char *const dynf_names[] = {
2860                RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
2861                RTE_MBUF_DYNFLAG_METADATA_NAME,
2862                RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
2863        };
2864        unsigned int i;
2865
2866        if (n < RTE_DIM(dynf_names))
2867                return -ENOMEM;
2868        for (i = 0; i < RTE_DIM(dynf_names); i++) {
2869                if (names[i] == NULL)
2870                        return -EINVAL;
2871                strcpy(names[i], dynf_names[i]);
2872        }
2873        return RTE_DIM(dynf_names);
2874}
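
/*
 * Minimal usage sketch; the buffer count and sizes are illustrative
 * assumptions. The caller supplies writable buffers, each large enough
 * to hold a dynamic flag name (RTE_MBUF_DYN_NAMESIZE bytes is sufficient):
 *
 *	char buf[3][RTE_MBUF_DYN_NAMESIZE];
 *	char *names[3] = { buf[0], buf[1], buf[2] };
 *	int nb = rte_pmd_mlx5_get_dyn_flag_names(names, RTE_DIM(names));
 *
 *	if (nb > 0)
 *		while (nb--)
 *			printf("dynamic flag: %s\n", names[nb]);
 */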
2875
2876/**
2877 * Look for the Ethernet device belonging to the mlx5 driver.
2878 *
2879 * @param[in] port_id
2880 *   port_id to start looking for the device from.
2881 * @param[in] odev
2882 *   Pointer to the hint device. While a device is being probed,
2883 *   its siblings (the master and preceding representors) might
2884 *   not have a driver assigned yet, because mlx5_os_pci_probe()
2885 *   has not completed; in this case matching on the hint device
2886 *   may be used to detect a sibling device.
2887 *
2888 * @return
2889 *   port_id of the found device, RTE_MAX_ETHPORTS if not found.
2890 */
2891uint16_t
2892mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
2893{
2894        while (port_id < RTE_MAX_ETHPORTS) {
2895                struct rte_eth_dev *dev = &rte_eth_devices[port_id];
2896
2897                if (dev->state != RTE_ETH_DEV_UNUSED &&
2898                    dev->device &&
2899                    (dev->device == odev ||
2900                     (dev->device->driver &&
2901                     dev->device->driver->name &&
2902                     ((strcmp(dev->device->driver->name,
2903                              MLX5_PCI_DRIVER_NAME) == 0) ||
2904                      (strcmp(dev->device->driver->name,
2905                              MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
2906                        break;
2907                port_id++;
2908        }
2909        if (port_id >= RTE_MAX_ETHPORTS)
2910                return RTE_MAX_ETHPORTS;
2911        return port_id;
2912}
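
/*
 * Usage sketch, hedged: this mirrors how the MLX5_ETH_FOREACH_DEV() macro
 * in mlx5.h is expected to iterate the ports owned by the mlx5 driver.
 * process_port() is a hypothetical per-port handler.
 *
 *	uint16_t port_id;
 *
 *	for (port_id = mlx5_eth_find_next(0, NULL);
 *	     port_id < RTE_MAX_ETHPORTS;
 *	     port_id = mlx5_eth_find_next(port_id + 1, NULL))
 *		process_port(port_id);
 */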
2913
2914/**
2915 * Callback to remove a device.
2916 *
2917 * This function removes all Ethernet devices belonging to a given device.
2918 *
2919 * @param[in] cdev
2920 *   Pointer to the generic device.
2921 *
2922 * @return
2923 *   0 on success, -EIO if closing any of the ports failed.
2924 */
2925int
2926mlx5_net_remove(struct mlx5_common_device *cdev)
2927{
2928        uint16_t port_id;
2929        int ret = 0;
2930
2931        RTE_ETH_FOREACH_DEV_OF(port_id, cdev->dev) {
2932                /*
2933                 * mlx5_dev_close() is not registered for the secondary
2934                 * process, so call the close function explicitly there.
2935                 */
2936                if (rte_eal_process_type() == RTE_PROC_SECONDARY)
2937                        ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
2938                else
2939                        ret |= rte_eth_dev_close(port_id);
2940        }
2941        return ret == 0 ? 0 : -EIO;
2942}
2943
2944static const struct rte_pci_id mlx5_pci_id_map[] = {
2945        {
2946                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2947                               PCI_DEVICE_ID_MELLANOX_CONNECTX4)
2948        },
2949        {
2950                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2951                               PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
2952        },
2953        {
2954                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2955                               PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
2956        },
2957        {
2958                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2959                               PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
2960        },
2961        {
2962                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2963                               PCI_DEVICE_ID_MELLANOX_CONNECTX5)
2964        },
2965        {
2966                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2967                               PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
2968        },
2969        {
2970                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2971                               PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
2972        },
2973        {
2974                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2975                               PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
2976        },
2977        {
2978                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2979                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
2980        },
2981        {
2982                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2983                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
2984        },
2985        {
2986                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2987                                PCI_DEVICE_ID_MELLANOX_CONNECTX6)
2988        },
2989        {
2990                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2991                                PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
2992        },
2993        {
2994                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2995                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
2996        },
2997        {
2998                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2999                                PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
3000        },
3001        {
3002                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
3003                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
3004        },
3005        {
3006                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
3007                                PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
3008        },
3009        {
3010                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
3011                                PCI_DEVICE_ID_MELLANOX_CONNECTX7)
3012        },
3013        {
3014                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
3015                                PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
3016        },
3017        {
3018                .vendor_id = 0
3019        }
3020};
3021
3022static struct mlx5_class_driver mlx5_net_driver = {
3023        .drv_class = MLX5_CLASS_ETH,
3024        .name = RTE_STR(MLX5_ETH_DRIVER_NAME),
3025        .id_table = mlx5_pci_id_map,
3026        .probe = mlx5_os_net_probe,
3027        .remove = mlx5_net_remove,
3028        .probe_again = 1,
3029        .intr_lsc = 1,
3030        .intr_rmv = 1,
3031};
3032
3033/* Initialize driver log type. */
3034RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE)
3035
3036/**
3037 * Driver initialization routine.
3038 */
3039RTE_INIT(rte_mlx5_pmd_init)
3040{
3041        pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL);
3042        mlx5_common_init();
3043        /* Build the static tables for Verbs conversion. */
3044        mlx5_set_ptype_table();
3045        mlx5_set_cksum_table();
3046        mlx5_set_swp_types_table();
3047        if (mlx5_glue)
3048                mlx5_class_driver_register(&mlx5_net_driver);
3049}
3050
3051RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__);
3052RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map);
3053RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");
3054