dpdk/drivers/net/mlx5/mlx5.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <errno.h>

#include <rte_malloc.h>
#include <ethdev_driver.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
#include <rte_rwlock.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>
#include <rte_alarm.h>
#include <rte_cycles.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common.h>
#include <mlx5_common_os.h>
#include <mlx5_common_mp.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "rte_pmd_mlx5.h"

#define MLX5_ETH_DRIVER_NAME mlx5_eth

/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"

/* Device parameter to enable padding Rx packet to cacheline size. */
#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en"

/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"

/* Device parameter to configure log 2 of the number of strides for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_NUM "mprq_log_stride_num"

/* Device parameter to configure log 2 of the stride size for MPRQ. */
#define MLX5_RX_MPRQ_LOG_STRIDE_SIZE "mprq_log_stride_size"

/* Device parameter to limit the size of memcpy'd packet for MPRQ. */
#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"

/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"

/* Device parameter to configure inline send. Deprecated, ignored. */
#define MLX5_TXQ_INLINE "txq_inline"

/* Device parameter to limit packet size to inline with ordinary SEND. */
#define MLX5_TXQ_INLINE_MAX "txq_inline_max"

/* Device parameter to configure minimal data size to inline. */
#define MLX5_TXQ_INLINE_MIN "txq_inline_min"

/* Device parameter to limit packet size to inline with Enhanced MPW. */
#define MLX5_TXQ_INLINE_MPW "txq_inline_mpw"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling inline send.
 */
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"

/*
 * Device parameter to configure the number of TX queues threshold for
 * enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
 */
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"

/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"

/*
 * Device parameter to force doorbell register mapping
 * to non-cached region, eliminating the extra write memory barrier.
 */
#define MLX5_TX_DB_NC "tx_db_nc"

/*
 * Device parameter to include 2 dsegs in the title WQEBB.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"

/*
 * Device parameter to limit the size of inlining packet.
 * Deprecated, ignored.
 */
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"

/*
 * Device parameter to enable Tx scheduling on timestamps
 * and specify the packet pacing granularity in nanoseconds.
 */
#define MLX5_TX_PP "tx_pp"

/*
 * Device parameter to specify the skew in nanoseconds on the Tx datapath;
 * it represents the time between the SQ starting WQE processing and the
 * actual packet data appearing on the wire.
 */
#define MLX5_TX_SKEW "tx_skew"

/*
 * Device parameter to enable hardware Tx vector.
 * Deprecated, ignored (no vectorized Tx routines anymore).
 */
#define MLX5_TX_VEC_EN "tx_vec_en"

/* Device parameter to enable hardware Rx vector. */
#define MLX5_RX_VEC_EN "rx_vec_en"

/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"

/* Activate DV E-Switch flow steering. */
#define MLX5_DV_ESW_EN "dv_esw_en"

/* Activate DV flow steering. */
#define MLX5_DV_FLOW_EN "dv_flow_en"

/* Enable extensive flow metadata support. */
#define MLX5_DV_XMETA_EN "dv_xmeta_en"

/* Device parameter to let the user manage the LACP traffic of bonded device. */
#define MLX5_LACP_BY_USER "lacp_by_user"

/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"

/* Enable extending memsegs when creating a MR. */
#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"

/* Select port representors to instantiate. */
#define MLX5_REPRESENTOR "representor"

/* Device parameter to configure the maximum number of dump files per queue. */
#define MLX5_MAX_DUMP_FILES_NUM "max_dump_files_num"

/* Configure timeout of LRO session (in microseconds). */
#define MLX5_LRO_TIMEOUT_USEC "lro_timeout_usec"

/*
 * Device parameter to configure the total data buffer size for a single
 * hairpin queue (logarithm value).
 */
#define MLX5_HP_BUF_SIZE "hp_buf_log_sz"

/* Flow memory reclaim mode. */
#define MLX5_RECLAIM_MEM "reclaim_mem_mode"

/* The default memory allocator used in PMD. */
#define MLX5_SYS_MEM_EN "sys_mem_en"

/* Decap will be used or not. */
#define MLX5_DECAP_EN "decap_en"

/* Device parameter to allow or prevent duplicate rule patterns. */
#define MLX5_ALLOW_DUPLICATE_PATTERN "allow_duplicate_pattern"

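/*
 * Illustrative usage (not part of the driver): the parameters above are
 * consumed as comma-separated devargs on the EAL device allow list, e.g.
 * with a hypothetical PCI address:
 *
 *   dpdk-testpmd -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,tx_pp=500 -- -i
 *
 * Each key is matched via rte_kvargs against the strings defined above.
 */
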
/* Shared memory between primary and secondary processes. */
struct mlx5_shared_data *mlx5_shared_data;

/** Driver-specific log messages type. */
int mlx5_logtype;

static LIST_HEAD(, mlx5_dev_ctx_shared) mlx5_dev_ctx_list =
                                                LIST_HEAD_INITIALIZER();
static pthread_mutex_t mlx5_dev_ctx_list_mutex;
static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = {
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
        [MLX5_IPOOL_DECAP_ENCAP] = {
                .size = sizeof(struct mlx5_flow_dv_encap_decap_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_encap_decap_ipool",
        },
        [MLX5_IPOOL_PUSH_VLAN] = {
                .size = sizeof(struct mlx5_flow_dv_push_vlan_action_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_push_vlan_ipool",
        },
        [MLX5_IPOOL_TAG] = {
                .size = sizeof(struct mlx5_flow_dv_tag_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 0,
                .per_core_cache = (1 << 16),
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_tag_ipool",
        },
        [MLX5_IPOOL_PORT_ID] = {
                .size = sizeof(struct mlx5_flow_dv_port_id_action_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_port_id_ipool",
        },
        [MLX5_IPOOL_JUMP] = {
                .size = sizeof(struct mlx5_flow_tbl_data_entry),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_jump_ipool",
        },
        [MLX5_IPOOL_SAMPLE] = {
                .size = sizeof(struct mlx5_flow_dv_sample_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_sample_ipool",
        },
        [MLX5_IPOOL_DEST_ARRAY] = {
                .size = sizeof(struct mlx5_flow_dv_dest_array_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_dest_array_ipool",
        },
        [MLX5_IPOOL_TUNNEL_ID] = {
                .size = sizeof(struct mlx5_flow_tunnel),
                .trunk_size = MLX5_MAX_TUNNELS,
                .need_lock = 1,
                .release_mem_en = 1,
                .type = "mlx5_tunnel_offload",
        },
        [MLX5_IPOOL_TNL_TBL_ID] = {
                .size = 0,
                .need_lock = 1,
                .type = "mlx5_flow_tnl_tbl_ipool",
        },
#endif
        [MLX5_IPOOL_MTR] = {
                /**
                 * The ipool index must grow continually from small to big:
                 * grow_trunk is left unset for the meter index so that
                 * indices are allocated contiguously, without jumps.
                 */
                .size = sizeof(struct mlx5_legacy_flow_meter),
                .trunk_size = 64,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_meter_ipool",
        },
        [MLX5_IPOOL_MCP] = {
                .size = sizeof(struct mlx5_flow_mreg_copy_resource),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_mcp_ipool",
        },
        [MLX5_IPOOL_HRXQ] = {
                .size = (sizeof(struct mlx5_hrxq) + MLX5_RSS_HASH_KEY_LEN),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_hrxq_ipool",
        },
        [MLX5_IPOOL_MLX5_FLOW] = {
                /*
                 * MLX5_IPOOL_MLX5_FLOW size varies for DV and VERBS flows.
                 * It is set at run time according to the PCI function
                 * configuration.
                 */
                .size = 0,
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 0,
                .per_core_cache = 1 << 19,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_flow_handle_ipool",
        },
        [MLX5_IPOOL_RTE_FLOW] = {
                .size = sizeof(struct rte_flow),
                .trunk_size = 4096,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "rte_flow_ipool",
        },
        [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID] = {
                .size = 0,
                .need_lock = 1,
                .type = "mlx5_flow_rss_id_ipool",
        },
        [MLX5_IPOOL_RSS_SHARED_ACTIONS] = {
                .size = sizeof(struct mlx5_shared_action_rss),
                .trunk_size = 64,
                .grow_trunk = 3,
                .grow_shift = 2,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_shared_action_rss",
        },
        [MLX5_IPOOL_MTR_POLICY] = {
                /**
                 * The ipool index must grow continually from small to big:
                 * grow_trunk is left unset for the policy index so that
                 * indices are allocated contiguously, without jumps.
                 */
                .size = sizeof(struct mlx5_flow_meter_sub_policy),
                .trunk_size = 64,
                .need_lock = 1,
                .release_mem_en = 1,
                .malloc = mlx5_malloc,
                .free = mlx5_free,
                .type = "mlx5_meter_policy_ipool",
        },
};

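/*
 * Worked example of the trunk growth arithmetic above (a sketch, assuming
 * the mlx5_utils rule trunk_size << (grow_shift * min(i, grow_trunk)) for
 * trunk i): with .trunk_size = 64, .grow_trunk = 3 and .grow_shift = 2,
 * successive trunks hold 64, 256 and 1024 entries, and every later trunk
 * stays at 4096 entries.
 */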

#define MLX5_FLOW_MIN_ID_POOL_SIZE 512
#define MLX5_ID_GENERATION_ARRAY_FACTOR 16

#define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 1024

/**
 * Decide whether the representor ID is an HPF (host PF) port on BF2.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if HPF, otherwise 0.
 */
bool
mlx5_is_hpf(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id);
        int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

        return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF &&
               MLX5_REPRESENTOR_REPR(-1) == repr;
}

/**
 * Decide whether the representor ID is an SF port representor.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   Non-zero if SF port representor, otherwise 0.
 */
bool
mlx5_is_sf_repr(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);

        return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
}

/**
 * Initialize the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh)
{
        int err;

        if (sh->aso_age_mng)
                return 0;
        sh->aso_age_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->aso_age_mng),
                                      RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
        if (!sh->aso_age_mng) {
                DRV_LOG(ERR, "aso_age_mng allocation failed.");
                rte_errno = ENOMEM;
                return -ENOMEM;
        }
        err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT);
        if (err) {
                mlx5_free(sh->aso_age_mng);
                return -1;
        }
        rte_spinlock_init(&sh->aso_age_mng->resize_sl);
        rte_spinlock_init(&sh->aso_age_mng->free_sl);
        LIST_INIT(&sh->aso_age_mng->free);
        return 0;
}

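/*
 * Hypothetical call-site sketch: the structure is created lazily and the
 * function is idempotent, so a caller enabling flow aging can simply do:
 *
 *   if (mlx5_flow_aso_age_mng_init(priv->sh))
 *           return -rte_errno;
 */
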
/**
 * Close and release all the resources of the ASO aging management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_age_mng_close(struct mlx5_dev_ctx_shared *sh)
{
        int i, j;

        mlx5_aso_flow_hit_queue_poll_stop(sh);
        mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
        if (sh->aso_age_mng->pools) {
                struct mlx5_aso_age_pool *pool;

                for (i = 0; i < sh->aso_age_mng->next; ++i) {
                        pool = sh->aso_age_mng->pools[i];
                        claim_zero(mlx5_devx_cmd_destroy
                                                (pool->flow_hit_aso_obj));
                        for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j)
                                if (pool->actions[j].dr_action)
                                        claim_zero
                                            (mlx5_flow_os_destroy_flow_action
                                              (pool->actions[j].dr_action));
                        mlx5_free(pool);
                }
                mlx5_free(sh->aso_age_mng->pools);
        }
        mlx5_free(sh->aso_age_mng);
}

/**
 * Initialize the shared aging list information per port.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_aging_init(struct mlx5_dev_ctx_shared *sh)
{
        uint32_t i;
        struct mlx5_age_info *age_info;

        for (i = 0; i < sh->max_port; i++) {
                age_info = &sh->port[i].age_info;
                age_info->flags = 0;
                TAILQ_INIT(&age_info->aged_counters);
                LIST_INIT(&age_info->aged_aso);
                rte_spinlock_init(&age_info->aged_sl);
                MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
        }
}

/**
 * Initialize the counters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_counters_mng_init(struct mlx5_dev_ctx_shared *sh)
{
        int i;

        memset(&sh->cmng, 0, sizeof(sh->cmng));
        TAILQ_INIT(&sh->cmng.flow_counters);
        sh->cmng.min_id = MLX5_CNT_BATCH_OFFSET;
        sh->cmng.max_id = -1;
        sh->cmng.last_pool_idx = POOL_IDX_INVALID;
        rte_spinlock_init(&sh->cmng.pool_update_sl);
        for (i = 0; i < MLX5_COUNTER_TYPE_MAX; i++) {
                TAILQ_INIT(&sh->cmng.counters[i]);
                rte_spinlock_init(&sh->cmng.csl[i]);
        }
}

/**
 * Destroy all the resources allocated for a counter memory management.
 *
 * @param[in] mng
 *   Pointer to the memory management structure.
 */
static void
mlx5_flow_destroy_counter_stat_mem_mng(struct mlx5_counter_stats_mem_mng *mng)
{
        uint8_t *mem = (uint8_t *)(uintptr_t)mng->raws[0].data;

        LIST_REMOVE(mng, next);
        claim_zero(mlx5_devx_cmd_destroy(mng->dm));
        claim_zero(mlx5_os_umem_dereg(mng->umem));
        mlx5_free(mem);
}

/**
 * Close and release all the resources of the counters management.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_counters_mng_close(struct mlx5_dev_ctx_shared *sh)
{
        struct mlx5_counter_stats_mem_mng *mng;
        int i, j;
        int retries = 1024;

        rte_errno = 0;
        while (--retries) {
                rte_eal_alarm_cancel(mlx5_flow_query_alarm, sh);
                if (rte_errno != EINPROGRESS)
                        break;
                rte_pause();
        }

        if (sh->cmng.pools) {
                struct mlx5_flow_counter_pool *pool;
                uint16_t n_valid = sh->cmng.n_valid;
                bool fallback = sh->cmng.counter_fallback;

                for (i = 0; i < n_valid; ++i) {
                        pool = sh->cmng.pools[i];
                        if (!fallback && pool->min_dcs)
                                claim_zero(mlx5_devx_cmd_destroy
                                                               (pool->min_dcs));
                        for (j = 0; j < MLX5_COUNTERS_PER_POOL; ++j) {
                                struct mlx5_flow_counter *cnt =
                                                MLX5_POOL_GET_CNT(pool, j);

                                if (cnt->action)
                                        claim_zero
                                         (mlx5_flow_os_destroy_flow_action
                                          (cnt->action));
                                if (fallback && MLX5_POOL_GET_CNT
                                    (pool, j)->dcs_when_free)
                                        claim_zero(mlx5_devx_cmd_destroy
                                                   (cnt->dcs_when_free));
                        }
                        mlx5_free(pool);
                }
                mlx5_free(sh->cmng.pools);
        }
        mng = LIST_FIRST(&sh->cmng.mem_mngs);
        while (mng) {
                mlx5_flow_destroy_counter_stat_mem_mng(mng);
                mng = LIST_FIRST(&sh->cmng.mem_mngs);
        }
        memset(&sh->cmng, 0, sizeof(sh->cmng));
}

/**
 * Initialize the ASO flow meters management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_flow_mtrs_mng_init(struct mlx5_dev_ctx_shared *sh)
{
        if (!sh->mtrmng) {
                sh->mtrmng = mlx5_malloc(MLX5_MEM_ZERO,
                        sizeof(*sh->mtrmng),
                        RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
                if (!sh->mtrmng) {
                        DRV_LOG(ERR,
                                "meter management allocation failed.");
                        rte_errno = ENOMEM;
                        return -ENOMEM;
                }
                if (sh->meter_aso_en) {
                        rte_spinlock_init(&sh->mtrmng->pools_mng.mtrsl);
                        LIST_INIT(&sh->mtrmng->pools_mng.meters);
                }
                sh->mtrmng->def_policy_id = MLX5_INVALID_POLICY_ID;
        }
        return 0;
}

/**
 * Close and release all the resources of
 * the ASO flow meter management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_aso_flow_mtrs_mng_close(struct mlx5_dev_ctx_shared *sh)
{
        struct mlx5_aso_mtr_pool *mtr_pool;
        struct mlx5_flow_mtr_mng *mtrmng = sh->mtrmng;
        uint32_t idx;
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
        struct mlx5_aso_mtr *aso_mtr;
        int i;
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */

        if (sh->meter_aso_en) {
                mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
                idx = mtrmng->pools_mng.n_valid;
                while (idx--) {
                        mtr_pool = mtrmng->pools_mng.pools[idx];
#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO
                        for (i = 0; i < MLX5_ASO_MTRS_PER_POOL; i++) {
                                aso_mtr = &mtr_pool->mtrs[i];
                                if (aso_mtr->fm.meter_action)
                                        claim_zero
                                        (mlx5_glue->destroy_flow_action
                                        (aso_mtr->fm.meter_action));
                        }
#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */
                        claim_zero(mlx5_devx_cmd_destroy
                                                (mtr_pool->devx_obj));
                        mtrmng->pools_mng.n_valid--;
                        mlx5_free(mtr_pool);
                }
                mlx5_free(sh->mtrmng->pools_mng.pools);
        }
        mlx5_free(sh->mtrmng);
        sh->mtrmng = NULL;
}

/* Send FLOW_AGED event if needed. */
void
mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh)
{
        struct mlx5_age_info *age_info;
        uint32_t i;

        for (i = 0; i < sh->max_port; i++) {
                age_info = &sh->port[i].age_info;
                if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
                        continue;
                MLX5_AGE_UNSET(age_info, MLX5_AGE_EVENT_NEW);
                if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER)) {
                        MLX5_AGE_UNSET(age_info, MLX5_AGE_TRIGGER);
                        rte_eth_dev_callback_process
                                (&rte_eth_devices[sh->port[i].devx_ih_port_id],
                                RTE_ETH_EVENT_FLOW_AGED, NULL);
                }
        }
}

/*
 * Initialize the ASO connection tracking structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_aso_ct_mng_init(struct mlx5_dev_ctx_shared *sh)
{
        int err;

        if (sh->ct_mng)
                return 0;
        sh->ct_mng = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*sh->ct_mng),
                                 RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
        if (!sh->ct_mng) {
                DRV_LOG(ERR, "ASO CT management allocation failed.");
                rte_errno = ENOMEM;
                return -rte_errno;
        }
        err = mlx5_aso_queue_init(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
        if (err) {
                mlx5_free(sh->ct_mng);
                /* rte_errno should be extracted from the failure. */
                rte_errno = EINVAL;
                return -rte_errno;
        }
        rte_spinlock_init(&sh->ct_mng->ct_sl);
        rte_rwlock_init(&sh->ct_mng->resize_rwl);
        LIST_INIT(&sh->ct_mng->free_cts);
        return 0;
}

/*
 * Close and release all the resources of the
 * ASO connection tracking management structure.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free.
 */
static void
mlx5_flow_aso_ct_mng_close(struct mlx5_dev_ctx_shared *sh)
{
        struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
        struct mlx5_aso_ct_pool *ct_pool;
        struct mlx5_aso_ct_action *ct;
        uint32_t idx;
        uint32_t val;
        uint32_t cnt;
        int i;

        mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_CONNECTION_TRACKING);
        idx = mng->next;
        while (idx--) {
                cnt = 0;
                ct_pool = mng->pools[idx];
                for (i = 0; i < MLX5_ASO_CT_ACTIONS_PER_POOL; i++) {
                        ct = &ct_pool->actions[i];
                        val = __atomic_fetch_sub(&ct->refcnt, 1,
                                                 __ATOMIC_RELAXED);
                        MLX5_ASSERT(val == 1);
                        if (val > 1)
                                cnt++;
#ifdef HAVE_MLX5_DR_ACTION_ASO_CT
                        if (ct->dr_action_orig)
                                claim_zero(mlx5_glue->destroy_flow_action
                                                        (ct->dr_action_orig));
                        if (ct->dr_action_rply)
                                claim_zero(mlx5_glue->destroy_flow_action
                                                        (ct->dr_action_rply));
#endif
                }
                claim_zero(mlx5_devx_cmd_destroy(ct_pool->devx_obj));
                if (cnt) {
                        DRV_LOG(DEBUG, "%u ASO CT objects are being used in the pool %u",
                                cnt, idx);
                }
                mlx5_free(ct_pool);
                /* in case of failure. */
                mng->next--;
        }
        mlx5_free(mng->pools);
        mlx5_free(mng);
        /* Management structure must be cleared to 0s during allocation. */
        sh->ct_mng = NULL;
}

/**
 * Initialize the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] config
 *   Pointer to user dev config.
 */
static void
mlx5_flow_ipool_create(struct mlx5_dev_ctx_shared *sh,
                       const struct mlx5_dev_config *config)
{
        uint8_t i;
        struct mlx5_indexed_pool_config cfg;

        for (i = 0; i < MLX5_IPOOL_MAX; ++i) {
                cfg = mlx5_ipool_cfg[i];
                switch (i) {
                default:
                        break;
                /*
                 * Set MLX5_IPOOL_MLX5_FLOW ipool size
                 * according to PCI function flow configuration.
                 */
                case MLX5_IPOOL_MLX5_FLOW:
                        cfg.size = config->dv_flow_en ?
                                sizeof(struct mlx5_flow_handle) :
                                MLX5_FLOW_HANDLE_VERBS_SIZE;
                        break;
                }
                if (config->reclaim_mode) {
                        cfg.release_mem_en = 1;
                        cfg.per_core_cache = 0;
                } else {
                        cfg.release_mem_en = 0;
                }
                sh->ipool[i] = mlx5_ipool_create(&cfg);
        }
}

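/*
 * Note on reclaim behavior (descriptive, mirroring the loop above): with
 * the "reclaim_mem_mode" devarg set, every pool gets release_mem_en = 1
 * and its per-core cache disabled, trading allocation speed for immediate
 * memory reclaim on release.
 */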

/**
 * Release the flow resources' indexed mempool.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 */
static void
mlx5_flow_ipool_destroy(struct mlx5_dev_ctx_shared *sh)
{
        uint8_t i;

        for (i = 0; i < MLX5_IPOOL_MAX; ++i)
                mlx5_ipool_destroy(sh->ipool[i]);
        for (i = 0; i < MLX5_MAX_MODIFY_NUM; ++i)
                if (sh->mdh_ipools[i])
                        mlx5_ipool_destroy(sh->mdh_ipools[i]);
}

/*
 * Check if dynamic flex parser for eCPRI already exists.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   True if it exists, false otherwise.
 */
bool
mlx5_flex_parser_ecpri_exist(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flex_parser_profiles *prf =
                                &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

        return !!prf->obj;
}

/*
 * Allocation of a flex parser for eCPRI. Once created, the related parser
 * resources are held until the device is closed.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flex_parser_ecpri_alloc(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flex_parser_profiles *prf =
                                &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];
        struct mlx5_devx_graph_node_attr node = {
                .modify_field_select = 0,
        };
        uint32_t ids[8];
        int ret;

        if (!priv->config.hca_attr.parse_graph_flex_node) {
                DRV_LOG(ERR, "Dynamic flex parser is not supported "
                        "for device %s.", priv->dev_data->name);
                return -ENOTSUP;
        }
        node.header_length_mode = MLX5_GRAPH_NODE_LEN_FIXED;
        /* 8 bytes now: 4B common header + 4B message body header. */
        node.header_length_base_value = 0x8;
        /* After MAC layer: Ether / VLAN. */
        node.in[0].arc_parse_graph_node = MLX5_GRAPH_ARC_NODE_MAC;
        /* Type of compared condition should be 0xAEFE in the L2 layer. */
        node.in[0].compare_condition_value = RTE_ETHER_TYPE_ECPRI;
        /* Sample #0: type in common header. */
        node.sample[0].flow_match_sample_en = 1;
        /* Fixed offset. */
        node.sample[0].flow_match_sample_offset_mode = 0x0;
        /* Only the 2nd byte will be used. */
        node.sample[0].flow_match_sample_field_base_offset = 0x0;
        /* Sample #1: message payload. */
        node.sample[1].flow_match_sample_en = 1;
        /* Fixed offset. */
        node.sample[1].flow_match_sample_offset_mode = 0x0;
        /*
         * Only the first two bytes will be used right now; their offset
         * starts after the common header, which is one DW (u32) long.
         */
        node.sample[1].flow_match_sample_field_base_offset = sizeof(uint32_t);
        prf->obj = mlx5_devx_cmd_create_flex_parser(priv->sh->ctx, &node);
        if (!prf->obj) {
                DRV_LOG(ERR, "Failed to create flex parser node object.");
                return (rte_errno == 0) ? -ENODEV : -rte_errno;
        }
        prf->num = 2;
        ret = mlx5_devx_cmd_query_parse_samples(prf->obj, ids, prf->num);
        if (ret) {
                DRV_LOG(ERR, "Failed to query sample IDs.");
                return (rte_errno == 0) ? -ENODEV : -rte_errno;
        }
        prf->offset[0] = 0x0;
        prf->offset[1] = sizeof(uint32_t);
        prf->ids[0] = ids[0];
        prf->ids[1] = ids[1];
        return 0;
}

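/*
 * Hypothetical call-site sketch: allocate the parser lazily before
 * creating the first eCPRI flow rule, reusing it afterwards:
 *
 *   if (!mlx5_flex_parser_ecpri_exist(dev) &&
 *       mlx5_flex_parser_ecpri_alloc(dev))
 *           return -rte_errno;
 */
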
/*
 * Destroy the flex parser node, including the parser itself, input / output
 * arcs and DW samples. Resources could be reused then.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 */
static void
mlx5_flex_parser_ecpri_release(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_flex_parser_profiles *prf =
                                &priv->sh->fp[MLX5_FLEX_PARSER_ECPRI_0];

        if (prf->obj)
                mlx5_devx_cmd_destroy(prf->obj);
        prf->obj = NULL;
}

/*
 * Allocate Rx and Tx UARs in robust fashion.
 * This routine handles the following UAR allocation issues:
 *
 *  - tries to allocate the UAR with the most appropriate memory
 *    mapping type from the ones supported by the host
 *
 *  - tries to allocate the UAR with a non-NULL base address.
 *    OFED 5.0.x and upstream rdma_core before v29 returned NULL as the
 *    UAR base address if the UAR was not the first object in the UAR page.
 *    This caused PMD failures, so we keep requesting UARs until we
 *    get the first one with a non-NULL base address returned.
 */
static int
mlx5_alloc_rxtx_uars(struct mlx5_dev_ctx_shared *sh,
                     const struct mlx5_dev_config *config)
{
        uint32_t uar_mapping, retry;
        int err = 0;
        void *base_addr;

        for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
                /* Control the mapping type according to the settings. */
                uar_mapping = (config->dbnc == MLX5_TXDB_NCACHED) ?
                              MLX5DV_UAR_ALLOC_TYPE_NC :
                              MLX5DV_UAR_ALLOC_TYPE_BF;
#else
                RTE_SET_USED(config);
                /*
                 * It seems we have no way to control the memory mapping type
                 * for the UAR, so the default "Write-Combining" type is assumed.
                 * The UAR initialization on queue creation queries the
                 * actual mapping type done by Verbs/kernel and sets up the
                 * PMD datapath accordingly.
                 */
                uar_mapping = 0;
#endif
                sh->tx_uar = mlx5_glue->devx_alloc_uar(sh->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
                if (!sh->tx_uar &&
                    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
                        if (config->dbnc == MLX5_TXDB_CACHED ||
                            config->dbnc == MLX5_TXDB_HEURISTIC)
                                DRV_LOG(WARNING, "Devarg tx_db_nc setting "
                                                 "is not supported by DevX");
                        /*
                         * In some environments, like a virtual machine,
                         * the Write-Combining mapping might not be supported
                         * and UAR allocation fails. We try the "Non-Cached"
                         * mapping in that case. The tx_burst routines take
                         * the UAR mapping type into account on UAR setup
                         * on queue creation.
                         */
                        DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (BF)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
                        sh->tx_uar = mlx5_glue->devx_alloc_uar
                                                        (sh->ctx, uar_mapping);
                } else if (!sh->tx_uar &&
                           uar_mapping == MLX5DV_UAR_ALLOC_TYPE_NC) {
                        if (config->dbnc == MLX5_TXDB_NCACHED)
                                DRV_LOG(WARNING, "Devarg tx_db_nc setting "
                                                 "is not supported by DevX");
                        /*
                         * If Verbs/kernel does not support "Non-Cached"
                         * try the "Write-Combining".
                         */
                        DRV_LOG(DEBUG, "Failed to allocate Tx DevX UAR (NC)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_BF;
                        sh->tx_uar = mlx5_glue->devx_alloc_uar
                                                        (sh->ctx, uar_mapping);
                }
#endif
                if (!sh->tx_uar) {
                        DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (BF/NC)");
                        err = ENOMEM;
                        goto exit;
                }
                base_addr = mlx5_os_get_devx_uar_base_addr(sh->tx_uar);
                if (base_addr)
                        break;
                /*
                 * The UARs are allocated by rdma_core within the
                 * IB device context, on context closure all UARs
                 * will be freed, should be no memory/object leakage.
                 */
                DRV_LOG(DEBUG, "Retrying to allocate Tx DevX UAR");
                sh->tx_uar = NULL;
        }
        /* Check whether we finally succeeded with valid UAR allocation. */
        if (!sh->tx_uar) {
                DRV_LOG(ERR, "Failed to allocate Tx DevX UAR (NULL base)");
                err = ENOMEM;
                goto exit;
        }
        for (retry = 0; retry < MLX5_ALLOC_UAR_RETRY; ++retry) {
                uar_mapping = 0;
                sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
                                                        (sh->ctx, uar_mapping);
#ifdef MLX5DV_UAR_ALLOC_TYPE_NC
                if (!sh->devx_rx_uar &&
                    uar_mapping == MLX5DV_UAR_ALLOC_TYPE_BF) {
                        /*
                         * Rx UAR is used to control interrupts only,
                         * should be no datapath noticeable impact,
                         * can try "Non-Cached" mapping safely.
                         */
                        DRV_LOG(DEBUG, "Failed to allocate Rx DevX UAR (BF)");
                        uar_mapping = MLX5DV_UAR_ALLOC_TYPE_NC;
                        sh->devx_rx_uar = mlx5_glue->devx_alloc_uar
                                                        (sh->ctx, uar_mapping);
                }
#endif
                if (!sh->devx_rx_uar) {
                        DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (BF/NC)");
                        err = ENOMEM;
                        goto exit;
                }
                base_addr = mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar);
                if (base_addr)
                        break;
                /*
                 * The UARs are allocated by rdma_core within the
                 * IB device context, on context closure all UARs
                 * will be freed, should be no memory/object leakage.
                 */
                DRV_LOG(DEBUG, "Retrying to allocate Rx DevX UAR");
                sh->devx_rx_uar = NULL;
        }
        /* Check whether we finally succeeded with valid UAR allocation. */
        if (!sh->devx_rx_uar) {
                DRV_LOG(ERR, "Failed to allocate Rx DevX UAR (NULL base)");
                err = ENOMEM;
        }
exit:
        return err;
}

/**
 * Allocate the shared device context. For a multiport device the master
 * and representors share this context; for a single-port dedicated device
 * the context is used by the given port only.
 *
 * The routine first searches the existing contexts for the specified
 * device name; if one is found, the shared context is assumed and its
 * reference counter is incremented. If no context is found, a new one is
 * created and initialized with the specified device context and parameters.
 *
 * @param[in] spawn
 *   Pointer to the device attributes (name, port, etc).
 * @param[in] config
 *   Pointer to device configuration structure.
 *
 * @return
 *   Pointer to mlx5_dev_ctx_shared object on success,
 *   otherwise NULL and rte_errno is set.
 */
struct mlx5_dev_ctx_shared *
mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
                           const struct mlx5_dev_config *config)
{
        struct mlx5_dev_ctx_shared *sh;
        int err = 0;
        uint32_t i;
        struct mlx5_devx_tis_attr tis_attr = { 0 };

        MLX5_ASSERT(spawn);
        /* Secondary process should not create the shared context. */
        MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
        /* Search for IB context by device name. */
        LIST_FOREACH(sh, &mlx5_dev_ctx_list, next) {
                if (!strcmp(sh->ibdev_name,
                        mlx5_os_get_dev_device_name(spawn->phys_dev))) {
                        sh->refcnt++;
                        goto exit;
                }
        }
        /* No device found, we have to create new shared context. */
        MLX5_ASSERT(spawn->max_port);
        sh = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,
                         sizeof(struct mlx5_dev_ctx_shared) +
                         spawn->max_port *
                         sizeof(struct mlx5_dev_shared_port),
                         RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
        if (!sh) {
                DRV_LOG(ERR, "shared context allocation failure");
                rte_errno = ENOMEM;
                goto exit;
        }
        sh->numa_node = spawn->numa_node;
        if (spawn->bond_info)
                sh->bond = *spawn->bond_info;
        err = mlx5_os_open_device(spawn, config, sh);
        if (!sh->ctx)
                goto error;
        err = mlx5_os_get_dev_attr(sh->ctx, &sh->device_attr);
        if (err) {
                DRV_LOG(DEBUG, "mlx5_os_get_dev_attr() failed");
                goto error;
        }
        sh->refcnt = 1;
        sh->max_port = spawn->max_port;
        sh->reclaim_mode = config->reclaim_mode;
        strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx),
                sizeof(sh->ibdev_name) - 1);
        strncpy(sh->ibdev_path, mlx5_os_get_ctx_device_path(sh->ctx),
                sizeof(sh->ibdev_path) - 1);
        /*
         * Setting port_id to the maximum invalid value (RTE_MAX_ETHPORTS)
         * means there is no interrupt subhandler installed for
         * the given port index i.
         */
        for (i = 0; i < sh->max_port; i++) {
                sh->port[i].ih_port_id = RTE_MAX_ETHPORTS;
                sh->port[i].devx_ih_port_id = RTE_MAX_ETHPORTS;
        }
        sh->pd = mlx5_os_alloc_pd(sh->ctx);
        if (sh->pd == NULL) {
                DRV_LOG(ERR, "PD allocation failure");
                err = ENOMEM;
                goto error;
        }
        if (sh->devx) {
                err = mlx5_os_get_pdn(sh->pd, &sh->pdn);
                if (err) {
                        DRV_LOG(ERR, "Failed to extract pdn from PD");
                        goto error;
                }
                sh->td = mlx5_devx_cmd_create_td(sh->ctx);
                if (!sh->td) {
                        DRV_LOG(ERR, "TD allocation failure");
                        err = ENOMEM;
                        goto error;
                }
                tis_attr.transport_domain = sh->td->id;
                sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
                if (!sh->tis) {
                        DRV_LOG(ERR, "TIS allocation failure");
                        err = ENOMEM;
                        goto error;
                }
                err = mlx5_alloc_rxtx_uars(sh, config);
                if (err)
                        goto error;
                MLX5_ASSERT(sh->tx_uar);
                MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->tx_uar));

                MLX5_ASSERT(sh->devx_rx_uar);
                MLX5_ASSERT(mlx5_os_get_devx_uar_base_addr(sh->devx_rx_uar));
        }
#ifndef RTE_ARCH_64
        /* Initialize UAR access locks for 32bit implementations. */
        rte_spinlock_init(&sh->uar_lock_cq);
        for (i = 0; i < MLX5_UAR_PAGE_NUM_MAX; i++)
                rte_spinlock_init(&sh->uar_lock[i]);
#endif
        /*
         * Once the device is added to the list of memory event
         * callback, its global MR cache table cannot be expanded
         * on the fly because of deadlock. If it overflows, lookup
         * should be done by searching MR list linearly, which is slow.
         *
         * At this point the device is not added to the memory
         * event list yet, context is just being created.
         */
        err = mlx5_mr_btree_init(&sh->share_cache.cache,
                                 MLX5_MR_BTREE_CACHE_N * 2,
                                 sh->numa_node);
        if (err) {
                err = rte_errno;
                goto error;
        }
        mlx5_os_set_reg_mr_cb(&sh->share_cache.reg_mr_cb,
                              &sh->share_cache.dereg_mr_cb);
        mlx5_os_dev_shared_handler_install(sh);
        sh->cnt_id_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_DWORD);
        if (!sh->cnt_id_tbl) {
                err = rte_errno;
                goto error;
        }
        if (LIST_EMPTY(&mlx5_dev_ctx_list)) {
                err = mlx5_flow_os_init_workspace_once();
                if (err)
                        goto error;
        }
        mlx5_flow_aging_init(sh);
        mlx5_flow_counters_mng_init(sh);
        mlx5_flow_ipool_create(sh, config);
        /* Add device to memory callback list. */
        rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
        LIST_INSERT_HEAD(&mlx5_shared_data->mem_event_cb_list,
                         sh, mem_event_cb);
        rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
        /* Add context to the global device list. */
        LIST_INSERT_HEAD(&mlx5_dev_ctx_list, sh, next);
        rte_spinlock_init(&sh->geneve_tlv_opt_sl);
exit:
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
        return sh;
error:
        pthread_mutex_destroy(&sh->txpp.mutex);
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
        MLX5_ASSERT(sh);
        if (sh->cnt_id_tbl)
                mlx5_l3t_destroy(sh->cnt_id_tbl);
        if (sh->tis)
                claim_zero(mlx5_devx_cmd_destroy(sh->tis));
        if (sh->td)
                claim_zero(mlx5_devx_cmd_destroy(sh->td));
        if (sh->devx_rx_uar)
                mlx5_glue->devx_free_uar(sh->devx_rx_uar);
        if (sh->tx_uar)
                mlx5_glue->devx_free_uar(sh->tx_uar);
        if (sh->pd)
                claim_zero(mlx5_os_dealloc_pd(sh->pd));
        if (sh->ctx)
                claim_zero(mlx5_glue->close_device(sh->ctx));
        mlx5_free(sh);
        MLX5_ASSERT(err > 0);
        rte_errno = err;
        return NULL;
}

/**
 * Free the shared IB device context. Decrement the reference counter and,
 * if it reaches zero, free all allocated resources and close handles.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object to free
 */
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
        pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
        /* Check the object presence in the list. */
        struct mlx5_dev_ctx_shared *lctx;

        LIST_FOREACH(lctx, &mlx5_dev_ctx_list, next)
                if (lctx == sh)
                        break;
        MLX5_ASSERT(lctx);
        if (lctx != sh) {
                DRV_LOG(ERR, "Freeing non-existing shared IB context");
                goto exit;
        }
#endif
        MLX5_ASSERT(sh);
        MLX5_ASSERT(sh->refcnt);
        /* Secondary process should not free the shared context. */
        MLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);
        if (--sh->refcnt)
                goto exit;
        /* Remove from memory callback device list. */
        rte_rwlock_write_lock(&mlx5_shared_data->mem_event_rwlock);
        LIST_REMOVE(sh, mem_event_cb);
        rte_rwlock_write_unlock(&mlx5_shared_data->mem_event_rwlock);
        /* Release created Memory Regions. */
        mlx5_mr_release_cache(&sh->share_cache);
        /* Remove context from the global device list. */
        LIST_REMOVE(sh, next);
        /* Release flow workspaces objects on the last device. */
        if (LIST_EMPTY(&mlx5_dev_ctx_list))
                mlx5_flow_os_release_workspace();
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
        /*
         * Ensure there is no async event handler installed.
         * Only the primary process handles async device events.
         */
        mlx5_flow_counters_mng_close(sh);
        if (sh->aso_age_mng) {
                mlx5_flow_aso_age_mng_close(sh);
                sh->aso_age_mng = NULL;
        }
        if (sh->mtrmng)
                mlx5_aso_flow_mtrs_mng_close(sh);
        mlx5_flow_ipool_destroy(sh);
        mlx5_os_dev_shared_handler_uninstall(sh);
        if (sh->cnt_id_tbl) {
                mlx5_l3t_destroy(sh->cnt_id_tbl);
                sh->cnt_id_tbl = NULL;
        }
        if (sh->tx_uar) {
                mlx5_glue->devx_free_uar(sh->tx_uar);
                sh->tx_uar = NULL;
        }
        if (sh->pd)
                claim_zero(mlx5_os_dealloc_pd(sh->pd));
        if (sh->tis)
                claim_zero(mlx5_devx_cmd_destroy(sh->tis));
        if (sh->td)
                claim_zero(mlx5_devx_cmd_destroy(sh->td));
        if (sh->devx_rx_uar)
                mlx5_glue->devx_free_uar(sh->devx_rx_uar);
        if (sh->ctx)
                claim_zero(mlx5_glue->close_device(sh->ctx));
        MLX5_ASSERT(sh->geneve_tlv_option_resource == NULL);
        pthread_mutex_destroy(&sh->txpp.mutex);
        mlx5_free(sh);
        return;
exit:
        pthread_mutex_unlock(&mlx5_dev_ctx_list_mutex);
}

/**
 * Destroy table hash list.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 */
void
mlx5_free_table_hash_list(struct mlx5_priv *priv)
{
        struct mlx5_dev_ctx_shared *sh = priv->sh;

        if (!sh->flow_tbls)
                return;
        mlx5_hlist_destroy(sh->flow_tbls);
}

/**
 * Initialize flow table hash list and create the root tables entry
 * for each domain.
 *
 * @param[in] priv
 *   Pointer to the private device data structure.
 *
 * @return
 *   Zero on success, positive error code otherwise.
 */
int
mlx5_alloc_table_hash_list(struct mlx5_priv *priv __rte_unused)
{
        int err = 0;
        /* Tables are only used in DV and DR modes. */
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
        struct mlx5_dev_ctx_shared *sh = priv->sh;
        char s[MLX5_NAME_SIZE];

        MLX5_ASSERT(sh);
        snprintf(s, sizeof(s), "%s_flow_table", priv->sh->ibdev_name);
        sh->flow_tbls = mlx5_hlist_create(s, MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE,
                                          false, true, sh,
                                          flow_dv_tbl_create_cb,
                                          flow_dv_tbl_match_cb,
                                          flow_dv_tbl_remove_cb,
                                          flow_dv_tbl_clone_cb,
                                          flow_dv_tbl_clone_free_cb);
        if (!sh->flow_tbls) {
                DRV_LOG(ERR, "flow table hash list creation failed.");
                err = ENOMEM;
                return err;
        }
#ifndef HAVE_MLX5DV_DR
        struct rte_flow_error error;
        struct rte_eth_dev *dev = &rte_eth_devices[priv->dev_data->port_id];

        /*
         * Without DR support the zero tables must still be created,
         * because DV expects to see them even if they cannot be created
         * by RDMA-CORE.
1413         */
1414        if (!flow_dv_tbl_resource_get(dev, 0, 0, 0, 0,
1415                NULL, 0, 1, 0, &error) ||
1416            !flow_dv_tbl_resource_get(dev, 0, 1, 0, 0,
1417                NULL, 0, 1, 0, &error) ||
1418            !flow_dv_tbl_resource_get(dev, 0, 0, 1, 0,
1419                NULL, 0, 1, 0, &error)) {
1420                err = ENOMEM;
1421                goto error;
1422        }
1423        return err;
1424error:
1425        mlx5_free_table_hash_list(priv);
1426#endif /* HAVE_MLX5DV_DR */
1427#endif
1428        return err;
1429}
1430
1431/**
1432 * Retrieve integer value from environment variable.
1433 *
1434 * @param[in] name
1435 *   Environment variable name.
1436 *
1437 * @return
1438 *   Integer value, 0 if the variable is not set.
1439 */
1440int
1441mlx5_getenv_int(const char *name)
1442{
1443        const char *val = getenv(name);
1444
1445        if (val == NULL)
1446                return 0;
1447        return atoi(val);
1448}
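
/*
 * A minimal usage sketch (the environment variable name below is
 * hypothetical, for illustration only). Note that an unset variable and
 * an explicit "0" are indistinguishable, so the helper suits
 * boolean-style knobs:
 *
 *   if (mlx5_getenv_int("MLX5_EXAMPLE_DEBUG_KNOB"))
 *           DRV_LOG(DEBUG, "example debug knob is enabled");
 */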
1449
1450/**
1451 * DPDK callback to add a UDP tunnel port.
1452 *
1453 * @param[in] dev
1454 *   A pointer to eth_dev
1455 * @param[in] udp_tunnel
1456 *   A pointer to udp tunnel
1457 *
1458 * @return
1459 *   0 on valid udp ports and tunnels, -ENOTSUP otherwise.
1460 */
1461int
1462mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev __rte_unused,
1463                         struct rte_eth_udp_tunnel *udp_tunnel)
1464{
1465        MLX5_ASSERT(udp_tunnel != NULL);
1466        if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN &&
1467            udp_tunnel->udp_port == 4789)
1468                return 0;
1469        if (udp_tunnel->prot_type == RTE_TUNNEL_TYPE_VXLAN_GPE &&
1470            udp_tunnel->udp_port == 4790)
1471                return 0;
1472        return -ENOTSUP;
1473}
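
/*
 * Application-side sketch of how this callback is reached through the
 * standard ethdev API (error handling elided; only the default IANA
 * ports 4789/4790 are accepted by this PMD):
 *
 *   struct rte_eth_udp_tunnel tunnel = {
 *           .udp_port = 4789,
 *           .prot_type = RTE_TUNNEL_TYPE_VXLAN,
 *   };
 *   int ret = rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel);
 *   // ret == -ENOTSUP for any other port/tunnel combination
 */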
1474
1475/**
1476 * Initialize process private data structure.
1477 *
1478 * @param dev
1479 *   Pointer to Ethernet device structure.
1480 *
1481 * @return
1482 *   0 on success, a negative errno value otherwise and rte_errno is set.
1483 */
1484int
1485mlx5_proc_priv_init(struct rte_eth_dev *dev)
1486{
1487        struct mlx5_priv *priv = dev->data->dev_private;
1488        struct mlx5_proc_priv *ppriv;
1489        size_t ppriv_size;
1490
1491        mlx5_proc_priv_uninit(dev);
1492        /*
1493         * UAR register table follows the process private structure. BlueFlame
1494         * registers for Tx queues are stored in the table.
1495         */
1496        ppriv_size =
1497                sizeof(struct mlx5_proc_priv) + priv->txqs_n * sizeof(void *);
1498        ppriv = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, ppriv_size,
1499                            RTE_CACHE_LINE_SIZE, dev->device->numa_node);
1500        if (!ppriv) {
1501                rte_errno = ENOMEM;
1502                return -rte_errno;
1503        }
1504        ppriv->uar_table_sz = priv->txqs_n;
1505        dev->process_private = ppriv;
1506        return 0;
1507}
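
/*
 * Memory layout sketch of the allocation above, assuming uar_table is the
 * trailing flexible member of struct mlx5_proc_priv (which the size
 * computation implies): one BlueFlame register pointer per Tx queue in a
 * single cache-line-aligned chunk on the device NUMA node.
 *
 *   +-------------------------+------------------------------------+
 *   | struct mlx5_proc_priv   | void *uar_table[priv->txqs_n]      |
 *   +-------------------------+------------------------------------+
 *
 *   size = sizeof(struct mlx5_proc_priv) + txqs_n * sizeof(void *);
 */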
1508
1509/**
1510 * Un-initialize process private data structure.
1511 *
1512 * @param dev
1513 *   Pointer to Ethernet device structure.
1514 */
1515void
1516mlx5_proc_priv_uninit(struct rte_eth_dev *dev)
1517{
1518        if (!dev->process_private)
1519                return;
1520        mlx5_free(dev->process_private);
1521        dev->process_private = NULL;
1522}
1523
1524/**
1525 * DPDK callback to close the device.
1526 *
1527 * Destroy all queues and objects, free memory.
1528 *
1529 * @param dev
1530 *   Pointer to Ethernet device structure.
1531 */
1532int
1533mlx5_dev_close(struct rte_eth_dev *dev)
1534{
1535        struct mlx5_priv *priv = dev->data->dev_private;
1536        unsigned int i;
1537        int ret;
1538
1539        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1540                /* Check if process_private was already released. */
1541                if (!dev->process_private)
1542                        return 0;
1543                mlx5_tx_uar_uninit_secondary(dev);
1544                mlx5_proc_priv_uninit(dev);
1545                rte_eth_dev_release_port(dev);
1546                return 0;
1547        }
1548        if (!priv->sh)
1549                return 0;
1550        DRV_LOG(DEBUG, "port %u closing device \"%s\"",
1551                dev->data->port_id,
1552                ((priv->sh->ctx != NULL) ?
1553                mlx5_os_get_ctx_device_name(priv->sh->ctx) : ""));
1554        /*
1555         * If the default mreg copy action was removed at the stop stage,
1556         * the search will find nothing and no further action will be taken.
1557         */
1558        mlx5_flow_stop_default(dev);
1559        mlx5_traffic_disable(dev);
1560        /*
1561         * If all the flows are already flushed in the device stop stage,
1562         * then this will return directly without any action.
1563         */
1564        mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, true);
1565        mlx5_action_handle_flush(dev);
1566        mlx5_flow_meter_flush(dev, NULL);
1567        /* Prevent crashes when queues are still in use. */
1568        dev->rx_pkt_burst = removed_rx_burst;
1569        dev->tx_pkt_burst = removed_tx_burst;
1570        rte_wmb();
1571        /* Disable datapath on secondary process. */
1572        mlx5_mp_os_req_stop_rxtx(dev);
1573        /* Free the eCPRI flex parser resource. */
1574        mlx5_flex_parser_ecpri_release(dev);
1575        if (priv->rxqs != NULL) {
1576                /* XXX race condition if mlx5_rx_burst() is still running. */
1577                rte_delay_us_sleep(1000);
1578                for (i = 0; (i != priv->rxqs_n); ++i)
1579                        mlx5_rxq_release(dev, i);
1580                priv->rxqs_n = 0;
1581                priv->rxqs = NULL;
1582        }
1583        if (priv->representor) {
1584                /* Each representor has a dedicated interrupt handler. */
1585                mlx5_free(dev->intr_handle);
1586                dev->intr_handle = NULL;
1587        }
1588        if (priv->txqs != NULL) {
1589                /* XXX race condition if mlx5_tx_burst() is still running. */
1590                rte_delay_us_sleep(1000);
1591                for (i = 0; (i != priv->txqs_n); ++i)
1592                        mlx5_txq_release(dev, i);
1593                priv->txqs_n = 0;
1594                priv->txqs = NULL;
1595        }
1596        mlx5_proc_priv_uninit(dev);
1597        if (priv->q_counters) {
1598                mlx5_devx_cmd_destroy(priv->q_counters);
1599                priv->q_counters = NULL;
1600        }
1601        if (priv->drop_queue.hrxq)
1602                mlx5_drop_action_destroy(dev);
1603        if (priv->mreg_cp_tbl)
1604                mlx5_hlist_destroy(priv->mreg_cp_tbl);
1605        mlx5_mprq_free_mp(dev);
1606        if (priv->sh->ct_mng)
1607                mlx5_flow_aso_ct_mng_close(priv->sh);
1608        mlx5_os_free_shared_dr(priv);
1609        if (priv->rss_conf.rss_key != NULL)
1610                mlx5_free(priv->rss_conf.rss_key);
1611        if (priv->reta_idx != NULL)
1612                mlx5_free(priv->reta_idx);
1613        if (priv->config.vf)
1614                mlx5_os_mac_addr_flush(dev);
1615        if (priv->nl_socket_route >= 0)
1616                close(priv->nl_socket_route);
1617        if (priv->nl_socket_rdma >= 0)
1618                close(priv->nl_socket_rdma);
1619        if (priv->vmwa_context)
1620                mlx5_vlan_vmwa_exit(priv->vmwa_context);
1621        ret = mlx5_hrxq_verify(dev);
1622        if (ret)
1623                DRV_LOG(WARNING, "port %u some hash Rx queues still remain",
1624                        dev->data->port_id);
1625        ret = mlx5_ind_table_obj_verify(dev);
1626        if (ret)
1627                DRV_LOG(WARNING, "port %u some indirection tables still remain",
1628                        dev->data->port_id);
1629        ret = mlx5_rxq_obj_verify(dev);
1630        if (ret)
1631                DRV_LOG(WARNING, "port %u some Rx queue objects still remain",
1632                        dev->data->port_id);
1633        ret = mlx5_rxq_verify(dev);
1634        if (ret)
1635                DRV_LOG(WARNING, "port %u some Rx queues still remain",
1636                        dev->data->port_id);
1637        ret = mlx5_txq_obj_verify(dev);
1638        if (ret)
1639                DRV_LOG(WARNING, "port %u some Verbs Tx queues still remain",
1640                        dev->data->port_id);
1641        ret = mlx5_txq_verify(dev);
1642        if (ret)
1643                DRV_LOG(WARNING, "port %u some Tx queues still remain",
1644                        dev->data->port_id);
1645        ret = mlx5_flow_verify(dev);
1646        if (ret)
1647                DRV_LOG(WARNING, "port %u some flows still remain",
1648                        dev->data->port_id);
1649        if (priv->hrxqs)
1650                mlx5_list_destroy(priv->hrxqs);
1651        /*
1652         * Free the shared context last, because the cleanup routines
1653         * above may use some shared fields, e.g.
1654         * mlx5_os_mac_addr_flush() uses ibdev_path for retrieving the
1655         * ifindex if Netlink fails.
1656         */
1657        mlx5_free_shared_dev_ctx(priv->sh);
1658        if (priv->domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {
1659                unsigned int c = 0;
1660                uint16_t port_id;
1661
1662                MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
1663                        struct mlx5_priv *opriv =
1664                                rte_eth_devices[port_id].data->dev_private;
1665
1666                        if (!opriv ||
1667                            opriv->domain_id != priv->domain_id ||
1668                            &rte_eth_devices[port_id] == dev)
1669                                continue;
1670                        ++c;
1671                        break;
1672                }
1673                if (!c)
1674                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
1675        }
1676        memset(priv, 0, sizeof(*priv));
1677        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
1678        /*
1679         * Reset mac_addrs to NULL such that it is not freed as part of
1680         * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
1681         * it is freed when dev_private is freed.
1682         */
1683        dev->data->mac_addrs = NULL;
1684        return 0;
1685}
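
/*
 * Application-side teardown sketch (standard ethdev calls): stopping the
 * port first lets the stop-stage cleanup this function relies on (flow
 * flush, default traffic removal) run before the final close:
 *
 *   ret = rte_eth_dev_stop(port_id);
 *   if (ret == 0)
 *           ret = rte_eth_dev_close(port_id);
 */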
1686
1687const struct eth_dev_ops mlx5_dev_ops = {
1688        .dev_configure = mlx5_dev_configure,
1689        .dev_start = mlx5_dev_start,
1690        .dev_stop = mlx5_dev_stop,
1691        .dev_set_link_down = mlx5_set_link_down,
1692        .dev_set_link_up = mlx5_set_link_up,
1693        .dev_close = mlx5_dev_close,
1694        .promiscuous_enable = mlx5_promiscuous_enable,
1695        .promiscuous_disable = mlx5_promiscuous_disable,
1696        .allmulticast_enable = mlx5_allmulticast_enable,
1697        .allmulticast_disable = mlx5_allmulticast_disable,
1698        .link_update = mlx5_link_update,
1699        .stats_get = mlx5_stats_get,
1700        .stats_reset = mlx5_stats_reset,
1701        .xstats_get = mlx5_xstats_get,
1702        .xstats_reset = mlx5_xstats_reset,
1703        .xstats_get_names = mlx5_xstats_get_names,
1704        .fw_version_get = mlx5_fw_version_get,
1705        .dev_infos_get = mlx5_dev_infos_get,
1706        .representor_info_get = mlx5_representor_info_get,
1707        .read_clock = mlx5_txpp_read_clock,
1708        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
1709        .vlan_filter_set = mlx5_vlan_filter_set,
1710        .rx_queue_setup = mlx5_rx_queue_setup,
1711        .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
1712        .tx_queue_setup = mlx5_tx_queue_setup,
1713        .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
1714        .rx_queue_release = mlx5_rx_queue_release,
1715        .tx_queue_release = mlx5_tx_queue_release,
1716        .rx_queue_start = mlx5_rx_queue_start,
1717        .rx_queue_stop = mlx5_rx_queue_stop,
1718        .tx_queue_start = mlx5_tx_queue_start,
1719        .tx_queue_stop = mlx5_tx_queue_stop,
1720        .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
1721        .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
1722        .mac_addr_remove = mlx5_mac_addr_remove,
1723        .mac_addr_add = mlx5_mac_addr_add,
1724        .mac_addr_set = mlx5_mac_addr_set,
1725        .set_mc_addr_list = mlx5_set_mc_addr_list,
1726        .mtu_set = mlx5_dev_set_mtu,
1727        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
1728        .vlan_offload_set = mlx5_vlan_offload_set,
1729        .reta_update = mlx5_dev_rss_reta_update,
1730        .reta_query = mlx5_dev_rss_reta_query,
1731        .rss_hash_update = mlx5_rss_hash_update,
1732        .rss_hash_conf_get = mlx5_rss_hash_conf_get,
1733        .flow_ops_get = mlx5_flow_ops_get,
1734        .rxq_info_get = mlx5_rxq_info_get,
1735        .txq_info_get = mlx5_txq_info_get,
1736        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
1737        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
1738        .rx_queue_intr_enable = mlx5_rx_intr_enable,
1739        .rx_queue_intr_disable = mlx5_rx_intr_disable,
1740        .is_removed = mlx5_is_removed,
1741        .udp_tunnel_port_add  = mlx5_udp_tunnel_port_add,
1742        .get_module_info = mlx5_get_module_info,
1743        .get_module_eeprom = mlx5_get_module_eeprom,
1744        .hairpin_cap_get = mlx5_hairpin_cap_get,
1745        .mtr_ops_get = mlx5_flow_meter_ops_get,
1746        .hairpin_bind = mlx5_hairpin_bind,
1747        .hairpin_unbind = mlx5_hairpin_unbind,
1748        .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports,
1749        .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
1750        .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
1751        .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
1752        .get_monitor_addr = mlx5_get_monitor_addr,
1753};
1754
1755/* Available operations from secondary process. */
1756const struct eth_dev_ops mlx5_dev_sec_ops = {
1757        .stats_get = mlx5_stats_get,
1758        .stats_reset = mlx5_stats_reset,
1759        .xstats_get = mlx5_xstats_get,
1760        .xstats_reset = mlx5_xstats_reset,
1761        .xstats_get_names = mlx5_xstats_get_names,
1762        .fw_version_get = mlx5_fw_version_get,
1763        .dev_infos_get = mlx5_dev_infos_get,
1764        .representor_info_get = mlx5_representor_info_get,
1765        .read_clock = mlx5_txpp_read_clock,
1766        .rx_queue_start = mlx5_rx_queue_start,
1767        .rx_queue_stop = mlx5_rx_queue_stop,
1768        .tx_queue_start = mlx5_tx_queue_start,
1769        .tx_queue_stop = mlx5_tx_queue_stop,
1770        .rxq_info_get = mlx5_rxq_info_get,
1771        .txq_info_get = mlx5_txq_info_get,
1772        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
1773        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
1774        .get_module_info = mlx5_get_module_info,
1775        .get_module_eeprom = mlx5_get_module_eeprom,
1776};
1777
1778/* Available operations in flow isolated mode. */
1779const struct eth_dev_ops mlx5_dev_ops_isolate = {
1780        .dev_configure = mlx5_dev_configure,
1781        .dev_start = mlx5_dev_start,
1782        .dev_stop = mlx5_dev_stop,
1783        .dev_set_link_down = mlx5_set_link_down,
1784        .dev_set_link_up = mlx5_set_link_up,
1785        .dev_close = mlx5_dev_close,
1786        .promiscuous_enable = mlx5_promiscuous_enable,
1787        .promiscuous_disable = mlx5_promiscuous_disable,
1788        .allmulticast_enable = mlx5_allmulticast_enable,
1789        .allmulticast_disable = mlx5_allmulticast_disable,
1790        .link_update = mlx5_link_update,
1791        .stats_get = mlx5_stats_get,
1792        .stats_reset = mlx5_stats_reset,
1793        .xstats_get = mlx5_xstats_get,
1794        .xstats_reset = mlx5_xstats_reset,
1795        .xstats_get_names = mlx5_xstats_get_names,
1796        .fw_version_get = mlx5_fw_version_get,
1797        .dev_infos_get = mlx5_dev_infos_get,
1798        .representor_info_get = mlx5_representor_info_get,
1799        .read_clock = mlx5_txpp_read_clock,
1800        .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
1801        .vlan_filter_set = mlx5_vlan_filter_set,
1802        .rx_queue_setup = mlx5_rx_queue_setup,
1803        .rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
1804        .tx_queue_setup = mlx5_tx_queue_setup,
1805        .tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
1806        .rx_queue_release = mlx5_rx_queue_release,
1807        .tx_queue_release = mlx5_tx_queue_release,
1808        .rx_queue_start = mlx5_rx_queue_start,
1809        .rx_queue_stop = mlx5_rx_queue_stop,
1810        .tx_queue_start = mlx5_tx_queue_start,
1811        .tx_queue_stop = mlx5_tx_queue_stop,
1812        .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
1813        .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
1814        .mac_addr_remove = mlx5_mac_addr_remove,
1815        .mac_addr_add = mlx5_mac_addr_add,
1816        .mac_addr_set = mlx5_mac_addr_set,
1817        .set_mc_addr_list = mlx5_set_mc_addr_list,
1818        .mtu_set = mlx5_dev_set_mtu,
1819        .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
1820        .vlan_offload_set = mlx5_vlan_offload_set,
1821        .flow_ops_get = mlx5_flow_ops_get,
1822        .rxq_info_get = mlx5_rxq_info_get,
1823        .txq_info_get = mlx5_txq_info_get,
1824        .rx_burst_mode_get = mlx5_rx_burst_mode_get,
1825        .tx_burst_mode_get = mlx5_tx_burst_mode_get,
1826        .rx_queue_intr_enable = mlx5_rx_intr_enable,
1827        .rx_queue_intr_disable = mlx5_rx_intr_disable,
1828        .is_removed = mlx5_is_removed,
1829        .get_module_info = mlx5_get_module_info,
1830        .get_module_eeprom = mlx5_get_module_eeprom,
1831        .hairpin_cap_get = mlx5_hairpin_cap_get,
1832        .mtr_ops_get = mlx5_flow_meter_ops_get,
1833        .hairpin_bind = mlx5_hairpin_bind,
1834        .hairpin_unbind = mlx5_hairpin_unbind,
1835        .hairpin_get_peer_ports = mlx5_hairpin_get_peer_ports,
1836        .hairpin_queue_peer_update = mlx5_hairpin_queue_peer_update,
1837        .hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
1838        .hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
1839        .get_monitor_addr = mlx5_get_monitor_addr,
1840};
1841
1842/**
1843 * Verify and store value for device argument.
1844 *
1845 * @param[in] key
1846 *   Key argument to verify.
1847 * @param[in] val
1848 *   Value associated with key.
1849 * @param opaque
1850 *   User data.
1851 *
1852 * @return
1853 *   0 on success, a negative errno value otherwise and rte_errno is set.
1854 */
1855static int
1856mlx5_args_check(const char *key, const char *val, void *opaque)
1857{
1858        struct mlx5_dev_config *config = opaque;
1859        unsigned long mod;
1860        signed long tmp;
1861
1862        /* No-op, port representors are processed in mlx5_dev_spawn(). */
1863        if (!strcmp(MLX5_REPRESENTOR, key))
1864                return 0;
1865        errno = 0;
1866        tmp = strtol(val, NULL, 0);
1867        if (errno) {
1868                rte_errno = errno;
1869                DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val);
1870                return -rte_errno;
1871        }
1872        if (tmp < 0 && strcmp(MLX5_TX_PP, key) && strcmp(MLX5_TX_SKEW, key)) {
1873                /* Negative values are acceptable for some keys only. */
1874                rte_errno = EINVAL;
1875                DRV_LOG(WARNING, "%s: invalid negative value \"%s\"", key, val);
1876                return -rte_errno;
1877        }
1878        mod = tmp >= 0 ? tmp : -tmp;
1879        if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
1880                if (tmp > MLX5_CQE_RESP_FORMAT_L34H_STRIDX) {
1881                        DRV_LOG(ERR, "invalid CQE compression "
1882                                     "format parameter");
1883                        rte_errno = EINVAL;
1884                        return -rte_errno;
1885                }
1886                config->cqe_comp = !!tmp;
1887                config->cqe_comp_fmt = tmp;
1888        } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) {
1889                config->hw_padding = !!tmp;
1890        } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
1891                config->mprq.enabled = !!tmp;
1892        } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) {
1893                config->mprq.stride_num_n = tmp;
1894        } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_SIZE, key) == 0) {
1895                config->mprq.stride_size_n = tmp;
1896        } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) {
1897                config->mprq.max_memcpy_len = tmp;
1898        } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
1899                config->mprq.min_rxqs_num = tmp;
1900        } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
1901                DRV_LOG(WARNING, "%s: deprecated parameter,"
1902                                 " converted to txq_inline_max", key);
1903                config->txq_inline_max = tmp;
1904        } else if (strcmp(MLX5_TXQ_INLINE_MAX, key) == 0) {
1905                config->txq_inline_max = tmp;
1906        } else if (strcmp(MLX5_TXQ_INLINE_MIN, key) == 0) {
1907                config->txq_inline_min = tmp;
1908        } else if (strcmp(MLX5_TXQ_INLINE_MPW, key) == 0) {
1909                config->txq_inline_mpw = tmp;
1910        } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
1911                config->txqs_inline = tmp;
1912        } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
1913                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
1914        } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
1915                config->mps = !!tmp;
1916        } else if (strcmp(MLX5_TX_DB_NC, key) == 0) {
1917                if (tmp != MLX5_TXDB_CACHED &&
1918                    tmp != MLX5_TXDB_NCACHED &&
1919                    tmp != MLX5_TXDB_HEURISTIC) {
1920                        DRV_LOG(ERR, "invalid Tx doorbell "
1921                                     "mapping parameter");
1922                        rte_errno = EINVAL;
1923                        return -rte_errno;
1924                }
1925                config->dbnc = tmp;
1926        } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
1927                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
1928        } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
1929                DRV_LOG(WARNING, "%s: deprecated parameter,"
1930                                 " converted to txq_inline_mpw", key);
1931                config->txq_inline_mpw = tmp;
1932        } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
1933                DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
1934        } else if (strcmp(MLX5_TX_PP, key) == 0) {
1935                if (!mod) {
1936                        DRV_LOG(ERR, "Zero Tx packet pacing parameter");
1937                        rte_errno = EINVAL;
1938                        return -rte_errno;
1939                }
1940                config->tx_pp = tmp;
1941        } else if (strcmp(MLX5_TX_SKEW, key) == 0) {
1942                config->tx_skew = tmp;
1943        } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
1944                config->rx_vec_en = !!tmp;
1945        } else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
1946                config->l3_vxlan_en = !!tmp;
1947        } else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
1948                config->vf_nl_en = !!tmp;
1949        } else if (strcmp(MLX5_DV_ESW_EN, key) == 0) {
1950                config->dv_esw_en = !!tmp;
1951        } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
1952                config->dv_flow_en = !!tmp;
1953        } else if (strcmp(MLX5_DV_XMETA_EN, key) == 0) {
1954                if (tmp != MLX5_XMETA_MODE_LEGACY &&
1955                    tmp != MLX5_XMETA_MODE_META16 &&
1956                    tmp != MLX5_XMETA_MODE_META32 &&
1957                    tmp != MLX5_XMETA_MODE_MISS_INFO) {
1958                        DRV_LOG(ERR, "invalid extensive "
1959                                     "metadata parameter");
1960                        rte_errno = EINVAL;
1961                        return -rte_errno;
1962                }
1963                if (tmp != MLX5_XMETA_MODE_MISS_INFO)
1964                        config->dv_xmeta_en = tmp;
1965                else
1966                        config->dv_miss_info = 1;
1967        } else if (strcmp(MLX5_LACP_BY_USER, key) == 0) {
1968                config->lacp_by_user = !!tmp;
1969        } else if (strcmp(MLX5_MR_EXT_MEMSEG_EN, key) == 0) {
1970                config->mr_ext_memseg_en = !!tmp;
1971        } else if (strcmp(MLX5_MAX_DUMP_FILES_NUM, key) == 0) {
1972                config->max_dump_files_num = tmp;
1973        } else if (strcmp(MLX5_LRO_TIMEOUT_USEC, key) == 0) {
1974                config->lro.timeout = tmp;
1975        } else if (strcmp(RTE_DEVARGS_KEY_CLASS, key) == 0) {
1976                DRV_LOG(DEBUG, "class argument is %s.", val);
1977        } else if (strcmp(MLX5_HP_BUF_SIZE, key) == 0) {
1978                config->log_hp_size = tmp;
1979        } else if (strcmp(MLX5_RECLAIM_MEM, key) == 0) {
1980                if (tmp != MLX5_RCM_NONE &&
1981                    tmp != MLX5_RCM_LIGHT &&
1982                    tmp != MLX5_RCM_AGGR) {
1983                        DRV_LOG(ERR, "Unrecognize %s: \"%s\"", key, val);
1984                        rte_errno = EINVAL;
1985                        return -rte_errno;
1986                }
1987                config->reclaim_mode = tmp;
1988        } else if (strcmp(MLX5_SYS_MEM_EN, key) == 0) {
1989                config->sys_mem_en = !!tmp;
1990        } else if (strcmp(MLX5_DECAP_EN, key) == 0) {
1991                config->decap_en = !!tmp;
1992        } else if (strcmp(MLX5_ALLOW_DUPLICATE_PATTERN, key) == 0) {
1993                config->allow_duplicate_pattern = !!tmp;
1994        } else {
1995                DRV_LOG(WARNING, "%s: unknown parameter", key);
1996                rte_errno = EINVAL;
1997                return -rte_errno;
1998        }
1999        return 0;
2000}
2001
2002/**
2003 * Parse device parameters.
2004 *
2005 * @param config
2006 *   Pointer to device configuration structure.
2007 * @param devargs
2008 *   Device arguments structure.
2009 *
2010 * @return
2011 *   0 on success, a negative errno value otherwise and rte_errno is set.
2012 */
2013int
2014mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
2015{
2016        const char **params = (const char *[]){
2017                MLX5_RXQ_CQE_COMP_EN,
2018                MLX5_RXQ_PKT_PAD_EN,
2019                MLX5_RX_MPRQ_EN,
2020                MLX5_RX_MPRQ_LOG_STRIDE_NUM,
2021                MLX5_RX_MPRQ_LOG_STRIDE_SIZE,
2022                MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
2023                MLX5_RXQS_MIN_MPRQ,
2024                MLX5_TXQ_INLINE,
2025                MLX5_TXQ_INLINE_MIN,
2026                MLX5_TXQ_INLINE_MAX,
2027                MLX5_TXQ_INLINE_MPW,
2028                MLX5_TXQS_MIN_INLINE,
2029                MLX5_TXQS_MAX_VEC,
2030                MLX5_TXQ_MPW_EN,
2031                MLX5_TXQ_MPW_HDR_DSEG_EN,
2032                MLX5_TXQ_MAX_INLINE_LEN,
2033                MLX5_TX_DB_NC,
2034                MLX5_TX_PP,
2035                MLX5_TX_SKEW,
2036                MLX5_TX_VEC_EN,
2037                MLX5_RX_VEC_EN,
2038                MLX5_L3_VXLAN_EN,
2039                MLX5_VF_NL_EN,
2040                MLX5_DV_ESW_EN,
2041                MLX5_DV_FLOW_EN,
2042                MLX5_DV_XMETA_EN,
2043                MLX5_LACP_BY_USER,
2044                MLX5_MR_EXT_MEMSEG_EN,
2045                MLX5_REPRESENTOR,
2046                MLX5_MAX_DUMP_FILES_NUM,
2047                MLX5_LRO_TIMEOUT_USEC,
2048                RTE_DEVARGS_KEY_CLASS,
2049                MLX5_HP_BUF_SIZE,
2050                MLX5_RECLAIM_MEM,
2051                MLX5_SYS_MEM_EN,
2052                MLX5_DECAP_EN,
2053                MLX5_ALLOW_DUPLICATE_PATTERN,
2054                NULL,
2055        };
2056        struct rte_kvargs *kvlist;
2057        int ret = 0;
2058        int i;
2059
2060        if (devargs == NULL)
2061                return 0;
2062        /* Parse the comma-separated key=value pairs from devargs. */
2063        kvlist = rte_kvargs_parse(devargs->args, params);
2064        if (kvlist == NULL) {
2065                rte_errno = EINVAL;
2066                return -rte_errno;
2067        }
2068        /* Process parameters. */
2069        for (i = 0; (params[i] != NULL); ++i) {
2070                if (rte_kvargs_count(kvlist, params[i])) {
2071                        ret = rte_kvargs_process(kvlist, params[i],
2072                                                 mlx5_args_check, config);
2073                        if (ret) {
2074                                rte_errno = EINVAL;
2075                                rte_kvargs_free(kvlist);
2076                                return -rte_errno;
2077                        }
2078                }
2079        }
2080        rte_kvargs_free(kvlist);
2081        return 0;
2082}
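
/*
 * Devargs sketch: the keys listed above are supplied per device on the
 * EAL command line (the PCI address below is hypothetical), e.g.:
 *
 *   dpdk-testpmd -a 0000:03:00.0,rxq_cqe_comp_en=1,mprq_en=1,txq_inline_max=128
 *
 * Each "key=value" pair is then dispatched through the generic rte_kvargs
 * machinery, roughly:
 *
 *   kvlist = rte_kvargs_parse("mprq_en=1", params);
 *   if (kvlist != NULL) {
 *           rte_kvargs_process(kvlist, MLX5_RX_MPRQ_EN,
 *                              mlx5_args_check, config);
 *           rte_kvargs_free(kvlist);
 *   }
 */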
2083
2084/**
2085 * Configures the minimal amount of data to inline into WQE
2086 * while sending packets.
2087 *
2088 * - txq_inline_min has the highest priority if this key is
2089 *   specified in devargs.
2090 * - if DevX is enabled, the inline mode is queried from the
2091 *   device (HCA attributes and NIC vport context if needed).
2092 * - otherwise, L2 mode (18 bytes) is assumed for ConnectX-4/4 Lx
2093 *   and none (0 bytes) for other NICs.
2094 *
2095 * @param spawn
2096 *   Verbs device parameters (name, port, switch_info) to spawn.
2097 * @param config
2098 *   Device configuration parameters.
2099 */
2100void
2101mlx5_set_min_inline(struct mlx5_dev_spawn_data *spawn,
2102                    struct mlx5_dev_config *config)
2103{
2104        if (config->txq_inline_min != MLX5_ARG_UNSET) {
2105                /* Application defines size of inlined data explicitly. */
2106                if (spawn->pci_dev != NULL) {
2107                        switch (spawn->pci_dev->id.device_id) {
2108                        case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2109                        case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2110                                if (config->txq_inline_min <
2111                                               (int)MLX5_INLINE_HSIZE_L2) {
2112                                        DRV_LOG(DEBUG,
2113                                                "txq_inline_mix aligned to minimal ConnectX-4 required value %d",
2114                                                (int)MLX5_INLINE_HSIZE_L2);
2115                                        config->txq_inline_min =
2116                                                        MLX5_INLINE_HSIZE_L2;
2117                                }
2118                                break;
2119                        }
2120                }
2121                goto exit;
2122        }
2123        if (config->hca_attr.eth_net_offloads) {
2124                /* We have DevX enabled, inline mode queried successfully. */
2125                switch (config->hca_attr.wqe_inline_mode) {
2126                case MLX5_CAP_INLINE_MODE_L2:
2127                        /* outer L2 header must be inlined. */
2128                        config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2129                        goto exit;
2130                case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
2131                        /* No inline data is required by the NIC. */
2132                        config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2133                        config->hw_vlan_insert =
2134                                config->hca_attr.wqe_vlan_insert;
2135                        DRV_LOG(DEBUG, "Tx VLAN insertion is supported");
2136                        goto exit;
2137                case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
2138                        /* inline mode is defined by NIC vport context. */
2139                        if (!config->hca_attr.eth_virt)
2140                                break;
2141                        switch (config->hca_attr.vport_inline_mode) {
2142                        case MLX5_INLINE_MODE_NONE:
2143                                config->txq_inline_min =
2144                                        MLX5_INLINE_HSIZE_NONE;
2145                                goto exit;
2146                        case MLX5_INLINE_MODE_L2:
2147                                config->txq_inline_min =
2148                                        MLX5_INLINE_HSIZE_L2;
2149                                goto exit;
2150                        case MLX5_INLINE_MODE_IP:
2151                                config->txq_inline_min =
2152                                        MLX5_INLINE_HSIZE_L3;
2153                                goto exit;
2154                        case MLX5_INLINE_MODE_TCP_UDP:
2155                                config->txq_inline_min =
2156                                        MLX5_INLINE_HSIZE_L4;
2157                                goto exit;
2158                        case MLX5_INLINE_MODE_INNER_L2:
2159                                config->txq_inline_min =
2160                                        MLX5_INLINE_HSIZE_INNER_L2;
2161                                goto exit;
2162                        case MLX5_INLINE_MODE_INNER_IP:
2163                                config->txq_inline_min =
2164                                        MLX5_INLINE_HSIZE_INNER_L3;
2165                                goto exit;
2166                        case MLX5_INLINE_MODE_INNER_TCP_UDP:
2167                                config->txq_inline_min =
2168                                        MLX5_INLINE_HSIZE_INNER_L4;
2169                                goto exit;
2170                        }
2171                }
2172        }
2173        if (spawn->pci_dev == NULL) {
2174                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2175                goto exit;
2176        }
2177        /*
2178         * We get here if we are unable to deduce
2179         * the inline data size with DevX. Try the PCI ID
2180         * to recognize older NICs.
2181         */
2182        switch (spawn->pci_dev->id.device_id) {
2183        case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
2184        case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
2185        case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
2186        case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
2187                config->txq_inline_min = MLX5_INLINE_HSIZE_L2;
2188                config->hw_vlan_insert = 0;
2189                break;
2190        case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
2191        case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
2192        case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
2193        case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
2194                /*
2195                 * These NICs support VLAN insertion from WQE and
2196                 * report the wqe_vlan_insert flag. However, there is a bug
2197                 * that may break PFC control, so the feature is disabled.
2198                 */
2199                config->hw_vlan_insert = 0;
2200                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2201                break;
2202        default:
2203                config->txq_inline_min = MLX5_INLINE_HSIZE_NONE;
2204                break;
2205        }
2206exit:
2207        DRV_LOG(DEBUG, "min tx inline configured: %d", config->txq_inline_min);
2208}
2209
2210/**
2211 * Configures the metadata mask fields in the shared context.
2212 *
2213 * @param [in] dev
2214 *   Pointer to Ethernet device.
2215 */
2216void
2217mlx5_set_metadata_mask(struct rte_eth_dev *dev)
2218{
2219        struct mlx5_priv *priv = dev->data->dev_private;
2220        struct mlx5_dev_ctx_shared *sh = priv->sh;
2221        uint32_t meta, mark, reg_c0;
2222
2223        reg_c0 = ~priv->vport_meta_mask;
2224        switch (priv->config.dv_xmeta_en) {
2225        case MLX5_XMETA_MODE_LEGACY:
2226                meta = UINT32_MAX;
2227                mark = MLX5_FLOW_MARK_MASK;
2228                break;
2229        case MLX5_XMETA_MODE_META16:
2230                meta = reg_c0 >> rte_bsf32(reg_c0);
2231                mark = MLX5_FLOW_MARK_MASK;
2232                break;
2233        case MLX5_XMETA_MODE_META32:
2234                meta = UINT32_MAX;
2235                mark = (reg_c0 >> rte_bsf32(reg_c0)) & MLX5_FLOW_MARK_MASK;
2236                break;
2237        default:
2238                meta = 0;
2239                mark = 0;
2240                MLX5_ASSERT(false);
2241                break;
2242        }
2243        if (sh->dv_mark_mask && sh->dv_mark_mask != mark)
2244                DRV_LOG(WARNING, "metadata MARK mask mismatche %08X:%08X",
2245                                 sh->dv_mark_mask, mark);
2246        else
2247                sh->dv_mark_mask = mark;
2248        if (sh->dv_meta_mask && sh->dv_meta_mask != meta)
2249                DRV_LOG(WARNING, "metadata META mask mismatche %08X:%08X",
2250                                 sh->dv_meta_mask, meta);
2251        else
2252                sh->dv_meta_mask = meta;
2253        if (sh->dv_regc0_mask && sh->dv_regc0_mask != reg_c0)
2254                DRV_LOG(WARNING, "metadata reg_c0 mask mismatche %08X:%08X",
2255                                 sh->dv_meta_mask, reg_c0);
2256        else
2257                sh->dv_regc0_mask = reg_c0;
2258        DRV_LOG(DEBUG, "metadata mode %u", priv->config.dv_xmeta_en);
2259        DRV_LOG(DEBUG, "metadata MARK mask %08X", sh->dv_mark_mask);
2260        DRV_LOG(DEBUG, "metadata META mask %08X", sh->dv_meta_mask);
2261        DRV_LOG(DEBUG, "metadata reg_c0 mask %08X", sh->dv_regc0_mask);
2262}
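
/*
 * Worked example with a hypothetical mask value: if priv->vport_meta_mask
 * is 0x0000ffff, then reg_c0 = ~0x0000ffff = 0xffff0000 and
 * rte_bsf32(reg_c0) = 16, so in MLX5_XMETA_MODE_META16 mode:
 *
 *   meta = 0xffff0000 >> 16 = 0x0000ffff;   16 usable metadata bits
 *   mark = MLX5_FLOW_MARK_MASK;             full MARK range preserved
 */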
2263
2264int
2265rte_pmd_mlx5_get_dyn_flag_names(char *names[], unsigned int n)
2266{
2267        static const char *const dynf_names[] = {
2268                RTE_PMD_MLX5_FINE_GRANULARITY_INLINE,
2269                RTE_MBUF_DYNFLAG_METADATA_NAME,
2270                RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME
2271        };
2272        unsigned int i;
2273
2274        if (n < RTE_DIM(dynf_names))
2275                return -ENOMEM;
2276        for (i = 0; i < RTE_DIM(dynf_names); i++) {
2277                if (names[i] == NULL)
2278                        return -EINVAL;
2279                strcpy(names[i], dynf_names[i]);
2280        }
2281        return RTE_DIM(dynf_names);
2282}
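
/*
 * Caller-side sketch: the function copies into caller-provided buffers,
 * so every names[i] must point to storage large enough for the longest
 * flag name (RTE_MBUF_DYN_NAMESIZE bytes is sufficient):
 *
 *   char buf[3][RTE_MBUF_DYN_NAMESIZE];
 *   char *names[3] = { buf[0], buf[1], buf[2] };
 *   int nb = rte_pmd_mlx5_get_dyn_flag_names(names, 3);
 *   // nb < 0 on error: -ENOMEM if n is too small, -EINVAL on NULL entry
 */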
2283
2284/**
2285 * Check sibling device configurations.
2286 *
2287 * Sibling devices sharing the Infiniband device context should have
2288 * compatible configurations. This regards representors and bonding devices.
2289 * @param priv
2290 *   Pointer to the private device data structure.
2291 * @param config
2292 *   Configuration of the device about to be created.
2293 * @param dpdk_dev
2294 *   Backing DPDK device.
2295 * @return
2296 *   0 on success, EINVAL otherwise.
2297 */
2298int
2299mlx5_dev_check_sibling_config(struct mlx5_priv *priv,
2300                              struct mlx5_dev_config *config,
2301                              struct rte_device *dpdk_dev)
2302{
2303        struct mlx5_dev_ctx_shared *sh = priv->sh;
2304        struct mlx5_dev_config *sh_conf = NULL;
2305        uint16_t port_id;
2306
2307        MLX5_ASSERT(sh);
2308        /* Nothing to compare for the single/first device. */
2309        if (sh->refcnt == 1)
2310                return 0;
2311        /* Find the device with shared context. */
2312        MLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {
2313                struct mlx5_priv *opriv =
2314                        rte_eth_devices[port_id].data->dev_private;
2315
2316                if (opriv && opriv != priv && opriv->sh == sh) {
2317                        sh_conf = &opriv->config;
2318                        break;
2319                }
2320        }
2321        if (!sh_conf)
2322                return 0;
2323        if (sh_conf->dv_flow_en ^ config->dv_flow_en) {
2324                DRV_LOG(ERR, "\"dv_flow_en\" configuration mismatch"
2325                             " for shared %s context", sh->ibdev_name);
2326                rte_errno = EINVAL;
2327                return rte_errno;
2328        }
2329        if (sh_conf->dv_xmeta_en ^ config->dv_xmeta_en) {
2330                DRV_LOG(ERR, "\"dv_xmeta_en\" configuration mismatch"
2331                             " for shared %s context", sh->ibdev_name);
2332                rte_errno = EINVAL;
2333                return rte_errno;
2334        }
2335        return 0;
2336}
2337
2338/**
2339 * Look for the Ethernet device belonging to the mlx5 driver.
2340 *
2341 * @param[in] port_id
2342 *   port_id to start looking for device.
2343 * @param[in] odev
2344 *   Pointer to the hint device. While a device is being probed,
2345 *   its siblings (the master and preceding representors) might
2346 *   not have a driver assigned yet (because mlx5_os_pci_probe()
2347 *   has not completed), so for this case a match on the hint
2348 *   device may be used to detect a sibling device.
2349 *
2350 * @return
2351 *   port_id of the found device, RTE_MAX_ETHPORTS if not found.
2352 */
2353uint16_t
2354mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
2355{
2356        while (port_id < RTE_MAX_ETHPORTS) {
2357                struct rte_eth_dev *dev = &rte_eth_devices[port_id];
2358
2359                if (dev->state != RTE_ETH_DEV_UNUSED &&
2360                    dev->device &&
2361                    (dev->device == odev ||
2362                     (dev->device->driver &&
2363                     dev->device->driver->name &&
2364                     ((strcmp(dev->device->driver->name,
2365                              MLX5_PCI_DRIVER_NAME) == 0) ||
2366                      (strcmp(dev->device->driver->name,
2367                              MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
2368                        break;
2369                port_id++;
2370        }
2371        if (port_id >= RTE_MAX_ETHPORTS)
2372                return RTE_MAX_ETHPORTS;
2373        return port_id;
2374}
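
/*
 * Iteration sketch: this helper is the building block behind the
 * MLX5_ETH_FOREACH_DEV() traversal used elsewhere in this file, roughly:
 *
 *   uint16_t port_id;
 *
 *   for (port_id = mlx5_eth_find_next(0, odev);
 *        port_id < RTE_MAX_ETHPORTS;
 *        port_id = mlx5_eth_find_next(port_id + 1, odev))
 *           ...;  each port_id is an mlx5 port or matches the hint device
 */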
2375
2376/**
2377 * Callback to remove a device.
2378 *
2379 * This function removes all Ethernet devices belonging to a given device.
2380 *
2381 * @param[in] dev
2382 *   Pointer to the generic device.
2383 *
2384 * @return
2385 *   0 on success, the function cannot fail.
2386 */
2387static int
2388mlx5_net_remove(struct rte_device *dev)
2389{
2390        uint16_t port_id;
2391        int ret = 0;
2392
2393        RTE_ETH_FOREACH_DEV_OF(port_id, dev) {
2394                /*
2395                 * mlx5_dev_close() is not registered for secondary processes,
2396                 * so call the close function explicitly here.
2397                 */
2398                if (rte_eal_process_type() == RTE_PROC_SECONDARY)
2399                        ret |= mlx5_dev_close(&rte_eth_devices[port_id]);
2400                else
2401                        ret |= rte_eth_dev_close(port_id);
2402        }
2403        return ret == 0 ? 0 : -EIO;
2404}
2405
2406static const struct rte_pci_id mlx5_pci_id_map[] = {
2407        {
2408                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2409                               PCI_DEVICE_ID_MELLANOX_CONNECTX4)
2410        },
2411        {
2412                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2413                               PCI_DEVICE_ID_MELLANOX_CONNECTX4VF)
2414        },
2415        {
2416                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2417                               PCI_DEVICE_ID_MELLANOX_CONNECTX4LX)
2418        },
2419        {
2420                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2421                               PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)
2422        },
2423        {
2424                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2425                               PCI_DEVICE_ID_MELLANOX_CONNECTX5)
2426        },
2427        {
2428                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2429                               PCI_DEVICE_ID_MELLANOX_CONNECTX5VF)
2430        },
2431        {
2432                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2433                               PCI_DEVICE_ID_MELLANOX_CONNECTX5EX)
2434        },
2435        {
2436                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2437                               PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
2438        },
2439        {
2440                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2441                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
2442        },
2443        {
2444                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2445                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
2446        },
2447        {
2448                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2449                                PCI_DEVICE_ID_MELLANOX_CONNECTX6)
2450        },
2451        {
2452                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2453                                PCI_DEVICE_ID_MELLANOX_CONNECTX6VF)
2454        },
2455        {
2456                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2457                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DX)
2458        },
2459        {
2460                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2461                                PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
2462        },
2463        {
2464                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2465                                PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF)
2466        },
2467        {
2468                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2469                                PCI_DEVICE_ID_MELLANOX_CONNECTX6LX)
2470        },
2471        {
2472                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2473                                PCI_DEVICE_ID_MELLANOX_CONNECTX7)
2474        },
2475        {
2476                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
2477                                PCI_DEVICE_ID_MELLANOX_CONNECTX7BF)
2478        },
2479        {
2480                .vendor_id = 0
2481        }
2482};
2483
2484static struct mlx5_class_driver mlx5_net_driver = {
2485        .drv_class = MLX5_CLASS_ETH,
2486        .name = RTE_STR(MLX5_ETH_DRIVER_NAME),
2487        .id_table = mlx5_pci_id_map,
2488        .probe = mlx5_os_net_probe,
2489        .remove = mlx5_net_remove,
2490        .dma_map = mlx5_net_dma_map,
2491        .dma_unmap = mlx5_net_dma_unmap,
2492        .probe_again = 1,
2493        .intr_lsc = 1,
2494        .intr_rmv = 1,
2495};
2496
2497/* Initialize driver log type. */
2498RTE_LOG_REGISTER_DEFAULT(mlx5_logtype, NOTICE)
2499
2500/**
2501 * Driver initialization routine.
2502 */
2503RTE_INIT(rte_mlx5_pmd_init)
2504{
2505        pthread_mutex_init(&mlx5_dev_ctx_list_mutex, NULL);
2506        mlx5_common_init();
2507        /* Build the static tables for Verbs conversion. */
2508        mlx5_set_ptype_table();
2509        mlx5_set_cksum_table();
2510        mlx5_set_swp_types_table();
2511        if (mlx5_glue)
2512                mlx5_class_driver_register(&mlx5_net_driver);
2513}
2514
2515RTE_PMD_EXPORT_NAME(MLX5_ETH_DRIVER_NAME, __COUNTER__);
2516RTE_PMD_REGISTER_PCI_TABLE(MLX5_ETH_DRIVER_NAME, mlx5_pci_id_map);
2517RTE_PMD_REGISTER_KMOD_DEP(MLX5_ETH_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");
2518