dpdk/drivers/net/mlx5/mlx5_flow_aso.c
   1/* SPDX-License-Identifier: BSD-3-Clause
   2 * Copyright 2020 Mellanox Technologies, Ltd
   3 */
   4#include <mlx5_prm.h>
   5#include <rte_malloc.h>
   6#include <rte_cycles.h>
   7#include <rte_eal_paging.h>
   8
   9#include <mlx5_malloc.h>
  10#include <mlx5_common_os.h>
  11#include <mlx5_common_devx.h>
  12
  13#include "mlx5.h"
  14#include "mlx5_flow.h"
  15
  16/**
  17 * Free MR resources.
  18 *
  19 * @param[in] cdev
  20 *   Pointer to the mlx5 common device.
  21 * @param[in] mr
  22 *   MR to free.
  23 */
  24static void
  25mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
  26{
  27        void *addr = mr->addr;
  28
  29        cdev->mr_scache.dereg_mr_cb(mr);
  30        mlx5_free(addr);
  31        memset(mr, 0, sizeof(*mr));
  32}
  33
  34/**
  35 * Register Memory Region.
  36 *
  37 * @param[in] cdev
  38 *   Pointer to the mlx5 common device.
  39 * @param[in] length
  40 *   Size of MR buffer.
  41 * @param[in/out] mr
  42 *   Pointer to MR to create.
  43 *
  44 * @return
  45 *   0 on success, a negative errno value otherwise and rte_errno is set.
  46 */
  47static int
  48mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
  49                struct mlx5_pmd_mr *mr)
  50{
  51        int ret;
  52
  53        mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
  54                               SOCKET_ID_ANY);
  55        if (!mr->addr) {
  56                DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
  57                return -1;
  58        }
  59        ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
  60        if (ret) {
  61                DRV_LOG(ERR, "Failed to create direct Mkey.");
  62                mlx5_free(mr->addr);
  63                return -1;
  64        }
  65        return 0;
  66}
  67
  68/**
  69 * Destroy Send Queue used for ASO access.
  70 *
  71 * @param[in] sq
  72 *   ASO SQ to destroy.
  73 */
  74static void
  75mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
  76{
  77        mlx5_devx_sq_destroy(&sq->sq_obj);
  78        mlx5_devx_cq_destroy(&sq->cq.cq_obj);
  79        memset(sq, 0, sizeof(*sq));
  80}
  81
  82/**
  83 * Initialize Send Queue used for ASO access.
  84 *
  85 * @param[in] sq
  86 *   ASO SQ to initialize.
  87 */
  88static void
  89mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
  90{
  91        volatile struct mlx5_aso_wqe *restrict wqe;
  92        int i;
  93        int size = 1 << sq->log_desc_n;
  94        uint64_t addr;
  95
   96        /* The state of all the following fields should stay constant. */
  97        for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
  98                wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
  99                                                          (sizeof(*wqe) >> 4));
 100                wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
 101                addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
 102                                            MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
 103                wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
 104                wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
 105                wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
 106                        (0u |
 107                         (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
 108                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
 109                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
 110                         (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
 111                wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
 112        }
 113}
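     /*
      * Buffer layout behind the addresses programmed above (derived from the
      * code, for illustration): mlx5_aso_queue_init() registers
      * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes per descriptor, i.e. one hit
      * bit per action. Descriptor i therefore starts
      * i * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64 uint64_t words, which is the
      * same i * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes, into sq->mr.addr.
      */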
 114
 115/**
 116 * Initialize Send Queue used for ASO flow meter access.
 117 *
 118 * @param[in] sq
 119 *   ASO SQ to initialize.
 120 */
 121static void
 122mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
 123{
 124        volatile struct mlx5_aso_wqe *restrict wqe;
 125        int i;
 126        int size = 1 << sq->log_desc_n;
 127
  128        /* The state of all the following fields should stay constant. */
 129        for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
 130                wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
 131                                                          (sizeof(*wqe) >> 4));
 132                wqe->aso_cseg.operand_masks = RTE_BE32(0u |
 133                         (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
 134                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
 135                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
 136                         (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
 137                wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 138                                                         MLX5_COMP_MODE_OFFSET);
 139        }
 140}
 141
 142/*
 143 * Initialize Send Queue used for ASO connection tracking.
 144 *
 145 * @param[in] sq
 146 *   ASO SQ to initialize.
 147 */
 148static void
 149mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
 150{
 151        volatile struct mlx5_aso_wqe *restrict wqe;
 152        int i;
 153        int size = 1 << sq->log_desc_n;
 154        uint64_t addr;
 155
  156        /* The state of all the following fields should stay constant. */
 157        for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
 158                wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
 159                                                          (sizeof(*wqe) >> 4));
 160                /* One unique MR for the query data. */
 161                wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
  162                /* Magic number 64 represents the length of an ASO CT object. */
 163                addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
 164                wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
 165                wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
  166                /*
  167                 * The values of operand_masks differ between modify
  168                 * and query operations.
  169                 * data_mask may also differ for each modification; in
  170                 * a query it can be zero and is ignored.
  171                 * CQE generation is always needed in order to know when
  172                 * the flow can be created or the data can be read.
  173                 */
 174                wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 175                                                   MLX5_COMP_MODE_OFFSET);
 176        }
 177}
 178
 179/**
 180 * Create Send Queue used for ASO access.
 181 *
 182 * @param[in] cdev
 183 *   Pointer to the mlx5 common device.
 184 * @param[in/out] sq
 185 *   Pointer to SQ to create.
 186 * @param[in] uar
 187 *   User Access Region object.
 188 *
 189 * @return
 190 *   0 on success, a negative errno value otherwise and rte_errno is set.
 191 */
 192static int
 193mlx5_aso_sq_create(struct mlx5_common_device *cdev, struct mlx5_aso_sq *sq,
 194                   void *uar)
 195{
 196        struct mlx5_devx_cq_attr cq_attr = {
 197                .uar_page_id = mlx5_os_get_devx_uar_page_id(uar),
 198        };
 199        struct mlx5_devx_create_sq_attr sq_attr = {
 200                .user_index = 0xFFFF,
 201                .wq_attr = (struct mlx5_devx_wq_attr){
 202                        .pd = cdev->pdn,
 203                        .uar_page = mlx5_os_get_devx_uar_page_id(uar),
 204                },
 205                .ts_format =
 206                        mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
 207        };
 208        struct mlx5_devx_modify_sq_attr modify_attr = {
 209                .state = MLX5_SQC_STATE_RDY,
 210        };
 211        uint16_t log_wqbb_n;
 212        int ret;
 213
 214        if (mlx5_devx_cq_create(cdev->ctx, &sq->cq.cq_obj,
 215                                MLX5_ASO_QUEUE_LOG_DESC, &cq_attr,
 216                                SOCKET_ID_ANY))
 217                goto error;
 218        sq->cq.cq_ci = 0;
 219        sq->cq.log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
 220        sq->log_desc_n = MLX5_ASO_QUEUE_LOG_DESC;
 221        sq_attr.cqn = sq->cq.cq_obj.cq->id;
  222        /* mlx5_aso_wqe is twice the size of mlx5_wqe, so double the WQEBBs. */
 223        log_wqbb_n = sq->log_desc_n + 1;
 224        ret = mlx5_devx_sq_create(cdev->ctx, &sq->sq_obj, log_wqbb_n, &sq_attr,
 225                                  SOCKET_ID_ANY);
 226        if (ret) {
 227                DRV_LOG(ERR, "Can't create SQ object.");
 228                rte_errno = ENOMEM;
 229                goto error;
 230        }
 231        ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
 232        if (ret) {
 233                DRV_LOG(ERR, "Can't change SQ state to ready.");
 234                rte_errno = ENOMEM;
 235                goto error;
 236        }
 237        sq->pi = 0;
 238        sq->head = 0;
 239        sq->tail = 0;
 240        sq->sqn = sq->sq_obj.sq->id;
 241        rte_spinlock_init(&sq->sqsl);
 242        return 0;
 243error:
 244        mlx5_aso_destroy_sq(sq);
 245        return -1;
 246}
 247
 248/**
 249 * API to create and initialize Send Queue used for ASO access.
 250 *
 251 * @param[in] sh
 252 *   Pointer to shared device context.
 253 * @param[in] aso_opc_mod
 254 *   Mode of ASO feature.
 255 *
 256 * @return
 257 *   0 on success, a negative errno value otherwise and rte_errno is set.
 258 */
 259int
 260mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
 261                    enum mlx5_access_aso_opc_mod aso_opc_mod)
 262{
 263        uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
 264        struct mlx5_common_device *cdev = sh->cdev;
 265
 266        switch (aso_opc_mod) {
 267        case ASO_OPC_MOD_FLOW_HIT:
 268                if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
 269                                    sq_desc_n, &sh->aso_age_mng->aso_sq.mr))
 270                        return -1;
 271                if (mlx5_aso_sq_create(cdev, &sh->aso_age_mng->aso_sq,
 272                                       sh->tx_uar.obj)) {
 273                        mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
 274                        return -1;
 275                }
 276                mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
 277                break;
 278        case ASO_OPC_MOD_POLICER:
 279                if (mlx5_aso_sq_create(cdev, &sh->mtrmng->pools_mng.sq,
 280                                       sh->tx_uar.obj))
 281                        return -1;
 282                mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
 283                break;
 284        case ASO_OPC_MOD_CONNECTION_TRACKING:
 285                /* 64B per object for query. */
 286                if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
 287                                    &sh->ct_mng->aso_sq.mr))
 288                        return -1;
 289                if (mlx5_aso_sq_create(cdev, &sh->ct_mng->aso_sq,
 290                                       sh->tx_uar.obj)) {
 291                        mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
 292                        return -1;
 293                }
 294                mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
 295                break;
 296        default:
 297                DRV_LOG(ERR, "Unknown ASO operation mode");
 298                return -1;
 299        }
 300        return 0;
 301}
 302
 303/**
 304 * API to destroy Send Queue used for ASO access.
 305 *
 306 * @param[in] sh
 307 *   Pointer to shared device context.
 308 * @param[in] aso_opc_mod
 309 *   Mode of ASO feature.
 310 */
 311void
 312mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
 313                      enum mlx5_access_aso_opc_mod aso_opc_mod)
 314{
 315        struct mlx5_aso_sq *sq;
 316
 317        switch (aso_opc_mod) {
 318        case ASO_OPC_MOD_FLOW_HIT:
 319                mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
 320                sq = &sh->aso_age_mng->aso_sq;
 321                break;
 322        case ASO_OPC_MOD_POLICER:
 323                sq = &sh->mtrmng->pools_mng.sq;
 324                break;
 325        case ASO_OPC_MOD_CONNECTION_TRACKING:
 326                mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
 327                sq = &sh->ct_mng->aso_sq;
 328                break;
 329        default:
 330                DRV_LOG(ERR, "Unknown ASO operation mode");
 331                return;
 332        }
 333        mlx5_aso_destroy_sq(sq);
 334}
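     /*
      * Minimal usage sketch for the two entry points above (illustrative
      * only; error handling elided and the per-feature management structures
      * are assumed to be allocated by the caller):
      *
      *     if (mlx5_aso_queue_init(sh, ASO_OPC_MOD_FLOW_HIT))
      *             goto error;
      *     ...
      *     mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_FLOW_HIT);
      */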
 335
 336/**
 337 * Write a burst of WQEs to ASO SQ.
 338 *
 339 * @param[in] sh
 340 *   Pointer to shared device context.
 341 * @param[in] n
 342 *   Index of the last valid pool.
 343 *
 344 * @return
 345 *   Number of WQEs in burst.
 346 */
 347static uint16_t
 348mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
 349{
 350        struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
 351        volatile struct mlx5_aso_wqe *wqe;
 352        struct mlx5_aso_sq *sq = &mng->aso_sq;
 353        struct mlx5_aso_age_pool *pool;
 354        uint16_t size = 1 << sq->log_desc_n;
 355        uint16_t mask = size - 1;
 356        uint16_t max;
 357        uint16_t start_head = sq->head;
 358
 359        max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
 360        if (unlikely(!max))
 361                return 0;
 362        sq->elts[start_head & mask].burst_size = max;
 363        do {
 364                wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
 365                rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 366                /* Fill next WQE. */
 367                rte_rwlock_read_lock(&mng->resize_rwl);
 368                pool = mng->pools[sq->next];
 369                rte_rwlock_read_unlock(&mng->resize_rwl);
 370                sq->elts[sq->head & mask].pool = pool;
 371                wqe->general_cseg.misc =
 372                                rte_cpu_to_be_32(((struct mlx5_devx_obj *)
 373                                                 (pool->flow_hit_aso_obj))->id);
 374                wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
 375                                                         MLX5_COMP_MODE_OFFSET);
 376                wqe->general_cseg.opcode = rte_cpu_to_be_32
 377                                                (MLX5_OPCODE_ACCESS_ASO |
 378                                                 (ASO_OPC_MOD_FLOW_HIT <<
 379                                                  WQE_CSEG_OPC_MOD_OFFSET) |
 380                                                 (sq->pi <<
 381                                                  WQE_CSEG_WQE_INDEX_OFFSET));
 382                sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
 383                sq->head++;
 384                sq->next++;
 385                max--;
 386        } while (max);
 387        wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
 388                                                         MLX5_COMP_MODE_OFFSET);
 389        mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 390                           sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 391                           !sh->tx_uar.dbnc);
 392        return sq->elts[start_head & mask].burst_size;
 393}
 394
 395/**
 396 * Debug utility function. Dump contents of error CQE and WQE.
 397 *
 398 * @param[in] cqe
 399 *   Error CQE to dump.
 400 * @param[in] wqe
 401 *   Error WQE to dump.
 402 */
 403static void
 404mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
 405{
 406        int i;
 407
 408        DRV_LOG(ERR, "Error cqe:");
 409        for (i = 0; i < 16; i += 4)
 410                DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
 411                        cqe[i + 2], cqe[i + 3]);
 412        DRV_LOG(ERR, "\nError wqe:");
 413        for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
 414                DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
 415                        wqe[i + 2], wqe[i + 3]);
 416}
 417
 418/**
 419 * Handle case of error CQE.
 420 *
 421 * @param[in] sq
 422 *   ASO SQ to use.
 423 */
 424static void
 425mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
 426{
 427        struct mlx5_aso_cq *cq = &sq->cq;
 428        uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
 429        volatile struct mlx5_err_cqe *cqe =
 430                        (volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
 431
 432        cq->errors++;
  433        idx = rte_be_to_cpu_16(cqe->wqe_counter) & ((1u << sq->log_desc_n) - 1);
 434        mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
 435                               (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
 436}
 437
 438/**
 439 * Update ASO objects upon completion.
 440 *
 441 * @param[in] sh
 442 *   Shared device context.
 443 * @param[in] n
 444 *   Number of completed ASO objects.
 445 */
 446static void
 447mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
 448{
 449        struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
 450        struct mlx5_aso_sq *sq = &mng->aso_sq;
 451        struct mlx5_age_info *age_info;
 452        const uint16_t size = 1 << sq->log_desc_n;
 453        const uint16_t mask = size - 1;
 454        const uint64_t curr = MLX5_CURR_TIME_SEC;
 455        uint16_t expected = AGE_CANDIDATE;
 456        uint16_t i;
 457
 458        for (i = 0; i < n; ++i) {
 459                uint16_t idx = (sq->tail + i) & mask;
 460                struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
 461                uint64_t diff = curr - pool->time_of_last_age_check;
 462                uint64_t *addr = sq->mr.addr;
 463                int j;
 464
 465                addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
 466                pool->time_of_last_age_check = curr;
 467                for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
 468                        struct mlx5_aso_age_action *act = &pool->actions[j];
 469                        struct mlx5_age_param *ap = &act->age_params;
 470                        uint8_t byte;
 471                        uint8_t offset;
 472                        uint8_t *u8addr;
 473                        uint8_t hit;
 474
 475                        if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
 476                                            AGE_CANDIDATE)
 477                                continue;
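                             /*
                              * Hit bitmap layout (as used just below): action j
                              * maps to bit (j % 8) of byte (63 - j / 8), e.g.
                              * action 0 is bit 0 of byte 63 and action 8 is
                              * bit 0 of byte 62 of the 64B result.
                              */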
 478                        byte = 63 - (j / 8);
 479                        offset = j % 8;
 480                        u8addr = (uint8_t *)addr;
 481                        hit = (u8addr[byte] >> offset) & 0x1;
 482                        if (hit) {
 483                                __atomic_store_n(&ap->sec_since_last_hit, 0,
 484                                                 __ATOMIC_RELAXED);
 485                        } else {
 486                                struct mlx5_priv *priv;
 487
 488                                __atomic_fetch_add(&ap->sec_since_last_hit,
 489                                                   diff, __ATOMIC_RELAXED);
  490                                /* If the timeout passed, add to aged-out list. */
 491                                if (ap->sec_since_last_hit <= ap->timeout)
 492                                        continue;
 493                                priv =
 494                                rte_eth_devices[ap->port_id].data->dev_private;
 495                                age_info = GET_PORT_AGE_INFO(priv);
 496                                rte_spinlock_lock(&age_info->aged_sl);
 497                                if (__atomic_compare_exchange_n(&ap->state,
 498                                                                &expected,
 499                                                                AGE_TMOUT,
 500                                                                false,
 501                                                               __ATOMIC_RELAXED,
 502                                                            __ATOMIC_RELAXED)) {
 503                                        LIST_INSERT_HEAD(&age_info->aged_aso,
 504                                                         act, next);
 505                                        MLX5_AGE_SET(age_info,
 506                                                     MLX5_AGE_EVENT_NEW);
 507                                }
 508                                rte_spinlock_unlock(&age_info->aged_sl);
 509                        }
 510                }
 511        }
 512        mlx5_age_event_prepare(sh);
 513}
 514
 515/**
 516 * Handle completions from WQEs sent to ASO SQ.
 517 *
 518 * @param[in] sh
 519 *   Shared device context.
 520 *
 521 * @return
 522 *   Number of CQEs handled.
 523 */
 524static uint16_t
 525mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
 526{
 527        struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
 528        struct mlx5_aso_sq *sq = &mng->aso_sq;
 529        struct mlx5_aso_cq *cq = &sq->cq;
 530        volatile struct mlx5_cqe *restrict cqe;
 531        const unsigned int cq_size = 1 << cq->log_desc_n;
 532        const unsigned int mask = cq_size - 1;
 533        uint32_t idx;
 534        uint32_t next_idx = cq->cq_ci & mask;
 535        const uint16_t max = (uint16_t)(sq->head - sq->tail);
 536        uint16_t i = 0;
 537        int ret;
 538        if (unlikely(!max))
 539                return 0;
 540        do {
 541                idx = next_idx;
 542                next_idx = (cq->cq_ci + 1) & mask;
 543                rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
 544                cqe = &cq->cq_obj.cqes[idx];
 545                ret = check_cqe(cqe, cq_size, cq->cq_ci);
 546                /*
 547                 * Be sure owner read is done before any other cookie field or
 548                 * opaque field.
 549                 */
 550                rte_io_rmb();
 551                if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
 552                        if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
 553                                break;
 554                        mlx5_aso_cqe_err_handle(sq);
 555                } else {
 556                        i += sq->elts[(sq->tail + i) & mask].burst_size;
 557                }
 558                cq->cq_ci++;
 559        } while (1);
 560        if (likely(i)) {
 561                mlx5_aso_age_action_update(sh, i);
 562                sq->tail += i;
 563                rte_io_wmb();
 564                cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 565        }
 566        return i;
 567}
 568
 569/**
 570 * Periodically read CQEs and send WQEs to ASO SQ.
 571 *
 572 * @param[in] arg
 573 *   Shared device context containing the ASO SQ.
 574 */
 575static void
 576mlx5_flow_aso_alarm(void *arg)
 577{
 578        struct mlx5_dev_ctx_shared *sh = arg;
 579        struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
 580        uint32_t us = 100u;
 581        uint16_t n;
 582
 583        rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
 584        n = sh->aso_age_mng->next;
 585        rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
 586        mlx5_aso_completion_handle(sh);
 587        if (sq->next == n) {
 588                /* End of loop: wait 1 second. */
 589                us = US_PER_S;
 590                sq->next = 0;
 591        }
 592        mlx5_aso_sq_enqueue_burst(sh, n);
 593        if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
  594                DRV_LOG(ERR, "Cannot reinitialize ASO alarm.");
 595}
 596
 597/**
 598 * API to start ASO access using ASO SQ.
 599 *
 600 * @param[in] sh
 601 *   Pointer to shared device context.
 602 *
 603 * @return
 604 *   0 on success, a negative errno value otherwise and rte_errno is set.
 605 */
 606int
 607mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
 608{
 609        if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
 610                DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
 611                return -rte_errno;
 612        }
 613        return 0;
 614}
 615
 616/**
 617 * API to stop ASO access using ASO SQ.
 618 *
 619 * @param[in] sh
 620 *   Pointer to shared device context.
 621 *
 622 * @return
 623 *   0 on success, a negative errno value otherwise and rte_errno is set.
 624 */
 625int
 626mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
 627{
 628        int retries = 1024;
 629
 630        if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
 631                return -EINVAL;
 632        rte_errno = 0;
 633        while (--retries) {
 634                rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
 635                if (rte_errno != EINPROGRESS)
 636                        break;
 637                rte_pause();
 638        }
 639        return -rte_errno;
 640}
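     /*
      * Typical pairing of the two poll-control APIs above (illustrative
      * sketch only):
      *
      *     mlx5_aso_flow_hit_queue_poll_start(sh);
      *     ... aging events are gathered by mlx5_flow_aso_alarm() ...
      *     mlx5_aso_flow_hit_queue_poll_stop(sh);
      */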
 641
 642static uint16_t
 643mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 644                               struct mlx5_aso_sq *sq,
 645                               struct mlx5_aso_mtr *aso_mtr)
 646{
 647        volatile struct mlx5_aso_wqe *wqe = NULL;
 648        struct mlx5_flow_meter_info *fm = NULL;
 649        struct mlx5_flow_meter_profile *fmp;
 650        uint16_t size = 1 << sq->log_desc_n;
 651        uint16_t mask = size - 1;
 652        uint16_t res;
 653        uint32_t dseg_idx = 0;
 654        struct mlx5_aso_mtr_pool *pool = NULL;
 655        uint32_t param_le;
 656
 657        rte_spinlock_lock(&sq->sqsl);
 658        res = size - (uint16_t)(sq->head - sq->tail);
 659        if (unlikely(!res)) {
 660                DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
 661                rte_spinlock_unlock(&sq->sqsl);
 662                return 0;
 663        }
 664        wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
 665        rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 666        /* Fill next WQE. */
 667        fm = &aso_mtr->fm;
 668        sq->elts[sq->head & mask].mtr = aso_mtr;
 669        pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
 670                        mtrs[aso_mtr->offset]);
 671        wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
 672                        (aso_mtr->offset >> 1));
 673        wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
 674                        (ASO_OPC_MOD_POLICER <<
 675                        WQE_CSEG_OPC_MOD_OFFSET) |
 676                        sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
 677        /* There are 2 meters in one ASO cache line. */
 678        dseg_idx = aso_mtr->offset & 0x1;
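             /*
              * For example, a meter at pool offset 5 addresses ASO object
              * (devx_obj->id + 2) above and uses data segment 1 within
              * that 64B line.
              */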
 679        wqe->aso_cseg.data_mask =
 680                RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
 681        if (fm->is_enable) {
 682                wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
 683                        fm->profile->srtcm_prm.cbs_cir;
 684                wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
 685                        fm->profile->srtcm_prm.ebs_eir;
 686        } else {
 687                wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
 688                        RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
 689                wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
 690        }
 691        fmp = fm->profile;
 692        param_le = (1 << ASO_DSEG_VALID_OFFSET);
 693        if (fm->color_aware)
 694                param_le |= (MLX5_FLOW_COLOR_UNDEFINED << ASO_DSEG_SC_OFFSET);
 695        else
 696                param_le |= (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET);
 697        if (fmp->profile.packet_mode)
 698                param_le |= (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE);
 699        wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm = RTE_BE32(param_le);
 700        switch (fmp->profile.alg) {
 701        case RTE_MTR_SRTCM_RFC2697:
 702                /* Only needed for RFC2697. */
 703                if (fm->profile->srtcm_prm.ebs_eir)
 704                        wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
 705                                        RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
 706                break;
 707        case RTE_MTR_TRTCM_RFC2698:
 708                wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
 709                                RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
 710                break;
 711        case RTE_MTR_TRTCM_RFC4115:
 712                wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
 713                                RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
 714                break;
 715        default:
 716                break;
 717        }
  718        /*
  719         * Note:
  720         * For software performance reasons, the token fields are not
  721         * set when posting the WQE to the ASO SQ. They will be filled
  722         * by the HW automatically.
  723         */
 724        sq->head++;
  725        sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
 726        mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 727                           sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 728                           !sh->tx_uar.dbnc);
 729        rte_spinlock_unlock(&sq->sqsl);
 730        return 1;
 731}
 732
 733static void
 734mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
 735{
 736        uint16_t size = 1 << sq->log_desc_n;
 737        uint16_t mask = size - 1;
 738        uint16_t i;
 739        struct mlx5_aso_mtr *aso_mtr = NULL;
 740        uint8_t exp_state = ASO_METER_WAIT;
 741
 742        for (i = 0; i < aso_mtrs_nums; ++i) {
 743                aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
 744                MLX5_ASSERT(aso_mtr);
 745                (void)__atomic_compare_exchange_n(&aso_mtr->state,
 746                                &exp_state, ASO_METER_READY,
 747                                false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
 748        }
 749}
 750
 751static void
 752mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
 753{
 754        struct mlx5_aso_cq *cq = &sq->cq;
 755        volatile struct mlx5_cqe *restrict cqe;
 756        const unsigned int cq_size = 1 << cq->log_desc_n;
 757        const unsigned int mask = cq_size - 1;
 758        uint32_t idx;
 759        uint32_t next_idx = cq->cq_ci & mask;
 760        uint16_t max;
 761        uint16_t n = 0;
 762        int ret;
 763
 764        rte_spinlock_lock(&sq->sqsl);
 765        max = (uint16_t)(sq->head - sq->tail);
 766        if (unlikely(!max)) {
 767                rte_spinlock_unlock(&sq->sqsl);
 768                return;
 769        }
 770        do {
 771                idx = next_idx;
 772                next_idx = (cq->cq_ci + 1) & mask;
 773                rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
 774                cqe = &cq->cq_obj.cqes[idx];
 775                ret = check_cqe(cqe, cq_size, cq->cq_ci);
 776                /*
 777                 * Be sure owner read is done before any other cookie field or
 778                 * opaque field.
 779                 */
 780                rte_io_rmb();
 781                if (ret != MLX5_CQE_STATUS_SW_OWN) {
 782                        if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
 783                                break;
 784                        mlx5_aso_cqe_err_handle(sq);
 785                } else {
 786                        n++;
 787                }
 788                cq->cq_ci++;
 789        } while (1);
 790        if (likely(n)) {
 791                mlx5_aso_mtrs_status_update(sq, n);
 792                sq->tail += n;
 793                rte_io_wmb();
 794                cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
 795        }
 796        rte_spinlock_unlock(&sq->sqsl);
 797}
 798
  799/**
  800 * Update meter parameters by sending a WQE.
  801 *
  802 * @param[in] sh
  803 *   Pointer to shared device context containing the meter ASO SQ.
  804 * @param[in] mtr
  805 *   Pointer to ASO meter to be modified. Its state is expected to be
  806 *   ASO_METER_WAIT and is moved to ASO_METER_READY once the WQE
  807 *   completion is handled.
  808 *
  809 * @return
  810 *   0 on success, -1 otherwise.
  811 */
 812int
 813mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
 814                        struct mlx5_aso_mtr *mtr)
 815{
 816        struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
 817        uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
 818
 819        do {
 820                mlx5_aso_mtr_completion_handle(sq);
 821                if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
 822                        return 0;
 823                /* Waiting for wqe resource. */
 824                rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
 825        } while (--poll_wqe_times);
 826        DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
 827                        mtr->offset);
 828        return -1;
 829}
 830
  831/**
  832 * Wait for an ASO meter to become ready.
  833 *
  834 * @param[in] sh
  835 *   Pointer to shared device context containing the meter ASO SQ.
  836 * @param[in] mtr
  837 *   Pointer to ASO meter to wait on. The call returns immediately if
  838 *   the state is already ASO_METER_READY; otherwise completions are
  839 *   polled up to MLX5_MTR_POLL_WQE_CQE_TIMES times.
  840 *
  841 * @return
  842 *   0 on success, -1 otherwise.
  843 */
 844int
 845mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
 846                        struct mlx5_aso_mtr *mtr)
 847{
 848        struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
 849        uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
 850
 851        if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
 852                                            ASO_METER_READY)
 853                return 0;
 854        do {
 855                mlx5_aso_mtr_completion_handle(sq);
 856                if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
 857                                            ASO_METER_READY)
 858                        return 0;
 859                /* Waiting for CQE ready. */
 860                rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
 861        } while (--poll_cqe_times);
 862        DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
 863                        mtr->offset);
 864        return -1;
 865}
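     /*
      * Illustrative call sequence for the two meter APIs above (sketch only;
      * the caller is expected to have put the meter into ASO_METER_WAIT
      * state, which the completion handler switches to ASO_METER_READY):
      *
      *     if (mlx5_aso_meter_update_by_wqe(sh, aso_mtr))
      *             return -1;
      *     if (mlx5_aso_mtr_wait(sh, aso_mtr))
      *             return -1;
      *     ... the meter parameters are now programmed in hardware ...
      */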
 866
 867/*
 868 * Post a WQE to the ASO CT SQ to modify the context.
 869 *
 870 * @param[in] sh
 871 *   Pointer to shared device context.
 872 * @param[in] ct
 873 *   Pointer to the generic CT structure related to the context.
 874 * @param[in] profile
 875 *   Pointer to configuration profile.
 876 *
 877 * @return
 878 *   1 on success (WQE number), 0 on failure.
 879 */
 880static uint16_t
 881mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
 882                              struct mlx5_aso_ct_action *ct,
 883                              const struct rte_flow_action_conntrack *profile)
 884{
 885        volatile struct mlx5_aso_wqe *wqe = NULL;
 886        struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
 887        uint16_t size = 1 << sq->log_desc_n;
 888        uint16_t mask = size - 1;
 889        uint16_t res;
 890        struct mlx5_aso_ct_pool *pool;
 891        void *desg;
 892        void *orig_dir;
 893        void *reply_dir;
 894
 895        rte_spinlock_lock(&sq->sqsl);
  896        /* Prevent other threads from updating the index. */
 897        res = size - (uint16_t)(sq->head - sq->tail);
 898        if (unlikely(!res)) {
 899                rte_spinlock_unlock(&sq->sqsl);
 900                DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
 901                return 0;
 902        }
 903        wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
 904        rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
 905        /* Fill next WQE. */
 906        MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
 907        sq->elts[sq->head & mask].ct = ct;
 908        sq->elts[sq->head & mask].query_data = NULL;
 909        pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
 910        /* Each WQE will have a single CT object. */
 911        wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
 912                                                  ct->offset);
 913        wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
 914                        (ASO_OPC_MOD_CONNECTION_TRACKING <<
 915                         WQE_CSEG_OPC_MOD_OFFSET) |
 916                        sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
 917        wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
 918                        (0u |
 919                         (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
 920                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
 921                         (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
 922                         (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
 923        wqe->aso_cseg.data_mask = UINT64_MAX;
  924        /* Drop the volatile qualifier to make the compiler happy. */
 925        desg = (void *)(uintptr_t)wqe->aso_dseg.data;
 926        MLX5_SET(conn_track_aso, desg, valid, 1);
 927        MLX5_SET(conn_track_aso, desg, state, profile->state);
 928        MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
 929        MLX5_SET(conn_track_aso, desg, connection_assured,
 930                 profile->live_connection);
 931        MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
 932        MLX5_SET(conn_track_aso, desg, challenged_acked,
 933                 profile->challenge_ack_passed);
 934        /* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
 935        MLX5_SET(conn_track_aso, desg, heartbeat, 0);
 936        MLX5_SET(conn_track_aso, desg, max_ack_window,
 937                 profile->max_ack_window);
 938        MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
 939        MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
 940        MLX5_SET(conn_track_aso, desg, retranmission_limit,
 941                 profile->retransmission_limit);
 942        MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
 943                 profile->reply_dir.scale);
 944        MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
 945                 profile->reply_dir.close_initiated);
 946        /* Both directions will use the same liberal mode. */
 947        MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
 948                 profile->liberal_mode);
 949        MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
 950                 profile->reply_dir.data_unacked);
 951        MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
 952                 profile->reply_dir.last_ack_seen);
 953        MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
 954                 profile->original_dir.scale);
 955        MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
 956                 profile->original_dir.close_initiated);
 957        MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
 958                 profile->liberal_mode);
 959        MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
 960                 profile->original_dir.data_unacked);
 961        MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
 962                 profile->original_dir.last_ack_seen);
 963        MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
 964        MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
 965        MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
 966        MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
 967        MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
 968        MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
 969        orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
 970        MLX5_SET(tcp_window_params, orig_dir, sent_end,
 971                 profile->original_dir.sent_end);
 972        MLX5_SET(tcp_window_params, orig_dir, reply_end,
 973                 profile->original_dir.reply_end);
 974        MLX5_SET(tcp_window_params, orig_dir, max_win,
 975                 profile->original_dir.max_win);
 976        MLX5_SET(tcp_window_params, orig_dir, max_ack,
 977                 profile->original_dir.max_ack);
 978        reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
 979        MLX5_SET(tcp_window_params, reply_dir, sent_end,
 980                 profile->reply_dir.sent_end);
 981        MLX5_SET(tcp_window_params, reply_dir, reply_end,
 982                 profile->reply_dir.reply_end);
 983        MLX5_SET(tcp_window_params, reply_dir, max_win,
 984                 profile->reply_dir.max_win);
 985        MLX5_SET(tcp_window_params, reply_dir, max_ack,
 986                 profile->reply_dir.max_ack);
 987        sq->head++;
 988        sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
 989        mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
 990                           sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
 991                           !sh->tx_uar.dbnc);
 992        rte_spinlock_unlock(&sq->sqsl);
 993        return 1;
 994}
 995
  996/*
  997 * Update the status field of CTs to indicate they are ready to be used
  998 * by flows. A contiguous range of CTs, starting at the SQ tail, is
  999 * handled. For query WQEs, the 64B result is also copied out to the
 1000 * caller-provided buffer.
 1001 *
 1002 * @param[in] sq
 1003 *   Pointer to ASO CT SQ.
 1004 * @param[in] num
 1005 *   Number of CT structures to be updated.
 1006 */
1008static void
1009mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1010{
1011        uint16_t size = 1 << sq->log_desc_n;
1012        uint16_t mask = size - 1;
1013        uint16_t i;
1014        struct mlx5_aso_ct_action *ct = NULL;
1015        uint16_t idx;
1016
1017        for (i = 0; i < num; i++) {
1018                idx = (uint16_t)((sq->tail + i) & mask);
1019                ct = sq->elts[idx].ct;
1020                MLX5_ASSERT(ct);
1021                MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1022                if (sq->elts[idx].query_data)
1023                        rte_memcpy(sq->elts[idx].query_data,
1024                                   (char *)((uintptr_t)sq->mr.addr + idx * 64),
1025                                   64);
1026        }
1027}
1028
1029/*
1030 * Post a WQE to the ASO CT SQ to query the current context.
1031 *
1032 * @param[in] sh
1033 *   Pointer to shared device context.
1034 * @param[in] ct
1035 *   Pointer to the generic CT structure related to the context.
1036 * @param[in] data
1037 *   Pointer to data area to be filled.
1038 *
1039 * @return
 1040 *   1 if a WQE was posted, 0 if busy or the SQ is full, -1 on failure.
1041 */
1042static int
1043mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
1044                            struct mlx5_aso_ct_action *ct, char *data)
1045{
1046        volatile struct mlx5_aso_wqe *wqe = NULL;
1047        struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
1048        uint16_t size = 1 << sq->log_desc_n;
1049        uint16_t mask = size - 1;
1050        uint16_t res;
1051        uint16_t wqe_idx;
1052        struct mlx5_aso_ct_pool *pool;
1053        enum mlx5_aso_ct_state state =
1054                                __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1055
1056        if (state == ASO_CONNTRACK_FREE) {
1057                DRV_LOG(ERR, "Fail: No context to query");
1058                return -1;
1059        } else if (state == ASO_CONNTRACK_WAIT) {
1060                return 0;
1061        }
1062        rte_spinlock_lock(&sq->sqsl);
1063        res = size - (uint16_t)(sq->head - sq->tail);
1064        if (unlikely(!res)) {
1065                rte_spinlock_unlock(&sq->sqsl);
1066                DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1067                return 0;
1068        }
1069        MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1070        wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1071        /* Confirm the location and address of the prefetch instruction. */
1072        rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1073        /* Fill next WQE. */
1074        wqe_idx = sq->head & mask;
1075        sq->elts[wqe_idx].ct = ct;
1076        sq->elts[wqe_idx].query_data = data;
1077        pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1078        /* Each WQE will have a single CT object. */
1079        wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1080                                                  ct->offset);
1081        wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1082                        (ASO_OPC_MOD_CONNECTION_TRACKING <<
1083                         WQE_CSEG_OPC_MOD_OFFSET) |
1084                        sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
 1085        /*
 1086         * No write request is required.
 1087         * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
 1088         * "BYTEWISE_64BYTE" is needed to read back the whole context.
 1089         * Set to 0 directly to save an endian swap (a modify rewrites it).
 1090         * "data_mask" is ignored.
 1091         * Buffer address was already filled during initialization.
 1092         */
1093        wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1094                                        ASO_CSEG_DATA_MASK_MODE_OFFSET);
1095        wqe->aso_cseg.data_mask = 0;
1096        sq->head++;
1097        /*
1098         * Each WQE contains 2 WQEBB's, even though
1099         * data segment is not used in this case.
1100         */
1101        sq->pi += 2;
1102        mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1103                           sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1104                           !sh->tx_uar.dbnc);
1105        rte_spinlock_unlock(&sq->sqsl);
1106        return 1;
1107}
1108
1109/*
1110 * Handle completions from WQEs sent to ASO CT.
1111 *
1112 * @param[in] mng
1113 *   Pointer to the CT pools management structure.
1114 */
1115static void
1116mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1117{
1118        struct mlx5_aso_sq *sq = &mng->aso_sq;
1119        struct mlx5_aso_cq *cq = &sq->cq;
1120        volatile struct mlx5_cqe *restrict cqe;
1121        const uint32_t cq_size = 1 << cq->log_desc_n;
1122        const uint32_t mask = cq_size - 1;
1123        uint32_t idx;
1124        uint32_t next_idx;
1125        uint16_t max;
1126        uint16_t n = 0;
1127        int ret;
1128
1129        rte_spinlock_lock(&sq->sqsl);
1130        max = (uint16_t)(sq->head - sq->tail);
1131        if (unlikely(!max)) {
1132                rte_spinlock_unlock(&sq->sqsl);
1133                return;
1134        }
1135        next_idx = cq->cq_ci & mask;
1136        do {
1137                idx = next_idx;
1138                next_idx = (cq->cq_ci + 1) & mask;
1139                /* Need to confirm the position of the prefetch. */
1140                rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1141                cqe = &cq->cq_obj.cqes[idx];
1142                ret = check_cqe(cqe, cq_size, cq->cq_ci);
1143                /*
1144                 * Be sure owner read is done before any other cookie field or
1145                 * opaque field.
1146                 */
1147                rte_io_rmb();
1148                if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1149                        if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1150                                break;
1151                        mlx5_aso_cqe_err_handle(sq);
1152                } else {
1153                        n++;
1154                }
1155                cq->cq_ci++;
1156        } while (1);
1157        if (likely(n)) {
1158                mlx5_aso_ct_status_update(sq, n);
1159                sq->tail += n;
1160                rte_io_wmb();
1161                cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1162        }
1163        rte_spinlock_unlock(&sq->sqsl);
1164}
1165
1166/*
 1167 * Update connection tracking ASO context by sending a WQE.
1168 *
1169 * @param[in] sh
1170 *   Pointer to mlx5_dev_ctx_shared object.
1171 * @param[in] ct
1172 *   Pointer to connection tracking offload object.
1173 * @param[in] profile
1174 *   Pointer to connection tracking TCP parameter.
1175 *
1176 * @return
1177 *   0 on success, -1 on failure.
1178 */
1179int
1180mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1181                          struct mlx5_aso_ct_action *ct,
1182                          const struct rte_flow_action_conntrack *profile)
1183{
1184        uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1185        struct mlx5_aso_ct_pool *pool;
1186
1187        MLX5_ASSERT(ct);
1188        do {
1189                mlx5_aso_ct_completion_handle(sh->ct_mng);
1190                if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
1191                        return 0;
1192                /* Waiting for wqe resource. */
1193                rte_delay_us_sleep(10u);
1194        } while (--poll_wqe_times);
1195        pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1196        DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1197                ct->offset, pool->index);
1198        return -1;
1199}
1200
1201/*
 1202 * Wait for WQE completion so that the queried data can be used.
1203 *
1204 * @param[in] sh
1205 *   Pointer to mlx5_dev_ctx_shared object.
1206 * @param[in] ct
1207 *   Pointer to connection tracking offload object.
1208 *
1209 * @return
1210 *   0 on success, -1 on failure.
1211 */
1212int
1213mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1214                       struct mlx5_aso_ct_action *ct)
1215{
1216        struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1217        uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1218        struct mlx5_aso_ct_pool *pool;
1219
1220        if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1221            ASO_CONNTRACK_READY)
1222                return 0;
1223        do {
1224                mlx5_aso_ct_completion_handle(mng);
1225                if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1226                    ASO_CONNTRACK_READY)
1227                        return 0;
 1228                /* Waiting for CQE ready; consider whether to block or sleep. */
1229                rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1230        } while (--poll_cqe_times);
1231        pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1232        DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1233                ct->offset, pool->index);
1234        return -1;
1235}
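     /*
      * Illustrative sequence for programming a CT context (sketch only):
      *
      *     if (mlx5_aso_ct_update_by_wqe(sh, ct, profile))
      *             return -1;
      *     if (mlx5_aso_ct_wait_ready(sh, ct))
      *             return -1;
      *     ... ct is now ASO_CONNTRACK_READY and may be used by flow rules ...
      */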
1236
1237/*
1238 * Convert the hardware conntrack data format into the profile.
1239 *
1240 * @param[in] profile
1241 *   Pointer to conntrack profile to be filled after query.
1242 * @param[in] wdata
1243 *   Pointer to data fetched from hardware.
1244 */
1245static inline void
1246mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1247                        char *wdata)
1248{
1249        void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1250        void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1251
1252        /* MLX5_GET16 should be taken into consideration. */
1253        profile->state = (enum rte_flow_conntrack_state)
1254                         MLX5_GET(conn_track_aso, wdata, state);
1255        profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1256        profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1257                                          sack_permitted);
1258        profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1259                                            connection_assured);
1260        profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1261                                                 challenged_acked);
1262        profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1263                                           max_ack_window);
1264        profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1265                                                 retranmission_limit);
1266        profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1267        profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1268        profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1269                              MLX5_GET(conn_track_aso, wdata, last_index);
1270        profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1271        profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1272        profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1273        profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1274                                reply_direction_tcp_liberal_enabled) |
1275                                MLX5_GET(conn_track_aso, wdata,
1276                                original_direction_tcp_liberal_enabled);
 1277        /* The RTE profile has one liberal mode flag, not one per direction. */
1278        profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1279                                            reply_direction_tcp_scale);
1280        profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1281                                        reply_direction_tcp_close_initiated);
1282        profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1283                                        reply_direction_tcp_data_unacked);
1284        profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1285                                        reply_direction_tcp_max_ack);
1286        profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1287                                               r_dir, sent_end);
1288        profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1289                                                r_dir, reply_end);
1290        profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1291                                              r_dir, max_win);
1292        profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1293                                              r_dir, max_ack);
1294        profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1295                                               original_direction_tcp_scale);
1296        profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1297                                        original_direction_tcp_close_initiated);
1298        profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1299                                        original_direction_tcp_data_unacked);
1300        profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1301                                        original_direction_tcp_max_ack);
1302        profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1303                                                  o_dir, sent_end);
1304        profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1305                                                   o_dir, reply_end);
1306        profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1307                                                 o_dir, max_win);
1308        profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1309                                                 o_dir, max_ack);
1310}
1311
1312/*
 1313 * Query connection tracking information by sending a WQE.
1314 *
 1315 * @param[in] sh
 1316 *   Pointer to mlx5_dev_ctx_shared object.
1317 * @param[in] ct
1318 *   Pointer to connection tracking offload object.
1319 * @param[out] profile
1320 *   Pointer to connection tracking TCP information.
1321 *
1322 * @return
1323 *   0 on success, -1 on failure.
1324 */
1325int
1326mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1327                         struct mlx5_aso_ct_action *ct,
1328                         struct rte_flow_action_conntrack *profile)
1329{
1330        uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1331        struct mlx5_aso_ct_pool *pool;
1332        char out_data[64 * 2];
1333        int ret;
1334
1335        MLX5_ASSERT(ct);
1336        do {
1337                mlx5_aso_ct_completion_handle(sh->ct_mng);
1338                ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
1339                if (ret < 0)
1340                        return ret;
1341                else if (ret > 0)
1342                        goto data_handle;
1343                /* Waiting for wqe resource or state. */
1344                else
1345                        rte_delay_us_sleep(10u);
1346        } while (--poll_wqe_times);
1347        pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1348        DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1349                ct->offset, pool->index);
1350        return -1;
1351data_handle:
1352        ret = mlx5_aso_ct_wait_ready(sh, ct);
1353        if (!ret)
1354                mlx5_aso_ct_obj_analyze(profile, out_data);
1355        return ret;
1356}
1357
1358/*
1359 * Make sure the conntrack context is synchronized with hardware before
1360 * creating a flow rule that uses it.
1361 *
1362 * @param[in] sh
1363 *   Pointer to shared device context.
1364 * @param[in] ct
1365 *   Pointer to connection tracking offload object.
1366 *
1367 * @return
1368 *   0 on success, a negative errno value otherwise and rte_errno is set.
1369 */
1370int
1371mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1372                      struct mlx5_aso_ct_action *ct)
1373{
1374        struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1375        uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1376        enum mlx5_aso_ct_state state =
1377                                __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1378
1379        if (state == ASO_CONNTRACK_FREE) {
1380                rte_errno = ENXIO;
1381                return -rte_errno;
1382        } else if (state == ASO_CONNTRACK_READY ||
1383                   state == ASO_CONNTRACK_QUERY) {
1384                return 0;
1385        }
1386        do {
1387                mlx5_aso_ct_completion_handle(mng);
1388                state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1389                if (state == ASO_CONNTRACK_READY ||
1390                    state == ASO_CONNTRACK_QUERY)
1391                        return 0;
 1392                /* Waiting for CQE ready; consider whether to block or sleep. */
1393                rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1394        } while (--poll_cqe_times);
1395        rte_errno = EBUSY;
1396        return -rte_errno;
1397}
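     /*
      * Illustrative use of the helper above (sketch only): before creating a
      * flow rule that references the CT action, make sure the context is
      * synchronized with hardware.
      *
      *     if (mlx5_aso_ct_available(sh, ct))
      *             return -rte_errno;
      *     ... create the flow rule referencing the CT action ...
      */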
1398