dpdk/drivers/baseband/acc100/rte_acc100_pmd.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2020 Intel Corporation
 */

#include <unistd.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_dev.h>
#include <rte_malloc.h>
#include <rte_mempool.h>
#include <rte_byteorder.h>
#include <rte_errno.h>
#include <rte_branch_prediction.h>
#include <rte_hexdump.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#ifdef RTE_BBDEV_OFFLOAD_COST
#include <rte_cycles.h>
#endif

#include <rte_bbdev.h>
#include <rte_bbdev_pmd.h>
#include "rte_acc100_pmd.h"

#ifdef RTE_LIBRTE_BBDEV_DEBUG
RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
#else
RTE_LOG_REGISTER_DEFAULT(acc100_logtype, NOTICE);
#endif

/* Write to MMIO register address */
static inline void
mmio_write(void *addr, uint32_t value)
{
        *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value);
}

/* Write a register of an ACC100 device */
static inline void
acc100_reg_write(struct acc100_device *d, uint32_t offset, uint32_t value)
{
        void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
        mmio_write(reg_addr, value);
        usleep(ACC100_LONG_WAIT);
}

/* Read a register of an ACC100 device */
static inline uint32_t
acc100_reg_read(struct acc100_device *d, uint32_t offset)
{
        void *reg_addr = RTE_PTR_ADD(d->mmio_base, offset);
        uint32_t ret = *((volatile uint32_t *)(reg_addr));
        return rte_le_to_cpu_32(ret);
}

/* Basic Implementation of Log2 for exact 2^N */
static inline uint32_t
log2_basic(uint32_t value)
{
        return (value == 0) ? 0 : rte_bsf32(value);
}

/* Calculate memory alignment offset assuming alignment is 2^N */
static inline uint32_t
calc_mem_alignment_offset(void *unaligned_virt_mem, uint32_t alignment)
{
        rte_iova_t unaligned_phy_mem = rte_malloc_virt2iova(unaligned_virt_mem);
        return (uint32_t)(alignment -
                        (unaligned_phy_mem & (alignment-1)));
}

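/*
 * Worked example (illustrative only): with alignment = 0x4000000 (64MB) and
 * an allocation whose IOVA is 0x101230000, the result is
 * 0x4000000 - (0x101230000 & 0x3FFFFFF) = 0x2DD0000, i.e. the number of
 * bytes to skip to reach the next 64MB boundary.
 */
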
/* Calculate the offset of the enqueue register */
static inline uint32_t
queue_offset(bool pf_device, uint8_t vf_id, uint8_t qgrp_id, uint16_t aq_id)
{
        if (pf_device)
                return ((vf_id << 12) + (qgrp_id << 7) + (aq_id << 3) +
                                HWPfQmgrIngressAq);
        else
                return ((qgrp_id << 7) + (aq_id << 3) +
                                HWVfQmgrIngressAq);
}

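/*
 * Example (illustrative): for a PF with vf_id = 1, qgrp_id = 2 and aq_id = 3,
 * the enqueue register sits at HWPfQmgrIngressAq + (1 << 12) + (2 << 7) +
 * (3 << 3) = HWPfQmgrIngressAq + 0x1118: a 4KB stride per VF, 128B per
 * queue group and 8B per atomic queue.
 */
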
enum {UL_4G = 0, UL_5G, DL_4G, DL_5G, NUM_ACC};

/* Return the accelerator enum for a Queue Group Index */
static inline int
accFromQgid(int qg_idx, const struct rte_acc100_conf *acc100_conf)
{
        int accQg[ACC100_NUM_QGRPS];
        int NumQGroupsPerFn[NUM_ACC];
        int acc, qgIdx, qgIndex = 0;
        for (qgIdx = 0; qgIdx < ACC100_NUM_QGRPS; qgIdx++)
                accQg[qgIdx] = 0;
        NumQGroupsPerFn[UL_4G] = acc100_conf->q_ul_4g.num_qgroups;
        NumQGroupsPerFn[UL_5G] = acc100_conf->q_ul_5g.num_qgroups;
        NumQGroupsPerFn[DL_4G] = acc100_conf->q_dl_4g.num_qgroups;
        NumQGroupsPerFn[DL_5G] = acc100_conf->q_dl_5g.num_qgroups;
        for (acc = UL_4G; acc < NUM_ACC; acc++)
                for (qgIdx = 0; qgIdx < NumQGroupsPerFn[acc]; qgIdx++)
                        accQg[qgIndex++] = acc;
        acc = accQg[qg_idx];
        return acc;
}

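/*
 * Illustrative mapping: with num_qgroups = 2 for each of UL_4G, UL_5G, DL_4G
 * and DL_5G, accQg[] becomes {UL_4G, UL_4G, UL_5G, UL_5G, DL_4G, DL_4G,
 * DL_5G, DL_5G, 0, ...}, so qg_idx 5 resolves to DL_4G.
 */
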
/* Return the queue topology for a Queue Group Index */
static inline void
qtopFromAcc(struct rte_acc100_queue_topology **qtop, int acc_enum,
                struct rte_acc100_conf *acc100_conf)
{
        struct rte_acc100_queue_topology *p_qtop;
        p_qtop = NULL;
        switch (acc_enum) {
        case UL_4G:
                p_qtop = &(acc100_conf->q_ul_4g);
                break;
        case UL_5G:
                p_qtop = &(acc100_conf->q_ul_5g);
                break;
        case DL_4G:
                p_qtop = &(acc100_conf->q_dl_4g);
                break;
        case DL_5G:
                p_qtop = &(acc100_conf->q_dl_5g);
                break;
        default:
                /* NOTREACHED */
                rte_bbdev_log(ERR, "Unexpected error evaluating qtopFromAcc");
                break;
        }
        *qtop = p_qtop;
}

/* Return the AQ depth (as log2) for a Queue Group Index */
static inline int
aqDepth(int qg_idx, struct rte_acc100_conf *acc100_conf)
{
        struct rte_acc100_queue_topology *q_top = NULL;
        int acc_enum = accFromQgid(qg_idx, acc100_conf);
        qtopFromAcc(&q_top, acc_enum, acc100_conf);
        if (unlikely(q_top == NULL))
                return 0;
        return q_top->aq_depth_log2;
}

/* Return the number of atomic queues (AQs) for a Queue Group Index */
static inline int
aqNum(int qg_idx, struct rte_acc100_conf *acc100_conf)
{
        struct rte_acc100_queue_topology *q_top = NULL;
        int acc_enum = accFromQgid(qg_idx, acc100_conf);
        qtopFromAcc(&q_top, acc_enum, acc100_conf);
        if (unlikely(q_top == NULL))
                return 0;
        return q_top->num_aqs_per_groups;
}

static void
initQTop(struct rte_acc100_conf *acc100_conf)
{
        acc100_conf->q_ul_4g.num_aqs_per_groups = 0;
        acc100_conf->q_ul_4g.num_qgroups = 0;
        acc100_conf->q_ul_4g.first_qgroup_index = -1;
        acc100_conf->q_ul_5g.num_aqs_per_groups = 0;
        acc100_conf->q_ul_5g.num_qgroups = 0;
        acc100_conf->q_ul_5g.first_qgroup_index = -1;
        acc100_conf->q_dl_4g.num_aqs_per_groups = 0;
        acc100_conf->q_dl_4g.num_qgroups = 0;
        acc100_conf->q_dl_4g.first_qgroup_index = -1;
        acc100_conf->q_dl_5g.num_aqs_per_groups = 0;
        acc100_conf->q_dl_5g.num_qgroups = 0;
        acc100_conf->q_dl_5g.first_qgroup_index = -1;
}

static inline void
updateQtop(uint8_t acc, uint8_t qg, struct rte_acc100_conf *acc100_conf,
                struct acc100_device *d)
{
        uint32_t reg;
        struct rte_acc100_queue_topology *q_top = NULL;
        uint16_t aq;
        qtopFromAcc(&q_top, acc, acc100_conf);
        if (unlikely(q_top == NULL))
                return;
        q_top->num_qgroups++;
        if (q_top->first_qgroup_index == -1) {
                q_top->first_qgroup_index = qg;
                /* Can be optimized to assume all are enabled by default */
                reg = acc100_reg_read(d, queue_offset(d->pf_device,
                                0, qg, ACC100_NUM_AQS - 1));
                if (reg & ACC100_QUEUE_ENABLE) {
                        q_top->num_aqs_per_groups = ACC100_NUM_AQS;
                        return;
                }
                q_top->num_aqs_per_groups = 0;
                for (aq = 0; aq < ACC100_NUM_AQS; aq++) {
                        reg = acc100_reg_read(d, queue_offset(d->pf_device,
                                        0, qg, aq));
                        if (reg & ACC100_QUEUE_ENABLE)
                                q_top->num_aqs_per_groups++;
                }
        }
}

/* Fetch configuration enabled for the PF/VF using MMIO Read (slow) */
static inline void
fetch_acc100_config(struct rte_bbdev *dev)
{
        struct acc100_device *d = dev->data->dev_private;
        struct rte_acc100_conf *acc100_conf = &d->acc100_conf;
        const struct acc100_registry_addr *reg_addr;
        uint8_t acc, qg;
        uint32_t reg, reg_aq, reg_len0, reg_len1;
        uint32_t reg_mode;

        /* Nothing to do if the configuration has already been fetched */
        if (d->configured)
                return;

        /* Choose correct registry addresses for the device type */
        if (d->pf_device)
                reg_addr = &pf_reg_addr;
        else
                reg_addr = &vf_reg_addr;

        d->ddr_size = (1 + acc100_reg_read(d, reg_addr->ddr_range)) << 10;

        /* Single VF Bundle by VF */
        acc100_conf->num_vf_bundles = 1;
        initQTop(acc100_conf);

        struct rte_acc100_queue_topology *q_top = NULL;
        int qman_func_id[ACC100_NUM_ACCS] = {ACC100_ACCMAP_0, ACC100_ACCMAP_1,
                        ACC100_ACCMAP_2, ACC100_ACCMAP_3, ACC100_ACCMAP_4};
        reg = acc100_reg_read(d, reg_addr->qman_group_func);
        for (qg = 0; qg < ACC100_NUM_QGRPS_PER_WORD; qg++) {
                reg_aq = acc100_reg_read(d,
                                queue_offset(d->pf_device, 0, qg, 0));
                if (reg_aq & ACC100_QUEUE_ENABLE) {
                        uint32_t idx = (reg >> (qg * 4)) & 0x7;
                        if (idx < ACC100_NUM_ACCS) {
                                acc = qman_func_id[idx];
                                updateQtop(acc, qg, acc100_conf, d);
                        }
                }
        }

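        /*
         * Illustrative decode of qman_group_func: each queue group owns a
         * 4-bit nibble and bits [2:0] of the nibble select the accelerator,
         * e.g. reg = 0x00543210 maps qg0->acc0 ... qg5->acc5 (indices >=
         * ACC100_NUM_ACCS are ignored above).
         */
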
        /* Check the depth of the AQs */
        reg_len0 = acc100_reg_read(d, reg_addr->depth_log0_offset);
        reg_len1 = acc100_reg_read(d, reg_addr->depth_log1_offset);
        for (acc = 0; acc < NUM_ACC; acc++) {
                qtopFromAcc(&q_top, acc, acc100_conf);
                if (q_top->first_qgroup_index < ACC100_NUM_QGRPS_PER_WORD)
                        q_top->aq_depth_log2 = (reg_len0 >>
                                        (q_top->first_qgroup_index * 4))
                                        & 0xF;
                else
                        q_top->aq_depth_log2 = (reg_len1 >>
                                        ((q_top->first_qgroup_index -
                                        ACC100_NUM_QGRPS_PER_WORD) * 4))
                                        & 0xF;
        }

        /* Read PF mode */
        if (d->pf_device) {
                reg_mode = acc100_reg_read(d, HWPfHiPfMode);
                acc100_conf->pf_mode_en = (reg_mode == ACC100_PF_VAL) ? 1 : 0;
        }

        rte_bbdev_log_debug(
                        "%s Config LLR SIGN IN/OUT %s %s QG %u %u %u %u AQ %u %u %u %u Len %u %u %u %u\n",
                        (d->pf_device) ? "PF" : "VF",
                        (acc100_conf->input_pos_llr_1_bit) ? "POS" : "NEG",
                        (acc100_conf->output_pos_llr_1_bit) ? "POS" : "NEG",
                        acc100_conf->q_ul_4g.num_qgroups,
                        acc100_conf->q_dl_4g.num_qgroups,
                        acc100_conf->q_ul_5g.num_qgroups,
                        acc100_conf->q_dl_5g.num_qgroups,
                        acc100_conf->q_ul_4g.num_aqs_per_groups,
                        acc100_conf->q_dl_4g.num_aqs_per_groups,
                        acc100_conf->q_ul_5g.num_aqs_per_groups,
                        acc100_conf->q_dl_5g.num_aqs_per_groups,
                        acc100_conf->q_ul_4g.aq_depth_log2,
                        acc100_conf->q_dl_4g.aq_depth_log2,
                        acc100_conf->q_ul_5g.aq_depth_log2,
                        acc100_conf->q_dl_5g.aq_depth_log2);
}

static void
free_base_addresses(void **base_addrs, int size)
{
        int i;
        for (i = 0; i < size; i++)
                rte_free(base_addrs[i]);
}

static inline uint32_t
get_desc_len(void)
{
        return sizeof(union acc100_dma_desc);
}

/* Allocate the 2 * 64MB block for the sw rings */
static int
alloc_2x64mb_sw_rings_mem(struct rte_bbdev *dev, struct acc100_device *d,
                int socket)
{
        uint32_t sw_ring_size = ACC100_SIZE_64MBYTE;
        d->sw_rings_base = rte_zmalloc_socket(dev->device->driver->name,
                        2 * sw_ring_size, RTE_CACHE_LINE_SIZE, socket);
        if (d->sw_rings_base == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate memory for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                return -ENOMEM;
        }
        uint32_t next_64mb_align_offset = calc_mem_alignment_offset(
                        d->sw_rings_base, ACC100_SIZE_64MBYTE);
        d->sw_rings = RTE_PTR_ADD(d->sw_rings_base, next_64mb_align_offset);
        d->sw_rings_iova = rte_malloc_virt2iova(d->sw_rings_base) +
                        next_64mb_align_offset;
        d->sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
        d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;

        return 0;
}

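/*
 * Rationale sketch: over-allocating 2 * 64MB guarantees that a full,
 * 64MB-aligned 64MB window fits inside the block wherever the unaligned
 * base happens to land, at the cost of wasted memory; the minimal-memory
 * variant below tries to avoid that waste first.
 */
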
/* Attempt to allocate minimised memory space for sw rings */
static void
alloc_sw_rings_min_mem(struct rte_bbdev *dev, struct acc100_device *d,
                uint16_t num_queues, int socket)
{
        rte_iova_t sw_rings_base_iova, next_64mb_align_addr_iova;
        uint32_t next_64mb_align_offset;
        rte_iova_t sw_ring_iova_end_addr;
        void *base_addrs[ACC100_SW_RING_MEM_ALLOC_ATTEMPTS];
        void *sw_rings_base;
        int i = 0;
        uint32_t q_sw_ring_size = ACC100_MAX_QUEUE_DEPTH * get_desc_len();
        uint32_t dev_sw_ring_size = q_sw_ring_size * num_queues;

        /* Find an aligned block of memory to store sw rings */
        while (i < ACC100_SW_RING_MEM_ALLOC_ATTEMPTS) {
                /*
                 * sw_ring allocated memory is guaranteed to be aligned to
                 * q_sw_ring_size, provided that the requested size is
                 * less than the page size
                 */
                sw_rings_base = rte_zmalloc_socket(
                                dev->device->driver->name,
                                dev_sw_ring_size, q_sw_ring_size, socket);

                if (sw_rings_base == NULL) {
                        rte_bbdev_log(ERR,
                                        "Failed to allocate memory for %s:%u",
                                        dev->device->driver->name,
                                        dev->data->dev_id);
                        break;
                }

                sw_rings_base_iova = rte_malloc_virt2iova(sw_rings_base);
                next_64mb_align_offset = calc_mem_alignment_offset(
                                sw_rings_base, ACC100_SIZE_64MBYTE);
                next_64mb_align_addr_iova = sw_rings_base_iova +
                                next_64mb_align_offset;
                sw_ring_iova_end_addr = sw_rings_base_iova + dev_sw_ring_size;

                /* Check if the end of the sw ring memory block is before the
                 * start of next 64MB aligned mem address
                 */
                if (sw_ring_iova_end_addr < next_64mb_align_addr_iova) {
                        d->sw_rings_iova = sw_rings_base_iova;
                        d->sw_rings = sw_rings_base;
                        d->sw_rings_base = sw_rings_base;
                        d->sw_ring_size = q_sw_ring_size;
                        d->sw_ring_max_depth = ACC100_MAX_QUEUE_DEPTH;
                        break;
                }
                /* Store the address of the unaligned mem block */
                base_addrs[i] = sw_rings_base;
                i++;
        }

        /* Free all unaligned blocks of mem allocated in the loop */
        free_base_addresses(base_addrs, i);
}

/*
 * Find queue_id of a device queue based on details from the Info Ring.
 * If a queue isn't found UINT16_MAX is returned.
 */
static inline uint16_t
get_queue_id_from_ring_info(struct rte_bbdev_data *data,
                const union acc100_info_ring_data ring_data)
{
        uint16_t queue_id;

        for (queue_id = 0; queue_id < data->num_queues; ++queue_id) {
                struct acc100_queue *acc100_q =
                                data->queues[queue_id].queue_private;
                if (acc100_q != NULL && acc100_q->aq_id == ring_data.aq_id &&
                                acc100_q->qgrp_id == ring_data.qg_id &&
                                acc100_q->vf_id == ring_data.vf_id)
                        return queue_id;
        }

        return UINT16_MAX;
}

/* Checks the Info Ring to find the interrupt cause and handles it accordingly */
static inline void
acc100_check_ir(struct acc100_device *acc100_dev)
{
        volatile union acc100_info_ring_data *ring_data;
        uint16_t info_ring_head = acc100_dev->info_ring_head;
        if (acc100_dev->info_ring == NULL)
                return;

        ring_data = acc100_dev->info_ring + (acc100_dev->info_ring_head &
                        ACC100_INFO_RING_MASK);

        while (ring_data->valid) {
                if ((ring_data->int_nb < ACC100_PF_INT_DMA_DL_DESC_IRQ) || (
                                ring_data->int_nb >
                                ACC100_PF_INT_DMA_DL5G_DESC_IRQ))
                        rte_bbdev_log(WARNING, "InfoRing: ITR:%d Info:0x%x",
                                ring_data->int_nb, ring_data->detailed_info);
                /* Initialize Info Ring entry and move forward */
                ring_data->val = 0;
                info_ring_head++;
                ring_data = acc100_dev->info_ring +
                                (info_ring_head & ACC100_INFO_RING_MASK);
        }
}

/* Checks PF Info Ring to find the interrupt cause and handles it accordingly */
static inline void
acc100_pf_interrupt_handler(struct rte_bbdev *dev)
{
        struct acc100_device *acc100_dev = dev->data->dev_private;
        volatile union acc100_info_ring_data *ring_data;
        struct acc100_deq_intr_details deq_intr_det;

        ring_data = acc100_dev->info_ring + (acc100_dev->info_ring_head &
                        ACC100_INFO_RING_MASK);

        while (ring_data->valid) {

                rte_bbdev_log_debug(
                                "ACC100 PF Interrupt received, Info Ring data: 0x%x",
                                ring_data->val);

                switch (ring_data->int_nb) {
                case ACC100_PF_INT_DMA_DL_DESC_IRQ:
                case ACC100_PF_INT_DMA_UL_DESC_IRQ:
                case ACC100_PF_INT_DMA_UL5G_DESC_IRQ:
                case ACC100_PF_INT_DMA_DL5G_DESC_IRQ:
                        deq_intr_det.queue_id = get_queue_id_from_ring_info(
                                        dev->data, *ring_data);
                        if (deq_intr_det.queue_id == UINT16_MAX) {
                                rte_bbdev_log(ERR,
                                                "Couldn't find queue: aq_id: %u, qg_id: %u, vf_id: %u",
                                                ring_data->aq_id,
                                                ring_data->qg_id,
                                                ring_data->vf_id);
                                return;
                        }
                        rte_bbdev_pmd_callback_process(dev,
                                        RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det);
                        break;
                default:
                        rte_bbdev_pmd_callback_process(dev,
                                        RTE_BBDEV_EVENT_ERROR, NULL);
                        break;
                }

                /* Initialize Info Ring entry and move forward */
                ring_data->val = 0;
                ++acc100_dev->info_ring_head;
                ring_data = acc100_dev->info_ring +
                                (acc100_dev->info_ring_head &
                                ACC100_INFO_RING_MASK);
        }
}

/* Checks VF Info Ring to find the interrupt cause and handles it accordingly */
static inline void
acc100_vf_interrupt_handler(struct rte_bbdev *dev)
{
        struct acc100_device *acc100_dev = dev->data->dev_private;
        volatile union acc100_info_ring_data *ring_data;
        struct acc100_deq_intr_details deq_intr_det;

        ring_data = acc100_dev->info_ring + (acc100_dev->info_ring_head &
                        ACC100_INFO_RING_MASK);

        while (ring_data->valid) {

                rte_bbdev_log_debug(
                                "ACC100 VF Interrupt received, Info Ring data: 0x%x",
                                ring_data->val);

                switch (ring_data->int_nb) {
                case ACC100_VF_INT_DMA_DL_DESC_IRQ:
                case ACC100_VF_INT_DMA_UL_DESC_IRQ:
                case ACC100_VF_INT_DMA_UL5G_DESC_IRQ:
                case ACC100_VF_INT_DMA_DL5G_DESC_IRQ:
                        /* VFs are not aware of their vf_id - it's set to 0 in
                         * queue structures.
                         */
                        ring_data->vf_id = 0;
                        deq_intr_det.queue_id = get_queue_id_from_ring_info(
                                        dev->data, *ring_data);
                        if (deq_intr_det.queue_id == UINT16_MAX) {
                                rte_bbdev_log(ERR,
                                                "Couldn't find queue: aq_id: %u, qg_id: %u",
                                                ring_data->aq_id,
                                                ring_data->qg_id);
                                return;
                        }
                        rte_bbdev_pmd_callback_process(dev,
                                        RTE_BBDEV_EVENT_DEQUEUE, &deq_intr_det);
                        break;
                default:
                        rte_bbdev_pmd_callback_process(dev,
                                        RTE_BBDEV_EVENT_ERROR, NULL);
                        break;
                }

                /* Initialize Info Ring entry and move forward */
                ring_data->valid = 0;
                ++acc100_dev->info_ring_head;
                ring_data = acc100_dev->info_ring + (acc100_dev->info_ring_head
                                & ACC100_INFO_RING_MASK);
        }
}

/* Interrupt handler triggered by ACC100 dev for handling specific interrupt */
static void
acc100_dev_interrupt_handler(void *cb_arg)
{
        struct rte_bbdev *dev = cb_arg;
        struct acc100_device *acc100_dev = dev->data->dev_private;

        /* Read info ring */
        if (acc100_dev->pf_device)
                acc100_pf_interrupt_handler(dev);
        else
                acc100_vf_interrupt_handler(dev);
}

/* Allocate and set up the Info Ring */
static int
allocate_info_ring(struct rte_bbdev *dev)
{
        struct acc100_device *d = dev->data->dev_private;
        const struct acc100_registry_addr *reg_addr;
        rte_iova_t info_ring_iova;
        uint32_t phys_low, phys_high;

        if (d->info_ring != NULL)
                return 0; /* Already configured */

        /* Choose correct registry addresses for the device type */
        if (d->pf_device)
                reg_addr = &pf_reg_addr;
        else
                reg_addr = &vf_reg_addr;
        /* Allocate InfoRing */
        d->info_ring = rte_zmalloc_socket("Info Ring",
                        ACC100_INFO_RING_NUM_ENTRIES *
                        sizeof(*d->info_ring), RTE_CACHE_LINE_SIZE,
                        dev->data->socket_id);
        if (d->info_ring == NULL) {
                rte_bbdev_log(ERR,
                                "Failed to allocate Info Ring for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                return -ENOMEM;
        }
        info_ring_iova = rte_malloc_virt2iova(d->info_ring);

        /* Setup Info Ring */
        phys_high = (uint32_t)(info_ring_iova >> 32);
        phys_low  = (uint32_t)(info_ring_iova);
        acc100_reg_write(d, reg_addr->info_ring_hi, phys_high);
        acc100_reg_write(d, reg_addr->info_ring_lo, phys_low);
        acc100_reg_write(d, reg_addr->info_ring_en, ACC100_REG_IRQ_EN_ALL);
        d->info_ring_head = (acc100_reg_read(d, reg_addr->info_ring_ptr) &
                        0xFFF) / sizeof(union acc100_info_ring_data);
        return 0;
}

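/*
 * Example (illustrative): an Info Ring IOVA of 0x1C0010000 is split and
 * programmed as phys_high = 0x1 and phys_low = 0xC0010000.
 */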

/* Allocate software rings memory and set up the device to use them */
static int
acc100_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
{
        uint32_t phys_low, phys_high, value;
        struct acc100_device *d = dev->data->dev_private;
        const struct acc100_registry_addr *reg_addr;
        int ret;

        if (d->pf_device && !d->acc100_conf.pf_mode_en) {
                rte_bbdev_log(NOTICE,
                                "%s has PF mode disabled. This PF can't be used.",
                                dev->data->name);
                return -ENODEV;
        }

        alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);

        /* If minimal memory space approach failed, then allocate
         * the 2 * 64MB block for the sw rings
         */
        if (d->sw_rings == NULL)
                alloc_2x64mb_sw_rings_mem(dev, d, socket_id);

        if (d->sw_rings == NULL) {
                rte_bbdev_log(NOTICE,
                                "Failure allocating sw_rings memory");
                return -ENODEV;
        }

        /* Configure ACC100 with the base address for DMA descriptor rings
         * Same descriptor rings used for UL and DL DMA Engines
         * Note : Assuming only VF0 bundle is used for PF mode
         */
        phys_high = (uint32_t)(d->sw_rings_iova >> 32);
        phys_low  = (uint32_t)(d->sw_rings_iova & ~(ACC100_SIZE_64MBYTE-1));

        /* Choose correct registry addresses for the device type */
        if (d->pf_device)
                reg_addr = &pf_reg_addr;
        else
                reg_addr = &vf_reg_addr;

        /* Read the populated cfg from ACC100 registers */
        fetch_acc100_config(dev);

        /* Release AXI from PF */
        if (d->pf_device)
                acc100_reg_write(d, HWPfDmaAxiControl, 1);

        acc100_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
        acc100_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);

        /*
         * Configure Ring Size to the max queue ring size
         * (used for wrapping purpose)
         */
        value = log2_basic(d->sw_ring_size / 64);
        acc100_reg_write(d, reg_addr->ring_size, value);
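        /*
         * Example (illustrative, assuming 256B descriptors and a
         * 1024-entry ring): sw_ring_size = 256KB, so the value written
         * above is log2(262144 / 64) = 12.
         */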

        /* Configure tail pointer for use when SDONE enabled */
        d->tail_ptrs = rte_zmalloc_socket(
                        dev->device->driver->name,
                        ACC100_NUM_QGRPS * ACC100_NUM_AQS * sizeof(uint32_t),
                        RTE_CACHE_LINE_SIZE, socket_id);
        if (d->tail_ptrs == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                rte_free(d->sw_rings_base);
                return -ENOMEM;
        }
        d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);

        phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
        phys_low  = (uint32_t)(d->tail_ptr_iova);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
        acc100_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);

        ret = allocate_info_ring(dev);
        if (ret < 0) {
                rte_bbdev_log(ERR, "Failed to allocate info_ring for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                /* Continue */
        }

        d->harq_layout = rte_zmalloc_socket("HARQ Layout",
                        ACC100_HARQ_LAYOUT * sizeof(*d->harq_layout),
                        RTE_CACHE_LINE_SIZE, dev->data->socket_id);
        if (d->harq_layout == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
                                dev->device->driver->name,
                                dev->data->dev_id);
                rte_free(d->tail_ptrs);
                rte_free(d->sw_rings_base);
                return -ENOMEM;
        }

        /* Mark as configured properly */
        d->configured = true;

        rte_bbdev_log_debug(
                        "ACC100 (%s) configured sw_rings = %p, sw_rings_iova = %#"
                        PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);

        return 0;
}

static int
acc100_intr_enable(struct rte_bbdev *dev)
{
        int ret;
        struct acc100_device *d = dev->data->dev_private;

        /* Only MSI interrupts are currently supported */
        if (rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_VFIO_MSI ||
                        rte_intr_type_get(dev->intr_handle) == RTE_INTR_HANDLE_UIO) {

                ret = allocate_info_ring(dev);
                if (ret < 0) {
                        rte_bbdev_log(ERR,
                                        "Couldn't allocate info ring for device: %s",
                                        dev->data->name);
                        return ret;
                }

                ret = rte_intr_enable(dev->intr_handle);
                if (ret < 0) {
                        rte_bbdev_log(ERR,
                                        "Couldn't enable interrupts for device: %s",
                                        dev->data->name);
                        rte_free(d->info_ring);
                        return ret;
                }
                ret = rte_intr_callback_register(dev->intr_handle,
                                acc100_dev_interrupt_handler, dev);
                if (ret < 0) {
                        rte_bbdev_log(ERR,
                                        "Couldn't register interrupt callback for device: %s",
                                        dev->data->name);
                        rte_free(d->info_ring);
                        return ret;
                }

                return 0;
        }

        rte_bbdev_log(ERR, "ACC100 (%s) supports only VFIO MSI interrupts",
                        dev->data->name);
        return -ENOTSUP;
}

/* Free memory used for software rings */
static int
acc100_dev_close(struct rte_bbdev *dev)
{
        struct acc100_device *d = dev->data->dev_private;
        acc100_check_ir(d);
        if (d->sw_rings_base != NULL) {
                rte_free(d->tail_ptrs);
                rte_free(d->info_ring);
                rte_free(d->sw_rings_base);
                d->sw_rings_base = NULL;
        }
        /* Ensure all in flight HW transactions are completed */
        usleep(ACC100_LONG_WAIT);
        return 0;
}

/**
 * Report a free ACC100 queue index.
 * Returns 0 to 16k for a valid queue_idx, or -1 when no queue is available.
 * Note: Only the VF0 bundle is supported in PF mode.
 */
static int
acc100_find_free_queue_idx(struct rte_bbdev *dev,
                const struct rte_bbdev_queue_conf *conf)
{
        struct acc100_device *d = dev->data->dev_private;
        int op_2_acc[5] = {0, UL_4G, DL_4G, UL_5G, DL_5G};
        int acc = op_2_acc[conf->op_type];
        struct rte_acc100_queue_topology *qtop = NULL;

        qtopFromAcc(&qtop, acc, &(d->acc100_conf));
        if (qtop == NULL)
                return -1;
        /* Identify matching QGroup Index which are sorted in priority order */
        uint16_t group_idx = qtop->first_qgroup_index;
        group_idx += conf->priority;
        if (group_idx >= ACC100_NUM_QGRPS ||
                        conf->priority >= qtop->num_qgroups) {
                rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
                                dev->data->name, conf->priority);
                return -1;
        }
        /* Find a free AQ_idx */
        uint16_t aq_idx;
        for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
                if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
                        /* Mark the Queue as assigned */
                        d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
                        /* Report the AQ Index */
                        return (group_idx << ACC100_GRP_ID_SHIFT) + aq_idx;
                }
        }
        rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
                        dev->data->name, conf->priority);
        return -1;
}

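/*
 * Illustrative encoding of the returned index (assuming the shift values
 * from rte_acc100_pmd.h, GRP_ID_SHIFT = 10): group_idx 2 and aq_idx 3
 * yield q_idx = (2 << 10) + 3 = 0x803, which acc100_queue_setup() below
 * decodes back into qgrp_id/vf_id/aq_id.
 */
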
/* Setup ACC100 queue */
static int
acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
                const struct rte_bbdev_queue_conf *conf)
{
        struct acc100_device *d = dev->data->dev_private;
        struct acc100_queue *q;
        int16_t q_idx;

        if (d == NULL) {
                rte_bbdev_log(ERR, "Undefined device");
                return -ENODEV;
        }

        /* Allocate the queue data structure. */
        q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate queue memory");
                return -ENOMEM;
        }

        q->d = d;
        q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
        q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);

        /* Prepare the Ring with default descriptor format */
        union acc100_dma_desc *desc = NULL;
        unsigned int desc_idx, b_idx;
        int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
                ACC100_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
                ACC100_FCW_TD_BLEN : ACC100_FCW_LD_BLEN));

        for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
                desc = q->ring_addr + desc_idx;
                desc->req.word0 = ACC100_DMA_DESC_TYPE;
                desc->req.word1 = 0; /**< Timestamp */
                desc->req.word2 = 0;
                desc->req.word3 = 0;
                uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
                desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
                desc->req.data_ptrs[0].blen = fcw_len;
                desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
                desc->req.data_ptrs[0].last = 0;
                desc->req.data_ptrs[0].dma_ext = 0;
                for (b_idx = 1; b_idx < ACC100_DMA_MAX_NUM_POINTERS - 1;
                                b_idx++) {
                        desc->req.data_ptrs[b_idx].blkid = ACC100_DMA_BLKID_IN;
                        desc->req.data_ptrs[b_idx].last = 1;
                        desc->req.data_ptrs[b_idx].dma_ext = 0;
                        b_idx++;
                        desc->req.data_ptrs[b_idx].blkid =
                                        ACC100_DMA_BLKID_OUT_ENC;
                        desc->req.data_ptrs[b_idx].last = 1;
                        desc->req.data_ptrs[b_idx].dma_ext = 0;
                }
                /* Preset some fields of LDPC FCW */
                desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
                desc->req.fcw_ld.gain_i = 1;
                desc->req.fcw_ld.gain_h = 1;
        }

        q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
                        RTE_CACHE_LINE_SIZE,
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q->lb_in == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
                rte_free(q);
                return -ENOMEM;
        }
        q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
        q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
                        RTE_CACHE_LINE_SIZE,
                        RTE_CACHE_LINE_SIZE, conf->socket);
        if (q->lb_out == NULL) {
                rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
                rte_free(q->lb_in);
                rte_free(q);
                return -ENOMEM;
        }
        q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);

        /*
         * Software queue ring wraps synchronously with the HW when it reaches
         * the boundary of the maximum allocated queue size, no matter what the
         * sw queue size is. This wrapping is guarded by setting the wrap_mask
         * to represent the maximum queue size as allocated at the time when
         * the device has been setup (in configure()).
         *
         * The queue depth is set to the queue size value (conf->queue_size).
         * This limits the occupancy of the queue at any point of time, so that
         * the queue does not get swamped with enqueue requests.
         */
        q->sw_ring_depth = conf->queue_size;
        q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;

        q->op_type = conf->op_type;

        q_idx = acc100_find_free_queue_idx(dev, conf);
        if (q_idx == -1) {
                rte_free(q->lb_in);
                rte_free(q->lb_out);
                rte_free(q);
                return -1;
        }

        q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
        q->vf_id = (q_idx >> ACC100_VF_ID_SHIFT) & 0x3F;
        q->aq_id = q_idx & 0xF;
        q->aq_depth = (conf->op_type == RTE_BBDEV_OP_TURBO_DEC) ?
                        (1 << d->acc100_conf.q_ul_4g.aq_depth_log2) :
                        (1 << d->acc100_conf.q_dl_4g.aq_depth_log2);

        q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
                        queue_offset(d->pf_device,
                                        q->vf_id, q->qgrp_id, q->aq_id));

        rte_bbdev_log_debug(
                        "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p",
                        dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
                        q->aq_id, q->aq_depth, q->mmio_reg_enqueue);

        dev->data->queues[queue_id].queue_private = q;
        return 0;
}

/* Release ACC100 queue */
static int
acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
{
        struct acc100_device *d = dev->data->dev_private;
        struct acc100_queue *q = dev->data->queues[q_id].queue_private;

        if (q != NULL) {
                /* Mark the Queue as un-assigned */
                d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
                                (1 << q->aq_id));
                rte_free(q->lb_in);
                rte_free(q->lb_out);
                rte_free(q);
                dev->data->queues[q_id].queue_private = NULL;
        }

        return 0;
}

/* Get ACC100 device info */
static void
acc100_dev_info_get(struct rte_bbdev *dev,
                struct rte_bbdev_driver_info *dev_info)
{
        struct acc100_device *d = dev->data->dev_private;

        static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
                {
                        .type = RTE_BBDEV_OP_TURBO_DEC,
                        .cap.turbo_dec = {
                                .capability_flags =
                                        RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
                                        RTE_BBDEV_TURBO_CRC_TYPE_24B |
                                        RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
                                        RTE_BBDEV_TURBO_EARLY_TERMINATION |
                                        RTE_BBDEV_TURBO_DEC_INTERRUPTS |
                                        RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
                                        RTE_BBDEV_TURBO_MAP_DEC |
                                        RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
                                        RTE_BBDEV_TURBO_DEC_CRC_24B_DROP |
                                        RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
                                .max_llr_modulus = INT8_MAX,
                                .num_buffers_src =
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                                .num_buffers_hard_out =
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                                .num_buffers_soft_out =
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                        }
                },
                {
                        .type = RTE_BBDEV_OP_TURBO_ENC,
                        .cap.turbo_enc = {
                                .capability_flags =
                                        RTE_BBDEV_TURBO_CRC_24B_ATTACH |
                                        RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
                                        RTE_BBDEV_TURBO_RATE_MATCH |
                                        RTE_BBDEV_TURBO_ENC_INTERRUPTS |
                                        RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
                                .num_buffers_src =
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                                .num_buffers_dst =
                                                RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
                        }
                },
                {
                        .type   = RTE_BBDEV_OP_LDPC_ENC,
                        .cap.ldpc_enc = {
                                .capability_flags =
                                        RTE_BBDEV_LDPC_RATE_MATCH |
                                        RTE_BBDEV_LDPC_CRC_24B_ATTACH |
                                        RTE_BBDEV_LDPC_INTERLEAVER_BYPASS |
                                        RTE_BBDEV_LDPC_ENC_INTERRUPTS,
                                .num_buffers_src =
                                                RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
                                .num_buffers_dst =
                                                RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
                        }
                },
                {
                        .type   = RTE_BBDEV_OP_LDPC_DEC,
                        .cap.ldpc_dec = {
                        .capability_flags =
                                RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK |
                                RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP |
                                RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE |
                                RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE |
#ifdef ACC100_EXT_MEM
                                RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK |
                                RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE |
                                RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE |
#endif
                                RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE |
                                RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS |
                                RTE_BBDEV_LDPC_DECODE_BYPASS |
                                RTE_BBDEV_LDPC_DEC_SCATTER_GATHER |
                                RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION |
                                RTE_BBDEV_LDPC_LLR_COMPRESSION |
                                RTE_BBDEV_LDPC_DEC_INTERRUPTS,
                        .llr_size = 8,
                        .llr_decimals = 1,
                        .num_buffers_src =
                                        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
                        .num_buffers_hard_out =
                                        RTE_BBDEV_LDPC_MAX_CODE_BLOCKS,
                        .num_buffers_soft_out = 0,
                        }
                },
                RTE_BBDEV_END_OF_CAPABILITIES_LIST()
        };

        static struct rte_bbdev_queue_conf default_queue_conf;
        default_queue_conf.socket = dev->data->socket_id;
        default_queue_conf.queue_size = ACC100_MAX_QUEUE_DEPTH;

        dev_info->driver_name = dev->device->driver->name;

        /* Read and save the populated config from ACC100 registers */
        fetch_acc100_config(dev);

        /* This isn't ideal because it reports the maximum number of queues but
         * does not provide info on how many can be uplink/downlink or different
         * priorities
         */
        dev_info->max_num_queues =
                        d->acc100_conf.q_dl_5g.num_aqs_per_groups *
                        d->acc100_conf.q_dl_5g.num_qgroups +
                        d->acc100_conf.q_ul_5g.num_aqs_per_groups *
                        d->acc100_conf.q_ul_5g.num_qgroups +
                        d->acc100_conf.q_dl_4g.num_aqs_per_groups *
                        d->acc100_conf.q_dl_4g.num_qgroups +
                        d->acc100_conf.q_ul_4g.num_aqs_per_groups *
                        d->acc100_conf.q_ul_4g.num_qgroups;
        dev_info->queue_size_lim = ACC100_MAX_QUEUE_DEPTH;
        dev_info->hardware_accelerated = true;
        dev_info->max_dl_queue_priority =
                        d->acc100_conf.q_dl_4g.num_qgroups - 1;
        dev_info->max_ul_queue_priority =
                        d->acc100_conf.q_ul_4g.num_qgroups - 1;
        dev_info->default_queue_conf = default_queue_conf;
        dev_info->cpu_flag_reqs = NULL;
        dev_info->min_alignment = 64;
        dev_info->capabilities = bbdev_capabilities;
#ifdef ACC100_EXT_MEM
        dev_info->harq_buffer_size = d->ddr_size;
#else
        dev_info->harq_buffer_size = 0;
#endif
        dev_info->data_endianness = RTE_LITTLE_ENDIAN;
        acc100_check_ir(d);
}

static int
acc100_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id)
{
        struct acc100_queue *q = dev->data->queues[queue_id].queue_private;

        if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI &&
                        rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_UIO)
                return -ENOTSUP;

        q->irq_enable = 1;
        return 0;
}

static int
acc100_queue_intr_disable(struct rte_bbdev *dev, uint16_t queue_id)
{
        struct acc100_queue *q = dev->data->queues[queue_id].queue_private;

        if (rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_VFIO_MSI &&
                        rte_intr_type_get(dev->intr_handle) != RTE_INTR_HANDLE_UIO)
                return -ENOTSUP;

        q->irq_enable = 0;
        return 0;
}

static const struct rte_bbdev_ops acc100_bbdev_ops = {
        .setup_queues = acc100_setup_queues,
        .intr_enable = acc100_intr_enable,
        .close = acc100_dev_close,
        .info_get = acc100_dev_info_get,
        .queue_setup = acc100_queue_setup,
        .queue_release = acc100_queue_release,
        .queue_intr_enable = acc100_queue_intr_enable,
        .queue_intr_disable = acc100_queue_intr_disable
};

/* ACC100 PCI PF address map */
static struct rte_pci_id pci_id_acc100_pf_map[] = {
        {
                RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_PF_DEVICE_ID)
        },
        {.device_id = 0},
};

/* ACC100 PCI VF address map */
static struct rte_pci_id pci_id_acc100_vf_map[] = {
        {
                RTE_PCI_DEVICE(RTE_ACC100_VENDOR_ID, RTE_ACC100_VF_DEVICE_ID)
        },
        {.device_id = 0},
};

/* Read flag value 0/1 from bitmap */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
        return bitmap & bitmask;
}

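/*
 * Append len bytes to segment m and account for them in the packet length
 * of m_head; behaves like rte_pktmbuf_append() except that the segment to
 * extend is passed explicitly, so a chained segment can be grown while
 * keeping the head's pkt_len consistent. Returns NULL when m lacks
 * tailroom.
 */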
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
        if (unlikely(len > rte_pktmbuf_tailroom(m)))
                return NULL;

        char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
        m->data_len = (uint16_t)(m->data_len + len);
        m_head->pkt_len = (m_head->pkt_len + len);
        return tail;
}

/* Fill in a frame control word for turbo encoding. */
static inline void
acc100_fcw_te_fill(const struct rte_bbdev_enc_op *op, struct acc100_fcw_te *fcw)
{
        fcw->code_block_mode = op->turbo_enc.code_block_mode;
        if (fcw->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
                fcw->k_neg = op->turbo_enc.tb_params.k_neg;
                fcw->k_pos = op->turbo_enc.tb_params.k_pos;
                fcw->c_neg = op->turbo_enc.tb_params.c_neg;
                fcw->c = op->turbo_enc.tb_params.c;
                fcw->ncb_neg = op->turbo_enc.tb_params.ncb_neg;
                fcw->ncb_pos = op->turbo_enc.tb_params.ncb_pos;

                if (check_bit(op->turbo_enc.op_flags,
                                RTE_BBDEV_TURBO_RATE_MATCH)) {
                        fcw->bypass_rm = 0;
                        fcw->cab = op->turbo_enc.tb_params.cab;
                        fcw->ea = op->turbo_enc.tb_params.ea;
                        fcw->eb = op->turbo_enc.tb_params.eb;
                } else {
                        /* E is set to the encoding output size when RM is
                         * bypassed.
                         */
                        fcw->bypass_rm = 1;
                        fcw->cab = fcw->c_neg;
                        fcw->ea = 3 * fcw->k_neg + 12;
                        fcw->eb = 3 * fcw->k_pos + 12;
                }
        } else { /* For CB mode */
                fcw->k_pos = op->turbo_enc.cb_params.k;
                fcw->ncb_pos = op->turbo_enc.cb_params.ncb;

                if (check_bit(op->turbo_enc.op_flags,
                                RTE_BBDEV_TURBO_RATE_MATCH)) {
                        fcw->bypass_rm = 0;
                        fcw->eb = op->turbo_enc.cb_params.e;
                } else {
                        /* E is set to the encoding output size when RM is
                         * bypassed.
                         */
                        fcw->bypass_rm = 1;
                        fcw->eb = 3 * fcw->k_pos + 12;
                }
        }

        fcw->bypass_rv_idx1 = check_bit(op->turbo_enc.op_flags,
                        RTE_BBDEV_TURBO_RV_INDEX_BYPASS);
        fcw->code_block_crc = check_bit(op->turbo_enc.op_flags,
                        RTE_BBDEV_TURBO_CRC_24B_ATTACH);
        fcw->rv_idx1 = op->turbo_enc.rv_index;
}

/* Compute value of k0.
 * Based on 3GPP 38.212 Table 5.4.2.1-2
 * Starting position of different redundancy versions, k0
 */
static inline uint16_t
get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index)
{
        if (rv_index == 0)
                return 0;
        uint16_t n = (bg == 1 ? ACC100_N_ZC_1 : ACC100_N_ZC_2) * z_c;
        if (n_cb == n) {
                if (rv_index == 1)
                        return (bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * z_c;
                else if (rv_index == 2)
                        return (bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * z_c;
                else
                        return (bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * z_c;
        }
        /* LBRM case - includes a division by N */
        if (rv_index == 1)
                return (((bg == 1 ? ACC100_K0_1_1 : ACC100_K0_1_2) * n_cb)
                                / n) * z_c;
        else if (rv_index == 2)
                return (((bg == 1 ? ACC100_K0_2_1 : ACC100_K0_2_2) * n_cb)
                                / n) * z_c;
        else
                return (((bg == 1 ? ACC100_K0_3_1 : ACC100_K0_3_2) * n_cb)
                                / n) * z_c;
}

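/*
 * Worked example (illustrative, using the 38.212-derived ACC100_K0_* and
 * ACC100_N_ZC_* constants from rte_acc100_pmd.h): for BG1, z_c = 224 and
 * a full circular buffer (n_cb == n = ACC100_N_ZC_1 * 224), rv_index = 2
 * returns ACC100_K0_2_1 * 224.
 */
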
1250/* Fill in a frame control word for LDPC encoding. */
1251static inline void
1252acc100_fcw_le_fill(const struct rte_bbdev_enc_op *op,
1253                struct acc100_fcw_le *fcw, int num_cb)
1254{
1255        fcw->qm = op->ldpc_enc.q_m;
1256        fcw->nfiller = op->ldpc_enc.n_filler;
1257        fcw->BG = (op->ldpc_enc.basegraph - 1);
1258        fcw->Zc = op->ldpc_enc.z_c;
1259        fcw->ncb = op->ldpc_enc.n_cb;
1260        fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_enc.basegraph,
1261                        op->ldpc_enc.rv_index);
1262        fcw->rm_e = op->ldpc_enc.cb_params.e;
1263        fcw->crc_select = check_bit(op->ldpc_enc.op_flags,
1264                        RTE_BBDEV_LDPC_CRC_24B_ATTACH);
1265        fcw->bypass_intlv = check_bit(op->ldpc_enc.op_flags,
1266                        RTE_BBDEV_LDPC_INTERLEAVER_BYPASS);
1267        fcw->mcb_count = num_cb;
1268}
1269
1270/* Fill in a frame control word for turbo decoding. */
1271static inline void
1272acc100_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_td *fcw)
1273{
1274        /* Note: Early termination is always enabled for 4G UL */
1275        fcw->fcw_ver = 1;
1276        if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
1277                fcw->k_pos = op->turbo_dec.tb_params.k_pos;
1278        else
1279                fcw->k_pos = op->turbo_dec.cb_params.k;
1280        fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
1281                        RTE_BBDEV_TURBO_CRC_TYPE_24B);
1282        fcw->bypass_sb_deint = 0;
1283        fcw->raw_decoder_input_on = 0;
1284        fcw->max_iter = op->turbo_dec.iter_max;
1285        fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
1286                        RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
1287}
1288
1289/* Fill in a frame control word for LDPC decoding. */
1290static inline void
1291acc100_fcw_ld_fill(const struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
1292                union acc100_harq_layout_data *harq_layout)
1293{
1294        uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
1295        uint16_t harq_index;
1296        uint32_t l;
1297        bool harq_prun = false;
1298
1299        fcw->qm = op->ldpc_dec.q_m;
1300        fcw->nfiller = op->ldpc_dec.n_filler;
1301        fcw->BG = (op->ldpc_dec.basegraph - 1);
1302        fcw->Zc = op->ldpc_dec.z_c;
1303        fcw->ncb = op->ldpc_dec.n_cb;
1304        fcw->k0 = get_k0(fcw->ncb, fcw->Zc, op->ldpc_dec.basegraph,
1305                        op->ldpc_dec.rv_index);
1306        if (op->ldpc_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
1307                fcw->rm_e = op->ldpc_dec.cb_params.e;
1308        else
1309                fcw->rm_e = (op->ldpc_dec.tb_params.r <
1310                                op->ldpc_dec.tb_params.cab) ?
1311                                                op->ldpc_dec.tb_params.ea :
1312                                                op->ldpc_dec.tb_params.eb;
1313
1314        fcw->hcin_en = check_bit(op->ldpc_dec.op_flags,
1315                        RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE);
1316        fcw->hcout_en = check_bit(op->ldpc_dec.op_flags,
1317                        RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE);
1318        fcw->crc_select = check_bit(op->ldpc_dec.op_flags,
1319                        RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK);
1320        fcw->bypass_dec = check_bit(op->ldpc_dec.op_flags,
1321                        RTE_BBDEV_LDPC_DECODE_BYPASS);
1322        fcw->bypass_intlv = check_bit(op->ldpc_dec.op_flags,
1323                        RTE_BBDEV_LDPC_DEINTERLEAVER_BYPASS);
1324        if (op->ldpc_dec.q_m == 1) {
1325                fcw->bypass_intlv = 1;
1326                fcw->qm = 2;
1327        }
1328        fcw->hcin_decomp_mode = check_bit(op->ldpc_dec.op_flags,
1329                        RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
1330        fcw->hcout_comp_mode = check_bit(op->ldpc_dec.op_flags,
1331                        RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
1332        fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
1333                        RTE_BBDEV_LDPC_LLR_COMPRESSION);
1334        harq_index = op->ldpc_dec.harq_combined_output.offset /
1335                        ACC100_HARQ_OFFSET;
1336#ifdef ACC100_EXT_MEM
1337        /* Limit cases when HARQ pruning is valid */
1338        harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
1339                        ACC100_HARQ_OFFSET) == 0) &&
1340                        (op->ldpc_dec.harq_combined_output.offset <= UINT16_MAX
1341                        * ACC100_HARQ_OFFSET);
1342#endif
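        /*
         * Illustrative example, assuming ACC100_HARQ_OFFSET is the 32 KB
         * granularity of the device HARQ memory: an output offset of
         * 3 * 32768 = 98304 maps to harq_index 3; it divides evenly and is
         * below UINT16_MAX * ACC100_HARQ_OFFSET, so harq_prun is set when
         * ACC100_EXT_MEM is defined.
         */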
1343        if (fcw->hcin_en > 0) {
1344                harq_in_length = op->ldpc_dec.harq_combined_input.length;
1345                if (fcw->hcin_decomp_mode > 0)
1346                        harq_in_length = harq_in_length * 8 / 6;
1347                harq_in_length = RTE_ALIGN(harq_in_length, 64);
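                /*
                 * Example trace: with 6-bit compression, 768 input bytes
                 * expand to 768 * 8 / 6 = 1024 LLRs, and RTE_ALIGN(1024, 64)
                 * leaves the length at 1024.
                 */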
1348                if ((harq_layout[harq_index].offset > 0) & harq_prun) {
1349                        rte_bbdev_log_debug("HARQ IN offset unexpected for now\n");
1350                        fcw->hcin_size0 = harq_layout[harq_index].size0;
1351                        fcw->hcin_offset = harq_layout[harq_index].offset;
1352                        fcw->hcin_size1 = harq_in_length -
1353                                        harq_layout[harq_index].offset;
1354                } else {
1355                        fcw->hcin_size0 = harq_in_length;
1356                        fcw->hcin_offset = 0;
1357                        fcw->hcin_size1 = 0;
1358                }
1359        } else {
1360                fcw->hcin_size0 = 0;
1361                fcw->hcin_offset = 0;
1362                fcw->hcin_size1 = 0;
1363        }
1364
1365        fcw->itmax = op->ldpc_dec.iter_max;
1366        fcw->itstop = check_bit(op->ldpc_dec.op_flags,
1367                        RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE);
1368        fcw->synd_precoder = fcw->itstop;
1369        /*
1370         * These are all implicitly set
1371         * fcw->synd_post = 0;
1372         * fcw->so_en = 0;
1373         * fcw->so_bypass_rm = 0;
1374         * fcw->so_bypass_intlv = 0;
1375         * fcw->dec_convllr = 0;
1376         * fcw->hcout_convllr = 0;
1377         * fcw->hcout_size1 = 0;
1378         * fcw->so_it = 0;
1379         * fcw->hcout_offset = 0;
1380         * fcw->negstop_th = 0;
1381         * fcw->negstop_it = 0;
1382         * fcw->negstop_en = 0;
1383         * fcw->gain_i = 1;
1384         * fcw->gain_h = 1;
1385         */
1386        if (fcw->hcout_en > 0) {
1387                parity_offset = (op->ldpc_dec.basegraph == 1 ? 20 : 8)
1388                        * op->ldpc_dec.z_c - op->ldpc_dec.n_filler;
1389                k0_p = (fcw->k0 > parity_offset) ?
1390                                fcw->k0 - op->ldpc_dec.n_filler : fcw->k0;
1391                ncb_p = fcw->ncb - op->ldpc_dec.n_filler;
1392                l = k0_p + fcw->rm_e;
1393                harq_out_length = (uint16_t) fcw->hcin_size0;
1394                harq_out_length = RTE_MIN(RTE_MAX(harq_out_length, l), ncb_p);
1395                harq_out_length = (harq_out_length + 0x3F) & 0xFFC0; /* Align up to 64 */
1396                if ((k0_p > fcw->hcin_size0 + ACC100_HARQ_OFFSET_THRESHOLD) &&
1397                                harq_prun) {
1398                        fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
1399                        fcw->hcout_offset = k0_p & 0xFFC0;
1400                        fcw->hcout_size1 = harq_out_length - fcw->hcout_offset;
1401                } else {
1402                        fcw->hcout_size0 = harq_out_length;
1403                        fcw->hcout_size1 = 0;
1404                        fcw->hcout_offset = 0;
1405                }
1406                harq_layout[harq_index].offset = fcw->hcout_offset;
1407                harq_layout[harq_index].size0 = fcw->hcout_size0;
1408        } else {
1409                fcw->hcout_size0 = 0;
1410                fcw->hcout_size1 = 0;
1411                fcw->hcout_offset = 0;
1412        }
1413}
1414
1415/**
1416 * Fills descriptor with data pointers of one block type.
1417 *
1418 * @param desc
1419 *   Pointer to DMA descriptor.
1420 * @param input
1421 *   Pointer to pointer to input data to be encoded. It is advanced to the
1422 *   next segment in the scatter-gather case.
1423 * @param offset
1424 *   Input offset within the rte_mbuf data, marking where the data starts.
1425 * @param cb_len
1426 *   Length of the currently processed Code Block.
1427 * @param seg_total_left
1428 *   Number of bytes still left in the current segment (mbuf) for further
1429 *   processing.
1430 * @param next_triplet
1431 *   Index of the next ACC100 DMA descriptor triplet to fill.
1432 *
1433 * @return
1434 *   Index of the next triplet on success, negative value if the lengths of
1435 *   the packet and the processed CB do not match.
1436 */
1441static inline int
1442acc100_dma_fill_blk_type_in(struct acc100_dma_req_desc *desc,
1443                struct rte_mbuf **input, uint32_t *offset, uint32_t cb_len,
1444                uint32_t *seg_total_left, int next_triplet)
1445{
1446        uint32_t part_len;
1447        struct rte_mbuf *m = *input;
1448
1449        part_len = (*seg_total_left < cb_len) ? *seg_total_left : cb_len;
1450        cb_len -= part_len;
1451        *seg_total_left -= part_len;
1452
1453        desc->data_ptrs[next_triplet].address =
1454                        rte_pktmbuf_iova_offset(m, *offset);
1455        desc->data_ptrs[next_triplet].blen = part_len;
1456        desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
1457        desc->data_ptrs[next_triplet].last = 0;
1458        desc->data_ptrs[next_triplet].dma_ext = 0;
1459        *offset += part_len;
1460        next_triplet++;
1461
1462        while (cb_len > 0) {
1463                if (next_triplet < ACC100_DMA_MAX_NUM_POINTERS_IN && m->next != NULL) {
1465                        m = m->next;
1466                        *seg_total_left = rte_pktmbuf_data_len(m);
1467                        part_len = (*seg_total_left < cb_len) ?
1468                                        *seg_total_left :
1469                                        cb_len;
1470                        desc->data_ptrs[next_triplet].address =
1471                                        rte_pktmbuf_iova_offset(m, 0);
1472                        desc->data_ptrs[next_triplet].blen = part_len;
1473                        desc->data_ptrs[next_triplet].blkid =
1474                                        ACC100_DMA_BLKID_IN;
1475                        desc->data_ptrs[next_triplet].last = 0;
1476                        desc->data_ptrs[next_triplet].dma_ext = 0;
1477                        cb_len -= part_len;
1478                        *seg_total_left -= part_len;
1479                        /* Initializing offset for next segment (mbuf) */
1480                        *offset = part_len;
1481                        next_triplet++;
1482                } else {
1483                        rte_bbdev_log(ERR,
1484                                "Some data still left for processing: "
1485                                "data_left: %u, next_triplet: %u, next_mbuf: %p",
1486                                cb_len, next_triplet, m->next);
1487                        return -EINVAL;
1488                }
1489        }
1490        /* Store the new mbuf, as it may have advanced in the scatter-gather case */
1491        *input = m;
1492
1493        return next_triplet;
1494}
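
/*
 * Illustrative trace of the scatter-gather path above, with hypothetical
 * sizes: for cb_len = 1024 and 768 bytes left in the first segment, the
 * first triplet gets blen = 768; the loop then advances to the next mbuf
 * (data_len = 512, say) and emits a second triplet with blen = 256, leaving
 * *offset = 256 within that segment for the next code block.
 */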
1495
1496/* Fills descriptor with data pointers of one block type.
1497 * Returns the index of the next triplet to fill.
1498 */
1500static inline int
1501acc100_dma_fill_blk_type_out(struct acc100_dma_req_desc *desc,
1502                struct rte_mbuf *output, uint32_t out_offset,
1503                uint32_t output_len, int next_triplet, int blk_id)
1504{
1505        desc->data_ptrs[next_triplet].address =
1506                        rte_pktmbuf_iova_offset(output, out_offset);
1507        desc->data_ptrs[next_triplet].blen = output_len;
1508        desc->data_ptrs[next_triplet].blkid = blk_id;
1509        desc->data_ptrs[next_triplet].last = 0;
1510        desc->data_ptrs[next_triplet].dma_ext = 0;
1511        next_triplet++;
1512
1513        return next_triplet;
1514}
1515
1516static inline void
1517acc100_header_init(struct acc100_dma_req_desc *desc)
1518{
1519        desc->word0 = ACC100_DMA_DESC_TYPE;
1520        desc->word1 = 0; /* Timestamp disabled */
1521        desc->word2 = 0;
1522        desc->word3 = 0;
1523        desc->numCBs = 1;
1524}
1525
1526#ifdef RTE_LIBRTE_BBDEV_DEBUG
1527/* Check if any input data is unexpectedly left for processing */
1528static inline int
1529check_mbuf_total_left(uint32_t mbuf_total_left)
1530{
1531        if (mbuf_total_left == 0)
1532                return 0;
1533        rte_bbdev_log(ERR,
1534                "Some data still left for processing: mbuf_total_left = %u",
1535                mbuf_total_left);
1536        return -EINVAL;
1537}
1538#endif
1539
1540static inline int
1541acc100_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
1542                struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1543                struct rte_mbuf *output, uint32_t *in_offset,
1544                uint32_t *out_offset, uint32_t *out_length,
1545                uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
1546{
1547        int next_triplet = 1; /* FCW already done */
1548        uint32_t e, ea, eb, length;
1549        uint16_t k, k_neg, k_pos;
1550        uint8_t cab, c_neg;
1551
1552        acc100_header_init(desc);
1557
1558        if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1559                ea = op->turbo_enc.tb_params.ea;
1560                eb = op->turbo_enc.tb_params.eb;
1561                cab = op->turbo_enc.tb_params.cab;
1562                k_neg = op->turbo_enc.tb_params.k_neg;
1563                k_pos = op->turbo_enc.tb_params.k_pos;
1564                c_neg = op->turbo_enc.tb_params.c_neg;
1565                e = (r < cab) ? ea : eb;
1566                k = (r < c_neg) ? k_neg : k_pos;
1567        } else {
1568                e = op->turbo_enc.cb_params.e;
1569                k = op->turbo_enc.cb_params.k;
1570        }
1571
1572        if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
1573                length = (k - 24) >> 3;
1574        else
1575                length = k >> 3;
1576
1577        if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
1578                rte_bbdev_log(ERR,
1579                                "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1580                                *mbuf_total_left, length);
1581                return -1;
1582        }
1583
1584        next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1585                        length, seg_total_left, next_triplet);
1586        if (unlikely(next_triplet < 0)) {
1587                rte_bbdev_log(ERR,
1588                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1589                                op);
1590                return -1;
1591        }
1592        desc->data_ptrs[next_triplet - 1].last = 1;
1593        desc->m2dlen = next_triplet;
1594        *mbuf_total_left -= length;
1595
1596        /* Set output length */
1597        if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
1598                /* Integer round up division by 8 */
1599                *out_length = (e + 7) >> 3;
1600        else
1601                *out_length = (k >> 3) * 3 + 2;
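        /*
         * Example: with rate matching, e = 10000 gives (10000 + 7) >> 3 =
         * 1250 output bytes; with RM bypassed and k = 6144, the output is
         * (6144 >> 3) * 3 + 2 = 2306 bytes (3 * k + 12 tail bits, rounded
         * up to full bytes).
         */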
1602
1603        next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1604                        *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1605        if (unlikely(next_triplet < 0)) {
1606                rte_bbdev_log(ERR,
1607                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1608                                op);
1609                return -1;
1610        }
1611        op->turbo_enc.output.length += *out_length;
1612        *out_offset += *out_length;
1613        desc->data_ptrs[next_triplet - 1].last = 1;
1614        desc->d2mlen = next_triplet - desc->m2dlen;
1615
1616        desc->op_addr = op;
1617
1618        return 0;
1619}
1620
1621static inline int
1622acc100_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
1623                struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1624                struct rte_mbuf *output, uint32_t *in_offset,
1625                uint32_t *out_offset, uint32_t *out_length,
1626                uint32_t *mbuf_total_left, uint32_t *seg_total_left)
1627{
1628        int next_triplet = 1; /* FCW already done */
1629        uint16_t K, in_length_in_bits, in_length_in_bytes;
1630        struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc;
1631
1632        acc100_header_init(desc);
1633
1634        K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c;
1635        in_length_in_bits = K - enc->n_filler;
1636        if ((enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) ||
1637                        (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH))
1638                in_length_in_bits -= 24;
1639        in_length_in_bytes = in_length_in_bits >> 3;
1640
1641        if (unlikely((*mbuf_total_left == 0) ||
1642                        (*mbuf_total_left < in_length_in_bytes))) {
1643                rte_bbdev_log(ERR,
1644                                "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1645                                *mbuf_total_left, in_length_in_bytes);
1646                return -1;
1647        }
1648
1649        next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset,
1650                        in_length_in_bytes,
1651                        seg_total_left, next_triplet);
1652        if (unlikely(next_triplet < 0)) {
1653                rte_bbdev_log(ERR,
1654                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1655                                op);
1656                return -1;
1657        }
1658        desc->data_ptrs[next_triplet - 1].last = 1;
1659        desc->m2dlen = next_triplet;
1660        *mbuf_total_left -= in_length_in_bytes;
1661
1662        /* Set output length */
1663        /* Integer round up division by 8 */
1664        *out_length = (enc->cb_params.e + 7) >> 3;
1665
1666        next_triplet = acc100_dma_fill_blk_type_out(desc, output, *out_offset,
1667                        *out_length, next_triplet, ACC100_DMA_BLKID_OUT_ENC);
1668        op->ldpc_enc.output.length += *out_length;
1669        *out_offset += *out_length;
1670        desc->data_ptrs[next_triplet - 1].last = 1;
1671        desc->data_ptrs[next_triplet - 1].dma_ext = 0;
1672        desc->d2mlen = next_triplet - desc->m2dlen;
1673
1674        desc->op_addr = op;
1675
1676        return 0;
1677}
1678
1679static inline int
1680acc100_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
1681                struct acc100_dma_req_desc *desc, struct rte_mbuf **input,
1682                struct rte_mbuf *h_output, struct rte_mbuf *s_output,
1683                uint32_t *in_offset, uint32_t *h_out_offset,
1684                uint32_t *s_out_offset, uint32_t *h_out_length,
1685                uint32_t *s_out_length, uint32_t *mbuf_total_left,
1686                uint32_t *seg_total_left, uint8_t r)
1687{
1688        int next_triplet = 1; /* FCW already done */
1689        uint16_t k;
1690        uint16_t crc24_overlap = 0;
1691        uint32_t e, kw;
1692
1693        acc100_header_init(desc);
1698
1699        if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
1700                k = (r < op->turbo_dec.tb_params.c_neg)
1701                        ? op->turbo_dec.tb_params.k_neg
1702                        : op->turbo_dec.tb_params.k_pos;
1703                e = (r < op->turbo_dec.tb_params.cab)
1704                        ? op->turbo_dec.tb_params.ea
1705                        : op->turbo_dec.tb_params.eb;
1706        } else {
1707                k = op->turbo_dec.cb_params.k;
1708                e = op->turbo_dec.cb_params.e;
1709        }
1710
1711        if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
1712                        && !check_bit(op->turbo_dec.op_flags,
1713                        RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
1714                crc24_overlap = 24;
1715        if ((op->turbo_dec.code_block_mode == RTE_BBDEV_CODE_BLOCK)
1716                        && check_bit(op->turbo_dec.op_flags,
1717                        RTE_BBDEV_TURBO_DEC_CRC_24B_DROP))
1718                crc24_overlap = 24;
1719
1720        /* Calculate the circular buffer size.
1721         * According to 3GPP TS 36.212 section 5.1.4.2:
1722         *   Kw = 3 * Kpi,
1723         * where:
1724         *   Kpi = nCol * nRow,
1725         * nCol is 32 and nRow is the smallest value satisfying:
1726         *   D <= nCol * nRow,
1727         * where D is the size of each output from the turbo encoder block (k + 4).
1728         */
1729        kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
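        /*
         * Worked example: for k = 6144, D = k + 4 = 6148; aligning up to
         * the 32-column sub-block gives 6176, so kw = 3 * 6176 = 18528
         * soft LLRs (one byte each) are fetched for this code block.
         */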
1730
1731        if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
1732                rte_bbdev_log(ERR,
1733                                "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1734                                *mbuf_total_left, kw);
1735                return -1;
1736        }
1737
1738        next_triplet = acc100_dma_fill_blk_type_in(desc, input, in_offset, kw,
1739                        seg_total_left, next_triplet);
1740        if (unlikely(next_triplet < 0)) {
1741                rte_bbdev_log(ERR,
1742                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1743                                op);
1744                return -1;
1745        }
1746        desc->data_ptrs[next_triplet - 1].last = 1;
1747        desc->m2dlen = next_triplet;
1748        *mbuf_total_left -= kw;
1749
1750        next_triplet = acc100_dma_fill_blk_type_out(
1751                        desc, h_output, *h_out_offset,
1752                        (k - crc24_overlap) >> 3, next_triplet,
1753                        ACC100_DMA_BLKID_OUT_HARD);
1754        if (unlikely(next_triplet < 0)) {
1755                rte_bbdev_log(ERR,
1756                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1757                                op);
1758                return -1;
1759        }
1760
1761        *h_out_length = ((k - crc24_overlap) >> 3);
1762        op->turbo_dec.hard_output.length += *h_out_length;
1763        *h_out_offset += *h_out_length;
1764
1765        /* Soft output */
1766        if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
1767                if (check_bit(op->turbo_dec.op_flags,
1768                                RTE_BBDEV_TURBO_EQUALIZER))
1769                        *s_out_length = e;
1770                else
1771                        *s_out_length = (k * 3) + 12;
1772
1773                next_triplet = acc100_dma_fill_blk_type_out(desc, s_output,
1774                                *s_out_offset, *s_out_length, next_triplet,
1775                                ACC100_DMA_BLKID_OUT_SOFT);
1776                if (unlikely(next_triplet < 0)) {
1777                        rte_bbdev_log(ERR,
1778                                        "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1779                                        op);
1780                        return -1;
1781                }
1782
1783                op->turbo_dec.soft_output.length += *s_out_length;
1784                *s_out_offset += *s_out_length;
1785        }
1786
1787        desc->data_ptrs[next_triplet - 1].last = 1;
1788        desc->d2mlen = next_triplet - desc->m2dlen;
1789
1790        desc->op_addr = op;
1791
1792        return 0;
1793}
1794
1795static inline int
1796acc100_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
1797                struct acc100_dma_req_desc *desc,
1798                struct rte_mbuf **input, struct rte_mbuf *h_output,
1799                uint32_t *in_offset, uint32_t *h_out_offset,
1800                uint32_t *h_out_length, uint32_t *mbuf_total_left,
1801                uint32_t *seg_total_left,
1802                struct acc100_fcw_ld *fcw)
1803{
1804        struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
1805        int next_triplet = 1; /* FCW already done */
1806        uint32_t input_length;
1807        uint16_t output_length, crc24_overlap = 0;
1808        uint16_t sys_cols, K, h_p_size, h_np_size;
1809        bool h_comp = check_bit(dec->op_flags,
1810                        RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION);
1811
1812        acc100_header_init(desc);
1813
1814        if (check_bit(op->ldpc_dec.op_flags,
1815                        RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP))
1816                crc24_overlap = 24;
1817
1818        /* Compute some LDPC BG lengths */
1819        input_length = dec->cb_params.e;
1820        if (check_bit(op->ldpc_dec.op_flags,
1821                        RTE_BBDEV_LDPC_LLR_COMPRESSION))
1822                input_length = (input_length * 3 + 3) / 4;
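        /*
         * Example: with LLR compression, e = 1000 rate-matched LLRs occupy
         * (1000 * 3 + 3) / 4 = 750 input bytes, i.e. 6 bits per LLR rounded
         * up to a whole byte.
         */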
1823        sys_cols = (dec->basegraph == 1) ? 22 : 10;
1824        K = sys_cols * dec->z_c;
1825        output_length = K - dec->n_filler - crc24_overlap;
1826
1827        if (unlikely((*mbuf_total_left == 0) ||
1828                        (*mbuf_total_left < input_length))) {
1829                rte_bbdev_log(ERR,
1830                                "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
1831                                *mbuf_total_left, input_length);
1832                return -1;
1833        }
1834
1835        next_triplet = acc100_dma_fill_blk_type_in(desc, input,
1836                        in_offset, input_length,
1837                        seg_total_left, next_triplet);
1838
1839        if (unlikely(next_triplet < 0)) {
1840                rte_bbdev_log(ERR,
1841                                "Mismatch between data to process and mbuf data length in bbdev_op: %p",
1842                                op);
1843                return -1;
1844        }
1845
1846        if (check_bit(op->ldpc_dec.op_flags,
1847                                RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1848                h_p_size = fcw->hcin_size0 + fcw->hcin_size1;
1849                if (h_comp)
1850                        h_p_size = (h_p_size * 3 + 3) / 4;
1851                desc->data_ptrs[next_triplet].address =
1852                                dec->harq_combined_input.offset;
1853                desc->data_ptrs[next_triplet].blen = h_p_size;
1854                desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN_HARQ;
1855                desc->data_ptrs[next_triplet].dma_ext = 1;
1856#ifndef ACC100_EXT_MEM
1857                acc100_dma_fill_blk_type_out(
1858                                desc,
1859                                op->ldpc_dec.harq_combined_input.data,
1860                                op->ldpc_dec.harq_combined_input.offset,
1861                                h_p_size,
1862                                next_triplet,
1863                                ACC100_DMA_BLKID_IN_HARQ);
1864#endif
1865                next_triplet++;
1866        }
1867
1868        desc->data_ptrs[next_triplet - 1].last = 1;
1869        desc->m2dlen = next_triplet;
1870        *mbuf_total_left -= input_length;
1871
1872        next_triplet = acc100_dma_fill_blk_type_out(desc, h_output,
1873                        *h_out_offset, output_length >> 3, next_triplet,
1874                        ACC100_DMA_BLKID_OUT_HARD);
1875
1876        if (check_bit(op->ldpc_dec.op_flags,
1877                                RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1878                /* Pruned size of the HARQ */
1879                h_p_size = fcw->hcout_size0 + fcw->hcout_size1;
1880                /* Non-Pruned size of the HARQ */
1881                h_np_size = fcw->hcout_offset > 0 ?
1882                                fcw->hcout_offset + fcw->hcout_size1 :
1883                                h_p_size;
1884                if (h_comp) {
1885                        h_np_size = (h_np_size * 3 + 3) / 4;
1886                        h_p_size = (h_p_size * 3 + 3) / 4;
1887                }
1888                dec->harq_combined_output.length = h_np_size;
1889                desc->data_ptrs[next_triplet].address =
1890                                dec->harq_combined_output.offset;
1891                desc->data_ptrs[next_triplet].blen = h_p_size;
1892                desc->data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARQ;
1893                desc->data_ptrs[next_triplet].dma_ext = 1;
1894#ifndef ACC100_EXT_MEM
1895                acc100_dma_fill_blk_type_out(
1896                                desc,
1897                                dec->harq_combined_output.data,
1898                                dec->harq_combined_output.offset,
1899                                h_p_size,
1900                                next_triplet,
1901                                ACC100_DMA_BLKID_OUT_HARQ);
1902#endif
1903                next_triplet++;
1904        }
1905
1906        *h_out_length = output_length >> 3;
1907        dec->hard_output.length += *h_out_length;
1908        *h_out_offset += *h_out_length;
1909        desc->data_ptrs[next_triplet - 1].last = 1;
1910        desc->d2mlen = next_triplet - desc->m2dlen;
1911
1912        desc->op_addr = op;
1913
1914        return 0;
1915}
1916
1917static inline void
1918acc100_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
1919                struct acc100_dma_req_desc *desc,
1920                struct rte_mbuf *input, struct rte_mbuf *h_output,
1921                uint32_t *in_offset, uint32_t *h_out_offset,
1922                uint32_t *h_out_length,
1923                union acc100_harq_layout_data *harq_layout)
1924{
1925        int next_triplet = 1; /* FCW already done */
1926        desc->data_ptrs[next_triplet].address =
1927                        rte_pktmbuf_iova_offset(input, *in_offset);
1928        next_triplet++;
1929
1930        if (check_bit(op->ldpc_dec.op_flags,
1931                                RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) {
1932                struct rte_bbdev_op_data hi = op->ldpc_dec.harq_combined_input;
1933                desc->data_ptrs[next_triplet].address = hi.offset;
1934#ifndef ACC100_EXT_MEM
1935                desc->data_ptrs[next_triplet].address =
1936                                rte_pktmbuf_iova_offset(hi.data, hi.offset);
1937#endif
1938                next_triplet++;
1939        }
1940
1941        desc->data_ptrs[next_triplet].address =
1942                        rte_pktmbuf_iova_offset(h_output, *h_out_offset);
1943        *h_out_length = desc->data_ptrs[next_triplet].blen;
1944        next_triplet++;
1945
1946        if (check_bit(op->ldpc_dec.op_flags,
1947                                RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) {
1948                desc->data_ptrs[next_triplet].address =
1949                                op->ldpc_dec.harq_combined_output.offset;
1950                /* Adjust based on previous operation */
1951                struct rte_bbdev_dec_op *prev_op = desc->op_addr;
1952                op->ldpc_dec.harq_combined_output.length =
1953                                prev_op->ldpc_dec.harq_combined_output.length;
1954                int16_t hq_idx = op->ldpc_dec.harq_combined_output.offset /
1955                                ACC100_HARQ_OFFSET;
1956                int16_t prev_hq_idx =
1957                                prev_op->ldpc_dec.harq_combined_output.offset
1958                                / ACC100_HARQ_OFFSET;
1959                harq_layout[hq_idx].val = harq_layout[prev_hq_idx].val;
1960#ifndef ACC100_EXT_MEM
1961                struct rte_bbdev_op_data ho =
1962                                op->ldpc_dec.harq_combined_output;
1963                desc->data_ptrs[next_triplet].address =
1964                                rte_pktmbuf_iova_offset(ho.data, ho.offset);
1965#endif
1966                next_triplet++;
1967        }
1968
1969        op->ldpc_dec.hard_output.length += *h_out_length;
1970        desc->op_addr = op;
1971}
1972
1973
1974/* Enqueue a number of operations to HW and update software rings */
1975static inline void
1976acc100_dma_enqueue(struct acc100_queue *q, uint16_t n,
1977                struct rte_bbdev_stats *queue_stats)
1978{
1979        union acc100_enqueue_reg_fmt enq_req;
1980#ifdef RTE_BBDEV_OFFLOAD_COST
1981        uint64_t start_time = 0;
1982        queue_stats->acc_offload_cycles = 0;
1983#else
1984        RTE_SET_USED(queue_stats);
1985#endif
1986
1987        enq_req.val = 0;
1988        /* Set the address offset: 100b, matching 256-byte DMA descriptors */
1989        enq_req.addr_offset = ACC100_DESC_OFFSET;
1990
1991        /* Split ops into batches */
1992        do {
1993                union acc100_dma_desc *desc;
1994                uint16_t enq_batch_size;
1995                uint64_t offset;
1996                rte_iova_t req_elem_addr;
1997
1998                enq_batch_size = RTE_MIN(n, MAX_ENQ_BATCH_SIZE);
1999
2000                /* Set flag on last descriptor in a batch */
2001                desc = q->ring_addr + ((q->sw_ring_head + enq_batch_size - 1) &
2002                                q->sw_ring_wrap_mask);
2003                desc->req.last_desc_in_batch = 1;
2004
2005                /* Calculate the 1st descriptor's address */
2006                offset = ((q->sw_ring_head & q->sw_ring_wrap_mask) *
2007                                sizeof(union acc100_dma_desc));
2008                req_elem_addr = q->ring_addr_iova + offset;
2009
2010                /* Fill enqueue struct */
2011                enq_req.num_elem = enq_batch_size;
2012                /* low 6 bits are not needed */
2013                enq_req.req_elem_addr = (uint32_t)(req_elem_addr >> 6);
2014
2015#ifdef RTE_LIBRTE_BBDEV_DEBUG
2016                rte_memdump(stderr, "Req sdone", desc, sizeof(*desc));
2017#endif
2018                rte_bbdev_log_debug(
2019                                "Enqueue %u reqs (phys %#"PRIx64") to reg %p",
2020                                enq_batch_size,
2021                                req_elem_addr,
2022                                (void *)q->mmio_reg_enqueue);
2023
2024                rte_wmb();
2025
2026#ifdef RTE_BBDEV_OFFLOAD_COST
2027                /* Start time measurement for enqueue function offload. */
2028                start_time = rte_rdtsc_precise();
2029#endif
2030                rte_bbdev_log(DEBUG, "MMIO enqueue");
2031                mmio_write(q->mmio_reg_enqueue, enq_req.val);
2032
2033#ifdef RTE_BBDEV_OFFLOAD_COST
2034                queue_stats->acc_offload_cycles +=
2035                                rte_rdtsc_precise() - start_time;
2036#endif
2037
2038                q->aq_enqueued++;
2039                q->sw_ring_head += enq_batch_size;
2040                n -= enq_batch_size;
2041
2042        } while (n);
2043
2045}
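
/*
 * Illustrative example of the batch splitting above, assuming
 * MAX_ENQ_BATCH_SIZE is 255 (the largest batch the enqueue register can
 * describe): a call with n = 300 performs two MMIO doorbell writes, one for
 * 255 descriptors and one for the remaining 45, advancing sw_ring_head
 * after each batch.
 */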
2046
2047#ifdef RTE_LIBRTE_BBDEV_DEBUG
2048/* Validates turbo encoder parameters */
2049static inline int
2050validate_enc_op(struct rte_bbdev_enc_op *op)
2051{
2052        struct rte_bbdev_op_turbo_enc *turbo_enc = &op->turbo_enc;
2053        struct rte_bbdev_op_enc_turbo_cb_params *cb = NULL;
2054        struct rte_bbdev_op_enc_turbo_tb_params *tb = NULL;
2055        uint16_t kw, kw_neg, kw_pos;
2056
2057        if (op->mempool == NULL) {
2058                rte_bbdev_log(ERR, "Invalid mempool pointer");
2059                return -1;
2060        }
2061        if (turbo_enc->input.data == NULL) {
2062                rte_bbdev_log(ERR, "Invalid input pointer");
2063                return -1;
2064        }
2065        if (turbo_enc->output.data == NULL) {
2066                rte_bbdev_log(ERR, "Invalid output pointer");
2067                return -1;
2068        }
2069        if (turbo_enc->rv_index > 3) {
2070                rte_bbdev_log(ERR,
2071                                "rv_index (%u) is out of range 0 <= value <= 3",
2072                                turbo_enc->rv_index);
2073                return -1;
2074        }
2075        if (turbo_enc->code_block_mode != RTE_BBDEV_TRANSPORT_BLOCK &&
2076                        turbo_enc->code_block_mode != RTE_BBDEV_CODE_BLOCK) {
2077                rte_bbdev_log(ERR,
2078                                "code_block_mode (%u) is out of range 0 <= value <= 1",
2079                                turbo_enc->code_block_mode);
2080                return -1;
2081        }
2082
2083        if (turbo_enc->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
2084                tb = &turbo_enc->tb_params;
2085                if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE
2086                                || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE)
2087                                && tb->c_neg > 0) {
2088                        rte_bbdev_log(ERR,
2089                                        "k_neg (%u) is out of range %u <= value <= %u",
2090                                        tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2091                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2092                        return -1;
2093                }
2094                if (tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE
2095                                || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
2096                        rte_bbdev_log(ERR,
2097                                        "k_pos (%u) is out of range %u <= value <= %u",
2098                                        tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2099                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2100                        return -1;
2101                }
2102                if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1))
2103                        rte_bbdev_log(ERR,
2104                                        "c_neg (%u) is out of range 0 <= value <= %u",
2105                                        tb->c_neg,
2106                                        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1);
2107                if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) {
2108                        rte_bbdev_log(ERR,
2109                                        "c (%u) is out of range 1 <= value <= %u",
2110                                        tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS);
2111                        return -1;
2112                }
2113                if (tb->cab > tb->c) {
2114                        rte_bbdev_log(ERR,
2115                                        "cab (%u) is greater than c (%u)",
2116                                        tb->cab, tb->c);
2117                        return -1;
2118                }
2119                if ((tb->ea < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->ea % 2))
2120                                && tb->r < tb->cab) {
2121                        rte_bbdev_log(ERR,
2122                                        "ea (%u) is less than %u or it is not even",
2123                                        tb->ea, RTE_BBDEV_TURBO_MIN_CB_SIZE);
2124                        return -1;
2125                }
2126                if ((tb->eb < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->eb % 2))
2127                                && tb->c > tb->cab) {
2128                        rte_bbdev_log(ERR,
2129                                        "eb (%u) is less than %u or it is not even",
2130                                        tb->eb, RTE_BBDEV_TURBO_MIN_CB_SIZE);
2131                        return -1;
2132                }
2133
2134                kw_neg = 3 * RTE_ALIGN_CEIL(tb->k_neg + 4,
2135                                        RTE_BBDEV_TURBO_C_SUBBLOCK);
2136                if (tb->ncb_neg < tb->k_neg || tb->ncb_neg > kw_neg) {
2137                        rte_bbdev_log(ERR,
2138                                        "ncb_neg (%u) is out of range (%u) k_neg <= value <= (%u) kw_neg",
2139                                        tb->ncb_neg, tb->k_neg, kw_neg);
2140                        return -1;
2141                }
2142
2143                kw_pos = 3 * RTE_ALIGN_CEIL(tb->k_pos + 4,
2144                                        RTE_BBDEV_TURBO_C_SUBBLOCK);
2145                if (tb->ncb_pos < tb->k_pos || tb->ncb_pos > kw_pos) {
2146                        rte_bbdev_log(ERR,
2147                                        "ncb_pos (%u) is out of range (%u) k_pos <= value <= (%u) kw_pos",
2148                                        tb->ncb_pos, tb->k_pos, kw_pos);
2149                        return -1;
2150                }
2151                if (tb->r > (tb->c - 1)) {
2152                        rte_bbdev_log(ERR,
2153                                        "r (%u) is greater than c - 1 (%u)",
2154                                        tb->r, tb->c - 1);
2155                        return -1;
2156                }
2157        } else {
2158                cb = &turbo_enc->cb_params;
2159                if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE
2160                                || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
2161                        rte_bbdev_log(ERR,
2162                                        "k (%u) is out of range %u <= value <= %u",
2163                                        cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2164                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2165                        return -1;
2166                }
2167
2168                if (cb->e < RTE_BBDEV_TURBO_MIN_CB_SIZE || (cb->e % 2)) {
2169                        rte_bbdev_log(ERR,
2170                                        "e (%u) is less than %u or it is not even",
2171                                        cb->e, RTE_BBDEV_TURBO_MIN_CB_SIZE);
2172                        return -1;
2173                }
2174
2175                kw = RTE_ALIGN_CEIL(cb->k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3;
2176                if (cb->ncb < cb->k || cb->ncb > kw) {
2177                        rte_bbdev_log(ERR,
2178                                        "ncb (%u) is out of range (%u) k <= value <= (%u) kw",
2179                                        cb->ncb, cb->k, kw);
2180                        return -1;
2181                }
2182        }
2183
2184        return 0;
2185}
2186/* Validates LDPC encoder parameters */
2187static inline int
2188validate_ldpc_enc_op(struct rte_bbdev_enc_op *op)
2189{
2190        struct rte_bbdev_op_ldpc_enc *ldpc_enc = &op->ldpc_enc;
2191
2192        if (op->mempool == NULL) {
2193                rte_bbdev_log(ERR, "Invalid mempool pointer");
2194                return -1;
2195        }
2196        if (ldpc_enc->input.data == NULL) {
2197                rte_bbdev_log(ERR, "Invalid input pointer");
2198                return -1;
2199        }
2200        if (ldpc_enc->output.data == NULL) {
2201                rte_bbdev_log(ERR, "Invalid output pointer");
2202                return -1;
2203        }
2204        if (ldpc_enc->input.length >
2205                        RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) {
2206                rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d",
2207                                ldpc_enc->input.length,
2208                                RTE_BBDEV_LDPC_MAX_CB_SIZE);
2209                return -1;
2210        }
2211        if ((ldpc_enc->basegraph > 2) || (ldpc_enc->basegraph == 0)) {
2212                rte_bbdev_log(ERR,
2213                                "BG (%u) is out of range 1 <= value <= 2",
2214                                ldpc_enc->basegraph);
2215                return -1;
2216        }
2217        if (ldpc_enc->rv_index > 3) {
2218                rte_bbdev_log(ERR,
2219                                "rv_index (%u) is out of range 0 <= value <= 3",
2220                                ldpc_enc->rv_index);
2221                return -1;
2222        }
2223        if (ldpc_enc->code_block_mode > RTE_BBDEV_CODE_BLOCK) {
2224                rte_bbdev_log(ERR,
2225                                "code_block_mode (%u) is out of range 0 <= value <= 1",
2226                                ldpc_enc->code_block_mode);
2227                return -1;
2228        }
2229        int K = (ldpc_enc->basegraph == 1 ? 22 : 10) * ldpc_enc->z_c;
2230        if (ldpc_enc->n_filler >= K) {
2231                rte_bbdev_log(ERR,
2232                                "K and F are not compatible %u %u",
2233                                K, ldpc_enc->n_filler);
2234                return -1;
2235        }
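        /*
         * Example: for basegraph 1 and z_c = 384, K = 22 * 384 = 8448 bits,
         * the BG1 maximum code block size, so n_filler must stay below 8448.
         */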
2236        return 0;
2237}
2238
2239/* Validates LDPC decoder parameters */
2240static inline int
2241validate_ldpc_dec_op(struct rte_bbdev_dec_op *op)
2242{
2243        struct rte_bbdev_op_ldpc_dec *ldpc_dec = &op->ldpc_dec;
2244
2245        if (op->mempool == NULL) {
2246                rte_bbdev_log(ERR, "Invalid mempool pointer");
2247                return -1;
2248        }
2249        if ((ldpc_dec->basegraph > 2) || (ldpc_dec->basegraph == 0)) {
2250                rte_bbdev_log(ERR,
2251                                "BG (%u) is out of range 1 <= value <= 2",
2252                                ldpc_dec->basegraph);
2253                return -1;
2254        }
2255        if (ldpc_dec->iter_max == 0) {
2256                rte_bbdev_log(ERR,
2257                                "iter_max (%u) is equal to 0",
2258                                ldpc_dec->iter_max);
2259                return -1;
2260        }
2261        if (ldpc_dec->rv_index > 3) {
2262                rte_bbdev_log(ERR,
2263                                "rv_index (%u) is out of range 0 <= value <= 3",
2264                                ldpc_dec->rv_index);
2265                return -1;
2266        }
2267        if (ldpc_dec->code_block_mode > RTE_BBDEV_CODE_BLOCK) {
2268                rte_bbdev_log(ERR,
2269                                "code_block_mode (%u) is out of range 0 <= value <= 1",
2270                                ldpc_dec->code_block_mode);
2271                return -1;
2272        }
2273        int K = (ldpc_dec->basegraph == 1 ? 22 : 10) * ldpc_dec->z_c;
2274        if (ldpc_dec->n_filler >= K) {
2275                rte_bbdev_log(ERR,
2276                                "K and F are not compatible %u %u",
2277                                K, ldpc_dec->n_filler);
2278                return -1;
2279        }
2280        return 0;
2281}
2282#endif
2283
2284/* Enqueue one turbo encode operation for ACC100 device in CB mode */
2285static inline int
2286enqueue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
2287                uint16_t total_enqueued_cbs)
2288{
2289        union acc100_dma_desc *desc = NULL;
2290        int ret;
2291        uint32_t in_offset, out_offset, out_length, mbuf_total_left,
2292                seg_total_left;
2293        struct rte_mbuf *input, *output_head, *output;
2294
2295#ifdef RTE_LIBRTE_BBDEV_DEBUG
2296        /* Validate op structure */
2297        if (validate_enc_op(op) == -1) {
2298                rte_bbdev_log(ERR, "Turbo encoder validation failed");
2299                return -EINVAL;
2300        }
2301#endif
2302
2303        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2304                        & q->sw_ring_wrap_mask);
2305        desc = q->ring_addr + desc_idx;
2306        acc100_fcw_te_fill(op, &desc->req.fcw_te);
2307
2308        input = op->turbo_enc.input.data;
2309        output_head = output = op->turbo_enc.output.data;
2310        in_offset = op->turbo_enc.input.offset;
2311        out_offset = op->turbo_enc.output.offset;
2312        out_length = 0;
2313        mbuf_total_left = op->turbo_enc.input.length;
2314        seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
2315                        - in_offset;
2316
2317        ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
2318                        &in_offset, &out_offset, &out_length, &mbuf_total_left,
2319                        &seg_total_left, 0);
2320
2321        if (unlikely(ret < 0))
2322                return ret;
2323
2324        mbuf_append(output_head, output, out_length);
2325
2326#ifdef RTE_LIBRTE_BBDEV_DEBUG
2327        rte_memdump(stderr, "FCW", &desc->req.fcw_te,
2328                        sizeof(desc->req.fcw_te) - 8);
2329        rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2330        if (check_mbuf_total_left(mbuf_total_left) != 0)
2331                return -EINVAL;
2332#endif
2333        /* One CB (one op) was successfully prepared to enqueue */
2334        return 1;
2335}
2336
2337/* Enqueue multiple LDPC encode operations for ACC100 device in CB mode, multiplexed on a single descriptor */
2338static inline int
2339enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
2340                uint16_t total_enqueued_cbs, int16_t num)
2341{
2342        union acc100_dma_desc *desc = NULL;
2343        uint32_t out_length;
2344        struct rte_mbuf *output_head, *output;
2345        int i, next_triplet;
2346        uint16_t in_length_in_bytes;
2347        struct rte_bbdev_op_ldpc_enc *enc = &ops[0]->ldpc_enc;
2348
2349#ifdef RTE_LIBRTE_BBDEV_DEBUG
2350        /* Validate op structure */
2351        if (validate_ldpc_enc_op(ops[0]) == -1) {
2352                rte_bbdev_log(ERR, "LDPC encoder validation failed");
2353                return -EINVAL;
2354        }
2355#endif
2356
2357        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2358                        & q->sw_ring_wrap_mask);
2359        desc = q->ring_addr + desc_idx;
2360        acc100_fcw_le_fill(ops[0], &desc->req.fcw_le, num);
2361
2362        /* This could instead be done at polling time */
2363        acc100_header_init(&desc->req);
2364        desc->req.numCBs = num;
2365
2366        in_length_in_bytes = ops[0]->ldpc_enc.input.data->data_len;
2367        out_length = (enc->cb_params.e + 7) >> 3;
2368        desc->req.m2dlen = 1 + num;
2369        desc->req.d2mlen = num;
2370        next_triplet = 1;
2371
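        /*
         * Resulting triplet layout for e.g. num = 2: data_ptrs[0] is
         * reserved for the FCW, then [1] = input of CB0, [2] = output of
         * CB0, [3] = input of CB1, [4] = output of CB1, matching
         * m2dlen = 3 and d2mlen = 2 set above.
         */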
2372        for (i = 0; i < num; i++) {
2373                desc->req.data_ptrs[next_triplet].address =
2374                        rte_pktmbuf_iova_offset(ops[i]->ldpc_enc.input.data, 0);
2375                desc->req.data_ptrs[next_triplet].blen = in_length_in_bytes;
2376                next_triplet++;
2377                desc->req.data_ptrs[next_triplet].address =
2378                                rte_pktmbuf_iova_offset(
2379                                ops[i]->ldpc_enc.output.data, 0);
2380                desc->req.data_ptrs[next_triplet].blen = out_length;
2381                next_triplet++;
2382                ops[i]->ldpc_enc.output.length = out_length;
2383                output_head = output = ops[i]->ldpc_enc.output.data;
2384                mbuf_append(output_head, output, out_length);
2385                output->data_len = out_length;
2386        }
2387
2388        desc->req.op_addr = ops[0];
2389
2390#ifdef RTE_LIBRTE_BBDEV_DEBUG
2391        rte_memdump(stderr, "FCW", &desc->req.fcw_le,
2392                        sizeof(desc->req.fcw_le) - 8);
2393        rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2394#endif
2395
2396        /* Multiple CBs (num ops) were successfully prepared to enqueue */
2397        return num;
2398}
2399
2400/* Enqueue one LDPC encode operation for ACC100 device in CB mode */
2401static inline int
2402enqueue_ldpc_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
2403                uint16_t total_enqueued_cbs)
2404{
2405        union acc100_dma_desc *desc = NULL;
2406        int ret;
2407        uint32_t in_offset, out_offset, out_length, mbuf_total_left,
2408                seg_total_left;
2409        struct rte_mbuf *input, *output_head, *output;
2410
2411#ifdef RTE_LIBRTE_BBDEV_DEBUG
2412        /* Validate op structure */
2413        if (validate_ldpc_enc_op(op) == -1) {
2414                rte_bbdev_log(ERR, "LDPC encoder validation failed");
2415                return -EINVAL;
2416        }
2417#endif
2418
2419        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2420                        & q->sw_ring_wrap_mask);
2421        desc = q->ring_addr + desc_idx;
2422        acc100_fcw_le_fill(op, &desc->req.fcw_le, 1);
2423
2424        input = op->ldpc_enc.input.data;
2425        output_head = output = op->ldpc_enc.output.data;
2426        in_offset = op->ldpc_enc.input.offset;
2427        out_offset = op->ldpc_enc.output.offset;
2428        out_length = 0;
2429        mbuf_total_left = op->ldpc_enc.input.length;
2430        seg_total_left = rte_pktmbuf_data_len(op->ldpc_enc.input.data)
2431                        - in_offset;
2432
2433        ret = acc100_dma_desc_le_fill(op, &desc->req, &input, output,
2434                        &in_offset, &out_offset, &out_length, &mbuf_total_left,
2435                        &seg_total_left);
2436
2437        if (unlikely(ret < 0))
2438                return ret;
2439
2440        mbuf_append(output_head, output, out_length);
2441
2442#ifdef RTE_LIBRTE_BBDEV_DEBUG
2443        rte_memdump(stderr, "FCW", &desc->req.fcw_le,
2444                        sizeof(desc->req.fcw_le) - 8);
2445        rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2446
2447        if (check_mbuf_total_left(mbuf_total_left) != 0)
2448                return -EINVAL;
2449#endif
2450        /* One CB (one op) was successfully prepared to enqueue */
2451        return 1;
2452}
2453
2454
2455/* Enqueue one turbo encode operation for ACC100 device in TB mode. */
2456static inline int
2457enqueue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op *op,
2458                uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
2459{
2460        union acc100_dma_desc *desc = NULL;
2461        int ret;
2462        uint8_t r, c;
2463        uint32_t in_offset, out_offset, out_length, mbuf_total_left,
2464                seg_total_left;
2465        struct rte_mbuf *input, *output_head, *output;
2466        uint16_t current_enqueued_cbs = 0;
2467
2468#ifdef RTE_LIBRTE_BBDEV_DEBUG
2469        /* Validate op structure */
2470        if (validate_enc_op(op) == -1) {
2471                rte_bbdev_log(ERR, "Turbo encoder validation failed");
2472                return -EINVAL;
2473        }
2474#endif
2475
2476        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2477                        & q->sw_ring_wrap_mask);
2478        desc = q->ring_addr + desc_idx;
2479        uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
2480        acc100_fcw_te_fill(op, &desc->req.fcw_te);
2481
2482        input = op->turbo_enc.input.data;
2483        output_head = output = op->turbo_enc.output.data;
2484        in_offset = op->turbo_enc.input.offset;
2485        out_offset = op->turbo_enc.output.offset;
2486        out_length = 0;
2487        mbuf_total_left = op->turbo_enc.input.length;
2488
2489        c = op->turbo_enc.tb_params.c;
2490        r = op->turbo_enc.tb_params.r;
2491
2492        while (mbuf_total_left > 0 && r < c) {
2493                seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
2494                /* Set up DMA descriptor */
2495                desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
2496                                & q->sw_ring_wrap_mask);
2497                desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
2498                desc->req.data_ptrs[0].blen = ACC100_FCW_TE_BLEN;
2499
2500                ret = acc100_dma_desc_te_fill(op, &desc->req, &input, output,
2501                                &in_offset, &out_offset, &out_length,
2502                                &mbuf_total_left, &seg_total_left, r);
2503                if (unlikely(ret < 0))
2504                        return ret;
2505                mbuf_append(output_head, output, out_length);
2506
2507                /* Set total number of CBs in TB */
2508                desc->req.cbs_in_tb = cbs_in_tb;
2509#ifdef RTE_LIBRTE_BBDEV_DEBUG
2510                rte_memdump(stderr, "FCW", &desc->req.fcw_te,
2511                                sizeof(desc->req.fcw_te) - 8);
2512                rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2513#endif
2514
2515                if (seg_total_left == 0) {
2516                        /* Go to the next mbuf */
2517                        input = input->next;
2518                        in_offset = 0;
2519                        output = output->next;
2520                        out_offset = 0;
2521                }
2522
2523                total_enqueued_cbs++;
2524                current_enqueued_cbs++;
2525                r++;
2526        }
2527
2528#ifdef RTE_LIBRTE_BBDEV_DEBUG
2529        if (check_mbuf_total_left(mbuf_total_left) != 0)
2530                return -EINVAL;
2531#endif
2532
2533        /* Set SDone on last CB descriptor for TB mode. */
2534        desc->req.sdone_enable = 1;
2535        desc->req.irq_enable = q->irq_enable;
2536
2537        return current_enqueued_cbs;
2538}
2539
2540#ifdef RTE_LIBRTE_BBDEV_DEBUG
2541/* Validates turbo decoder parameters */
2542static inline int
2543validate_dec_op(struct rte_bbdev_dec_op *op)
2544{
2545        struct rte_bbdev_op_turbo_dec *turbo_dec = &op->turbo_dec;
2546        struct rte_bbdev_op_dec_turbo_cb_params *cb = NULL;
2547        struct rte_bbdev_op_dec_turbo_tb_params *tb = NULL;
2548
2549        if (op->mempool == NULL) {
2550                rte_bbdev_log(ERR, "Invalid mempool pointer");
2551                return -1;
2552        }
2553        if (turbo_dec->input.data == NULL) {
2554                rte_bbdev_log(ERR, "Invalid input pointer");
2555                return -1;
2556        }
2557        if (turbo_dec->hard_output.data == NULL) {
2558                rte_bbdev_log(ERR, "Invalid hard_output pointer");
2559                return -1;
2560        }
2561        if (check_bit(turbo_dec->op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT) &&
2562                        turbo_dec->soft_output.data == NULL) {
2563                rte_bbdev_log(ERR, "Invalid soft_output pointer");
2564                return -1;
2565        }
2566        if (turbo_dec->rv_index > 3) {
2567                rte_bbdev_log(ERR,
2568                                "rv_index (%u) is out of range 0 <= value <= 3",
2569                                turbo_dec->rv_index);
2570                return -1;
2571        }
2572        if (turbo_dec->iter_min < 1) {
2573                rte_bbdev_log(ERR,
2574                                "iter_min (%u) is less than 1",
2575                                turbo_dec->iter_min);
2576                return -1;
2577        }
2578        if (turbo_dec->iter_max <= 2) {
2579                rte_bbdev_log(ERR,
2580                                "iter_max (%u) is less than or equal to 2",
2581                                turbo_dec->iter_max);
2582                return -1;
2583        }
2584        if (turbo_dec->iter_min > turbo_dec->iter_max) {
2585                rte_bbdev_log(ERR,
2586                                "iter_min (%u) is greater than iter_max (%u)",
2587                                turbo_dec->iter_min, turbo_dec->iter_max);
2588                return -1;
2589        }
2590        if (turbo_dec->code_block_mode != RTE_BBDEV_TRANSPORT_BLOCK &&
2591                        turbo_dec->code_block_mode != RTE_BBDEV_CODE_BLOCK) {
2592                rte_bbdev_log(ERR,
2593                                "code_block_mode (%u) is out of range 0 <= value <= 1",
2594                                turbo_dec->code_block_mode);
2595                return -1;
2596        }
2597
2598        if (turbo_dec->code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
2599                tb = &turbo_dec->tb_params;
2600                if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE
2601                                || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE)
2602                                && tb->c_neg > 0) {
2603                        rte_bbdev_log(ERR,
2604                                        "k_neg (%u) is out of range %u <= value <= %u",
2605                                        tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2606                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2607                        return -1;
2608                }
2609                if ((tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE
2610                                || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE)
2611                                && tb->c > tb->c_neg) {
2612                        rte_bbdev_log(ERR,
2613                                        "k_pos (%u) is out of range %u <= value <= %u",
2614                                        tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2615                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2616                        return -1;
2617                }
2618                if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1))
2619                        rte_bbdev_log(ERR,
2620                                        "c_neg (%u) is out of range 0 <= value <= %u",
2621                                        tb->c_neg,
2622                                        RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1);
2623                if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) {
2624                        rte_bbdev_log(ERR,
2625                                        "c (%u) is out of range 1 <= value <= %u",
2626                                        tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS);
2627                        return -1;
2628                }
2629                if (tb->cab > tb->c) {
2630                        rte_bbdev_log(ERR,
2631                                        "cab (%u) is greater than c (%u)",
2632                                        tb->cab, tb->c);
2633                        return -1;
2634                }
2635                if (check_bit(turbo_dec->op_flags, RTE_BBDEV_TURBO_EQUALIZER) &&
2636                                (tb->ea < RTE_BBDEV_TURBO_MIN_CB_SIZE
2637                                                || (tb->ea % 2))
2638                                && tb->cab > 0) {
2639                        rte_bbdev_log(ERR,
2640                                        "ea (%u) is less than %u or it is not even",
2641                                        tb->ea, RTE_BBDEV_TURBO_MIN_CB_SIZE);
2642                        return -1;
2643                }
2644                if (check_bit(turbo_dec->op_flags, RTE_BBDEV_TURBO_EQUALIZER) &&
2645                                (tb->eb < RTE_BBDEV_TURBO_MIN_CB_SIZE
2646                                                || (tb->eb % 2))
2647                                && tb->c > tb->cab) {
2648                        rte_bbdev_log(ERR,
2649                                        "eb (%u) is less than %u or it is not even",
2650                                        tb->eb, RTE_BBDEV_TURBO_MIN_CB_SIZE);
                            return -1;
2651                }
2652        } else {
2653                cb = &turbo_dec->cb_params;
2654                if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE
2655                                || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) {
2656                        rte_bbdev_log(ERR,
2657                                        "k (%u) is out of range %u <= value <= %u",
2658                                        cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE,
2659                                        RTE_BBDEV_TURBO_MAX_CB_SIZE);
2660                        return -1;
2661                }
2662                if (check_bit(turbo_dec->op_flags, RTE_BBDEV_TURBO_EQUALIZER) &&
2663                                (cb->e < RTE_BBDEV_TURBO_MIN_CB_SIZE ||
2664                                (cb->e % 2))) {
2665                        rte_bbdev_log(ERR,
2666                                        "e (%u) is less than %u or it is not even",
2667                                        cb->e, RTE_BBDEV_TURBO_MIN_CB_SIZE);
2668                        return -1;
2669                }
2670        }
2671
2672        return 0;
2673}
2674#endif
2675
2676/* Enqueue one decode operation for ACC100 device in CB mode */
2677static inline int
2678enqueue_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2679                uint16_t total_enqueued_cbs)
2680{
2681        union acc100_dma_desc *desc = NULL;
2682        int ret;
2683        uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
2684                h_out_length, mbuf_total_left, seg_total_left;
2685        struct rte_mbuf *input, *h_output_head, *h_output,
2686                *s_output_head, *s_output;
2687
2688#ifdef RTE_LIBRTE_BBDEV_DEBUG
2689        /* Validate op structure */
2690        if (validate_dec_op(op) == -1) {
2691                rte_bbdev_log(ERR, "Turbo decoder validation failed");
2692                return -EINVAL;
2693        }
2694#endif
2695
2696        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2697                        & q->sw_ring_wrap_mask);
2698        desc = q->ring_addr + desc_idx;
2699        acc100_fcw_td_fill(op, &desc->req.fcw_td);
2700
2701        input = op->turbo_dec.input.data;
2702        h_output_head = h_output = op->turbo_dec.hard_output.data;
2703        s_output_head = s_output = op->turbo_dec.soft_output.data;
2704        in_offset = op->turbo_dec.input.offset;
2705        h_out_offset = op->turbo_dec.hard_output.offset;
2706        s_out_offset = op->turbo_dec.soft_output.offset;
2707        h_out_length = s_out_length = 0;
2708        mbuf_total_left = op->turbo_dec.input.length;
2709#ifdef RTE_LIBRTE_BBDEV_DEBUG
2710        /* Check the mbuf pointer before rte_pktmbuf_data_len() uses it */
2711        if (unlikely(input == NULL)) {
2712                rte_bbdev_log(ERR, "Invalid mbuf pointer");
2713                return -EFAULT;
2714        }
2715#endif
2716        seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
2717
2718        /* Set up DMA descriptor */
2719        desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
2720                        & q->sw_ring_wrap_mask);
2721
2722        ret = acc100_dma_desc_td_fill(op, &desc->req, &input, h_output,
2723                        s_output, &in_offset, &h_out_offset, &s_out_offset,
2724                        &h_out_length, &s_out_length, &mbuf_total_left,
2725                        &seg_total_left, 0);
2726
2727        if (unlikely(ret < 0))
2728                return ret;
2729
2730        /* Hard output */
2731        mbuf_append(h_output_head, h_output, h_out_length);
2732
2733        /* Soft output */
2734        if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
2735                mbuf_append(s_output_head, s_output, s_out_length);
2736
2737#ifdef RTE_LIBRTE_BBDEV_DEBUG
2738        rte_memdump(stderr, "FCW", &desc->req.fcw_td,
2739                        sizeof(desc->req.fcw_td) - 8);
2740        rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2741        if (check_mbuf_total_left(mbuf_total_left) != 0)
2742                return -EINVAL;
2743#endif
2744
2745        /* One CB (one op) was successfully prepared to enqueue */
2746        return 1;
2747}
2748
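    /* Loop HARQ combined data through the device using a dummy LDPC decode
     * operation (RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK): the HARQ
     * input is read from one memory interface (host or device DDR) and
     * written straight back out.
     */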
2749static inline int
2750harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2751                uint16_t total_enqueued_cbs)
    {
2752        struct acc100_fcw_ld *fcw;
2753        union acc100_dma_desc *desc;
2754        int next_triplet = 1;
2755        struct rte_mbuf *hq_output_head, *hq_output;
2756        uint16_t harq_dma_length_in, harq_dma_length_out;
2757        uint16_t harq_in_length = op->ldpc_dec.harq_combined_input.length;
2758        if (harq_in_length == 0) {
2759                rte_bbdev_log(ERR, "Loopback of invalid null size");
2760                return -EINVAL;
2761        }
2762
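            /* With 6-bit HARQ compression each LLR occupies 6 bits rather
             * than 8: the logical HARQ size scales by 8/6 while the DMA
             * transfer length scales back by 6/8, both aligned to 64 bytes.
             */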
2763        int h_comp = check_bit(op->ldpc_dec.op_flags,
2764                        RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION) ?
2765                        1 : 0;
2766        if (h_comp == 1) {
2767                harq_in_length = harq_in_length * 8 / 6;
2768                harq_in_length = RTE_ALIGN(harq_in_length, 64);
2769                harq_dma_length_in = harq_in_length * 6 / 8;
2770        } else {
2771                harq_in_length = RTE_ALIGN(harq_in_length, 64);
2772                harq_dma_length_in = harq_in_length;
2773        }
2774        harq_dma_length_out = harq_dma_length_in;
2775
2776        bool ddr_mem_in = check_bit(op->ldpc_dec.op_flags,
2777                        RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE);
2778        union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
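            /* HARQ regions are spaced ACC100_HARQ_OFFSET bytes apart, so the
             * byte offset maps directly to an index in the layout table.
             */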
2779        uint16_t harq_index = (ddr_mem_in ?
2780                        op->ldpc_dec.harq_combined_input.offset :
2781                        op->ldpc_dec.harq_combined_output.offset)
2782                        / ACC100_HARQ_OFFSET;
2783
2784        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2785                        & q->sw_ring_wrap_mask);
2786        desc = q->ring_addr + desc_idx;
2787        fcw = &desc->req.fcw_ld;
2788        /* Set up the FCW for the loopback through HARQ memory */
2789        memset(fcw, 0, sizeof(struct acc100_fcw_ld));
2790        fcw->FCWversion = ACC100_FCW_VER;
2791        fcw->qm = 2;
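            /* Dummy decode parameters: use the largest lifting size, or the
             * smallest one for short HARQ buffers, so that ncb spans the
             * looped-back data.
             */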
2792        fcw->Zc = 384;
2793        if (harq_in_length < 16 * ACC100_N_ZC_1)
2794                fcw->Zc = 16;
2795        fcw->ncb = fcw->Zc * ACC100_N_ZC_1;
2796        fcw->rm_e = 2;
2797        fcw->hcin_en = 1;
2798        fcw->hcout_en = 1;
2799
2800        rte_bbdev_log(DEBUG, "Loopback IN %d Index %d offset %d length %d %d",
2801                        ddr_mem_in, harq_index,
2802                        harq_layout[harq_index].offset, harq_in_length,
2803                        harq_dma_length_in);
2804
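            /* When looping back from DDR with a previously recorded layout,
             * split the input into the stored size0 chunk at the stored
             * offset plus the remainder as size1.
             */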
2805        if (ddr_mem_in && (harq_layout[harq_index].offset > 0)) {
2806                fcw->hcin_size0 = harq_layout[harq_index].size0;
2807                fcw->hcin_offset = harq_layout[harq_index].offset;
2808                fcw->hcin_size1 = harq_in_length - fcw->hcin_offset;
2809                harq_dma_length_in = (fcw->hcin_size0 + fcw->hcin_size1);
2810                if (h_comp == 1)
2811                        harq_dma_length_in = harq_dma_length_in * 6 / 8;
2812        } else {
2813                fcw->hcin_size0 = harq_in_length;
2814        }
2815        harq_layout[harq_index].val = 0;
2816        rte_bbdev_log(DEBUG, "Loopback FCW Config %d %d %d",
2817                        fcw->hcin_size0, fcw->hcin_offset, fcw->hcin_size1);
2818        fcw->hcout_size0 = harq_in_length;
2819        fcw->hcin_decomp_mode = h_comp;
2820        fcw->hcout_comp_mode = h_comp;
2821        fcw->gain_i = 1;
2822        fcw->gain_h = 1;
2823
2824        /* Set the descriptor header; this could also be done at polling time */
2825        acc100_header_init(&desc->req);
2826
2827        /* Null LLR input for Decoder */
2828        desc->req.data_ptrs[next_triplet].address =
2829                        q->lb_in_addr_iova;
2830        desc->req.data_ptrs[next_triplet].blen = 2;
2831        desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_IN;
2832        desc->req.data_ptrs[next_triplet].last = 0;
2833        desc->req.data_ptrs[next_triplet].dma_ext = 0;
2834        next_triplet++;
2835
2836        /* HARQ combine input from either memory interface (host or DDR) */
2837        if (!ddr_mem_in) {
2838                next_triplet = acc100_dma_fill_blk_type_out(&desc->req,
2839                                op->ldpc_dec.harq_combined_input.data,
2840                                op->ldpc_dec.harq_combined_input.offset,
2841                                harq_dma_length_in,
2842                                next_triplet,
2843                                ACC100_DMA_BLKID_IN_HARQ);
2844        } else {
2845                desc->req.data_ptrs[next_triplet].address =
2846                                op->ldpc_dec.harq_combined_input.offset;
2847                desc->req.data_ptrs[next_triplet].blen =
2848                                harq_dma_length_in;
2849                desc->req.data_ptrs[next_triplet].blkid =
2850                                ACC100_DMA_BLKID_IN_HARQ;
2851                desc->req.data_ptrs[next_triplet].dma_ext = 1;
2852                next_triplet++;
2853        }
2854        desc->req.data_ptrs[next_triplet - 1].last = 1;
2855        desc->req.m2dlen = next_triplet;
2856
2857        /* Dropped decoder hard output */
2858        desc->req.data_ptrs[next_triplet].address =
2859                        q->lb_out_addr_iova;
2860        desc->req.data_ptrs[next_triplet].blen = ACC100_BYTES_IN_WORD;
2861        desc->req.data_ptrs[next_triplet].blkid = ACC100_DMA_BLKID_OUT_HARD;
2862        desc->req.data_ptrs[next_triplet].last = 0;
2863        desc->req.data_ptrs[next_triplet].dma_ext = 0;
2864        next_triplet++;
2865
2866        /* HARQ combine output to either memory interface (host or DDR) */
2867        if (check_bit(op->ldpc_dec.op_flags,
2868                        RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE
2869                        )) {
2870                desc->req.data_ptrs[next_triplet].address =
2871                                op->ldpc_dec.harq_combined_output.offset;
2872                desc->req.data_ptrs[next_triplet].blen =
2873                                harq_dma_length_out;
2874                desc->req.data_ptrs[next_triplet].blkid =
2875                                ACC100_DMA_BLKID_OUT_HARQ;
2876                desc->req.data_ptrs[next_triplet].dma_ext = 1;
2877                next_triplet++;
2878        } else {
2879                hq_output_head = op->ldpc_dec.harq_combined_output.data;
2880                hq_output = op->ldpc_dec.harq_combined_output.data;
2881                next_triplet = acc100_dma_fill_blk_type_out(
2882                                &desc->req,
2883                                op->ldpc_dec.harq_combined_output.data,
2884                                op->ldpc_dec.harq_combined_output.offset,
2885                                harq_dma_length_out,
2886                                next_triplet,
2887                                ACC100_DMA_BLKID_OUT_HARQ);
2888                /* HARQ output */
2889                mbuf_append(hq_output_head, hq_output, harq_dma_length_out);
2890                op->ldpc_dec.harq_combined_output.length =
2891                                harq_dma_length_out;
2892        }
2893        desc->req.data_ptrs[next_triplet - 1].last = 1;
2894        desc->req.d2mlen = next_triplet - desc->req.m2dlen;
2895        desc->req.op_addr = op;
2896
2897        /* One CB (one op) was successfully prepared to enqueue */
2898        return 1;
2899}
2900
2901/* Enqueue one LDPC decode operation for ACC100 device in CB mode */
2902static inline int
2903enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
2904                uint16_t total_enqueued_cbs, bool same_op)
2905{
2906        int ret;
2907        if (unlikely(check_bit(op->ldpc_dec.op_flags,
2908                        RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK))) {
2909                ret = harq_loopback(q, op, total_enqueued_cbs);
2910                return ret;
2911        }
2912
2913#ifdef RTE_LIBRTE_BBDEV_DEBUG
2914        /* Validate op structure */
2915        if (validate_ldpc_dec_op(op) == -1) {
2916                rte_bbdev_log(ERR, "LDPC decoder validation failed");
2917                return -EINVAL;
2918        }
2919#endif
2920        union acc100_dma_desc *desc;
2921        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
2922                        & q->sw_ring_wrap_mask);
2923        desc = q->ring_addr + desc_idx;
2924        struct rte_mbuf *input, *h_output_head, *h_output;
2925        uint32_t in_offset, h_out_offset, mbuf_total_left, h_out_length = 0;
2926        input = op->ldpc_dec.input.data;
2927        h_output_head = h_output = op->ldpc_dec.hard_output.data;
2928        in_offset = op->ldpc_dec.input.offset;
2929        h_out_offset = op->ldpc_dec.hard_output.offset;
2930        mbuf_total_left = op->ldpc_dec.input.length;
2931#ifdef RTE_LIBRTE_BBDEV_DEBUG
2932        if (unlikely(input == NULL)) {
2933                rte_bbdev_log(ERR, "Invalid mbuf pointer");
2934                return -EFAULT;
2935        }
2936#endif
2937        union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
2938
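            /* If this op is identical to the previous one, clone the previous
             * descriptor (header, BDESCs and FCW) and patch only the data
             * pointers rather than rebuilding the FCW from scratch.
             */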
2939        if (same_op) {
2940                union acc100_dma_desc *prev_desc;
2941                desc_idx = ((q->sw_ring_head + total_enqueued_cbs - 1)
2942                                & q->sw_ring_wrap_mask);
2943                prev_desc = q->ring_addr + desc_idx;
2944                uint8_t *prev_ptr = (uint8_t *) prev_desc;
2945                uint8_t *new_ptr = (uint8_t *) desc;
2946                /* Copy first 4 words and BDESCs */
2947                rte_memcpy(new_ptr, prev_ptr, ACC100_5GUL_SIZE_0);
2948                rte_memcpy(new_ptr + ACC100_5GUL_OFFSET_0,
2949                                prev_ptr + ACC100_5GUL_OFFSET_0,
2950                                ACC100_5GUL_SIZE_1);
2951                desc->req.op_addr = prev_desc->req.op_addr;
2952                /* Copy FCW */
2953                rte_memcpy(new_ptr + ACC100_DESC_FCW_OFFSET,
2954                                prev_ptr + ACC100_DESC_FCW_OFFSET,
2955                                ACC100_FCW_LD_BLEN);
2956                acc100_dma_desc_ld_update(op, &desc->req, input, h_output,
2957                                &in_offset, &h_out_offset,
2958                                &h_out_length, harq_layout);
2959        } else {
2960                struct acc100_fcw_ld *fcw;
2961                uint32_t seg_total_left;
2962                fcw = &desc->req.fcw_ld;
2963                acc100_fcw_ld_fill(op, fcw, harq_layout);
2964
2965                /* When E overflows one mbuf segment, use rm_e as its length */
2966                if (fcw->rm_e < ACC100_MAX_E_MBUF)
2967                        seg_total_left = rte_pktmbuf_data_len(input)
2968                                        - in_offset;
2969                else
2970                        seg_total_left = fcw->rm_e;
2971
2972                ret = acc100_dma_desc_ld_fill(op, &desc->req, &input, h_output,
2973                                &in_offset, &h_out_offset,
2974                                &h_out_length, &mbuf_total_left,
2975                                &seg_total_left, fcw);
2976                if (unlikely(ret < 0))
2977                        return ret;
2978        }
2979
2980        /* Hard output */
2981        mbuf_append(h_output_head, h_output, h_out_length);
2982#ifndef ACC100_EXT_MEM
2983        if (op->ldpc_dec.harq_combined_output.length > 0) {
2984                /* Push the HARQ output into host memory */
2985                struct rte_mbuf *hq_output_head, *hq_output;
2986                hq_output_head = op->ldpc_dec.harq_combined_output.data;
2987                hq_output = op->ldpc_dec.harq_combined_output.data;
2988                mbuf_append(hq_output_head, hq_output,
2989                                op->ldpc_dec.harq_combined_output.length);
2990        }
2991#endif
2992
2993#ifdef RTE_LIBRTE_BBDEV_DEBUG
2994        rte_memdump(stderr, "FCW", &desc->req.fcw_ld,
2995                        sizeof(desc->req.fcw_ld) - 8);
2996        rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
2997#endif
2998
2999        /* One CB (one op) was successfully prepared to enqueue */
3000        return 1;
3001}
3002
3003
3004/* Enqueue one LDPC decode operation for ACC100 device in TB mode */
3005static inline int
3006enqueue_ldpc_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
3007                uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
3008{
3009        union acc100_dma_desc *desc = NULL;
3010        int ret;
3011        uint8_t r, c;
3012        uint32_t in_offset, h_out_offset,
3013                h_out_length, mbuf_total_left, seg_total_left;
3014        struct rte_mbuf *input, *h_output_head, *h_output;
3015        uint16_t current_enqueued_cbs = 0;
3016
3017#ifdef RTE_LIBRTE_BBDEV_DEBUG
3018        /* Validate op structure */
3019        if (validate_ldpc_dec_op(op) == -1) {
3020                rte_bbdev_log(ERR, "LDPC decoder validation failed");
3021                return -EINVAL;
3022        }
3023#endif
3024
3025        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
3026                        & q->sw_ring_wrap_mask);
3027        desc = q->ring_addr + desc_idx;
3028        uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
3029        union acc100_harq_layout_data *harq_layout = q->d->harq_layout;
3030        acc100_fcw_ld_fill(op, &desc->req.fcw_ld, harq_layout);
3031
3032        input = op->ldpc_dec.input.data;
3033        h_output_head = h_output = op->ldpc_dec.hard_output.data;
3034        in_offset = op->ldpc_dec.input.offset;
3035        h_out_offset = op->ldpc_dec.hard_output.offset;
3036        h_out_length = 0;
3037        mbuf_total_left = op->ldpc_dec.input.length;
3038        c = op->ldpc_dec.tb_params.c;
3039        r = op->ldpc_dec.tb_params.r;
3040
3041        while (mbuf_total_left > 0 && r < c) {
3042
3043                seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
3044
3045                /* Set up DMA descriptor */
3046                desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
3047                                & q->sw_ring_wrap_mask);
3048                desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
3049                desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
3050                ret = acc100_dma_desc_ld_fill(op, &desc->req, &input,
3051                                h_output, &in_offset, &h_out_offset,
3052                                &h_out_length,
3053                                &mbuf_total_left, &seg_total_left,
3054                                &desc->req.fcw_ld);
3055
3056                if (unlikely(ret < 0))
3057                        return ret;
3058
3059                /* Hard output */
3060                mbuf_append(h_output_head, h_output, h_out_length);
3061
3062                /* Set total number of CBs in TB */
3063                desc->req.cbs_in_tb = cbs_in_tb;
3064#ifdef RTE_LIBRTE_BBDEV_DEBUG
3065                rte_memdump(stderr, "FCW", &desc->req.fcw_ld,
3066                                sizeof(desc->req.fcw_ld) - 8);
3067                rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
3068#endif
3069
3070                if (seg_total_left == 0) {
3071                        /* Go to the next mbuf */
3072                        input = input->next;
3073                        in_offset = 0;
3074                        h_output = h_output->next;
3075                        h_out_offset = 0;
3076                }
3077                total_enqueued_cbs++;
3078                current_enqueued_cbs++;
3079                r++;
3080        }
3081
3082#ifdef RTE_LIBRTE_BBDEV_DEBUG
3083        if (check_mbuf_total_left(mbuf_total_left) != 0)
3084                return -EINVAL;
3085#endif
3086        /* Set SDone on last CB descriptor for TB mode */
3087        desc->req.sdone_enable = 1;
3088        desc->req.irq_enable = q->irq_enable;
3089
3090        return current_enqueued_cbs;
3091}
3092
3093/* Enqueue one decode operation for ACC100 device in TB mode */
3094static inline int
3095enqueue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
3096                uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
3097{
3098        union acc100_dma_desc *desc = NULL;
3099        int ret;
3100        uint8_t r, c;
3101        uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
3102                h_out_length, mbuf_total_left, seg_total_left;
3103        struct rte_mbuf *input, *h_output_head, *h_output,
3104                *s_output_head, *s_output;
3105        uint16_t current_enqueued_cbs = 0;
3106
3107#ifdef RTE_LIBRTE_BBDEV_DEBUG
3108        /* Validate op structure */
3109        if (validate_dec_op(op) == -1) {
3110                rte_bbdev_log(ERR, "Turbo decoder validation failed");
3111                return -EINVAL;
3112        }
3113#endif
3114
3115        uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
3116                        & q->sw_ring_wrap_mask);
3117        desc = q->ring_addr + desc_idx;
3118        uint64_t fcw_offset = (desc_idx << 8) + ACC100_DESC_FCW_OFFSET;
3119        acc100_fcw_td_fill(op, &desc->req.fcw_td);
3120
3121        input = op->turbo_dec.input.data;
3122        h_output_head = h_output = op->turbo_dec.hard_output.data;
3123        s_output_head = s_output = op->turbo_dec.soft_output.data;
3124        in_offset = op->turbo_dec.input.offset;
3125        h_out_offset = op->turbo_dec.hard_output.offset;
3126        s_out_offset = op->turbo_dec.soft_output.offset;
3127        h_out_length = s_out_length = 0;
3128        mbuf_total_left = op->turbo_dec.input.length;
3129        c = op->turbo_dec.tb_params.c;
3130        r = op->turbo_dec.tb_params.r;
3131
3132        while (mbuf_total_left > 0 && r < c) {
3133
3134                seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
3135
3136                /* Set up DMA descriptor */
3137                desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
3138                                & q->sw_ring_wrap_mask);
3139                desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
3140                desc->req.data_ptrs[0].blen = ACC100_FCW_TD_BLEN;
3141                ret = acc100_dma_desc_td_fill(op, &desc->req, &input,
3142                                h_output, s_output, &in_offset, &h_out_offset,
3143                                &s_out_offset, &h_out_length, &s_out_length,
3144                                &mbuf_total_left, &seg_total_left, r);
3145
3146                if (unlikely(ret < 0))
3147                        return ret;
3148
3149                /* Hard output */
3150                mbuf_append(h_output_head, h_output, h_out_length);
3151
3152                /* Soft output */
3153                if (check_bit(op->turbo_dec.op_flags,
3154                                RTE_BBDEV_TURBO_SOFT_OUTPUT))
3155                        mbuf_append(s_output_head, s_output, s_out_length);
3156
3157                /* Set total number of CBs in TB */
3158                desc->req.cbs_in_tb = cbs_in_tb;
3159#ifdef RTE_LIBRTE_BBDEV_DEBUG
3160                rte_memdump(stderr, "FCW", &desc->req.fcw_td,
3161                                sizeof(desc->req.fcw_td) - 8);
3162                rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
3163#endif
3164
3165                if (seg_total_left == 0) {
3166                        /* Go to the next mbuf */
3167                        input = input->next;
3168                        in_offset = 0;
3169                        h_output = h_output->next;
3170                        h_out_offset = 0;
3171
3172                        if (check_bit(op->turbo_dec.op_flags,
3173                                        RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
3174                                s_output = s_output->next;
3175                                s_out_offset = 0;
3176                        }
3177                }
3178
3179                total_enqueued_cbs++;
3180                current_enqueued_cbs++;
3181                r++;
3182        }
3183
3184#ifdef RTE_LIBRTE_BBDEV_DEBUG
3185        if (check_mbuf_total_left(mbuf_total_left) != 0)
3186                return -EINVAL;
3187#endif
3188        /* Set SDone on last CB descriptor for TB mode */
3189        desc->req.sdone_enable = 1;
3190        desc->req.irq_enable = q->irq_enable;
3191
3192        return current_enqueued_cbs;
3193}
3194
3195/* Calculates number of CBs in processed encoder TB based on 'r' and input
3196 * length.
3197 */
3198static inline uint8_t
3199get_num_cbs_in_tb_enc(struct rte_bbdev_op_turbo_enc *turbo_enc)
3200{
3201        uint8_t c, c_neg, r, crc24_bits = 0;
3202        uint16_t k, k_neg, k_pos;
3203        uint8_t cbs_in_tb = 0;
3204        int32_t length;
3205
3206        length = turbo_enc->input.length;
3207        r = turbo_enc->tb_params.r;
3208        c = turbo_enc->tb_params.c;
3209        c_neg = turbo_enc->tb_params.c_neg;
3210        k_neg = turbo_enc->tb_params.k_neg;
3211        k_pos = turbo_enc->tb_params.k_pos;
3213        if (check_bit(turbo_enc->op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
3214                crc24_bits = 24;
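            /* Each CB consumes (k - crc24_bits) / 8 bytes of input, with
             * k_neg used for the first c_neg CBs and k_pos for the rest.
             */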
3215        while (length > 0 && r < c) {
3216                k = (r < c_neg) ? k_neg : k_pos;
3217                length -= (k - crc24_bits) >> 3;
3218                r++;
3219                cbs_in_tb++;
3220        }
3221
3222        return cbs_in_tb;
3223}
3224
3225/* Calculates number of CBs in processed decoder TB based on 'r' and input
3226 * length.
3227 */
3228static inline uint16_t
3229get_num_cbs_in_tb_dec(struct rte_bbdev_op_turbo_dec *turbo_dec)
3230{
3231        uint8_t c, c_neg, r = 0;
3232        uint16_t kw, k, k_neg, k_pos, cbs_in_tb = 0;
3233        int32_t length;
3234
3235        length = turbo_dec->input.length;
3236        r = turbo_dec->tb_params.r;
3237        c = turbo_dec->tb_params.c;
3238        c_neg = turbo_dec->tb_params.c_neg;
3239        k_neg = turbo_dec->tb_params.k_neg;
3240        k_pos = turbo_dec->tb_params.k_pos;
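            /* Each decoder CB consumes kw = 3 * ceil32(k + 4) bytes of LLR
             * input: three turbo streams of k + 4 values each, padded to a
             * 32-byte boundary.
             */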
3241        while (length > 0 && r < c) {
3242                k = (r < c_neg) ? k_neg : k_pos;
3243                kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
3244                length -= kw;
3245                r++;
3246                cbs_in_tb++;
3247        }
3248
3249        return cbs_in_tb;
3250}
3251
3252/* Calculates number of CBs in processed LDPC decoder TB based on 'r' and
3253 * input length.
3254 */
3255static inline uint16_t
3256get_num_cbs_in_tb_ldpc_dec(struct rte_bbdev_op_ldpc_dec *ldpc_dec)
3257{
3258        uint16_t r, cbs_in_tb = 0;
3259        int32_t length = ldpc_dec->input.length;
3260        r = ldpc_dec->tb_params.r;
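            /* The first cab CBs use rate-matched length ea, the rest use eb */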
3261        while (length > 0 && r < ldpc_dec->tb_params.c) {
3262                length -= (r < ldpc_dec->tb_params.cab) ?
3263                                ldpc_dec->tb_params.ea :
3264                                ldpc_dec->tb_params.eb;
3265                r++;
3266                cbs_in_tb++;
3267        }
3268        return cbs_in_tb;
3269}
3270
3271/* Enqueue encode operations for ACC100 device in CB mode. */
3272static uint16_t
3273acc100_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
3274                struct rte_bbdev_enc_op **ops, uint16_t num)
3275{
3276        struct acc100_queue *q = q_data->queue_private;
3277        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3278        uint16_t i;
3279        union acc100_dma_desc *desc;
3280        int ret;
3281
3282        for (i = 0; i < num; ++i) {
3283                /* Check if there is space available for further processing */
3284                if (unlikely(avail - 1 < 0))
3285                        break;
3286                avail -= 1;
3287
3288                ret = enqueue_enc_one_op_cb(q, ops[i], i);
3289                if (ret < 0)
3290                        break;
3291        }
3292
3293        if (unlikely(i == 0))
3294                return 0; /* Nothing to enqueue */
3295
3296        /* Set SDone in last CB in enqueued ops for CB mode */
3297        desc = q->ring_addr + ((q->sw_ring_head + i - 1)
3298                        & q->sw_ring_wrap_mask);
3299        desc->req.sdone_enable = 1;
3300        desc->req.irq_enable = q->irq_enable;
3301
3302        acc100_dma_enqueue(q, i, &q_data->queue_stats);
3303
3304        /* Update stats */
3305        q_data->queue_stats.enqueued_count += i;
3306        q_data->queue_stats.enqueue_err_count += num - i;
3307        return i;
3308}
3309
3310/* Check whether encode operations can be multiplexed with a common FCW */
3311static inline bool
3312check_mux(struct rte_bbdev_enc_op **ops, uint16_t num)
    {
3313        uint16_t i;
3314        if (num <= 1)
3315                return false;
3316        for (i = 1; i < num; ++i) {
3317                /* Only mux compatible code blocks */
3318                if (memcmp((uint8_t *)(&ops[i]->ldpc_enc) + ACC100_ENC_OFFSET,
3319                                (uint8_t *)(&ops[0]->ldpc_enc) +
3320                                ACC100_ENC_OFFSET,
3321                                ACC100_CMP_ENC_SIZE) != 0)
3322                        return false;
3323        }
3324        return true;
3325}
3326
3327/* Enqueue LDPC encode operations for ACC100 device in CB mode. */
3328static inline uint16_t
3329acc100_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
3330                struct rte_bbdev_enc_op **ops, uint16_t num)
3331{
3332        struct acc100_queue *q = q_data->queue_private;
3333        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3334        uint16_t i = 0;
3335        union acc100_dma_desc *desc;
3336        int ret, desc_idx = 0;
3337        int16_t enq, left = num;
3338
3339        while (left > 0) {
3340                if (unlikely(avail < 1))
3341                        break;
3342                avail--;
3343                enq = RTE_MIN(left, ACC100_MUX_5GDL_DESC);
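                    /* Mux up to ACC100_MUX_5GDL_DESC compatible CBs into one
                     * descriptor when their FCW-relevant fields match;
                     * otherwise enqueue a single CB per descriptor.
                     */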
3344                if (check_mux(&ops[i], enq)) {
3345                        ret = enqueue_ldpc_enc_n_op_cb(q, &ops[i],
3346                                        desc_idx, enq);
3347                        if (ret < 0)
3348                                break;
3349                        i += enq;
3350                } else {
3351                        ret = enqueue_ldpc_enc_one_op_cb(q, ops[i], desc_idx);
3352                        if (ret < 0)
3353                                break;
3354                        i++;
3355                }
3356                desc_idx++;
3357                left = num - i;
3358        }
3359
3360        if (unlikely(i == 0))
3361                return 0; /* Nothing to enqueue */
3362
3363        /* Set SDone in last CB in enqueued ops for CB mode */
3364        desc = q->ring_addr + ((q->sw_ring_head + desc_idx - 1)
3365                        & q->sw_ring_wrap_mask);
3366        desc->req.sdone_enable = 1;
3367        desc->req.irq_enable = q->irq_enable;
3368
3369        acc100_dma_enqueue(q, desc_idx, &q_data->queue_stats);
3370
3371        /* Update stats */
3372        q_data->queue_stats.enqueued_count += i;
3373        q_data->queue_stats.enqueue_err_count += num - i;
3374
3375        return i;
3376}
3377
3378/* Enqueue encode operations for ACC100 device in TB mode. */
3379static uint16_t
3380acc100_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
3381                struct rte_bbdev_enc_op **ops, uint16_t num)
3382{
3383        struct acc100_queue *q = q_data->queue_private;
3384        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3385        uint16_t i, enqueued_cbs = 0;
3386        uint8_t cbs_in_tb;
3387        int ret;
3388
3389        for (i = 0; i < num; ++i) {
3390                cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
3391                /* Check if there is space available for further processing */
3392                if (unlikely(avail - cbs_in_tb < 0))
3393                        break;
3394                avail -= cbs_in_tb;
3395
3396                ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
3397                if (ret < 0)
3398                        break;
3399                enqueued_cbs += ret;
3400        }
3401        if (unlikely(enqueued_cbs == 0))
3402                return 0; /* Nothing to enqueue */
3403
3404        acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
3405
3406        /* Update stats */
3407        q_data->queue_stats.enqueued_count += i;
3408        q_data->queue_stats.enqueue_err_count += num - i;
3409
3410        return i;
3411}
3412
3413/* Enqueue encode operations for ACC100 device. */
3414static uint16_t
3415acc100_enqueue_enc(struct rte_bbdev_queue_data *q_data,
3416                struct rte_bbdev_enc_op **ops, uint16_t num)
3417{
3418        if (unlikely(num == 0))
3419                return 0;
3420        if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
3421                return acc100_enqueue_enc_tb(q_data, ops, num);
3422        else
3423                return acc100_enqueue_enc_cb(q_data, ops, num);
3424}
3425
3426/* Enqueue LDPC encode operations for ACC100 device. */
3427static uint16_t
3428acc100_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
3429                struct rte_bbdev_enc_op **ops, uint16_t num)
3430{
3431        if (unlikely(num == 0))
3432                return 0;
3433        if (ops[0]->ldpc_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
3434                return acc100_enqueue_enc_tb(q_data, ops, num);
3435        else
3436                return acc100_enqueue_ldpc_enc_cb(q_data, ops, num);
3437}
3438
3439
3440/* Enqueue decode operations for ACC100 device in CB mode */
3441static uint16_t
3442acc100_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
3443                struct rte_bbdev_dec_op **ops, uint16_t num)
3444{
3445        struct acc100_queue *q = q_data->queue_private;
3446        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3447        uint16_t i;
3448        union acc100_dma_desc *desc;
3449        int ret;
3450
3451        for (i = 0; i < num; ++i) {
3452                /* Check if there is space available for further processing */
3453                if (unlikely(avail - 1 < 0))
3454                        break;
3455                avail -= 1;
3456
3457                ret = enqueue_dec_one_op_cb(q, ops[i], i);
3458                if (ret < 0)
3459                        break;
3460        }
3461
3462        if (unlikely(i == 0))
3463                return 0; /* Nothing to enqueue */
3464
3465        /* Set SDone in last CB in enqueued ops for CB mode */
3466        desc = q->ring_addr + ((q->sw_ring_head + i - 1)
3467                        & q->sw_ring_wrap_mask);
3468        desc->req.sdone_enable = 1;
3469        desc->req.irq_enable = q->irq_enable;
3470
3471        acc100_dma_enqueue(q, i, &q_data->queue_stats);
3472
3473        /* Update stats */
3474        q_data->queue_stats.enqueued_count += i;
3475        q_data->queue_stats.enqueue_err_count += num - i;
3476
3477        return i;
3478}
3479
3480/* Check whether two consecutive LDPC decode ops can share a common FCW */
3481static inline bool
3482cmp_ldpc_dec_op(struct rte_bbdev_dec_op **ops)
    {
3483        /* Only mux compatible code blocks */
3484        return (memcmp((uint8_t *)(&ops[0]->ldpc_dec) + ACC100_DEC_OFFSET,
3485                        (uint8_t *)(&ops[1]->ldpc_dec) + ACC100_DEC_OFFSET,
3486                        ACC100_CMP_DEC_SIZE) == 0);
3487}
3491
3492
3493/* Enqueue decode operations for ACC100 device in TB mode */
3494static uint16_t
3495acc100_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
3496                struct rte_bbdev_dec_op **ops, uint16_t num)
3497{
3498        struct acc100_queue *q = q_data->queue_private;
3499        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3500        uint16_t i, enqueued_cbs = 0;
3501        uint8_t cbs_in_tb;
3502        int ret;
3503
3504        for (i = 0; i < num; ++i) {
3505                cbs_in_tb = get_num_cbs_in_tb_ldpc_dec(&ops[i]->ldpc_dec);
3506                /* Check if there is space available for further processing */
3507                if (unlikely(avail - cbs_in_tb < 0))
3508                        break;
3509                avail -= cbs_in_tb;
3510
3511                ret = enqueue_ldpc_dec_one_op_tb(q, ops[i],
3512                                enqueued_cbs, cbs_in_tb);
3513                if (ret < 0)
3514                        break;
3515                enqueued_cbs += ret;
3516        }
3517
3518        acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
3519
3520        /* Update stats */
3521        q_data->queue_stats.enqueued_count += i;
3522        q_data->queue_stats.enqueue_err_count += num - i;
3523        return i;
3524}
3525
3526/* Enqueue decode operations for ACC100 device in CB mode */
3527static uint16_t
3528acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
3529                struct rte_bbdev_dec_op **ops, uint16_t num)
3530{
3531        struct acc100_queue *q = q_data->queue_private;
3532        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3533        uint16_t i;
3534        union acc100_dma_desc *desc;
3535        int ret;
3536        bool same_op = false;
3537        for (i = 0; i < num; ++i) {
3538                /* Check if there is space available for further processing */
3539                if (unlikely(avail < 1))
3540                        break;
3541                avail -= 1;
3542
3543                if (i > 0)
3544                        same_op = cmp_ldpc_dec_op(&ops[i-1]);
3545                rte_bbdev_log_debug("Op %d %d %d %d %d %d %d %d %d %d %d %d",
3546                        i, ops[i]->ldpc_dec.op_flags, ops[i]->ldpc_dec.rv_index,
3547                        ops[i]->ldpc_dec.iter_max, ops[i]->ldpc_dec.iter_count,
3548                        ops[i]->ldpc_dec.basegraph, ops[i]->ldpc_dec.z_c,
3549                        ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
3550                        ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
3551                        same_op);
3552                ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
3553                if (ret < 0)
3554                        break;
3555        }
3556
3557        if (unlikely(i == 0))
3558                return 0; /* Nothing to enqueue */
3559
3560        /* Set SDone in last CB in enqueued ops for CB mode */
3561        desc = q->ring_addr + ((q->sw_ring_head + i - 1)
3562                        & q->sw_ring_wrap_mask);
3563
3564        desc->req.sdone_enable = 1;
3565        desc->req.irq_enable = q->irq_enable;
3566
3567        acc100_dma_enqueue(q, i, &q_data->queue_stats);
3568
3569        /* Update stats */
3570        q_data->queue_stats.enqueued_count += i;
3571        q_data->queue_stats.enqueue_err_count += num - i;
3572        return i;
3573}
3574
3575
3576/* Enqueue decode operations for ACC100 device in TB mode */
3577static uint16_t
3578acc100_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
3579                struct rte_bbdev_dec_op **ops, uint16_t num)
3580{
3581        struct acc100_queue *q = q_data->queue_private;
3582        int32_t avail = q->sw_ring_depth + q->sw_ring_tail - q->sw_ring_head;
3583        uint16_t i, enqueued_cbs = 0;
3584        uint8_t cbs_in_tb;
3585        int ret;
3586
3587        for (i = 0; i < num; ++i) {
3588                cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
3589                /* Check if there is space available for further processing */
3590                if (unlikely(avail - cbs_in_tb < 0))
3591                        break;
3592                avail -= cbs_in_tb;
3593
3594                ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
3595                if (ret < 0)
3596                        break;
3597                enqueued_cbs += ret;
3598        }
3599
3600        acc100_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
3601
3602        /* Update stats */
3603        q_data->queue_stats.enqueued_count += i;
3604        q_data->queue_stats.enqueue_err_count += num - i;
3605
3606        return i;
3607}
3608
3609/* Enqueue decode operations for ACC100 device. */
3610static uint16_t
3611acc100_enqueue_dec(struct rte_bbdev_queue_data *q_data,
3612                struct rte_bbdev_dec_op **ops, uint16_t num)
3613{
3614        if (unlikely(num == 0))
3615                return 0;
3616        if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
3617                return acc100_enqueue_dec_tb(q_data, ops, num);
3618        else
3619                return acc100_enqueue_dec_cb(q_data, ops, num);
3620}
3621
3622/* Enqueue LDPC decode operations for ACC100 device. */
3623static uint16_t
3624acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
3625                struct rte_bbdev_dec_op **ops, uint16_t num)
3626{
3627        struct acc100_queue *q = q_data->queue_private;
3628        int32_t aq_avail = q->aq_depth +
3629                        (q->aq_dequeued - q->aq_enqueued) / 128;
3630
3631        if (unlikely((aq_avail == 0) || (num == 0)))
3632                return 0;
3633
3634        if (ops[0]->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
3635                return acc100_enqueue_ldpc_dec_tb(q_data, ops, num);
3636        else
3637                return acc100_enqueue_ldpc_dec_cb(q_data, ops, num);
3638}
3639
3640
3641/* Dequeue one encode operation from ACC100 device in CB mode */
3642static inline int
3643dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
3644                uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
3645{
3646        union acc100_dma_desc *desc, atom_desc;
3647        union acc100_dma_rsp_desc rsp;
3648        struct rte_bbdev_enc_op *op;
3649        int i;
3650
3651        desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
3652                        & q->sw_ring_wrap_mask);
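            /* Snapshot the 8-byte descriptor header atomically, as the HW may
             * be updating the response word concurrently.
             */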
3653        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3654                        __ATOMIC_RELAXED);
3655
3656        /* Check fdone bit */
3657        if (!(atom_desc.rsp.val & ACC100_FDONE))
3658                return -1;
3659
3660        rsp.val = atom_desc.rsp.val;
3661        rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
3662
3663        /* Dequeue */
3664        op = desc->req.op_addr;
3665
3666        /* Clearing status, it will be set based on response */
3667        op->status = 0;
3668
3669        op->status |= ((rsp.input_err)
3670                        ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3671        op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3672        op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3673
3674        if (desc->req.last_desc_in_batch) {
3675                (*aq_dequeued)++;
3676                desc->req.last_desc_in_batch = 0;
3677        }
3678        desc->rsp.val = ACC100_DMA_DESC_TYPE;
3679        desc->rsp.add_info_0 = 0; /* Reserved bits */
3680        desc->rsp.add_info_1 = 0; /* Reserved bits */
3681
3682        /* Flag that the muxing causes loss of opaque data */
3683        op->opaque_data = (void *)-1;
3684        for (i = 0 ; i < desc->req.numCBs; i++)
3685                ref_op[i] = op;
3686
3687        /* One op (possibly muxing several CBs) was successfully dequeued */
3688        return desc->req.numCBs;
3689}
3690
3691/* Dequeue one encode operation from ACC100 device in TB mode */
3692static inline int
3693dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
3694                uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
3695{
3696        union acc100_dma_desc *desc, *last_desc, atom_desc;
3697        union acc100_dma_rsp_desc rsp;
3698        struct rte_bbdev_enc_op *op;
3699        uint8_t i = 0;
3700        uint16_t current_dequeued_cbs = 0, cbs_in_tb;
3701
3702        desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
3703                        & q->sw_ring_wrap_mask);
3704        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3705                        __ATOMIC_RELAXED);
3706
3707        /* Check fdone bit */
3708        if (!(atom_desc.rsp.val & ACC100_FDONE))
3709                return -1;
3710
3711        /* Get number of CBs in dequeued TB */
3712        cbs_in_tb = desc->req.cbs_in_tb;
3713        /* Get last CB */
3714        last_desc = q->ring_addr + ((q->sw_ring_tail
3715                        + total_dequeued_cbs + cbs_in_tb - 1)
3716                        & q->sw_ring_wrap_mask);
3717        /* Check if last CB in TB is ready to dequeue (and thus
3718         * the whole TB) - checking sdone bit. If not return.
3719         */
3720        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
3721                        __ATOMIC_RELAXED);
3722        if (!(atom_desc.rsp.val & ACC100_SDONE))
3723                return -1;
3724
3725        /* Dequeue */
3726        op = desc->req.op_addr;
3727
3728        /* Clearing status, it will be set based on response */
3729        op->status = 0;
3730
3731        while (i < cbs_in_tb) {
3732                desc = q->ring_addr + ((q->sw_ring_tail
3733                                + total_dequeued_cbs)
3734                                & q->sw_ring_wrap_mask);
3735                atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3736                                __ATOMIC_RELAXED);
3737                rsp.val = atom_desc.rsp.val;
3738                rte_bbdev_log_debug("Resp. desc %p: %x", desc,
3739                                rsp.val);
3740
3741                op->status |= ((rsp.input_err)
3742                                ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3743                op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3744                op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3745
3746                if (desc->req.last_desc_in_batch) {
3747                        (*aq_dequeued)++;
3748                        desc->req.last_desc_in_batch = 0;
3749                }
3750                desc->rsp.val = ACC100_DMA_DESC_TYPE;
3751                desc->rsp.add_info_0 = 0;
3752                desc->rsp.add_info_1 = 0;
3753                total_dequeued_cbs++;
3754                current_dequeued_cbs++;
3755                i++;
3756        }
3757
3758        *ref_op = op;
3759
3760        return current_dequeued_cbs;
3761}
3762
3763/* Dequeue one decode operation from ACC100 device in CB mode */
3764static inline int
3765dequeue_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
3766                struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
3767                uint16_t dequeued_cbs, uint32_t *aq_dequeued)
3768{
3769        union acc100_dma_desc *desc, atom_desc;
3770        union acc100_dma_rsp_desc rsp;
3771        struct rte_bbdev_dec_op *op;
3772
3773        desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3774                        & q->sw_ring_wrap_mask);
3775        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3776                        __ATOMIC_RELAXED);
3777
3778        /* Check fdone bit */
3779        if (!(atom_desc.rsp.val & ACC100_FDONE))
3780                return -1;
3781
3782        rsp.val = atom_desc.rsp.val;
3783        rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
3784
3785        /* Dequeue */
3786        op = desc->req.op_addr;
3787
3788        /* Clearing status, it will be set based on response */
3789        op->status = 0;
3790        op->status |= ((rsp.input_err)
3791                        ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3792        op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3793        op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3794        if (op->status != 0) {
3795                q_data->queue_stats.dequeue_err_count++;
3796                acc100_check_ir(q->d);
3797        }
3798
3799        /* Report the CRC status only when no other error has been set */
3800        if (!op->status)
3801                op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
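            /* The device appears to report turbo iterations at half-iteration
             * granularity, hence the division by two.
             */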
3802        op->turbo_dec.iter_count = (uint8_t) rsp.iter_cnt / 2;
3803        /* Check if this is the last desc in batch (Atomic Queue) */
3804        if (desc->req.last_desc_in_batch) {
3805                (*aq_dequeued)++;
3806                desc->req.last_desc_in_batch = 0;
3807        }
3808        desc->rsp.val = ACC100_DMA_DESC_TYPE;
3809        desc->rsp.add_info_0 = 0;
3810        desc->rsp.add_info_1 = 0;
3811        *ref_op = op;
3812
3813        /* One CB (op) was successfully dequeued */
3814        return 1;
3815}
3816
3817/* Dequeue one LDPC decode operation from ACC100 device in CB mode */
3818static inline int
3819dequeue_ldpc_dec_one_op_cb(struct rte_bbdev_queue_data *q_data,
3820                struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
3821                uint16_t dequeued_cbs, uint32_t *aq_dequeued)
3822{
3823        union acc100_dma_desc *desc, atom_desc;
3824        union acc100_dma_rsp_desc rsp;
3825        struct rte_bbdev_dec_op *op;
3826
3827        desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3828                        & q->sw_ring_wrap_mask);
3829        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3830                        __ATOMIC_RELAXED);
3831
3832        /* Check fdone bit */
3833        if (!(atom_desc.rsp.val & ACC100_FDONE))
3834                return -1;
3835
3836        rsp.val = atom_desc.rsp.val;
3837
3838        /* Dequeue */
3839        op = desc->req.op_addr;
3840
3841        /* Clearing status, it will be set based on response */
3842        op->status = 0;
3843        op->status |= rsp.input_err << RTE_BBDEV_DATA_ERROR;
3844        op->status |= rsp.dma_err << RTE_BBDEV_DRV_ERROR;
3845        op->status |= rsp.fcw_err << RTE_BBDEV_DRV_ERROR;
3846        if (op->status != 0)
3847                q_data->queue_stats.dequeue_err_count++;
3848
3849        op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
3850        if (op->ldpc_dec.hard_output.length > 0 && !rsp.synd_ok)
3851                op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR;
3852        op->ldpc_dec.iter_count = (uint8_t) rsp.iter_cnt;
3853
3854        if (op->status & (1 << RTE_BBDEV_DRV_ERROR))
3855                acc100_check_ir(q->d);
3856
3857        /* Check if this is the last desc in batch (Atomic Queue) */
3858        if (desc->req.last_desc_in_batch) {
3859                (*aq_dequeued)++;
3860                desc->req.last_desc_in_batch = 0;
3861        }
3862
3863        desc->rsp.val = ACC100_DMA_DESC_TYPE;
3864        desc->rsp.add_info_0 = 0;
3865        desc->rsp.add_info_1 = 0;
3866
3867        *ref_op = op;
3868
3869        /* One CB (op) was successfully dequeued */
3870        return 1;
3871}
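/*
 * Note on the ring indexing used by these dequeue helpers: the
 * "& q->sw_ring_wrap_mask" assumes a power-of-two ring size, so the AND
 * is a cheap modulo. Worked example with illustrative values: for a
 * 1024-descriptor ring the mask is 0x3FF, so with sw_ring_tail = 1020
 * and dequeued_cbs = 6 the descriptor index is (1020 + 6) & 0x3FF = 2,
 * i.e. the index wraps past the ring end.
 */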
3872
/* Dequeue one decode operation from ACC100 device in TB mode. */
3874static inline int
3875dequeue_dec_one_op_tb(struct acc100_queue *q, struct rte_bbdev_dec_op **ref_op,
3876                uint16_t dequeued_cbs, uint32_t *aq_dequeued)
3877{
3878        union acc100_dma_desc *desc, *last_desc, atom_desc;
3879        union acc100_dma_rsp_desc rsp;
3880        struct rte_bbdev_dec_op *op;
3881        uint8_t cbs_in_tb = 1, cb_idx = 0;
3882
3883        desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3884                        & q->sw_ring_wrap_mask);
3885        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3886                        __ATOMIC_RELAXED);
3887
3888        /* Check fdone bit */
3889        if (!(atom_desc.rsp.val & ACC100_FDONE))
3890                return -1;
3891
3892        /* Dequeue */
3893        op = desc->req.op_addr;
3894
3895        /* Get number of CBs in dequeued TB */
3896        cbs_in_tb = desc->req.cbs_in_tb;
3897        /* Get last CB */
3898        last_desc = q->ring_addr + ((q->sw_ring_tail
3899                        + dequeued_cbs + cbs_in_tb - 1)
3900                        & q->sw_ring_wrap_mask);
3901        /* Check if last CB in TB is ready to dequeue (and thus
3902         * the whole TB) - checking sdone bit. If not return.
3903         */
3904        atom_desc.atom_hdr = __atomic_load_n((uint64_t *)last_desc,
3905                        __ATOMIC_RELAXED);
3906        if (!(atom_desc.rsp.val & ACC100_SDONE))
3907                return -1;
3908
3909        /* Clearing status, it will be set based on response */
3910        op->status = 0;
3911
        /* Read remaining CBs if any exist */
3913        while (cb_idx < cbs_in_tb) {
3914                desc = q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3915                                & q->sw_ring_wrap_mask);
3916                atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
3917                                __ATOMIC_RELAXED);
3918                rsp.val = atom_desc.rsp.val;
3919                rte_bbdev_log_debug("Resp. desc %p: %x", desc,
3920                                rsp.val);
3921
3922                op->status |= ((rsp.input_err)
3923                                ? (1 << RTE_BBDEV_DATA_ERROR) : 0);
3924                op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3925                op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
3926
                /* CRC status is only reported when no other error is set */
3928                if (!op->status)
3929                        op->status |= rsp.crc_status << RTE_BBDEV_CRC_ERROR;
3930                op->turbo_dec.iter_count = RTE_MAX((uint8_t) rsp.iter_cnt,
3931                                op->turbo_dec.iter_count);
3932
3933                /* Check if this is the last desc in batch (Atomic Queue) */
3934                if (desc->req.last_desc_in_batch) {
3935                        (*aq_dequeued)++;
3936                        desc->req.last_desc_in_batch = 0;
3937                }
3938                desc->rsp.val = ACC100_DMA_DESC_TYPE;
3939                desc->rsp.add_info_0 = 0;
3940                desc->rsp.add_info_1 = 0;
3941                dequeued_cbs++;
3942                cb_idx++;
3943        }
3944
3945        *ref_op = op;
3946
3947        return cb_idx;
3948}
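/*
 * TB-mode completion above is a two-phase check: FDONE on the first
 * descriptor only shows the first CB finished, while SDONE on the last
 * descriptor of the TB signals that the whole TB is done. Worked example
 * with illustrative values: for dequeued_cbs = 5 and cbs_in_tb = 3,
 * FDONE is read at index (tail + 5) & mask and SDONE at
 * (tail + 5 + 3 - 1) & mask before any descriptor is consumed.
 */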
3949
3950/* Dequeue encode operations from ACC100 device. */
3951static uint16_t
3952acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
3953                struct rte_bbdev_enc_op **ops, uint16_t num)
3954{
3955        struct acc100_queue *q = q_data->queue_private;
3956        uint16_t dequeue_num;
3957        uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
3958        uint32_t aq_dequeued = 0;
3959        uint16_t i, dequeued_cbs = 0;
3960        struct rte_bbdev_enc_op *op;
3961        int ret;
3962
3963#ifdef RTE_LIBRTE_BBDEV_DEBUG
3964        if (unlikely(ops == NULL || q == NULL)) {
3965                rte_bbdev_log_debug("Unexpected undefined pointer");
3966                return 0;
3967        }
3968#endif
3969
3970        dequeue_num = (avail < num) ? avail : num;
3971
3972        for (i = 0; i < dequeue_num; ++i) {
3973                op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
3974                        & q->sw_ring_wrap_mask))->req.op_addr;
3975                if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
3976                        ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
3977                                        &aq_dequeued);
3978                else
3979                        ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
3980                                        &aq_dequeued);
3981
3982                if (ret < 0)
3983                        break;
3984                dequeued_cbs += ret;
3985        }
3986
3987        q->aq_dequeued += aq_dequeued;
3988        q->sw_ring_tail += dequeued_cbs;
3989
        /* Update dequeue stats */
3991        q_data->queue_stats.dequeued_count += i;
3992
3993        return i;
3994}
3995
3996/* Dequeue LDPC encode operations from ACC100 device. */
3997static uint16_t
3998acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
3999                struct rte_bbdev_enc_op **ops, uint16_t num)
4000{
4001        struct acc100_queue *q = q_data->queue_private;
4002        uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
4003        uint32_t aq_dequeued = 0;
4004        uint16_t dequeue_num, i, dequeued_cbs = 0, dequeued_descs = 0;
4005        int ret;
4006
4007#ifdef RTE_LIBRTE_BBDEV_DEBUG
        if (unlikely(ops == NULL || q == NULL)) {
                rte_bbdev_log_debug("Unexpected undefined pointer");
                return 0;
        }
4010#endif
4011
4012        dequeue_num = RTE_MIN(avail, num);
4013
4014        for (i = 0; i < dequeue_num; i++) {
4015                ret = dequeue_enc_one_op_cb(q, &ops[dequeued_cbs],
4016                                dequeued_descs, &aq_dequeued);
4017                if (ret < 0)
4018                        break;
4019                dequeued_cbs += ret;
4020                dequeued_descs++;
4021                if (dequeued_cbs >= num)
4022                        break;
4023        }
4024
4025        q->aq_dequeued += aq_dequeued;
4026        q->sw_ring_tail += dequeued_descs;
4027
        /* Update dequeue stats */
4029        q_data->queue_stats.dequeued_count += dequeued_cbs;
4030
4031        return dequeued_cbs;
4032}
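/*
 * Unlike the 4G path above, one LDPC encode descriptor may carry several
 * muxed CBs, so this loop advances dequeued_descs by one per iteration
 * while dequeue_enc_one_op_cb() may account for more than one CB. The
 * ring tail is therefore moved by descriptors, while the return value
 * and stats are counted in CBs: e.g. a descriptor muxing 4 CBs frees one
 * ring slot but reports 4 dequeued operations.
 */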
4033
4034
4035/* Dequeue decode operations from ACC100 device. */
4036static uint16_t
4037acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
4038                struct rte_bbdev_dec_op **ops, uint16_t num)
4039{
4040        struct acc100_queue *q = q_data->queue_private;
4041        uint16_t dequeue_num;
4042        uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
4043        uint32_t aq_dequeued = 0;
4044        uint16_t i;
4045        uint16_t dequeued_cbs = 0;
4046        struct rte_bbdev_dec_op *op;
4047        int ret;
4048
4049#ifdef RTE_LIBRTE_BBDEV_DEBUG
        if (unlikely(ops == NULL || q == NULL)) {
                rte_bbdev_log_debug("Unexpected undefined pointer");
                return 0;
        }
4052#endif
4053
4054        dequeue_num = (avail < num) ? avail : num;
4055
4056        for (i = 0; i < dequeue_num; ++i) {
4057                op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
4058                        & q->sw_ring_wrap_mask))->req.op_addr;
4059                if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
4060                        ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
4061                                        &aq_dequeued);
4062                else
4063                        ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
4064                                        dequeued_cbs, &aq_dequeued);
4065
4066                if (ret < 0)
4067                        break;
4068                dequeued_cbs += ret;
4069        }
4070
4071        q->aq_dequeued += aq_dequeued;
4072        q->sw_ring_tail += dequeued_cbs;
4073
        /* Update dequeue stats */
4075        q_data->queue_stats.dequeued_count += i;
4076
4077        return i;
4078}
4079
/* Dequeue LDPC decode operations from ACC100 device. */
4081static uint16_t
4082acc100_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
4083                struct rte_bbdev_dec_op **ops, uint16_t num)
4084{
4085        struct acc100_queue *q = q_data->queue_private;
4086        uint16_t dequeue_num;
4087        uint32_t avail = q->sw_ring_head - q->sw_ring_tail;
4088        uint32_t aq_dequeued = 0;
4089        uint16_t i;
4090        uint16_t dequeued_cbs = 0;
4091        struct rte_bbdev_dec_op *op;
4092        int ret;
4093
4094#ifdef RTE_LIBRTE_BBDEV_DEBUG
        if (unlikely(ops == NULL || q == NULL)) {
                rte_bbdev_log_debug("Unexpected undefined pointer");
                return 0;
        }
4097#endif
4098
4099        dequeue_num = RTE_MIN(avail, num);
4100
4101        for (i = 0; i < dequeue_num; ++i) {
4102                op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
4103                        & q->sw_ring_wrap_mask))->req.op_addr;
4104                if (op->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
4105                        ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
4106                                        &aq_dequeued);
4107                else
4108                        ret = dequeue_ldpc_dec_one_op_cb(
4109                                        q_data, q, &ops[i], dequeued_cbs,
4110                                        &aq_dequeued);
4111
4112                if (ret < 0)
4113                        break;
4114                dequeued_cbs += ret;
4115        }
4116
4117        q->aq_dequeued += aq_dequeued;
4118        q->sw_ring_tail += dequeued_cbs;
4119
        /* Update dequeue stats */
4121        q_data->queue_stats.dequeued_count += i;
4122
4123        return i;
4124}
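/*
 * Illustrative usage (not part of the driver): an application reaches
 * this path through the bbdev burst API, assuming dev_id and queue_id
 * were configured beforehand and handle_bad_op() is a hypothetical
 * helper.
 *
 *	struct rte_bbdev_dec_op *deq[32];
 *	uint16_t nb = rte_bbdev_dequeue_ldpc_dec_ops(dev_id, queue_id,
 *			deq, 32);
 *	for (uint16_t j = 0; j < nb; j++)
 *		if (deq[j]->status != 0)
 *			handle_bad_op(deq[j]);
 */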
4125
4126/* Initialization Function */
4127static void
4128acc100_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
4129{
4130        struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
4131
4132        dev->dev_ops = &acc100_bbdev_ops;
4133        dev->enqueue_enc_ops = acc100_enqueue_enc;
4134        dev->enqueue_dec_ops = acc100_enqueue_dec;
4135        dev->dequeue_enc_ops = acc100_dequeue_enc;
4136        dev->dequeue_dec_ops = acc100_dequeue_dec;
4137        dev->enqueue_ldpc_enc_ops = acc100_enqueue_ldpc_enc;
4138        dev->enqueue_ldpc_dec_ops = acc100_enqueue_ldpc_dec;
4139        dev->dequeue_ldpc_enc_ops = acc100_dequeue_ldpc_enc;
4140        dev->dequeue_ldpc_dec_ops = acc100_dequeue_ldpc_dec;
4141
4142        ((struct acc100_device *) dev->data->dev_private)->pf_device =
4143                        !strcmp(drv->driver.name,
4144                                        RTE_STR(ACC100PF_DRIVER_NAME));
4145        ((struct acc100_device *) dev->data->dev_private)->mmio_base =
4146                        pci_dev->mem_resource[0].addr;
4147
4148        rte_bbdev_log_debug("Init device %s [%s] @ vaddr %p paddr %#"PRIx64"",
4149                        drv->driver.name, dev->data->name,
4150                        (void *)pci_dev->mem_resource[0].addr,
4151                        pci_dev->mem_resource[0].phys_addr);
4152}
4153
4154static int acc100_pci_probe(struct rte_pci_driver *pci_drv,
4155        struct rte_pci_device *pci_dev)
4156{
4157        struct rte_bbdev *bbdev = NULL;
4158        char dev_name[RTE_BBDEV_NAME_MAX_LEN];
4159
4160        if (pci_dev == NULL) {
4161                rte_bbdev_log(ERR, "NULL PCI device");
4162                return -EINVAL;
4163        }
4164
4165        rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name));
4166
4167        /* Allocate memory to be used privately by drivers */
4168        bbdev = rte_bbdev_allocate(pci_dev->device.name);
4169        if (bbdev == NULL)
4170                return -ENODEV;
4171
        /* Allocate device private memory */
4173        bbdev->data->dev_private = rte_zmalloc_socket(dev_name,
4174                        sizeof(struct acc100_device), RTE_CACHE_LINE_SIZE,
4175                        pci_dev->device.numa_node);
4176
        if (bbdev->data->dev_private == NULL) {
                rte_bbdev_log(CRIT,
                                "Allocation of %zu bytes for device \"%s\" failed",
                                sizeof(struct acc100_device), dev_name);
                rte_bbdev_release(bbdev);
                return -ENOMEM;
        }
4184
4185        /* Fill HW specific part of device structure */
4186        bbdev->device = &pci_dev->device;
4187        bbdev->intr_handle = pci_dev->intr_handle;
4188        bbdev->data->socket_id = pci_dev->device.numa_node;
4189
4190        /* Invoke ACC100 device initialization function */
4191        acc100_bbdev_init(bbdev, pci_drv);
4192
4193        rte_bbdev_log_debug("Initialised bbdev %s (id = %u)",
4194                        dev_name, bbdev->data->dev_id);
4195        return 0;
4196}
4197
4198static int acc100_pci_remove(struct rte_pci_device *pci_dev)
4199{
4200        struct rte_bbdev *bbdev;
4201        int ret;
4202        uint8_t dev_id;
4203
4204        if (pci_dev == NULL)
4205                return -EINVAL;
4206
4207        /* Find device */
4208        bbdev = rte_bbdev_get_named_dev(pci_dev->device.name);
4209        if (bbdev == NULL) {
4210                rte_bbdev_log(CRIT,
4211                                "Couldn't find HW dev \"%s\" to uninitialise it",
4212                                pci_dev->device.name);
4213                return -ENODEV;
4214        }
4215        dev_id = bbdev->data->dev_id;
4216
4217        /* free device private memory before close */
4218        rte_free(bbdev->data->dev_private);
4219
4220        /* Close device */
4221        ret = rte_bbdev_close(dev_id);
4222        if (ret < 0)
4223                rte_bbdev_log(ERR,
4224                                "Device %i failed to close during uninit: %i",
4225                                dev_id, ret);
4226
4227        /* release bbdev from library */
4228        rte_bbdev_release(bbdev);
4229
4230        rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id);
4231
4232        return 0;
4233}
4234
4235static struct rte_pci_driver acc100_pci_pf_driver = {
4236                .probe = acc100_pci_probe,
4237                .remove = acc100_pci_remove,
4238                .id_table = pci_id_acc100_pf_map,
4239                .drv_flags = RTE_PCI_DRV_NEED_MAPPING
4240};
4241
4242static struct rte_pci_driver acc100_pci_vf_driver = {
4243                .probe = acc100_pci_probe,
4244                .remove = acc100_pci_remove,
4245                .id_table = pci_id_acc100_vf_map,
4246                .drv_flags = RTE_PCI_DRV_NEED_MAPPING
4247};
4248
4249RTE_PMD_REGISTER_PCI(ACC100PF_DRIVER_NAME, acc100_pci_pf_driver);
4250RTE_PMD_REGISTER_PCI_TABLE(ACC100PF_DRIVER_NAME, pci_id_acc100_pf_map);
4251RTE_PMD_REGISTER_PCI(ACC100VF_DRIVER_NAME, acc100_pci_vf_driver);
4252RTE_PMD_REGISTER_PCI_TABLE(ACC100VF_DRIVER_NAME, pci_id_acc100_vf_map);
4253
/*
 * Workaround implementation to fix the power-on status of some 5GUL engines.
 * This requires DMA permission if ported outside DPDK.
 * It resolves the state of these engines by running a dummy operation and
 * then resetting them so that their state is reliably defined.
 */
4261static void
4262poweron_cleanup(struct rte_bbdev *bbdev, struct acc100_device *d,
4263                struct rte_acc100_conf *conf)
4264{
4265        int i, template_idx, qg_idx;
4266        uint32_t address, status, value;
        rte_bbdev_log(INFO,
                        "Need to clear power-on 5GUL status in internal memory");
4268        /* Reset LDPC Cores */
4269        for (i = 0; i < ACC100_ENGINES_MAX; i++)
4270                acc100_reg_write(d, HWPfFecUl5gCntrlReg +
4271                                ACC100_ENGINE_OFFSET * i, ACC100_RESET_HI);
4272        usleep(ACC100_LONG_WAIT);
4273        for (i = 0; i < ACC100_ENGINES_MAX; i++)
4274                acc100_reg_write(d, HWPfFecUl5gCntrlReg +
4275                                ACC100_ENGINE_OFFSET * i, ACC100_RESET_LO);
4276        usleep(ACC100_LONG_WAIT);
4277        /* Prepare dummy workload */
4278        alloc_2x64mb_sw_rings_mem(bbdev, d, 0);
4279        /* Set base addresses */
4280        uint32_t phys_high = (uint32_t)(d->sw_rings_iova >> 32);
4281        uint32_t phys_low  = (uint32_t)(d->sw_rings_iova &
4282                        ~(ACC100_SIZE_64MBYTE-1));
4283        acc100_reg_write(d, HWPfDmaFec5GulDescBaseHiRegVf, phys_high);
4284        acc100_reg_write(d, HWPfDmaFec5GulDescBaseLoRegVf, phys_low);
4285
        /* Descriptor for dummy 5GUL code block processing */
4287        union acc100_dma_desc *desc = NULL;
4288        desc = d->sw_rings;
4289        desc->req.data_ptrs[0].address = d->sw_rings_iova +
4290                        ACC100_DESC_FCW_OFFSET;
4291        desc->req.data_ptrs[0].blen = ACC100_FCW_LD_BLEN;
4292        desc->req.data_ptrs[0].blkid = ACC100_DMA_BLKID_FCW;
4293        desc->req.data_ptrs[0].last = 0;
4294        desc->req.data_ptrs[0].dma_ext = 0;
4295        desc->req.data_ptrs[1].address = d->sw_rings_iova + 512;
4296        desc->req.data_ptrs[1].blkid = ACC100_DMA_BLKID_IN;
4297        desc->req.data_ptrs[1].last = 1;
4298        desc->req.data_ptrs[1].dma_ext = 0;
4299        desc->req.data_ptrs[1].blen = 44;
4300        desc->req.data_ptrs[2].address = d->sw_rings_iova + 1024;
4301        desc->req.data_ptrs[2].blkid = ACC100_DMA_BLKID_OUT_ENC;
4302        desc->req.data_ptrs[2].last = 1;
4303        desc->req.data_ptrs[2].dma_ext = 0;
4304        desc->req.data_ptrs[2].blen = 5;
4305        /* Dummy FCW */
4306        desc->req.fcw_ld.FCWversion = ACC100_FCW_VER;
4307        desc->req.fcw_ld.qm = 1;
4308        desc->req.fcw_ld.nfiller = 30;
4309        desc->req.fcw_ld.BG = 2 - 1;
4310        desc->req.fcw_ld.Zc = 7;
4311        desc->req.fcw_ld.ncb = 350;
4312        desc->req.fcw_ld.rm_e = 4;
4313        desc->req.fcw_ld.itmax = 10;
4314        desc->req.fcw_ld.gain_i = 1;
4315        desc->req.fcw_ld.gain_h = 1;
4316
4317        int engines_to_restart[ACC100_SIG_UL_5G_LAST + 1] = {0};
4318        int num_failed_engine = 0;
4319        /* Detect engines in undefined state */
4320        for (template_idx = ACC100_SIG_UL_5G;
4321                        template_idx <= ACC100_SIG_UL_5G_LAST;
4322                        template_idx++) {
4323                /* Check engine power-on status */
4324                address = HwPfFecUl5gIbDebugReg +
4325                                ACC100_ENGINE_OFFSET * template_idx;
4326                status = (acc100_reg_read(d, address) >> 4) & 0xF;
4327                if (status == 0) {
4328                        engines_to_restart[num_failed_engine] = template_idx;
4329                        num_failed_engine++;
4330                }
4331        }
4332
        /* 5G UL queue groups are allocated after the 4G UL ones */
        int numQqsAcc = conf->q_ul_4g.num_qgroups;
        int numQgs = conf->q_ul_5g.num_qgroups;
4335        value = 0;
4336        for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
4337                value |= (1 << qg_idx);
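        /*
         * Worked example for the mask built above, with illustrative
         * numbers: a 4G UL offset of 2 queue groups and 4 5G UL queue
         * groups set bits 2..5, i.e. value = 0x3C, which is written to
         * the template register of each engine being restarted below.
         */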
        /* Force each engine which is in an unspecified state */
4339        for (i = 0; i < num_failed_engine; i++) {
4340                int failed_engine = engines_to_restart[i];
                rte_bbdev_log(INFO, "Force engine %d", failed_engine);
4342                for (template_idx = ACC100_SIG_UL_5G;
4343                                template_idx <= ACC100_SIG_UL_5G_LAST;
4344                                template_idx++) {
4345                        address = HWPfQmgrGrpTmplateReg4Indx
4346                                        + ACC100_BYTES_IN_WORD * template_idx;
4347                        if (template_idx == failed_engine)
4348                                acc100_reg_write(d, address, value);
4349                        else
4350                                acc100_reg_write(d, address, 0);
4351                }
4352                /* Reset descriptor header */
4353                desc->req.word0 = ACC100_DMA_DESC_TYPE;
4354                desc->req.word1 = 0;
4355                desc->req.word2 = 0;
4356                desc->req.word3 = 0;
4357                desc->req.numCBs = 1;
4358                desc->req.m2dlen = 2;
4359                desc->req.d2mlen = 1;
4360                /* Enqueue the code block for processing */
4361                union acc100_enqueue_reg_fmt enq_req;
4362                enq_req.val = 0;
4363                enq_req.addr_offset = ACC100_DESC_OFFSET;
4364                enq_req.num_elem = 1;
4365                enq_req.req_elem_addr = 0;
4366                rte_wmb();
4367                acc100_reg_write(d, HWPfQmgrIngressAq + 0x100, enq_req.val);
4368                usleep(ACC100_LONG_WAIT * 100);
4369                if (desc->req.word0 != 2)
                        rte_bbdev_log(INFO, "DMA Response %#"PRIx32,
                                        desc->req.word0);
4371        }
4372
4373        /* Reset LDPC Cores */
4374        for (i = 0; i < ACC100_ENGINES_MAX; i++)
4375                acc100_reg_write(d, HWPfFecUl5gCntrlReg +
4376                                ACC100_ENGINE_OFFSET * i,
4377                                ACC100_RESET_HI);
4378        usleep(ACC100_LONG_WAIT);
4379        for (i = 0; i < ACC100_ENGINES_MAX; i++)
4380                acc100_reg_write(d, HWPfFecUl5gCntrlReg +
4381                                ACC100_ENGINE_OFFSET * i,
4382                                ACC100_RESET_LO);
4383        usleep(ACC100_LONG_WAIT);
4384        acc100_reg_write(d, HWPfHi5GHardResetReg, ACC100_RESET_HARD);
4385        usleep(ACC100_LONG_WAIT);
4386        int numEngines = 0;
4387        /* Check engine power-on status again */
4388        for (template_idx = ACC100_SIG_UL_5G;
4389                        template_idx <= ACC100_SIG_UL_5G_LAST;
4390                        template_idx++) {
4391                address = HwPfFecUl5gIbDebugReg +
4392                                ACC100_ENGINE_OFFSET * template_idx;
4393                status = (acc100_reg_read(d, address) >> 4) & 0xF;
4394                address = HWPfQmgrGrpTmplateReg4Indx
4395                                + ACC100_BYTES_IN_WORD * template_idx;
4396                if (status == 1) {
4397                        acc100_reg_write(d, address, value);
4398                        numEngines++;
4399                } else
4400                        acc100_reg_write(d, address, 0);
4401        }
        rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines);
4403
4404        rte_free(d->sw_rings_base);
4405        usleep(ACC100_LONG_WAIT);
4406}
4407
/* Initial configuration of an ACC100 device prior to running configure() */
4409int
4410rte_acc100_configure(const char *dev_name, struct rte_acc100_conf *conf)
4411{
4412        rte_bbdev_log(INFO, "rte_acc100_configure");
4413        uint32_t value, address, status;
4414        int qg_idx, template_idx, vf_idx, acc, i;
4415        struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name);
4416
4417        /* Compile time checks */
4418        RTE_BUILD_BUG_ON(sizeof(struct acc100_dma_req_desc) != 256);
4419        RTE_BUILD_BUG_ON(sizeof(union acc100_dma_desc) != 256);
4420        RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_td) != 24);
4421        RTE_BUILD_BUG_ON(sizeof(struct acc100_fcw_te) != 32);
4422
4423        if (bbdev == NULL) {
                rte_bbdev_log(ERR,
                                "Invalid dev_name (%s), or device is not yet initialised",
                                dev_name);
4427                return -ENODEV;
4428        }
4429        struct acc100_device *d = bbdev->data->dev_private;
4430
4431        /* Store configuration */
4432        rte_memcpy(&d->acc100_conf, conf, sizeof(d->acc100_conf));
4433
4434        /* PCIe Bridge configuration */
4435        acc100_reg_write(d, HwPfPcieGpexBridgeControl, ACC100_CFG_PCI_BRIDGE);
4436        for (i = 1; i < ACC100_GPEX_AXIMAP_NUM; i++)
4437                acc100_reg_write(d,
4438                                HwPfPcieGpexAxiAddrMappingWindowPexBaseHigh
4439                                + i * 16, 0);
4440
4441        /* Prevent blocking AXI read on BRESP for AXI Write */
4442        address = HwPfPcieGpexAxiPioControl;
4443        value = ACC100_CFG_PCI_AXI;
4444        acc100_reg_write(d, address, value);
4445
4446        /* 5GDL PLL phase shift */
4447        acc100_reg_write(d, HWPfChaDl5gPllPhshft0, 0x1);
4448
4449        /* Explicitly releasing AXI as this may be stopped after PF FLR/BME */
4450        address = HWPfDmaAxiControl;
4451        value = 1;
4452        acc100_reg_write(d, address, value);
4453
4454        /* DDR Configuration */
4455        address = HWPfDdrBcTim6;
4456        value = acc100_reg_read(d, address);
        value &= 0xFFFFFFFB; /* Clear bit 2 */
4458#ifdef ACC100_DDR_ECC_ENABLE
4459        value |= 0x4;
4460#endif
4461        acc100_reg_write(d, address, value);
4462        address = HWPfDdrPhyDqsCountNum;
4463#ifdef ACC100_DDR_ECC_ENABLE
4464        value = 9;
4465#else
4466        value = 8;
4467#endif
4468        acc100_reg_write(d, address, value);
4469
4470        /* Set default descriptor signature */
4471        address = HWPfDmaDescriptorSignatuture;
4472        value = 0;
4473        acc100_reg_write(d, address, value);
4474
4475        /* Enable the Error Detection in DMA */
4476        value = ACC100_CFG_DMA_ERROR;
4477        address = HWPfDmaErrorDetectionEn;
4478        acc100_reg_write(d, address, value);
4479
4480        /* AXI Cache configuration */
4481        value = ACC100_CFG_AXI_CACHE;
4482        address = HWPfDmaAxcacheReg;
4483        acc100_reg_write(d, address, value);
4484
4485        /* Default DMA Configuration (Qmgr Enabled) */
4486        address = HWPfDmaConfig0Reg;
4487        value = 0;
4488        acc100_reg_write(d, address, value);
4489        address = HWPfDmaQmanen;
4490        value = 0;
4491        acc100_reg_write(d, address, value);
4492
4493        /* Default RLIM/ALEN configuration */
4494        address = HWPfDmaConfig1Reg;
4495        value = (1 << 31) + (23 << 8) + (1 << 6) + 7;
4496        acc100_reg_write(d, address, value);
4497
4498        /* Configure DMA Qmanager addresses */
4499        address = HWPfDmaQmgrAddrReg;
4500        value = HWPfQmgrEgressQueuesTemplate;
4501        acc100_reg_write(d, address, value);
4502
4503        /* ===== Qmgr Configuration ===== */
4504        /* Configuration of the AQueue Depth QMGR_GRP_0_DEPTH_LOG2 for UL */
4505        int totalQgs = conf->q_ul_4g.num_qgroups +
4506                        conf->q_ul_5g.num_qgroups +
4507                        conf->q_dl_4g.num_qgroups +
4508                        conf->q_dl_5g.num_qgroups;
        for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
                address = HWPfQmgrDepthLog2Grp +
                                ACC100_BYTES_IN_WORD * qg_idx;
                value = aqDepth(qg_idx, conf);
                acc100_reg_write(d, address, value);
                address = HWPfQmgrTholdGrp +
                                ACC100_BYTES_IN_WORD * qg_idx;
                value = (1 << 16) + (1 << (aqDepth(qg_idx, conf) - 1));
                acc100_reg_write(d, address, value);
        }
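        /*
         * Worked example for the two registers programmed above, with an
         * illustrative depth: for aqDepth() == 4 the depth register gets
         * 4 and the threshold register gets
         * (1 << 16) + (1 << 3) = 0x10008, which reads as a control bit in
         * the upper half-word and a half-depth threshold below it.
         */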
4519
4520        /* Template Priority in incremental order */
        for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
                        template_idx++) {
                address = HWPfQmgrGrpTmplateReg0Indx +
                                ACC100_BYTES_IN_WORD * (template_idx % 8);
                value = ACC100_TMPL_PRI_0;
                acc100_reg_write(d, address, value);
                address = HWPfQmgrGrpTmplateReg1Indx +
                                ACC100_BYTES_IN_WORD * (template_idx % 8);
                value = ACC100_TMPL_PRI_1;
                acc100_reg_write(d, address, value);
                address = HWPfQmgrGrpTmplateReg2indx +
                                ACC100_BYTES_IN_WORD * (template_idx % 8);
                value = ACC100_TMPL_PRI_2;
                acc100_reg_write(d, address, value);
                address = HWPfQmgrGrpTmplateReg3Indx +
                                ACC100_BYTES_IN_WORD * (template_idx % 8);
                value = ACC100_TMPL_PRI_3;
                acc100_reg_write(d, address, value);
        }
4540
4541        address = HWPfQmgrGrpPriority;
4542        value = ACC100_CFG_QMGR_HI_P;
4543        acc100_reg_write(d, address, value);
4544
4545        /* Template Configuration */
4546        for (template_idx = 0; template_idx < ACC100_NUM_TMPL;
4547                        template_idx++) {
4548                value = 0;
4549                address = HWPfQmgrGrpTmplateReg4Indx
4550                                + ACC100_BYTES_IN_WORD * template_idx;
4551                acc100_reg_write(d, address, value);
4552        }
4553        /* 4GUL */
4554        int numQgs = conf->q_ul_4g.num_qgroups;
4555        int numQqsAcc = 0;
4556        value = 0;
4557        for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
4558                value |= (1 << qg_idx);
4559        for (template_idx = ACC100_SIG_UL_4G;
4560                        template_idx <= ACC100_SIG_UL_4G_LAST;
4561                        template_idx++) {
4562                address = HWPfQmgrGrpTmplateReg4Indx
4563                                + ACC100_BYTES_IN_WORD * template_idx;
4564                acc100_reg_write(d, address, value);
4565        }
4566        /* 5GUL */
4567        numQqsAcc += numQgs;
4568        numQgs  = conf->q_ul_5g.num_qgroups;
4569        value = 0;
4570        int numEngines = 0;
4571        for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
4572                value |= (1 << qg_idx);
4573        for (template_idx = ACC100_SIG_UL_5G;
4574                        template_idx <= ACC100_SIG_UL_5G_LAST;
4575                        template_idx++) {
4576                /* Check engine power-on status */
4577                address = HwPfFecUl5gIbDebugReg +
4578                                ACC100_ENGINE_OFFSET * template_idx;
4579                status = (acc100_reg_read(d, address) >> 4) & 0xF;
4580                address = HWPfQmgrGrpTmplateReg4Indx
4581                                + ACC100_BYTES_IN_WORD * template_idx;
4582                if (status == 1) {
4583                        acc100_reg_write(d, address, value);
4584                        numEngines++;
4585                } else
4586                        acc100_reg_write(d, address, 0);
4587#if RTE_ACC100_SINGLE_FEC == 1
4588                value = 0;
4589#endif
4590        }
        rte_bbdev_log(INFO, "Number of 5GUL engines %d", numEngines);
4592        /* 4GDL */
4593        numQqsAcc += numQgs;
4594        numQgs  = conf->q_dl_4g.num_qgroups;
4595        value = 0;
4596        for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
4597                value |= (1 << qg_idx);
4598        for (template_idx = ACC100_SIG_DL_4G;
4599                        template_idx <= ACC100_SIG_DL_4G_LAST;
4600                        template_idx++) {
4601                address = HWPfQmgrGrpTmplateReg4Indx
4602                                + ACC100_BYTES_IN_WORD * template_idx;
4603                acc100_reg_write(d, address, value);
#if RTE_ACC100_SINGLE_FEC == 1
                value = 0;
#endif
4607        }
4608        /* 5GDL */
4609        numQqsAcc += numQgs;
4610        numQgs  = conf->q_dl_5g.num_qgroups;
4611        value = 0;
4612        for (qg_idx = numQqsAcc; qg_idx < (numQgs + numQqsAcc); qg_idx++)
4613                value |= (1 << qg_idx);
4614        for (template_idx = ACC100_SIG_DL_5G;
4615                        template_idx <= ACC100_SIG_DL_5G_LAST;
4616                        template_idx++) {
4617                address = HWPfQmgrGrpTmplateReg4Indx
4618                                + ACC100_BYTES_IN_WORD * template_idx;
4619                acc100_reg_write(d, address, value);
4620#if RTE_ACC100_SINGLE_FEC == 1
4621                value = 0;
4622#endif
4623        }
4624
4625        /* Queue Group Function mapping */
4626        int qman_func_id[5] = {0, 2, 1, 3, 4};
4627        address = HWPfQmgrGrpFunction0;
4628        value = 0;
4629        for (qg_idx = 0; qg_idx < 8; qg_idx++) {
4630                acc = accFromQgid(qg_idx, conf);
4631                value |= qman_func_id[acc]<<(qg_idx * 4);
4632        }
4633        acc100_reg_write(d, address, value);
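        /*
         * The register written above packs one 4-bit function id per
         * queue group. Worked example with an illustrative split: if
         * queue groups 0-1 are 4G UL, 2-3 are 5G UL, 4-5 are 4G DL and
         * 6-7 are 5G DL, the qman_func_id[] codes {0, 2, 1, 3} yield
         * value = 0x33112200.
         */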
4634
4635        /* Configuration of the Arbitration QGroup depth to 1 */
        for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
                address = HWPfQmgrArbQDepthGrp +
                                ACC100_BYTES_IN_WORD * qg_idx;
                value = 0;
                acc100_reg_write(d, address, value);
        }
4642
        /* Enabling AQueues through the Queue hierarchy */
4644        for (vf_idx = 0; vf_idx < ACC100_NUM_VFS; vf_idx++) {
4645                for (qg_idx = 0; qg_idx < ACC100_NUM_QGRPS; qg_idx++) {
4646                        value = 0;
4647                        if (vf_idx < conf->num_vf_bundles &&
4648                                        qg_idx < totalQgs)
4649                                value = (1 << aqNum(qg_idx, conf)) - 1;
4650                        address = HWPfQmgrAqEnableVf
4651                                        + vf_idx * ACC100_BYTES_IN_WORD;
4652                        value += (qg_idx << 16);
4653                        acc100_reg_write(d, address, value);
4654                }
4655        }
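        /*
         * Worked example for the enable word above, with illustrative
         * values: for aqNum() == 4 AQs in queue group 3,
         * value = ((1 << 4) - 1) + (3 << 16) = 0x3000F: the low
         * half-word enables AQs 0-3 and the high half-word selects the
         * queue group being programmed.
         */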
4656
4657        /* This pointer to ARAM (256kB) is shifted by 2 (4B per register) */
4658        uint32_t aram_address = 0;
4659        for (qg_idx = 0; qg_idx < totalQgs; qg_idx++) {
4660                for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
4661                        address = HWPfQmgrVfBaseAddr + vf_idx
4662                                        * ACC100_BYTES_IN_WORD + qg_idx
4663                                        * ACC100_BYTES_IN_WORD * 64;
4664                        value = aram_address;
4665                        acc100_reg_write(d, address, value);
4666                        /* Offset ARAM Address for next memory bank
4667                         * - increment of 4B
4668                         */
4669                        aram_address += aqNum(qg_idx, conf) *
4670                                        (1 << aqDepth(qg_idx, conf));
4671                }
4672        }
4673
4674        if (aram_address > ACC100_WORDS_IN_ARAM_SIZE) {
                rte_bbdev_log(ERR, "ARAM configuration does not fit: %d > %d",
                                aram_address, ACC100_WORDS_IN_ARAM_SIZE);
4677                return -EINVAL;
4678        }
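        /*
         * ARAM sizing example with illustrative values: each
         * (queue group, VF) pair consumes aqNum() * 2^aqDepth() words,
         * so 8 queue groups with 2 VF bundles, 16 AQs and depth 4 use
         * 8 * 2 * 16 * 16 = 4096 words, comfortably below the
         * ACC100_WORDS_IN_ARAM_SIZE bound checked above.
         */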
4679
4680        /* ==== HI Configuration ==== */
4681
4682        /* Prevent Block on Transmit Error */
4683        address = HWPfHiBlockTransmitOnErrorEn;
4684        value = 0;
4685        acc100_reg_write(d, address, value);
        /* Prevent MSI drops */
4687        address = HWPfHiMsiDropEnableReg;
4688        value = 0;
4689        acc100_reg_write(d, address, value);
4690        /* Set the PF Mode register */
4691        address = HWPfHiPfMode;
4692        value = (conf->pf_mode_en) ? ACC100_PF_VAL : 0;
4693        acc100_reg_write(d, address, value);
4694        /* Enable Error Detection in HW */
4695        address = HWPfDmaErrorDetectionEn;
4696        value = 0x3D7;
4697        acc100_reg_write(d, address, value);
4698
4699        /* QoS overflow init */
4700        value = 1;
4701        address = HWPfQosmonAEvalOverflow0;
4702        acc100_reg_write(d, address, value);
4703        address = HWPfQosmonBEvalOverflow0;
4704        acc100_reg_write(d, address, value);
4705
4706        /* HARQ DDR Configuration */
4707        unsigned int ddrSizeInMb = 512; /* Fixed to 512 MB per VF for now */
4708        for (vf_idx = 0; vf_idx < conf->num_vf_bundles; vf_idx++) {
4709                address = HWPfDmaVfDdrBaseRw + vf_idx
4710                                * 0x10;
4711                value = ((vf_idx * (ddrSizeInMb / 64)) << 16) +
4712                                (ddrSizeInMb - 1);
4713                acc100_reg_write(d, address, value);
4714        }
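        /*
         * Worked example for the HARQ DDR layout above, for an
         * illustrative VF: with vf_idx = 1 and 512 MB per VF,
         * value = ((1 * (512 / 64)) << 16) + 511 = 0x801FF, which reads
         * as a base offset in 64 MB units in the upper half-word and the
         * size in MB minus one in the lower half-word.
         */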
4715        usleep(ACC100_LONG_WAIT);
4716
4717        /* Workaround in case some 5GUL engines are in an unexpected state */
4718        if (numEngines < (ACC100_SIG_UL_5G_LAST + 1))
4719                poweron_cleanup(bbdev, d, conf);
4720
4721        rte_bbdev_log_debug("PF Tip configuration complete for %s", dev_name);
4722        return 0;
4723}
4724