dpdk/drivers/common/cnxk/roc_npa.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */

#ifndef _ROC_NPA_H_
#define _ROC_NPA_H_

#define ROC_AURA_ID_MASK       (BIT_ULL(16) - 1)
#define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)

#define ROC_NPA_MAX_BLOCK_SZ               (128 * 1024)
#define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15

/* This value controls how much of the present average resource level is used
 * to calculate the new resource level.
 */
#define ROC_NPA_AVG_CONT 0xE0
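
/* A sketch of the weighting implied above (an assumption based on the field
 * width; consult the NPA HRM for the authoritative formula), treating
 * ROC_NPA_AVG_CONT as an 8-bit weight on the previous average:
 *
 *      new_avg = (ROC_NPA_AVG_CONT * old_avg +
 *                 (256 - ROC_NPA_AVG_CONT) * current) / 256
 *
 * With 0xE0 (224), 224/256 = 87.5% of the previous average is retained on
 * each update.
 */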

/* 16 CASP instructions can be outstanding in CN9k, but we use only 15
 * outstanding CASPs as we run out of registers.
 */
#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30

/*
 * Generate a 64-bit handle so that aura alloc and free operations can be
 * performed without a lookup. The low bits covered by ROC_AURA_ID_MASK store
 * the aura_id; the remaining upper bits store the LF base address.
 * This scheme is valid only when the OS can provide an LF base address
 * aligned to (ROC_AURA_ID_MASK + 1).
 */
static inline uint64_t
roc_npa_aura_handle_gen(uint32_t aura_id, uintptr_t addr)
{
        uint64_t val;

        val = aura_id & ROC_AURA_ID_MASK;
        return (uint64_t)addr | val;
}

static inline uint64_t
roc_npa_aura_handle_to_aura(uint64_t aura_handle)
{
        return aura_handle & ROC_AURA_ID_MASK;
}

static inline uintptr_t
roc_npa_aura_handle_to_base(uint64_t aura_handle)
{
        return (uintptr_t)(aura_handle & ~ROC_AURA_ID_MASK);
}
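
/*
 * Usage sketch (illustrative; 'aura_id' and 'lf_base' are hypothetical
 * values, with 'lf_base' aligned to ROC_AURA_ID_MASK + 1):
 *
 *      uint64_t handle = roc_npa_aura_handle_gen(aura_id, lf_base);
 *
 *      roc_npa_aura_handle_to_aura(handle);   recovers aura_id
 *      roc_npa_aura_handle_to_base(handle);   recovers lf_base
 */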

static inline uint64_t
roc_npa_aura_op_alloc(uint64_t aura_handle, const int drop)
{
        uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
        int64_t *addr;

        if (drop)
                wdata |= BIT_ULL(63); /* DROP */

        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_ALLOCX(0));
        return roc_atomic64_add_nosync(wdata, addr);
}

static inline void
roc_npa_aura_op_free(uint64_t aura_handle, const int fabs, uint64_t iova)
{
        uint64_t reg = roc_npa_aura_handle_to_aura(aura_handle);
        const uint64_t addr =
                roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0;

        if (fabs)
                reg |= BIT_ULL(63); /* FABS */

        roc_store_pair(iova, reg, addr);
}
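
/*
 * Usage sketch (illustrative; 'handle' would come from roc_npa_pool_create()
 * declared later in this header):
 *
 *      uint64_t iova = roc_npa_aura_op_alloc(handle, 0);
 *
 *      if (iova != 0)
 *              roc_npa_aura_op_free(handle, 0, iova);
 *
 * A zero return from roc_npa_aura_op_alloc() means no pointer was available
 * (or the drop threshold was hit when 'drop' is set).
 */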

static inline uint64_t
roc_npa_aura_op_cnt_get(uint64_t aura_handle)
{
        uint64_t wdata;
        int64_t *addr;
        uint64_t reg;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_CNT);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFF;
}

static inline void
roc_npa_aura_op_cnt_set(uint64_t aura_handle, const int sign, uint64_t count)
{
        uint64_t reg = count & (BIT_ULL(36) - 1);

        if (sign)
                reg |= BIT_ULL(43); /* CNT_ADD */

        reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

        plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
                                 NPA_LF_AURA_OP_CNT);
}
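
/*
 * Usage sketch (illustrative): with 'sign' set, the 36-bit 'count' is applied
 * as a delta (CNT_ADD) rather than as an absolute value:
 *
 *      roc_npa_aura_op_cnt_set(handle, 1, 16);    (add 16 to the count)
 *      uint64_t cnt = roc_npa_aura_op_cnt_get(handle);
 */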

static inline uint64_t
roc_npa_aura_op_limit_get(uint64_t aura_handle)
{
        uint64_t wdata;
        int64_t *addr;
        uint64_t reg;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_OP_LIMIT);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & ROC_AURA_OP_LIMIT_MASK;
}

static inline void
roc_npa_aura_op_limit_set(uint64_t aura_handle, uint64_t limit)
{
        uint64_t reg = limit & ROC_AURA_OP_LIMIT_MASK;

        reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

        plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
                                 NPA_LF_AURA_OP_LIMIT);
}
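
/*
 * Usage sketch (illustrative): cap the aura at 1024 outstanding pointers and
 * read the limit back. Values are masked to 36 bits (ROC_AURA_OP_LIMIT_MASK):
 *
 *      roc_npa_aura_op_limit_set(handle, 1024);
 *      uint64_t limit = roc_npa_aura_op_limit_get(handle);
 */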

static inline uint64_t
roc_npa_aura_op_available(uint64_t aura_handle)
{
        uint64_t wdata;
        uint64_t reg;
        int64_t *addr;

        wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_POOL_OP_AVAILABLE);
        reg = roc_atomic64_add_nosync(wdata, addr);

        if (reg & BIT_ULL(42) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFF;
}

static inline uint64_t
roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
{
        union {
                uint64_t u;
                struct npa_aura_op_wdata_s s;
        } op_wdata;
        int64_t *addr;
        uint64_t reg;

        op_wdata.u = 0;
        op_wdata.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
        if (drop)
                op_wdata.s.drop = 1; /* DROP; the field is a single bit, so
                                      * assigning BIT_ULL(63) would truncate
                                      * to zero.
                                      */

        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_POOL_OP_PC);

        reg = roc_atomic64_add_nosync(op_wdata.u, addr);
        /*
         * NPA_LF_POOL_OP_PC Read Data
         *
         * 63       49 48    48 47     0
         * -----------------------------
         * | Reserved | OP_ERR | OP_PC |
         * -----------------------------
         */

        if (reg & BIT_ULL(48) /* OP_ERR */)
                return 0;
        else
                return reg & 0xFFFFFFFFFFFF;
}
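
/*
 * Usage sketch (illustrative): read the pool's operation performance counter.
 * Note that a zero return is ambiguous between OP_ERR and a zero count:
 *
 *      uint64_t pc = roc_npa_pool_op_performance_counter(handle, 0);
 */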

static inline int
roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
                               unsigned int num, const int dis_wait,
                               const int drop)
{
        unsigned int i;
        int64_t *addr;
        uint64_t res;
        union {
                uint64_t u;
                struct npa_batch_alloc_compare_s compare_s;
        } cmp;

        if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
                return -1;

        /* Zero first word of every cache line */
        for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
                buf[i] = 0;

        addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
                           NPA_LF_AURA_BATCH_ALLOC);
        cmp.u = 0;
        cmp.compare_s.aura = roc_npa_aura_handle_to_aura(aura_handle);
        cmp.compare_s.drop = drop;
        cmp.compare_s.stype = ALLOC_STYPE_STF;
        cmp.compare_s.dis_wait = dis_wait;
        cmp.compare_s.count = num;

        res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
        if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
                return -1;

        return 0;
}
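
/*
 * Note: the batch alloc above is asynchronous. The CAS only issues the
 * request; the allocated pointers are then written into 'buf' one 128-byte
 * cache line at a time, and completion is detected by polling the per-line
 * status code, as roc_npa_batch_alloc_wait() below does. See the combined
 * usage sketch after roc_npa_aura_op_batch_alloc().
 */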

static inline void
roc_npa_batch_alloc_wait(uint64_t *cache_line)
{
        /* Batch alloc status code is updated in bits [6:5] of the first word
         * of the 128 byte cache line.
         */
        while (((__atomic_load_n(cache_line, __ATOMIC_RELAXED) >> 5) & 0x3) ==
               ALLOC_CCODE_INVAL)
                ;
}

static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num)
{
        unsigned int count, i;

        if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
                return 0;

        count = 0;
        /* Check each ROC cache line one by one */
        for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
                struct npa_batch_alloc_status_s *status;

                status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];

                roc_npa_batch_alloc_wait(&aligned_buf[i]);
                count += status->count;
        }

        return count;
}

static inline unsigned int
roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
                                 unsigned int num)
{
        unsigned int count, i;

        if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
                return 0;

        count = 0;
        /* Check each ROC cache line one by one */
        for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
                struct npa_batch_alloc_status_s *status;
                int line_count;

                status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];

                roc_npa_batch_alloc_wait(&aligned_buf[i]);

                line_count = status->count;

                /* Clear the status from the cache line */
                status->ccode = 0;
                status->count = 0;

                /* 'Compress' the allocated buffers as there can
                 * be 'holes' at the end of the 128 byte cache
                 * lines.
                 */
                memmove(&buf[count], &aligned_buf[i],
                        line_count * sizeof(uint64_t));

                count += line_count;
        }

        return count;
}
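
/*
 * Note: memmove() (rather than memcpy()) is used in the extract step above,
 * presumably because 'buf' and 'aligned_buf' may refer to overlapping storage
 * when a caller compresses the result in place, as the batch alloc usage
 * sketch further below does.
 */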

static inline void
roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
                          unsigned int num, const int fabs)
{
        unsigned int i;

        for (i = 0; i < num; i++) {
                const uint64_t inbuf = buf[i];

                roc_npa_aura_op_free(aura_handle, fabs, inbuf);
        }
}

static inline unsigned int
roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
                            uint64_t *aligned_buf, unsigned int num,
                            const int dis_wait, const int drop,
                            const int partial)
{
        unsigned int count, chunk, num_alloc;

        /* The buffer should be 128 byte cache line aligned */
        if (((uint64_t)aligned_buf & (ROC_ALIGN - 1)) != 0)
                return 0;

        count = 0;
        while (num) {
                chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
                                      ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
                                      num;

                if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
                                                   chunk, dis_wait, drop))
                        break;

                num_alloc = roc_npa_aura_batch_alloc_extract(buf, aligned_buf,
                                                             chunk);

                count += num_alloc;
                buf += num_alloc;
                num -= num_alloc;

                if (num_alloc != chunk)
                        break;
        }

        /* If the requested number of pointers was not allocated and if partial
         * alloc is not desired, then free allocated pointers.
         */
        if (unlikely(num != 0 && !partial)) {
                roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
                count = 0;
        }

        return count;
}
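
/*
 * Usage sketch (illustrative): allocate up to 64 pointers on CN10K. The
 * scratch buffer must be 128-byte (ROC_ALIGN) aligned and large enough for a
 * full chunk; here the result is extracted into the same buffer:
 *
 *      uint64_t ptrs[ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS]
 *              __attribute__((aligned(ROC_ALIGN)));
 *      unsigned int n;
 *
 *      n = roc_npa_aura_op_batch_alloc(handle, ptrs, ptrs, 64, 0, 0, 1);
 *
 * With 'partial' set, 'n' may be less than 64; with it clear, a shortfall is
 * rolled back and 0 is returned.
 */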

static inline void
roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
                        unsigned int num, const int fabs, uint64_t lmt_addr,
                        uint64_t lmt_id)
{
        uint64_t addr, tar_addr, free0;
        volatile uint64_t *lmt_data;
        unsigned int i;

        if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
                return;

        lmt_data = (uint64_t *)lmt_addr;

        addr = roc_npa_aura_handle_to_base(aura_handle) +
               NPA_LF_AURA_BATCH_FREE0;

        /*
         * NPA_LF_AURA_BATCH_FREE0
         *
         * 63   63 62  33 32       32 31  20 19    0
         * -----------------------------------------
         * | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
         * -----------------------------------------
         */
        free0 = roc_npa_aura_handle_to_aura(aura_handle);
        if (fabs)
                free0 |= (0x1UL << 63);
        if (num & 0x1)
                free0 |= (0x1UL << 32);

        /* tar_addr[6:4] is LMTST size-1 in units of 128b */
        tar_addr = addr | ((num >> 1) << 4);

        lmt_data[0] = free0;
        for (i = 0; i < num; i++)
                lmt_data[i + 1] = buf[i];

        roc_lmt_submit_steorl(lmt_id, tar_addr);
        plt_io_wmb();
}

static inline void
roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
                           unsigned int num, const int fabs, uint64_t lmt_addr,
                           uint64_t lmt_id)
{
        unsigned int chunk;

        while (num) {
                chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
                                      ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
                                      num;

                roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
                                        lmt_id);

                buf += chunk;
                num -= chunk;
        }
}
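
/*
 * Usage sketch (illustrative): free 'n' pointers on CN10K through an LMT
 * line. 'lmt_addr' and 'lmt_id' would come from the caller's ROC LMT setup
 * (hypothetical names here):
 *
 *      roc_npa_aura_op_batch_free(handle, ptrs, n, 0, lmt_addr, lmt_id);
 *
 * Each LMTST carries at most ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS (15) pointers,
 * so larger requests are split into chunks by the loop above.
 */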

static inline unsigned int
roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
                        const int drop)
{
#if defined(__aarch64__)
        uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
        unsigned int i, count;
        uint64_t addr;

        if (drop)
                wdata |= BIT_ULL(63); /* DROP */

        addr = roc_npa_aura_handle_to_base(aura_handle) +
               NPA_LF_AURA_OP_ALLOCX(0);

        switch (num) {
        case 30:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov v18.d[0], %[dst]\n"
                        "mov v18.d[1], %[loc]\n"
                        "mov v19.d[0], %[wdata]\n"
                        "mov v19.d[1], x30\n"
                        "mov v20.d[0], x24\n"
                        "mov v20.d[1], x25\n"
                        "mov v21.d[0], x26\n"
                        "mov v21.d[1], x27\n"
                        "mov v22.d[0], x28\n"
                        "mov v22.d[1], x29\n"
                        "mov x28, v19.d[0]\n"
                        "mov x29, v19.d[0]\n"
                        "mov x30, v18.d[1]\n"
                        "casp x0, x1, x28, x29, [x30]\n"
                        "casp x2, x3, x28, x29, [x30]\n"
                        "casp x4, x5, x28, x29, [x30]\n"
                        "casp x6, x7, x28, x29, [x30]\n"
                        "casp x8, x9, x28, x29, [x30]\n"
                        "casp x10, x11, x28, x29, [x30]\n"
                        "casp x12, x13, x28, x29, [x30]\n"
                        "casp x14, x15, x28, x29, [x30]\n"
                        "casp x16, x17, x28, x29, [x30]\n"
                        "casp x18, x19, x28, x29, [x30]\n"
                        "casp x20, x21, x28, x29, [x30]\n"
                        "casp x22, x23, x28, x29, [x30]\n"
                        "casp x24, x25, x28, x29, [x30]\n"
                        "casp x26, x27, x28, x29, [x30]\n"
                        "casp x28, x29, x28, x29, [x30]\n"
                        "mov x30, v18.d[0]\n"
                        "stp x0, x1, [x30]\n"
                        "stp x2, x3, [x30, #16]\n"
                        "stp x4, x5, [x30, #32]\n"
                        "stp x6, x7, [x30, #48]\n"
                        "stp x8, x9, [x30, #64]\n"
                        "stp x10, x11, [x30, #80]\n"
                        "stp x12, x13, [x30, #96]\n"
                        "stp x14, x15, [x30, #112]\n"
                        "stp x16, x17, [x30, #128]\n"
                        "stp x18, x19, [x30, #144]\n"
                        "stp x20, x21, [x30, #160]\n"
                        "stp x22, x23, [x30, #176]\n"
                        "stp x24, x25, [x30, #192]\n"
                        "stp x26, x27, [x30, #208]\n"
                        "stp x28, x29, [x30, #224]\n"
                        "mov %[dst], v18.d[0]\n"
                        "mov %[loc], v18.d[1]\n"
                        "mov %[wdata], v19.d[0]\n"
                        "mov x30, v19.d[1]\n"
                        "mov x24, v20.d[0]\n"
                        "mov x25, v20.d[1]\n"
                        "mov x26, v21.d[0]\n"
                        "mov x27, v21.d[1]\n"
                        "mov x28, v22.d[0]\n"
                        "mov x29, v22.d[1]\n"
                        :
                        : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
                          "x15", "x16", "x17", "x18", "x19", "x20", "x21",
                          "x22", "x23", "v18", "v19", "v20", "v21", "v22");
                break;
        case 16:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "casp x4, x5, x16, x17, [%[loc]]\n"
                        "casp x6, x7, x16, x17, [%[loc]]\n"
                        "casp x8, x9, x16, x17, [%[loc]]\n"
                        "casp x10, x11, x16, x17, [%[loc]]\n"
                        "casp x12, x13, x16, x17, [%[loc]]\n"
                        "casp x14, x15, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        "stp x4, x5, [%[dst], #32]\n"
                        "stp x6, x7, [%[dst], #48]\n"
                        "stp x8, x9, [%[dst], #64]\n"
                        "stp x10, x11, [%[dst], #80]\n"
                        "stp x12, x13, [%[dst], #96]\n"
                        "stp x14, x15, [%[dst], #112]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
                          "x15", "x16", "x17"
                );
                break;
        case 8:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "casp x4, x5, x16, x17, [%[loc]]\n"
                        "casp x6, x7, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        "stp x4, x5, [%[dst], #32]\n"
                        "stp x6, x7, [%[dst], #48]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
                          "x7", "x16", "x17"
                );
                break;
        case 4:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "casp x2, x3, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        "stp x2, x3, [%[dst], #16]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x2", "x3", "x16", "x17"
                );
                break;
        case 2:
                asm volatile(
                        ".cpu  generic+lse\n"
                        "mov x16, %[wdata]\n"
                        "mov x17, %[wdata]\n"
                        "casp x0, x1, x16, x17, [%[loc]]\n"
                        "stp x0, x1, [%[dst]]\n"
                        :
                        : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr)
                        : "memory", "x0", "x1", "x16", "x17"
                );
                break;
        case 1:
                buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
                return !!buf[0];
        }

        /* Pack the pointers */
        for (i = 0, count = 0; i < num; i++)
                if (buf[i])
                        buf[count++] = buf[i];

        return count;
#else
        unsigned int i, count;

        for (i = 0, count = 0; i < num; i++) {
                buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
                if (buf[count])
                        count++;
        }

        return count;
#endif
}

static inline unsigned int
roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
                           unsigned int num, const int drop, const int partial)
{
        unsigned int chunk, count, num_alloc;

        count = 0;
        while (num) {
                chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
                                      ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
                                      plt_align32prevpow2(num);

                num_alloc =
                        roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);

                count += num_alloc;
                buf += num_alloc;
                num -= num_alloc;

                if (unlikely(num_alloc != chunk))
                        break;
        }

        /* If the requested number of pointers was not allocated and if partial
         * alloc is not desired, then free allocated pointers.
         */
        if (unlikely(num != 0 && !partial)) {
                roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
                count = 0;
        }

        return count;
}
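
/*
 * Usage sketch (illustrative): allocate up to 64 pointers on CN9K. Chunks are
 * sized to the CASP-friendly counts handled above (30, then powers of two):
 *
 *      uint64_t ptrs[64];
 *      unsigned int n;
 *
 *      n = roc_npa_aura_op_bulk_alloc(handle, ptrs, 64, 0, 1);
 *
 * With 'partial' clear, a shortfall is rolled back with
 * roc_npa_aura_op_bulk_free() and 0 is returned.
 */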

struct roc_npa {
        struct plt_pci_device *pci_dev;

#define ROC_NPA_MEM_SZ (1 * 1024)
        uint8_t reserved[ROC_NPA_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;

int __roc_api roc_npa_dev_init(struct roc_npa *roc_npa);
int __roc_api roc_npa_dev_fini(struct roc_npa *roc_npa);

/* NPA pool */
int __roc_api roc_npa_pool_create(uint64_t *aura_handle, uint32_t block_size,
                                  uint32_t block_count, struct npa_aura_s *aura,
                                  struct npa_pool_s *pool);
int __roc_api roc_npa_aura_limit_modify(uint64_t aura_handle,
                                        uint16_t aura_limit);
int __roc_api roc_npa_pool_destroy(uint64_t aura_handle);
int __roc_api roc_npa_pool_range_update_check(uint64_t aura_handle);
void __roc_api roc_npa_aura_op_range_set(uint64_t aura_handle,
                                         uint64_t start_iova,
                                         uint64_t end_iova);

/* Init callbacks */
typedef int (*roc_npa_lf_init_cb_t)(struct plt_pci_device *pci_dev);
int __roc_api roc_npa_lf_init_cb_register(roc_npa_lf_init_cb_t cb);

/* Debug */
int __roc_api roc_npa_ctx_dump(void);
int __roc_api roc_npa_dump(void);

/* Reset operation performance counter. */
int __roc_api roc_npa_pool_op_pc_reset(uint64_t aura_handle);

#endif /* _ROC_NPA_H_ */