dpdk/drivers/mempool/octeontx2/otx2_mempool_ops.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <rte_mempool.h>
#include <rte_vect.h>

#include "otx2_mempool.h"

static int __rte_hot
otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
{
        const uint64_t aura_handle = mp->pool_id;
        const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
        const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
                                 NPA_LF_AURA_OP_FREE0;
        unsigned int index;

        /* Ensure mbuf init changes are written before the free pointers
         * are enqueued to the stack.
         */
        rte_io_wmb();
        for (index = 0; index < n; index++)
                otx2_store_pair((uint64_t)obj_table[index], reg, addr);

        return 0;
}

static __rte_noinline int
npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
                         void **obj_table, uint8_t i)
{
        uint8_t retry = 4;

        do {
                obj_table[i] = (void *)otx2_atomic64_add_nosync(wdata, addr);
                if (obj_table[i] != NULL)
                        return 0;

        } while (retry--);

        return -ENOENT;
}

#if defined(RTE_ARCH_ARM64)
static __rte_noinline int
npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
                void **obj_table, unsigned int n)
{
        uint8_t i;

        for (i = 0; i < n; i++) {
                if (obj_table[i] != NULL)
                        continue;
                if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
                        return -ENOENT;
        }

        return 0;
}
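
/*
 * A sketch of the bulk-allocation fast path below: each LSE "casp" performs a
 * 128-bit atomic access on the aura's NPA_LF_AURA_OP_ALLOCX(0) register, with
 * the same allocation word (derived from the aura id) in both halves of the
 * source pair, and the hardware hands back two freshly allocated pointers
 * (or zero when the aura runs dry) in the destination register pair. The
 * pairs are packed into vector registers, stored to obj_table via
 * post-incrementing "st1", and AND-ed into "failed". The final lane-AND check
 * is conservative: it can go to zero only if some returned value was zero or
 * the pointers share no common set bit, but a failed (zero) allocation always
 * forces it to zero, so the slow path npa_lf_aura_op_search_alloc() is then
 * used to retry just the NULL slots.
 */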

static __rte_noinline int
npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
                          unsigned int n, void **obj_table)
{
        register const uint64_t wdata64 __asm("x26") = wdata;
        register const uint64_t wdata128 __asm("x27") = wdata;
        uint64x2_t failed = vdupq_n_u64(~0);

        switch (n) {
        case 32:
        {
                asm volatile (
                ".cpu  generic+lse\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d16, x0\n"
                "fmov v16.D[1], x1\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d17, x2\n"
                "fmov v17.D[1], x3\n"
                "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d18, x4\n"
                "fmov v18.D[1], x5\n"
                "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d19, x6\n"
                "fmov v19.D[1], x7\n"
                "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "and %[failed].16B, %[failed].16B, v17.16B\n"
                "and %[failed].16B, %[failed].16B, v18.16B\n"
                "and %[failed].16B, %[failed].16B, v19.16B\n"
                "fmov d20, x8\n"
                "fmov v20.D[1], x9\n"
                "fmov d21, x10\n"
                "fmov v21.D[1], x11\n"
                "fmov d22, x12\n"
                "fmov v22.D[1], x13\n"
                "fmov d23, x14\n"
                "fmov v23.D[1], x15\n"
                "and %[failed].16B, %[failed].16B, v20.16B\n"
                "and %[failed].16B, %[failed].16B, v21.16B\n"
                "and %[failed].16B, %[failed].16B, v22.16B\n"
                "and %[failed].16B, %[failed].16B, v23.16B\n"
                "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
                "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
                "fmov d16, x16\n"
                "fmov v16.D[1], x17\n"
                "fmov d17, x18\n"
                "fmov v17.D[1], x19\n"
                "fmov d18, x20\n"
                "fmov v18.D[1], x21\n"
                "fmov d19, x22\n"
                "fmov v19.D[1], x23\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "and %[failed].16B, %[failed].16B, v17.16B\n"
                "and %[failed].16B, %[failed].16B, v18.16B\n"
                "and %[failed].16B, %[failed].16B, v19.16B\n"
                "fmov d20, x0\n"
                "fmov v20.D[1], x1\n"
                "fmov d21, x2\n"
                "fmov v21.D[1], x3\n"
                "fmov d22, x4\n"
                "fmov v22.D[1], x5\n"
                "fmov d23, x6\n"
                "fmov v23.D[1], x7\n"
                "and %[failed].16B, %[failed].16B, v20.16B\n"
                "and %[failed].16B, %[failed].16B, v21.16B\n"
                "and %[failed].16B, %[failed].16B, v22.16B\n"
                "and %[failed].16B, %[failed].16B, v23.16B\n"
                "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
                "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
                : "+Q" (*addr), [failed] "=&w" (failed)
                : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
                [dst] "r" (obj_table), [loc] "r" (addr)
                : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
                "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
                "x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
                "v18", "v19", "v20", "v21", "v22", "v23"
                );
                break;
        }
        case 16:
        {
                asm volatile (
                ".cpu  generic+lse\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d16, x0\n"
                "fmov v16.D[1], x1\n"
                "fmov d17, x2\n"
                "fmov v17.D[1], x3\n"
                "fmov d18, x4\n"
                "fmov v18.D[1], x5\n"
                "fmov d19, x6\n"
                "fmov v19.D[1], x7\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "and %[failed].16B, %[failed].16B, v17.16B\n"
                "and %[failed].16B, %[failed].16B, v18.16B\n"
                "and %[failed].16B, %[failed].16B, v19.16B\n"
                "fmov d20, x8\n"
                "fmov v20.D[1], x9\n"
                "fmov d21, x10\n"
                "fmov v21.D[1], x11\n"
                "fmov d22, x12\n"
                "fmov v22.D[1], x13\n"
                "fmov d23, x14\n"
                "fmov v23.D[1], x15\n"
                "and %[failed].16B, %[failed].16B, v20.16B\n"
                "and %[failed].16B, %[failed].16B, v21.16B\n"
                "and %[failed].16B, %[failed].16B, v22.16B\n"
                "and %[failed].16B, %[failed].16B, v23.16B\n"
                "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
                "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
                : "+Q" (*addr), [failed] "=&w" (failed)
                : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
                [dst] "r" (obj_table), [loc] "r" (addr)
                : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
                "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
                "v17", "v18", "v19", "v20", "v21", "v22", "v23"
                );
                break;
        }
        case 8:
        {
                asm volatile (
                ".cpu  generic+lse\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d16, x0\n"
                "fmov v16.D[1], x1\n"
                "fmov d17, x2\n"
                "fmov v17.D[1], x3\n"
                "fmov d18, x4\n"
                "fmov v18.D[1], x5\n"
                "fmov d19, x6\n"
                "fmov v19.D[1], x7\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "and %[failed].16B, %[failed].16B, v17.16B\n"
                "and %[failed].16B, %[failed].16B, v18.16B\n"
                "and %[failed].16B, %[failed].16B, v19.16B\n"
                "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
                : "+Q" (*addr), [failed] "=&w" (failed)
                : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
                [dst] "r" (obj_table), [loc] "r" (addr)
                : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
                "v16", "v17", "v18", "v19"
                );
                break;
        }
        case 4:
        {
                asm volatile (
                ".cpu  generic+lse\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d16, x0\n"
                "fmov v16.D[1], x1\n"
                "fmov d17, x2\n"
                "fmov v17.D[1], x3\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "and %[failed].16B, %[failed].16B, v17.16B\n"
                "st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
                : "+Q" (*addr), [failed] "=&w" (failed)
                : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
                [dst] "r" (obj_table), [loc] "r" (addr)
                : "memory", "x0", "x1", "x2", "x3", "v16", "v17"
                );
                break;
        }
        case 2:
        {
                asm volatile (
                ".cpu  generic+lse\n"
                "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
                "fmov d16, x0\n"
                "fmov v16.D[1], x1\n"
                "and %[failed].16B, %[failed].16B, v16.16B\n"
                "st1 { v16.2d}, [%[dst]], 16\n"
                : "+Q" (*addr), [failed] "=&w" (failed)
                : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
                [dst] "r" (obj_table), [loc] "r" (addr)
                : "memory", "x0", "x1", "v16"
                );
                break;
        }
        case 1:
                return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
        }

        if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
                return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
                        ((char *)obj_table - (sizeof(uint64_t) * n)), n);

        return 0;
}

static __rte_noinline void
otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
        unsigned int i;

        for (i = 0; i < n; i++) {
                if (obj_table[i] != NULL) {
                        otx2_npa_enq(mp, &obj_table[i], 1);
                        obj_table[i] = NULL;
                }
        }
}

static __rte_noinline int __rte_hot
otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
        const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
        void **obj_table_bak = obj_table;
        const unsigned int nfree = n;
        unsigned int parts;

        int64_t * const addr = (int64_t * const)
                        (npa_lf_aura_handle_to_base(mp->pool_id) +
                                NPA_LF_AURA_OP_ALLOCX(0));
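        /*
         * Split the request into chunks the bulk routine handles: 32 at a
         * time, then the previous power of two of what remains. For example,
         * a request for n = 45 objects is issued as 32 + 8 + 4 + 1.
         */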
        while (n) {
                parts = n > 31 ? 32 : rte_align32prevpow2(n);
                n -= parts;
                if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
                                parts, obj_table))) {
                        otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
                        return -ENOENT;
                }
                obj_table += parts;
        }

        return 0;
}

#else

static inline int __rte_hot
otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
{
        const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
        unsigned int index;
        int rc;

        int64_t * const addr = (int64_t * const)
                        (npa_lf_aura_handle_to_base(mp->pool_id) +
                                NPA_LF_AURA_OP_ALLOCX(0));
        for (index = 0; index < n; index++, obj_table++) {
                /* On success the allocated pointer is written to *obj_table */
                rc = npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
                if (rc) {
                        /* Aura exhausted: free back what was allocated */
                        for (; index > 0; index--) {
                                obj_table--;
                                otx2_npa_enq(mp, obj_table, 1);
                        }
                        return -ENOENT;
                }
        }

        return 0;
}

#endif

static unsigned int
otx2_npa_get_count(const struct rte_mempool *mp)
{
        return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
}
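
/*
 * Aura/pool context management below goes through the AF mailbox: requests
 * are queued with otx2_mbox_alloc_msg_npa_aq_enq(), sent as one batch, and
 * the responses are walked in the shared mailbox memory via hdr.next_msgoff.
 * otx2_mbox_wait_for_rsp() returns the number of responses received, which
 * is why the init/fini paths below expect rc == 2 (one response per request).
 */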

static int
npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
                      struct npa_aura_s *aura, struct npa_pool_s *pool)
{
        struct npa_aq_enq_req *aura_init_req, *pool_init_req;
        struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
        struct otx2_mbox_dev *mdev = &mbox->dev[0];
        struct otx2_idev_cfg *idev;
        int rc, off;

        idev = otx2_intra_dev_get_cfg();
        if (idev == NULL)
                return -ENOMEM;

        aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

        aura_init_req->aura_id = aura_id;
        aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
        aura_init_req->op = NPA_AQ_INSTOP_INIT;
        otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));

        pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);

        pool_init_req->aura_id = aura_id;
        pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
        pool_init_req->op = NPA_AQ_INSTOP_INIT;
        otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));

        otx2_mbox_msg_send(mbox, 0);
        rc = otx2_mbox_wait_for_rsp(mbox, 0);
        if (rc < 0)
                return rc;

        off = mbox->rx_start +
                        RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
        aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
        off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
        pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

        if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
                rc = 0;
        else
                rc = NPA_LF_ERR_AURA_POOL_INIT;

        if (rc)
                return rc;

        if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
                return 0;

        aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        aura_init_req->aura_id = aura_id;
        aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
        aura_init_req->op = NPA_AQ_INSTOP_LOCK;

        pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        if (!pool_init_req) {
                /* The shared memory buffer can be full.
                 * Flush it and retry
                 */
                otx2_mbox_msg_send(mbox, 0);
                rc = otx2_mbox_wait_for_rsp(mbox, 0);
                if (rc < 0) {
                        otx2_err("Failed to LOCK AURA context");
                        return -ENOMEM;
                }

                pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
                if (!pool_init_req) {
                        otx2_err("Failed to LOCK POOL context");
                        return -ENOMEM;
                }
        }
        pool_init_req->aura_id = aura_id;
        pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
        pool_init_req->op = NPA_AQ_INSTOP_LOCK;

        rc = otx2_mbox_process(mbox);
        if (rc < 0) {
                otx2_err("Failed to lock POOL ctx to NDC");
                return -ENOMEM;
        }

        return 0;
}

static int
npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
                      uint32_t aura_id,
                      uint64_t aura_handle)
{
        struct npa_aq_enq_req *aura_req, *pool_req;
        struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
        struct otx2_mbox_dev *mdev = &mbox->dev[0];
        struct ndc_sync_op *ndc_req;
        struct otx2_idev_cfg *idev;
        int rc, off;

        idev = otx2_intra_dev_get_cfg();
        if (idev == NULL)
                return -EINVAL;

        /* Procedure for disabling an aura/pool */
        rte_delay_us(10);
        npa_lf_aura_op_alloc(aura_handle, 0);

        pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        pool_req->aura_id = aura_id;
        pool_req->ctype = NPA_AQ_CTYPE_POOL;
        pool_req->op = NPA_AQ_INSTOP_WRITE;
        pool_req->pool.ena = 0;
        pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;

        aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        aura_req->aura_id = aura_id;
        aura_req->ctype = NPA_AQ_CTYPE_AURA;
        aura_req->op = NPA_AQ_INSTOP_WRITE;
        aura_req->aura.ena = 0;
        aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;

        otx2_mbox_msg_send(mbox, 0);
        rc = otx2_mbox_wait_for_rsp(mbox, 0);
        if (rc < 0)
                return rc;

        off = mbox->rx_start +
                        RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
        pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

        off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
        aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);

        if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
                return NPA_LF_ERR_AURA_POOL_FINI;

        /* Sync NDC-NPA for LF */
        ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
        ndc_req->npa_lf_sync = 1;

        rc = otx2_mbox_process(mbox);
        if (rc) {
                otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
                return NPA_LF_ERR_AURA_POOL_FINI;
        }

        if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
                return 0;

        aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        aura_req->aura_id = aura_id;
        aura_req->ctype = NPA_AQ_CTYPE_AURA;
        aura_req->op = NPA_AQ_INSTOP_UNLOCK;

        rc = otx2_mbox_process(mbox);
        if (rc < 0) {
                otx2_err("Failed to unlock AURA ctx to NDC");
                return -EINVAL;
        }

        pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
        pool_req->aura_id = aura_id;
        pool_req->ctype = NPA_AQ_CTYPE_POOL;
        pool_req->op = NPA_AQ_INSTOP_UNLOCK;

        rc = otx2_mbox_process(mbox);
        if (rc < 0) {
                otx2_err("Failed to unlock POOL ctx to NDC");
                return -EINVAL;
        }

        return 0;
}

static inline char *
npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
{
        snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
                        lf->pf_func, pool_id);

        return name;
}

static inline const struct rte_memzone *
npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
                       int pool_id, size_t size)
{
        return rte_memzone_reserve_aligned(
                npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
                        RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
}

static inline int
npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
{
        const struct rte_memzone *mz;

        mz = rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id, name));
        if (mz == NULL)
                return -EINVAL;

        return rte_memzone_free(mz);
}

static inline int
bitmap_ctzll(uint64_t slab)
{
        if (slab == 0)
                return 0;

        return __builtin_ctzll(slab);
}
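
/*
 * The pair allocator below reserves a free aura id from the LF bitmap, sizes
 * the pool's stack memory and fills in the aura/pool contexts. A rough sketch
 * of the sizing with hypothetical numbers: for block_count = 8192 objects and
 * stack_pg_ptrs = 1024 pointers per stack page, stack_size rounds up to 8
 * pages and alloc_size = 8 * stack_pg_bytes. The shift fields are derived
 * from the block count, e.g. __builtin_clz(8192) = 18, so shift = 10.
 */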

static int
npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
                            const uint32_t block_count, struct npa_aura_s *aura,
                            struct npa_pool_s *pool, uint64_t *aura_handle)
{
        int rc, aura_id, pool_id, stack_size, alloc_size;
        char name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;
        uint64_t slab;
        uint32_t pos;

        /* Sanity check */
        if (!lf || !block_size || !block_count ||
            !pool || !aura || !aura_handle)
                return NPA_LF_ERR_PARAM;

        /* Block size should be cache line aligned and in range of 128B-128KB */
        if (block_size % OTX2_ALIGN || block_size < 128 ||
            block_size > 128 * 1024)
                return NPA_LF_ERR_INVALID_BLOCK_SZ;

        pos = slab = 0;
        /* Scan from the beginning */
        __rte_bitmap_scan_init(lf->npa_bmp);
        /* Scan bitmap to get the free pool */
        rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
        /* Empty bitmap */
        if (rc == 0) {
                otx2_err("Mempools exhausted; use 'max_pools' devargs to raise the limit");
                return -ERANGE;
        }

        /* Get aura_id from resource bitmap */
        aura_id = pos + bitmap_ctzll(slab);
        /* Mark pool as reserved */
        rte_bitmap_clear(lf->npa_bmp, aura_id);

        /* Each aura is paired with its own pool (aura-pool pair) */
        pool_id = aura_id;
        rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
              (int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
        if (rc)
                goto exit;

        /* Allocate stack memory */
        stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
        alloc_size = stack_size * lf->stack_pg_bytes;

        mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
        if (mz == NULL) {
                rc = -ENOMEM;
                goto aura_res_put;
        }

        /* Update aura fields */
        aura->pool_addr = pool_id; /* AF will translate to associated poolctx */
        aura->ena = 1;
        aura->shift = __builtin_clz(block_count) - 8;
        aura->limit = block_count;
        aura->pool_caching = 1;
        aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
        aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
        aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
        aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
        /* Many to one reduction */
        aura->err_qint_idx = aura_id % lf->qints;

        /* Update pool fields */
        pool->stack_base = mz->iova;
        pool->ena = 1;
        pool->buf_size = block_size / OTX2_ALIGN;
        pool->stack_max_pages = stack_size;
        pool->shift = __builtin_clz(block_count) - 8;
        pool->ptr_start = 0;
        pool->ptr_end = ~0;
        pool->stack_caching = 1;
        pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
        pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
        pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);

        /* Many to one reduction */
        pool->err_qint_idx = pool_id % lf->qints;

        /* Issue AURA_INIT and POOL_INIT op */
        rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
        if (rc)
                goto stack_mem_free;

        *aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);

        /* Update aura count */
        npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
        /* Read it back to make sure aura count is updated */
        npa_lf_aura_op_cnt_get(*aura_handle);

        return 0;

stack_mem_free:
        rte_memzone_free(mz);
aura_res_put:
        rte_bitmap_set(lf->npa_bmp, aura_id);
exit:
        return rc;
}

static int
npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
{
        char name[RTE_MEMZONE_NAMESIZE];
        int aura_id, pool_id, rc;

        if (!lf || !aura_handle)
                return NPA_LF_ERR_PARAM;

        aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
        rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
        rc |= npa_lf_stack_dma_free(lf, name, pool_id);

        rte_bitmap_set(lf->npa_bmp, aura_id);

        return rc;
}

static int
npa_lf_aura_range_update_check(uint64_t aura_handle)
{
        uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
        struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
        struct npa_aura_lim *lim = lf->aura_lim;
        __otx2_io struct npa_pool_s *pool;
        struct npa_aq_enq_req *req;
        struct npa_aq_enq_rsp *rsp;
        int rc;

        req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);

        req->aura_id = aura_id;
        req->ctype = NPA_AQ_CTYPE_POOL;
        req->op = NPA_AQ_INSTOP_READ;

        rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
        if (rc) {
                otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
                return rc;
        }

        pool = &rsp->pool;

        if (lim[aura_id].ptr_start != pool->ptr_start ||
                lim[aura_id].ptr_end != pool->ptr_end) {
                otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
                return -ERANGE;
        }

        return 0;
}

static int
otx2_npa_alloc(struct rte_mempool *mp)
{
        uint32_t block_size, block_count;
        uint64_t aura_handle = 0;
        struct otx2_npa_lf *lf;
        struct npa_aura_s aura;
        struct npa_pool_s pool;
        size_t padding;
        int rc;

        lf = otx2_npa_lf_obj_get();
        if (lf == NULL) {
                rc = -EINVAL;
                goto error;
        }

        block_size = mp->elt_size + mp->header_size + mp->trailer_size;
        /*
         * OCTEON TX2 has an 8-set, 41-way L1D cache, and VA<9:7> bits dictate
         * the set selection.
         * Add padding so that the element size always occupies an odd number
         * of cache lines, which keeps elements evenly distributed across the
         * L1D cache sets.
         */
        padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
                                RTE_CACHE_LINE_SIZE;
        mp->trailer_size += padding;
        block_size += padding;
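
        /*
         * For example, with the 128 B cache lines used here: a 2304 B element
         * spans 18 lines (even), so one 128 B line of trailer padding grows
         * it to 2432 B (19 lines); a 2176 B element already spans 17 lines
         * and is left untouched.
         */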

        block_count = mp->size;

        if (block_size % OTX2_ALIGN != 0) {
                otx2_err("Block size should be multiple of 128B");
                rc = -ERANGE;
                goto error;
        }

        memset(&aura, 0, sizeof(struct npa_aura_s));
        memset(&pool, 0, sizeof(struct npa_pool_s));
        pool.nat_align = 1;
        pool.buf_offset = 1;

        if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
                otx2_err("Unsupported mp->header_size=%d", mp->header_size);
                rc = -EINVAL;
                goto error;
        }

        /* Use driver specific mp->pool_config to override aura config */
        if (mp->pool_config != NULL)
                memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));

        rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
                         &aura, &pool, &aura_handle);
        if (rc) {
                otx2_err("Failed to alloc pool or aura rc=%d", rc);
                goto error;
        }

        /* Store aura_handle for future queue operations */
        mp->pool_id = aura_handle;
        otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
                     lf, block_size, block_count, aura_handle);

        /* Just hold the reference of the object */
        otx2_npa_lf_obj_ref();
        return 0;
error:
        return rc;
}

static void
otx2_npa_free(struct rte_mempool *mp)
{
        struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
        int rc = 0;

        otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
        if (lf != NULL)
                rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);

        if (rc)
                otx2_err("Failed to free pool or aura rc=%d", rc);

        /* Release the reference of npalf */
        otx2_npa_lf_fini();
}

static ssize_t
otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
                       uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
{
        size_t total_elt_sz;

        /* Need space for one more obj on each chunk to fulfill
         * alignment requirements.
         */
        total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
        return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
                                                total_elt_sz, min_chunk_size,
                                                align);
}

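/*
 * Extract the L1D set index from an address: with 128 B lines and 8 sets the
 * set is given by VA bits <9:7>, e.g. an IOVA of 0x380 maps to set 7 and
 * 0x400 wraps around to set 0.
 */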
static uint8_t
otx2_npa_l1d_way_set_get(uint64_t iova)
{
        return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
}

static int
otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
                  rte_iova_t iova, size_t len,
                  rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
{
#define OTX2_L1D_NB_SETS        8
        uint64_t distribution[OTX2_L1D_NB_SETS];
        rte_iova_t start_iova;
        size_t total_elt_sz;
        uint8_t set;
        size_t off;
        int i;

        if (iova == RTE_BAD_IOVA)
                return -EINVAL;

        total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;

        /* Align object start address to a multiple of total_elt_sz */
        off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);
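        /*
         * E.g. if total_elt_sz = 2432 and vaddr sits 100 bytes past a
         * multiple of it, off = 2332 so vaddr + off lands on the next
         * element-size boundary; off = 0 when vaddr is already aligned.
         */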

        if (len < off)
                return -EINVAL;

        vaddr = (char *)vaddr + off;
        iova += off;
        len -= off;

        memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
        start_iova = iova;
        while (start_iova < iova + len) {
                set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
                distribution[set]++;
                start_iova += total_elt_sz;
        }

        otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
                     iova);
        otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
                     (uint64_t)(len + off), (uint64_t)len);
        otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
        otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
                     (uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
        otx2_npa_dbg("L1D set distribution :");
        for (i = 0; i < OTX2_L1D_NB_SETS; i++)
                otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
                             distribution[i]);

        npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);

        if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
                return -EBUSY;

        return rte_mempool_op_populate_helper(mp,
                                        RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
                                        max_objs, vaddr, iova, len,
                                        obj_cb, obj_cb_arg);
}

static struct rte_mempool_ops otx2_npa_ops = {
        .name = "octeontx2_npa",
        .alloc = otx2_npa_alloc,
        .free = otx2_npa_free,
        .enqueue = otx2_npa_enq,
        .get_count = otx2_npa_get_count,
        .calc_mem_size = otx2_npa_calc_mem_size,
        .populate = otx2_npa_populate,
#if defined(RTE_ARCH_ARM64)
        .dequeue = otx2_npa_deq_arm64,
#else
        .dequeue = otx2_npa_deq,
#endif
};

MEMPOOL_REGISTER_OPS(otx2_npa_ops);
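
/*
 * Usage note (a sketch, not part of the driver): an application can bind a
 * mempool to these ops explicitly before populating it, e.g.:
 *
 *         mp = rte_mempool_create_empty("test_pool", 8192, 2176, 256, 0,
 *                                       rte_socket_id(), 0);
 *         rte_mempool_set_ops_byname(mp, "octeontx2_npa", NULL);
 *         rte_mempool_populate_default(mp);
 *
 * When DPDK is built with RTE_MBUF_DEFAULT_MEMPOOL_OPS set to
 * "octeontx2_npa", rte_pktmbuf_pool_create() selects these ops by default.
 */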