1
2
3
4
5#include <rte_mempool.h>
6#include <rte_vect.h>
7
8#include "otx2_mempool.h"
9
10static int __rte_hot
11otx2_npa_enq(struct rte_mempool *mp, void * const *obj_table, unsigned int n)
12{
13 unsigned int index; const uint64_t aura_handle = mp->pool_id;
14 const uint64_t reg = npa_lf_aura_handle_to_aura(aura_handle);
15 const uint64_t addr = npa_lf_aura_handle_to_base(aura_handle) +
16 NPA_LF_AURA_OP_FREE0;
17
18
19
20
21 rte_io_wmb();
22 for (index = 0; index < n; index++)
23 otx2_store_pair((uint64_t)obj_table[index], reg, addr);
24
25 return 0;
26}
27
28static __rte_noinline int
29npa_lf_aura_op_alloc_one(const int64_t wdata, int64_t * const addr,
30 void **obj_table, uint8_t i)
31{
32 uint8_t retry = 4;
33
34 do {
35 obj_table[i] = (void *)otx2_atomic64_add_nosync(wdata, addr);
36 if (obj_table[i] != NULL)
37 return 0;
38
39 } while (retry--);
40
41 return -ENOENT;
42}
43
44#if defined(RTE_ARCH_ARM64)
45static __rte_noinline int
46npa_lf_aura_op_search_alloc(const int64_t wdata, int64_t * const addr,
47 void **obj_table, unsigned int n)
48{
49 uint8_t i;
50
51 for (i = 0; i < n; i++) {
52 if (obj_table[i] != NULL)
53 continue;
54 if (npa_lf_aura_op_alloc_one(wdata, addr, obj_table, i))
55 return -ENOENT;
56 }
57
58 return 0;
59}
60
61static __rte_noinline int
62npa_lf_aura_op_alloc_bulk(const int64_t wdata, int64_t * const addr,
63 unsigned int n, void **obj_table)
64{
65 register const uint64_t wdata64 __asm("x26") = wdata;
66 register const uint64_t wdata128 __asm("x27") = wdata;
67 uint64x2_t failed = vdupq_n_u64(~0);
68
69 switch (n) {
70 case 32:
71 {
72 asm volatile (
73 ".cpu generic+lse\n"
74 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
75 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
76 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
77 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
78 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
79 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
80 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
81 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
82 "casp x16, x17, %[wdata64], %[wdata128], [%[loc]]\n"
83 "casp x18, x19, %[wdata64], %[wdata128], [%[loc]]\n"
84 "casp x20, x21, %[wdata64], %[wdata128], [%[loc]]\n"
85 "casp x22, x23, %[wdata64], %[wdata128], [%[loc]]\n"
86 "fmov d16, x0\n"
87 "fmov v16.D[1], x1\n"
88 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
89 "fmov d17, x2\n"
90 "fmov v17.D[1], x3\n"
91 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
92 "fmov d18, x4\n"
93 "fmov v18.D[1], x5\n"
94 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
95 "fmov d19, x6\n"
96 "fmov v19.D[1], x7\n"
97 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
98 "and %[failed].16B, %[failed].16B, v16.16B\n"
99 "and %[failed].16B, %[failed].16B, v17.16B\n"
100 "and %[failed].16B, %[failed].16B, v18.16B\n"
101 "and %[failed].16B, %[failed].16B, v19.16B\n"
102 "fmov d20, x8\n"
103 "fmov v20.D[1], x9\n"
104 "fmov d21, x10\n"
105 "fmov v21.D[1], x11\n"
106 "fmov d22, x12\n"
107 "fmov v22.D[1], x13\n"
108 "fmov d23, x14\n"
109 "fmov v23.D[1], x15\n"
110 "and %[failed].16B, %[failed].16B, v20.16B\n"
111 "and %[failed].16B, %[failed].16B, v21.16B\n"
112 "and %[failed].16B, %[failed].16B, v22.16B\n"
113 "and %[failed].16B, %[failed].16B, v23.16B\n"
114 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
115 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
116 "fmov d16, x16\n"
117 "fmov v16.D[1], x17\n"
118 "fmov d17, x18\n"
119 "fmov v17.D[1], x19\n"
120 "fmov d18, x20\n"
121 "fmov v18.D[1], x21\n"
122 "fmov d19, x22\n"
123 "fmov v19.D[1], x23\n"
124 "and %[failed].16B, %[failed].16B, v16.16B\n"
125 "and %[failed].16B, %[failed].16B, v17.16B\n"
126 "and %[failed].16B, %[failed].16B, v18.16B\n"
127 "and %[failed].16B, %[failed].16B, v19.16B\n"
128 "fmov d20, x0\n"
129 "fmov v20.D[1], x1\n"
130 "fmov d21, x2\n"
131 "fmov v21.D[1], x3\n"
132 "fmov d22, x4\n"
133 "fmov v22.D[1], x5\n"
134 "fmov d23, x6\n"
135 "fmov v23.D[1], x7\n"
136 "and %[failed].16B, %[failed].16B, v20.16B\n"
137 "and %[failed].16B, %[failed].16B, v21.16B\n"
138 "and %[failed].16B, %[failed].16B, v22.16B\n"
139 "and %[failed].16B, %[failed].16B, v23.16B\n"
140 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
141 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
142 : "+Q" (*addr), [failed] "=&w" (failed)
143 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
144 [dst] "r" (obj_table), [loc] "r" (addr)
145 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
146 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16",
147 "x17", "x18", "x19", "x20", "x21", "x22", "x23", "v16", "v17",
148 "v18", "v19", "v20", "v21", "v22", "v23"
149 );
150 break;
151 }
152 case 16:
153 {
154 asm volatile (
155 ".cpu generic+lse\n"
156 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
157 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
158 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
159 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
160 "casp x8, x9, %[wdata64], %[wdata128], [%[loc]]\n"
161 "casp x10, x11, %[wdata64], %[wdata128], [%[loc]]\n"
162 "casp x12, x13, %[wdata64], %[wdata128], [%[loc]]\n"
163 "casp x14, x15, %[wdata64], %[wdata128], [%[loc]]\n"
164 "fmov d16, x0\n"
165 "fmov v16.D[1], x1\n"
166 "fmov d17, x2\n"
167 "fmov v17.D[1], x3\n"
168 "fmov d18, x4\n"
169 "fmov v18.D[1], x5\n"
170 "fmov d19, x6\n"
171 "fmov v19.D[1], x7\n"
172 "and %[failed].16B, %[failed].16B, v16.16B\n"
173 "and %[failed].16B, %[failed].16B, v17.16B\n"
174 "and %[failed].16B, %[failed].16B, v18.16B\n"
175 "and %[failed].16B, %[failed].16B, v19.16B\n"
176 "fmov d20, x8\n"
177 "fmov v20.D[1], x9\n"
178 "fmov d21, x10\n"
179 "fmov v21.D[1], x11\n"
180 "fmov d22, x12\n"
181 "fmov v22.D[1], x13\n"
182 "fmov d23, x14\n"
183 "fmov v23.D[1], x15\n"
184 "and %[failed].16B, %[failed].16B, v20.16B\n"
185 "and %[failed].16B, %[failed].16B, v21.16B\n"
186 "and %[failed].16B, %[failed].16B, v22.16B\n"
187 "and %[failed].16B, %[failed].16B, v23.16B\n"
188 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
189 "st1 { v20.2d, v21.2d, v22.2d, v23.2d}, [%[dst]], 64\n"
190 : "+Q" (*addr), [failed] "=&w" (failed)
191 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
192 [dst] "r" (obj_table), [loc] "r" (addr)
193 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
194 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "v16",
195 "v17", "v18", "v19", "v20", "v21", "v22", "v23"
196 );
197 break;
198 }
199 case 8:
200 {
201 asm volatile (
202 ".cpu generic+lse\n"
203 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
204 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
205 "casp x4, x5, %[wdata64], %[wdata128], [%[loc]]\n"
206 "casp x6, x7, %[wdata64], %[wdata128], [%[loc]]\n"
207 "fmov d16, x0\n"
208 "fmov v16.D[1], x1\n"
209 "fmov d17, x2\n"
210 "fmov v17.D[1], x3\n"
211 "fmov d18, x4\n"
212 "fmov v18.D[1], x5\n"
213 "fmov d19, x6\n"
214 "fmov v19.D[1], x7\n"
215 "and %[failed].16B, %[failed].16B, v16.16B\n"
216 "and %[failed].16B, %[failed].16B, v17.16B\n"
217 "and %[failed].16B, %[failed].16B, v18.16B\n"
218 "and %[failed].16B, %[failed].16B, v19.16B\n"
219 "st1 { v16.2d, v17.2d, v18.2d, v19.2d}, [%[dst]], 64\n"
220 : "+Q" (*addr), [failed] "=&w" (failed)
221 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
222 [dst] "r" (obj_table), [loc] "r" (addr)
223 : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
224 "v16", "v17", "v18", "v19"
225 );
226 break;
227 }
228 case 4:
229 {
230 asm volatile (
231 ".cpu generic+lse\n"
232 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
233 "casp x2, x3, %[wdata64], %[wdata128], [%[loc]]\n"
234 "fmov d16, x0\n"
235 "fmov v16.D[1], x1\n"
236 "fmov d17, x2\n"
237 "fmov v17.D[1], x3\n"
238 "and %[failed].16B, %[failed].16B, v16.16B\n"
239 "and %[failed].16B, %[failed].16B, v17.16B\n"
240 "st1 { v16.2d, v17.2d}, [%[dst]], 32\n"
241 : "+Q" (*addr), [failed] "=&w" (failed)
242 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
243 [dst] "r" (obj_table), [loc] "r" (addr)
244 : "memory", "x0", "x1", "x2", "x3", "v16", "v17"
245 );
246 break;
247 }
248 case 2:
249 {
250 asm volatile (
251 ".cpu generic+lse\n"
252 "casp x0, x1, %[wdata64], %[wdata128], [%[loc]]\n"
253 "fmov d16, x0\n"
254 "fmov v16.D[1], x1\n"
255 "and %[failed].16B, %[failed].16B, v16.16B\n"
256 "st1 { v16.2d}, [%[dst]], 16\n"
257 : "+Q" (*addr), [failed] "=&w" (failed)
258 : [wdata64] "r" (wdata64), [wdata128] "r" (wdata128),
259 [dst] "r" (obj_table), [loc] "r" (addr)
260 : "memory", "x0", "x1", "v16"
261 );
262 break;
263 }
264 case 1:
265 return npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
266 }
267
268 if (unlikely(!(vgetq_lane_u64(failed, 0) & vgetq_lane_u64(failed, 1))))
269 return npa_lf_aura_op_search_alloc(wdata, addr, (void **)
270 ((char *)obj_table - (sizeof(uint64_t) * n)), n);
271
272 return 0;
273}
274
275static __rte_noinline void
276otx2_npa_clear_alloc(struct rte_mempool *mp, void **obj_table, unsigned int n)
277{
278 unsigned int i;
279
280 for (i = 0; i < n; i++) {
281 if (obj_table[i] != NULL) {
282 otx2_npa_enq(mp, &obj_table[i], 1);
283 obj_table[i] = NULL;
284 }
285 }
286}
287
288static __rte_noinline int __rte_hot
289otx2_npa_deq_arm64(struct rte_mempool *mp, void **obj_table, unsigned int n)
290{
291 const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
292 void **obj_table_bak = obj_table;
293 const unsigned int nfree = n;
294 unsigned int parts;
295
296 int64_t * const addr = (int64_t * const)
297 (npa_lf_aura_handle_to_base(mp->pool_id) +
298 NPA_LF_AURA_OP_ALLOCX(0));
299 while (n) {
300 parts = n > 31 ? 32 : rte_align32prevpow2(n);
301 n -= parts;
302 if (unlikely(npa_lf_aura_op_alloc_bulk(wdata, addr,
303 parts, obj_table))) {
304 otx2_npa_clear_alloc(mp, obj_table_bak, nfree - n);
305 return -ENOENT;
306 }
307 obj_table += parts;
308 }
309
310 return 0;
311}
312
313#else
314
315static inline int __rte_hot
316otx2_npa_deq(struct rte_mempool *mp, void **obj_table, unsigned int n)
317{
318 const int64_t wdata = npa_lf_aura_handle_to_aura(mp->pool_id);
319 unsigned int index;
320 uint64_t obj;
321
322 int64_t * const addr = (int64_t *)
323 (npa_lf_aura_handle_to_base(mp->pool_id) +
324 NPA_LF_AURA_OP_ALLOCX(0));
325 for (index = 0; index < n; index++, obj_table++) {
326 obj = npa_lf_aura_op_alloc_one(wdata, addr, obj_table, 0);
327 if (obj == 0) {
328 for (; index > 0; index--) {
329 obj_table--;
330 otx2_npa_enq(mp, obj_table, 1);
331 }
332 return -ENOENT;
333 }
334 *obj_table = (void *)obj;
335 }
336
337 return 0;
338}
339
340#endif
341
342static unsigned int
343otx2_npa_get_count(const struct rte_mempool *mp)
344{
345 return (unsigned int)npa_lf_aura_op_available(mp->pool_id);
346}
347
348static int
349npa_lf_aura_pool_init(struct otx2_mbox *mbox, uint32_t aura_id,
350 struct npa_aura_s *aura, struct npa_pool_s *pool)
351{
352 struct npa_aq_enq_req *aura_init_req, *pool_init_req;
353 struct npa_aq_enq_rsp *aura_init_rsp, *pool_init_rsp;
354 struct otx2_mbox_dev *mdev = &mbox->dev[0];
355 struct otx2_idev_cfg *idev;
356 int rc, off;
357
358 idev = otx2_intra_dev_get_cfg();
359 if (idev == NULL)
360 return -ENOMEM;
361
362 aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
363
364 aura_init_req->aura_id = aura_id;
365 aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
366 aura_init_req->op = NPA_AQ_INSTOP_INIT;
367 otx2_mbox_memcpy(&aura_init_req->aura, aura, sizeof(*aura));
368
369 pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
370
371 pool_init_req->aura_id = aura_id;
372 pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
373 pool_init_req->op = NPA_AQ_INSTOP_INIT;
374 otx2_mbox_memcpy(&pool_init_req->pool, pool, sizeof(*pool));
375
376 otx2_mbox_msg_send(mbox, 0);
377 rc = otx2_mbox_wait_for_rsp(mbox, 0);
378 if (rc < 0)
379 return rc;
380
381 off = mbox->rx_start +
382 RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
383 aura_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
384 off = mbox->rx_start + aura_init_rsp->hdr.next_msgoff;
385 pool_init_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
386
387 if (rc == 2 && aura_init_rsp->hdr.rc == 0 && pool_init_rsp->hdr.rc == 0)
388 return 0;
389 else
390 return NPA_LF_ERR_AURA_POOL_INIT;
391
392 if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
393 return 0;
394
395 aura_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
396 aura_init_req->aura_id = aura_id;
397 aura_init_req->ctype = NPA_AQ_CTYPE_AURA;
398 aura_init_req->op = NPA_AQ_INSTOP_LOCK;
399
400 pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
401 if (!pool_init_req) {
402
403
404
405 otx2_mbox_msg_send(mbox, 0);
406 rc = otx2_mbox_wait_for_rsp(mbox, 0);
407 if (rc < 0) {
408 otx2_err("Failed to LOCK AURA context");
409 return -ENOMEM;
410 }
411
412 pool_init_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
413 if (!pool_init_req) {
414 otx2_err("Failed to LOCK POOL context");
415 return -ENOMEM;
416 }
417 }
418 pool_init_req->aura_id = aura_id;
419 pool_init_req->ctype = NPA_AQ_CTYPE_POOL;
420 pool_init_req->op = NPA_AQ_INSTOP_LOCK;
421
422 rc = otx2_mbox_process(mbox);
423 if (rc < 0) {
424 otx2_err("Failed to lock POOL ctx to NDC");
425 return -ENOMEM;
426 }
427
428 return 0;
429}
430
431static int
432npa_lf_aura_pool_fini(struct otx2_mbox *mbox,
433 uint32_t aura_id,
434 uint64_t aura_handle)
435{
436 struct npa_aq_enq_req *aura_req, *pool_req;
437 struct npa_aq_enq_rsp *aura_rsp, *pool_rsp;
438 struct otx2_mbox_dev *mdev = &mbox->dev[0];
439 struct ndc_sync_op *ndc_req;
440 struct otx2_idev_cfg *idev;
441 int rc, off;
442
443 idev = otx2_intra_dev_get_cfg();
444 if (idev == NULL)
445 return -EINVAL;
446
447
448 rte_delay_us(10);
449 npa_lf_aura_op_alloc(aura_handle, 0);
450
451 pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
452 pool_req->aura_id = aura_id;
453 pool_req->ctype = NPA_AQ_CTYPE_POOL;
454 pool_req->op = NPA_AQ_INSTOP_WRITE;
455 pool_req->pool.ena = 0;
456 pool_req->pool_mask.ena = ~pool_req->pool_mask.ena;
457
458 aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
459 aura_req->aura_id = aura_id;
460 aura_req->ctype = NPA_AQ_CTYPE_AURA;
461 aura_req->op = NPA_AQ_INSTOP_WRITE;
462 aura_req->aura.ena = 0;
463 aura_req->aura_mask.ena = ~aura_req->aura_mask.ena;
464
465 otx2_mbox_msg_send(mbox, 0);
466 rc = otx2_mbox_wait_for_rsp(mbox, 0);
467 if (rc < 0)
468 return rc;
469
470 off = mbox->rx_start +
471 RTE_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
472 pool_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
473
474 off = mbox->rx_start + pool_rsp->hdr.next_msgoff;
475 aura_rsp = (struct npa_aq_enq_rsp *)((uintptr_t)mdev->mbase + off);
476
477 if (rc != 2 || aura_rsp->hdr.rc != 0 || pool_rsp->hdr.rc != 0)
478 return NPA_LF_ERR_AURA_POOL_FINI;
479
480
481 ndc_req = otx2_mbox_alloc_msg_ndc_sync_op(mbox);
482 ndc_req->npa_lf_sync = 1;
483
484 rc = otx2_mbox_process(mbox);
485 if (rc) {
486 otx2_err("Error on NDC-NPA LF sync, rc %d", rc);
487 return NPA_LF_ERR_AURA_POOL_FINI;
488 }
489
490 if (!(idev->npa_lock_mask & BIT_ULL(aura_id)))
491 return 0;
492
493 aura_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
494 aura_req->aura_id = aura_id;
495 aura_req->ctype = NPA_AQ_CTYPE_AURA;
496 aura_req->op = NPA_AQ_INSTOP_UNLOCK;
497
498 rc = otx2_mbox_process(mbox);
499 if (rc < 0) {
500 otx2_err("Failed to unlock AURA ctx to NDC");
501 return -EINVAL;
502 }
503
504 pool_req = otx2_mbox_alloc_msg_npa_aq_enq(mbox);
505 pool_req->aura_id = aura_id;
506 pool_req->ctype = NPA_AQ_CTYPE_POOL;
507 pool_req->op = NPA_AQ_INSTOP_UNLOCK;
508
509 rc = otx2_mbox_process(mbox);
510 if (rc < 0) {
511 otx2_err("Failed to unlock POOL ctx to NDC");
512 return -EINVAL;
513 }
514
515 return 0;
516}
517
518static inline char*
519npa_lf_stack_memzone_name(struct otx2_npa_lf *lf, int pool_id, char *name)
520{
521 snprintf(name, RTE_MEMZONE_NAMESIZE, "otx2_npa_stack_%x_%d",
522 lf->pf_func, pool_id);
523
524 return name;
525}
526
527static inline const struct rte_memzone *
528npa_lf_stack_dma_alloc(struct otx2_npa_lf *lf, char *name,
529 int pool_id, size_t size)
530{
531 return rte_memzone_reserve_aligned(
532 npa_lf_stack_memzone_name(lf, pool_id, name), size, 0,
533 RTE_MEMZONE_IOVA_CONTIG, OTX2_ALIGN);
534}
535
/*
 * Look up the stack memzone named after (lf, pool_id) and free it. 'name' is
 * scratch space that receives the generated memzone name.
 *
 * Returns -EINVAL when no such memzone exists, otherwise the result of
 * rte_memzone_free().
 */
static inline int
npa_lf_stack_dma_free(struct otx2_npa_lf *lf, char *name, int pool_id)
{
	const struct rte_memzone *mz =
		rte_memzone_lookup(npa_lf_stack_memzone_name(lf, pool_id,
							     name));

	return mz != NULL ? rte_memzone_free(mz) : -EINVAL;
}
547
/*
 * Count the trailing zero bits of slab. A zero slab yields 0, which keeps
 * __builtin_ctzll() away from its undefined zero input.
 */
static inline int
bitmap_ctzll(uint64_t slab)
{
	return slab != 0 ? __builtin_ctzll(slab) : 0;
}
556
557static int
558npa_lf_aura_pool_pair_alloc(struct otx2_npa_lf *lf, const uint32_t block_size,
559 const uint32_t block_count, struct npa_aura_s *aura,
560 struct npa_pool_s *pool, uint64_t *aura_handle)
561{
562 int rc, aura_id, pool_id, stack_size, alloc_size;
563 char name[RTE_MEMZONE_NAMESIZE];
564 const struct rte_memzone *mz;
565 uint64_t slab;
566 uint32_t pos;
567
568
569 if (!lf || !block_size || !block_count ||
570 !pool || !aura || !aura_handle)
571 return NPA_LF_ERR_PARAM;
572
573
574 if (block_size % OTX2_ALIGN || block_size < 128 ||
575 block_size > 128 * 1024)
576 return NPA_LF_ERR_INVALID_BLOCK_SZ;
577
578 pos = slab = 0;
579
580 __rte_bitmap_scan_init(lf->npa_bmp);
581
582 rc = rte_bitmap_scan(lf->npa_bmp, &pos, &slab);
583
584 if (rc == 0) {
585 otx2_err("Mempools exhausted, 'max_pools' devargs to increase");
586 return -ERANGE;
587 }
588
589
590 aura_id = pos + bitmap_ctzll(slab);
591
592 rte_bitmap_clear(lf->npa_bmp, aura_id);
593
594
595 pool_id = aura_id;
596 rc = (aura_id < 0 || pool_id >= (int)lf->nr_pools || aura_id >=
597 (int)BIT_ULL(6 + lf->aura_sz)) ? NPA_LF_ERR_AURA_ID_ALLOC : 0;
598 if (rc)
599 goto exit;
600
601
602 stack_size = (block_count + lf->stack_pg_ptrs - 1) / lf->stack_pg_ptrs;
603 alloc_size = stack_size * lf->stack_pg_bytes;
604
605 mz = npa_lf_stack_dma_alloc(lf, name, pool_id, alloc_size);
606 if (mz == NULL) {
607 rc = -ENOMEM;
608 goto aura_res_put;
609 }
610
611
612 aura->pool_addr = pool_id;
613 aura->ena = 1;
614 aura->shift = __builtin_clz(block_count) - 8;
615 aura->limit = block_count;
616 aura->pool_caching = 1;
617 aura->err_int_ena = BIT(NPA_AURA_ERR_INT_AURA_ADD_OVER);
618 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_ADD_UNDER);
619 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_AURA_FREE_UNDER);
620 aura->err_int_ena |= BIT(NPA_AURA_ERR_INT_POOL_DIS);
621
622 aura->err_qint_idx = aura_id % lf->qints;
623
624
625 pool->stack_base = mz->iova;
626 pool->ena = 1;
627 pool->buf_size = block_size / OTX2_ALIGN;
628 pool->stack_max_pages = stack_size;
629 pool->shift = __builtin_clz(block_count) - 8;
630 pool->ptr_start = 0;
631 pool->ptr_end = ~0;
632 pool->stack_caching = 1;
633 pool->err_int_ena = BIT(NPA_POOL_ERR_INT_OVFLS);
634 pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_RANGE);
635 pool->err_int_ena |= BIT(NPA_POOL_ERR_INT_PERR);
636
637
638 pool->err_qint_idx = pool_id % lf->qints;
639
640
641 rc = npa_lf_aura_pool_init(lf->mbox, aura_id, aura, pool);
642 if (rc)
643 goto stack_mem_free;
644
645 *aura_handle = npa_lf_aura_handle_gen(aura_id, lf->base);
646
647
648 npa_lf_aura_op_cnt_set(*aura_handle, 0, block_count);
649
650 npa_lf_aura_op_cnt_get(*aura_handle);
651
652 return 0;
653
654stack_mem_free:
655 rte_memzone_free(mz);
656aura_res_put:
657 rte_bitmap_set(lf->npa_bmp, aura_id);
658exit:
659 return rc;
660}
661
662static int
663npa_lf_aura_pool_pair_free(struct otx2_npa_lf *lf, uint64_t aura_handle)
664{
665 char name[RTE_MEMZONE_NAMESIZE];
666 int aura_id, pool_id, rc;
667
668 if (!lf || !aura_handle)
669 return NPA_LF_ERR_PARAM;
670
671 aura_id = pool_id = npa_lf_aura_handle_to_aura(aura_handle);
672 rc = npa_lf_aura_pool_fini(lf->mbox, aura_id, aura_handle);
673 rc |= npa_lf_stack_dma_free(lf, name, pool_id);
674
675 rte_bitmap_set(lf->npa_bmp, aura_id);
676
677 return rc;
678}
679
680static int
681npa_lf_aura_range_update_check(uint64_t aura_handle)
682{
683 uint64_t aura_id = npa_lf_aura_handle_to_aura(aura_handle);
684 struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
685 struct npa_aura_lim *lim = lf->aura_lim;
686 __otx2_io struct npa_pool_s *pool;
687 struct npa_aq_enq_req *req;
688 struct npa_aq_enq_rsp *rsp;
689 int rc;
690
691 req = otx2_mbox_alloc_msg_npa_aq_enq(lf->mbox);
692
693 req->aura_id = aura_id;
694 req->ctype = NPA_AQ_CTYPE_POOL;
695 req->op = NPA_AQ_INSTOP_READ;
696
697 rc = otx2_mbox_process_msg(lf->mbox, (void *)&rsp);
698 if (rc) {
699 otx2_err("Failed to get pool(0x%"PRIx64") context", aura_id);
700 return rc;
701 }
702
703 pool = &rsp->pool;
704
705 if (lim[aura_id].ptr_start != pool->ptr_start ||
706 lim[aura_id].ptr_end != pool->ptr_end) {
707 otx2_err("Range update failed on pool(0x%"PRIx64")", aura_id);
708 return -ERANGE;
709 }
710
711 return 0;
712}
713
714static int
715otx2_npa_alloc(struct rte_mempool *mp)
716{
717 uint32_t block_size, block_count;
718 uint64_t aura_handle = 0;
719 struct otx2_npa_lf *lf;
720 struct npa_aura_s aura;
721 struct npa_pool_s pool;
722 size_t padding;
723 int rc;
724
725 lf = otx2_npa_lf_obj_get();
726 if (lf == NULL) {
727 rc = -EINVAL;
728 goto error;
729 }
730
731 block_size = mp->elt_size + mp->header_size + mp->trailer_size;
732
733
734
735
736
737
738
739 padding = ((block_size / RTE_CACHE_LINE_SIZE) % 2) ? 0 :
740 RTE_CACHE_LINE_SIZE;
741 mp->trailer_size += padding;
742 block_size += padding;
743
744 block_count = mp->size;
745
746 if (block_size % OTX2_ALIGN != 0) {
747 otx2_err("Block size should be multiple of 128B");
748 rc = -ERANGE;
749 goto error;
750 }
751
752 memset(&aura, 0, sizeof(struct npa_aura_s));
753 memset(&pool, 0, sizeof(struct npa_pool_s));
754 pool.nat_align = 1;
755 pool.buf_offset = 1;
756
757 if ((uint32_t)pool.buf_offset * OTX2_ALIGN != mp->header_size) {
758 otx2_err("Unsupported mp->header_size=%d", mp->header_size);
759 rc = -EINVAL;
760 goto error;
761 }
762
763
764 if (mp->pool_config != NULL)
765 memcpy(&aura, mp->pool_config, sizeof(struct npa_aura_s));
766
767 rc = npa_lf_aura_pool_pair_alloc(lf, block_size, block_count,
768 &aura, &pool, &aura_handle);
769 if (rc) {
770 otx2_err("Failed to alloc pool or aura rc=%d", rc);
771 goto error;
772 }
773
774
775 mp->pool_id = aura_handle;
776 otx2_npa_dbg("lf=%p block_sz=%d block_count=%d aura_handle=0x%"PRIx64,
777 lf, block_size, block_count, aura_handle);
778
779
780 otx2_npa_lf_obj_ref();
781 return 0;
782error:
783 return rc;
784}
785
786static void
787otx2_npa_free(struct rte_mempool *mp)
788{
789 struct otx2_npa_lf *lf = otx2_npa_lf_obj_get();
790 int rc = 0;
791
792 otx2_npa_dbg("lf=%p aura_handle=0x%"PRIx64, lf, mp->pool_id);
793 if (lf != NULL)
794 rc = npa_lf_aura_pool_pair_free(lf, mp->pool_id);
795
796 if (rc)
797 otx2_err("Failed to free pool or aura rc=%d", rc);
798
799
800 otx2_npa_lf_fini();
801}
802
803static ssize_t
804otx2_npa_calc_mem_size(const struct rte_mempool *mp, uint32_t obj_num,
805 uint32_t pg_shift, size_t *min_chunk_size, size_t *align)
806{
807 size_t total_elt_sz;
808
809
810
811
812 total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
813 return rte_mempool_op_calc_mem_size_helper(mp, obj_num, pg_shift,
814 total_elt_sz, min_chunk_size,
815 align);
816}
817
818static uint8_t
819otx2_npa_l1d_way_set_get(uint64_t iova)
820{
821 return (iova >> rte_log2_u32(RTE_CACHE_LINE_SIZE)) & 0x7;
822}
823
824static int
825otx2_npa_populate(struct rte_mempool *mp, unsigned int max_objs, void *vaddr,
826 rte_iova_t iova, size_t len,
827 rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
828{
829#define OTX2_L1D_NB_SETS 8
830 uint64_t distribution[OTX2_L1D_NB_SETS];
831 rte_iova_t start_iova;
832 size_t total_elt_sz;
833 uint8_t set;
834 size_t off;
835 int i;
836
837 if (iova == RTE_BAD_IOVA)
838 return -EINVAL;
839
840 total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
841
842
843 off = total_elt_sz - ((((uintptr_t)vaddr - 1) % total_elt_sz) + 1);
844
845 if (len < off)
846 return -EINVAL;
847
848
849 vaddr = (char *)vaddr + off;
850 iova += off;
851 len -= off;
852
853 memset(distribution, 0, sizeof(uint64_t) * OTX2_L1D_NB_SETS);
854 start_iova = iova;
855 while (start_iova < iova + len) {
856 set = otx2_npa_l1d_way_set_get(start_iova + mp->header_size);
857 distribution[set]++;
858 start_iova += total_elt_sz;
859 }
860
861 otx2_npa_dbg("iova %"PRIx64", aligned iova %"PRIx64"", iova - off,
862 iova);
863 otx2_npa_dbg("length %"PRIu64", aligned length %"PRIu64"",
864 (uint64_t)(len + off), (uint64_t)len);
865 otx2_npa_dbg("element size %"PRIu64"", (uint64_t)total_elt_sz);
866 otx2_npa_dbg("requested objects %"PRIu64", possible objects %"PRIu64"",
867 (uint64_t)max_objs, (uint64_t)(len / total_elt_sz));
868 otx2_npa_dbg("L1D set distribution :");
869 for (i = 0; i < OTX2_L1D_NB_SETS; i++)
870 otx2_npa_dbg("set[%d] : objects : %"PRIu64"", i,
871 distribution[i]);
872
873 npa_lf_aura_op_range_set(mp->pool_id, iova, iova + len);
874
875 if (npa_lf_aura_range_update_check(mp->pool_id) < 0)
876 return -EBUSY;
877
878 return rte_mempool_op_populate_helper(mp,
879 RTE_MEMPOOL_POPULATE_F_ALIGN_OBJ,
880 max_objs, vaddr, iova, len,
881 obj_cb, obj_cb_arg);
882}
883
884static struct rte_mempool_ops otx2_npa_ops = {
885 .name = "octeontx2_npa",
886 .alloc = otx2_npa_alloc,
887 .free = otx2_npa_free,
888 .enqueue = otx2_npa_enq,
889 .get_count = otx2_npa_get_count,
890 .calc_mem_size = otx2_npa_calc_mem_size,
891 .populate = otx2_npa_populate,
892#if defined(RTE_ARCH_ARM64)
893 .dequeue = otx2_npa_deq_arm64,
894#else
895 .dequeue = otx2_npa_deq,
896#endif
897};
898
899MEMPOOL_REGISTER_OPS(otx2_npa_ops);
900