#ifndef _ROC_NPA_H_
#define _ROC_NPA_H_

#define ROC_AURA_ID_MASK (BIT_ULL(16) - 1)
#define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)

#define ROC_NPA_MAX_BLOCK_SZ (128 * 1024)
#define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS 15

/* This value controls how much of the current average resource level is
 * used to calculate the new resource level.
 */
#define ROC_NPA_AVG_CONT 0xE0

/* Each CASP instruction allocates a pair of pointers, and only 15 CASPs
 * can be kept outstanding on CN9K before running out of registers, hence
 * at most 30 pointers per bulk alloc.
 */
#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30

/* Generate a 64-bit handle for fast aura alloc and free operations:
 * the low bits (ROC_AURA_ID_MASK) store the aura ID and the upper bits
 * store the NPA LF base address. This scheme relies on the OS handing
 * out an LF base address aligned to ROC_AURA_ID_MASK + 1.
 */
static inline uint64_t
roc_npa_aura_handle_gen(uint32_t aura_id, uintptr_t addr)
{
	uint64_t val;

	val = aura_id & ROC_AURA_ID_MASK;
	return (uint64_t)addr | val;
}

static inline uint64_t
roc_npa_aura_handle_to_aura(uint64_t aura_handle)
{
	return aura_handle & ROC_AURA_ID_MASK;
}

static inline uintptr_t
roc_npa_aura_handle_to_base(uint64_t aura_handle)
{
	return (uintptr_t)(aura_handle & ~ROC_AURA_ID_MASK);
}
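
/* Illustrative example (not part of the ROC API): because the LF base
 * address is assumed to be aligned past the aura ID bits, the helpers
 * above round-trip cleanly. For a hypothetical base of 0x10000000 and
 * aura ID 5:
 *
 *	uint64_t h = roc_npa_aura_handle_gen(5, (uintptr_t)0x10000000);
 *
 *	roc_npa_aura_handle_to_aura(h); // == 5
 *	roc_npa_aura_handle_to_base(h); // == 0x10000000
 */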

static inline uint64_t
roc_npa_aura_op_alloc(uint64_t aura_handle, const int drop)
{
	uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
	int64_t *addr;

	if (drop)
		wdata |= BIT_ULL(63);

	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_AURA_OP_ALLOCX(0));
	return roc_atomic64_add_nosync(wdata, addr);
}

static inline void
roc_npa_aura_op_free(uint64_t aura_handle, const int fabs, uint64_t iova)
{
	uint64_t reg = roc_npa_aura_handle_to_aura(aura_handle);
	const uint64_t addr =
		roc_npa_aura_handle_to_base(aura_handle) + NPA_LF_AURA_OP_FREE0;

	if (fabs)
		reg |= BIT_ULL(63);

	roc_store_pair(iova, reg, addr);
}

static inline uint64_t
roc_npa_aura_op_cnt_get(uint64_t aura_handle)
{
	uint64_t wdata;
	int64_t *addr;
	uint64_t reg;

	wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_AURA_OP_CNT);
	reg = roc_atomic64_add_nosync(wdata, addr);

	if (reg & BIT_ULL(42))
		return 0;
	else
		return reg & 0xFFFFFFFFF;
}

static inline void
roc_npa_aura_op_cnt_set(uint64_t aura_handle, const int sign, uint64_t count)
{
	uint64_t reg = count & (BIT_ULL(36) - 1);

	if (sign)
		reg |= BIT_ULL(43);

	reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

	plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
				 NPA_LF_AURA_OP_CNT);
}

static inline uint64_t
roc_npa_aura_op_limit_get(uint64_t aura_handle)
{
	uint64_t wdata;
	int64_t *addr;
	uint64_t reg;

	wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_AURA_OP_LIMIT);
	reg = roc_atomic64_add_nosync(wdata, addr);

	if (reg & BIT_ULL(42))
		return 0;
	else
		return reg & ROC_AURA_OP_LIMIT_MASK;
}

static inline void
roc_npa_aura_op_limit_set(uint64_t aura_handle, uint64_t limit)
{
	uint64_t reg = limit & ROC_AURA_OP_LIMIT_MASK;

	reg |= (roc_npa_aura_handle_to_aura(aura_handle) << 44);

	plt_write64(reg, roc_npa_aura_handle_to_base(aura_handle) +
				 NPA_LF_AURA_OP_LIMIT);
}

static inline uint64_t
roc_npa_aura_op_available(uint64_t aura_handle)
{
	uint64_t wdata;
	uint64_t reg;
	int64_t *addr;

	wdata = roc_npa_aura_handle_to_aura(aura_handle) << 44;
	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_POOL_OP_AVAILABLE);
	reg = roc_atomic64_add_nosync(wdata, addr);

	if (reg & BIT_ULL(42))
		return 0;
	else
		return reg & 0xFFFFFFFFF;
}

static inline uint64_t
roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
{
	union {
		uint64_t u;
		struct npa_aura_op_wdata_s s;
	} op_wdata;
	int64_t *addr;
	uint64_t reg;

	op_wdata.u = 0;
	op_wdata.s.aura = roc_npa_aura_handle_to_aura(aura_handle);
	if (drop)
		op_wdata.s.drop = 1; /* drop is a one-bit field; OR-ing in
				      * BIT_ULL(63) would truncate to zero.
				      */

	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_POOL_OP_PC);

	reg = roc_atomic64_add_nosync(op_wdata.u, addr);

	/* NPA_LF_POOL_OP_PC read data layout: bits [47:0] hold OP_PC and
	 * bit 48 is the OP_ERR flag.
	 */
	if (reg & BIT_ULL(48))
		return 0;
	else
		return reg & 0xFFFFFFFFFFFF;
}

static inline int
roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
			       unsigned int num, const int dis_wait,
			       const int drop)
{
	unsigned int i;
	int64_t *addr;
	uint64_t res;
	union {
		uint64_t u;
		struct npa_batch_alloc_compare_s compare_s;
	} cmp;

	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
		return -1;

	/* Zero the first word of every result cache line so that the
	 * status fields start out invalid.
	 */
	for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
		buf[i] = 0;

	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
			   NPA_LF_AURA_BATCH_ALLOC);
	cmp.u = 0;
	cmp.compare_s.aura = roc_npa_aura_handle_to_aura(aura_handle);
	cmp.compare_s.drop = drop;
	cmp.compare_s.stype = ALLOC_STYPE_STF;
	cmp.compare_s.dis_wait = dis_wait;
	cmp.compare_s.count = num;

	res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
	if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
		return -1;

	return 0;
}

static inline void
roc_npa_batch_alloc_wait(uint64_t *cache_line)
{
	/* The batch alloc completion code lives in bits [6:5] of the first
	 * word of the status cache line; poll until it leaves the invalid
	 * state.
	 */
	while (((__atomic_load_n(cache_line, __ATOMIC_RELAXED) >> 5) & 0x3) ==
	       ALLOC_CCODE_INVAL)
		;
}

static inline unsigned int
roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num)
{
	unsigned int count, i;

	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
		return 0;

	count = 0;

	for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
		struct npa_batch_alloc_status_s *status;

		status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];

		roc_npa_batch_alloc_wait(&aligned_buf[i]);
		count += status->count;
	}

	return count;
}

static inline unsigned int
roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
				 unsigned int num)
{
	unsigned int count, i;

	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
		return 0;

	count = 0;

	for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
		struct npa_batch_alloc_status_s *status;
		int line_count;

		status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];

		roc_npa_batch_alloc_wait(&aligned_buf[i]);

		line_count = status->count;

		/* Clear the status memory */
		status->ccode = 0;
		status->count = 0;

		/* Compact the allocated pointers towards the start of
		 * 'buf', since partially filled status lines leave holes
		 * in 'aligned_buf'. memmove() is used because source and
		 * destination may overlap.
		 */
		memmove(&buf[count], &aligned_buf[i],
			line_count * sizeof(uint64_t));

		count += line_count;
	}

	return count;
}

static inline void
roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
			  unsigned int num, const int fabs)
{
	unsigned int i;

	for (i = 0; i < num; i++) {
		const uint64_t inbuf = buf[i];

		roc_npa_aura_op_free(aura_handle, fabs, inbuf);
	}
}

static inline unsigned int
roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
			    uint64_t *aligned_buf, unsigned int num,
			    const int dis_wait, const int drop,
			    const int partial)
{
	unsigned int count, chunk, num_alloc;

	/* Status lines are written in whole ROC_ALIGN units, so the
	 * scratch buffer must be ROC_ALIGN aligned.
	 */
	if (((uint64_t)aligned_buf & (ROC_ALIGN - 1)) != 0)
		return 0;

	count = 0;
	while (num) {
		chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
				ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
				num;

		if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
						   chunk, dis_wait, drop))
			break;

		num_alloc = roc_npa_aura_batch_alloc_extract(buf, aligned_buf,
							     chunk);

		count += num_alloc;
		buf += num_alloc;
		num -= num_alloc;

		if (num_alloc != chunk)
			break;
	}

	/* If the requested number of pointers was not allocated and a
	 * partial allocation is not acceptable, free what was allocated
	 * and report failure.
	 */
	if (unlikely(num != 0 && !partial)) {
		roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
		count = 0;
	}

	return count;
}
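
/* Usage sketch (illustrative only; assumes 'handle' was returned by
 * roc_npa_pool_create() on a CN10K LF). The scratch area must be
 * ROC_ALIGN aligned because the NPA writes whole status cache lines
 * into it:
 *
 *	uint64_t ptrs[256];
 *	uint64_t scratch[256] __attribute__((aligned(ROC_ALIGN)));
 *	unsigned int n;
 *
 *	n = roc_npa_aura_op_batch_alloc(handle, ptrs, scratch, 256,
 *					0, 0, 1);
 *	// With partial == 1, n may be < 256 when the pool runs dry;
 *	// with partial == 0 a short allocation is rolled back and 0
 *	// is returned.
 */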

static inline void
roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
			unsigned int num, const int fabs, uint64_t lmt_addr,
			uint64_t lmt_id)
{
	uint64_t addr, tar_addr, free0;
	volatile uint64_t *lmt_data;
	unsigned int i;

	if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
		return;

	lmt_data = (uint64_t *)lmt_addr;

	addr = roc_npa_aura_handle_to_base(aura_handle) +
	       NPA_LF_AURA_BATCH_FREE0;

	/* Build the NPA_LF_AURA_BATCH_FREE0 word: the aura ID goes in the
	 * low bits, bit 63 carries the FABS (free absolute) flag and bit 32
	 * (COUNT_EOT) flags an odd number of pointers, since pointers are
	 * otherwise submitted in 128-bit pairs.
	 */
	free0 = roc_npa_aura_handle_to_aura(aura_handle);
	if (fabs)
		free0 |= (0x1UL << 63);
	if (num & 0x1)
		free0 |= (0x1UL << 32);

	/* tar_addr[6:4] carries the LMTST size in units of 128 bits */
	tar_addr = addr | ((num >> 1) << 4);

	lmt_data[0] = free0;
	for (i = 0; i < num; i++)
		lmt_data[i + 1] = buf[i];

	roc_lmt_submit_steorl(lmt_id, tar_addr);
	plt_io_wmb();
}

static inline void
roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
			   unsigned int num, const int fabs, uint64_t lmt_addr,
			   uint64_t lmt_id)
{
	unsigned int chunk;

	while (num) {
		chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
				ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
				num;

		roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
					lmt_id);

		buf += chunk;
		num -= chunk;
	}
}
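
/* Usage sketch (illustrative only): 'lmt_addr' and 'lmt_id' identify a
 * per-core LMT line and are assumed to come from the platform's LMT
 * setup; they are not allocated by this header. Freeing n pointers
 * previously obtained from 'handle':
 *
 *	roc_npa_aura_op_batch_free(handle, ptrs, n, 0, lmt_addr, lmt_id);
 *
 * The helper splits the request into LMTST submissions of at most
 * ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS pointers each.
 */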

static inline unsigned int
roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
			const int drop)
{
#if defined(__aarch64__)
	uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle);
	unsigned int i, count;
	uint64_t addr;

	if (drop)
		wdata |= BIT_ULL(63);

	addr = roc_npa_aura_handle_to_base(aura_handle) +
	       NPA_LF_AURA_OP_ALLOCX(0);

	/* Each CASP pulls a pair of pointers from the ALLOC register, so
	 * num / 2 CASPs are issued per case. The case 30 variant parks
	 * x24-x30 in SIMD registers because it runs out of general-purpose
	 * registers.
	 */
	switch (num) {
	case 30:
		asm volatile(
			".cpu generic+lse\n"
			"mov v18.d[0], %[dst]\n"
			"mov v18.d[1], %[loc]\n"
			"mov v19.d[0], %[wdata]\n"
			"mov v19.d[1], x30\n"
			"mov v20.d[0], x24\n"
			"mov v20.d[1], x25\n"
			"mov v21.d[0], x26\n"
			"mov v21.d[1], x27\n"
			"mov v22.d[0], x28\n"
			"mov v22.d[1], x29\n"
			"mov x28, v19.d[0]\n"
			"mov x29, v19.d[0]\n"
			"mov x30, v18.d[1]\n"
			"casp x0, x1, x28, x29, [x30]\n"
			"casp x2, x3, x28, x29, [x30]\n"
			"casp x4, x5, x28, x29, [x30]\n"
			"casp x6, x7, x28, x29, [x30]\n"
			"casp x8, x9, x28, x29, [x30]\n"
			"casp x10, x11, x28, x29, [x30]\n"
			"casp x12, x13, x28, x29, [x30]\n"
			"casp x14, x15, x28, x29, [x30]\n"
			"casp x16, x17, x28, x29, [x30]\n"
			"casp x18, x19, x28, x29, [x30]\n"
			"casp x20, x21, x28, x29, [x30]\n"
			"casp x22, x23, x28, x29, [x30]\n"
			"casp x24, x25, x28, x29, [x30]\n"
			"casp x26, x27, x28, x29, [x30]\n"
			"casp x28, x29, x28, x29, [x30]\n"
			"mov x30, v18.d[0]\n"
			"stp x0, x1, [x30]\n"
			"stp x2, x3, [x30, #16]\n"
			"stp x4, x5, [x30, #32]\n"
			"stp x6, x7, [x30, #48]\n"
			"stp x8, x9, [x30, #64]\n"
			"stp x10, x11, [x30, #80]\n"
			"stp x12, x13, [x30, #96]\n"
			"stp x14, x15, [x30, #112]\n"
			"stp x16, x17, [x30, #128]\n"
			"stp x18, x19, [x30, #144]\n"
			"stp x20, x21, [x30, #160]\n"
			"stp x22, x23, [x30, #176]\n"
			"stp x24, x25, [x30, #192]\n"
			"stp x26, x27, [x30, #208]\n"
			"stp x28, x29, [x30, #224]\n"
			"mov %[dst], v18.d[0]\n"
			"mov %[loc], v18.d[1]\n"
			"mov %[wdata], v19.d[0]\n"
			"mov x30, v19.d[1]\n"
			"mov x24, v20.d[0]\n"
			"mov x25, v20.d[1]\n"
			"mov x26, v21.d[0]\n"
			"mov x27, v21.d[1]\n"
			"mov x28, v22.d[0]\n"
			"mov x29, v22.d[1]\n"
			:
			: [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf)
			: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
			  "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
			  "x15", "x16", "x17", "x18", "x19", "x20", "x21",
			  "x22", "x23", "v18", "v19", "v20", "v21", "v22");
		break;
	case 16:
		asm volatile(
			".cpu generic+lse\n"
			"mov x16, %[wdata]\n"
			"mov x17, %[wdata]\n"
			"casp x0, x1, x16, x17, [%[loc]]\n"
			"casp x2, x3, x16, x17, [%[loc]]\n"
			"casp x4, x5, x16, x17, [%[loc]]\n"
			"casp x6, x7, x16, x17, [%[loc]]\n"
			"casp x8, x9, x16, x17, [%[loc]]\n"
			"casp x10, x11, x16, x17, [%[loc]]\n"
			"casp x12, x13, x16, x17, [%[loc]]\n"
			"casp x14, x15, x16, x17, [%[loc]]\n"
			"stp x0, x1, [%[dst]]\n"
			"stp x2, x3, [%[dst], #16]\n"
			"stp x4, x5, [%[dst], #32]\n"
			"stp x6, x7, [%[dst], #48]\n"
			"stp x8, x9, [%[dst], #64]\n"
			"stp x10, x11, [%[dst], #80]\n"
			"stp x12, x13, [%[dst], #96]\n"
			"stp x14, x15, [%[dst], #112]\n"
			:
			: [wdata] "r"(wdata), [dst] "r"(buf), [loc] "r"(addr)
			: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
			  "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14",
			  "x15", "x16", "x17");
		break;
	case 8:
		asm volatile(
			".cpu generic+lse\n"
			"mov x16, %[wdata]\n"
			"mov x17, %[wdata]\n"
			"casp x0, x1, x16, x17, [%[loc]]\n"
			"casp x2, x3, x16, x17, [%[loc]]\n"
			"casp x4, x5, x16, x17, [%[loc]]\n"
			"casp x6, x7, x16, x17, [%[loc]]\n"
			"stp x0, x1, [%[dst]]\n"
			"stp x2, x3, [%[dst], #16]\n"
			"stp x4, x5, [%[dst], #32]\n"
			"stp x6, x7, [%[dst], #48]\n"
			:
			: [wdata] "r"(wdata), [dst] "r"(buf), [loc] "r"(addr)
			: "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6",
			  "x7", "x16", "x17");
		break;
	case 4:
		asm volatile(
			".cpu generic+lse\n"
			"mov x16, %[wdata]\n"
			"mov x17, %[wdata]\n"
			"casp x0, x1, x16, x17, [%[loc]]\n"
			"casp x2, x3, x16, x17, [%[loc]]\n"
			"stp x0, x1, [%[dst]]\n"
			"stp x2, x3, [%[dst], #16]\n"
			:
			: [wdata] "r"(wdata), [dst] "r"(buf), [loc] "r"(addr)
			: "memory", "x0", "x1", "x2", "x3", "x16", "x17");
		break;
	case 2:
		asm volatile(
			".cpu generic+lse\n"
			"mov x16, %[wdata]\n"
			"mov x17, %[wdata]\n"
			"casp x0, x1, x16, x17, [%[loc]]\n"
			"stp x0, x1, [%[dst]]\n"
			:
			: [wdata] "r"(wdata), [dst] "r"(buf), [loc] "r"(addr)
			: "memory", "x0", "x1", "x16", "x17");
		break;
	case 1:
		buf[0] = roc_npa_aura_op_alloc(aura_handle, drop);
		return !!buf[0];
	}

	/* Pack the valid pointers, dropping the zero results of failed
	 * allocations.
	 */
	for (i = 0, count = 0; i < num; i++)
		if (buf[i])
			buf[count++] = buf[i];

	return count;
#else
	unsigned int i, count;

	for (i = 0, count = 0; i < num; i++) {
		buf[count] = roc_npa_aura_op_alloc(aura_handle, drop);
		if (buf[count])
			count++;
	}

	return count;
#endif
}

static inline unsigned int
roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf,
			   unsigned int num, const int drop, const int partial)
{
	unsigned int chunk, count, num_alloc;

	count = 0;
	while (num) {
		/* roc_npa_aura_bulk_alloc() handles only the sizes in its
		 * switch (30 or a power of two), so round smaller requests
		 * down to the previous power of two.
		 */
		chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ?
				ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS :
				plt_align32prevpow2(num);

		num_alloc =
			roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop);

		count += num_alloc;
		buf += num_alloc;
		num -= num_alloc;

		if (unlikely(num_alloc != chunk))
			break;
	}

	/* If the requested number of pointers was not allocated and a
	 * partial allocation is not acceptable, free what was allocated
	 * and report failure.
	 */
	if (unlikely(num != 0 && !partial)) {
		roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
		count = 0;
	}

	return count;
}
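
/* Usage sketch (illustrative only): CN9K-style bulk allocation of up to
 * 64 pointers, all-or-nothing:
 *
 *	uint64_t ptrs[64];
 *	unsigned int n;
 *
 *	n = roc_npa_aura_op_bulk_alloc(handle, ptrs, 64, 0, 0);
 *	// n is either 64 or 0 here; with partial == 1 it may be any
 *	// value up to 64.
 */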

struct roc_npa {
	struct plt_pci_device *pci_dev;

#define ROC_NPA_MEM_SZ (1 * 1024)
	uint8_t reserved[ROC_NPA_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;

int __roc_api roc_npa_dev_init(struct roc_npa *roc_npa);
int __roc_api roc_npa_dev_fini(struct roc_npa *roc_npa);

/* NPA pool */
int __roc_api roc_npa_pool_create(uint64_t *aura_handle, uint32_t block_size,
				  uint32_t block_count, struct npa_aura_s *aura,
				  struct npa_pool_s *pool);
int __roc_api roc_npa_aura_limit_modify(uint64_t aura_handle,
					uint16_t aura_limit);
int __roc_api roc_npa_pool_destroy(uint64_t aura_handle);
int __roc_api roc_npa_pool_range_update_check(uint64_t aura_handle);
void __roc_api roc_npa_aura_op_range_set(uint64_t aura_handle,
					 uint64_t start_iova,
					 uint64_t end_iova);
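
/* Typical pool bring-up sketch (illustrative only; error handling and
 * buffer seeding omitted). 'mem_iova', 'block_sz' and 'block_count' are
 * hypothetical values describing an IOVA-contiguous backing area:
 *
 *	struct npa_aura_s aura;
 *	struct npa_pool_s pool;
 *	uint64_t handle;
 *
 *	memset(&aura, 0, sizeof(aura));
 *	memset(&pool, 0, sizeof(pool));
 *	pool.nat_align = 1;
 *
 *	roc_npa_pool_create(&handle, block_sz, block_count, &aura, &pool);
 *	roc_npa_aura_op_range_set(handle, mem_iova,
 *				  mem_iova + (uint64_t)block_count * block_sz);
 *	// Seed buffers with roc_npa_aura_op_free() and tear down with
 *	// roc_npa_pool_destroy(handle).
 */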

/* Init callbacks */
typedef int (*roc_npa_lf_init_cb_t)(struct plt_pci_device *pci_dev);
int __roc_api roc_npa_lf_init_cb_register(roc_npa_lf_init_cb_t cb);

/* Debug */
int __roc_api roc_npa_ctx_dump(void);
int __roc_api roc_npa_dump(void);

/* Reset the operation performance counter */
int __roc_api roc_npa_pool_op_pc_reset(uint64_t aura_handle);

#endif /* _ROC_NPA_H_ */