/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2019 Marvell International Ltd.
 */

#include <rte_vect.h>

#include "otx2_ethdev.h"

#define NIX_XMIT_FC_OR_RETURN(txq, pkts) do {				\
	/* Cached value is low, Update the fc_cache_pkts */		\
	if (unlikely((txq)->fc_cache_pkts < (pkts))) {			\
		/* Multiply with sqe_per_sqb to express in pkts */	\
		(txq)->fc_cache_pkts =					\
			((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem) <<	\
				(txq)->sqes_per_sqb_log2;		\
		/* Check it again for the room */			\
		if (unlikely((txq)->fc_cache_pkts < (pkts)))		\
			return 0;					\
	}								\
} while (0)
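
/* Worked example of the credit refresh above (hypothetical numbers,
 * assuming *fc_mem counts SQBs consumed by hardware): with
 * nb_sqb_bufs_adj = 512, *fc_mem = 500 and sqes_per_sqb_log2 = 3
 * (eight SQEs per SQB), the refreshed cache is
 * (512 - 500) << 3 = 96 packets worth of queue room.
 */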

static __rte_always_inline uint16_t
nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
	      uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct otx2_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;
		for (i = 0; i < pkts; i++)
			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Lets commit any changes in the packet here as no further changes
	 * to the packet will be done unless no fast free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		/* Passing segdw as 4: HDR + EXT + SG + SMEM */
		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
					     tx_pkts[i]->ol_flags, 4, flags);
		otx2_nix_xmit_one(cmd, lmt_addr, io_addr, flags);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}
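
/* Multi-segment variant of nix_xmit_pkts(): same flow, except
 * otx2_nix_prepare_mseg() expands each packet's scatter/gather list and
 * returns the descriptor size in doublewords (segdw), which is then fed
 * to the timestamp-prepare and LMT-submit helpers.
 */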
static __rte_always_inline uint16_t
nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
		   uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct otx2_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	void *lmt_addr = txq->lmt_addr;
	uint64_t lso_tun_fmt;
	uint16_t segdw;
	uint64_t i;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	otx2_lmt_mov(cmd, &txq->cmd[0], otx2_nix_tx_ext_subs(flags));

	/* Perform header writes before barrier for TSO */
	if (flags & NIX_TX_OFFLOAD_TSO_F) {
		lso_tun_fmt = txq->lso_tun_fmt;
		for (i = 0; i < pkts; i++)
			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
	}

	/* Lets commit any changes in the packet here as no further changes
	 * to the packet will be done unless no fast free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	for (i = 0; i < pkts; i++) {
		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
		segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags);
		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
					     tx_pkts[i]->ol_flags, segdw,
					     flags);
		otx2_nix_xmit_mseg_one(cmd, lmt_addr, io_addr, segdw);
	}

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	return pkts;
}

#if defined(RTE_ARCH_ARM64)

#define NIX_DESCS_PER_LOOP	4
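
/* The NEON path below assembles descriptors for four packets per loop
 * iteration: each packet contributes a send-header pair (cmdX0) and an
 * SG-descriptor pair (cmdX1), i.e. eight 128-bit stores filling one
 * 128-byte LMT line per burst of four packets.
 */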
static __rte_always_inline uint16_t
nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
		     uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
	uint64x2_t senddesc01_w0, senddesc23_w0;
	uint64x2_t senddesc01_w1, senddesc23_w1;
	uint64x2_t sgdesc01_w0, sgdesc23_w0;
	uint64x2_t sgdesc01_w1, sgdesc23_w1;
	struct otx2_eth_txq *txq = tx_queue;
	uint64_t *lmt_addr = txq->lmt_addr;
	rte_iova_t io_addr = txq->io_addr;
	uint64x2_t ltypes01, ltypes23;
	uint64x2_t xtmp128, ytmp128;
	uint64x2_t xmask01, xmask23;
	uint64x2_t cmd00, cmd01;
	uint64x2_t cmd10, cmd11;
	uint64x2_t cmd20, cmd21;
	uint64x2_t cmd30, cmd31;
	uint64_t lmt_status, i;
	uint16_t pkts_left;

	NIX_XMIT_FC_OR_RETURN(txq, pkts);

	pkts_left = pkts & (NIX_DESCS_PER_LOOP - 1);
	pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);

	/* Reduce the cached count */
	txq->fc_cache_pkts -= pkts;

	/* Lets commit any changes in the packet here as no further changes
	 * to the packet will be done unless no fast free is enabled.
	 */
	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
		rte_io_wmb();

	senddesc01_w0 = vld1q_dup_u64(&txq->cmd[0]);
	senddesc23_w0 = senddesc01_w0;
	senddesc01_w1 = vdupq_n_u64(0);
	senddesc23_w1 = senddesc01_w1;
	sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
	sgdesc23_w0 = sgdesc01_w0;

	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
		senddesc01_w0 = vbicq_u64(senddesc01_w0,
					  vdupq_n_u64(0xFFFFFFFF));
		sgdesc01_w0 = vbicq_u64(sgdesc01_w0,
					vdupq_n_u64(0xFFFFFFFF));

		senddesc23_w0 = senddesc01_w0;
		sgdesc23_w0 = sgdesc01_w0;

		/* Move mbufs to iova */
		mbuf0 = (uint64_t *)tx_pkts[0];
		mbuf1 = (uint64_t *)tx_pkts[1];
		mbuf2 = (uint64_t *)tx_pkts[2];
		mbuf3 = (uint64_t *)tx_pkts[3];

		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, buf_iova));
		/*
		 * Get mbuf's, olflags, iova, pktlen, dataoff
		 * dataoff_iovaX.D[0] = iova,
		 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
		 * len_olflagsX.D[0] = ol_flags,
		 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
		 */
		dataoff_iova0 = vld1q_u64(mbuf0);
		len_olflags0 = vld1q_u64(mbuf0 + 2);
		dataoff_iova1 = vld1q_u64(mbuf1);
		len_olflags1 = vld1q_u64(mbuf1 + 2);
		dataoff_iova2 = vld1q_u64(mbuf2);
		len_olflags2 = vld1q_u64(mbuf2 + 2);
		dataoff_iova3 = vld1q_u64(mbuf3);
		len_olflags3 = vld1q_u64(mbuf3 + 2);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			struct rte_mbuf *mbuf;
			/* Set don't free bit if reference count > 1 */
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf0 -
				offsetof(struct rte_mbuf, buf_iova));

			if (otx2_nix_prefree_seg(mbuf))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool,
							  (void **)&mbuf,
							  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf1 -
				offsetof(struct rte_mbuf, buf_iova));
			if (otx2_nix_prefree_seg(mbuf))
				xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool,
							  (void **)&mbuf,
							  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf2 -
				offsetof(struct rte_mbuf, buf_iova));
			if (otx2_nix_prefree_seg(mbuf))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
			else
				RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool,
							  (void **)&mbuf,
							  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf3 -
				offsetof(struct rte_mbuf, buf_iova));
			if (otx2_nix_prefree_seg(mbuf))
				xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
			else
				RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool,
							  (void **)&mbuf,
							  1, 0);
			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Ensuring mbuf fields which got updated in
			 * otx2_nix_prefree_seg are written before LMTST.
			 */
			rte_io_wmb();
		} else {
			struct rte_mbuf *mbuf;
			/* Mark mempool object as "put" since
			 * it is freed by NIX
			 */
			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf0 -
				offsetof(struct rte_mbuf, buf_iova));
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf,
						  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf1 -
				offsetof(struct rte_mbuf, buf_iova));
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf,
						  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf2 -
				offsetof(struct rte_mbuf, buf_iova));
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf,
						  1, 0);

			mbuf = (struct rte_mbuf *)((uintptr_t)mbuf3 -
				offsetof(struct rte_mbuf, buf_iova));
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf,
						  1, 0);
			RTE_SET_USED(mbuf);
		}

		/* Move mbufs to point pool */
		mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));
		mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
				     offsetof(struct rte_mbuf, pool) -
				     offsetof(struct rte_mbuf, buf_iova));

		if (flags &
		    (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
		     NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
			/* Get tx_offload for ol2, ol3, l2, l3 lengths */
			/*
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
			 */
			asm volatile ("LD1 {%[a].D}[0],[%[in]]\n\t" :
				      [a]"+w"(senddesc01_w1) :
				      [in]"r"(mbuf0 + 2) : "memory");

			asm volatile ("LD1 {%[a].D}[1],[%[in]]\n\t" :
				      [a]"+w"(senddesc01_w1) :
				      [in]"r"(mbuf1 + 2) : "memory");

			asm volatile ("LD1 {%[b].D}[0],[%[in]]\n\t" :
				      [b]"+w"(senddesc23_w1) :
				      [in]"r"(mbuf2 + 2) : "memory");

			asm volatile ("LD1 {%[b].D}[1],[%[in]]\n\t" :
				      [b]"+w"(senddesc23_w1) :
				      [in]"r"(mbuf3 + 2) : "memory");

			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		} else {
			/* Get pool pointer alone */
			mbuf0 = (uint64_t *)*mbuf0;
			mbuf1 = (uint64_t *)*mbuf1;
			mbuf2 = (uint64_t *)*mbuf2;
			mbuf3 = (uint64_t *)*mbuf3;
		}

		const uint8x16_t shuf_mask2 = {
			0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
		};
		xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
		ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);

		/* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
		const uint64x2_t and_mask0 = {
			0xFFFFFFFFFFFFFFFF,
			0x000000000000FFFF,
		};

		dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
		dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
		dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
		dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);

		/*
		 * Pick only 16 bits of pktlen preset at bits 63:32
		 * and place them at bits 15:0.
		 */
		xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
		ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);

		/* Add pairwise to get dataoff + iova in sgdesc_w1 */
		sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
		sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);

		/* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
		 * pktlen at 15:0 position.
		 */
		sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
		sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
		senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
		senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);

		if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
		    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * il3/il4 types. But we still use ol3/ol4 types in
			 * senddesc_w1 as only one header processing is enabled.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = il4type:il3type */
				0x04, /* none (IPv6 assumed) */
				0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
				0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
				0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
				0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IP_CKSUM | TCP_CKSUM */
				0x23, /* RTE_MBUF_F_TX_IP_CKSUM | SCTP_CKSUM */
				0x33, /* RTE_MBUF_F_TX_IP_CKSUM | UDP_CKSUM */
				0x02, /* RTE_MBUF_F_TX_IPV4 */
				0x12, /* RTE_MBUF_F_TX_IPV4 | TCP_CKSUM */
				0x22, /* RTE_MBUF_F_TX_IPV4 | SCTP_CKSUM */
				0x32, /* RTE_MBUF_F_TX_IPV4 | UDP_CKSUM */
				0x03, /* RTE_MBUF_F_TX_IPV4 | IP_CKSUM */
				0x13, /* RTE_MBUF_F_TX_IPV4 | IP_CKSUM |
				       * TCP_CKSUM
				       */
				0x23, /* RTE_MBUF_F_TX_IPV4 | IP_CKSUM |
				       * SCTP_CKSUM
				       */
				0x33, /* RTE_MBUF_F_TX_IPV4 | IP_CKSUM |
				       * UDP_CKSUM
				       */
			};

			/* Extract olflags to translate to iltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 * E(47):L3_LEN(9):L2_LEN(7+z)
			 */
			senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
			senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);

			/* Move OLFLAGS bits 55:52 to 51:48
			 * with zeros prepended on the byte and rest
			 * don't care
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 4);
			ytmp128 = vshrq_n_u8(ytmp128, 4);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Just use ld1q to retrieve aura
			 * when we don't need tx_offload
			 */
			mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
					offsetof(struct rte_mempool, pool_id));
			mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
					offsetof(struct rte_mempool, pool_id));
			mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
					offsetof(struct rte_mempool, pool_id));
			mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
					offsetof(struct rte_mempool, pool_id));

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Create second half of 4W cmd for 4 mbufs (sgdesc) */
			cmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
			cmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;
			asm volatile ("LD1 {%[a].H}[0],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf0) : "memory");

			asm volatile ("LD1 {%[a].H}[4],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf1) : "memory");

			asm volatile ("LD1 {%[b].H}[0],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf2) : "memory");

			asm volatile ("LD1 {%[b].H}[4],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf3) : "memory");
			xmask01 = vshlq_n_u64(xmask01, 20);
			xmask23 = vshlq_n_u64(xmask23, 20);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);

			/* Create first half of 4W cmd for 4 mbufs (sendhdr) */
			cmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);
			cmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);
			cmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);
			cmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		} else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/*
			 * Lookup table to translate ol_flags to
			 * ol3/ol4 types.
			 */
			const uint8x16_t tbl = {
				/* [0-15] = ol4type:ol3type */
				0x00, /* none */
				0x03, /* OUTER_IP_CKSUM */
				0x02, /* OUTER_IPV4 */
				0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
				0x04, /* OUTER_IPV6 */
				0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 */
				0x00, /* OUTER_IPV6 | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
				0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
				0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
				       * OUTER_IP_CKSUM
				       */
				0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IP_CKSUM
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4
				       */
				0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
				       * OUTER_IPV4 | OUTER_IP_CKSUM
				       */
			};

			/* Extract olflags to translate to oltypes */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 * E(47):OL3_LEN(9):OL2_LEN(7+z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer ol flags only */
			const uint64x2_t o_cksum_mask = {
				0x1C00020000000000,
				0x1C00020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, o_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */
			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift oltype by 2 to start nibble from BIT(56)
			 * instead of BIT(58)
			 */
			xtmp128 = vshrq_n_u8(xtmp128, 2);
			ytmp128 = vshrq_n_u8(ytmp128, 2);
			/*
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 * E(48):L3_LEN(8):L2_LEN(z+7)
			 */
			const int8x16_t tshft3 = {
				-1, 0, 8, 8, 8, 8, 8, 8,
				-1, 0, 8, 8, 8, 8, 8, 8,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Do the lookup */
			ltypes01 = vqtbl1q_u8(tbl, xtmp128);
			ltypes23 = vqtbl1q_u8(tbl, ytmp128);

			/* Just use ld1q to retrieve aura
			 * when we don't need tx_offload
			 */
			mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
					offsetof(struct rte_mempool, pool_id));
			mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
					offsetof(struct rte_mempool, pool_id));
			mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
					offsetof(struct rte_mempool, pool_id));
			mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
					offsetof(struct rte_mempool, pool_id));

			/* Pick only relevant fields i.e Bit 56:63 of oltype
			 * and place it in ol3/ol4type of senddesc_w1
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
			 * a [E(32):E(16):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E(32):E(16):(OL3+OL2):OL2]
			 * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u16(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u16(senddesc23_w1, 8));

			/* Create second half of 4W cmd for 4 mbufs (sgdesc) */
			cmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
			cmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;
			asm volatile ("LD1 {%[a].H}[0],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf0) : "memory");

			asm volatile ("LD1 {%[a].H}[4],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf1) : "memory");

			asm volatile ("LD1 {%[b].H}[0],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf2) : "memory");

			asm volatile ("LD1 {%[b].H}[4],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf3) : "memory");
			xmask01 = vshlq_n_u64(xmask01, 20);
			xmask23 = vshlq_n_u64(xmask23, 20);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);

			/* Create first half of 4W cmd for 4 mbufs (sendhdr) */
			cmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);
			cmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);
			cmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);
			cmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);

		} else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
			   (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
			/* Lookup table to translate ol_flags to
			 * ol4type, ol3type, il4type, il3type of senddesc_w1
			 */
			const uint8x16x2_t tbl = {
			{
				{
					/* [0-15] = il4type:il3type */
					0x04, /* none (IPv6) */
					0x14, /* TCP_CKSUM (IPv6) */
					0x24, /* SCTP_CKSUM (IPv6) */
					0x34, /* UDP_CKSUM (IPv6) */
					0x03, /* IP_CKSUM */
					0x13, /* IP_CKSUM | TCP_CKSUM */
					0x23, /* IP_CKSUM | SCTP_CKSUM */
					0x33, /* IP_CKSUM | UDP_CKSUM */
					0x02, /* IPV4 */
					0x12, /* IPV4 | TCP_CKSUM */
					0x22, /* IPV4 | SCTP_CKSUM */
					0x32, /* IPV4 | UDP_CKSUM */
					0x03, /* IPV4 | IP_CKSUM */
					0x13, /* IPV4 | IP_CKSUM |
					       * TCP_CKSUM
					       */
					0x23, /* IPV4 | IP_CKSUM |
					       * SCTP_CKSUM
					       */
					0x33, /* IPV4 | IP_CKSUM |
					       * UDP_CKSUM
					       */
				},

				{
					/* [16-31] = ol4type:ol3type */
					0x00, /* none */
					0x03, /* OUTER_IP_CKSUM */
					0x02, /* OUTER_IPV4 */
					0x03, /* OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x04, /* OUTER_IPV6 */
					0x00, /* OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 */
					0x00, /* OUTER_IPV6 | OUTER_IPV4 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IP_CKSUM
					       */
					0x32, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4
					       */
					0x33, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
					0x34, /* OUTER_UDP_CKSUM |
					       * OUTER_IPV6
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IP_CKSUM
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4
					       */
					0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
					       * OUTER_IPV4 | OUTER_IP_CKSUM
					       */
				},
			}
			};

			/* Extract olflags to translate to oltype & iltype */
			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);

			/*
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
			 */
			const uint32x4_t tshft_4 = {
				1, 0,
				1, 0,
			};
			senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
			senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);

			/*
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
			 */
			const uint8x16_t shuf_mask5 = {
				0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
				0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
			};
			senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
			senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);

			/* Extract outer and inner header ol_flags */
			const uint64x2_t oi_cksum_mask = {
				0x1CF0020000000000,
				0x1CF0020000000000,
			};

			xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
			ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);

			/* Extract OUTER_UDP_CKSUM bit 41 and
			 * move it to bit 61
			 */
			xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
			ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);

			/* Shift right iltype by 4 (bits 55:52 -> 51:48)
			 * and oltype by 2 (bits 60:58 -> 58:56) to form
			 * the lookup indices.
			 */
			const int8x16_t tshft5 = {
				8, 8, 8, 8, 8, 8, -4, -2,
				8, 8, 8, 8, 8, 8, -4, -2,
			};

			xtmp128 = vshlq_u8(xtmp128, tshft5);
			ytmp128 = vshlq_u8(ytmp128, tshft5);
			/*
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
			 */
			const int8x16_t tshft3 = {
				-1, 0, -1, 0, 0, 0, 0, 0,
				-1, 0, -1, 0, 0, 0, 0, 0,
			};

			senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
			senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);

			/* Mark Bit(4) of oltype to select table [16-31] */
			const uint64x2_t oi_cksum_mask2 = {
				0x1000000000000000,
				0x1000000000000000,
			};

			xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
			ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);

			/* Do the lookup */
			ltypes01 = vqtbl2q_u8(tbl, xtmp128);
			ltypes23 = vqtbl2q_u8(tbl, ytmp128);

			/* Just use ld1q to retrieve aura
			 * when we don't need tx_offload
			 */
			mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
					offsetof(struct rte_mempool, pool_id));
			mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
					offsetof(struct rte_mempool, pool_id));
			mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
					offsetof(struct rte_mempool, pool_id));
			mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
					offsetof(struct rte_mempool, pool_id));

			/* Pick only relevant fields i.e Bit 48:55 of iltype
			 * and Bit 56:63 of oltype and place it in
			 * corresponding place in senddesc_w1.
			 */
			const uint8x16_t shuf_mask0 = {
				0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
				0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
			};

			ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
			ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);

			/* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
			 * l3len, l2len, ol3len, ol2len.
			 * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
			 * a = a + (a << 8)
			 * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
			 * a = a + (a << 16)
			 * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
			 * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
			 */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						 vshlq_n_u32(senddesc01_w1, 8));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						 vshlq_n_u32(senddesc23_w1, 8));

			/* Create second half of 4W cmd for 4 mbufs (sgdesc) */
			cmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
			cmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);

			/* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
			senddesc01_w1 = vaddq_u8(senddesc01_w1,
						vshlq_n_u32(senddesc01_w1, 16));
			senddesc23_w1 = vaddq_u8(senddesc23_w1,
						vshlq_n_u32(senddesc23_w1, 16));

			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;
			asm volatile ("LD1 {%[a].H}[0],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf0) : "memory");

			asm volatile ("LD1 {%[a].H}[4],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf1) : "memory");

			asm volatile ("LD1 {%[b].H}[0],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf2) : "memory");

			asm volatile ("LD1 {%[b].H}[4],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf3) : "memory");
			xmask01 = vshlq_n_u64(xmask01, 20);
			xmask23 = vshlq_n_u64(xmask23, 20);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
			/* Move ltypes to senddesc*_w1 */
			senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
			senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);

			/* Create first half of 4W cmd for 4 mbufs (sendhdr) */
			cmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);
			cmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);
			cmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);
			cmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);
		} else {
			/* Just use ld1q to retrieve aura
			 * when we don't need tx_offload
			 */
			mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
					offsetof(struct rte_mempool, pool_id));
			mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
					offsetof(struct rte_mempool, pool_id));
			mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
					offsetof(struct rte_mempool, pool_id));
			mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
					offsetof(struct rte_mempool, pool_id));
			xmask01 = vdupq_n_u64(0);
			xmask23 = xmask01;
			asm volatile ("LD1 {%[a].H}[0],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf0) : "memory");

			asm volatile ("LD1 {%[a].H}[4],[%[in]]\n\t" :
				[a]"+w"(xmask01) : [in]"r"(mbuf1) : "memory");

			asm volatile ("LD1 {%[b].H}[0],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf2) : "memory");

			asm volatile ("LD1 {%[b].H}[4],[%[in]]\n\t" :
				[b]"+w"(xmask23) : [in]"r"(mbuf3) : "memory");
			xmask01 = vshlq_n_u64(xmask01, 20);
			xmask23 = vshlq_n_u64(xmask23, 20);

			senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
			senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);

			/* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
			cmd00 = vzip1q_u64(senddesc01_w0, senddesc01_w1);
			cmd01 = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd10 = vzip2q_u64(senddesc01_w0, senddesc01_w1);
			cmd11 = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
			cmd20 = vzip1q_u64(senddesc23_w0, senddesc23_w1);
			cmd21 = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
			cmd30 = vzip2q_u64(senddesc23_w0, senddesc23_w1);
			cmd31 = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
		}

		/* Store the prepared descriptors (128B) to the LMT line and
		 * submit; retry if the LMTST returns zero, meaning the
		 * submission did not complete.
		 */
		do {
			vst1q_u64(lmt_addr, cmd00);
			vst1q_u64(lmt_addr + 2, cmd01);
			vst1q_u64(lmt_addr + 4, cmd10);
			vst1q_u64(lmt_addr + 6, cmd11);
			vst1q_u64(lmt_addr + 8, cmd20);
			vst1q_u64(lmt_addr + 10, cmd21);
			vst1q_u64(lmt_addr + 12, cmd30);
			vst1q_u64(lmt_addr + 14, cmd31);
			lmt_status = otx2_lmt_submit(io_addr);
		} while (lmt_status == 0);

		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
	}

	/* Transmit the remainder (< 4 packets) via the scalar path */
	if (unlikely(pkts_left))
		pkts += nix_xmit_pkts(tx_queue, tx_pkts, pkts_left, cmd, flags);

	return pkts;
}

#else
static __rte_always_inline uint16_t
nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
		     uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	/* Stub so the vector templates below still compile on non-Arm
	 * builds; it transmits nothing.
	 */
	RTE_SET_USED(tx_queue);
	RTE_SET_USED(tx_pkts);
	RTE_SET_USED(pkts);
	RTE_SET_USED(cmd);
	RTE_SET_USED(flags);
	return 0;
}
#endif

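/* Each T() below is expanded once per entry of NIX_TX_FASTPATH_MODES
 * (defined in the driver's Tx header), emitting one specialized burst
 * function per combination of the seven Tx offload flags. Since 'flags'
 * is a compile-time constant, the always-inline workers above reduce to
 * only the code each mode needs.
 */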
#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
static uint16_t __rte_noinline __rte_hot				\
otx2_nix_xmit_pkts_ ## name(void *tx_queue,				\
			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
{									\
	uint64_t cmd[sz];						\
									\
	/* For TSO inner checksum is a must */				\
	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
		return 0;						\
	return nix_xmit_pkts(tx_queue, tx_pkts, pkts, cmd, flags);	\
}

NIX_TX_FASTPATH_MODES
#undef T

#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
static uint16_t __rte_noinline __rte_hot				\
otx2_nix_xmit_pkts_mseg_ ## name(void *tx_queue,			\
			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
{									\
	uint64_t cmd[(sz) + NIX_TX_MSEG_SG_DWORDS - 2];			\
									\
	/* For TSO inner checksum is a must */				\
	if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&				\
	    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))			\
		return 0;						\
	return nix_xmit_pkts_mseg(tx_queue, tx_pkts, pkts, cmd,	\
				  (flags) | NIX_TX_MULTI_SEG_F);	\
}

NIX_TX_FASTPATH_MODES
#undef T

#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
static uint16_t __rte_noinline __rte_hot				\
otx2_nix_xmit_pkts_vec_ ## name(void *tx_queue,				\
			struct rte_mbuf **tx_pkts, uint16_t pkts)	\
{									\
	uint64_t cmd[sz];						\
									\
	/* VLAN, TSTMP, TSO is not supported by vec */			\
	if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||			\
	    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||			\
	    (flags) & NIX_TX_OFFLOAD_TSO_F)				\
		return 0;						\
	return nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, (flags)); \
}

NIX_TX_FASTPATH_MODES
#undef T

static inline void
pick_tx_func(struct rte_eth_dev *eth_dev,
	     const eth_tx_burst_t tx_burst[2][2][2][2][2][2][2])
{
	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);

	/* [SEC] [TSO] [TSTMP] [NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM] */
	eth_dev->tx_pkt_burst = tx_burst
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_SECURITY_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSO_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_TSTAMP_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
		[!!(dev->tx_offload_flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
}
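
/* Example (hypothetical configuration): a port with only MBUF_NOFF and
 * L3_L4_CSUM enabled resolves to tx_burst[0][0][0][1][0][0][1], the
 * burst function specialized for exactly those two offloads.
 */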

void
otx2_eth_set_tx_function(struct rte_eth_dev *eth_dev)
{
	struct otx2_eth_dev *dev = otx2_eth_pmd_priv(eth_dev);

	const eth_tx_burst_t nix_eth_tx_burst[2][2][2][2][2][2][2] = {
#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
	[f6][f5][f4][f3][f2][f1][f0] = otx2_nix_xmit_pkts_ ## name,

NIX_TX_FASTPATH_MODES
#undef T
	};

	const eth_tx_burst_t nix_eth_tx_burst_mseg[2][2][2][2][2][2][2] = {
#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
	[f6][f5][f4][f3][f2][f1][f0] = otx2_nix_xmit_pkts_mseg_ ## name,

NIX_TX_FASTPATH_MODES
#undef T
	};

	const eth_tx_burst_t nix_eth_tx_vec_burst[2][2][2][2][2][2][2] = {
#define T(name, f6, f5, f4, f3, f2, f1, f0, sz, flags)			\
	[f6][f5][f4][f3][f2][f1][f0] = otx2_nix_xmit_pkts_vec_ ## name,

NIX_TX_FASTPATH_MODES
#undef T
	};

	/* Fall back to scalar when scalar mode is forced or when an
	 * offload the vector path cannot handle is enabled.
	 */
	if (dev->scalar_ena ||
	    (dev->tx_offload_flags &
	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
	      NIX_TX_OFFLOAD_TSO_F)))
		pick_tx_func(eth_dev, nix_eth_tx_burst);
	else
		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);

	if (dev->tx_offloads & RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
		pick_tx_func(eth_dev, nix_eth_tx_burst_mseg);

	rte_mb();
}