1
2
3
4#ifndef __CN10K_TX_H__
5#define __CN10K_TX_H__
6
7#include <rte_vect.h>
8
9#include <rte_eventdev.h>
10
11#define NIX_TX_OFFLOAD_NONE (0)
12#define NIX_TX_OFFLOAD_L3_L4_CSUM_F BIT(0)
13#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
14#define NIX_TX_OFFLOAD_VLAN_QINQ_F BIT(2)
15#define NIX_TX_OFFLOAD_MBUF_NOFF_F BIT(3)
16#define NIX_TX_OFFLOAD_TSO_F BIT(4)
17#define NIX_TX_OFFLOAD_TSTAMP_F BIT(5)
18#define NIX_TX_OFFLOAD_SECURITY_F BIT(6)
19#define NIX_TX_OFFLOAD_MAX (NIX_TX_OFFLOAD_SECURITY_F << 1)
20
21
22
23
24
25#define NIX_TX_VWQE_F BIT(14)
26#define NIX_TX_MULTI_SEG_F BIT(15)
27
28#define NIX_TX_NEED_SEND_HDR_W1 \
29 (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F | \
30 NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)
31
32#define NIX_TX_NEED_EXT_HDR \
33 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F | \
34 NIX_TX_OFFLOAD_TSO_F)
35
/* Flow-control gate: make sure the SQ has room for @pkts descriptors.
 * Refreshes the cached count from the HW-visible counter at fc_mem when
 * the cache runs low, and returns 0 from the *enclosing* function when
 * there is still not enough room.
 */
#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
	do {                                                                   \
		/* Cached count covers the burst: nothing to do */             \
		if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
			/* Recompute free SQEs: (adjusted SQB count - used     \
			 * SQBs from *fc_mem) scaled by SQEs per SQB.          \
			 */                                                    \
			(txq)->fc_cache_pkts =                                 \
				((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
				<< (txq)->sqes_per_sqb_log2;                   \
			/* Still no room: caller transmits nothing */          \
			if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
				return 0;                                      \
		}                                                              \
	} while (0)
49
50
51
52
/* Lookup table packed into a constant: nibble x of the magic value is the
 * number of SG dwords needed for x segments (extracted below).
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

/* Number of SG dwords for (x) segments: selects nibble x of the magic */
#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
56
57
58
59
60static __rte_always_inline int
61cn10k_nix_tx_ext_subs(const uint16_t flags)
62{
63 return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
64 2 :
65 ((flags &
66 (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)) ?
67 1 :
68 0);
69}
70
71static __rte_always_inline uint8_t
72cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
73{
74 if (!(flags & NIX_TX_MULTI_SEG_F))
75 return cn10k_nix_tx_ext_subs(flags) + 2;
76
77
78 return segdw;
79}
80
81static __rte_always_inline uint8_t
82cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
83{
84 return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
85 << ROC_LMT_LINES_PER_CORE_LOG2;
86}
87
88static __rte_always_inline uint8_t
89cn10k_nix_tx_dwords_per_line(const uint16_t flags)
90{
91 return (flags & NIX_TX_NEED_EXT_HDR) ?
92 ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) :
93 8;
94}
95
96static __rte_always_inline uint64_t
97cn10k_nix_tx_steor_data(const uint16_t flags)
98{
99 const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
100 uint64_t data;
101
102
103 data = dw_m1;
104
105 data |= dw_m1 << 19;
106 data |= dw_m1 << 22;
107 data |= dw_m1 << 25;
108 data |= dw_m1 << 28;
109 data |= dw_m1 << 31;
110 data |= dw_m1 << 34;
111 data |= dw_m1 << 37;
112 data |= dw_m1 << 40;
113 data |= dw_m1 << 43;
114 data |= dw_m1 << 46;
115 data |= dw_m1 << 49;
116 data |= dw_m1 << 52;
117 data |= dw_m1 << 55;
118 data |= dw_m1 << 58;
119 data |= dw_m1 << 61;
120
121 return data;
122}
123
124static __rte_always_inline uint8_t
125cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
126{
127 return ((flags & NIX_TX_NEED_EXT_HDR) ?
128 (flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6 :
129 4);
130}
131
132static __rte_always_inline uint64_t
133cn10k_nix_tx_steor_vec_data(const uint16_t flags)
134{
135 const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
136 uint64_t data;
137
138
139 data = dw_m1;
140
141 data |= dw_m1 << 19;
142 data |= dw_m1 << 22;
143 data |= dw_m1 << 25;
144 data |= dw_m1 << 28;
145 data |= dw_m1 << 31;
146 data |= dw_m1 << 34;
147 data |= dw_m1 << 37;
148 data |= dw_m1 << 40;
149 data |= dw_m1 << 43;
150 data |= dw_m1 << 46;
151 data |= dw_m1 << 49;
152 data |= dw_m1 << 52;
153 data |= dw_m1 << 55;
154 data |= dw_m1 << 58;
155 data |= dw_m1 << 61;
156
157 return data;
158}
159
160static __rte_always_inline uint64_t
161cn10k_cpt_tx_steor_data(void)
162{
163
164 const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
165 uint64_t data;
166
167
168 data = dw_m1 << 16;
169 data |= dw_m1 << 19;
170 data |= dw_m1 << 22;
171 data |= dw_m1 << 25;
172 data |= dw_m1 << 28;
173 data |= dw_m1 << 31;
174 data |= dw_m1 << 34;
175 data |= dw_m1 << 37;
176 data |= dw_m1 << 40;
177 data |= dw_m1 << 43;
178 data |= dw_m1 << 46;
179 data |= dw_m1 << 49;
180 data |= dw_m1 << 52;
181 data |= dw_m1 << 55;
182 data |= dw_m1 << 58;
183 data |= dw_m1 << 61;
184
185 return data;
186}
187
/* Pre-build the constant part of the send descriptor in @cmd: the send
 * header word, the optional EXT sub-descriptor and the SG sub-descriptor,
 * selected by the compile-time offload @flags.  @static_sz non-zero means
 * the cached header word can be used verbatim; otherwise sizem1 (bits
 * 40+) is patched in from the flags.
 */
static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
		      const uint16_t flags, const uint16_t static_sz)
{
	if (static_sz)
		cmd[0] = txq->send_hdr_w0;
	else
		/* Keep upper bits of cached header, patch sizem1 at bit 40 */
		cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
			 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
	cmd[1] = 0;

	if (flags & NIX_TX_NEED_EXT_HDR) {
		/* BIT(15) set only for timestamping — NOTE(review): presumably
		 * a per-pkt tstamp enable in the EXT word; confirm vs HW spec.
		 */
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
			cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
		else
			cmd[2] = NIX_SUBDC_EXT << 60;
		cmd[3] = 0;
		cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	} else {
		cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
	}
}
210
/* Reserve @nb_pkts CPT descriptors from the flow-control accounting.
 *
 * Fast path: atomically decrement the software counter; non-negative
 * result means there is room.  Slow path: busy-wait on the hardware
 * counter (@txq->cpt_fc) until the burst fits, then publish the
 * corrected free count via CAS.  A failed CAS means another thread
 * updated the counter concurrently, so the reservation restarts.
 */
static __rte_always_inline void
cn10k_nix_sec_fc_wait(struct cn10k_eth_txq *txq, uint16_t nb_pkts)
{
	int32_t nb_desc, val, newval;
	int32_t *fc_sw;
	volatile uint64_t *fc;

	/* Nothing to reserve */
	if (!nb_pkts)
		return;

again:
	fc_sw = txq->cpt_fc_sw;
	val = __atomic_sub_fetch(fc_sw, nb_pkts, __ATOMIC_RELAXED);
	if (likely(val >= 0))
		return;

	nb_desc = txq->cpt_desc;
	fc = txq->cpt_fc;
	/* Spin until hardware has drained enough in-flight instructions
	 * for this burst to fit.
	 */
	while (true) {
		newval = nb_desc - __atomic_load_n(fc, __ATOMIC_RELAXED);
		newval -= nb_pkts;
		if (newval >= 0)
			break;
	}

	/* Replace the stale (negative) software count; retry the whole
	 * reservation on contention so no descriptor is double-counted.
	 */
	if (!__atomic_compare_exchange_n(fc_sw, &val, newval, false,
					 __ATOMIC_RELAXED, __ATOMIC_RELAXED))
		goto again;
}
241
/* Flush accumulated CPT instructions with a single LMTST: @lnum full LMT
 * lines plus, when @loff is set, one partially filled line holding a
 * single instruction.  @shft is the bit offset of the partial line's
 * 3-bit size field inside the STEOR data word.
 */
static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
		     uint8_t loff, uint8_t shft)
{
	uint64_t data;
	uintptr_t pa;

	/* No CPT instruction pending */
	if (!lnum && !loff)
		return;

	data = cn10k_cpt_tx_steor_data();
	/* Update last line's size if it holds only one instruction */
	if (loff) {
		data &= ~(0x7ULL << shft);
		/* Single-instruction size (dwords - 1 = 0x3) */
		data |= (0x3UL << shft);
	}

	pa = io_addr | ((data >> 16) & 0x7) << 4;
	data &= ~(0x7ULL << 16);
	/* Number of valid lines minus one */
	data |= ((uint64_t)(lnum + loff - 1)) << 12;
	data |= lmt_id;

	/* STEOR */
	roc_lmt_submit_steorl(data, pa);
}
270
271#if defined(RTE_ARCH_ARM64)
/* Build the CPT instruction for inline IPsec outbound processing of @m
 * (vector Tx path) and write it to the security LMT line selected by
 * *lnum / *loff.  The NIX descriptor words @cmd0/@cmd1 are patched for
 * the post-encryption packet length, and *nixtx_addr is set to the
 * scratch area (past the grown packet, 128B aligned) where CPT places
 * the final NIX send descriptor.
 */
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
		       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
		       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
		       const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	uint8_t l3l4type, chksum;
	uint64x2_t cmd01, cmd23;
	uint8_t l2_len, l3_len;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		/* Extract l2/l3 lengths and l3l4 type from send hdr w1; the
		 * byte layout differs when outer checksum is also enabled.
		 */
		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
			l2_len = vgetq_lane_u8(*cmd0, 10);
			/* L4 ptr from send hdr includes l2 and l3 len */
			l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
			l3l4type = vgetq_lane_u8(*cmd0, 13);
		} else {
			l2_len = vgetq_lane_u8(*cmd0, 8);
			/* L4 ptr from send hdr includes l2 and l3 len */
			l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
			l3l4type = vgetq_lane_u8(*cmd0, 12);
		}

		/* Derive which checksums the session still has to compute */
		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
		chksum = ~chksum;
		sess_priv.chksum = sess_priv.chksum & chksum;
		/* Clear SEND header checksum flags */
		*cmd0 = vsetq_lane_u16(0, *cmd0, 6);
	} else {
		l2_len = m->l2_len;
		l3_len = m->l3_len;
	}

	/* Retrieve DPTR */
	dptr = vgetq_lane_u64(*cmd1, 1);
	pkt_len = vgetq_lane_u16(*cmd0, 0);

	/* Calculate the IPsec length adjustment: payload beyond L2 (and
	 * beyond L3 when mode is not tunnel) is rounded up per the
	 * session's roundup parameters, plus the fixed partial_len.
	 */
	dlen_adj = pkt_len - l2_len;
	dlen_adj -= sess_priv.mode ? 0 : l3_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptor lengths for the grown packet */
	*cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
	*cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

	/* Scratch area for the NIX descriptor: past the grown packet,
	 * rounded up to a 128B boundary.
	 */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed to CPT excludes the L2 header */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
			((uint64_t)sess_priv.chksum) << 32 |
			((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

	/* CPT word 0 and 1 */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* NOTE(review): lane 8 carries nixtx's 128B-alignment bit —
	 * confirm against the CPT instruction format.
	 */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3: completion event tag and mbuf back-pointer */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	/* Rewrite the ethertype preceding the payload for tunnel mode */
	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}

	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Each LMT line holds two 64B CPT instructions; *loff selects
	 * the half of the current line.
	 */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to the LMT line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Advance: bump lnum every second instruction, and shft by the
	 * width of one 3-bit STEOR size field per completed line.
	 */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}
392
/* Scalar-path counterpart of cn10k_nix_prep_sec_vec(): build the CPT
 * instruction for inline IPsec outbound processing of @m from the plain
 * @cmd descriptor words, patch the send header and SG for the grown
 * packet, and write the instruction to the security LMT line.
 */
static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	struct cn10k_sec_sess_priv sess_priv;
	uint32_t pkt_len, dlen_adj, rlen;
	struct nix_send_hdr_s *send_hdr;
	uint8_t l3l4type, chksum;
	uint64x2_t cmd01, cmd23;
	union nix_send_sg_s *sg;
	uint8_t l2_len, l3_len;
	uintptr_t dptr, nixtx;
	uint64_t ucode_cmd[4];
	uint64_t *laddr;
	uint16_t tag;
	uint64_t sa;

	sess_priv.u64 = *rte_security_dynfield(m);
	send_hdr = (struct nix_send_hdr_s *)cmd;
	/* SG follows the optional EXT sub-descriptor */
	if (flags & NIX_TX_NEED_EXT_HDR)
		sg = (union nix_send_sg_s *)&cmd[4];
	else
		sg = (union nix_send_sg_s *)&cmd[2];

	if (flags & NIX_TX_NEED_SEND_HDR_W1) {
		/* Extract l2/l3 lengths and l3l4 type from send hdr w1; the
		 * bit layout differs when outer checksum is also enabled.
		 */
		if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
		    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
			l2_len = (cmd[1] >> 16) & 0xFF;
			/* L4 ptr from send hdr includes l2 and l3 len */
			l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
			l3l4type = (cmd[1] >> 40) & 0xFF;
		} else {
			l2_len = cmd[1] & 0xFF;
			/* L4 ptr from send hdr includes l2 and l3 len */
			l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
			l3l4type = (cmd[1] >> 32) & 0xFF;
		}

		/* Derive which checksums the session still has to compute */
		chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
		chksum = ~chksum;
		sess_priv.chksum = sess_priv.chksum & chksum;
		/* Clear SEND header checksum flags */
		cmd[1] &= ~(0xFFFFUL << 32);
	} else {
		l2_len = m->l2_len;
		l3_len = m->l3_len;
	}

	/* Retrieve DPTR */
	dptr = *(uint64_t *)(sg + 1);
	pkt_len = send_hdr->w0.total;

	/* Calculate the IPsec length adjustment: payload beyond L2 (and
	 * beyond L3 when mode is not tunnel) is rounded up per the
	 * session's roundup parameters, plus the fixed partial_len.
	 */
	dlen_adj = pkt_len - l2_len;
	dlen_adj -= sess_priv.mode ? 0 : l3_len;
	rlen = (dlen_adj + sess_priv.roundup_len) +
	       (sess_priv.roundup_byte - 1);
	rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
	rlen += sess_priv.partial_len;
	dlen_adj = rlen - dlen_adj;

	/* Update send descriptor lengths for the grown packet */
	send_hdr->w0.total = pkt_len + dlen_adj;
	sg->seg1_size = pkt_len + dlen_adj;

	/* Scratch area for the NIX descriptor: past the grown packet,
	 * rounded up to a 128B boundary.
	 */
	nixtx = dptr + pkt_len + dlen_adj;
	nixtx += BIT_ULL(7);
	nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

	/* Return nixtx addr */
	*nixtx_addr = (nixtx + 16);

	/* DLEN passed to CPT excludes the L2 header */
	pkt_len -= l2_len;
	tag = sa_base & 0xFFFFUL;
	sa_base &= ~0xFFFFUL;
	sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
	ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
	ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
			((uint64_t)sess_priv.chksum) << 32 |
			((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

	/* CPT word 0 and 1 */
	cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
	/* NOTE(review): lane 8 carries nixtx's 128B-alignment bit —
	 * confirm against the CPT instruction format.
	 */
	cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

	/* CPT word 2 and 3: completion event tag and mbuf back-pointer */
	cmd23 = vdupq_n_u64(0);
	cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
				CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
	cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

	dptr += l2_len;

	/* Rewrite the ethertype preceding the payload for tunnel mode */
	if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
		if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
		else
			*((uint16_t *)(dptr - 2)) =
				rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
	}
	ucode_cmd[1] = dptr;
	ucode_cmd[2] = dptr;

	/* Each LMT line holds two 64B CPT instructions; *loff selects
	 * the half of the current line.
	 */
	laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

	/* Write CPT instruction to the LMT line */
	vst1q_u64(laddr, cmd01);
	vst1q_u64((laddr + 2), cmd23);

	*(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
	*(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

	/* Advance: bump lnum every second instruction, and shft by the
	 * width of one 3-bit STEOR size field per completed line.
	 */
	*loff = !(*loff);
	*lnum = *lnum + (*loff ? 0 : 1);
	*shft = *shft + (*loff ? 0 : 3);
}
519
520#else
521
/* No-op stub for non-Arm64 builds: the inline IPsec prep path above uses
 * NEON and is only compiled under RTE_ARCH_ARM64.
 */
static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
		   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
		   uint64_t sa_base, const uint16_t flags)
{
	RTE_SET_USED(m);
	RTE_SET_USED(cmd);
	RTE_SET_USED(nixtx_addr);
	RTE_SET_USED(lbase);
	RTE_SET_USED(lnum);
	RTE_SET_USED(loff);
	RTE_SET_USED(shft);
	RTE_SET_USED(sa_base);
	RTE_SET_USED(flags);
}
537#endif
538
/* For TSO packets, pre-adjust the length fields inside the packet
 * headers: subtract the payload length from the (inner) IP total length
 * and, for tunnels, from the outer IP and outer UDP lengths, so hardware
 * LSO emits correct per-segment values.
 */
static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
	uint64_t mask, ol_flags = m->ol_flags;

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
		uint16_t *iplen, *oiplen, *oudplen;
		uint16_t lso_sb, paylen;

		/* Header size = l2+l3+l4, plus outer l2+l3 when present */
		mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
		lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
			 m->l2_len + m->l3_len + m->l4_len;

		/* Payload length excluding all headers */
		paylen = m->pkt_len - lso_sb;

		/* IP length field: offset 2 in IPv4, 4 in IPv6 */
		iplen = (uint16_t *)(mdata + m->l2_len +
				     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));

		/* Handle tunnel TSO: also fix outer IP/UDP lengths */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;

			oiplen = (uint16_t *)(mdata + m->outer_l2_len +
					      (2 << !!(ol_flags &
						       RTE_MBUF_F_TX_OUTER_IPV6)));
			*oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
						   paylen);

			/* Outer UDP length at offset 4 of the UDP header */
			if (is_udp_tun) {
				oudplen = (uint16_t *)(mdata + m->outer_l2_len +
						       m->outer_l3_len + 4);
				*oudplen = rte_cpu_to_be_16(
					rte_be_to_cpu_16(*oudplen) - paylen);
			}

			/* Re-point iplen at the inner IP header */
			iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
					     m->l4_len +
					     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
		}

		*iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
	}
}
590
/* Fill the per-packet fields of the send descriptor in @cmd for mbuf @m:
 * totals and aura in the send header, L3/L4 checksum types and pointers
 * in w1, VLAN insertion and traffic-manager marking in the EXT header,
 * LSO setup, and the first SG entry.  *sec is set when the packet
 * requests inline security processing.
 */
static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
		       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
		       uint64_t mark_fmt)
{
	uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
	struct nix_send_ext_s *send_hdr_ext;
	struct nix_send_hdr_s *send_hdr;
	uint64_t ol_flags = 0, mask;
	union nix_send_hdr_w1_u w1;
	union nix_send_sg_s *sg;
	uint16_t mark_form = 0;

	send_hdr = (struct nix_send_hdr_s *)cmd;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		sg = (union nix_send_sg_s *)(cmd + 4);
		/* Clear previous markings */
		send_hdr_ext->w0.lso = 0;
		send_hdr_ext->w0.mark_en = 0;
		send_hdr_ext->w1.u = 0;
		ol_flags = m->ol_flags;
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}

	if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
		ol_flags = m->ol_flags;
		w1.u = 0;
	}

	if (!(flags & NIX_TX_MULTI_SEG_F))
		send_hdr->w0.total = m->data_len;
	else
		send_hdr->w0.total = m->pkt_len;
	send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

	/*
	 * L3type:  2 => IPV4
	 *          3 => IPV4 with csum
	 *          4 => IPV6
	 * L3type and L3ptr need to be set for either
	 * L3 csum or L4 csum or LSO
	 */

	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t ol3type =
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			!!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L3 */
		w1.ol3type = ol3type;
		/* Zero the pointers when there is no outer header at all */
		mask = 0xffffull << ((!!ol3type) << 4);
		w1.ol3ptr = ~mask & m->outer_l2_len;
		w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

		/* Inner L3 */
		w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
		w1.il3ptr = w1.ol4ptr + m->l2_len;
		w1.il4ptr = w1.il3ptr + m->l3_len;
		/* Increment by 1 for IPv4 with csum (type 3) */
		w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

		/* With no tunnel header, shift the IL3/IL4 fields down so
		 * the OL3/OL4 fields carry the single header's checksum
		 * info instead.
		 */
		mask = !ol3type;
		w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
		       ((w1.u & 0X00000000FFFFFFFF) >> (mask << 4));

	} else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
		const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
		const uint8_t outer_l2_len = m->outer_l2_len;

		/* Outer L3 */
		w1.ol3ptr = outer_l2_len;
		w1.ol4ptr = outer_l2_len + m->outer_l3_len;
		/* Increment by 1 for IPv4 with csum (type 3) */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

		/* Outer L4 */
		w1.ol4type = csum + (csum << 1);

	} else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
		const uint8_t l2_len = m->l2_len;

		/* Always use OLXPTR and OLXTYPE when only
		 * one header is present
		 */

		/* Inner L3 */
		w1.ol3ptr = l2_len;
		w1.ol4ptr = l2_len + m->l3_len;
		/* Increment by 1 for IPv4 with csum (type 3) */
		w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
			     ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
			     !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

		/* Inner L4 */
		w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
	}

	if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
						  RTE_MBUF_F_TX_IPV6));

		send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
		/* HW will update ptr after vlan0 update */
		send_hdr_ext->w1.vlan1_ins_ptr = 12;
		send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

		send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
		/* 2B before end of l2 header */
		send_hdr_ext->w1.vlan0_ins_ptr = 12;
		send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
		/* VLAN DEI marking only when VLAN insertion is enabled */
		mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
			     (send_hdr_ext->w1.vlan1_ins_ena ||
			      send_hdr_ext->w1.vlan0_ins_ena));

		/* Mask requested mark flags by what the packet supports */
		mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
		mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

		/* Select the 16-bit mark format for this mark type/IP ver */
		mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
		mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
		markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

		send_hdr_ext->w0.mark_en = !!mark_off;
		send_hdr_ext->w0.markform = mark_form & 0x7F;
		send_hdr_ext->w0.markptr = markptr;
	}

	if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
		uint16_t lso_sb;
		uint64_t mask;

		/* lso_sb = header size: outer l4ptr when no inner headers */
		mask = -(!w1.il3type);
		lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

		send_hdr_ext->w0.lso_sb = lso_sb;
		send_hdr_ext->w0.lso = 1;
		send_hdr_ext->w0.lso_mps = m->tso_segsz;
		send_hdr_ext->w0.lso_format =
			NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
		w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

		/* Handle tunnel TSO */
		if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
		    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
			const uint8_t is_udp_tun =
				(CNXK_NIX_UDP_TUN_BITMASK >>
				 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
				0x1;
			uint8_t shift = is_udp_tun ? 32 : 0;

			shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
			shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
			/* Pick the LSO format for the tunnel combination */
			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
		}
	}

	if (flags & NIX_TX_NEED_SEND_HDR_W1)
		send_hdr->w1.u = w1.u;

	if (!(flags & NIX_TX_MULTI_SEG_F)) {
		sg->seg1_size = send_hdr->w0.total;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			/* DF bit = 1 if refcount of current mbuf or parent
			 * mbuf is greater than 1
			 * DF bit = 0 otherwise
			 */
			send_hdr->w0.df = cnxk_nix_prefree_seg(m);
		}
		/* Mark mempool object as "put" since it is freed by NIX */
		if (!send_hdr->w0.df)
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	} else {
		sg->seg1_size = m->data_len;
		*(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

		/* Remaining segments are filled by the mseg helper */
	}

	if (flags & NIX_TX_OFFLOAD_SECURITY_F)
		*sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}
799
/* Copy the prepared descriptor words from @cmd into the LMT line (or
 * NIXTX scratch area): send header, optional EXT sub-descriptor, then
 * the SG sub-descriptor plus its iova.
 */
static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
			   const uint16_t flags)
{
	struct nix_send_ext_s *send_hdr_ext;
	union nix_send_sg_s *sg;

	/* With minimal offloads 'cmd' may live in registers; otherwise it
	 * is on the stack.  Either way it is copied out 16B at a time.
	 */
	*((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
	lmt_addr += 16;
	if (flags & NIX_TX_NEED_EXT_HDR) {
		send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
		*((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
		lmt_addr += 16;

		sg = (union nix_send_sg_s *)(cmd + 4);
	} else {
		sg = (union nix_send_sg_s *)(cmd + 2);
	}
	/* SG sub-descriptor and the first segment's iova */
	*((union nix_send_sg_s *)lmt_addr) = *sg;
	*(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}
826
/* Append a SEND_MEM sub-descriptor after the last data dword so hardware
 * writes the Tx timestamp to txq->ts_mem.  For packets without
 * RTE_MBUF_F_TX_IEEE1588_TMST, the alg and address are offset
 * (is_ol_tstamp == 1) so the live timestamp slot is left untouched —
 * NOTE(review): presumably redirecting the write to the adjacent dummy
 * word; confirm against NIX_SENDMEMALG definitions.
 */
static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
			      const uint64_t ol_flags, const uint16_t no_segdw,
			      const uint16_t flags)
{
	if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
		const uint8_t is_ol_tstamp =
			!(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
		uint64_t *lmt = (uint64_t *)lmt_addr;
		/* MEM sub-descriptor goes after (no_segdw - 1) dword pairs */
		uint16_t off = (no_segdw - 1) << 1;
		struct nix_send_mem_s *send_mem;

		send_mem = (struct nix_send_mem_s *)(lmt + off);

		send_mem->w0.subdc = NIX_SUBDC_MEM;
		send_mem->w0.alg =
			NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
		send_mem->addr =
			(rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
	}
}
853
/* Fill the SG sub-descriptor chain for a multi-segment mbuf: segment
 * sizes packed into 16-bit fields of each SG word, three iovas per SG
 * word, starting a fresh SG sub-descriptor every three segments.
 * Returns the total descriptor size in dword pairs and stores it in the
 * send header's sizem1.
 */
static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
	struct nix_send_hdr_s *send_hdr;
	union nix_send_sg_s *sg;
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint64_t nb_segs;
	uint64_t segdw;
	uint8_t off, i;

	send_hdr = (struct nix_send_hdr_s *)cmd;

	/* SG follows the optional EXT sub-descriptor */
	if (flags & NIX_TX_NEED_EXT_HDR)
		off = 2;
	else
		off = 0;

	sg = (union nix_send_sg_s *)&cmd[2 + off];

	/* Start from second segment; the first is already filled in */
	i = 1;
	sg_u = sg->u;
	nb_segs = m->nb_segs - 1;
	m_next = m->next;
	slist = &cmd[3 + off + 1];

	/* Set invert-df bit (55) if the buffer must not be freed by HW */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif
	m = m_next;
	if (!m)
		goto done;

	/* Fill remaining mbuf segments */
	do {
		m_next = m->next;
		/* Segment size goes into 16-bit field i of the SG word */
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);

		/* Invert-df per segment (bits 55..57) */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));

		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
		slist++;
		i++;
		nb_segs--;
		/* Three segments per SG word: start the next SG subdesc */
		if (i > 2 && nb_segs) {
			i = 0;

			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

done:
	sg->u = sg_u;
	sg->segs = i;
	segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
	/* Round up SG dwords to a multiple of 2 */
	segdw = (segdw >> 1) + (segdw & 0x1);
	/* Add header, optional EXT and optional MEM dword pairs */
	segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
	send_hdr->w0.sizem1 = segdw - 1;

	return segdw;
}
937
/* Single-segment burst transmit: prepare descriptors per packet into the
 * per-lcore LMT lines (diverting security packets to the CPT LMT lines)
 * and flush each sub-burst of up to 32 packets with one or two STEORL
 * operations.  Returns the number of packets queued.
 */
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
		    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	const rte_iova_t io_addr = txq->io_addr;
	uint8_t lnum, c_lnum, c_shft, c_loff;
	uintptr_t pa, lbase = txq->lmt_base;
	uint16_t lmt_id, burst, left, i;
	uintptr_t c_lbase = lbase;
	uint64_t lso_tun_fmt = 0;
	uint64_t mark_fmt = 0;
	uint8_t mark_flag = 0;
	rte_iova_t c_io_addr;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	uint64_t data;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}

	/* Build the constant descriptor skeleton once per burst */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Get LMT base address and LMT ID for this lcore */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	/* Process in sub-bursts of up to 32 packets (LMT line count) */
	burst = left > 32 ? 32 : left;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO; the barrier at LMT
		 * steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec, mark_flag, mark_fmt);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction; laddr is redirected to the
		 * NIXTX scratch area for security packets.
		 */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX descriptor to the LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      4, flags);
		/* Security packets are submitted via CPT, not this line */
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
			lnum++;
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent directly to NIX */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	/* Trigger LMTST: two STEORs when more than 16 lines are used */
	if (burst > 16) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		/* First STEOR flushes 16 lines */
		data |= (15ULL << 12);
		data |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data, pa);

		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		/* Second STEOR flushes the remaining lines */
		data |= ((uint64_t)(burst - 17)) << 12;
		data |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data, pa);
	} else if (burst) {
		data = cn10k_nix_tx_steor_data(flags);
		pa = io_addr | (data & 0x7) << 4;
		data &= ~0x7ULL;
		/* Lines minus one that contain valid data */
		data |= ((uint64_t)(burst - 1)) << 12;
		data |= lmt_id;

		/* STEOR */
		roc_lmt_submit_steorl(data, pa);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
1070
/* Multi-segment burst transmit: like cn10k_nix_xmit_pkts() but each
 * packet's SG chain is built per-mbuf and the per-line dword counts are
 * accumulated into a 128-bit STEOR data value (3 bits per line starting
 * at bit 16).  Returns the number of packets queued.
 */
static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
			 struct rte_mbuf **tx_pkts, uint16_t pkts,
			 uint64_t *cmd, const uint16_t flags)
{
	struct cn10k_eth_txq *txq = tx_queue;
	uintptr_t pa0, pa1, lbase = txq->lmt_base;
	const rte_iova_t io_addr = txq->io_addr;
	uint16_t segdw, lmt_id, burst, left, i;
	uint8_t lnum, c_lnum, c_loff;
	uintptr_t c_lbase = lbase;
	uint64_t lso_tun_fmt = 0;
	uint64_t mark_fmt = 0;
	uint8_t mark_flag = 0;
	uint64_t data0, data1;
	rte_iova_t c_io_addr;
	uint8_t shft, c_shft;
	__uint128_t data128;
	uint16_t c_lmt_id;
	uint64_t sa_base;
	uintptr_t laddr;
	bool sec;

	if (!(flags & NIX_TX_VWQE_F)) {
		NIX_XMIT_FC_OR_RETURN(txq, pkts);
		/* Reduce the cached count */
		txq->fc_cache_pkts -= pkts;
	}

	/* Build the constant descriptor skeleton once per burst */
	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

	if (flags & NIX_TX_OFFLOAD_TSO_F)
		lso_tun_fmt = txq->lso_tun_fmt;

	if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
		mark_fmt = txq->mark_fmt;
		mark_flag = txq->mark_flag;
	}

	/* Get LMT base address and LMT ID for this lcore */
	ROC_LMT_BASE_ID_GET(lbase, lmt_id);
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
		c_io_addr = txq->cpt_io_addr;
		sa_base = txq->sa_base;
	}

	left = pkts;
again:
	/* Process in sub-bursts of up to 32 packets (LMT line count) */
	burst = left > 32 ? 32 : left;
	shft = 16;
	data128 = 0;

	lnum = 0;
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		c_lnum = 0;
		c_loff = 0;
		c_shft = 16;
	}

	for (i = 0; i < burst; i++) {
		/* Perform header writes for TSO; the barrier at LMT
		 * steorl will suffice.
		 */
		if (flags & NIX_TX_OFFLOAD_TSO_F)
			cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

		cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
				       &sec, mark_flag, mark_fmt);

		laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

		/* Prepare CPT instruction; laddr is redirected to the
		 * NIXTX scratch area for security packets.
		 */
		if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
			cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
					   &c_lnum, &c_loff, &c_shft, sa_base,
					   flags);

		/* Move NIX descriptor to the LMT/NIXTX area */
		cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
		/* Build SG chain directly in the destination line */
		segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
					       flags);
		cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
					      segdw, flags);
		/* Accumulate this line's size into the STEOR data */
		if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
			lnum++;
			data128 |= (((__uint128_t)(segdw - 1)) << shft);
			shft += 3;
		}
	}

	if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
		ws[1] = roc_sso_hws_head_wait(ws[0]);

	left -= burst;
	tx_pkts += burst;

	/* Submit CPT instructions if any */
	if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
		/* Reduce pkts to be sent directly to NIX */
		burst -= ((c_lnum << 1) + c_loff);
		cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
		cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
				     c_shft);
	}

	data0 = (uint64_t)data128;
	data1 = (uint64_t)(data128 >> 64);
	/* Make data0 similar to data1 */
	data0 >>= 16;
	/* Trigger LMTST: two STEORs when more than 16 lines are used */
	if (burst > 16) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= (15ULL << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR0 */
		roc_lmt_submit_steorl(data0, pa0);

		pa1 = io_addr | (data1 & 0x7) << 4;
		data1 &= ~0x7ULL;
		data1 <<= 16;
		data1 |= ((uint64_t)(burst - 17)) << 12;
		data1 |= (uint64_t)(lmt_id + 16);

		/* STEOR1 */
		roc_lmt_submit_steorl(data1, pa1);
	} else if (burst) {
		pa0 = io_addr | (data0 & 0x7) << 4;
		data0 &= ~0x7ULL;
		/* Move lmtst1..15 sz to bits 63:19 */
		data0 <<= 16;
		data0 |= ((burst - 1) << 12);
		data0 |= (uint64_t)lmt_id;

		/* STEOR */
		roc_lmt_submit_steorl(data0, pa0);
	}

	rte_io_wmb();
	if (left)
		goto again;

	return pkts;
}
1220
1221#if defined(RTE_ARCH_ARM64)
1222
/* Vector-path TSO setup: populate the LSO fields of the EXT header word
 * @w0 and fix the L4 types in @w1 for a TSO-marked packet, including
 * tunnel LSO format selection from @lso_tun_fmt.
 */
static __rte_always_inline void
cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
		      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
		      const uint64_t flags, const uint64_t lso_tun_fmt)
{
	uint16_t lso_sb;
	uint64_t mask;

	if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
		return;

	/* lso_sb = header size: outer l4ptr when no inner headers */
	mask = -(!w1->il3type);
	lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;

	/* BIT(14) is the LSO enable bit of the EXT w0 word */
	w0->u |= BIT(14);
	w0->lso_sb = lso_sb;
	w0->lso_mps = m->tso_segsz;
	w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
	w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

	/* Handle tunnel TSO */
	if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
	    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
		const uint8_t is_udp_tun =
			(CNXK_NIX_UDP_TUN_BITMASK >>
			 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
			0x1;
		uint8_t shift = is_udp_tun ? 32 : 0;

		shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
		shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

		w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
		w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;

		/* Pick the LSO format for the tunnel combination */
		w0->lso_format = (lso_tun_fmt >> shift);
	}
}
1262
/* Build the scatter/gather (SG) sub-descriptor chain for a multi-segment
 * mbuf. The first segment's length is folded into the SG word already in
 * *sg; remaining segment IOVAs/lengths are written to the command buffer
 * at @cmd. Each SG word describes up to 3 segments; a new SG word is
 * started in-line when more segments remain. The send header's total
 * length (sh->total) is set from the mbuf's pkt_len.
 */
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
				union nix_send_hdr_w0_u *sh,
				union nix_send_sg_s *sg, const uint32_t flags)
{
	struct rte_mbuf *m_next;
	uint64_t *slist, sg_u;
	uint16_t nb_segs;
	int i = 1;

	sh->total = m->pkt_len;
	/* Clear sg->u header before use; keep only subdc/ld_type bits */
	sg->u &= 0xFC00000000000000;
	sg_u = sg->u;
	slist = &cmd[0];

	/* First segment length goes into seg1 size field */
	sg_u = sg_u | ((uint64_t)m->data_len);

	nb_segs = m->nb_segs - 1;
	m_next = m->next;

	/* Set invert df if buffer is not to be freed by H/W */
	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
		sg_u |= (cnxk_nix_prefree_seg(m) << 55);

	/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
	if (!(sg_u & (1ULL << 55)))
		RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
	rte_io_wmb();
#endif

	m = m_next;

	/* Fill mbuf segments */
	do {
		m_next = m->next;
		/* Segment i length occupies bits [i*16 .. i*16+15] */
		sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
		*slist = rte_mbuf_data_iova(m);

		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
			sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));

		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		if (!(sg_u & (1ULL << (i + 55))))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		slist++;
		i++;
		nb_segs--;
		if (i > 2 && nb_segs) {
			i = 0;
			/* Next SG subdesc: commit the full SG word and
			 * start a fresh one in the slist stream.
			 */
			*(uint64_t *)slist = sg_u & 0xFC00000000000000;
			sg->u = sg_u;
			sg->segs = 3;
			sg = (union nix_send_sg_s *)slist;
			sg_u = sg->u;
			slist++;
		}
		m = m_next;
	} while (nb_segs);

	/* Commit the final (possibly partial) SG word */
	sg->u = sg_u;
	sg->segs = i;
}
1329
/* Prepare the send-header (cmd0) and SG (cmd1) NEON vectors for one mbuf.
 * Single-segment mbufs only need the optional fast-free (invert-df) bit in
 * the SG word; multi-segment mbufs go through
 * cn10k_nix_prepare_mseg_vec_list() to build the SG chain in @cmd, after
 * which the header's sizem1 is patched to the final descriptor size.
 */
static __rte_always_inline void
cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
			   uint64x2_t *cmd1, const uint8_t segdw,
			   const uint32_t flags)
{
	union nix_send_hdr_w0_u sh;
	union nix_send_sg_s sg;

	if (m->nb_segs == 1) {
		/* Set invert df if buffer is not to be freed by H/W */
		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
			sg.u = vgetq_lane_u64(cmd1[0], 0);
			sg.u |= (cnxk_nix_prefree_seg(m) << 55);
			cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
		}

		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1[0], 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
		rte_io_wmb();
#endif
		return;
	}

	/* Multi-segment: extract header/SG words, build the chain, then
	 * write the updated words back into the vector lanes.
	 */
	sh.u = vgetq_lane_u64(cmd0[0], 0);
	sg.u = vgetq_lane_u64(cmd1[0], 0);

	cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);

	sh.sizem1 = segdw - 1;
	cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
	cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
}
1363
1364#define NIX_DESCS_PER_LOOP 4
1365
/* Pack up to NIX_DESCS_PER_LOOP (4) multi-segment packets into LMT lines.
 * Pairs of packets share one 128-byte LMT line when their combined
 * descriptor size fits in 8 dwords; otherwise a packet takes a line by
 * itself. For each line consumed, (dwords - 1) is recorded as a 3-bit
 * field in *data128 at *shift (LMTST burst encoding). Returns the number
 * of LMT lines used.
 */
static __rte_always_inline uint8_t
cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
			       uint64x2_t *cmd1, uint64x2_t *cmd2,
			       uint64x2_t *cmd3, uint8_t *segdw,
			       uint64_t *lmt_addr, __uint128_t *data128,
			       uint8_t *shift, const uint16_t flags)
{
	uint8_t j, off, lmt_used;

	if (!(flags & NIX_TX_NEED_EXT_HDR) &&
	    !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
		/* No ext header: all 4 packets fit in one LMT line when
		 * their total descriptor dwords is <= 8 (2 dwords each).
		 */
		if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
			for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
			vst1q_u64(lmt_addr, cmd0[0]);
			vst1q_u64(lmt_addr + 2, cmd1[0]);
			vst1q_u64(lmt_addr + 4, cmd0[1]);
			vst1q_u64(lmt_addr + 6, cmd1[1]);
			vst1q_u64(lmt_addr + 8, cmd0[2]);
			vst1q_u64(lmt_addr + 10, cmd1[2]);
			vst1q_u64(lmt_addr + 12, cmd0[3]);
			vst1q_u64(lmt_addr + 14, cmd1[3]);

			/* One full line of 8 dwords: record (8 - 1) */
			*data128 |= ((__uint128_t)7) << *shift;
			*shift += 3;

			return 1;
		}
	}

	lmt_used = 0;
	for (j = 0; j < NIX_DESCS_PER_LOOP;) {
		/* Fit consecutive packets in same LMT line if possible */
		if ((segdw[j] + segdw[j + 1]) <= 8) {
			if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
				/* TSTAMP takes 4 dwords per packet (hdr,
				 * ext, sg, mem) — multi-seg is unsupported
				 * here, so offsets are fixed.
				 */
				cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);

				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				vst1q_u64(lmt_addr + 6, cmd3[j]);

				vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
				vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				/* Ext header: SG list starts after hdr+ext+sg
				 * (3 dwords, i.e. lmt_addr + 6).
				 */
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* Second packet starts after the first one's
				 * segdw[j] dwords (2 u64 per dword).
				 */
				off = segdw[j] - 3;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 12 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
				vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
			} else {
				/* No ext header: hdr+sg is 2 dwords */
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
				off = segdw[j] - 2;
				off <<= 1;
				cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
							   lmt_addr + 8 + off,
							   &cmd0[j + 1],
							   &cmd1[j + 1],
							   segdw[j + 1], flags);
				vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
				vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
			}
			/* Record combined dwords - 1 for this LMT line */
			*data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
				    << *shift;
			*shift += 3;
			j += 2;
		} else {
			/* Packet pair doesn't fit: one packet per line */
			if ((flags & NIX_TX_NEED_EXT_HDR) &&
			    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
				/* MEM descriptor lands after the SG list */
				off = segdw[j] - 4;
				off <<= 1;
				vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
			} else if (flags & NIX_TX_NEED_EXT_HDR) {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 6,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd2[j]);
				vst1q_u64(lmt_addr + 4, cmd1[j]);
			} else {
				cn10k_nix_prepare_mseg_vec(mbufs[j],
							   lmt_addr + 4,
							   &cmd0[j], &cmd1[j],
							   segdw[j], flags);
				vst1q_u64(lmt_addr, cmd0[j]);
				vst1q_u64(lmt_addr + 2, cmd1[j]);
			}
			*data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
			*shift += 3;
			j++;
		}
		lmt_used++;
		lmt_addr += 16;
	}

	return lmt_used;
}
1500
1501static __rte_always_inline void
1502cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1503 uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1504{
1505
1506 if ((*loff + (dw << 4)) > 128) {
1507 *data128 = *data128 |
1508 (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1509 *shift = *shift + 3;
1510 *loff = 0;
1511 *lnum = *lnum + 1;
1512 }
1513
1514 *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1515 *loff = *loff + (dw << 4);
1516}
1517
/* Store one packet's descriptor words (cmd0..cmd3 as applicable) to the
 * LMT region at @laddr. Word layout depends on the compile-time @flags:
 * with an ext header the order is hdr, ext, sg (and mem for timestamp);
 * without it, hdr then sg. For multi-seg packets the SG chain is built
 * in-line past the fixed words.
 */
static __rte_always_inline void
cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
		     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
		     uint64x2_t cmd3, const uint16_t flags)
{
	uint8_t off;

	/* Handle fast-free (invert-df) here for the single-seg security
	 * path, since it bypasses cn10k_nix_prepare_mseg_vec().
	 */
	if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
	    (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
	    !(flags & NIX_TX_MULTI_SEG_F)) {
		union nix_send_sg_s sg;

		sg.u = vgetq_lane_u64(cmd1, 0);
		sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
		cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);

		/* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
		sg.u = vgetq_lane_u64(cmd1, 0);
		if (!(sg.u & (1ULL << 55)))
			RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
						  0);
		rte_io_wmb();
#endif
	}
	if (flags & NIX_TX_MULTI_SEG_F) {
		if ((flags & NIX_TX_NEED_EXT_HDR) &&
		    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
			/* hdr, ext, sg, SG list..., then mem (cmd3) after
			 * the SG list: offset scales with segdw.
			 */
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			off = segdw - 4;
			off <<= 4;
			vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
		} else if (flags & NIX_TX_NEED_EXT_HDR) {
			/* hdr, ext, sg, then SG list at byte 48 */
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		} else {
			/* hdr, sg, then SG list at byte 32 */
			cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
						   &cmd0, &cmd1, segdw, flags);
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
		}
	} else if (flags & NIX_TX_NEED_EXT_HDR) {
		/* Store the prepared send desc to LMT lines */
		if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
			vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
		} else {
			vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
			vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
			vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
		}
	} else {
		/* No ext header: just hdr then sg */
		vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
		vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
	}
}
1584
1585static __rte_always_inline uint16_t
1586cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1587 struct rte_mbuf **tx_pkts, uint16_t pkts,
1588 uint64_t *cmd, const uint16_t flags)
1589{
1590 uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1591 uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1592 uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1593 cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1594 uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1595 uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1596 uint64x2_t senddesc01_w0, senddesc23_w0;
1597 uint64x2_t senddesc01_w1, senddesc23_w1;
1598 uint64x2_t sendext01_w0, sendext23_w0;
1599 uint64x2_t sendext01_w1, sendext23_w1;
1600 uint64x2_t sendmem01_w0, sendmem23_w0;
1601 uint64x2_t sendmem01_w1, sendmem23_w1;
1602 uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1603 uint64x2_t sgdesc01_w0, sgdesc23_w0;
1604 uint64x2_t sgdesc01_w1, sgdesc23_w1;
1605 struct cn10k_eth_txq *txq = tx_queue;
1606 rte_iova_t io_addr = txq->io_addr;
1607 uintptr_t laddr = txq->lmt_base;
1608 uint8_t c_lnum, c_shft, c_loff;
1609 uint64x2_t ltypes01, ltypes23;
1610 uint64x2_t xtmp128, ytmp128;
1611 uint64x2_t xmask01, xmask23;
1612 uintptr_t c_laddr = laddr;
1613 uint8_t lnum, shift, loff;
1614 rte_iova_t c_io_addr;
1615 uint64_t sa_base;
1616 union wdata {
1617 __uint128_t data128;
1618 uint64_t data[2];
1619 } wd;
1620
1621 if (!(flags & NIX_TX_VWQE_F)) {
1622 NIX_XMIT_FC_OR_RETURN(txq, pkts);
1623 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1624 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1625
1626 txq->fc_cache_pkts -= pkts;
1627 } else {
1628 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1629 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1630 }
1631
1632
1633 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1634 for (i = 0; i < pkts; i++)
1635 cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1636 }
1637
1638 if (!(flags & NIX_TX_VWQE_F)) {
1639 senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1640 } else {
1641 uint64_t w0 =
1642 (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1643 ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1644
1645 senddesc01_w0 = vdupq_n_u64(w0);
1646 }
1647 senddesc23_w0 = senddesc01_w0;
1648
1649 senddesc01_w1 = vdupq_n_u64(0);
1650 senddesc23_w1 = senddesc01_w1;
1651 sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1652 sgdesc23_w0 = sgdesc01_w0;
1653
1654 if (flags & NIX_TX_NEED_EXT_HDR) {
1655 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1656 sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1657 BIT_ULL(15));
1658 sendmem01_w0 =
1659 vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1660 (NIX_SENDMEMALG_SETTSTMP << 56));
1661 sendmem23_w0 = sendmem01_w0;
1662 sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1663 sendmem23_w1 = sendmem01_w1;
1664 } else {
1665 sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1666 }
1667 sendext23_w0 = sendext01_w0;
1668
1669 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1670 sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1671 else
1672 sendext01_w1 = vdupq_n_u64(0);
1673 sendext23_w1 = sendext01_w1;
1674 }
1675
1676
1677 ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1678 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1679 ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1680 c_io_addr = txq->cpt_io_addr;
1681 sa_base = txq->sa_base;
1682 }
1683
1684 left = pkts;
1685again:
1686
1687 burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1688 cn10k_nix_pkts_per_vec_brst(flags) :
1689 left;
1690 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1691 wd.data128 = 0;
1692 shift = 16;
1693 }
1694 lnum = 0;
1695 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1696 loff = 0;
1697 c_loff = 0;
1698 c_lnum = 0;
1699 c_shft = 16;
1700 }
1701
1702 for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1703 if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1704 burst = i;
1705 break;
1706 }
1707
1708 if (flags & NIX_TX_MULTI_SEG_F) {
1709 uint8_t j;
1710
1711 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1712 struct rte_mbuf *m = tx_pkts[j];
1713
1714
1715 segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1716
1717 segdw[j] += 1 +
1718 !!(flags & NIX_TX_NEED_EXT_HDR) +
1719 !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1720 }
1721
1722
1723 if (lnum + 4 > 32) {
1724 uint8_t ldwords_con = 0, lneeded = 0;
1725 for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1726 ldwords_con += segdw[j];
1727 if (ldwords_con > 8) {
1728 lneeded += 1;
1729 ldwords_con = segdw[j];
1730 }
1731 }
1732 lneeded += 1;
1733 if (lnum + lneeded > 32) {
1734 burst = i;
1735 break;
1736 }
1737 }
1738 }
1739
1740 senddesc01_w0 =
1741 vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1742 sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1743
1744 senddesc23_w0 = senddesc01_w0;
1745 sgdesc23_w0 = sgdesc01_w0;
1746
1747
1748 if (flags & NIX_TX_NEED_EXT_HDR) {
1749 sendext01_w1 = vbicq_u64(sendext01_w1,
1750 vdupq_n_u64(0x3FFFF00FFFF00));
1751 sendext23_w1 = sendext01_w1;
1752 }
1753
1754 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755
1756 sendmem01_w0 = vbicq_u64(sendmem01_w0,
1757 vdupq_n_u64(BIT_ULL(59)));
1758
1759 sendmem01_w1 =
1760 vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1761 sendmem23_w0 = sendmem01_w0;
1762 sendmem23_w1 = sendmem01_w1;
1763 }
1764
1765 if (flags & NIX_TX_OFFLOAD_TSO_F) {
1766
1767 sendext01_w0 = vbicq_u64(sendext01_w0,
1768 vdupq_n_u64(BIT_ULL(14)));
1769 sendext23_w0 = sendext01_w0;
1770 }
1771
1772
1773 mbuf0 = (uint64_t *)tx_pkts[0];
1774 mbuf1 = (uint64_t *)tx_pkts[1];
1775 mbuf2 = (uint64_t *)tx_pkts[2];
1776 mbuf3 = (uint64_t *)tx_pkts[3];
1777
1778 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1779 offsetof(struct rte_mbuf, buf_iova));
1780 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1781 offsetof(struct rte_mbuf, buf_iova));
1782 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1783 offsetof(struct rte_mbuf, buf_iova));
1784 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1785 offsetof(struct rte_mbuf, buf_iova));
1786
1787
1788
1789
1790
1791
1792
1793 dataoff_iova0 = vld1q_u64(mbuf0);
1794 len_olflags0 = vld1q_u64(mbuf0 + 2);
1795 dataoff_iova1 = vld1q_u64(mbuf1);
1796 len_olflags1 = vld1q_u64(mbuf1 + 2);
1797 dataoff_iova2 = vld1q_u64(mbuf2);
1798 len_olflags2 = vld1q_u64(mbuf2 + 2);
1799 dataoff_iova3 = vld1q_u64(mbuf3);
1800 len_olflags3 = vld1q_u64(mbuf3 + 2);
1801
1802
1803 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1804 offsetof(struct rte_mbuf, pool) -
1805 offsetof(struct rte_mbuf, buf_iova));
1806 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1807 offsetof(struct rte_mbuf, pool) -
1808 offsetof(struct rte_mbuf, buf_iova));
1809 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1810 offsetof(struct rte_mbuf, pool) -
1811 offsetof(struct rte_mbuf, buf_iova));
1812 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1813 offsetof(struct rte_mbuf, pool) -
1814 offsetof(struct rte_mbuf, buf_iova));
1815
1816 if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1817 NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1818
1819
1820
1821
1822
1823
1824 asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1825 : [a] "+w"(senddesc01_w1)
1826 : [in] "r"(mbuf0 + 2)
1827 : "memory");
1828
1829 asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1830 : [a] "+w"(senddesc01_w1)
1831 : [in] "r"(mbuf1 + 2)
1832 : "memory");
1833
1834 asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1835 : [b] "+w"(senddesc23_w1)
1836 : [in] "r"(mbuf2 + 2)
1837 : "memory");
1838
1839 asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1840 : [b] "+w"(senddesc23_w1)
1841 : [in] "r"(mbuf3 + 2)
1842 : "memory");
1843
1844
1845 mbuf0 = (uint64_t *)*mbuf0;
1846 mbuf1 = (uint64_t *)*mbuf1;
1847 mbuf2 = (uint64_t *)*mbuf2;
1848 mbuf3 = (uint64_t *)*mbuf3;
1849 } else {
1850
1851 mbuf0 = (uint64_t *)*mbuf0;
1852 mbuf1 = (uint64_t *)*mbuf1;
1853 mbuf2 = (uint64_t *)*mbuf2;
1854 mbuf3 = (uint64_t *)*mbuf3;
1855 }
1856
1857 const uint8x16_t shuf_mask2 = {
1858 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1859 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1860 };
1861 xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1862 ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1863
1864
1865 const uint64x2_t and_mask0 = {
1866 0xFFFFFFFFFFFFFFFF,
1867 0x000000000000FFFF,
1868 };
1869
1870 dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1871 dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1872 dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1873 dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1874
1875
1876
1877
1878
1879 xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1880 ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1881
1882
1883 sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1884 sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1885
1886
1887
1888
1889 sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1890 sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1891 senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1892 senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1893
1894
1895 mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1896 offsetof(struct rte_mempool, pool_id));
1897 mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1898 offsetof(struct rte_mempool, pool_id));
1899 mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1900 offsetof(struct rte_mempool, pool_id));
1901 mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1902 offsetof(struct rte_mempool, pool_id));
1903
1904 if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1905 !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1906
1907
1908
1909
1910
1911 const uint8x16_t tbl = {
1912
1913 0x04,
1914 0x14,
1915 0x24,
1916 0x34,
1917 0x03,
1918 0x13,
1919 0x23,
1920 0x33,
1921 0x02,
1922 0x12,
1923 0x22,
1924 0x32,
1925 0x03,
1926 0x13,
1927
1928
1929 0x23,
1930
1931
1932 0x33,
1933
1934
1935 };
1936
1937
1938 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1939 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1940
1941
1942
1943
1944
1945 senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1946 senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1947
1948
1949
1950
1951
1952 xtmp128 = vshrq_n_u8(xtmp128, 4);
1953 ytmp128 = vshrq_n_u8(ytmp128, 4);
1954
1955
1956
1957
1958 const int8x16_t tshft3 = {
1959 -1, 0, 8, 8, 8, 8, 8, 8,
1960 -1, 0, 8, 8, 8, 8, 8, 8,
1961 };
1962
1963 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1964 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1965
1966
1967 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1968 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1969
1970
1971
1972
1973 const uint8x16_t shuf_mask0 = {
1974 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1975 0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1976 };
1977
1978 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1979 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1980
1981
1982
1983
1984
1985
1986
1987 senddesc01_w1 = vaddq_u8(senddesc01_w1,
1988 vshlq_n_u16(senddesc01_w1, 8));
1989 senddesc23_w1 = vaddq_u8(senddesc23_w1,
1990 vshlq_n_u16(senddesc23_w1, 8));
1991
1992
1993 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1994 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1995 } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1996 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1997
1998
1999
2000
2001
2002 const uint8x16_t tbl = {
2003
2004 0x00,
2005 0x03,
2006 0x02,
2007 0x03,
2008 0x04,
2009 0x00,
2010 0x00,
2011 0x00,
2012
2013
2014 0x00,
2015 0x33,
2016 0x32,
2017 0x33,
2018
2019
2020 0x34,
2021 0x00,
2022
2023
2024 0x00,
2025
2026
2027 0x00,
2028
2029
2030 };
2031
2032
2033 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2034 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2035
2036
2037
2038
2039
2040 const uint8x16_t shuf_mask5 = {
2041 0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2042 0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2043 };
2044 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2045 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2046
2047
2048 const uint64x2_t o_cksum_mask = {
2049 0x1C00020000000000,
2050 0x1C00020000000000,
2051 };
2052
2053 xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2054 ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2055
2056
2057
2058
2059
2060 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2061 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2062
2063
2064
2065
2066 xtmp128 = vshrq_n_u8(xtmp128, 2);
2067 ytmp128 = vshrq_n_u8(ytmp128, 2);
2068
2069
2070
2071
2072 const int8x16_t tshft3 = {
2073 -1, 0, 8, 8, 8, 8, 8, 8,
2074 -1, 0, 8, 8, 8, 8, 8, 8,
2075 };
2076
2077 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2078 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2079
2080
2081 ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2082 ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2083
2084
2085
2086
2087 const uint8x16_t shuf_mask0 = {
2088 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2089 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2090 };
2091
2092 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2093 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2094
2095
2096
2097
2098
2099
2100
2101 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2102 vshlq_n_u16(senddesc01_w1, 8));
2103 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2104 vshlq_n_u16(senddesc23_w1, 8));
2105
2106
2107 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2108 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2109 } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2110 (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2111
2112
2113
2114 const uint8x16x2_t tbl = {{
2115 {
2116
2117 0x04,
2118 0x14,
2119 0x24,
2120 0x34,
2121 0x03,
2122 0x13,
2123
2124
2125 0x23,
2126
2127
2128 0x33,
2129
2130
2131 0x02,
2132 0x12,
2133
2134
2135 0x22,
2136
2137
2138 0x32,
2139
2140
2141 0x03,
2142
2143
2144 0x13,
2145
2146
2147 0x23,
2148
2149
2150 0x33,
2151
2152
2153 },
2154
2155 {
2156
2157 0x00,
2158 0x03,
2159 0x02,
2160 0x03,
2161 0x04,
2162 0x00,
2163 0x00,
2164 0x00,
2165
2166
2167 0x00,
2168 0x33,
2169
2170
2171 0x32,
2172
2173
2174 0x33,
2175
2176
2177 0x34,
2178
2179
2180 0x00,
2181
2182
2183 0x00,
2184
2185
2186 0x00,
2187
2188
2189 },
2190 }};
2191
2192
2193 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2194 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2195
2196
2197
2198
2199
2200 const uint32x4_t tshft_4 = {
2201 1,
2202 0,
2203 1,
2204 0,
2205 };
2206 senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2207 senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2208
2209
2210
2211
2212
2213 const uint8x16_t shuf_mask5 = {
2214 0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2215 0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2216 };
2217 senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2218 senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2219
2220
2221 const uint64x2_t oi_cksum_mask = {
2222 0x1CF0020000000000,
2223 0x1CF0020000000000,
2224 };
2225
2226 xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2227 ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2228
2229
2230
2231
2232
2233 xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2234 ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2235
2236
2237
2238
2239
2240
2241 const int8x16_t tshft5 = {
2242 8, 8, 8, 8, 8, 8, -4, -2,
2243 8, 8, 8, 8, 8, 8, -4, -2,
2244 };
2245
2246 xtmp128 = vshlq_u8(xtmp128, tshft5);
2247 ytmp128 = vshlq_u8(ytmp128, tshft5);
2248
2249
2250
2251
2252 const int8x16_t tshft3 = {
2253 -1, 0, -1, 0, 0, 0, 0, 0,
2254 -1, 0, -1, 0, 0, 0, 0, 0,
2255 };
2256
2257 senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2258 senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2259
2260
2261 const uint64x2_t oi_cksum_mask2 = {
2262 0x1000000000000000,
2263 0x1000000000000000,
2264 };
2265
2266 xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2267 ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2268
2269
2270 ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2271 ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2272
2273
2274
2275
2276
2277 const uint8x16_t shuf_mask0 = {
2278 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2279 0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2280 };
2281
2282 ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2283 ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294 senddesc01_w1 = vaddq_u8(senddesc01_w1,
2295 vshlq_n_u32(senddesc01_w1, 8));
2296 senddesc23_w1 = vaddq_u8(senddesc23_w1,
2297 vshlq_n_u32(senddesc23_w1, 8));
2298
2299
2300 senddesc01_w1 = vaddq_u8(
2301 senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2302 senddesc23_w1 = vaddq_u8(
2303 senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2304
2305
2306 senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2307 senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2308 }
2309
2310 xmask01 = vdupq_n_u64(0);
2311 xmask23 = xmask01;
2312 asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2313 : [a] "+w"(xmask01)
2314 : [in] "r"(mbuf0)
2315 : "memory");
2316
2317 asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2318 : [a] "+w"(xmask01)
2319 : [in] "r"(mbuf1)
2320 : "memory");
2321
2322 asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2323 : [b] "+w"(xmask23)
2324 : [in] "r"(mbuf2)
2325 : "memory");
2326
2327 asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2328 : [b] "+w"(xmask23)
2329 : [in] "r"(mbuf3)
2330 : "memory");
2331 xmask01 = vshlq_n_u64(xmask01, 20);
2332 xmask23 = vshlq_n_u64(xmask23, 20);
2333
2334 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2335 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2336
2337 if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2338
2339 const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2340
2341 const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2342
2343 const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2344
2345 const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2346
2347 uint64x2_t ext01 = {
2348 ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2349 ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2350 ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2351 ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2352 };
2353 uint64x2_t ext23 = {
2354 ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2355 ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2356 ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2357 ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2358 };
2359
2360
2361 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2362 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2363
2364
2365 sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2366 sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2367
2368
2369 xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2370 mlv),
2371 vandq_u64(vtstq_u64(xtmp128, olq),
2372 mlq));
2373 ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2374 mlv),
2375 vandq_u64(vtstq_u64(ytmp128, olq),
2376 mlq));
2377
2378
2379 sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2380 sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2381 }
2382
2383 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2384
2385 const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2386 RTE_MBUF_F_TX_IEEE1588_TMST};
2387
2388 const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2389
2390 const uint64x2_t addr = {0x8, 0x8};
2391
2392 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2393 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2394
2395
2396
2397
2398
2399 xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2400 ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2401
2402
2403
2404
2405 sendmem01_w1 = vaddq_u64(sendmem01_w1,
2406 vandq_u64(xtmp128, addr));
2407 sendmem23_w1 = vaddq_u64(sendmem23_w1,
2408 vandq_u64(ytmp128, addr));
2409
2410 sendmem01_w0 = vorrq_u64(sendmem01_w0,
2411 vandq_u64(xtmp128, alg));
2412 sendmem23_w0 = vorrq_u64(sendmem23_w0,
2413 vandq_u64(ytmp128, alg));
2414
2415 cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2416 cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2417 cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2418 cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2419 }
2420
2421 if (flags & NIX_TX_OFFLOAD_TSO_F) {
2422 const uint64_t lso_fmt = txq->lso_tun_fmt;
2423 uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2424 uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2425
2426
2427 vst1q_u64(sd_w1, senddesc01_w1);
2428 vst1q_u64(sd_w1 + 2, senddesc23_w1);
2429
2430
2431 vst1q_u64(sx_w0, sendext01_w0);
2432 vst1q_u64(sx_w0 + 2, sendext23_w0);
2433
2434
2435 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2436 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2437
2438
2439 cn10k_nix_prepare_tso(tx_pkts[0],
2440 (union nix_send_hdr_w1_u *)&sd_w1[0],
2441 (union nix_send_ext_w0_u *)&sx_w0[0],
2442 vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2443
2444 cn10k_nix_prepare_tso(tx_pkts[1],
2445 (union nix_send_hdr_w1_u *)&sd_w1[1],
2446 (union nix_send_ext_w0_u *)&sx_w0[1],
2447 vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2448
2449 cn10k_nix_prepare_tso(tx_pkts[2],
2450 (union nix_send_hdr_w1_u *)&sd_w1[2],
2451 (union nix_send_ext_w0_u *)&sx_w0[2],
2452 vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2453
2454 cn10k_nix_prepare_tso(tx_pkts[3],
2455 (union nix_send_hdr_w1_u *)&sd_w1[3],
2456 (union nix_send_ext_w0_u *)&sx_w0[3],
2457 vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2458
2459 senddesc01_w1 = vld1q_u64(sd_w1);
2460 senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2461
2462 sendext01_w0 = vld1q_u64(sx_w0);
2463 sendext23_w0 = vld1q_u64(sx_w0 + 2);
2464 }
2465
2466 if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2467 !(flags & NIX_TX_MULTI_SEG_F) &&
2468 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2469
2470 xmask01 = vdupq_n_u64(0);
2471 xmask23 = xmask01;
2472
2473
2474 mbuf0 = (uint64_t *)tx_pkts[0];
2475 mbuf1 = (uint64_t *)tx_pkts[1];
2476 mbuf2 = (uint64_t *)tx_pkts[2];
2477 mbuf3 = (uint64_t *)tx_pkts[3];
2478
2479 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2480 vsetq_lane_u64(0x80000, xmask01, 0);
2481 else
2482 RTE_MEMPOOL_CHECK_COOKIES(
2483 ((struct rte_mbuf *)mbuf0)->pool,
2484 (void **)&mbuf0, 1, 0);
2485
2486 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2487 vsetq_lane_u64(0x80000, xmask01, 1);
2488 else
2489 RTE_MEMPOOL_CHECK_COOKIES(
2490 ((struct rte_mbuf *)mbuf1)->pool,
2491 (void **)&mbuf1, 1, 0);
2492
2493 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2494 vsetq_lane_u64(0x80000, xmask23, 0);
2495 else
2496 RTE_MEMPOOL_CHECK_COOKIES(
2497 ((struct rte_mbuf *)mbuf2)->pool,
2498 (void **)&mbuf2, 1, 0);
2499
2500 if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2501 vsetq_lane_u64(0x80000, xmask23, 1);
2502 else
2503 RTE_MEMPOOL_CHECK_COOKIES(
2504 ((struct rte_mbuf *)mbuf3)->pool,
2505 (void **)&mbuf3, 1, 0);
2506 senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2507 senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2508 } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2509 !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2510
2511 mbuf0 = (uint64_t *)tx_pkts[0];
2512 mbuf1 = (uint64_t *)tx_pkts[1];
2513 mbuf2 = (uint64_t *)tx_pkts[2];
2514 mbuf3 = (uint64_t *)tx_pkts[3];
2515
2516
2517
2518
2519 RTE_MEMPOOL_CHECK_COOKIES(
2520 ((struct rte_mbuf *)mbuf0)->pool,
2521 (void **)&mbuf0, 1, 0);
2522
2523 RTE_MEMPOOL_CHECK_COOKIES(
2524 ((struct rte_mbuf *)mbuf1)->pool,
2525 (void **)&mbuf1, 1, 0);
2526
2527 RTE_MEMPOOL_CHECK_COOKIES(
2528 ((struct rte_mbuf *)mbuf2)->pool,
2529 (void **)&mbuf2, 1, 0);
2530
2531 RTE_MEMPOOL_CHECK_COOKIES(
2532 ((struct rte_mbuf *)mbuf3)->pool,
2533 (void **)&mbuf3, 1, 0);
2534 }
2535
2536
2537 cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2538 cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2539 cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2540 cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2541
2542 cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2543 cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2544 cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2545 cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2546
2547 if (flags & NIX_TX_NEED_EXT_HDR) {
2548 cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2549 cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2550 cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2551 cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2552 }
2553
2554 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2555 const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2556 RTE_MBUF_F_TX_SEC_OFFLOAD};
2557 uintptr_t next;
2558 uint8_t dw;
2559
2560
2561 xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2562 ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2563
2564 xtmp128 = vtstq_u64(olf, xtmp128);
2565 ytmp128 = vtstq_u64(olf, ytmp128);
2566
2567
2568 dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2569 if (vgetq_lane_u64(xtmp128, 0))
2570 cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2571 &cmd1[0], &next, c_laddr,
2572 &c_lnum, &c_loff,
2573 &c_shft, sa_base, flags);
2574 else
2575 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2576 &shift, &wd.data128, &next);
2577
2578
2579 cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2580 cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2581 flags);
2582
2583
2584 dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2585 if (vgetq_lane_u64(xtmp128, 1))
2586 cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2587 &cmd1[1], &next, c_laddr,
2588 &c_lnum, &c_loff,
2589 &c_shft, sa_base, flags);
2590 else
2591 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2592 &shift, &wd.data128, &next);
2593
2594
2595 cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2596 cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2597 flags);
2598
2599
2600 dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2601 if (vgetq_lane_u64(ytmp128, 0))
2602 cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2603 &cmd1[2], &next, c_laddr,
2604 &c_lnum, &c_loff,
2605 &c_shft, sa_base, flags);
2606 else
2607 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2608 &shift, &wd.data128, &next);
2609
2610
2611 cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2612 cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2613 flags);
2614
2615
2616 dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2617 if (vgetq_lane_u64(ytmp128, 1))
2618 cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2619 &cmd1[3], &next, c_laddr,
2620 &c_lnum, &c_loff,
2621 &c_shft, sa_base, flags);
2622 else
2623 cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2624 &shift, &wd.data128, &next);
2625
2626
2627 cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2628 cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2629 flags);
2630
2631 } else if (flags & NIX_TX_MULTI_SEG_F) {
2632 uint8_t j;
2633
2634 segdw[4] = 8;
2635 j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2636 cmd2, cmd3, segdw,
2637 (uint64_t *)
2638 LMT_OFF(laddr, lnum,
2639 0),
2640 &wd.data128, &shift,
2641 flags);
2642 lnum += j;
2643 } else if (flags & NIX_TX_NEED_EXT_HDR) {
2644
2645 if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2646 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2647 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2648 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2649 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2650 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2651 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2652 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2653 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2654 lnum += 1;
2655 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2656 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2657 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2658 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2659 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2660 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2661 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2662 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2663 } else {
2664 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2665 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2666 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2667 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2668 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2669 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2670 lnum += 1;
2671 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2672 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2673 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2674 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2675 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2676 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2677 }
2678 lnum += 1;
2679 } else {
2680
2681 vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2682 vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2683 vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2684 vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2685 vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2686 vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2687 vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2688 vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2689 lnum += 1;
2690 }
2691
2692 if (flags & NIX_TX_MULTI_SEG_F) {
2693 tx_pkts[0]->next = NULL;
2694 tx_pkts[1]->next = NULL;
2695 tx_pkts[2]->next = NULL;
2696 tx_pkts[3]->next = NULL;
2697 }
2698
2699 tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2700 }
2701
2702
2703 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2704 lnum = lnum + !!loff;
2705 wd.data128 = wd.data128 |
2706 (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2707 }
2708
2709 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2710 wd.data[0] >>= 16;
2711
2712 if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2713 ws[1] = roc_sso_hws_head_wait(ws[0]);
2714
2715 left -= burst;
2716
2717
2718 if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2719 cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
2720 cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2721 c_shft);
2722 }
2723
2724
2725 if (lnum > 16) {
2726 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2727 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2728
2729 pa = io_addr | (wd.data[0] & 0x7) << 4;
2730 wd.data[0] &= ~0x7ULL;
2731
2732 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2733 wd.data[0] <<= 16;
2734
2735 wd.data[0] |= (15ULL << 12);
2736 wd.data[0] |= (uint64_t)lmt_id;
2737
2738
2739 roc_lmt_submit_steorl(wd.data[0], pa);
2740
2741 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2742 wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2743
2744 pa = io_addr | (wd.data[1] & 0x7) << 4;
2745 wd.data[1] &= ~0x7ULL;
2746
2747 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2748 wd.data[1] <<= 16;
2749
2750 wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2751 wd.data[1] |= (uint64_t)(lmt_id + 16);
2752
2753
2754 roc_lmt_submit_steorl(wd.data[1], pa);
2755 } else if (lnum) {
2756 if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2757 wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2758
2759 pa = io_addr | (wd.data[0] & 0x7) << 4;
2760 wd.data[0] &= ~0x7ULL;
2761
2762 if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2763 wd.data[0] <<= 16;
2764
2765 wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2766 wd.data[0] |= lmt_id;
2767
2768
2769 roc_lmt_submit_steorl(wd.data[0], pa);
2770 }
2771
2772 rte_io_wmb();
2773 if (left)
2774 goto again;
2775
2776 if (unlikely(scalar)) {
2777 if (flags & NIX_TX_MULTI_SEG_F)
2778 pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2779 scalar, cmd, flags);
2780 else
2781 pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2782 scalar, cmd, flags);
2783 }
2784
2785 return pkts;
2786}
2787
2788#else
2789static __rte_always_inline uint16_t
2790cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2791 struct rte_mbuf **tx_pkts, uint16_t pkts,
2792 uint64_t *cmd, const uint16_t flags)
2793{
2794 RTE_SET_USED(ws);
2795 RTE_SET_USED(tx_queue);
2796 RTE_SET_USED(tx_pkts);
2797 RTE_SET_USED(pkts);
2798 RTE_SET_USED(cmd);
2799 RTE_SET_USED(flags);
2800 return 0;
2801}
2802#endif
2803
/* Short aliases for the Tx offload flag bits, used only to keep the
 * NIX_TX_FASTPATH_MODES_* tables below readable within line limits.
 */
#define L3L4CSUM_F NIX_TX_OFFLOAD_L3_L4_CSUM_F
#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
#define VLAN_F NIX_TX_OFFLOAD_VLAN_QINQ_F
#define NOFF_F NIX_TX_OFFLOAD_MBUF_NOFF_F
#define TSO_F NIX_TX_OFFLOAD_TSO_F
#define TSP_F NIX_TX_OFFLOAD_TSTAMP_F
#define T_SEC_F NIX_TX_OFFLOAD_SECURITY_F
2811
2812
/* Tx fastpath modes 0..15: combinations of NOFF/VLAN/OL3OL4CSUM/L3L4CSUM.
 * Each entry is T(name-suffix, sz, flags), where sz is the length in
 * 64-bit words of the cmd[] scratch array used by the generated function
 * (see NIX_TX_XMIT below); 6 here since no timestamp offload is present.
 */
#define NIX_TX_FASTPATH_MODES_0_15                                             \
	T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
	T(l3l4csum, 6, L3L4CSUM_F)                                             \
	T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
	T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
	T(vlan, 6, VLAN_F)                                                     \
	T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
	T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
	T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff, 6, NOFF_F)                                                     \
	T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
	T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
	T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
	T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
	T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
	T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
	  NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2831
/* Tx fastpath modes 16..31: TSO combined with the checksum/VLAN/NOFF set. */
#define NIX_TX_FASTPATH_MODES_16_31                                            \
	T(tso, 6, TSO_F)                                                       \
	T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
	T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
	T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
	T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
	T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
	T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
	T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff, 6, TSO_F | NOFF_F)                                         \
	T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
	T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
	T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
	  TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
	T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
	T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
	T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2852
/* Tx fastpath modes 32..47: timestamp (TSP_F) combinations.
 * sz is 8 (not 6) because timestamping needs two extra send
 * sub-descriptor words (see cn10k_nix_tx_ext_subs()).
 */
#define NIX_TX_FASTPATH_MODES_32_47                                            \
	T(ts, 8, TSP_F)                                                        \
	T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
	T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
	T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
	T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
	T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
	T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff, 8, TSP_F | NOFF_F)                                          \
	T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
	T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
	T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
	  TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
	T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
	T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
	T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
	T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
	  TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2873
/* Tx fastpath modes 48..63: timestamp + TSO combinations (sz 8 for TSP_F). */
#define NIX_TX_FASTPATH_MODES_48_63                                            \
	T(ts_tso, 8, TSP_F | TSO_F)                                            \
	T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
	T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
	T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
	  TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
	T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
	T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
	T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
	T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
	T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
	T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
	T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
	T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
	T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
	T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
	T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2897
/* Tx fastpath modes 64..79: inline security (T_SEC_F) combinations. */
#define NIX_TX_FASTPATH_MODES_64_79                                            \
	T(sec, 6, T_SEC_F)                                                     \
	T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
	T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
	T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
	T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
	T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
	T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
	T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
	T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
	T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
	T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
	  T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
	T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
	T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
	T(sec_noff_vlan_ol3ol4csum, 6,                                         \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
	T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
	  T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2919
/* Tx fastpath modes 80..95: security + TSO combinations. */
#define NIX_TX_FASTPATH_MODES_80_95                                            \
	T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
	T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
	T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
	T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
	  T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
	T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
	T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
	T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
	T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
	T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
	T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
	  T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
	T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
	  T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2943
/* Tx fastpath modes 96..111: security + timestamp combinations (sz 8). */
#define NIX_TX_FASTPATH_MODES_96_111                                           \
	T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
	T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
	T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
	T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
	  T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
	T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
	T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
	T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
	T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
	T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
	T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
	T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
	  T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
	T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
	T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
	T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
	T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
	  T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2967
/* Tx fastpath modes 112..127: security + timestamp + TSO combinations. */
#define NIX_TX_FASTPATH_MODES_112_127                                          \
	T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
	T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
	T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
	T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
	  T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
	T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
	T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
	T(sec_ts_tso_noff_l3l4csum, 8,                                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
	T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
	T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
	T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
	T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
	T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
	T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
	  T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
		  L3L4CSUM_F)
2996
/* All 128 Tx offload-flag combinations, concatenated. Expanding this with a
 * user-supplied T(name, sz, flags) generates one entry per combination.
 */
#define NIX_TX_FASTPATH_MODES                                                  \
	NIX_TX_FASTPATH_MODES_0_15                                             \
	NIX_TX_FASTPATH_MODES_16_31                                            \
	NIX_TX_FASTPATH_MODES_32_47                                            \
	NIX_TX_FASTPATH_MODES_48_63                                            \
	NIX_TX_FASTPATH_MODES_64_79                                            \
	NIX_TX_FASTPATH_MODES_80_95                                            \
	NIX_TX_FASTPATH_MODES_96_111                                           \
	NIX_TX_FASTPATH_MODES_112_127
3006
/* Declare prototypes for the four Tx entry-point variants (scalar,
 * multi-seg, vector, vector multi-seg) of every fastpath mode. The
 * matching definitions are generated elsewhere via the NIX_TX_XMIT*
 * macros below.
 */
#define T(name, sz, flags)                                                     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
	uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);

NIX_TX_FASTPATH_MODES
#undef T
3019
/* Generate the scalar Tx entry point 'fn' for one offload mode. 'sz' sizes
 * the on-stack cmd[] scratch array (64-bit words). TSO without inner L3/L4
 * checksum offload is an invalid combination, so such variants compile down
 * to a function that sends nothing ('flags' is a compile-time constant).
 */
#define NIX_TX_XMIT(fn, sz, flags)                                             \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
									       \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
					   flags);                             \
	}
3032
/* Generate the multi-segment scalar Tx entry point 'fn'. The cmd[] scratch
 * array is enlarged by CNXK_NIX_TX_MSEG_SG_DWORDS - 2 words to hold the
 * extra scatter-gather descriptors, and NIX_TX_MULTI_SEG_F is forced on.
 * The TSO-without-L3L4CSUM guard mirrors NIX_TX_XMIT above.
 */
#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
									       \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
						cmd,                           \
						flags | NIX_TX_MULTI_SEG_F);   \
	}
3046
/* Generate the vector (NEON) Tx entry point 'fn'. Falls back to a stub
 * returning 0 on builds where cn10k_nix_xmit_pkts_vector() is the no-op
 * variant. Same invalid-TSO guard as the scalar generators.
 */
#define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[sz];                                              \
									       \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
						  pkts, cmd, (flags));         \
	}
3059
/* Generate the vector multi-segment Tx entry point 'fn': vector path with
 * NIX_TX_MULTI_SEG_F forced on and cmd[] enlarged for the extra
 * scatter-gather descriptor words, as in NIX_TX_XMIT_MSEG.
 */
#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
	uint16_t __rte_noinline __rte_hot fn(                                  \
		void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
	{                                                                      \
		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
									       \
		if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
		    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
			return 0;                                              \
		return cn10k_nix_xmit_pkts_vector(                             \
			tx_queue, NULL, tx_pkts, pkts, cmd,                    \
			(flags) | NIX_TX_MULTI_SEG_F);                         \
	}
3073
3074#endif
3075