dpdk/drivers/net/cnxk/cn10k_tx.h
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(C) 2021 Marvell.
 */
#ifndef __CN10K_TX_H__
#define __CN10K_TX_H__

#include <rte_vect.h>

#include <rte_eventdev.h>

#define NIX_TX_OFFLOAD_NONE           (0)
#define NIX_TX_OFFLOAD_L3_L4_CSUM_F   BIT(0)
#define NIX_TX_OFFLOAD_OL3_OL4_CSUM_F BIT(1)
#define NIX_TX_OFFLOAD_VLAN_QINQ_F    BIT(2)
#define NIX_TX_OFFLOAD_MBUF_NOFF_F    BIT(3)
#define NIX_TX_OFFLOAD_TSO_F          BIT(4)
#define NIX_TX_OFFLOAD_TSTAMP_F       BIT(5)
#define NIX_TX_OFFLOAD_SECURITY_F     BIT(6)
#define NIX_TX_OFFLOAD_MAX            (NIX_TX_OFFLOAD_SECURITY_F << 1)

/* Flags to control the xmit_prepare function.
 * Defined from the MSB end to denote that they are not offload flags
 * used to pick the transmit function.
 */
#define NIX_TX_VWQE_F      BIT(14)
#define NIX_TX_MULTI_SEG_F BIT(15)

#define NIX_TX_NEED_SEND_HDR_W1                                                \
        (NIX_TX_OFFLOAD_L3_L4_CSUM_F | NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |         \
         NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSO_F)

#define NIX_TX_NEED_EXT_HDR                                                    \
        (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |                \
         NIX_TX_OFFLOAD_TSO_F)

#define NIX_XMIT_FC_OR_RETURN(txq, pkts)                                       \
        do {                                                                   \
                /* Cached value is low, update the fc_cache_pkts */            \
                if (unlikely((txq)->fc_cache_pkts < (pkts))) {                 \
                        /* Multiply with sqe_per_sqb to express in pkts */     \
                        (txq)->fc_cache_pkts =                                 \
                                ((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)      \
                                << (txq)->sqes_per_sqb_log2;                   \
                        /* Check again for room */                             \
                        if (unlikely((txq)->fc_cache_pkts < (pkts)))           \
                                return 0;                                      \
                }                                                              \
        } while (0)

/* Macro encoding number of segments to number of dwords; the dword
 * count for each value of nb_segs is stored as a 4-bit nibble.
 */
#define NIX_SEGDW_MAGIC 0x76654432210ULL

#define NIX_NB_SEGS_TO_SEGDW(x) ((NIX_SEGDW_MAGIC >> ((x) << 2)) & 0xF)
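
/* E.g. NIX_NB_SEGS_TO_SEGDW(3) = (0x76654432210ULL >> 12) & 0xF = 2 and
 * NIX_NB_SEGS_TO_SEGDW(6) = 4: one SG header per up to three segments
 * plus one iova per segment, rounded up to 16B (2-dword) units.
 */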

/* Determine the number of Tx sub-descriptors required when the
 * extended sub-descriptor is enabled.
 */
static __rte_always_inline int
cn10k_nix_tx_ext_subs(const uint16_t flags)
{
        return (flags & NIX_TX_OFFLOAD_TSTAMP_F) ?
                       2 :
                       ((flags & (NIX_TX_OFFLOAD_VLAN_QINQ_F |
                                  NIX_TX_OFFLOAD_TSO_F)) ? 1 : 0);
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords(const uint16_t flags, const uint8_t segdw)
{
        if (!(flags & NIX_TX_MULTI_SEG_F))
                return cn10k_nix_tx_ext_subs(flags) + 2;

        /* Already everything is accounted for in segdw */
        return segdw;
}

static __rte_always_inline uint8_t
cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
               << ROC_LMT_LINES_PER_CORE_LOG2;
}

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line(const uint16_t flags)
{
        return (flags & NIX_TX_NEED_EXT_HDR) ?
                       ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : 8;
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_ext_subs(flags) + 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}
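
/* Sketch of the word built above, as consumed by the STEOR in
 * cn10k_nix_xmit_pkts(): bits [2:0] carry the size of the first LMT
 * line (dwords - 1, later relocated into the I/O address), 3-bit size
 * fields for lines 1..15 sit at bits 19, 22, ..., 61, and the LMT ID
 * plus valid-line count are OR-ed in at submit time.
 */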

static __rte_always_inline uint8_t
cn10k_nix_tx_dwords_per_line_seg(const uint16_t flags)
{
        return ((flags & NIX_TX_NEED_EXT_HDR) ?
                        ((flags & NIX_TX_OFFLOAD_TSTAMP_F) ? 8 : 6) : 4);
}

static __rte_always_inline uint64_t
cn10k_nix_tx_steor_vec_data(const uint16_t flags)
{
        const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1;
        /* 15 vector sizes for single seg */
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline uint64_t
cn10k_cpt_tx_steor_data(void)
{
        /* We have two CPT instructions per LMTLine */
        const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
        uint64_t data;

        /* This will be moved to addr area */
        data = dw_m1 << 16;
        data |= dw_m1 << 19;
        data |= dw_m1 << 22;
        data |= dw_m1 << 25;
        data |= dw_m1 << 28;
        data |= dw_m1 << 31;
        data |= dw_m1 << 34;
        data |= dw_m1 << 37;
        data |= dw_m1 << 40;
        data |= dw_m1 << 43;
        data |= dw_m1 << 46;
        data |= dw_m1 << 49;
        data |= dw_m1 << 52;
        data |= dw_m1 << 55;
        data |= dw_m1 << 58;
        data |= dw_m1 << 61;

        return data;
}

static __rte_always_inline void
cn10k_nix_tx_skeleton(struct cn10k_eth_txq *txq, uint64_t *cmd,
                      const uint16_t flags, const uint16_t static_sz)
{
        if (static_sz)
                cmd[0] = txq->send_hdr_w0;
        else
                cmd[0] = (txq->send_hdr_w0 & 0xFFFFF00000000000) |
                         ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
        cmd[1] = 0;

        if (flags & NIX_TX_NEED_EXT_HDR) {
                if (flags & NIX_TX_OFFLOAD_TSTAMP_F)
                        cmd[2] = (NIX_SUBDC_EXT << 60) | BIT_ULL(15);
                else
                        cmd[2] = NIX_SUBDC_EXT << 60;
                cmd[3] = 0;
                cmd[4] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        } else {
                cmd[2] = (NIX_SUBDC_SG << 60) | BIT_ULL(48);
        }
}
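
/* Resulting skeleton layout: cmd[0..1] = SEND_HDR_S, then with an
 * extended header cmd[2..3] = SEND_EXT_S and cmd[4..5] = SEND_SG_S,
 * otherwise cmd[2..3] = SEND_SG_S directly.
 */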

static __rte_always_inline void
cn10k_nix_sec_fc_wait(struct cn10k_eth_txq *txq, uint16_t nb_pkts)
{
        int32_t nb_desc, val, newval;
        int32_t *fc_sw;
        volatile uint64_t *fc;

        /* Check if there is any CPT instruction to submit */
        if (!nb_pkts)
                return;

again:
        fc_sw = txq->cpt_fc_sw;
        val = __atomic_sub_fetch(fc_sw, nb_pkts, __ATOMIC_RELAXED);
        if (likely(val >= 0))
                return;

        nb_desc = txq->cpt_desc;
        fc = txq->cpt_fc;
        while (true) {
                newval = nb_desc - __atomic_load_n(fc, __ATOMIC_RELAXED);
                newval -= nb_pkts;
                if (newval >= 0)
                        break;
        }

        if (!__atomic_compare_exchange_n(fc_sw, &val, newval, false,
                                         __ATOMIC_RELAXED, __ATOMIC_RELAXED))
                goto again;
}
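
/* E.g. with cpt_desc = 2048 and *fc (assumed to track in-flight CPT
 * instructions) near the descriptor count, a 32-packet burst drives the
 * cached counter negative; the loop above then spins until
 * 2048 - *fc - 32 >= 0 and publishes that as the new cached value,
 * retrying the CAS if another lcore raced in.
 */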

static __rte_always_inline void
cn10k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum,
                     uint8_t loff, uint8_t shft)
{
        uint64_t data;
        uintptr_t pa;

        /* Check if there is any CPT instruction to submit */
        if (!lnum && !loff)
                return;

        data = cn10k_cpt_tx_steor_data();
        /* Update lmtline use for partial end line */
        if (loff) {
                data &= ~(0x7ULL << shft);
                /* Update it to half full, i.e. 64B */
                data |= (0x3UL << shft);
        }

        pa = io_addr | ((data >> 16) & 0x7) << 4;
        data &= ~(0x7ULL << 16);
        /* Update lines - 1 that contain valid data */
        data |= ((uint64_t)(lnum + loff - 1)) << 12;
        data |= lmt_id;

        /* STEOR */
        roc_lmt_submit_steorl(data, pa);
}
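
/* E.g. lnum = 1, loff = 1 submits two LMT lines, with the size field of
 * the trailing half-filled line forced to 0x3 (64B, a single CPT
 * instruction).
 */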

#if defined(RTE_ARCH_ARM64)
static __rte_always_inline void
cn10k_nix_prep_sec_vec(struct rte_mbuf *m, uint64x2_t *cmd0, uint64x2_t *cmd1,
                       uintptr_t *nixtx_addr, uintptr_t lbase, uint8_t *lnum,
                       uint8_t *loff, uint8_t *shft, uint64_t sa_base,
                       const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        sess_priv.u64 = *rte_security_dynfield(m);

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = vgetq_lane_u8(*cmd0, 10);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 11) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 13);
                } else {
                        l2_len = vgetq_lane_u8(*cmd0, 8);
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = vgetq_lane_u8(*cmd0, 9) - l2_len;
                        l3l4type = vgetq_lane_u8(*cmd0, 12);
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                *cmd0 = vsetq_lane_u16(0, *cmd0, 6);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = vgetq_lane_u64(*cmd1, 1);
        pkt_len = vgetq_lane_u16(*cmd0, 0);

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        *cmd0 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd0, 0);
        *cmd1 = vsetq_lane_u16(pkt_len + dlen_adj, *cmd1, 0);

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed excludes the L2 header */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 |
                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

        /* CPT Word 0 and Word 1 */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }

        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}
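
/* Each LMT line holds two 64B CPT instructions: loff toggles between
 * line halves, lnum advances on every second instruction, and shft
 * tracks the 3-bit size field for the current line in the STEOR data
 * word consumed by cn10k_nix_sec_steorl().
 */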

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        struct cn10k_sec_sess_priv sess_priv;
        uint32_t pkt_len, dlen_adj, rlen;
        struct nix_send_hdr_s *send_hdr;
        uint8_t l3l4type, chksum;
        uint64x2_t cmd01, cmd23;
        union nix_send_sg_s *sg;
        uint8_t l2_len, l3_len;
        uintptr_t dptr, nixtx;
        uint64_t ucode_cmd[4];
        uint64_t *laddr;
        uint16_t tag;
        uint64_t sa;

        /* Move to our line from base */
        sess_priv.u64 = *rte_security_dynfield(m);
        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR)
                sg = (union nix_send_sg_s *)&cmd[4];
        else
                sg = (union nix_send_sg_s *)&cmd[2];

        if (flags & NIX_TX_NEED_SEND_HDR_W1) {
                /* Extract l3l4type either from il3il4type or ol3ol4type */
                if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F &&
                    flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                        l2_len = (cmd[1] >> 16) & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 24) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 40) & 0xFF;
                } else {
                        l2_len = cmd[1] & 0xFF;
                        /* L4 ptr from send hdr includes l2 and l3 len */
                        l3_len = ((cmd[1] >> 8) & 0xFF) - l2_len;
                        l3l4type = (cmd[1] >> 32) & 0xFF;
                }

                chksum = (l3l4type & 0x1) << 1 | !!(l3l4type & 0x30);
                chksum = ~chksum;
                sess_priv.chksum = sess_priv.chksum & chksum;
                /* Clear SEND header flags */
                cmd[1] &= ~(0xFFFFUL << 32);
        } else {
                l2_len = m->l2_len;
                l3_len = m->l3_len;
        }

        /* Retrieve DPTR */
        dptr = *(uint64_t *)(sg + 1);
        pkt_len = send_hdr->w0.total;

        /* Calculate dlen adj */
        dlen_adj = pkt_len - l2_len;
        /* Exclude l3 len from roundup for transport mode */
        dlen_adj -= sess_priv.mode ? 0 : l3_len;
        rlen = (dlen_adj + sess_priv.roundup_len) +
               (sess_priv.roundup_byte - 1);
        rlen &= ~(uint64_t)(sess_priv.roundup_byte - 1);
        rlen += sess_priv.partial_len;
        dlen_adj = rlen - dlen_adj;

        /* Update send descriptors. Security is single segment only */
        send_hdr->w0.total = pkt_len + dlen_adj;
        sg->seg1_size = pkt_len + dlen_adj;

        /* Get area where NIX descriptor needs to be stored */
        nixtx = dptr + pkt_len + dlen_adj;
        nixtx += BIT_ULL(7);
        nixtx = (nixtx - 1) & ~(BIT_ULL(7) - 1);

        /* Return nixtx addr */
        *nixtx_addr = (nixtx + 16);

        /* DLEN passed excludes the L2 header */
        pkt_len -= l2_len;
        tag = sa_base & 0xFFFFUL;
        sa_base &= ~0xFFFFUL;
        sa = (uintptr_t)roc_nix_inl_ot_ipsec_outb_sa(sa_base, sess_priv.sa_idx);
        ucode_cmd[3] = (ROC_CPT_DFLT_ENG_GRP_SE_IE << 61 | 1UL << 60 | sa);
        ucode_cmd[0] = (ROC_IE_OT_MAJOR_OP_PROCESS_OUTBOUND_IPSEC << 48 |
                        ((uint64_t)sess_priv.chksum) << 32 |
                        ((uint64_t)sess_priv.dec_ttl) << 34 | pkt_len);

        /* CPT Word 0 and Word 1. Assume no multi-seg support */
        cmd01 = vdupq_n_u64((nixtx + 16) | (cn10k_nix_tx_ext_subs(flags) + 1));
        /* CPT_RES_S is 16B above NIXTX */
        cmd01 = vsetq_lane_u8(nixtx & BIT_ULL(7), cmd01, 8);

        /* CPT word 2 and 3 */
        cmd23 = vdupq_n_u64(0);
        cmd23 = vsetq_lane_u64((((uint64_t)RTE_EVENT_TYPE_CPU << 28) | tag |
                                CNXK_ETHDEV_SEC_OUTB_EV_SUB << 20), cmd23, 0);
        cmd23 = vsetq_lane_u64((uintptr_t)m | 1, cmd23, 1);

        dptr += l2_len;

        if (sess_priv.mode == ROC_IE_SA_MODE_TUNNEL) {
                if (sess_priv.outer_ip_ver == ROC_IE_SA_IP_VERSION_4)
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
                else
                        *((uint16_t *)(dptr - 2)) =
                                rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV6);
        }
        ucode_cmd[1] = dptr;
        ucode_cmd[2] = dptr;

        /* Move to our line */
        laddr = LMT_OFF(lbase, *lnum, *loff ? 64 : 0);

        /* Write CPT instruction to lmt line */
        vst1q_u64(laddr, cmd01);
        vst1q_u64((laddr + 2), cmd23);

        *(__uint128_t *)(laddr + 4) = *(__uint128_t *)ucode_cmd;
        *(__uint128_t *)(laddr + 6) = *(__uint128_t *)(ucode_cmd + 2);

        /* Move to next line for every other CPT inst */
        *loff = !(*loff);
        *lnum = *lnum + (*loff ? 0 : 1);
        *shft = *shft + (*loff ? 0 : 3);
}

#else

static __rte_always_inline void
cn10k_nix_prep_sec(struct rte_mbuf *m, uint64_t *cmd, uintptr_t *nixtx_addr,
                   uintptr_t lbase, uint8_t *lnum, uint8_t *loff, uint8_t *shft,
                   uint64_t sa_base, const uint16_t flags)
{
        RTE_SET_USED(m);
        RTE_SET_USED(cmd);
        RTE_SET_USED(nixtx_addr);
        RTE_SET_USED(lbase);
        RTE_SET_USED(lnum);
        RTE_SET_USED(loff);
        RTE_SET_USED(shft);
        RTE_SET_USED(sa_base);
        RTE_SET_USED(flags);
}
#endif

static __rte_always_inline void
cn10k_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
{
        uint64_t mask, ol_flags = m->ol_flags;

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uintptr_t mdata = rte_pktmbuf_mtod(m, uintptr_t);
                uint16_t *iplen, *oiplen, *oudplen;
                uint16_t lso_sb, paylen;

                mask = -!!(ol_flags & (RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IPV6));
                lso_sb = (mask & (m->outer_l2_len + m->outer_l3_len)) +
                         m->l2_len + m->l3_len + m->l4_len;

                /* Reduce payload len from base headers */
                paylen = m->pkt_len - lso_sb;

                /* Get iplen position assuming no tunnel hdr */
                iplen = (uint16_t *)(mdata + m->l2_len +
                                     (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;

                        oiplen = (uint16_t *)(mdata + m->outer_l2_len +
                                              (2 << !!(ol_flags &
                                                       RTE_MBUF_F_TX_OUTER_IPV6)));
                        *oiplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*oiplen) -
                                                   paylen);

                        /* Update format for UDP tunneled packet */
                        if (is_udp_tun) {
                                oudplen = (uint16_t *)(mdata + m->outer_l2_len +
                                                       m->outer_l3_len + 4);
                                *oudplen = rte_cpu_to_be_16(
                                        rte_be_to_cpu_16(*oudplen) - paylen);
                        }

                        /* Update iplen position to inner ip hdr */
                        iplen = (uint16_t *)(mdata + lso_sb - m->l3_len -
                                             m->l4_len +
                                             (2 << !!(ol_flags & RTE_MBUF_F_TX_IPV6)));
                }

                *iplen = rte_cpu_to_be_16(rte_be_to_cpu_16(*iplen) - paylen);
        }
}
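
/* Worked example (assumed values): a plain TCPv4 packet with
 * l2_len = 14, l3_len = 20, l4_len = 20 and pkt_len = 1514 gives
 * lso_sb = 54 and paylen = 1460; the IPv4 total-length field at
 * mdata + 16 is reduced by 1460 so each hardware-generated segment
 * carries a correct length.
 */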

static __rte_always_inline void
cn10k_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
                       const uint64_t lso_tun_fmt, bool *sec, uint8_t mark_flag,
                       uint64_t mark_fmt)
{
        uint8_t mark_off = 0, mark_vlan = 0, markptr = 0;
        struct nix_send_ext_s *send_hdr_ext;
        struct nix_send_hdr_s *send_hdr;
        uint64_t ol_flags = 0, mask;
        union nix_send_hdr_w1_u w1;
        union nix_send_sg_s *sg;
        uint16_t mark_form = 0;

        send_hdr = (struct nix_send_hdr_s *)cmd;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                sg = (union nix_send_sg_s *)(cmd + 4);
                /* Clear previous markings */
                send_hdr_ext->w0.lso = 0;
                send_hdr_ext->w0.mark_en = 0;
                send_hdr_ext->w1.u = 0;
                ol_flags = m->ol_flags;
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }

        if (flags & (NIX_TX_NEED_SEND_HDR_W1 | NIX_TX_OFFLOAD_SECURITY_F)) {
                ol_flags = m->ol_flags;
                w1.u = 0;
        }

        if (!(flags & NIX_TX_MULTI_SEG_F))
                send_hdr->w0.total = m->data_len;
        else
                send_hdr->w0.total = m->pkt_len;
        send_hdr->w0.aura = roc_npa_aura_handle_to_aura(m->pool->pool_id);

        /*
         * L3type:  2 => IPV4
         *          3 => IPV4 with csum
         *          4 => IPV6
         * L3type and L3ptr need to be set for either
         * L3 csum or L4 csum or LSO
         */

        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
            (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t ol3type =
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                        ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                        !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L3 */
                w1.ol3type = ol3type;
                mask = 0xffffull << ((!!ol3type) << 4);
                w1.ol3ptr = ~mask & m->outer_l2_len;
                w1.ol4ptr = ~mask & (w1.ol3ptr + m->outer_l3_len);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

                /* Inner L3 */
                w1.il3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2);
                w1.il3ptr = w1.ol4ptr + m->l2_len;
                w1.il4ptr = w1.il3ptr + m->l3_len;
                /* Increment by 1 for IPv4 since 3 means IPv4 with csum */
                w1.il3type = w1.il3type + !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.il4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;

                /* With no tunnel header, shift the IL3/IL4 fields down
                 * so that OL3/OL4 carry the header checksum info
                 */
                mask = !ol3type;
                w1.u = ((w1.u & 0xFFFFFFFF00000000) >> (mask << 3)) |
                       ((w1.u & 0x00000000FFFFFFFF) >> (mask << 4));

        } else if (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) {
                const uint8_t csum = !!(ol_flags & RTE_MBUF_F_TX_OUTER_UDP_CKSUM);
                const uint8_t outer_l2_len = m->outer_l2_len;

                /* Outer L3 */
                w1.ol3ptr = outer_l2_len;
                w1.ol4ptr = outer_l2_len + m->outer_l3_len;
                /* Increment by 1 for IPv4 since 3 means IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_OUTER_IP_CKSUM);

                /* Outer L4 */
                w1.ol4type = csum + (csum << 1);

        } else if (flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) {
                const uint8_t l2_len = m->l2_len;

                /* Always use OLXPTR and OLXTYPE when only
                 * one header is present
                 */

                /* Inner L3 */
                w1.ol3ptr = l2_len;
                w1.ol4ptr = l2_len + m->l3_len;
                /* Increment by 1 for IPv4 since 3 means IPv4 with csum */
                w1.ol3type = ((!!(ol_flags & RTE_MBUF_F_TX_IPV4)) << 1) +
                             ((!!(ol_flags & RTE_MBUF_F_TX_IPV6)) << 2) +
                             !!(ol_flags & RTE_MBUF_F_TX_IP_CKSUM);

                /* Inner L4 */
                w1.ol4type = (ol_flags & RTE_MBUF_F_TX_L4_MASK) >> 52;
        }

        if (flags & NIX_TX_NEED_EXT_HDR && flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                const uint8_t ipv6 = !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                const uint8_t ip = !!(ol_flags & (RTE_MBUF_F_TX_IPV4 |
                                                  RTE_MBUF_F_TX_IPV6));

                send_hdr_ext->w1.vlan1_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_VLAN);
                /* HW will update ptr after vlan0 update */
                send_hdr_ext->w1.vlan1_ins_ptr = 12;
                send_hdr_ext->w1.vlan1_ins_tci = m->vlan_tci;

                send_hdr_ext->w1.vlan0_ins_ena = !!(ol_flags & RTE_MBUF_F_TX_QINQ);
                /* 2B before end of l2 header */
                send_hdr_ext->w1.vlan0_ins_ptr = 12;
                send_hdr_ext->w1.vlan0_ins_tci = m->vlan_tci_outer;
                /* Fill for VLAN marking only when VLAN insertion enabled */
                mark_vlan = ((mark_flag & CNXK_TM_MARK_VLAN_DEI) &
                             (send_hdr_ext->w1.vlan1_ins_ena ||
                              send_hdr_ext->w1.vlan0_ins_ena));

                /* Mask requested flags with packet data information */
                mark_off = mark_flag & ((ip << 2) | (ip << 1) | mark_vlan);
                mark_off = ffs(mark_off & CNXK_TM_MARK_MASK);

                mark_form = (mark_fmt >> ((mark_off - !!mark_off) << 4));
                mark_form = (mark_form >> (ipv6 << 3)) & 0xFF;
                markptr = m->l2_len + (mark_form >> 7) - (mark_vlan << 2);

                send_hdr_ext->w0.mark_en = !!mark_off;
                send_hdr_ext->w0.markform = mark_form & 0x7F;
                send_hdr_ext->w0.markptr = markptr;
        }

        if (flags & NIX_TX_OFFLOAD_TSO_F && (ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
                uint16_t lso_sb;
                uint64_t mask;

                mask = -(!w1.il3type);
                lso_sb = (mask & w1.ol4ptr) + (~mask & w1.il4ptr) + m->l4_len;

                send_hdr_ext->w0.lso_sb = lso_sb;
                send_hdr_ext->w0.lso = 1;
                send_hdr_ext->w0.lso_mps = m->tso_segsz;
                send_hdr_ext->w0.lso_format =
                        NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
                w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;

                /* Handle tunnel tso */
                if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
                    (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
                        const uint8_t is_udp_tun =
                                (CNXK_NIX_UDP_TUN_BITMASK >>
                                 ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
                                0x1;
                        uint8_t shift = is_udp_tun ? 32 : 0;

                        shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
                        shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);

                        w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
                        w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
                        /* Update format for UDP tunneled packet */
                        send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
                }
        }

        if (flags & NIX_TX_NEED_SEND_HDR_W1)
                send_hdr->w1.u = w1.u;

        if (!(flags & NIX_TX_MULTI_SEG_F)) {
                sg->seg1_size = send_hdr->w0.total;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
                        /* DF bit = 1 if refcount of current mbuf or parent mbuf
                         *              is greater than 1
                         * DF bit = 0 otherwise
                         */
                        send_hdr->w0.df = cnxk_nix_prefree_seg(m);
                }
                /* Mark mempool object as "put" since it is freed by NIX */
                if (!send_hdr->w0.df)
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        } else {
                sg->seg1_size = m->data_len;
                *(rte_iova_t *)(sg + 1) = rte_mbuf_data_iova(m);

                /* NOFF is handled later for multi-seg */
        }

        if (flags & NIX_TX_OFFLOAD_SECURITY_F)
                *sec = !!(ol_flags & RTE_MBUF_F_TX_SEC_OFFLOAD);
}
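
/* E.g. an mbuf with RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM set
 * (no tunnel, L3_L4_CSUM only) yields ol3type = 3 (IPv4 with csum) per
 * the encoding above, with ol3ptr = l2_len and ol4ptr = l2_len + l3_len.
 */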

static __rte_always_inline void
cn10k_nix_xmit_mv_lmt_base(uintptr_t lmt_addr, uint64_t *cmd,
                           const uint16_t flags)
{
        struct nix_send_ext_s *send_hdr_ext;
        union nix_send_sg_s *sg;

        /* With minimal offloads, 'cmd' being local could be optimized out to
         * registers. In other cases, 'cmd' will be in stack. Intent is
         * 'cmd' stores content from txq->cmd which is copied only once.
         */
        *((struct nix_send_hdr_s *)lmt_addr) = *(struct nix_send_hdr_s *)cmd;
        lmt_addr += 16;
        if (flags & NIX_TX_NEED_EXT_HDR) {
                send_hdr_ext = (struct nix_send_ext_s *)(cmd + 2);
                *((struct nix_send_ext_s *)lmt_addr) = *send_hdr_ext;
                lmt_addr += 16;

                sg = (union nix_send_sg_s *)(cmd + 4);
        } else {
                sg = (union nix_send_sg_s *)(cmd + 2);
        }
        /* In case of multi-seg, sg template is stored here */
        *((union nix_send_sg_s *)lmt_addr) = *sg;
        *(rte_iova_t *)(lmt_addr + 8) = *(rte_iova_t *)(sg + 1);
}

static __rte_always_inline void
cn10k_nix_xmit_prepare_tstamp(struct cn10k_eth_txq *txq, uintptr_t lmt_addr,
                              const uint64_t ol_flags, const uint16_t no_segdw,
                              const uint16_t flags)
{
        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
                const uint8_t is_ol_tstamp =
                        !(ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST);
                uint64_t *lmt = (uint64_t *)lmt_addr;
                uint16_t off = (no_segdw - 1) << 1;
                struct nix_send_mem_s *send_mem;

                send_mem = (struct nix_send_mem_s *)(lmt + off);
                /* For packets without RTE_MBUF_F_TX_IEEE1588_TMST set, the Tx
                 * tstamp should not be recorded. Hence change the alg type to
                 * NIX_SENDMEMALG_SUB and point the send mem addr field to the
                 * next 8 bytes so that the actual Tx tstamp registered address
                 * is not corrupted.
                 */
                send_mem->w0.subdc = NIX_SUBDC_MEM;
                send_mem->w0.alg =
                        NIX_SENDMEMALG_SETTSTMP + (is_ol_tstamp << 3);
                send_mem->addr =
                        (rte_iova_t)(((uint64_t *)txq->ts_mem) + is_ol_tstamp);
        }
}

static __rte_always_inline uint16_t
cn10k_nix_prepare_mseg(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
{
        struct nix_send_hdr_s *send_hdr;
        union nix_send_sg_s *sg;
        struct rte_mbuf *m_next;
        uint64_t *slist, sg_u;
        uint64_t nb_segs;
        uint64_t segdw;
        uint8_t off, i;

        send_hdr = (struct nix_send_hdr_s *)cmd;

        if (flags & NIX_TX_NEED_EXT_HDR)
                off = 2;
        else
                off = 0;

        sg = (union nix_send_sg_s *)&cmd[2 + off];

        /* Start from second segment, first segment is already there */
        i = 1;
        sg_u = sg->u;
        nb_segs = m->nb_segs - 1;
        m_next = m->next;
        slist = &cmd[3 + off + 1];

        /* Set invert df if buffer is not to be freed by H/W */
        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                sg_u |= (cnxk_nix_prefree_seg(m) << 55);

        /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
        if (!(sg_u & (1ULL << 55)))
                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
        rte_io_wmb();
#endif
        m = m_next;
        if (!m)
                goto done;

        /* Fill mbuf segments */
        do {
                m_next = m->next;
                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
                *slist = rte_mbuf_data_iova(m);
                /* Set invert df if buffer is not to be freed by H/W */
                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
                /* Mark mempool object as "put" since it is freed by NIX */
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
                if (!(sg_u & (1ULL << (i + 55))))
                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
#endif
                slist++;
                i++;
                nb_segs--;
                if (i > 2 && nb_segs) {
                        i = 0;
                        /* Next SG subdesc */
                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
                        sg->u = sg_u;
                        sg->segs = 3;
                        sg = (union nix_send_sg_s *)slist;
                        sg_u = sg->u;
                        slist++;
                }
                m = m_next;
        } while (nb_segs);

done:
        sg->u = sg_u;
        sg->segs = i;
        segdw = (uint64_t *)slist - (uint64_t *)&cmd[2 + off];
        /* Roundup extra dwords to multiple of 2 */
        segdw = (segdw >> 1) + (segdw & 0x1);
        /* Default dwords */
        segdw += (off >> 1) + 1 + !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
        send_hdr->w0.sizem1 = segdw - 1;

        return segdw;
}
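
/* Sizing example: 4 segments with an extended header (off = 2) consume
 * 6 dwords of SG area (two SG headers plus four iovas), i.e. 3 units
 * after rounding, plus one unit each for SEND_HDR and SEND_EXT:
 * segdw = 5 and sizem1 = 4.
 */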

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
                    uint16_t pkts, uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        const rte_iova_t io_addr = txq->io_addr;
        uint8_t lnum, c_lnum, c_shft, c_loff;
        uintptr_t pa, lbase = txq->lmt_base;
        uint16_t lmt_id, burst, left, i;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        rte_iova_t c_io_addr;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        uint64_t data;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at LMT steorl
                 * will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              4, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec)
                        lnum++;
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        /* Trigger LMTST */
        if (burst > 16) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= (15ULL << 12);
                data |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);

                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 17)) << 12;
                data |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data, pa);
        } else if (burst) {
                data = cn10k_nix_tx_steor_data(flags);
                pa = io_addr | (data & 0x7) << 4;
                data &= ~0x7ULL;
                data |= ((uint64_t)(burst - 1)) << 12;
                data |= lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data, pa);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
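
/* A single STEOR launches at most 16 LMT lines, hence the split above:
 * a burst of more than 16 issues STEOR0 for lines lmt_id..lmt_id + 15
 * and STEOR1 for the remainder, whose line count is encoded as
 * burst - 17 (i.e. lines - 1 of the second batch).
 */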

static __rte_always_inline uint16_t
cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
                         struct rte_mbuf **tx_pkts, uint16_t pkts,
                         uint64_t *cmd, const uint16_t flags)
{
        struct cn10k_eth_txq *txq = tx_queue;
        uintptr_t pa0, pa1, lbase = txq->lmt_base;
        const rte_iova_t io_addr = txq->io_addr;
        uint16_t segdw, lmt_id, burst, left, i;
        uint8_t lnum, c_lnum, c_loff;
        uintptr_t c_lbase = lbase;
        uint64_t lso_tun_fmt = 0;
        uint64_t mark_fmt = 0;
        uint8_t mark_flag = 0;
        uint64_t data0, data1;
        rte_iova_t c_io_addr;
        uint8_t shft, c_shft;
        __uint128_t data128;
        uint16_t c_lmt_id;
        uint64_t sa_base;
        uintptr_t laddr;
        bool sec;

        if (!(flags & NIX_TX_VWQE_F)) {
                NIX_XMIT_FC_OR_RETURN(txq, pkts);
                /* Reduce the cached count */
                txq->fc_cache_pkts -= pkts;
        }
        /* Get cmd skeleton */
        cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));

        if (flags & NIX_TX_OFFLOAD_TSO_F)
                lso_tun_fmt = txq->lso_tun_fmt;

        if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
                mark_fmt = txq->mark_fmt;
                mark_flag = txq->mark_flag;
        }

        /* Get LMT base address and LMT ID as lcore id */
        ROC_LMT_BASE_ID_GET(lbase, lmt_id);
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                ROC_LMT_CPT_BASE_ID_GET(c_lbase, c_lmt_id);
                c_io_addr = txq->cpt_io_addr;
                sa_base = txq->sa_base;
        }

        left = pkts;
again:
        burst = left > 32 ? 32 : left;
        shft = 16;
        data128 = 0;

        lnum = 0;
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                c_lnum = 0;
                c_loff = 0;
                c_shft = 16;
        }

        for (i = 0; i < burst; i++) {
                /* Perform header writes for TSO; the barrier at LMT steorl
                 * will suffice.
                 */
                if (flags & NIX_TX_OFFLOAD_TSO_F)
                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);

                cn10k_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt,
                                       &sec, mark_flag, mark_fmt);

                laddr = (uintptr_t)LMT_OFF(lbase, lnum, 0);

                /* Prepare CPT instruction and get nixtx addr */
                if (flags & NIX_TX_OFFLOAD_SECURITY_F && sec)
                        cn10k_nix_prep_sec(tx_pkts[i], cmd, &laddr, c_lbase,
                                           &c_lnum, &c_loff, &c_shft, sa_base,
                                           flags);

                /* Move NIX desc to LMT/NIXTX area */
                cn10k_nix_xmit_mv_lmt_base(laddr, cmd, flags);
                /* Store sg list directly on lmt line */
                segdw = cn10k_nix_prepare_mseg(tx_pkts[i], (uint64_t *)laddr,
                                               flags);
                cn10k_nix_xmit_prepare_tstamp(txq, laddr, tx_pkts[i]->ol_flags,
                                              segdw, flags);
                if (!(flags & NIX_TX_OFFLOAD_SECURITY_F) || !sec) {
                        lnum++;
                        data128 |= (((__uint128_t)(segdw - 1)) << shft);
                        shft += 3;
                }
        }

        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
                ws[1] = roc_sso_hws_head_wait(ws[0]);

        left -= burst;
        tx_pkts += burst;

        /* Submit CPT instructions if any */
        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
                /* Reduce pkts to be sent to CPT */
                burst -= ((c_lnum << 1) + c_loff);
                cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
                                     c_shft);
        }

        data0 = (uint64_t)data128;
        data1 = (uint64_t)(data128 >> 64);
        /* Make data0 similar to data1 */
        data0 >>= 16;
        /* Trigger LMTST */
        if (burst > 16) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= (15ULL << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);

                pa1 = io_addr | (data1 & 0x7) << 4;
                data1 &= ~0x7ULL;
                data1 <<= 16;
                data1 |= ((uint64_t)(burst - 17)) << 12;
                data1 |= (uint64_t)(lmt_id + 16);

                /* STEOR1 */
                roc_lmt_submit_steorl(data1, pa1);
        } else if (burst) {
                pa0 = io_addr | (data0 & 0x7) << 4;
                data0 &= ~0x7ULL;
                /* Move lmtst1..15 sz to bits 63:19 */
                data0 <<= 16;
                data0 |= ((burst - 1) << 12);
                data0 |= (uint64_t)lmt_id;

                /* STEOR0 */
                roc_lmt_submit_steorl(data0, pa0);
        }

        rte_io_wmb();
        if (left)
                goto again;

        return pkts;
}
1220
1221#if defined(RTE_ARCH_ARM64)
1222
1223static __rte_always_inline void
1224cn10k_nix_prepare_tso(struct rte_mbuf *m, union nix_send_hdr_w1_u *w1,
1225                      union nix_send_ext_w0_u *w0, uint64_t ol_flags,
1226                      const uint64_t flags, const uint64_t lso_tun_fmt)
1227{
1228        uint16_t lso_sb;
1229        uint64_t mask;
1230
1231        if (!(ol_flags & RTE_MBUF_F_TX_TCP_SEG))
1232                return;
1233
1234        mask = -(!w1->il3type);
1235        lso_sb = (mask & w1->ol4ptr) + (~mask & w1->il4ptr) + m->l4_len;
1236
1237        w0->u |= BIT(14);
1238        w0->lso_sb = lso_sb;
1239        w0->lso_mps = m->tso_segsz;
1240        w0->lso_format = NIX_LSO_FORMAT_IDX_TSOV4 + !!(ol_flags & RTE_MBUF_F_TX_IPV6);
1241        w1->ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
1242
1243        /* Handle tunnel tso */
1244        if ((flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F) &&
1245            (ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK)) {
1246                const uint8_t is_udp_tun =
1247                        (CNXK_NIX_UDP_TUN_BITMASK >>
1248                         ((ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) >> 45)) &
1249                        0x1;
1250                uint8_t shift = is_udp_tun ? 32 : 0;
1251
1252                shift += (!!(ol_flags & RTE_MBUF_F_TX_OUTER_IPV6) << 4);
1253                shift += (!!(ol_flags & RTE_MBUF_F_TX_IPV6) << 3);
1254
1255                w1->il4type = NIX_SENDL4TYPE_TCP_CKSUM;
1256                w1->ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
1257                /* Update format for UDP tunneled packet */
1258
1259                w0->lso_format = (lso_tun_fmt >> shift);
1260        }
1261}
1262
1263static __rte_always_inline void
1264cn10k_nix_prepare_mseg_vec_list(struct rte_mbuf *m, uint64_t *cmd,
1265                                union nix_send_hdr_w0_u *sh,
1266                                union nix_send_sg_s *sg, const uint32_t flags)
1267{
1268        struct rte_mbuf *m_next;
1269        uint64_t *slist, sg_u;
1270        uint16_t nb_segs;
1271        int i = 1;
1272
1273        sh->total = m->pkt_len;
1274        /* Clear sg->u header before use */
1275        sg->u &= 0xFC00000000000000;
1276        sg_u = sg->u;
1277        slist = &cmd[0];
1278
1279        sg_u = sg_u | ((uint64_t)m->data_len);
1280
1281        nb_segs = m->nb_segs - 1;
1282        m_next = m->next;
1283
1284        /* Set invert df if buffer is not to be freed by H/W */
1285        if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1286                sg_u |= (cnxk_nix_prefree_seg(m) << 55);
1287                /* Mark mempool object as "put" since it is freed by NIX */
1288#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1289        if (!(sg_u & (1ULL << 55)))
1290                RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1291        rte_io_wmb();
1292#endif
1293
1294        m = m_next;
1295        /* Fill mbuf segments */
1296        do {
1297                m_next = m->next;
1298                sg_u = sg_u | ((uint64_t)m->data_len << (i << 4));
1299                *slist = rte_mbuf_data_iova(m);
1300                /* Set invert df if buffer is not to be freed by H/W */
1301                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F)
1302                        sg_u |= (cnxk_nix_prefree_seg(m) << (i + 55));
1303                        /* Mark mempool object as "put" since it is freed by NIX
1304                         */
1305#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1306                if (!(sg_u & (1ULL << (i + 55))))
1307                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1308                rte_io_wmb();
1309#endif
1310                slist++;
1311                i++;
1312                nb_segs--;
1313                if (i > 2 && nb_segs) {
1314                        i = 0;
1315                        /* Next SG subdesc */
1316                        *(uint64_t *)slist = sg_u & 0xFC00000000000000;
1317                        sg->u = sg_u;
1318                        sg->segs = 3;
1319                        sg = (union nix_send_sg_s *)slist;
1320                        sg_u = sg->u;
1321                        slist++;
1322                }
1323                m = m_next;
1324        } while (nb_segs);
1325
1326        sg->u = sg_u;
1327        sg->segs = i;
1328}
1329
1330static __rte_always_inline void
1331cn10k_nix_prepare_mseg_vec(struct rte_mbuf *m, uint64_t *cmd, uint64x2_t *cmd0,
1332                           uint64x2_t *cmd1, const uint8_t segdw,
1333                           const uint32_t flags)
1334{
1335        union nix_send_hdr_w0_u sh;
1336        union nix_send_sg_s sg;
1337
1338        if (m->nb_segs == 1) {
1339                if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
1340                        sg.u = vgetq_lane_u64(cmd1[0], 0);
1341                        sg.u |= (cnxk_nix_prefree_seg(m) << 55);
1342                        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1343                }
1344
1345#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1346                sg.u = vgetq_lane_u64(cmd1[0], 0);
1347                if (!(sg.u & (1ULL << 55)))
1348                        RTE_MEMPOOL_CHECK_COOKIES(m->pool, (void **)&m, 1, 0);
1349                rte_io_wmb();
1350#endif
1351                return;
1352        }
1353
1354        sh.u = vgetq_lane_u64(cmd0[0], 0);
1355        sg.u = vgetq_lane_u64(cmd1[0], 0);
1356
1357        cn10k_nix_prepare_mseg_vec_list(m, cmd, &sh, &sg, flags);
1358
1359        sh.sizem1 = segdw - 1;
1360        cmd0[0] = vsetq_lane_u64(sh.u, cmd0[0], 0);
1361        cmd1[0] = vsetq_lane_u64(sg.u, cmd1[0], 0);
1362}
1363
1364#define NIX_DESCS_PER_LOOP 4
1365
1366static __rte_always_inline uint8_t
1367cn10k_nix_prep_lmt_mseg_vector(struct rte_mbuf **mbufs, uint64x2_t *cmd0,
1368                               uint64x2_t *cmd1, uint64x2_t *cmd2,
1369                               uint64x2_t *cmd3, uint8_t *segdw,
1370                               uint64_t *lmt_addr, __uint128_t *data128,
1371                               uint8_t *shift, const uint16_t flags)
1372{
1373        uint8_t j, off, lmt_used;
1374
1375        if (!(flags & NIX_TX_NEED_EXT_HDR) &&
1376            !(flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1377                /* All 4 packets are single-seg (2 dwords each) and fit in one LMT line. */
1378                if ((segdw[0] + segdw[1] + segdw[2] + segdw[3]) <= 8) {
1379                        for (j = 0; j < NIX_DESCS_PER_LOOP; j++)
1380                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1381                                                           &cmd0[j], &cmd1[j],
1382                                                           segdw[j], flags);
1383                        vst1q_u64(lmt_addr, cmd0[0]);
1384                        vst1q_u64(lmt_addr + 2, cmd1[0]);
1385                        vst1q_u64(lmt_addr + 4, cmd0[1]);
1386                        vst1q_u64(lmt_addr + 6, cmd1[1]);
1387                        vst1q_u64(lmt_addr + 8, cmd0[2]);
1388                        vst1q_u64(lmt_addr + 10, cmd1[2]);
1389                        vst1q_u64(lmt_addr + 12, cmd0[3]);
1390                        vst1q_u64(lmt_addr + 14, cmd1[3]);
1391
1392                        *data128 |= ((__uint128_t)7) << *shift;
1393                        *shift += 3;
1394
1395                        return 1;
1396                }
1397        }
1398
1399        lmt_used = 0;
1400        for (j = 0; j < NIX_DESCS_PER_LOOP;) {
1401                /* Fit consecutive packets in same LMTLINE. */
1402                if ((segdw[j] + segdw[j + 1]) <= 8) {
1403                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1404                                cn10k_nix_prepare_mseg_vec(mbufs[j], NULL,
1405                                                           &cmd0[j], &cmd1[j],
1406                                                           segdw[j], flags);
1407                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1], NULL,
1408                                                           &cmd0[j + 1],
1409                                                           &cmd1[j + 1],
1410                                                           segdw[j + 1], flags);
1411                                /* TSTAMP packets take 4 dwords each; no room for segs. */
1412                                vst1q_u64(lmt_addr, cmd0[j]);
1413                                vst1q_u64(lmt_addr + 2, cmd2[j]);
1414                                vst1q_u64(lmt_addr + 4, cmd1[j]);
1415                                vst1q_u64(lmt_addr + 6, cmd3[j]);
1416
1417                                vst1q_u64(lmt_addr + 8, cmd0[j + 1]);
1418                                vst1q_u64(lmt_addr + 10, cmd2[j + 1]);
1419                                vst1q_u64(lmt_addr + 12, cmd1[j + 1]);
1420                                vst1q_u64(lmt_addr + 14, cmd3[j + 1]);
1421                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
1422                                /* EXT header takes 3 dwords each, leaving space for 2 segs. */
1423                                cn10k_nix_prepare_mseg_vec(mbufs[j],
1424                                                           lmt_addr + 6,
1425                                                           &cmd0[j], &cmd1[j],
1426                                                           segdw[j], flags);
1427                                vst1q_u64(lmt_addr, cmd0[j]);
1428                                vst1q_u64(lmt_addr + 2, cmd2[j]);
1429                                vst1q_u64(lmt_addr + 4, cmd1[j]);
1430                                off = segdw[j] - 3;
1431                                off <<= 1;
1432                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1433                                                           lmt_addr + 12 + off,
1434                                                           &cmd0[j + 1],
1435                                                           &cmd1[j + 1],
1436                                                           segdw[j + 1], flags);
1437                                vst1q_u64(lmt_addr + 6 + off, cmd0[j + 1]);
1438                                vst1q_u64(lmt_addr + 8 + off, cmd2[j + 1]);
1439                                vst1q_u64(lmt_addr + 10 + off, cmd1[j + 1]);
1440                        } else {
1441                                cn10k_nix_prepare_mseg_vec(mbufs[j],
1442                                                           lmt_addr + 4,
1443                                                           &cmd0[j], &cmd1[j],
1444                                                           segdw[j], flags);
1445                                vst1q_u64(lmt_addr, cmd0[j]);
1446                                vst1q_u64(lmt_addr + 2, cmd1[j]);
1447                                off = segdw[j] - 2;
1448                                off <<= 1;
1449                                cn10k_nix_prepare_mseg_vec(mbufs[j + 1],
1450                                                           lmt_addr + 8 + off,
1451                                                           &cmd0[j + 1],
1452                                                           &cmd1[j + 1],
1453                                                           segdw[j + 1], flags);
1454                                vst1q_u64(lmt_addr + 4 + off, cmd0[j + 1]);
1455                                vst1q_u64(lmt_addr + 6 + off, cmd1[j + 1]);
1456                        }
1457                        *data128 |= ((__uint128_t)(segdw[j] + segdw[j + 1]) - 1)
1458                                    << *shift;
1459                        *shift += 3;
1460                        j += 2;
1461                } else {
1462                        if ((flags & NIX_TX_NEED_EXT_HDR) &&
1463                            (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1464                                cn10k_nix_prepare_mseg_vec(mbufs[j],
1465                                                           lmt_addr + 6,
1466                                                           &cmd0[j], &cmd1[j],
1467                                                           segdw[j], flags);
1468                                vst1q_u64(lmt_addr, cmd0[j]);
1469                                vst1q_u64(lmt_addr + 2, cmd2[j]);
1470                                vst1q_u64(lmt_addr + 4, cmd1[j]);
1471                                off = segdw[j] - 4;
1472                                off <<= 1;
1473                                vst1q_u64(lmt_addr + 6 + off, cmd3[j]);
1474                        } else if (flags & NIX_TX_NEED_EXT_HDR) {
1475                                cn10k_nix_prepare_mseg_vec(mbufs[j],
1476                                                           lmt_addr + 6,
1477                                                           &cmd0[j], &cmd1[j],
1478                                                           segdw[j], flags);
1479                                vst1q_u64(lmt_addr, cmd0[j]);
1480                                vst1q_u64(lmt_addr + 2, cmd2[j]);
1481                                vst1q_u64(lmt_addr + 4, cmd1[j]);
1482                        } else {
1483                                cn10k_nix_prepare_mseg_vec(mbufs[j],
1484                                                           lmt_addr + 4,
1485                                                           &cmd0[j], &cmd1[j],
1486                                                           segdw[j], flags);
1487                                vst1q_u64(lmt_addr, cmd0[j]);
1488                                vst1q_u64(lmt_addr + 2, cmd1[j]);
1489                        }
1490                        *data128 |= ((__uint128_t)(segdw[j]) - 1) << *shift;
1491                        *shift += 3;
1492                        j++;
1493                }
1494                lmt_used++;
1495                lmt_addr += 16;
1496        }
1497
1498        return lmt_used;
1499}
1500
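    /* Reserve dw dwords in the current LMT line; roll over to the next
     * line when fewer than dw * 16 bytes remain, folding the finished
     * line's (dwords - 1) count into *data128 at *shift. *next returns
     * the write address for this packet's descriptors.
     */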
1501static __rte_always_inline void
1502cn10k_nix_lmt_next(uint8_t dw, uintptr_t laddr, uint8_t *lnum, uint8_t *loff,
1503                   uint8_t *shift, __uint128_t *data128, uintptr_t *next)
1504{
1505        /* Go to next line if we are out of space */
1506        if ((*loff + (dw << 4)) > 128) {
1507                *data128 = *data128 |
1508                           (((__uint128_t)((*loff >> 4) - 1)) << *shift);
1509                *shift = *shift + 3;
1510                *loff = 0;
1511                *lnum = *lnum + 1;
1512        }
1513
1514        *next = (uintptr_t)LMT_OFF(laddr, *lnum, *loff);
1515        *loff = *loff + (dw << 4);
1516}
1517
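    /* Store one packet's prepared send descriptors to its LMT address.
     * Word order within the line is HDR, then EXT (cmd2) when present,
     * then SG (cmd1); for multi-seg packets the segment list is written
     * first so that trailing subdescriptors (e.g. the TSTAMP cmd3) can
     * be placed at an offset computed from segdw.
     */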
1518static __rte_always_inline void
1519cn10k_nix_xmit_store(struct rte_mbuf *mbuf, uint8_t segdw, uintptr_t laddr,
1520                     uint64x2_t cmd0, uint64x2_t cmd1, uint64x2_t cmd2,
1521                     uint64x2_t cmd3, const uint16_t flags)
1522{
1523        uint8_t off;
1524
1525        /* Handle no fast free when security is enabled without mseg */
1526        if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
1527            (flags & NIX_TX_OFFLOAD_SECURITY_F) &&
1528            !(flags & NIX_TX_MULTI_SEG_F)) {
1529                union nix_send_sg_s sg;
1530
1531                sg.u = vgetq_lane_u64(cmd1, 0);
1532                sg.u |= (cnxk_nix_prefree_seg(mbuf) << 55);
1533                cmd1 = vsetq_lane_u64(sg.u, cmd1, 0);
1534
1535#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
1536                sg.u = vgetq_lane_u64(cmd1, 0);
1537                if (!(sg.u & (1ULL << 55)))
1538                        RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1,
1539                                                  0);
1540                rte_io_wmb();
1541#endif
1542        }
1543        if (flags & NIX_TX_MULTI_SEG_F) {
1544                if ((flags & NIX_TX_NEED_EXT_HDR) &&
1545                    (flags & NIX_TX_OFFLOAD_TSTAMP_F)) {
1546                        cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1547                                                   &cmd0, &cmd1, segdw, flags);
1548                        vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1549                        vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1550                        vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1551                        off = segdw - 4;
1552                        off <<= 4;
1553                        vst1q_u64(LMT_OFF(laddr, 0, 48 + off), cmd3);
1554                } else if (flags & NIX_TX_NEED_EXT_HDR) {
1555                        cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 48),
1556                                                   &cmd0, &cmd1, segdw, flags);
1557                        vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1558                        vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1559                        vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1560                } else {
1561                        cn10k_nix_prepare_mseg_vec(mbuf, LMT_OFF(laddr, 0, 32),
1562                                                   &cmd0, &cmd1, segdw, flags);
1563                        vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1564                        vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1565                }
1566        } else if (flags & NIX_TX_NEED_EXT_HDR) {
1567                /* Store the prepared send desc to LMT lines */
1568                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1569                        vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1570                        vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1571                        vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1572                        vst1q_u64(LMT_OFF(laddr, 0, 48), cmd3);
1573                } else {
1574                        vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1575                        vst1q_u64(LMT_OFF(laddr, 0, 16), cmd2);
1576                        vst1q_u64(LMT_OFF(laddr, 0, 32), cmd1);
1577                }
1578        } else {
1579                /* Store the prepared send desc to LMT lines */
1580                vst1q_u64(LMT_OFF(laddr, 0, 0), cmd0);
1581                vst1q_u64(LMT_OFF(laddr, 0, 16), cmd1);
1582        }
1583}
1584
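    /* NEON vector transmit: packets are prepared four at a time, two
     * per 128-bit register pair (senddesc01/senddesc23 and friends),
     * with all offload handling compile-time selected through flags.
     */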
1585static __rte_always_inline uint16_t
1586cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
1587                           struct rte_mbuf **tx_pkts, uint16_t pkts,
1588                           uint64_t *cmd, const uint16_t flags)
1589{
1590        uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
1591        uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
1592        uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
1593                cmd2[NIX_DESCS_PER_LOOP], cmd3[NIX_DESCS_PER_LOOP];
1594        uint16_t left, scalar, burst, i, lmt_id, c_lmt_id;
1595        uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, pa;
1596        uint64x2_t senddesc01_w0, senddesc23_w0;
1597        uint64x2_t senddesc01_w1, senddesc23_w1;
1598        uint64x2_t sendext01_w0, sendext23_w0;
1599        uint64x2_t sendext01_w1, sendext23_w1;
1600        uint64x2_t sendmem01_w0, sendmem23_w0;
1601        uint64x2_t sendmem01_w1, sendmem23_w1;
1602        uint8_t segdw[NIX_DESCS_PER_LOOP + 1];
1603        uint64x2_t sgdesc01_w0, sgdesc23_w0;
1604        uint64x2_t sgdesc01_w1, sgdesc23_w1;
1605        struct cn10k_eth_txq *txq = tx_queue;
1606        rte_iova_t io_addr = txq->io_addr;
1607        uintptr_t laddr = txq->lmt_base;
1608        uint8_t c_lnum, c_shft, c_loff;
1609        uint64x2_t ltypes01, ltypes23;
1610        uint64x2_t xtmp128, ytmp128;
1611        uint64x2_t xmask01, xmask23;
1612        uintptr_t c_laddr = laddr;
1613        uint8_t lnum, shift, loff;
1614        rte_iova_t c_io_addr;
1615        uint64_t sa_base;
1616        union wdata {
1617                __uint128_t data128;
1618                uint64_t data[2];
1619        } wd;
1620
1621        if (!(flags & NIX_TX_VWQE_F)) {
1622                NIX_XMIT_FC_OR_RETURN(txq, pkts);
1623                scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1624                pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1625                /* Reduce the cached count */
1626                txq->fc_cache_pkts -= pkts;
1627        } else {
1628                scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
1629                pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
1630        }
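            /* The remaining (pkts & 3) packets are kept aside in scalar;
             * they are expected to be sent through the scalar xmit path
             * once the vector loop below completes.
             */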
1631
1632        /* Perform header writes before barrier for TSO */
1633        if (flags & NIX_TX_OFFLOAD_TSO_F) {
1634                for (i = 0; i < pkts; i++)
1635                        cn10k_nix_xmit_prepare_tso(tx_pkts[i], flags);
1636        }
1637
1638        if (!(flags & NIX_TX_VWQE_F)) {
1639                senddesc01_w0 = vld1q_dup_u64(&txq->send_hdr_w0);
1640        } else {
1641                uint64_t w0 =
1642                        (txq->send_hdr_w0 & 0xFFFFF00000000000) |
1643                        ((uint64_t)(cn10k_nix_tx_ext_subs(flags) + 1) << 40);
1644
1645                senddesc01_w0 = vdupq_n_u64(w0);
1646        }
1647        senddesc23_w0 = senddesc01_w0;
1648
1649        senddesc01_w1 = vdupq_n_u64(0);
1650        senddesc23_w1 = senddesc01_w1;
1651        sgdesc01_w0 = vdupq_n_u64((NIX_SUBDC_SG << 60) | BIT_ULL(48));
1652        sgdesc23_w0 = sgdesc01_w0;
1653
1654        if (flags & NIX_TX_NEED_EXT_HDR) {
1655                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1656                        sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60) |
1657                                                   BIT_ULL(15));
1658                        sendmem01_w0 =
1659                                vdupq_n_u64((NIX_SUBDC_MEM << 60) |
1660                                            (NIX_SENDMEMALG_SETTSTMP << 56));
1661                        sendmem23_w0 = sendmem01_w0;
1662                        sendmem01_w1 = vdupq_n_u64(txq->ts_mem);
1663                        sendmem23_w1 = sendmem01_w1;
1664                } else {
1665                        sendext01_w0 = vdupq_n_u64((NIX_SUBDC_EXT << 60));
1666                }
1667                sendext23_w0 = sendext01_w0;
1668
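                    /* relptr 12 presumably inserts VLAN0/VLAN1 right after
                     * the 12B Ethernet DA + SA. VLAN0 relptr occupies
                     * SEND_EXT_W1 bits 7:0 and VLAN1 relptr bits 31:24; the
                     * TCIs are OR-ed in per packet further below.
                     */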
1669                if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F)
1670                        sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
1671                else
1672                        sendext01_w1 = vdupq_n_u64(0);
1673                sendext23_w1 = sendext01_w1;
1674        }
1675
1676        /* Get LMT base address and LMT ID as lcore id */
1677        ROC_LMT_BASE_ID_GET(laddr, lmt_id);
1678        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1679                ROC_LMT_CPT_BASE_ID_GET(c_laddr, c_lmt_id);
1680                c_io_addr = txq->cpt_io_addr;
1681                sa_base = txq->sa_base;
1682        }
1683
1684        left = pkts;
1685again:
1686        /* Number of packets to prepare depends on offloads enabled. */
1687        burst = left > cn10k_nix_pkts_per_vec_brst(flags) ?
1688                              cn10k_nix_pkts_per_vec_brst(flags) :
1689                              left;
1690        if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)) {
1691                wd.data128 = 0;
1692                shift = 16;
1693        }
1694        lnum = 0;
1695        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
1696                loff = 0;
1697                c_loff = 0;
1698                c_lnum = 0;
1699                c_shft = 16;
1700        }
1701
1702        for (i = 0; i < burst; i += NIX_DESCS_PER_LOOP) {
1703                if (flags & NIX_TX_OFFLOAD_SECURITY_F && c_lnum + 2 > 16) {
1704                        burst = i;
1705                        break;
1706                }
1707
1708                if (flags & NIX_TX_MULTI_SEG_F) {
1709                        uint8_t j;
1710
1711                        for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1712                                struct rte_mbuf *m = tx_pkts[j];
1713
1714                                /* Get dwords based on nb_segs. */
1715                                segdw[j] = NIX_NB_SEGS_TO_SEGDW(m->nb_segs);
1716                                /* Add dwords based on offloads. */
1717                                segdw[j] += 1 + /* SEND HDR */
1718                                            !!(flags & NIX_TX_NEED_EXT_HDR) +
1719                                            !!(flags & NIX_TX_OFFLOAD_TSTAMP_F);
1720                        }
1721
1722                        /* Check if there are enough LMTLINES for this loop */
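                            /* Worst case: a burst spans 32 LMT lines of
                             * 8 dwords each. Conservatively count the
                             * lines these four packets need (a packet
                             * never straddles a line) and end the burst
                             * early if they may not fit.
                             */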
1723                        if (lnum + 4 > 32) {
1724                                uint8_t ldwords_con = 0, lneeded = 0;
1725                                for (j = 0; j < NIX_DESCS_PER_LOOP; j++) {
1726                                        ldwords_con += segdw[j];
1727                                        if (ldwords_con > 8) {
1728                                                lneeded += 1;
1729                                                ldwords_con = segdw[j];
1730                                        }
1731                                }
1732                                lneeded += 1;
1733                                if (lnum + lneeded > 32) {
1734                                        burst = i;
1735                                        break;
1736                                }
1737                        }
1738                }
1739                /* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
1740                senddesc01_w0 =
1741                        vbicq_u64(senddesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1742                sgdesc01_w0 = vbicq_u64(sgdesc01_w0, vdupq_n_u64(0xFFFFFFFF));
1743
1744                senddesc23_w0 = senddesc01_w0;
1745                sgdesc23_w0 = sgdesc01_w0;
1746
1747                /* Clear vlan enables. */
1748                if (flags & NIX_TX_NEED_EXT_HDR) {
1749                        sendext01_w1 = vbicq_u64(sendext01_w1,
1750                                                 vdupq_n_u64(0x3FFFF00FFFF00));
1751                        sendext23_w1 = sendext01_w1;
1752                }
1753
1754                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
1755                        /* Reset send mem alg to SETTSTMP from SUB */
1756                        sendmem01_w0 = vbicq_u64(sendmem01_w0,
1757                                                 vdupq_n_u64(BIT_ULL(59)));
1758                        /* Reset send mem address to default. */
1759                        sendmem01_w1 =
1760                                vbicq_u64(sendmem01_w1, vdupq_n_u64(0xF));
1761                        sendmem23_w0 = sendmem01_w0;
1762                        sendmem23_w1 = sendmem01_w1;
1763                }
1764
1765                if (flags & NIX_TX_OFFLOAD_TSO_F) {
1766                        /* Clear the LSO enable bit. */
1767                        sendext01_w0 = vbicq_u64(sendext01_w0,
1768                                                 vdupq_n_u64(BIT_ULL(14)));
1769                        sendext23_w0 = sendext01_w0;
1770                }
1771
1772                /* Move mbuf pointers to the buf_iova field */
1773                mbuf0 = (uint64_t *)tx_pkts[0];
1774                mbuf1 = (uint64_t *)tx_pkts[1];
1775                mbuf2 = (uint64_t *)tx_pkts[2];
1776                mbuf3 = (uint64_t *)tx_pkts[3];
1777
1778                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1779                                     offsetof(struct rte_mbuf, buf_iova));
1780                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1781                                     offsetof(struct rte_mbuf, buf_iova));
1782                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1783                                     offsetof(struct rte_mbuf, buf_iova));
1784                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1785                                     offsetof(struct rte_mbuf, buf_iova));
1786                /*
1787                 * Get each mbuf's ol_flags, iova, pktlen, dataoff:
1788                 * dataoff_iovaX.D[0] = iova,
1789                 * dataoff_iovaX.D[1](15:0) = mbuf->dataoff
1790                 * len_olflagsX.D[0] = ol_flags,
1791                 * len_olflagsX.D[1](63:32) = mbuf->pkt_len
1792                 */
1793                dataoff_iova0 = vld1q_u64(mbuf0);
1794                len_olflags0 = vld1q_u64(mbuf0 + 2);
1795                dataoff_iova1 = vld1q_u64(mbuf1);
1796                len_olflags1 = vld1q_u64(mbuf1 + 2);
1797                dataoff_iova2 = vld1q_u64(mbuf2);
1798                len_olflags2 = vld1q_u64(mbuf2 + 2);
1799                dataoff_iova3 = vld1q_u64(mbuf3);
1800                len_olflags3 = vld1q_u64(mbuf3 + 2);
1801
1802                /* Move mbuf pointers to point at the pool field */
1803                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1804                                     offsetof(struct rte_mbuf, pool) -
1805                                     offsetof(struct rte_mbuf, buf_iova));
1806                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1807                                     offsetof(struct rte_mbuf, pool) -
1808                                     offsetof(struct rte_mbuf, buf_iova));
1809                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1810                                     offsetof(struct rte_mbuf, pool) -
1811                                     offsetof(struct rte_mbuf, buf_iova));
1812                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1813                                     offsetof(struct rte_mbuf, pool) -
1814                                     offsetof(struct rte_mbuf, buf_iova));
1815
1816                if (flags & (NIX_TX_OFFLOAD_OL3_OL4_CSUM_F |
1817                             NIX_TX_OFFLOAD_L3_L4_CSUM_F)) {
1818                        /* Get tx_offload for ol2, ol3, l2, l3 lengths */
1819                        /*
1820                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1821                         * E(8):OL2_LEN(7):OL3_LEN(9):E(24):L3_LEN(9):L2_LEN(7)
1822                         */
1823
1824                        asm volatile("LD1 {%[a].D}[0],[%[in]]\n\t"
1825                                     : [a] "+w"(senddesc01_w1)
1826                                     : [in] "r"(mbuf0 + 2)
1827                                     : "memory");
1828
1829                        asm volatile("LD1 {%[a].D}[1],[%[in]]\n\t"
1830                                     : [a] "+w"(senddesc01_w1)
1831                                     : [in] "r"(mbuf1 + 2)
1832                                     : "memory");
1833
1834                        asm volatile("LD1 {%[b].D}[0],[%[in]]\n\t"
1835                                     : [b] "+w"(senddesc23_w1)
1836                                     : [in] "r"(mbuf2 + 2)
1837                                     : "memory");
1838
1839                        asm volatile("LD1 {%[b].D}[1],[%[in]]\n\t"
1840                                     : [b] "+w"(senddesc23_w1)
1841                                     : [in] "r"(mbuf3 + 2)
1842                                     : "memory");
1843
1844                        /* Get pool pointer alone */
1845                        mbuf0 = (uint64_t *)*mbuf0;
1846                        mbuf1 = (uint64_t *)*mbuf1;
1847                        mbuf2 = (uint64_t *)*mbuf2;
1848                        mbuf3 = (uint64_t *)*mbuf3;
1849                } else {
1850                        /* Get pool pointer alone */
1851                        mbuf0 = (uint64_t *)*mbuf0;
1852                        mbuf1 = (uint64_t *)*mbuf1;
1853                        mbuf2 = (uint64_t *)*mbuf2;
1854                        mbuf3 = (uint64_t *)*mbuf3;
1855                }
1856
1857                const uint8x16_t shuf_mask2 = {
1858                        0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1859                        0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1860                };
1861                xtmp128 = vzip2q_u64(len_olflags0, len_olflags1);
1862                ytmp128 = vzip2q_u64(len_olflags2, len_olflags3);
1863
1864                /* Clear dataoff_iovaX.D[1] bits other than dataoff(15:0) */
1865                const uint64x2_t and_mask0 = {
1866                        0xFFFFFFFFFFFFFFFF,
1867                        0x000000000000FFFF,
1868                };
1869
1870                dataoff_iova0 = vandq_u64(dataoff_iova0, and_mask0);
1871                dataoff_iova1 = vandq_u64(dataoff_iova1, and_mask0);
1872                dataoff_iova2 = vandq_u64(dataoff_iova2, and_mask0);
1873                dataoff_iova3 = vandq_u64(dataoff_iova3, and_mask0);
1874
1875                /*
1876                 * Pick only 16 bits of pktlen present at bits 63:32
1877                 * and place them at bits 15:0.
1878                 */
1879                xtmp128 = vqtbl1q_u8(xtmp128, shuf_mask2);
1880                ytmp128 = vqtbl1q_u8(ytmp128, shuf_mask2);
1881
1882                /* Add pairwise to get dataoff + iova in sgdesc_w1 */
1883                sgdesc01_w1 = vpaddq_u64(dataoff_iova0, dataoff_iova1);
1884                sgdesc23_w1 = vpaddq_u64(dataoff_iova2, dataoff_iova3);
1885
1886                /* Orr both sgdesc_w0 and senddesc_w0 with 16 bits of
1887                 * pktlen at 15:0 position.
1888                 */
1889                sgdesc01_w0 = vorrq_u64(sgdesc01_w0, xtmp128);
1890                sgdesc23_w0 = vorrq_u64(sgdesc23_w0, ytmp128);
1891                senddesc01_w0 = vorrq_u64(senddesc01_w0, xtmp128);
1892                senddesc23_w0 = vorrq_u64(senddesc23_w0, ytmp128);
1893
1894                /* Move mbuf to point to pool_id. */
1895                mbuf0 = (uint64_t *)((uintptr_t)mbuf0 +
1896                                     offsetof(struct rte_mempool, pool_id));
1897                mbuf1 = (uint64_t *)((uintptr_t)mbuf1 +
1898                                     offsetof(struct rte_mempool, pool_id));
1899                mbuf2 = (uint64_t *)((uintptr_t)mbuf2 +
1900                                     offsetof(struct rte_mempool, pool_id));
1901                mbuf3 = (uint64_t *)((uintptr_t)mbuf3 +
1902                                     offsetof(struct rte_mempool, pool_id));
1903
1904                if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1905                    !(flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1906                        /*
1907                         * Lookup table to translate ol_flags to
1908                         * il3/il4 types. But we still use ol3/ol4 types in
1909                         * senddesc_w1 as only one header processing is enabled.
1910                         */
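                            /* Index = ol_flags bits 55:52 (L4 type in
                             * 53:52, IP_CKSUM in 54, IPV4 in 55). Each
                             * entry packs l4type in the high nibble
                             * (1 = TCP, 2 = SCTP, 3 = UDP) and l3type in
                             * the low nibble (2 = IPv4, 3 = IPv4 w/ csum,
                             * 4 = IPv6).
                             */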
1911                        const uint8x16_t tbl = {
1912                                /* [0-15] = il4type:il3type */
1913                                0x04, /* none (IPv6 assumed) */
1914                                0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6 assumed) */
1915                                0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6 assumed) */
1916                                0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6 assumed) */
1917                                0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
1918                                0x13, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM */
1919                                0x23, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_SCTP_CKSUM */
1920                                0x33, /* RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_UDP_CKSUM */
1921                                0x02, /* RTE_MBUF_F_TX_IPV4  */
1922                                0x12, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM */
1923                                0x22, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_SCTP_CKSUM */
1924                                0x32, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_UDP_CKSUM */
1925                                0x03, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM */
1926                                0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1927                                       * RTE_MBUF_F_TX_TCP_CKSUM
1928                                       */
1929                                0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1930                                       * RTE_MBUF_F_TX_SCTP_CKSUM
1931                                       */
1932                                0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
1933                                       * RTE_MBUF_F_TX_UDP_CKSUM
1934                                       */
1935                        };
1936
1937                        /* Extract olflags to translate to iltypes */
1938                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
1939                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
1940
1941                        /*
1942                         * E(47):L3_LEN(9):L2_LEN(7+z)
1943                         * E(47):L3_LEN(9):L2_LEN(7+z)
1944                         */
1945                        senddesc01_w1 = vshlq_n_u64(senddesc01_w1, 1);
1946                        senddesc23_w1 = vshlq_n_u64(senddesc23_w1, 1);
1947
1948                        /* Move OLFLAGS bits 55:52 to 51:48,
1949                         * with zeros prepended on the byte; the rest
1950                         * is don't-care
1951                         */
1952                        xtmp128 = vshrq_n_u8(xtmp128, 4);
1953                        ytmp128 = vshrq_n_u8(ytmp128, 4);
1954                        /*
1955                         * E(48):L3_LEN(8):L2_LEN(z+7)
1956                         * E(48):L3_LEN(8):L2_LEN(z+7)
1957                         */
1958                        const int8x16_t tshft3 = {
1959                                -1, 0, 8, 8, 8, 8, 8, 8,
1960                                -1, 0, 8, 8, 8, 8, 8, 8,
1961                        };
1962
1963                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
1964                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
1965
1966                        /* Do the lookup */
1967                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
1968                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);
1969
1970                        /* Pick only the relevant fields, i.e. bits 48:55 of
1971                         * iltype, and place them in ol3/ol4type of senddesc_w1
1972                         */
1973                        const uint8x16_t shuf_mask0 = {
1974                                0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0xFF, 0xFF, 0xFF,
1975                                0xFF, 0xFF, 0xFF, 0xFF, 0xE, 0xFF, 0xFF, 0xFF,
1976                        };
1977
1978                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
1979                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
1980
1981                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
1982                         * a [E(32):E(16):OL3(8):OL2(8)]
1983                         * a = a + (a << 8)
1984                         * a [E(32):E(16):(OL3+OL2):OL2]
1985                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
1986                         */
1987                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
1988                                                 vshlq_n_u16(senddesc01_w1, 8));
1989                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
1990                                                 vshlq_n_u16(senddesc23_w1, 8));
1991
1992                        /* Move ltypes to senddesc*_w1 */
1993                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
1994                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
1995                } else if (!(flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
1996                           (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
1997                        /*
1998                         * Lookup table to translate ol_flags to
1999                         * ol3/ol4 types.
2000                         */
2001
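                            /* Index = ol_flags bits 61:58 once
                             * OUTER_UDP_CKSUM (bit 41) is copied up to
                             * bit 61 below: OUTER_IP_CKSUM in 58,
                             * OUTER_IPV4 in 59, OUTER_IPV6 in 60. Entries
                             * pack ol4type:ol3type nibbles with the same
                             * encoding as the inner table.
                             */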
2002                        const uint8x16_t tbl = {
2003                                /* [0-15] = ol4type:ol3type */
2004                                0x00, /* none */
2005                                0x03, /* OUTER_IP_CKSUM */
2006                                0x02, /* OUTER_IPV4 */
2007                                0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2008                                0x04, /* OUTER_IPV6 */
2009                                0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2010                                0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2011                                0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2012                                       * OUTER_IP_CKSUM
2013                                       */
2014                                0x00, /* OUTER_UDP_CKSUM */
2015                                0x33, /* OUTER_UDP_CKSUM | OUTER_IP_CKSUM */
2016                                0x32, /* OUTER_UDP_CKSUM | OUTER_IPV4 */
2017                                0x33, /* OUTER_UDP_CKSUM | OUTER_IPV4 |
2018                                       * OUTER_IP_CKSUM
2019                                       */
2020                                0x34, /* OUTER_UDP_CKSUM | OUTER_IPV6 */
2021                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2022                                       * OUTER_IP_CKSUM
2023                                       */
2024                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2025                                       * OUTER_IPV4
2026                                       */
2027                                0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2028                                       * OUTER_IPV4 | OUTER_IP_CKSUM
2029                                       */
2030                        };
2031
2032                        /* Extract olflags to translate to oltypes */
2033                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2034                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2035
2036                        /*
2037                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
2038                         * E(47):OL3_LEN(9):OL2_LEN(7+z)
2039                         */
2040                        const uint8x16_t shuf_mask5 = {
2041                                0x6, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2042                                0xE, 0xD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
2043                        };
2044                        senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2045                        senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2046
2047                        /* Extract outer ol flags only */
2048                        const uint64x2_t o_cksum_mask = {
2049                                0x1C00020000000000,
2050                                0x1C00020000000000,
2051                        };
2052
2053                        xtmp128 = vandq_u64(xtmp128, o_cksum_mask);
2054                        ytmp128 = vandq_u64(ytmp128, o_cksum_mask);
2055
2056                        /* Extract OUTER_UDP_CKSUM bit 41 and
2057                         * move it to bit 61
2058                         */
2059
2060                        xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2061                        ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2062
2063                        /* Shift oltype by 2 to start nibble from BIT(56)
2064                         * instead of BIT(58)
2065                         */
2066                        xtmp128 = vshrq_n_u8(xtmp128, 2);
2067                        ytmp128 = vshrq_n_u8(ytmp128, 2);
2068                        /*
2069                         * E(48):L3_LEN(8):L2_LEN(z+7)
2070                         * E(48):L3_LEN(8):L2_LEN(z+7)
2071                         */
2072                        const int8x16_t tshft3 = {
2073                                -1, 0, 8, 8, 8, 8, 8, 8,
2074                                -1, 0, 8, 8, 8, 8, 8, 8,
2075                        };
2076
2077                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2078                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2079
2080                        /* Do the lookup */
2081                        ltypes01 = vqtbl1q_u8(tbl, xtmp128);
2082                        ltypes23 = vqtbl1q_u8(tbl, ytmp128);
2083
2084                        /* Pick only the relevant fields, i.e. bits 56:63 of
2085                         * oltype, and place them in ol3/ol4type of senddesc_w1
2086                         */
2087                        const uint8x16_t shuf_mask0 = {
2088                                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xFF, 0xFF, 0xFF,
2089                                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xFF, 0xFF, 0xFF,
2090                        };
2091
2092                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2093                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2094
2095                        /* Prepare ol4ptr, ol3ptr from ol3len, ol2len.
2096                         * a [E(32):E(16):OL3(8):OL2(8)]
2097                         * a = a + (a << 8)
2098                         * a [E(32):E(16):(OL3+OL2):OL2]
2099                         * => E(32):E(16)::OL4PTR(8):OL3PTR(8)
2100                         */
2101                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
2102                                                 vshlq_n_u16(senddesc01_w1, 8));
2103                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
2104                                                 vshlq_n_u16(senddesc23_w1, 8));
2105
2106                        /* Move ltypes to senddesc*_w1 */
2107                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2108                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2109                } else if ((flags & NIX_TX_OFFLOAD_L3_L4_CSUM_F) &&
2110                           (flags & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)) {
2111                        /* Lookup table to translate ol_flags to
2112                         * ol4type, ol3type, il4type, il3type of senddesc_w1
2113                         */
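                            /* 32-entry lookup through vqtbl2q_u8: the
                             * iltype nibble indexes entries 0-15 while
                             * the oltype nibble, with bit(4) forced on
                             * below, indexes entries 16-31.
                             */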
2114                        const uint8x16x2_t tbl = {{
2115                                {
2116                                        /* [0-15] = il4type:il3type */
2117                                        0x04, /* none (IPv6) */
2118                                        0x14, /* RTE_MBUF_F_TX_TCP_CKSUM (IPv6) */
2119                                        0x24, /* RTE_MBUF_F_TX_SCTP_CKSUM (IPv6) */
2120                                        0x34, /* RTE_MBUF_F_TX_UDP_CKSUM (IPv6) */
2121                                        0x03, /* RTE_MBUF_F_TX_IP_CKSUM */
2122                                        0x13, /* RTE_MBUF_F_TX_IP_CKSUM |
2123                                               * RTE_MBUF_F_TX_TCP_CKSUM
2124                                               */
2125                                        0x23, /* RTE_MBUF_F_TX_IP_CKSUM |
2126                                               * RTE_MBUF_F_TX_SCTP_CKSUM
2127                                               */
2128                                        0x33, /* RTE_MBUF_F_TX_IP_CKSUM |
2129                                               * RTE_MBUF_F_TX_UDP_CKSUM
2130                                               */
2131                                        0x02, /* RTE_MBUF_F_TX_IPV4 */
2132                                        0x12, /* RTE_MBUF_F_TX_IPV4 |
2133                                               * RTE_MBUF_F_TX_TCP_CKSUM
2134                                               */
2135                                        0x22, /* RTE_MBUF_F_TX_IPV4 |
2136                                               * RTE_MBUF_F_TX_SCTP_CKSUM
2137                                               */
2138                                        0x32, /* RTE_MBUF_F_TX_IPV4 |
2139                                               * RTE_MBUF_F_TX_UDP_CKSUM
2140                                               */
2141                                        0x03, /* RTE_MBUF_F_TX_IPV4 |
2142                                               * RTE_MBUF_F_TX_IP_CKSUM
2143                                               */
2144                                        0x13, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2145                                               * RTE_MBUF_F_TX_TCP_CKSUM
2146                                               */
2147                                        0x23, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2148                                               * RTE_MBUF_F_TX_SCTP_CKSUM
2149                                               */
2150                                        0x33, /* RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IP_CKSUM |
2151                                               * RTE_MBUF_F_TX_UDP_CKSUM
2152                                               */
2153                                },
2154
2155                                {
2156                                        /* [16-31] = ol4type:ol3type */
2157                                        0x00, /* none */
2158                                        0x03, /* OUTER_IP_CKSUM */
2159                                        0x02, /* OUTER_IPV4 */
2160                                        0x03, /* OUTER_IPV4 | OUTER_IP_CKSUM */
2161                                        0x04, /* OUTER_IPV6 */
2162                                        0x00, /* OUTER_IPV6 | OUTER_IP_CKSUM */
2163                                        0x00, /* OUTER_IPV6 | OUTER_IPV4 */
2164                                        0x00, /* OUTER_IPV6 | OUTER_IPV4 |
2165                                               * OUTER_IP_CKSUM
2166                                               */
2167                                        0x00, /* OUTER_UDP_CKSUM */
2168                                        0x33, /* OUTER_UDP_CKSUM |
2169                                               * OUTER_IP_CKSUM
2170                                               */
2171                                        0x32, /* OUTER_UDP_CKSUM |
2172                                               * OUTER_IPV4
2173                                               */
2174                                        0x33, /* OUTER_UDP_CKSUM |
2175                                               * OUTER_IPV4 | OUTER_IP_CKSUM
2176                                               */
2177                                        0x34, /* OUTER_UDP_CKSUM |
2178                                               * OUTER_IPV6
2179                                               */
2180                                        0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2181                                               * OUTER_IP_CKSUM
2182                                               */
2183                                        0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2184                                               * OUTER_IPV4
2185                                               */
2186                                        0x00, /* OUTER_UDP_CKSUM | OUTER_IPV6 |
2187                                               * OUTER_IPV4 | OUTER_IP_CKSUM
2188                                               */
2189                                },
2190                        }};
2191
2192                        /* Extract olflags to translate to oltype & iltype */
2193                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2194                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2195
2196                        /*
2197                         * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2198                         * E(8):OL2_LN(7):OL3_LN(9):E(23):L3_LN(9):L2_LN(7+z)
2199                         */
2200                        const uint32x4_t tshft_4 = {
2201                                1,
2202                                0,
2203                                1,
2204                                0,
2205                        };
2206                        senddesc01_w1 = vshlq_u32(senddesc01_w1, tshft_4);
2207                        senddesc23_w1 = vshlq_u32(senddesc23_w1, tshft_4);
2208
2209                        /*
2210                         * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2211                         * E(32):L3_LEN(8):L2_LEN(7+Z):OL3_LEN(8):OL2_LEN(7+Z)
2212                         */
2213                        const uint8x16_t shuf_mask5 = {
2214                                0x6, 0x5, 0x0, 0x1, 0xFF, 0xFF, 0xFF, 0xFF,
2215                                0xE, 0xD, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
2216                        };
2217                        senddesc01_w1 = vqtbl1q_u8(senddesc01_w1, shuf_mask5);
2218                        senddesc23_w1 = vqtbl1q_u8(senddesc23_w1, shuf_mask5);
2219
2220                        /* Extract outer and inner header ol_flags */
2221                        const uint64x2_t oi_cksum_mask = {
2222                                0x1CF0020000000000,
2223                                0x1CF0020000000000,
2224                        };
2225
2226                        xtmp128 = vandq_u64(xtmp128, oi_cksum_mask);
2227                        ytmp128 = vandq_u64(ytmp128, oi_cksum_mask);
2228
2229                        /* Extract OUTER_UDP_CKSUM bit 41 and
2230                         * move it to bit 61
2231                         */
2232
2233                        xtmp128 = xtmp128 | vshlq_n_u64(xtmp128, 20);
2234                        ytmp128 = ytmp128 | vshlq_n_u64(ytmp128, 20);
2235
2236                        /* Shift right oltype by 2 and iltype by 4
2237                         * to start the oltype nibble from BIT(56)
2238                         * instead of BIT(58) and the iltype nibble from
2239                         * BIT(48) instead of BIT(52).
2240                         */
2241                        const int8x16_t tshft5 = {
2242                                8, 8, 8, 8, 8, 8, -4, -2,
2243                                8, 8, 8, 8, 8, 8, -4, -2,
2244                        };
2245
2246                        xtmp128 = vshlq_u8(xtmp128, tshft5);
2247                        ytmp128 = vshlq_u8(ytmp128, tshft5);
2248                        /*
2249                         * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2250                         * E(32):L3_LEN(8):L2_LEN(8):OL3_LEN(8):OL2_LEN(8)
2251                         */
2252                        const int8x16_t tshft3 = {
2253                                -1, 0, -1, 0, 0, 0, 0, 0,
2254                                -1, 0, -1, 0, 0, 0, 0, 0,
2255                        };
2256
2257                        senddesc01_w1 = vshlq_u8(senddesc01_w1, tshft3);
2258                        senddesc23_w1 = vshlq_u8(senddesc23_w1, tshft3);
2259
2260                        /* Mark bit(4) of oltype to index the second table */
2261                        const uint64x2_t oi_cksum_mask2 = {
2262                                0x1000000000000000,
2263                                0x1000000000000000,
2264                        };
2265
2266                        xtmp128 = vorrq_u64(xtmp128, oi_cksum_mask2);
2267                        ytmp128 = vorrq_u64(ytmp128, oi_cksum_mask2);
2268
2269                        /* Do the lookup */
2270                        ltypes01 = vqtbl2q_u8(tbl, xtmp128);
2271                        ltypes23 = vqtbl2q_u8(tbl, ytmp128);
2272
2273                        /* Pick only relevant fields i.e Bit 48:55 of iltype and
2274                         * Bit 56:63 of oltype and place it in corresponding
2275                         * place in senddesc_w1.
2276                         */
2277                        const uint8x16_t shuf_mask0 = {
2278                                0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0x6, 0xFF, 0xFF,
2279                                0xFF, 0xFF, 0xFF, 0xFF, 0xF, 0xE, 0xFF, 0xFF,
2280                        };
2281
2282                        ltypes01 = vqtbl1q_u8(ltypes01, shuf_mask0);
2283                        ltypes23 = vqtbl1q_u8(ltypes23, shuf_mask0);
2284
2285                        /* Prepare l4ptr, l3ptr, ol4ptr, ol3ptr from
2286                         * l3len, l2len, ol3len, ol2len.
2287                         * a [E(32):L3(8):L2(8):OL3(8):OL2(8)]
2288                         * a = a + (a << 8)
2289                         * a [E:(L3+L2):(L2+OL3):(OL3+OL2):OL2]
2290                         * a = a + (a << 16)
2291                         * a [E:(L3+L2+OL3+OL2):(L2+OL3+OL2):(OL3+OL2):OL2]
2292                         * => E(32):IL4PTR(8):IL3PTR(8):OL4PTR(8):OL3PTR(8)
2293                         */
2294                        senddesc01_w1 = vaddq_u8(senddesc01_w1,
2295                                                 vshlq_n_u32(senddesc01_w1, 8));
2296                        senddesc23_w1 = vaddq_u8(senddesc23_w1,
2297                                                 vshlq_n_u32(senddesc23_w1, 8));
2298
2299                        /* Continue preparing l4ptr, l3ptr, ol4ptr, ol3ptr */
2300                        senddesc01_w1 = vaddq_u8(
2301                                senddesc01_w1, vshlq_n_u32(senddesc01_w1, 16));
2302                        senddesc23_w1 = vaddq_u8(
2303                                senddesc23_w1, vshlq_n_u32(senddesc23_w1, 16));
2304
2305                        /* Move ltypes to senddesc*_w1 */
2306                        senddesc01_w1 = vorrq_u64(senddesc01_w1, ltypes01);
2307                        senddesc23_w1 = vorrq_u64(senddesc23_w1, ltypes23);
2308                }
2309
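                    /* Gather the low 16 bits of each mempool's pool_id
                     * (presumably the NPA aura) as a halfword and shift
                     * them to the SEND_HDR_W0 aura field at bit 20.
                     */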
2310                xmask01 = vdupq_n_u64(0);
2311                xmask23 = xmask01;
2312                asm volatile("LD1 {%[a].H}[0],[%[in]]\n\t"
2313                             : [a] "+w"(xmask01)
2314                             : [in] "r"(mbuf0)
2315                             : "memory");
2316
2317                asm volatile("LD1 {%[a].H}[4],[%[in]]\n\t"
2318                             : [a] "+w"(xmask01)
2319                             : [in] "r"(mbuf1)
2320                             : "memory");
2321
2322                asm volatile("LD1 {%[b].H}[0],[%[in]]\n\t"
2323                             : [b] "+w"(xmask23)
2324                             : [in] "r"(mbuf2)
2325                             : "memory");
2326
2327                asm volatile("LD1 {%[b].H}[4],[%[in]]\n\t"
2328                             : [b] "+w"(xmask23)
2329                             : [in] "r"(mbuf3)
2330                             : "memory");
2331                xmask01 = vshlq_n_u64(xmask01, 20);
2332                xmask23 = vshlq_n_u64(xmask23, 20);
2333
2334                senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2335                senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2336
2337                if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
2338                        /* Tx ol_flag for vlan. */
2339                        const uint64x2_t olv = {RTE_MBUF_F_TX_VLAN, RTE_MBUF_F_TX_VLAN};
2340                        /* Bit enable for VLAN1 */
2341                        const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
2342                        /* Tx ol_flag for QnQ. */
2343                        const uint64x2_t olq = {RTE_MBUF_F_TX_QINQ, RTE_MBUF_F_TX_QINQ};
2344                        /* Bit enable for VLAN0 */
2345                        const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
2346                        /* Load VLAN values from packet; outer is VLAN0 */
2347                        uint64x2_t ext01 = {
2348                                ((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
2349                                        ((uint64_t)tx_pkts[0]->vlan_tci) << 32,
2350                                ((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
2351                                        ((uint64_t)tx_pkts[1]->vlan_tci) << 32,
2352                        };
2353                        uint64x2_t ext23 = {
2354                                ((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
2355                                        ((uint64_t)tx_pkts[2]->vlan_tci) << 32,
2356                                ((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
2357                                        ((uint64_t)tx_pkts[3]->vlan_tci) << 32,
2358                        };
2359
2360                        /* Get ol_flags of the packets. */
2361                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2362                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2363
2364                        /* ORR vlan outer/inner values into cmd. */
2365                        sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
2366                        sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
2367
2368                        /* Test for offload enable bits and generate masks. */
2369                        xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
2370                                                      mlv),
2371                                            vandq_u64(vtstq_u64(xtmp128, olq),
2372                                                      mlq));
2373                        ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
2374                                                      mlv),
2375                                            vandq_u64(vtstq_u64(ytmp128, olq),
2376                                                      mlq));
2377
2378                        /* Set vlan enable bits into cmd based on mask. */
2379                        sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
2380                        sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
2381                }
2382
2383                if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2384                        /* Tx ol_flag for timestamp. */
2385                        const uint64x2_t olf = {RTE_MBUF_F_TX_IEEE1588_TMST,
2386                                                RTE_MBUF_F_TX_IEEE1588_TMST};
2387                        /* Set send mem alg to SUB. */
2388                        const uint64x2_t alg = {BIT_ULL(59), BIT_ULL(59)};
2389                        /* Increment send mem address by 8. */
2390                        const uint64x2_t addr = {0x8, 0x8};
2391
2392                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2393                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2394
2395                        /* Check if timestamp is requested and generate an
2396                         * inverted mask, as we need not change the default
2397                         * cmd value when it is not requested.
2398                         */
2399                        xtmp128 = vmvnq_u32(vtstq_u64(olf, xtmp128));
2400                        ytmp128 = vmvnq_u32(vtstq_u64(olf, ytmp128));
2401
2402                        /* Change send mem address to an 8 byte offset when
2403                         * TSTMP is disabled.
2404                         */
2405                        sendmem01_w1 = vaddq_u64(sendmem01_w1,
2406                                                 vandq_u64(xtmp128, addr));
2407                        sendmem23_w1 = vaddq_u64(sendmem23_w1,
2408                                                 vandq_u64(ytmp128, addr));
2409                        /* Change send mem alg to SUB when TSTMP is disabled. */
2410                        sendmem01_w0 = vorrq_u64(sendmem01_w0,
2411                                                 vandq_u64(xtmp128, alg));
2412                        sendmem23_w0 = vorrq_u64(sendmem23_w0,
2413                                                 vandq_u64(ytmp128, alg));
2414
2415                        cmd3[0] = vzip1q_u64(sendmem01_w0, sendmem01_w1);
2416                        cmd3[1] = vzip2q_u64(sendmem01_w0, sendmem01_w1);
2417                        cmd3[2] = vzip1q_u64(sendmem23_w0, sendmem23_w1);
2418                        cmd3[3] = vzip2q_u64(sendmem23_w0, sendmem23_w1);
2419                }
2420
2421                if (flags & NIX_TX_OFFLOAD_TSO_F) {
2422                        const uint64_t lso_fmt = txq->lso_tun_fmt;
2423                        uint64_t sx_w0[NIX_DESCS_PER_LOOP];
2424                        uint64_t sd_w1[NIX_DESCS_PER_LOOP];
2425
2426                        /* Extract SD W1 as we need to set L4 types. */
2427                        vst1q_u64(sd_w1, senddesc01_w1);
2428                        vst1q_u64(sd_w1 + 2, senddesc23_w1);
2429
2430                        /* Extract SX W0 as we need to set LSO fields. */
2431                        vst1q_u64(sx_w0, sendext01_w0);
2432                        vst1q_u64(sx_w0 + 2, sendext23_w0);
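                            /* The LSO fix-ups depend on each packet's
                             * header layout and are not vectorizable, so
                             * the words are spilled to scalar arrays,
                             * patched per mbuf and reloaded below.
                             */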
2433
2434                        /* Extract ol_flags. */
2435                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2436                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2437
2438                        /* Prepare individual mbufs. */
2439                        cn10k_nix_prepare_tso(tx_pkts[0],
2440                                (union nix_send_hdr_w1_u *)&sd_w1[0],
2441                                (union nix_send_ext_w0_u *)&sx_w0[0],
2442                                vgetq_lane_u64(xtmp128, 0), flags, lso_fmt);
2443
2444                        cn10k_nix_prepare_tso(tx_pkts[1],
2445                                (union nix_send_hdr_w1_u *)&sd_w1[1],
2446                                (union nix_send_ext_w0_u *)&sx_w0[1],
2447                                vgetq_lane_u64(xtmp128, 1), flags, lso_fmt);
2448
2449                        cn10k_nix_prepare_tso(tx_pkts[2],
2450                                (union nix_send_hdr_w1_u *)&sd_w1[2],
2451                                (union nix_send_ext_w0_u *)&sx_w0[2],
2452                                vgetq_lane_u64(ytmp128, 0), flags, lso_fmt);
2453
2454                        cn10k_nix_prepare_tso(tx_pkts[3],
2455                                (union nix_send_hdr_w1_u *)&sd_w1[3],
2456                                (union nix_send_ext_w0_u *)&sx_w0[3],
2457                                vgetq_lane_u64(ytmp128, 1), flags, lso_fmt);
2458
2459                        senddesc01_w1 = vld1q_u64(sd_w1);
2460                        senddesc23_w1 = vld1q_u64(sd_w1 + 2);
2461
2462                        sendext01_w0 = vld1q_u64(sx_w0);
2463                        sendext23_w0 = vld1q_u64(sx_w0 + 2);
2464                }
2465
2466                if ((flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) &&
2467                    !(flags & NIX_TX_MULTI_SEG_F) &&
2468                    !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2469                        /* Set don't free bit if reference count > 1 */
2470                        xmask01 = vdupq_n_u64(0);
2471                        xmask23 = xmask01;
2472
2473                        /* Move mbufs to iova */
2474                        mbuf0 = (uint64_t *)tx_pkts[0];
2475                        mbuf1 = (uint64_t *)tx_pkts[1];
2476                        mbuf2 = (uint64_t *)tx_pkts[2];
2477                        mbuf3 = (uint64_t *)tx_pkts[3];
2478
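                            /* 0x80000 is bit 19 of SEND_HDR W0, which
                             * appears to be the DF (don't free) bit: set
                             * it when the mbuf is still referenced, else
                             * mark the mempool cookie as "put" since NIX
                             * will free the buffer itself.
                             */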
2479                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf0))
2480                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 0);
2481                        else
2482                                RTE_MEMPOOL_CHECK_COOKIES(
2483                                        ((struct rte_mbuf *)mbuf0)->pool,
2484                                        (void **)&mbuf0, 1, 0);
2485
2486                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf1))
2487                                xmask01 = vsetq_lane_u64(0x80000, xmask01, 1);
2488                        else
2489                                RTE_MEMPOOL_CHECK_COOKIES(
2490                                        ((struct rte_mbuf *)mbuf1)->pool,
2491                                        (void **)&mbuf1, 1, 0);
2492
2493                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf2))
2494                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 0);
2495                        else
2496                                RTE_MEMPOOL_CHECK_COOKIES(
2497                                        ((struct rte_mbuf *)mbuf2)->pool,
2498                                        (void **)&mbuf2, 1, 0);
2499
2500                        if (cnxk_nix_prefree_seg((struct rte_mbuf *)mbuf3))
2501                                xmask23 = vsetq_lane_u64(0x80000, xmask23, 1);
2502                        else
2503                                RTE_MEMPOOL_CHECK_COOKIES(
2504                                        ((struct rte_mbuf *)mbuf3)->pool,
2505                                        (void **)&mbuf3, 1, 0);
2506                        senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
2507                        senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
2508                } else if (!(flags & NIX_TX_MULTI_SEG_F) &&
2509                           !(flags & NIX_TX_OFFLOAD_SECURITY_F)) {
2510                        /* Move mbufs to iova */
2511                        mbuf0 = (uint64_t *)tx_pkts[0];
2512                        mbuf1 = (uint64_t *)tx_pkts[1];
2513                        mbuf2 = (uint64_t *)tx_pkts[2];
2514                        mbuf3 = (uint64_t *)tx_pkts[3];
2515
2516                        /* Mark mempool object as "put" since
2517                         * it is freed by NIX
2518                         */
2519                        RTE_MEMPOOL_CHECK_COOKIES(
2520                                ((struct rte_mbuf *)mbuf0)->pool,
2521                                (void **)&mbuf0, 1, 0);
2522
2523                        RTE_MEMPOOL_CHECK_COOKIES(
2524                                ((struct rte_mbuf *)mbuf1)->pool,
2525                                (void **)&mbuf1, 1, 0);
2526
2527                        RTE_MEMPOOL_CHECK_COOKIES(
2528                                ((struct rte_mbuf *)mbuf2)->pool,
2529                                (void **)&mbuf2, 1, 0);
2530
2531                        RTE_MEMPOOL_CHECK_COOKIES(
2532                                ((struct rte_mbuf *)mbuf3)->pool,
2533                                (void **)&mbuf3, 1, 0);
2534                }
2535
2536                /* Create 4W cmd for 4 mbufs (sendhdr, sgdesc) */
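                    /* Each senddesc/sgdesc vector holds one packet per
                     * 64-bit lane; vzip1q/vzip2q interleave W0 with W1
                     * to rebuild the per-packet {W0, W1} pairs.
                     */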
2537                cmd0[0] = vzip1q_u64(senddesc01_w0, senddesc01_w1);
2538                cmd0[1] = vzip2q_u64(senddesc01_w0, senddesc01_w1);
2539                cmd0[2] = vzip1q_u64(senddesc23_w0, senddesc23_w1);
2540                cmd0[3] = vzip2q_u64(senddesc23_w0, senddesc23_w1);
2541
2542                cmd1[0] = vzip1q_u64(sgdesc01_w0, sgdesc01_w1);
2543                cmd1[1] = vzip2q_u64(sgdesc01_w0, sgdesc01_w1);
2544                cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
2545                cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
2546
2547                if (flags & NIX_TX_NEED_EXT_HDR) {
2548                        cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
2549                        cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
2550                        cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
2551                        cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
2552                }
2553
2554                if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2555                        const uint64x2_t olf = {RTE_MBUF_F_TX_SEC_OFFLOAD,
2556                                                RTE_MBUF_F_TX_SEC_OFFLOAD};
2557                        uintptr_t next;
2558                        uint8_t dw;
2559
2560                        /* Extract ol_flags. */
2561                        xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
2562                        ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
2563
2564                        xtmp128 = vtstq_u64(olf, xtmp128);
2565                        ytmp128 = vtstq_u64(olf, ytmp128);
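                            /* Lanes are now all-ones for packets carrying
                             * RTE_MBUF_F_TX_SEC_OFFLOAD; those take the
                             * CPT path below, the rest go straight to the
                             * LMT lines.
                             */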
2566
2567                        /* Process mbuf0 */
2568                        dw = cn10k_nix_tx_dwords(flags, segdw[0]);
2569                        if (vgetq_lane_u64(xtmp128, 0))
2570                                cn10k_nix_prep_sec_vec(tx_pkts[0], &cmd0[0],
2571                                                       &cmd1[0], &next, c_laddr,
2572                                                       &c_lnum, &c_loff,
2573                                                       &c_shft, sa_base, flags);
2574                        else
2575                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2576                                                   &shift, &wd.data128, &next);
2577
2578                        /* Store mbuf0 to LMTLINE/CPT NIXTX area */
2579                        cn10k_nix_xmit_store(tx_pkts[0], segdw[0], next,
2580                                             cmd0[0], cmd1[0], cmd2[0], cmd3[0],
2581                                             flags);
2582
2583                        /* Process mbuf1 */
2584                        dw = cn10k_nix_tx_dwords(flags, segdw[1]);
2585                        if (vgetq_lane_u64(xtmp128, 1))
2586                                cn10k_nix_prep_sec_vec(tx_pkts[1], &cmd0[1],
2587                                                       &cmd1[1], &next, c_laddr,
2588                                                       &c_lnum, &c_loff,
2589                                                       &c_shft, sa_base, flags);
2590                        else
2591                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2592                                                   &shift, &wd.data128, &next);
2593
2594                        /* Store mbuf1 to LMTLINE/CPT NIXTX area */
2595                        cn10k_nix_xmit_store(tx_pkts[1], segdw[1], next,
2596                                             cmd0[1], cmd1[1], cmd2[1], cmd3[1],
2597                                             flags);
2598
2599                        /* Process mbuf2 */
2600                        dw = cn10k_nix_tx_dwords(flags, segdw[2]);
2601                        if (vgetq_lane_u64(ytmp128, 0))
2602                                cn10k_nix_prep_sec_vec(tx_pkts[2], &cmd0[2],
2603                                                       &cmd1[2], &next, c_laddr,
2604                                                       &c_lnum, &c_loff,
2605                                                       &c_shft, sa_base, flags);
2606                        else
2607                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2608                                                   &shift, &wd.data128, &next);
2609
2610                        /* Store mbuf2 to LMTLINE/CPT NIXTX area */
2611                        cn10k_nix_xmit_store(tx_pkts[2], segdw[2], next,
2612                                             cmd0[2], cmd1[2], cmd2[2], cmd3[2],
2613                                             flags);
2614
2615                        /* Process mbuf3 */
2616                        dw = cn10k_nix_tx_dwords(flags, segdw[3]);
2617                        if (vgetq_lane_u64(ytmp128, 1))
2618                                cn10k_nix_prep_sec_vec(tx_pkts[3], &cmd0[3],
2619                                                       &cmd1[3], &next, c_laddr,
2620                                                       &c_lnum, &c_loff,
2621                                                       &c_shft, sa_base, flags);
2622                        else
2623                                cn10k_nix_lmt_next(dw, laddr, &lnum, &loff,
2624                                                   &shift, &wd.data128, &next);
2625
2626                        /* Store mbuf3 to LMTLINE/CPT NIXTX area */
2627                        cn10k_nix_xmit_store(tx_pkts[3], segdw[3], next,
2628                                             cmd0[3], cmd1[3], cmd2[3], cmd3[3],
2629                                             flags);
2630
2631                } else if (flags & NIX_TX_MULTI_SEG_F) {
2632                        uint8_t j;
2633
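                            /* cn10k_nix_prep_lmt_mseg_vector() is expected
                             * to pack the variable-size commands into LMT
                             * lines, fold per-line dword counts into
                             * wd.data128 and return the number of lines
                             * consumed.
                             */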
2634                        segdw[4] = 8;
2635                        j = cn10k_nix_prep_lmt_mseg_vector(tx_pkts, cmd0, cmd1,
2636                                                          cmd2, cmd3, segdw,
2637                                                          (uint64_t *)
2638                                                          LMT_OFF(laddr, lnum,
2639                                                                  0),
2640                                                          &wd.data128, &shift,
2641                                                          flags);
2642                        lnum += j;
2643                } else if (flags & NIX_TX_NEED_EXT_HDR) {
2644                        /* Store the prepared send desc to LMT lines */
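                            /* Stores follow the subdescriptor order the
                             * hardware expects: send hdr (cmd0), send ext
                             * (cmd2), SG (cmd1) and, when timestamping,
                             * send mem (cmd3).
                             */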
2645                        if (flags & NIX_TX_OFFLOAD_TSTAMP_F) {
2646                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2647                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2648                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2649                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[0]);
2650                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[1]);
2651                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[1]);
2652                                vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[1]);
2653                                vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[1]);
2654                                lnum += 1;
2655                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2656                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2657                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2658                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd3[2]);
2659                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[3]);
2660                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd2[3]);
2661                                vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd1[3]);
2662                                vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd3[3]);
2663                        } else {
2664                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2665                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
2666                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
2667                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
2668                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
2669                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
2670                                lnum += 1;
2671                                vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
2672                                vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
2673                                vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
2674                                vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
2675                                vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
2676                                vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
2677                        }
2678                        lnum += 1;
2679                } else {
2680                        /* Store the prepared send desc to LMT lines */
2681                        vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
2682                        vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
2683                        vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
2684                        vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
2685                        vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
2686                        vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
2687                        vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
2688                        vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
2689                        lnum += 1;
2690                }
2691
2692                if (flags & NIX_TX_MULTI_SEG_F) {
2693                        tx_pkts[0]->next = NULL;
2694                        tx_pkts[1]->next = NULL;
2695                        tx_pkts[2]->next = NULL;
2696                        tx_pkts[3]->next = NULL;
2697                }
2698
2699                tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
2700        }
2701
2702        /* Round lnum up to include the last line if it is partial */
2703        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
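                    /* loff is the byte fill of the last, partially used
                     * LMT line; count it in lnum and encode its size in
                     * 16-byte units, minus one, at the current shift
                     * position of the LMTST data word.
                     */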
2704                lnum = lnum + !!loff;
2705                wd.data128 = wd.data128 |
2706                        (((__uint128_t)(((loff >> 4) - 1) & 0x7) << shift));
2707        }
2708
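            /* Per-line size accumulation starts at bit 16 of the data
             * word in these modes; shift down so the first line's size
             * sits in the low bits consumed by the STEORL address below.
             */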
2709        if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2710                wd.data[0] >>= 16;
2711
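            /* For VWQE, wait for this SSO HWS to reach the head of its
             * flow (ws[1] bit 35 appears to flag head status) before
             * committing the packets.
             */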
2712        if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
2713                ws[1] = roc_sso_hws_head_wait(ws[0]);
2714
2715        left -= burst;
2716
2717        /* Submit CPT instructions if any */
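            /* Two CPT instructions fit in one LMT line, so the
             * outstanding count to wait for is (c_lnum << 1) + c_loff.
             */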
2718        if (flags & NIX_TX_OFFLOAD_SECURITY_F) {
2719                cn10k_nix_sec_fc_wait(txq, (c_lnum << 1) + c_loff);
2720                cn10k_nix_sec_steorl(c_io_addr, c_lmt_id, c_lnum, c_loff,
2721                                     c_shft);
2722        }
2723
2724        /* Trigger LMTST */
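            /* A single STEORL releases at most 16 LMT lines, so larger
             * bursts are split across STEOR0 and STEOR1.
             */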
2725        if (lnum > 16) {
2726                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2727                        wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2728
2729                pa = io_addr | (wd.data[0] & 0x7) << 4;
2730                wd.data[0] &= ~0x7ULL;
2731
2732                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2733                        wd.data[0] <<= 16;
2734
2735                wd.data[0] |= (15ULL << 12);
2736                wd.data[0] |= (uint64_t)lmt_id;
2737
2738                /* STEOR0 */
2739                roc_lmt_submit_steorl(wd.data[0], pa);
2740
2741                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2742                        wd.data[1] = cn10k_nix_tx_steor_vec_data(flags);
2743
2744                pa = io_addr | (wd.data[1] & 0x7) << 4;
2745                wd.data[1] &= ~0x7ULL;
2746
2747                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2748                        wd.data[1] <<= 16;
2749
2750                wd.data[1] |= ((uint64_t)(lnum - 17)) << 12;
2751                wd.data[1] |= (uint64_t)(lmt_id + 16);
2752
2753                /* STEOR1 */
2754                roc_lmt_submit_steorl(wd.data[1], pa);
2755        } else if (lnum) {
2756                if (!(flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F)))
2757                        wd.data[0] = cn10k_nix_tx_steor_vec_data(flags);
2758
2759                pa = io_addr | (wd.data[0] & 0x7) << 4;
2760                wd.data[0] &= ~0x7ULL;
2761
2762                if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
2763                        wd.data[0] <<= 16;
2764
2765                wd.data[0] |= ((uint64_t)(lnum - 1)) << 12;
2766                wd.data[0] |= lmt_id;
2767
2768                /* STEOR0 */
2769                roc_lmt_submit_steorl(wd.data[0], pa);
2770        }
2771
2772        rte_io_wmb();
2773        if (left)
2774                goto again;
2775
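            /* Packets left over from rounding down to NIX_DESCS_PER_LOOP
             * are sent through the scalar path.
             */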
2776        if (unlikely(scalar)) {
2777                if (flags & NIX_TX_MULTI_SEG_F)
2778                        pkts += cn10k_nix_xmit_pkts_mseg(tx_queue, ws, tx_pkts,
2779                                                         scalar, cmd, flags);
2780                else
2781                        pkts += cn10k_nix_xmit_pkts(tx_queue, ws, tx_pkts,
2782                                                    scalar, cmd, flags);
2783        }
2784
2785        return pkts;
2786}
2787
2788#else
2789static __rte_always_inline uint16_t
2790cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
2791                           struct rte_mbuf **tx_pkts, uint16_t pkts,
2792                           uint64_t *cmd, const uint16_t flags)
2793{
2794        RTE_SET_USED(ws);
2795        RTE_SET_USED(tx_queue);
2796        RTE_SET_USED(tx_pkts);
2797        RTE_SET_USED(pkts);
2798        RTE_SET_USED(cmd);
2799        RTE_SET_USED(flags);
2800        return 0;
2801}
2802#endif
2803
2804#define L3L4CSUM_F   NIX_TX_OFFLOAD_L3_L4_CSUM_F
2805#define OL3OL4CSUM_F NIX_TX_OFFLOAD_OL3_OL4_CSUM_F
2806#define VLAN_F       NIX_TX_OFFLOAD_VLAN_QINQ_F
2807#define NOFF_F       NIX_TX_OFFLOAD_MBUF_NOFF_F
2808#define TSO_F        NIX_TX_OFFLOAD_TSO_F
2809#define TSP_F        NIX_TX_OFFLOAD_TSTAMP_F
2810#define T_SEC_F      NIX_TX_OFFLOAD_SECURITY_F
2811
2812/* [T_SEC_F] [TSP] [TSO] [NOFF] [VLAN] [OL3OL4CSUM] [L3L4CSUM] */
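    /* In each T(name, sz, flags) entry below, "name" is the fast path
     * suffix, "sz" sizes the cmd[] array in 64-bit words (8 when the
     * timestamp send mem subdescriptor is needed, else 6) and "flags"
     * selects the offload combination the variant is built for.
     */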
2813#define NIX_TX_FASTPATH_MODES_0_15                                             \
2814        T(no_offload, 6, NIX_TX_OFFLOAD_NONE)                                  \
2815        T(l3l4csum, 6, L3L4CSUM_F)                                             \
2816        T(ol3ol4csum, 6, OL3OL4CSUM_F)                                         \
2817        T(ol3ol4csum_l3l4csum, 6, OL3OL4CSUM_F | L3L4CSUM_F)                   \
2818        T(vlan, 6, VLAN_F)                                                     \
2819        T(vlan_l3l4csum, 6, VLAN_F | L3L4CSUM_F)                               \
2820        T(vlan_ol3ol4csum, 6, VLAN_F | OL3OL4CSUM_F)                           \
2821        T(vlan_ol3ol4csum_l3l4csum, 6, VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2822        T(noff, 6, NOFF_F)                                                     \
2823        T(noff_l3l4csum, 6, NOFF_F | L3L4CSUM_F)                               \
2824        T(noff_ol3ol4csum, 6, NOFF_F | OL3OL4CSUM_F)                           \
2825        T(noff_ol3ol4csum_l3l4csum, 6, NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2826        T(noff_vlan, 6, NOFF_F | VLAN_F)                                       \
2827        T(noff_vlan_l3l4csum, 6, NOFF_F | VLAN_F | L3L4CSUM_F)                 \
2828        T(noff_vlan_ol3ol4csum, 6, NOFF_F | VLAN_F | OL3OL4CSUM_F)             \
2829        T(noff_vlan_ol3ol4csum_l3l4csum, 6,                                    \
2830          NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2831
2832#define NIX_TX_FASTPATH_MODES_16_31                                            \
2833        T(tso, 6, TSO_F)                                                       \
2834        T(tso_l3l4csum, 6, TSO_F | L3L4CSUM_F)                                 \
2835        T(tso_ol3ol4csum, 6, TSO_F | OL3OL4CSUM_F)                             \
2836        T(tso_ol3ol4csum_l3l4csum, 6, TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)       \
2837        T(tso_vlan, 6, TSO_F | VLAN_F)                                         \
2838        T(tso_vlan_l3l4csum, 6, TSO_F | VLAN_F | L3L4CSUM_F)                   \
2839        T(tso_vlan_ol3ol4csum, 6, TSO_F | VLAN_F | OL3OL4CSUM_F)               \
2840        T(tso_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2841          TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2842        T(tso_noff, 6, TSO_F | NOFF_F)                                         \
2843        T(tso_noff_l3l4csum, 6, TSO_F | NOFF_F | L3L4CSUM_F)                   \
2844        T(tso_noff_ol3ol4csum, 6, TSO_F | NOFF_F | OL3OL4CSUM_F)               \
2845        T(tso_noff_ol3ol4csum_l3l4csum, 6,                                     \
2846          TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2847        T(tso_noff_vlan, 6, TSO_F | NOFF_F | VLAN_F)                           \
2848        T(tso_noff_vlan_l3l4csum, 6, TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)     \
2849        T(tso_noff_vlan_ol3ol4csum, 6, TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F) \
2850        T(tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2851          TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2852
2853#define NIX_TX_FASTPATH_MODES_32_47                                            \
2854        T(ts, 8, TSP_F)                                                        \
2855        T(ts_l3l4csum, 8, TSP_F | L3L4CSUM_F)                                  \
2856        T(ts_ol3ol4csum, 8, TSP_F | OL3OL4CSUM_F)                              \
2857        T(ts_ol3ol4csum_l3l4csum, 8, TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2858        T(ts_vlan, 8, TSP_F | VLAN_F)                                          \
2859        T(ts_vlan_l3l4csum, 8, TSP_F | VLAN_F | L3L4CSUM_F)                    \
2860        T(ts_vlan_ol3ol4csum, 8, TSP_F | VLAN_F | OL3OL4CSUM_F)                \
2861        T(ts_vlan_ol3ol4csum_l3l4csum, 8,                                      \
2862          TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2863        T(ts_noff, 8, TSP_F | NOFF_F)                                          \
2864        T(ts_noff_l3l4csum, 8, TSP_F | NOFF_F | L3L4CSUM_F)                    \
2865        T(ts_noff_ol3ol4csum, 8, TSP_F | NOFF_F | OL3OL4CSUM_F)                \
2866        T(ts_noff_ol3ol4csum_l3l4csum, 8,                                      \
2867          TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                          \
2868        T(ts_noff_vlan, 8, TSP_F | NOFF_F | VLAN_F)                            \
2869        T(ts_noff_vlan_l3l4csum, 8, TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)      \
2870        T(ts_noff_vlan_ol3ol4csum, 8, TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)  \
2871        T(ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                                 \
2872          TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2873
2874#define NIX_TX_FASTPATH_MODES_48_63                                            \
2875        T(ts_tso, 8, TSP_F | TSO_F)                                            \
2876        T(ts_tso_l3l4csum, 8, TSP_F | TSO_F | L3L4CSUM_F)                      \
2877        T(ts_tso_ol3ol4csum, 8, TSP_F | TSO_F | OL3OL4CSUM_F)                  \
2878        T(ts_tso_ol3ol4csum_l3l4csum, 8,                                       \
2879          TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                           \
2880        T(ts_tso_vlan, 8, TSP_F | TSO_F | VLAN_F)                              \
2881        T(ts_tso_vlan_l3l4csum, 8, TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)        \
2882        T(ts_tso_vlan_ol3ol4csum, 8, TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)    \
2883        T(ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2884          TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2885        T(ts_tso_noff, 8, TSP_F | TSO_F | NOFF_F)                              \
2886        T(ts_tso_noff_l3l4csum, 8, TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)        \
2887        T(ts_tso_noff_ol3ol4csum, 8, TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)    \
2888        T(ts_tso_noff_ol3ol4csum_l3l4csum, 8,                                  \
2889          TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                  \
2890        T(ts_tso_noff_vlan, 8, TSP_F | TSO_F | NOFF_F | VLAN_F)                \
2891        T(ts_tso_noff_vlan_l3l4csum, 8,                                        \
2892          TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                        \
2893        T(ts_tso_noff_vlan_ol3ol4csum, 8,                                      \
2894          TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                      \
2895        T(ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2896          TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2897
2898#define NIX_TX_FASTPATH_MODES_64_79                                            \
2899        T(sec, 6, T_SEC_F)                                                     \
2900        T(sec_l3l4csum, 6, T_SEC_F | L3L4CSUM_F)                               \
2901        T(sec_ol3ol4csum, 6, T_SEC_F | OL3OL4CSUM_F)                           \
2902        T(sec_ol3ol4csum_l3l4csum, 6, T_SEC_F | OL3OL4CSUM_F | L3L4CSUM_F)     \
2903        T(sec_vlan, 6, T_SEC_F | VLAN_F)                                       \
2904        T(sec_vlan_l3l4csum, 6, T_SEC_F | VLAN_F | L3L4CSUM_F)                 \
2905        T(sec_vlan_ol3ol4csum, 6, T_SEC_F | VLAN_F | OL3OL4CSUM_F)             \
2906        T(sec_vlan_ol3ol4csum_l3l4csum, 6,                                     \
2907          T_SEC_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2908        T(sec_noff, 6, T_SEC_F | NOFF_F)                                       \
2909        T(sec_noff_l3l4csum, 6, T_SEC_F | NOFF_F | L3L4CSUM_F)                 \
2910        T(sec_noff_ol3ol4csum, 6, T_SEC_F | NOFF_F | OL3OL4CSUM_F)             \
2911        T(sec_noff_ol3ol4csum_l3l4csum, 6,                                     \
2912          T_SEC_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                        \
2913        T(sec_noff_vlan, 6, T_SEC_F | NOFF_F | VLAN_F)                         \
2914        T(sec_noff_vlan_l3l4csum, 6, T_SEC_F | NOFF_F | VLAN_F | L3L4CSUM_F)   \
2915        T(sec_noff_vlan_ol3ol4csum, 6,                                         \
2916          T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                            \
2917        T(sec_noff_vlan_ol3ol4csum_l3l4csum, 6,                                \
2918          T_SEC_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2919
2920#define NIX_TX_FASTPATH_MODES_80_95                                            \
2921        T(sec_tso, 6, T_SEC_F | TSO_F)                                         \
2922        T(sec_tso_l3l4csum, 6, T_SEC_F | TSO_F | L3L4CSUM_F)                   \
2923        T(sec_tso_ol3ol4csum, 6, T_SEC_F | TSO_F | OL3OL4CSUM_F)               \
2924        T(sec_tso_ol3ol4csum_l3l4csum, 6,                                      \
2925          T_SEC_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2926        T(sec_tso_vlan, 6, T_SEC_F | TSO_F | VLAN_F)                           \
2927        T(sec_tso_vlan_l3l4csum, 6, T_SEC_F | TSO_F | VLAN_F | L3L4CSUM_F)     \
2928        T(sec_tso_vlan_ol3ol4csum, 6, T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F) \
2929        T(sec_tso_vlan_ol3ol4csum_l3l4csum, 6,                                 \
2930          T_SEC_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2931        T(sec_tso_noff, 6, T_SEC_F | TSO_F | NOFF_F)                           \
2932        T(sec_tso_noff_l3l4csum, 6, T_SEC_F | TSO_F | NOFF_F | L3L4CSUM_F)     \
2933        T(sec_tso_noff_ol3ol4csum, 6, T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F) \
2934        T(sec_tso_noff_ol3ol4csum_l3l4csum, 6,                                 \
2935          T_SEC_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2936        T(sec_tso_noff_vlan, 6, T_SEC_F | TSO_F | NOFF_F | VLAN_F)             \
2937        T(sec_tso_noff_vlan_l3l4csum, 6,                                       \
2938          T_SEC_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2939        T(sec_tso_noff_vlan_ol3ol4csum, 6,                                     \
2940          T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2941        T(sec_tso_noff_vlan_ol3ol4csum_l3l4csum, 6,                            \
2942          T_SEC_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2943
2944#define NIX_TX_FASTPATH_MODES_96_111                                           \
2945        T(sec_ts, 8, T_SEC_F | TSP_F)                                          \
2946        T(sec_ts_l3l4csum, 8, T_SEC_F | TSP_F | L3L4CSUM_F)                    \
2947        T(sec_ts_ol3ol4csum, 8, T_SEC_F | TSP_F | OL3OL4CSUM_F)                \
2948        T(sec_ts_ol3ol4csum_l3l4csum, 8,                                       \
2949          T_SEC_F | TSP_F | OL3OL4CSUM_F | L3L4CSUM_F)                         \
2950        T(sec_ts_vlan, 8, T_SEC_F | TSP_F | VLAN_F)                            \
2951        T(sec_ts_vlan_l3l4csum, 8, T_SEC_F | TSP_F | VLAN_F | L3L4CSUM_F)      \
2952        T(sec_ts_vlan_ol3ol4csum, 8, T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F)  \
2953        T(sec_ts_vlan_ol3ol4csum_l3l4csum, 8,                                  \
2954          T_SEC_F | TSP_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2955        T(sec_ts_noff, 8, T_SEC_F | TSP_F | NOFF_F)                            \
2956        T(sec_ts_noff_l3l4csum, 8, T_SEC_F | TSP_F | NOFF_F | L3L4CSUM_F)      \
2957        T(sec_ts_noff_ol3ol4csum, 8, T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F)  \
2958        T(sec_ts_noff_ol3ol4csum_l3l4csum, 8,                                  \
2959          T_SEC_F | TSP_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)                \
2960        T(sec_ts_noff_vlan, 8, T_SEC_F | TSP_F | NOFF_F | VLAN_F)              \
2961        T(sec_ts_noff_vlan_l3l4csum, 8,                                        \
2962          T_SEC_F | TSP_F | NOFF_F | VLAN_F | L3L4CSUM_F)                      \
2963        T(sec_ts_noff_vlan_ol3ol4csum, 8,                                      \
2964          T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)                    \
2965        T(sec_ts_noff_vlan_ol3ol4csum_l3l4csum, 8,                             \
2966          T_SEC_F | TSP_F | NOFF_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)
2967
2968#define NIX_TX_FASTPATH_MODES_112_127                                          \
2969        T(sec_ts_tso, 8, T_SEC_F | TSP_F | TSO_F)                              \
2970        T(sec_ts_tso_l3l4csum, 8, T_SEC_F | TSP_F | TSO_F | L3L4CSUM_F)        \
2971        T(sec_ts_tso_ol3ol4csum, 8, T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F)    \
2972        T(sec_ts_tso_ol3ol4csum_l3l4csum, 8,                                   \
2973          T_SEC_F | TSP_F | TSO_F | OL3OL4CSUM_F | L3L4CSUM_F)                 \
2974        T(sec_ts_tso_vlan, 8, T_SEC_F | TSP_F | TSO_F | VLAN_F)                \
2975        T(sec_ts_tso_vlan_l3l4csum, 8,                                         \
2976          T_SEC_F | TSP_F | TSO_F | VLAN_F | L3L4CSUM_F)                       \
2977        T(sec_ts_tso_vlan_ol3ol4csum, 8,                                       \
2978          T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F)                     \
2979        T(sec_ts_tso_vlan_ol3ol4csum_l3l4csum, 8,                              \
2980          T_SEC_F | TSP_F | TSO_F | VLAN_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2981        T(sec_ts_tso_noff, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F)                \
2982        T(sec_ts_tso_noff_l3l4csum, 8,                                         \
2983          T_SEC_F | TSP_F | TSO_F | NOFF_F | L3L4CSUM_F)                       \
2984        T(sec_ts_tso_noff_ol3ol4csum, 8,                                       \
2985          T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F)                     \
2986        T(sec_ts_tso_noff_ol3ol4csum_l3l4csum, 8,                              \
2987          T_SEC_F | TSP_F | TSO_F | NOFF_F | OL3OL4CSUM_F | L3L4CSUM_F)        \
2988        T(sec_ts_tso_noff_vlan, 8, T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F)  \
2989        T(sec_ts_tso_noff_vlan_l3l4csum, 8,                                    \
2990          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | L3L4CSUM_F)              \
2991        T(sec_ts_tso_noff_vlan_ol3ol4csum, 8,                                  \
2992          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F)            \
2993        T(sec_ts_tso_noff_vlan_ol3ol4csum_l3l4csum, 8,                         \
2994          T_SEC_F | TSP_F | TSO_F | NOFF_F | VLAN_F | OL3OL4CSUM_F |           \
2995                  L3L4CSUM_F)
2996
2997#define NIX_TX_FASTPATH_MODES                                                  \
2998        NIX_TX_FASTPATH_MODES_0_15                                             \
2999        NIX_TX_FASTPATH_MODES_16_31                                            \
3000        NIX_TX_FASTPATH_MODES_32_47                                            \
3001        NIX_TX_FASTPATH_MODES_48_63                                            \
3002        NIX_TX_FASTPATH_MODES_64_79                                            \
3003        NIX_TX_FASTPATH_MODES_80_95                                            \
3004        NIX_TX_FASTPATH_MODES_96_111                                           \
3005        NIX_TX_FASTPATH_MODES_112_127
3006
3007#define T(name, sz, flags)                                                     \
3008        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_##name(          \
3009                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3010        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_mseg_##name(     \
3011                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3012        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_##name(      \
3013                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);     \
3014        uint16_t __rte_noinline __rte_hot cn10k_nix_xmit_pkts_vec_mseg_##name( \
3015                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts);
3016
3017NIX_TX_FASTPATH_MODES
3018#undef T
3019
3020#define NIX_TX_XMIT(fn, sz, flags)                                             \
3021        uint16_t __rte_noinline __rte_hot fn(                                  \
3022                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3023        {                                                                      \
3024                uint64_t cmd[sz];                                              \
3025                /* For TSO inner checksum is a must */                         \
3026                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3027                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3028                        return 0;                                              \
3029                return cn10k_nix_xmit_pkts(tx_queue, NULL, tx_pkts, pkts, cmd, \
3030                                           flags);                             \
3031        }
3032
3033#define NIX_TX_XMIT_MSEG(fn, sz, flags)                                        \
3034        uint16_t __rte_noinline __rte_hot fn(                                  \
3035                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3036        {                                                                      \
3037                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3038                /* For TSO inner checksum is a must */                         \
3039                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3040                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3041                        return 0;                                              \
3042                return cn10k_nix_xmit_pkts_mseg(tx_queue, NULL, tx_pkts, pkts, \
3043                                                cmd,                           \
3044                                                flags | NIX_TX_MULTI_SEG_F);   \
3045        }
3046
3047#define NIX_TX_XMIT_VEC(fn, sz, flags)                                         \
3048        uint16_t __rte_noinline __rte_hot fn(                                  \
3049                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3050        {                                                                      \
3051                uint64_t cmd[sz];                                              \
3052                /* For TSO inner checksum is a must */                         \
3053                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3054                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3055                        return 0;                                              \
3056                return cn10k_nix_xmit_pkts_vector(tx_queue, NULL, tx_pkts,     \
3057                                                  pkts, cmd, (flags));         \
3058        }
3059
3060#define NIX_TX_XMIT_VEC_MSEG(fn, sz, flags)                                    \
3061        uint16_t __rte_noinline __rte_hot fn(                                  \
3062                void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t pkts)      \
3063        {                                                                      \
3064                uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
3065                /* For TSO inner checksum is a must */                         \
3066                if (((flags) & NIX_TX_OFFLOAD_TSO_F) &&                        \
3067                    !((flags) & NIX_TX_OFFLOAD_L3_L4_CSUM_F))                  \
3068                        return 0;                                              \
3069                return cn10k_nix_xmit_pkts_vector(                             \
3070                        tx_queue, NULL, tx_pkts, pkts, cmd,                    \
3071                        (flags) | NIX_TX_MULTI_SEG_F);                         \
3072        }
3073
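    /* Illustrative instantiation (hypothetical, for clarity only): a
     * per-mode source file would pair the declarations above with, e.g.,
     *
     *   NIX_TX_XMIT(cn10k_nix_xmit_pkts_vlan, 6, VLAN_F)
     *   NIX_TX_XMIT_VEC(cn10k_nix_xmit_pkts_vec_vlan, 6, VLAN_F)
     *
     * to emit the scalar and vector VLAN-insert variants.
     */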
3074#endif /* __CN10K_TX_H__ */
3075