dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <stdint.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_vect.h>

#include "ixgbe_ethdev.h"
#include "ixgbe_rxtx.h"
#include "ixgbe_rxtx_vec_common.h"

#pragma GCC diagnostic ignored "-Wcast-qual"

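/*
 * Refill the software ring with RTE_IXGBE_RXQ_REARM_THRESH fresh mbufs,
 * program their buffer addresses into the RX descriptors and advance the
 * hardware tail pointer.
 */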
static inline void
ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
{
        int i;
        uint16_t rx_id;
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
        struct rte_mbuf *mb0, *mb1;
        uint64x2_t dma_addr0, dma_addr1;
        uint64x2_t zero = vdupq_n_u64(0);
        uint64_t paddr;
        uint8x8_t p;

        rxdp = rxq->rx_ring + rxq->rxrearm_start;

        /* Pull 'n' more MBUFs into the software ring */
        if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
                                          (void *)rxep,
                                          RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
                if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
                    rxq->nb_rx_desc) {
                        for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
                                rxep[i].mbuf = &rxq->fake_mbuf;
                                vst1q_u64((uint64_t *)&rxdp[i].read,
                                          zero);
                        }
                }
                rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
                        RTE_IXGBE_RXQ_REARM_THRESH;
                return;
        }

        p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);

        /* Initialize the mbufs in vector, processing 2 mbufs per iteration */
        for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
                mb0 = rxep[0].mbuf;
                mb1 = rxep[1].mbuf;

                /*
                 * Flush mbuf with pkt template.
                 * Data to be rearmed is 6 bytes long.
                 */
                vst1_u8((uint8_t *)&mb0->rearm_data, p);
                paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
                dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
                /* flush desc with pa dma_addr */
                vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

                vst1_u8((uint8_t *)&mb1->rearm_data, p);
                paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
                dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
                vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
        }

        rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
        if (rxq->rxrearm_start >= rxq->nb_rx_desc)
                rxq->rxrearm_start = 0;

        rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;

        rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
                             (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

        /* Update the tail pointer on the NIC */
        IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}

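/*
 * Translate the status/error and packet-type fields of 4 descriptors into
 * mbuf ol_flags: RSS hash / FDIR, VLAN presence and IP/L4 checksum status.
 * When the UDP zero-checksum workaround is active (udp_p_flag set), the
 * L4 bad-checksum flag is cleared for UDP packets.
 */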
static inline void
desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
                  uint8x16_t staterr, uint8_t vlan_flags, uint16_t udp_p_flag,
                  struct rte_mbuf **rx_pkts)
{
        uint16_t udp_p_flag_hi;
        uint8x16_t ptype, udp_csum_skip;
        uint32x4_t temp_udp_csum_skip = {0, 0, 0, 0};
        uint8x16_t vtag_lo, vtag_hi, vtag;
        uint8x16_t temp_csum;
        uint32x4_t csum = {0, 0, 0, 0};

        union {
                uint16_t e[4];
                uint64_t word;
        } vol;

        const uint8x16_t rsstype_msk = {
                        0x0F, 0x0F, 0x0F, 0x0F,
                        0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00,
                        0x00, 0x00, 0x00, 0x00};

        const uint8x16_t rss_flags = {
                        0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
                        0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
                        RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
                        0, 0, 0, RTE_MBUF_F_RX_FDIR};

        /* mask everything except vlan present and l4/ip csum error */
        const uint8x16_t vlan_csum_msk = {
                        IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
                        IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
                        0, 0, 0, 0,
                        0, 0, 0, 0,
                        (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
                        (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
                        (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24,
                        (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 24};

        /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
        const uint8x16_t vlan_csum_map_lo = {
                        RTE_MBUF_F_RX_IP_CKSUM_GOOD,
                        RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
                        RTE_MBUF_F_RX_IP_CKSUM_BAD,
                        RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
                        0, 0, 0, 0,
                        vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
                        vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
                        vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
                        vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
                        0, 0, 0, 0};

        const uint8x16_t vlan_csum_map_hi = {
                        RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
                        RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
                        0, 0, 0, 0,
                        RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
                        RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
                        0, 0, 0, 0};

        /* change mask from 0x200 (IXGBE_RXDADV_PKTTYPE_UDP) to 0x2 */
        udp_p_flag_hi = udp_p_flag >> 8;

        /* mask everything except UDP header present if specified */
        const uint8x16_t udp_hdr_p_msk = {
                        0, 0, 0, 0,
                        udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi, udp_p_flag_hi,
                        0, 0, 0, 0,
                        0, 0, 0, 0};

        const uint8x16_t udp_csum_bad_shuf = {
                        0xFF, ~(uint8_t)RTE_MBUF_F_RX_L4_CKSUM_BAD, 0, 0,
                        0, 0, 0, 0,
                        0, 0, 0, 0,
                        0, 0, 0, 0};

        ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];

        /* save the UDP header present information */
        udp_csum_skip = vandq_u8(ptype, udp_hdr_p_msk);

        /* move UDP header present information to the low 32 bits */
        temp_udp_csum_skip = vcopyq_laneq_u32(temp_udp_csum_skip, 0,
                                vreinterpretq_u32_u8(udp_csum_skip), 1);

        ptype = vandq_u8(ptype, rsstype_msk);
        ptype = vqtbl1q_u8(rss_flags, ptype);

        /* extract vlan_flags and csum_error from staterr */
        vtag = vandq_u8(staterr, vlan_csum_msk);

        /* the csum bits sit in the most significant bits; to use them in a
         * table lookup they must be shifted down. Change mask from 0xc0 to 0x03.
         */
        temp_csum = vshrq_n_u8(vtag, 6);

        /* 'OR' the most significant 32 bits containing the checksum flags
         * with the vlan present flags.
         * The bit layout of each lane (8 bits) is then 'xxxx,VP,x,IPE,L4E'.
         */
        csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0);
        vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag);

        /* convert L4 checksum correct type to vtag_hi */
        vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag);
        vtag_hi = vshrq_n_u8(vtag_hi, 7);

        /* convert VP, IPE, L4E to vtag_lo */
        vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag);
        vtag_lo = vorrq_u8(ptype, vtag_lo);

        /* convert the UDP-header-present flag from 0x2 to 0x1 so it aligns
         * with the RTE_MBUF_F_RX_L4_CKSUM_BAD bit in the low byte of each
         * 8-bit ol_flags word in vtag_lo (4x8). Then mask out the bad
         * checksum flag via shuffle and bit-mask.
         */
        udp_csum_skip = vshrq_n_u8(vreinterpretq_u8_u32(temp_udp_csum_skip), 1);
        udp_csum_skip = vqtbl1q_u8(udp_csum_bad_shuf, udp_csum_skip);
        vtag_lo = vandq_u8(vtag_lo, udp_csum_skip);

        vtag = vzipq_u8(vtag_lo, vtag_hi).val[0];
        vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0);

        rx_pkts[0]->ol_flags = vol.e[0];
        rx_pkts[1]->ol_flags = vol.e[1];
        rx_pkts[2]->ol_flags = vol.e[2];
        rx_pkts[3]->ol_flags = vol.e[3];
}

#define IXGBE_VPMD_DESC_EOP_MASK        0x02020202
#define IXGBE_UINT8_BIT                 (CHAR_BIT * sizeof(uint8_t))

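/*
 * Resolve the final packet type: descriptors matched by an ETQF filter are
 * reported as RTE_PTYPE_UNKNOWN, tunnelled packets are looked up in
 * ptype_table_tn, all others in the regular 82599 ptype_table.
 */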
static inline uint32_t
get_packet_type(uint32_t pkt_info,
                uint32_t etqf_check,
                uint32_t tunnel_check)
{
        if (etqf_check)
                return RTE_PTYPE_UNKNOWN;

        if (tunnel_check) {
                pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
                return ptype_table_tn[pkt_info];
        }

        pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
        return ptype_table[pkt_info];
}

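/* Fill in packet_type for 4 packets from the low 32 bits of their descriptors. */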
static inline void
desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
                struct rte_mbuf **rx_pkts)
{
        uint32x4_t etqf_check, tunnel_check;
        uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
        uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
        uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
        uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
                                vreinterpretq_u32_u64(descs[2])).val[0];
        uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
                                vreinterpretq_u32_u64(descs[3])).val[0];

        /* interleave low 32 bits,
         * now we have 4 ptypes in a NEON register
         */
        ptype0 = vzipq_u32(ptype0, ptype1).val[0];

        /* mask etqf bits */
        etqf_check = vandq_u32(ptype0, etqf_mask);
        /* mask tunnel bits */
        tunnel_check = vandq_u32(ptype0, tunnel_mask);

        /* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
        ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
                        ptype_mask);

        rx_pkts[0]->packet_type =
                get_packet_type(vgetq_lane_u32(ptype0, 0),
                                vgetq_lane_u32(etqf_check, 0),
                                vgetq_lane_u32(tunnel_check, 0));
        rx_pkts[1]->packet_type =
                get_packet_type(vgetq_lane_u32(ptype0, 1),
                                vgetq_lane_u32(etqf_check, 1),
                                vgetq_lane_u32(tunnel_check, 1));
        rx_pkts[2]->packet_type =
                get_packet_type(vgetq_lane_u32(ptype0, 2),
                                vgetq_lane_u32(etqf_check, 2),
                                vgetq_lane_u32(tunnel_check, 2));
        rx_pkts[3]->packet_type =
                get_packet_type(vgetq_lane_u32(ptype0, 3),
                                vgetq_lane_u32(etqf_check, 3),
                                vgetq_lane_u32(tunnel_check, 3));
}

/**
 * vPMD raw receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
                   uint16_t nb_pkts, uint8_t *split_packet)
{
        volatile union ixgbe_adv_rx_desc *rxdp;
        struct ixgbe_rx_entry *sw_ring;
        uint16_t nb_pkts_recd;
        int pos;
        uint8x16_t shuf_msk = {
                0xFF, 0xFF,
                0xFF, 0xFF,  /* skip 32 bits pkt_type */
                12, 13,      /* octet 12~13, low 16 bits pkt_len */
                0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
                12, 13,      /* octet 12~13, 16 bits data_len */
                14, 15,      /* octet 14~15, low 16 bits vlan_macip */
                4, 5, 6, 7   /* octet 4~7, 32 bits rss */
                };
        uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
                                 rxq->crc_len, 0, 0, 0};
        uint8_t vlan_flags;
        uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */

        /* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
        nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);

        /* Just the act of getting into the function from the application is
         * going to cost about 7 cycles
         */
        rxdp = rxq->rx_ring + rxq->rx_tail;

        rte_prefetch_non_temporal(rxdp);

        /* See if we need to rearm the RX queue - gives the prefetch a bit
         * of time to act
         */
        if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
                ixgbe_rxq_rearm(rxq);

        /* Before we start moving massive data around, check to see if
         * there is actually a packet available
         */
        if (!(rxdp->wb.upper.status_error &
                                rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
                return 0;

        if (rxq->rx_udp_csum_zero_err)
                udp_p_flag = IXGBE_RXDADV_PKTTYPE_UDP;

        /* Cache is empty -> need to scan the buffer rings, but first move
         * the next 'n' mbufs into the cache
         */
        sw_ring = &rxq->sw_ring[rxq->rx_tail];

        /* ensure these 2 flags are in the lower 8 bits */
        RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
        vlan_flags = rxq->vlan_flags & UINT8_MAX;

        /* A. load 4 packets' descriptors per loop iteration
         * B. copy 4 mbuf pointers from sw_ring to rx_pkts
         * C. calculate the number of DD bits among the 4 packets
         * [C*. extract the end-of-packet bit, if requested]
         * D. fill mbuf info from the descriptors
         */
        for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
                        pos += RTE_IXGBE_DESCS_PER_LOOP,
                        rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
                uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
                uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
                uint8x16x2_t sterr_tmp1, sterr_tmp2;
                uint64x2_t mbp1, mbp2;
                uint8x16_t staterr;
                uint16x8_t tmp;
                uint32_t stat;

                /* B.1 load 2 mbuf pointers */
                mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);

                /* B.2 copy 2 mbuf pointers into rx_pkts */
                vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

                /* B.1 load 2 mbuf pointers */
                mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

                /* A. load 4 pkts descs */
                descs[0] =  vld1q_u64((uint64_t *)(rxdp));
                descs[1] =  vld1q_u64((uint64_t *)(rxdp + 1));
                descs[2] =  vld1q_u64((uint64_t *)(rxdp + 2));
                descs[3] =  vld1q_u64((uint64_t *)(rxdp + 3));

                /* B.2 copy 2 mbuf pointers into rx_pkts */
                vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

                if (split_packet) {
                        rte_mbuf_prefetch_part2(rx_pkts[pos]);
                        rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
                        rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
                        rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
                }

                /* D.1 pkt 3,4 convert format from desc to pktmbuf */
                pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
                pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

                /* D.1 pkt 1,2 convert format from desc to pktmbuf */
                pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
                pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

                /* C.1 4=>2 filter staterr info only */
                sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
                                      vreinterpretq_u8_u64(descs[3]));
                /* C.1 4=>2 filter staterr info only */
                sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
                                      vreinterpretq_u8_u64(descs[2]));

                /* C.2 get 4 pkts staterr value */
                staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];

                /* set ol_flags with vlan packet type */
                desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags,
                                  udp_p_flag, &rx_pkts[pos]);

                /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
                tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
                pkt_mb4 = vreinterpretq_u8_u16(tmp);
                tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
                pkt_mb3 = vreinterpretq_u8_u16(tmp);

                /* D.3 copy final 3,4 data to rx_pkts */
                vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
                         pkt_mb4);
                vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
                         pkt_mb3);

                /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
                tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
                pkt_mb2 = vreinterpretq_u8_u16(tmp);
                tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
                pkt_mb1 = vreinterpretq_u8_u16(tmp);

                /* C* extract and record EOP bit */
                if (split_packet) {
                        stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
                        /* and with mask to extract bits, flipping 1-0 */
                        *(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;

                        split_packet += RTE_IXGBE_DESCS_PER_LOOP;
                }

                /* C.4 expand DD bit to saturate UINT8 */
                staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
                staterr = vreinterpretq_u8_s8
                                (vshrq_n_s8(vreinterpretq_s8_u8(staterr),
                                        IXGBE_UINT8_BIT - 1));
                stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);

                rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);

                /* D.3 copy final 1,2 data to rx_pkts */
                vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
                         pkt_mb2);
                vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
                         pkt_mb1);

                desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);

                /* C.5 calc available number of desc */
                if (unlikely(stat == 0)) {
                        nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
                } else {
                        nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
                        break;
                }
        }

        /* Update our internal tail pointer */
        rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
        rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
        rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

        return nb_pkts_recd;
}

/**
 * vPMD receive routine; only accepts nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
uint16_t
ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                uint16_t nb_pkts)
{
        return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static uint16_t
ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                               uint16_t nb_pkts)
{
        struct ixgbe_rx_queue *rxq = rx_queue;
        uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};

        /* get some new buffers */
        uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
                        split_flags);
        if (nb_bufs == 0)
                return 0;

        /* happy day case, full burst + no packets to be joined */
        const uint64_t *split_fl64 = (uint64_t *)split_flags;
        if (rxq->pkt_first_seg == NULL &&
                        split_fl64[0] == 0 && split_fl64[1] == 0 &&
                        split_fl64[2] == 0 && split_fl64[3] == 0)
                return nb_bufs;

        /* reassemble any packets that need reassembly */
        unsigned int i = 0;
        if (rxq->pkt_first_seg == NULL) {
                /* find the first split flag, and only reassemble from there */
                while (i < nb_bufs && !split_flags[i])
                        i++;
                if (i == nb_bufs)
                        return nb_bufs;
                rxq->pkt_first_seg = rx_pkts[i];
        }
        return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
                &split_flags[i]);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 */
uint16_t
ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
                              uint16_t nb_pkts)
{
        uint16_t retval = 0;

        while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
                uint16_t burst;

                burst = ixgbe_recv_scattered_burst_vec(rx_queue,
                                                       rx_pkts + retval,
                                                       RTE_IXGBE_MAX_RX_BURST);
                retval += burst;
                nb_pkts -= burst;
                if (burst < RTE_IXGBE_MAX_RX_BURST)
                        return retval;
        }

        return retval + ixgbe_recv_scattered_burst_vec(rx_queue,
                                                       rx_pkts + retval,
                                                       nb_pkts);
}

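/*
 * Build one advanced TX descriptor from an mbuf (buffer address plus
 * cmd_type_len/olinfo_status with the payload length shifted into place)
 * and write it to the ring with a single 128-bit store.
 */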
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
                struct rte_mbuf *pkt, uint64_t flags)
{
        uint64x2_t descriptor = {
                        pkt->buf_iova + pkt->data_off,
                        (uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};

        vst1q_u64((uint64_t *)&txdp->read, descriptor);
}

static inline void
vtx(volatile union ixgbe_adv_tx_desc *txdp,
                struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
        int i;

        for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
                vtx1(txdp, *pkt, flags);
}

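/*
 * Vector TX burst: transmit up to tx_rs_thresh packets, freeing completed
 * descriptors when the free count drops below tx_free_thresh, handling the
 * wrap at the end of the ring and requesting a report-status (RS) write-back
 * every tx_rs_thresh descriptors before bumping the hardware tail pointer.
 */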
uint16_t
ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
                           uint16_t nb_pkts)
{
        struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
        volatile union ixgbe_adv_tx_desc *txdp;
        struct ixgbe_tx_entry_v *txep;
        uint16_t n, nb_commit, tx_id;
        uint64_t flags = DCMD_DTYP_FLAGS;
        uint64_t rs = IXGBE_ADVTXD_DCMD_RS | DCMD_DTYP_FLAGS;
        int i;

        /* crossing the tx_rs_thresh boundary is not allowed */
        nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

        if (txq->nb_tx_free < txq->tx_free_thresh)
                ixgbe_tx_free_bufs(txq);

        nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
        if (unlikely(nb_pkts == 0))
                return 0;

        tx_id = txq->tx_tail;
        txdp = &txq->tx_ring[tx_id];
        txep = &txq->sw_ring_v[tx_id];

        txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

        n = (uint16_t)(txq->nb_tx_desc - tx_id);
        if (nb_commit >= n) {
                tx_backlog_entry(txep, tx_pkts, n);

                for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
                        vtx1(txdp, *tx_pkts, flags);

                vtx1(txdp, *tx_pkts++, rs);

                nb_commit = (uint16_t)(nb_commit - n);

                tx_id = 0;
                txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

                /* avoid reaching the end of the ring */
                txdp = &txq->tx_ring[tx_id];
                txep = &txq->sw_ring_v[tx_id];
        }

        tx_backlog_entry(txep, tx_pkts, nb_commit);

        vtx(txdp, tx_pkts, nb_commit, flags);

        tx_id = (uint16_t)(tx_id + nb_commit);
        if (tx_id > txq->tx_next_rs) {
                txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
                        rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
                txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
                        txq->tx_rs_thresh);
        }

        txq->tx_tail = tx_id;

        IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

        return nb_pkts;
}

static void __rte_cold
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
        _ixgbe_tx_queue_release_mbufs_vec(txq);
}

void __rte_cold
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
        _ixgbe_rx_queue_release_mbufs_vec(rxq);
}

static void __rte_cold
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
        _ixgbe_tx_free_swring_vec(txq);
}

static void __rte_cold
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
        _ixgbe_reset_tx_queue_vec(txq);
}

static const struct ixgbe_txq_ops vec_txq_ops = {
        .release_mbufs = ixgbe_tx_queue_release_mbufs_vec,
        .free_swring = ixgbe_tx_free_swring,
        .reset = ixgbe_reset_tx_queue,
};

int __rte_cold
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
        return ixgbe_rxq_vec_setup_default(rxq);
}

int __rte_cold
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
        return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
}

int __rte_cold
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
        return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
}