5#include <stdint.h>
6#include <stdbool.h>
7#include <linux/virtio_net.h>
8
9#include <rte_mbuf.h>
10#include <rte_memcpy.h>
11#include <rte_net.h>
12#include <rte_ether.h>
13#include <rte_ip.h>
14#include <rte_dmadev.h>
15#include <rte_vhost.h>
16#include <rte_tcp.h>
17#include <rte_udp.h>
18#include <rte_sctp.h>
19#include <rte_arp.h>
20#include <rte_spinlock.h>
21#include <rte_malloc.h>
22#include <rte_vhost_async.h>
23
24#include "iotlb.h"
25#include "vhost.h"
26
27#define MAX_BATCH_LEN 256
28
29static __rte_always_inline uint16_t
30async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
31 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
32 uint16_t vchan_id, bool legacy_ol_flags);
33
34
35struct async_dma_info dma_copy_track[RTE_DMADEV_DEFAULT_MAX];
36
37static __rte_always_inline bool
38rxvq_is_mergeable(struct virtio_net *dev)
39{
40 return dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF);
41}
42
43static __rte_always_inline bool
44virtio_net_is_inorder(struct virtio_net *dev)
45{
46 return dev->features & (1ULL << VIRTIO_F_IN_ORDER);
47}
48
49static bool
50is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t nr_vring)
51{
52 return (is_tx ^ (idx & 1)) == 0 && idx < nr_vring;
53}
54
55
56
57
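/*
 * Accumulate per-virtqueue statistics for a burst of packets. This is a
 * no-op unless stats collection is enabled for the device; callers in this
 * file invoke it with the virtqueue's access_lock already held.
 */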
58static inline void
59vhost_queue_stats_update(struct virtio_net *dev, struct vhost_virtqueue *vq,
60 struct rte_mbuf **pkts, uint16_t count)
61{
62 struct virtqueue_stats *stats = &vq->stats;
63 int i;
64
65 if (!(dev->flags & VIRTIO_DEV_STATS_ENABLED))
66 return;
67
68 for (i = 0; i < count; i++) {
69 struct rte_ether_addr *ea;
70 struct rte_mbuf *pkt = pkts[i];
71 uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt);
72
73 stats->packets++;
74 stats->bytes += pkt_len;
75
76 if (pkt_len == 64) {
77 stats->size_bins[1]++;
78 } else if (pkt_len > 64 && pkt_len < 1024) {
79 uint32_t bin;

			/* leading-zero count indexes the power-of-two bin:
			 * 65-127 bytes -> bin 2, ..., 512-1023 -> bin 5 */
82 bin = (sizeof(pkt_len) * 8) - __builtin_clz(pkt_len) - 5;
83 stats->size_bins[bin]++;
84 } else {
85 if (pkt_len < 64)
86 stats->size_bins[0]++;
87 else if (pkt_len < 1519)
88 stats->size_bins[6]++;
89 else
90 stats->size_bins[7]++;
91 }
92
93 ea = rte_pktmbuf_mtod(pkt, struct rte_ether_addr *);
94 if (rte_is_multicast_ether_addr(ea)) {
95 if (rte_is_broadcast_ether_addr(ea))
96 stats->broadcast++;
97 else
98 stats->multicast++;
99 }
100 }
101}
102
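/*
 * Enqueue all scatter-gather segments of one packet on the given DMA
 * virtual channel. Returns the number of segments enqueued, or -1 if the
 * channel lacks capacity or a copy submission fails. The caller serializes
 * access to the channel via dma_lock.
 */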
103static __rte_always_inline int64_t
104vhost_async_dma_transfer_one(struct virtio_net *dev, struct vhost_virtqueue *vq,
105 int16_t dma_id, uint16_t vchan_id, uint16_t flag_idx,
106 struct vhost_iov_iter *pkt)
107{
108 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
109 uint16_t ring_mask = dma_info->ring_mask;
110 static bool vhost_async_dma_copy_log;
111
112
113 struct vhost_iovec *iov = pkt->iov;
114 int copy_idx = 0;
115 uint32_t nr_segs = pkt->nr_segs;
116 uint16_t i;
117
118 if (rte_dma_burst_capacity(dma_id, vchan_id) < nr_segs)
119 return -1;
120
121 for (i = 0; i < nr_segs; i++) {
122 copy_idx = rte_dma_copy(dma_id, vchan_id, (rte_iova_t)iov[i].src_addr,
123 (rte_iova_t)iov[i].dst_addr, iov[i].len, RTE_DMA_OP_FLAG_LLC);
124
		/*
		 * Since all memory is pinned and the DMA vChannel
		 * ring has enough space, failure should be a rare
		 * case. If it happens, the DMA device has hit a
		 * serious error; stop the async data path and check
		 * the DMA device.
		 */
132 if (unlikely(copy_idx < 0)) {
133 if (!vhost_async_dma_copy_log) {
134 VHOST_LOG_DATA(dev->ifname, ERR,
135 "DMA copy failed for channel %d:%u\n",
136 dma_id, vchan_id);
137 vhost_async_dma_copy_log = true;
138 }
139 return -1;
140 }
141 }
142
	/*
	 * Only store the packet completion flag address in the last copy's
	 * slot; the other slots for this packet are left NULL.
	 */
147 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = &vq->async->pkts_cmpl_flag[flag_idx];
148
149 return nr_segs;
150}
151
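/*
 * Submit up to nr_pkts packets (each described by a vhost_iov_iter) to the
 * DMA channel and ring its doorbell once. Returns the number of packets
 * whose copies were fully enqueued; a failing packet stops the batch.
 */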
152static __rte_always_inline uint16_t
153vhost_async_dma_transfer(struct virtio_net *dev, struct vhost_virtqueue *vq,
154 int16_t dma_id, uint16_t vchan_id, uint16_t head_idx,
155 struct vhost_iov_iter *pkts, uint16_t nr_pkts)
156{
157 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
158 int64_t ret, nr_copies = 0;
159 uint16_t pkt_idx;
160
161 rte_spinlock_lock(&dma_info->dma_lock);
162
163 for (pkt_idx = 0; pkt_idx < nr_pkts; pkt_idx++) {
164 ret = vhost_async_dma_transfer_one(dev, vq, dma_id, vchan_id, head_idx,
165 &pkts[pkt_idx]);
166 if (unlikely(ret < 0))
167 break;
168
169 nr_copies += ret;
170 head_idx++;
171 if (head_idx >= vq->size)
172 head_idx -= vq->size;
173 }
174
175 if (likely(nr_copies > 0))
176 rte_dma_submit(dma_id, vchan_id);
177
178 rte_spinlock_unlock(&dma_info->dma_lock);
179
180 return pkt_idx;
181}
182
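/*
 * Poll the DMA channel for finished copies and set the corresponding
 * pkts_cmpl_flag entries recorded at submission time. Returns the number
 * of completed copies reported by the DMA device.
 */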
183static __rte_always_inline uint16_t
184vhost_async_dma_check_completed(struct virtio_net *dev, int16_t dma_id, uint16_t vchan_id,
185 uint16_t max_pkts)
186{
187 struct async_dma_vchan_info *dma_info = &dma_copy_track[dma_id].vchans[vchan_id];
188 uint16_t ring_mask = dma_info->ring_mask;
189 uint16_t last_idx = 0;
190 uint16_t nr_copies;
191 uint16_t copy_idx;
192 uint16_t i;
193 bool has_error = false;
194 static bool vhost_async_dma_complete_log;
195
196 rte_spinlock_lock(&dma_info->dma_lock);
197
	/*
	 * Print an error log for debugging if the DMA device reports an
	 * error during the transfer. Errors are not handled at vhost level.
	 */
202 nr_copies = rte_dma_completed(dma_id, vchan_id, max_pkts, &last_idx, &has_error);
203 if (unlikely(!vhost_async_dma_complete_log && has_error)) {
204 VHOST_LOG_DATA(dev->ifname, ERR,
205 "DMA completion failure on channel %d:%u\n",
206 dma_id, vchan_id);
207 vhost_async_dma_complete_log = true;
208 } else if (nr_copies == 0) {
209 goto out;
210 }
211
212 copy_idx = last_idx - nr_copies + 1;
213 for (i = 0; i < nr_copies; i++) {
214 bool *flag;
215
216 flag = dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask];
217 if (flag) {
			/*
			 * Mark the packet copy as completed. The flag
			 * could belong to another virtqueue, but the
			 * write is atomic.
			 */
223 *flag = true;
224 dma_info->pkts_cmpl_flag_addr[copy_idx & ring_mask] = NULL;
225 }
226 copy_idx++;
227 }
228
229out:
230 rte_spinlock_unlock(&dma_info->dma_lock);
231 return nr_copies;
232}
233
234static inline void
235do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
236{
237 struct batch_copy_elem *elem = vq->batch_copy_elems;
238 uint16_t count = vq->batch_copy_nb_elems;
239 int i;
240
241 for (i = 0; i < count; i++) {
242 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
243 vhost_log_cache_write_iova(dev, vq, elem[i].log_addr,
244 elem[i].len);
245 PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
246 }
247
248 vq->batch_copy_nb_elems = 0;
249}
250
251static inline void
252do_data_copy_dequeue(struct vhost_virtqueue *vq)
253{
254 struct batch_copy_elem *elem = vq->batch_copy_elems;
255 uint16_t count = vq->batch_copy_nb_elems;
256 int i;
257
258 for (i = 0; i < count; i++)
259 rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
260
261 vq->batch_copy_nb_elems = 0;
262}
263
264static __rte_always_inline void
265do_flush_shadow_used_ring_split(struct virtio_net *dev,
266 struct vhost_virtqueue *vq,
267 uint16_t to, uint16_t from, uint16_t size)
268{
269 rte_memcpy(&vq->used->ring[to],
270 &vq->shadow_used_split[from],
271 size * sizeof(struct vring_used_elem));
272 vhost_log_cache_used_vring(dev, vq,
273 offsetof(struct vring_used, ring[to]),
274 size * sizeof(struct vring_used_elem));
275}
276
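/*
 * Flush the shadow used ring to the split virtqueue's used ring, handling
 * the wrap-around case, then publish the new used index with release
 * semantics so the guest only sees complete entries.
 */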
277static __rte_always_inline void
278flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
279{
280 uint16_t used_idx = vq->last_used_idx & (vq->size - 1);
281
282 if (used_idx + vq->shadow_used_idx <= vq->size) {
283 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0,
284 vq->shadow_used_idx);
285 } else {
286 uint16_t size;
287
		/* update used ring interval [used_idx, vq->size) */
289 size = vq->size - used_idx;
290 do_flush_shadow_used_ring_split(dev, vq, used_idx, 0, size);
291
		/* update the remaining interval [0, shadow_used_idx - size) */
293 do_flush_shadow_used_ring_split(dev, vq, 0, size,
294 vq->shadow_used_idx - size);
295 }
296 vq->last_used_idx += vq->shadow_used_idx;
297
298 vhost_log_cache_sync(dev, vq);
299
300 __atomic_add_fetch(&vq->used->idx, vq->shadow_used_idx,
301 __ATOMIC_RELEASE);
302 vq->shadow_used_idx = 0;
303 vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
304 sizeof(vq->used->idx));
305}
306
307static __rte_always_inline void
308update_shadow_used_ring_split(struct vhost_virtqueue *vq,
309 uint16_t desc_idx, uint32_t len)
310{
311 uint16_t i = vq->shadow_used_idx++;
312
313 vq->shadow_used_split[i].id = desc_idx;
314 vq->shadow_used_split[i].len = len;
315}
316
317static __rte_always_inline void
318vhost_flush_enqueue_shadow_packed(struct virtio_net *dev,
319 struct vhost_virtqueue *vq)
320{
321 int i;
322 uint16_t used_idx = vq->last_used_idx;
323 uint16_t head_idx = vq->last_used_idx;
324 uint16_t head_flags = 0;
325
	/* Split loop in two to save memory barriers */
327 for (i = 0; i < vq->shadow_used_idx; i++) {
328 vq->desc_packed[used_idx].id = vq->shadow_used_packed[i].id;
329 vq->desc_packed[used_idx].len = vq->shadow_used_packed[i].len;
330
331 used_idx += vq->shadow_used_packed[i].count;
332 if (used_idx >= vq->size)
333 used_idx -= vq->size;
334 }
335
	/* The ordering for storing desc flags needs to be enforced. */
337 rte_atomic_thread_fence(__ATOMIC_RELEASE);
338
339 for (i = 0; i < vq->shadow_used_idx; i++) {
340 uint16_t flags;
341
342 if (vq->shadow_used_packed[i].len)
343 flags = VRING_DESC_F_WRITE;
344 else
345 flags = 0;
346
347 if (vq->used_wrap_counter) {
348 flags |= VRING_DESC_F_USED;
349 flags |= VRING_DESC_F_AVAIL;
350 } else {
351 flags &= ~VRING_DESC_F_USED;
352 flags &= ~VRING_DESC_F_AVAIL;
353 }
354
355 if (i > 0) {
356 vq->desc_packed[vq->last_used_idx].flags = flags;
357
358 vhost_log_cache_used_vring(dev, vq,
359 vq->last_used_idx *
360 sizeof(struct vring_packed_desc),
361 sizeof(struct vring_packed_desc));
362 } else {
363 head_idx = vq->last_used_idx;
364 head_flags = flags;
365 }
366
367 vq_inc_last_used_packed(vq, vq->shadow_used_packed[i].count);
368 }
369
370 vq->desc_packed[head_idx].flags = head_flags;
371
372 vhost_log_cache_used_vring(dev, vq,
373 head_idx *
374 sizeof(struct vring_packed_desc),
375 sizeof(struct vring_packed_desc));
376
377 vq->shadow_used_idx = 0;
378 vhost_log_cache_sync(dev, vq);
379}
380
381static __rte_always_inline void
382vhost_flush_dequeue_shadow_packed(struct virtio_net *dev,
383 struct vhost_virtqueue *vq)
384{
385 struct vring_used_elem_packed *used_elem = &vq->shadow_used_packed[0];
386
387 vq->desc_packed[vq->shadow_last_used_idx].id = used_elem->id;
388
389 __atomic_store_n(&vq->desc_packed[vq->shadow_last_used_idx].flags,
390 used_elem->flags, __ATOMIC_RELEASE);
391
392 vhost_log_cache_used_vring(dev, vq, vq->shadow_last_used_idx *
393 sizeof(struct vring_packed_desc),
394 sizeof(struct vring_packed_desc));
395 vq->shadow_used_idx = 0;
396 vhost_log_cache_sync(dev, vq);
397}
398
399static __rte_always_inline void
400vhost_flush_enqueue_batch_packed(struct virtio_net *dev,
401 struct vhost_virtqueue *vq,
402 uint64_t *lens,
403 uint16_t *ids)
404{
405 uint16_t i;
406 uint16_t flags;
407 uint16_t last_used_idx;
408 struct vring_packed_desc *desc_base;
409
410 last_used_idx = vq->last_used_idx;
411 desc_base = &vq->desc_packed[last_used_idx];
412
413 flags = PACKED_DESC_ENQUEUE_USED_FLAG(vq->used_wrap_counter);
414
415 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
416 desc_base[i].id = ids[i];
417 desc_base[i].len = lens[i];
418 }
419
420 rte_atomic_thread_fence(__ATOMIC_RELEASE);
421
422 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
423 desc_base[i].flags = flags;
424 }
425
426 vhost_log_cache_used_vring(dev, vq, last_used_idx *
427 sizeof(struct vring_packed_desc),
428 sizeof(struct vring_packed_desc) *
429 PACKED_BATCH_SIZE);
430 vhost_log_cache_sync(dev, vq);
431
432 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
433}
434
435static __rte_always_inline void
436vhost_shadow_dequeue_batch_packed_inorder(struct vhost_virtqueue *vq,
437 uint16_t id)
438{
439 vq->shadow_used_packed[0].id = id;
440
441 if (!vq->shadow_used_idx) {
442 vq->shadow_last_used_idx = vq->last_used_idx;
443 vq->shadow_used_packed[0].flags =
444 PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
445 vq->shadow_used_packed[0].len = 0;
446 vq->shadow_used_packed[0].count = 1;
447 vq->shadow_used_idx++;
448 }
449
450 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
451}
452
453static __rte_always_inline void
454vhost_shadow_dequeue_batch_packed(struct virtio_net *dev,
455 struct vhost_virtqueue *vq,
456 uint16_t *ids)
457{
458 uint16_t flags;
459 uint16_t i;
460 uint16_t begin;
461
462 flags = PACKED_DESC_DEQUEUE_USED_FLAG(vq->used_wrap_counter);
463
464 if (!vq->shadow_used_idx) {
465 vq->shadow_last_used_idx = vq->last_used_idx;
466 vq->shadow_used_packed[0].id = ids[0];
467 vq->shadow_used_packed[0].len = 0;
468 vq->shadow_used_packed[0].count = 1;
469 vq->shadow_used_packed[0].flags = flags;
470 vq->shadow_used_idx++;
471 begin = 1;
472 } else
473 begin = 0;
474
475 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE) {
476 vq->desc_packed[vq->last_used_idx + i].id = ids[i];
477 vq->desc_packed[vq->last_used_idx + i].len = 0;
478 }
479
480 rte_atomic_thread_fence(__ATOMIC_RELEASE);
481 vhost_for_each_try_unroll(i, begin, PACKED_BATCH_SIZE)
482 vq->desc_packed[vq->last_used_idx + i].flags = flags;
483
484 vhost_log_cache_used_vring(dev, vq, vq->last_used_idx *
485 sizeof(struct vring_packed_desc),
486 sizeof(struct vring_packed_desc) *
487 PACKED_BATCH_SIZE);
488 vhost_log_cache_sync(dev, vq);
489
490 vq_inc_last_used_packed(vq, PACKED_BATCH_SIZE);
491}
492
493static __rte_always_inline void
494vhost_shadow_dequeue_single_packed(struct vhost_virtqueue *vq,
495 uint16_t buf_id,
496 uint16_t count)
497{
498 uint16_t flags;
499
500 flags = vq->desc_packed[vq->last_used_idx].flags;
501 if (vq->used_wrap_counter) {
502 flags |= VRING_DESC_F_USED;
503 flags |= VRING_DESC_F_AVAIL;
504 } else {
505 flags &= ~VRING_DESC_F_USED;
506 flags &= ~VRING_DESC_F_AVAIL;
507 }
508
509 if (!vq->shadow_used_idx) {
510 vq->shadow_last_used_idx = vq->last_used_idx;
511
512 vq->shadow_used_packed[0].id = buf_id;
513 vq->shadow_used_packed[0].len = 0;
514 vq->shadow_used_packed[0].flags = flags;
515 vq->shadow_used_idx++;
516 } else {
517 vq->desc_packed[vq->last_used_idx].id = buf_id;
518 vq->desc_packed[vq->last_used_idx].len = 0;
519 vq->desc_packed[vq->last_used_idx].flags = flags;
520 }
521
522 vq_inc_last_used_packed(vq, count);
523}
524
525static __rte_always_inline void
526vhost_shadow_dequeue_single_packed_inorder(struct vhost_virtqueue *vq,
527 uint16_t buf_id,
528 uint16_t count)
529{
530 uint16_t flags;
531
532 vq->shadow_used_packed[0].id = buf_id;
533
534 flags = vq->desc_packed[vq->last_used_idx].flags;
535 if (vq->used_wrap_counter) {
536 flags |= VRING_DESC_F_USED;
537 flags |= VRING_DESC_F_AVAIL;
538 } else {
539 flags &= ~VRING_DESC_F_USED;
540 flags &= ~VRING_DESC_F_AVAIL;
541 }
542
543 if (!vq->shadow_used_idx) {
544 vq->shadow_last_used_idx = vq->last_used_idx;
545 vq->shadow_used_packed[0].len = 0;
546 vq->shadow_used_packed[0].flags = flags;
547 vq->shadow_used_idx++;
548 }
549
550 vq_inc_last_used_packed(vq, count);
551}
552
553static __rte_always_inline void
554vhost_shadow_enqueue_packed(struct vhost_virtqueue *vq,
555 uint32_t *len,
556 uint16_t *id,
557 uint16_t *count,
558 uint16_t num_buffers)
559{
560 uint16_t i;
561
562 for (i = 0; i < num_buffers; i++) {
		/* seed the batch alignment tracker when the shadow ring is empty */
564 if (!vq->shadow_used_idx)
565 vq->shadow_aligned_idx = vq->last_used_idx &
566 PACKED_BATCH_MASK;
567 vq->shadow_used_packed[vq->shadow_used_idx].id = id[i];
568 vq->shadow_used_packed[vq->shadow_used_idx].len = len[i];
569 vq->shadow_used_packed[vq->shadow_used_idx].count = count[i];
570 vq->shadow_aligned_idx += count[i];
571 vq->shadow_used_idx++;
572 }
573}
574
575static __rte_always_inline void
576vhost_shadow_enqueue_single_packed(struct virtio_net *dev,
577 struct vhost_virtqueue *vq,
578 uint32_t *len,
579 uint16_t *id,
580 uint16_t *count,
581 uint16_t num_buffers)
582{
583 vhost_shadow_enqueue_packed(vq, len, id, count, num_buffers);
584
585 if (vq->shadow_aligned_idx >= PACKED_BATCH_SIZE) {
586 do_data_copy_enqueue(dev, vq);
587 vhost_flush_enqueue_shadow_packed(dev, vq);
588 }
589}
590
/* avoid the write when the value is already equal, to lessen cache issues */
592#define ASSIGN_UNLESS_EQUAL(var, val) do { \
593 if ((var) != (val)) \
594 (var) = (val); \
595} while (0)
596
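/*
 * Translate mbuf offload flags into the virtio_net header seen by the
 * guest. For example, a TSO mbuf carrying RTE_MBUF_F_TX_TCP_SEG and
 * RTE_MBUF_F_TX_IPV4 ends up with gso_type = VIRTIO_NET_HDR_GSO_TCPV4,
 * gso_size = tso_segsz, and csum_start/csum_offset pointing at the TCP
 * checksum field.
 */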
597static __rte_always_inline void
598virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr)
599{
600 uint64_t csum_l4 = m_buf->ol_flags & RTE_MBUF_F_TX_L4_MASK;
601
602 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
603 csum_l4 |= RTE_MBUF_F_TX_TCP_CKSUM;
604
605 if (csum_l4) {
		/*
		 * The pseudo-header checksum must be set as per the Virtio
		 * spec.
		 *
		 * Note: errors from rte_net_intel_cksum_prepare() are not
		 * propagated, as doing so would hurt performance, and an
		 * error only means the packet is dropped by the guest
		 * instead of being dropped here.
		 */
614 rte_net_intel_cksum_prepare(m_buf);
615
616 net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
617 net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len;
618
619 switch (csum_l4) {
620 case RTE_MBUF_F_TX_TCP_CKSUM:
621 net_hdr->csum_offset = (offsetof(struct rte_tcp_hdr,
622 cksum));
623 break;
624 case RTE_MBUF_F_TX_UDP_CKSUM:
625 net_hdr->csum_offset = (offsetof(struct rte_udp_hdr,
626 dgram_cksum));
627 break;
628 case RTE_MBUF_F_TX_SCTP_CKSUM:
629 net_hdr->csum_offset = (offsetof(struct rte_sctp_hdr,
630 cksum));
631 break;
632 }
633 } else {
634 ASSIGN_UNLESS_EQUAL(net_hdr->csum_start, 0);
635 ASSIGN_UNLESS_EQUAL(net_hdr->csum_offset, 0);
636 ASSIGN_UNLESS_EQUAL(net_hdr->flags, 0);
637 }

	/* IP cksum verification cannot be bypassed, so calculate it here */
640 if (m_buf->ol_flags & RTE_MBUF_F_TX_IP_CKSUM) {
641 struct rte_ipv4_hdr *ipv4_hdr;
642
643 ipv4_hdr = rte_pktmbuf_mtod_offset(m_buf, struct rte_ipv4_hdr *,
644 m_buf->l2_len);
645 ipv4_hdr->hdr_checksum = 0;
646 ipv4_hdr->hdr_checksum = rte_ipv4_cksum(ipv4_hdr);
647 }
648
649 if (m_buf->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
650 if (m_buf->ol_flags & RTE_MBUF_F_TX_IPV4)
651 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
652 else
653 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
654 net_hdr->gso_size = m_buf->tso_segsz;
655 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len
656 + m_buf->l4_len;
657 } else if (m_buf->ol_flags & RTE_MBUF_F_TX_UDP_SEG) {
658 net_hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP;
659 net_hdr->gso_size = m_buf->tso_segsz;
660 net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len +
661 m_buf->l4_len;
662 } else {
663 ASSIGN_UNLESS_EQUAL(net_hdr->gso_type, 0);
664 ASSIGN_UNLESS_EQUAL(net_hdr->gso_size, 0);
665 ASSIGN_UNLESS_EQUAL(net_hdr->hdr_len, 0);
666 }
667}
668
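/*
 * Translate one descriptor's guest IOVA range into host virtual addresses,
 * splitting it into multiple buf_vec entries when the range is not
 * contiguous in the host address space. Returns -1 on translation failure
 * or buf_vec overflow.
 */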
669static __rte_always_inline int
670map_one_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
671 struct buf_vector *buf_vec, uint16_t *vec_idx,
672 uint64_t desc_iova, uint64_t desc_len, uint8_t perm)
673{
674 uint16_t vec_id = *vec_idx;
675
676 while (desc_len) {
677 uint64_t desc_addr;
678 uint64_t desc_chunck_len = desc_len;
679
680 if (unlikely(vec_id >= BUF_VECTOR_MAX))
681 return -1;
682
683 desc_addr = vhost_iova_to_vva(dev, vq,
684 desc_iova,
685 &desc_chunck_len,
686 perm);
687 if (unlikely(!desc_addr))
688 return -1;
689
690 rte_prefetch0((void *)(uintptr_t)desc_addr);
691
692 buf_vec[vec_id].buf_iova = desc_iova;
693 buf_vec[vec_id].buf_addr = desc_addr;
694 buf_vec[vec_id].buf_len = desc_chunck_len;
695
696 desc_len -= desc_chunck_len;
697 desc_iova += desc_chunck_len;
698 vec_id++;
699 }
700 *vec_idx = vec_id;
701
702 return 0;
703}
704
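/*
 * Walk one split-ring descriptor chain (following indirect tables if
 * present) starting at avail ring slot avail_idx, and record every mapped
 * buffer segment into buf_vec. Outputs the chain head id and total length.
 */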
705static __rte_always_inline int
706fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
707 uint32_t avail_idx, uint16_t *vec_idx,
708 struct buf_vector *buf_vec, uint16_t *desc_chain_head,
709 uint32_t *desc_chain_len, uint8_t perm)
710{
711 uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
712 uint16_t vec_id = *vec_idx;
713 uint32_t len = 0;
714 uint64_t dlen;
715 uint32_t nr_descs = vq->size;
716 uint32_t cnt = 0;
717 struct vring_desc *descs = vq->desc;
718 struct vring_desc *idesc = NULL;
719
720 if (unlikely(idx >= vq->size))
721 return -1;
722
723 *desc_chain_head = idx;
724
725 if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
726 dlen = vq->desc[idx].len;
727 nr_descs = dlen / sizeof(struct vring_desc);
728 if (unlikely(nr_descs > vq->size))
729 return -1;
730
731 descs = (struct vring_desc *)(uintptr_t)
732 vhost_iova_to_vva(dev, vq, vq->desc[idx].addr,
733 &dlen,
734 VHOST_ACCESS_RO);
735 if (unlikely(!descs))
736 return -1;
737
738 if (unlikely(dlen < vq->desc[idx].len)) {
			/*
			 * The indirect desc table is not contiguous
			 * in process VA space, so it has to be copied.
			 */
743 idesc = vhost_alloc_copy_ind_table(dev, vq,
744 vq->desc[idx].addr, vq->desc[idx].len);
745 if (unlikely(!idesc))
746 return -1;
747
748 descs = idesc;
749 }
750
751 idx = 0;
752 }
753
754 while (1) {
755 if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) {
756 free_ind_table(idesc);
757 return -1;
758 }
759
760 dlen = descs[idx].len;
761 len += dlen;
762
763 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
764 descs[idx].addr, dlen,
765 perm))) {
766 free_ind_table(idesc);
767 return -1;
768 }
769
770 if ((descs[idx].flags & VRING_DESC_F_NEXT) == 0)
771 break;
772
773 idx = descs[idx].next;
774 }
775
776 *desc_chain_len = len;
777 *vec_idx = vec_id;
778
779 if (unlikely(!!idesc))
780 free_ind_table(idesc);
781
782 return 0;
783}
784
/*
 * Returns -1 on failure, 0 on success.
 */
788static inline int
789reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
790 uint32_t size, struct buf_vector *buf_vec,
791 uint16_t *num_buffers, uint16_t avail_head,
792 uint16_t *nr_vec)
793{
794 uint16_t cur_idx;
795 uint16_t vec_idx = 0;
796 uint16_t max_tries, tries = 0;
797
798 uint16_t head_idx = 0;
799 uint32_t len = 0;
800
801 *num_buffers = 0;
802 cur_idx = vq->last_avail_idx;
803
804 if (rxvq_is_mergeable(dev))
805 max_tries = vq->size - 1;
806 else
807 max_tries = 1;
808
809 while (size > 0) {
810 if (unlikely(cur_idx == avail_head))
811 return -1;
812
		/*
		 * If we tried all available ring items and still
		 * can't get enough buffers, something abnormal
		 * has happened.
		 */
817 if (unlikely(++tries > max_tries))
818 return -1;
819
820 if (unlikely(fill_vec_buf_split(dev, vq, cur_idx,
821 &vec_idx, buf_vec,
822 &head_idx, &len,
823 VHOST_ACCESS_RW) < 0))
824 return -1;
825 len = RTE_MIN(len, size);
826 update_shadow_used_ring_split(vq, head_idx, len);
827 size -= len;
828
829 cur_idx++;
830 *num_buffers += 1;
831 }
832
833 *nr_vec = vec_idx;
834
835 return 0;
836}
837
838static __rte_always_inline int
839fill_vec_buf_packed_indirect(struct virtio_net *dev,
840 struct vhost_virtqueue *vq,
841 struct vring_packed_desc *desc, uint16_t *vec_idx,
842 struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
843{
844 uint16_t i;
845 uint32_t nr_descs;
846 uint16_t vec_id = *vec_idx;
847 uint64_t dlen;
848 struct vring_packed_desc *descs, *idescs = NULL;
849
850 dlen = desc->len;
851 descs = (struct vring_packed_desc *)(uintptr_t)
852 vhost_iova_to_vva(dev, vq, desc->addr, &dlen, VHOST_ACCESS_RO);
853 if (unlikely(!descs))
854 return -1;
855
856 if (unlikely(dlen < desc->len)) {
		/*
		 * The indirect desc table is not contiguous
		 * in process VA space, so it has to be copied.
		 */
861 idescs = vhost_alloc_copy_ind_table(dev,
862 vq, desc->addr, desc->len);
863 if (unlikely(!idescs))
864 return -1;
865
866 descs = idescs;
867 }
868
869 nr_descs = desc->len / sizeof(struct vring_packed_desc);
870 if (unlikely(nr_descs >= vq->size)) {
871 free_ind_table(idescs);
872 return -1;
873 }
874
875 for (i = 0; i < nr_descs; i++) {
876 if (unlikely(vec_id >= BUF_VECTOR_MAX)) {
877 free_ind_table(idescs);
878 return -1;
879 }
880
881 dlen = descs[i].len;
882 *len += dlen;
883 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
884 descs[i].addr, dlen,
885 perm)))
886 return -1;
887 }
888 *vec_idx = vec_id;
889
890 if (unlikely(!!idescs))
891 free_ind_table(idescs);
892
893 return 0;
894}
895
896static __rte_always_inline int
897fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
898 uint16_t avail_idx, uint16_t *desc_count,
899 struct buf_vector *buf_vec, uint16_t *vec_idx,
900 uint16_t *buf_id, uint32_t *len, uint8_t perm)
901{
902 bool wrap_counter = vq->avail_wrap_counter;
903 struct vring_packed_desc *descs = vq->desc_packed;
904 uint16_t vec_id = *vec_idx;
905 uint64_t dlen;
906
907 if (avail_idx < vq->last_avail_idx)
908 wrap_counter ^= 1;
909
	/*
	 * The ordering between desc flags and desc
	 * content reads needs to be enforced.
	 */
915 if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter)))
916 return -1;
917
918 *desc_count = 0;
919 *len = 0;
920
921 while (1) {
922 if (unlikely(vec_id >= BUF_VECTOR_MAX))
923 return -1;
924
925 if (unlikely(*desc_count >= vq->size))
926 return -1;
927
928 *desc_count += 1;
929 *buf_id = descs[avail_idx].id;
930
931 if (descs[avail_idx].flags & VRING_DESC_F_INDIRECT) {
932 if (unlikely(fill_vec_buf_packed_indirect(dev, vq,
933 &descs[avail_idx],
934 &vec_id, buf_vec,
935 len, perm) < 0))
936 return -1;
937 } else {
938 dlen = descs[avail_idx].len;
939 *len += dlen;
940
941 if (unlikely(map_one_desc(dev, vq, buf_vec, &vec_id,
942 descs[avail_idx].addr,
943 dlen,
944 perm)))
945 return -1;
946 }
947
948 if ((descs[avail_idx].flags & VRING_DESC_F_NEXT) == 0)
949 break;
950
951 if (++avail_idx >= vq->size) {
952 avail_idx -= vq->size;
953 wrap_counter ^= 1;
954 }
955 }
956
957 *vec_idx = vec_id;
958
959 return 0;
960}
961
962static __rte_noinline void
963copy_vnet_hdr_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
964 struct buf_vector *buf_vec,
965 struct virtio_net_hdr_mrg_rxbuf *hdr)
966{
967 uint64_t len;
968 uint64_t remain = dev->vhost_hlen;
969 uint64_t src = (uint64_t)(uintptr_t)hdr, dst;
970 uint64_t iova = buf_vec->buf_iova;
971
972 while (remain) {
973 len = RTE_MIN(remain,
974 buf_vec->buf_len);
975 dst = buf_vec->buf_addr;
976 rte_memcpy((void *)(uintptr_t)dst,
977 (void *)(uintptr_t)src,
978 len);
979
980 PRINT_PACKET(dev, (uintptr_t)dst,
981 (uint32_t)len, 0);
982 vhost_log_cache_write_iova(dev, vq,
983 iova, len);
984
985 remain -= len;
986 iova += len;
987 src += len;
988 buf_vec++;
989 }
990}
991
992static __rte_always_inline int
993async_iter_initialize(struct virtio_net *dev, struct vhost_async *async)
994{
995 struct vhost_iov_iter *iter;
996
997 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) {
998 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n");
999 return -1;
1000 }
1001
1002 iter = async->iov_iter + async->iter_idx;
1003 iter->iov = async->iovec + async->iovec_idx;
1004 iter->nr_segs = 0;
1005
1006 return 0;
1007}
1008
1009static __rte_always_inline int
1010async_iter_add_iovec(struct virtio_net *dev, struct vhost_async *async,
1011 void *src, void *dst, size_t len)
1012{
1013 struct vhost_iov_iter *iter;
1014 struct vhost_iovec *iovec;
1015
1016 if (unlikely(async->iovec_idx >= VHOST_MAX_ASYNC_VEC)) {
1017 static bool vhost_max_async_vec_log;
1018
1019 if (!vhost_max_async_vec_log) {
1020 VHOST_LOG_DATA(dev->ifname, ERR, "no more async iovec available\n");
1021 vhost_max_async_vec_log = true;
1022 }
1023
1024 return -1;
1025 }
1026
1027 iter = async->iov_iter + async->iter_idx;
1028 iovec = async->iovec + async->iovec_idx;
1029
1030 iovec->src_addr = src;
1031 iovec->dst_addr = dst;
1032 iovec->len = len;
1033
1034 iter->nr_segs++;
1035 async->iovec_idx++;
1036
1037 return 0;
1038}
1039
1040static __rte_always_inline void
1041async_iter_finalize(struct vhost_async *async)
1042{
1043 async->iter_idx++;
1044}
1045
1046static __rte_always_inline void
1047async_iter_cancel(struct vhost_async *async)
1048{
1049 struct vhost_iov_iter *iter;
1050
1051 iter = async->iov_iter + async->iter_idx;
1052 async->iovec_idx -= iter->nr_segs;
1053 iter->nr_segs = 0;
1054 iter->iov = NULL;
1055}
1056
1057static __rte_always_inline void
1058async_iter_reset(struct vhost_async *async)
1059{
1060 async->iter_idx = 0;
1061 async->iovec_idx = 0;
1062}
1063
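/*
 * Build DMA iovecs for one copy segment: resolve the guest buffer into
 * host-physical-contiguous chunks and append a source/destination pair per
 * chunk, in the direction selected by to_desc.
 */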
1064static __rte_always_inline int
1065async_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
1066 struct rte_mbuf *m, uint32_t mbuf_offset,
1067 uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
1068{
1069 struct vhost_async *async = vq->async;
1070 uint64_t mapped_len;
1071 uint32_t buf_offset = 0;
1072 void *src, *dst;
1073 void *host_iova;
1074
1075 while (cpy_len) {
1076 host_iova = (void *)(uintptr_t)gpa_to_first_hpa(dev,
1077 buf_iova + buf_offset, cpy_len, &mapped_len);
1078 if (unlikely(!host_iova)) {
1079 VHOST_LOG_DATA(dev->ifname, ERR,
1080 "%s: failed to get host iova.\n",
1081 __func__);
1082 return -1;
1083 }
1084
1085 if (to_desc) {
1086 src = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
1087 dst = host_iova;
1088 } else {
1089 src = host_iova;
1090 dst = (void *)(uintptr_t)rte_pktmbuf_iova_offset(m, mbuf_offset);
1091 }
1092
1093 if (unlikely(async_iter_add_iovec(dev, async, src, dst, (size_t)mapped_len)))
1094 return -1;
1095
1096 cpy_len -= (uint32_t)mapped_len;
1097 mbuf_offset += (uint32_t)mapped_len;
1098 buf_offset += (uint32_t)mapped_len;
1099 }
1100
1101 return 0;
1102}
1103
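/*
 * CPU copy of one segment between mbuf and descriptor buffer. Large copies
 * are performed immediately; small ones are queued in the batch copy array
 * and executed later by do_data_copy_enqueue()/do_data_copy_dequeue().
 */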
1104static __rte_always_inline void
1105sync_fill_seg(struct virtio_net *dev, struct vhost_virtqueue *vq,
1106 struct rte_mbuf *m, uint32_t mbuf_offset,
1107 uint64_t buf_addr, uint64_t buf_iova, uint32_t cpy_len, bool to_desc)
1108{
1109 struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
1110
1111 if (likely(cpy_len > MAX_BATCH_LEN || vq->batch_copy_nb_elems >= vq->size)) {
1112 if (to_desc) {
1113 rte_memcpy((void *)((uintptr_t)(buf_addr)),
1114 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1115 cpy_len);
1116 vhost_log_cache_write_iova(dev, vq, buf_iova, cpy_len);
1117 PRINT_PACKET(dev, (uintptr_t)(buf_addr), cpy_len, 0);
1118 } else {
1119 rte_memcpy(rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
1120 (void *)((uintptr_t)(buf_addr)),
1121 cpy_len);
1122 }
1123 } else {
1124 if (to_desc) {
1125 batch_copy[vq->batch_copy_nb_elems].dst =
1126 (void *)((uintptr_t)(buf_addr));
1127 batch_copy[vq->batch_copy_nb_elems].src =
1128 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
1129 batch_copy[vq->batch_copy_nb_elems].log_addr = buf_iova;
1130 } else {
1131 batch_copy[vq->batch_copy_nb_elems].dst =
1132 rte_pktmbuf_mtod_offset(m, void *, mbuf_offset);
1133 batch_copy[vq->batch_copy_nb_elems].src =
1134 (void *)((uintptr_t)(buf_addr));
1135 }
1136 batch_copy[vq->batch_copy_nb_elems].len = cpy_len;
1137 vq->batch_copy_nb_elems++;
1138 }
1139}
1140
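/*
 * Copy one mbuf chain into the guest buffers described by buf_vec,
 * prepending the virtio-net header. With is_async set, the copies are
 * recorded as DMA iovecs instead of being done by the CPU.
 */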
1141static __rte_always_inline int
1142mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
1143 struct rte_mbuf *m, struct buf_vector *buf_vec,
1144 uint16_t nr_vec, uint16_t num_buffers, bool is_async)
1145{
1146 uint32_t vec_idx = 0;
1147 uint32_t mbuf_offset, mbuf_avail;
1148 uint32_t buf_offset, buf_avail;
1149 uint64_t buf_addr, buf_iova, buf_len;
1150 uint32_t cpy_len;
1151 uint64_t hdr_addr;
1152 struct rte_mbuf *hdr_mbuf;
1153 struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
1154 struct vhost_async *async = vq->async;
1155
1156 if (unlikely(m == NULL))
1157 return -1;
1158
1159 buf_addr = buf_vec[vec_idx].buf_addr;
1160 buf_iova = buf_vec[vec_idx].buf_iova;
1161 buf_len = buf_vec[vec_idx].buf_len;
1162
1163 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
1164 return -1;
1165
1166 hdr_mbuf = m;
1167 hdr_addr = buf_addr;
1168 if (unlikely(buf_len < dev->vhost_hlen)) {
1169 memset(&tmp_hdr, 0, sizeof(struct virtio_net_hdr_mrg_rxbuf));
1170 hdr = &tmp_hdr;
1171 } else
1172 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
1173
1174 VHOST_LOG_DATA(dev->ifname, DEBUG, "RX: num merge buffers %d\n", num_buffers);
1175
1176 if (unlikely(buf_len < dev->vhost_hlen)) {
1177 buf_offset = dev->vhost_hlen - buf_len;
1178 vec_idx++;
1179 buf_addr = buf_vec[vec_idx].buf_addr;
1180 buf_iova = buf_vec[vec_idx].buf_iova;
1181 buf_len = buf_vec[vec_idx].buf_len;
1182 buf_avail = buf_len - buf_offset;
1183 } else {
1184 buf_offset = dev->vhost_hlen;
1185 buf_avail = buf_len - dev->vhost_hlen;
1186 }
1187
1188 mbuf_avail = rte_pktmbuf_data_len(m);
1189 mbuf_offset = 0;
1190
1191 if (is_async) {
1192 if (async_iter_initialize(dev, async))
1193 return -1;
1194 }
1195
1196 while (mbuf_avail != 0 || m->next != NULL) {
		/* done with current desc buf, get the next one */
1198 if (buf_avail == 0) {
1199 vec_idx++;
1200 if (unlikely(vec_idx >= nr_vec))
1201 goto error;
1202
1203 buf_addr = buf_vec[vec_idx].buf_addr;
1204 buf_iova = buf_vec[vec_idx].buf_iova;
1205 buf_len = buf_vec[vec_idx].buf_len;
1206
1207 buf_offset = 0;
1208 buf_avail = buf_len;
1209 }
1210
		/* done with current mbuf, get the next one */
1212 if (mbuf_avail == 0) {
1213 m = m->next;
1214
1215 mbuf_offset = 0;
1216 mbuf_avail = rte_pktmbuf_data_len(m);
1217 }
1218
1219 if (hdr_addr) {
1220 virtio_enqueue_offload(hdr_mbuf, &hdr->hdr);
1221 if (rxvq_is_mergeable(dev))
1222 ASSIGN_UNLESS_EQUAL(hdr->num_buffers,
1223 num_buffers);
1224
1225 if (unlikely(hdr == &tmp_hdr)) {
1226 copy_vnet_hdr_to_desc(dev, vq, buf_vec, hdr);
1227 } else {
1228 PRINT_PACKET(dev, (uintptr_t)hdr_addr,
1229 dev->vhost_hlen, 0);
1230 vhost_log_cache_write_iova(dev, vq,
1231 buf_vec[0].buf_iova,
1232 dev->vhost_hlen);
1233 }
1234
1235 hdr_addr = 0;
1236 }
1237
1238 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
1239
1240 if (is_async) {
1241 if (async_fill_seg(dev, vq, m, mbuf_offset,
1242 buf_iova + buf_offset, cpy_len, true) < 0)
1243 goto error;
1244 } else {
1245 sync_fill_seg(dev, vq, m, mbuf_offset,
1246 buf_addr + buf_offset,
1247 buf_iova + buf_offset, cpy_len, true);
1248 }
1249
1250 mbuf_avail -= cpy_len;
1251 mbuf_offset += cpy_len;
1252 buf_avail -= cpy_len;
1253 buf_offset += cpy_len;
1254 }
1255
1256 if (is_async)
1257 async_iter_finalize(async);
1258
1259 return 0;
1260error:
1261 if (is_async)
1262 async_iter_cancel(async);
1263
1264 return -1;
1265}
1266
1267static __rte_always_inline int
1268vhost_enqueue_single_packed(struct virtio_net *dev,
1269 struct vhost_virtqueue *vq,
1270 struct rte_mbuf *pkt,
1271 struct buf_vector *buf_vec,
1272 uint16_t *nr_descs)
1273{
1274 uint16_t nr_vec = 0;
1275 uint16_t avail_idx = vq->last_avail_idx;
1276 uint16_t max_tries, tries = 0;
1277 uint16_t buf_id = 0;
1278 uint32_t len = 0;
1279 uint16_t desc_count;
1280 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1281 uint16_t num_buffers = 0;
1282 uint32_t buffer_len[vq->size];
1283 uint16_t buffer_buf_id[vq->size];
1284 uint16_t buffer_desc_count[vq->size];
1285
1286 if (rxvq_is_mergeable(dev))
1287 max_tries = vq->size - 1;
1288 else
1289 max_tries = 1;
1290
1291 while (size > 0) {
		/*
		 * If we tried all available ring items and still
		 * can't get enough buffers, something abnormal
		 * has happened.
		 */
1297 if (unlikely(++tries > max_tries))
1298 return -1;
1299
1300 if (unlikely(fill_vec_buf_packed(dev, vq,
1301 avail_idx, &desc_count,
1302 buf_vec, &nr_vec,
1303 &buf_id, &len,
1304 VHOST_ACCESS_RW) < 0))
1305 return -1;
1306
1307 len = RTE_MIN(len, size);
1308 size -= len;
1309
1310 buffer_len[num_buffers] = len;
1311 buffer_buf_id[num_buffers] = buf_id;
1312 buffer_desc_count[num_buffers] = desc_count;
1313 num_buffers += 1;
1314
1315 *nr_descs += desc_count;
1316 avail_idx += desc_count;
1317 if (avail_idx >= vq->size)
1318 avail_idx -= vq->size;
1319 }
1320
1321 if (mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, num_buffers, false) < 0)
1322 return -1;
1323
1324 vhost_shadow_enqueue_single_packed(dev, vq, buffer_len, buffer_buf_id,
1325 buffer_desc_count, num_buffers);
1326
1327 return 0;
1328}
1329
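/*
 * Synchronous enqueue path for split virtqueues: reserve guest buffers,
 * copy the packets, then flush the shadow used ring and kick the guest.
 * Returns the number of packets enqueued.
 */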
1330static __rte_noinline uint32_t
1331virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
1332 struct rte_mbuf **pkts, uint32_t count)
1333{
1334 uint32_t pkt_idx = 0;
1335 uint16_t num_buffers;
1336 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1337 uint16_t avail_head;
1338
	/*
	 * The ordering between avail index and
	 * desc reads needs to be enforced.
	 */
1343 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1344
1345 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1346
1347 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1348 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1349 uint16_t nr_vec = 0;
1350
1351 if (unlikely(reserve_avail_buf_split(dev, vq,
1352 pkt_len, buf_vec, &num_buffers,
1353 avail_head, &nr_vec) < 0)) {
1354 VHOST_LOG_DATA(dev->ifname, DEBUG,
1355 "failed to get enough desc from vring\n");
1356 vq->shadow_used_idx -= num_buffers;
1357 break;
1358 }
1359
1360 VHOST_LOG_DATA(dev->ifname, DEBUG,
1361 "current index %d | end index %d\n",
1362 vq->last_avail_idx, vq->last_avail_idx + num_buffers);
1363
1364 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec,
1365 num_buffers, false) < 0) {
1366 vq->shadow_used_idx -= num_buffers;
1367 break;
1368 }
1369
1370 vq->last_avail_idx += num_buffers;
1371 }
1372
1373 do_data_copy_enqueue(dev, vq);
1374
1375 if (likely(vq->shadow_used_idx)) {
1376 flush_shadow_used_ring_split(dev, vq);
1377 vhost_vring_call_split(dev, vq);
1378 }
1379
1380 return pkt_idx;
1381}
1382
1383static __rte_always_inline int
1384virtio_dev_rx_sync_batch_check(struct virtio_net *dev,
1385 struct vhost_virtqueue *vq,
1386 struct rte_mbuf **pkts,
1387 uint64_t *desc_addrs,
1388 uint64_t *lens)
1389{
1390 bool wrap_counter = vq->avail_wrap_counter;
1391 struct vring_packed_desc *descs = vq->desc_packed;
1392 uint16_t avail_idx = vq->last_avail_idx;
1393 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1394 uint16_t i;
1395
1396 if (unlikely(avail_idx & PACKED_BATCH_MASK))
1397 return -1;
1398
1399 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
1400 return -1;
1401
1402 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1403 if (unlikely(pkts[i]->next != NULL))
1404 return -1;
1405 if (unlikely(!desc_is_avail(&descs[avail_idx + i],
1406 wrap_counter)))
1407 return -1;
1408 }
1409
1410 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1411 lens[i] = descs[avail_idx + i].len;
1412
1413 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1414 if (unlikely(pkts[i]->pkt_len > (lens[i] - buf_offset)))
1415 return -1;
1416 }
1417
1418 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1419 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
1420 descs[avail_idx + i].addr,
1421 &lens[i],
1422 VHOST_ACCESS_RW);
1423
1424 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1425 if (unlikely(!desc_addrs[i]))
1426 return -1;
1427 if (unlikely(lens[i] != descs[avail_idx + i].len))
1428 return -1;
1429 }
1430
1431 return 0;
1432}
1433
1434static __rte_always_inline void
1435virtio_dev_rx_batch_packed_copy(struct virtio_net *dev,
1436 struct vhost_virtqueue *vq,
1437 struct rte_mbuf **pkts,
1438 uint64_t *desc_addrs,
1439 uint64_t *lens)
1440{
1441 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1442 struct virtio_net_hdr_mrg_rxbuf *hdrs[PACKED_BATCH_SIZE];
1443 struct vring_packed_desc *descs = vq->desc_packed;
1444 uint16_t avail_idx = vq->last_avail_idx;
1445 uint16_t ids[PACKED_BATCH_SIZE];
1446 uint16_t i;
1447
1448 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1449 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
1450 hdrs[i] = (struct virtio_net_hdr_mrg_rxbuf *)
1451 (uintptr_t)desc_addrs[i];
1452 lens[i] = pkts[i]->pkt_len +
1453 sizeof(struct virtio_net_hdr_mrg_rxbuf);
1454 }
1455
1456 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1457 virtio_enqueue_offload(pkts[i], &hdrs[i]->hdr);
1458
1459 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
1460
1461 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
1462 rte_memcpy((void *)(uintptr_t)(desc_addrs[i] + buf_offset),
1463 rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
1464 pkts[i]->pkt_len);
1465 }
1466
1467 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1468 vhost_log_cache_write_iova(dev, vq, descs[avail_idx + i].addr,
1469 lens[i]);
1470
1471 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
1472 ids[i] = descs[avail_idx + i].id;
1473
1474 vhost_flush_enqueue_batch_packed(dev, vq, lens, ids);
1475}
1476
1477static __rte_always_inline int
1478virtio_dev_rx_sync_batch_packed(struct virtio_net *dev,
1479 struct vhost_virtqueue *vq,
1480 struct rte_mbuf **pkts)
1481{
1482 uint64_t desc_addrs[PACKED_BATCH_SIZE];
1483 uint64_t lens[PACKED_BATCH_SIZE];
1484
1485 if (virtio_dev_rx_sync_batch_check(dev, vq, pkts, desc_addrs, lens) == -1)
1486 return -1;
1487
1488 if (vq->shadow_used_idx) {
1489 do_data_copy_enqueue(dev, vq);
1490 vhost_flush_enqueue_shadow_packed(dev, vq);
1491 }
1492
1493 virtio_dev_rx_batch_packed_copy(dev, vq, pkts, desc_addrs, lens);
1494
1495 return 0;
1496}
1497
1498static __rte_always_inline int16_t
1499virtio_dev_rx_single_packed(struct virtio_net *dev,
1500 struct vhost_virtqueue *vq,
1501 struct rte_mbuf *pkt)
1502{
1503 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1504 uint16_t nr_descs = 0;
1505
1506 if (unlikely(vhost_enqueue_single_packed(dev, vq, pkt, buf_vec,
1507 &nr_descs) < 0)) {
1508 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n");
1509 return -1;
1510 }
1511
1512 VHOST_LOG_DATA(dev->ifname, DEBUG,
1513 "current index %d | end index %d\n",
1514 vq->last_avail_idx, vq->last_avail_idx + nr_descs);
1515
1516 vq_inc_last_avail_packed(vq, nr_descs);
1517
1518 return 0;
1519}
1520
1521static __rte_noinline uint32_t
1522virtio_dev_rx_packed(struct virtio_net *dev,
1523 struct vhost_virtqueue *__rte_restrict vq,
1524 struct rte_mbuf **__rte_restrict pkts,
1525 uint32_t count)
1526{
1527 uint32_t pkt_idx = 0;
1528
1529 do {
1530 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
1531
1532 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
1533 if (!virtio_dev_rx_sync_batch_packed(dev, vq,
1534 &pkts[pkt_idx])) {
1535 pkt_idx += PACKED_BATCH_SIZE;
1536 continue;
1537 }
1538 }
1539
1540 if (virtio_dev_rx_single_packed(dev, vq, pkts[pkt_idx]))
1541 break;
1542 pkt_idx++;
1543
1544 } while (pkt_idx < count);
1545
1546 if (vq->shadow_used_idx) {
1547 do_data_copy_enqueue(dev, vq);
1548 vhost_flush_enqueue_shadow_packed(dev, vq);
1549 }
1550
1551 if (pkt_idx)
1552 vhost_vring_call_packed(dev, vq);
1553
1554 return pkt_idx;
1555}
1556
1557static __rte_always_inline uint32_t
1558virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
1559 struct rte_mbuf **pkts, uint32_t count)
1560{
1561 struct vhost_virtqueue *vq;
1562 uint32_t nb_tx = 0;
1563
1564 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
1565 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
1566 VHOST_LOG_DATA(dev->ifname, ERR,
1567 "%s: invalid virtqueue idx %d.\n",
1568 __func__, queue_id);
1569 return 0;
1570 }
1571
1572 vq = dev->virtqueue[queue_id];
1573
1574 rte_spinlock_lock(&vq->access_lock);
1575
1576 if (unlikely(!vq->enabled))
1577 goto out_access_unlock;
1578
1579 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1580 vhost_user_iotlb_rd_lock(vq);
1581
1582 if (unlikely(!vq->access_ok))
1583 if (unlikely(vring_translate(dev, vq) < 0))
1584 goto out;
1585
1586 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
1587 if (count == 0)
1588 goto out;
1589
1590 if (vq_is_packed(dev))
1591 nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
1592 else
1593 nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
1594
1595 vhost_queue_stats_update(dev, vq, pkts, nb_tx);
1596
1597out:
1598 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
1599 vhost_user_iotlb_rd_unlock(vq);
1600
1601out_access_unlock:
1602 rte_spinlock_unlock(&vq->access_lock);
1603
1604 return nb_tx;
1605}
1606
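/*
 * Public API: enqueue a burst of host mbufs into the guest RX virtqueue
 * identified by queue_id (an even index, e.g. VIRTIO_RXQ). A minimal usage
 * sketch, assuming "vid" and "mbufs" come from the application:
 *
 *	uint16_t sent = rte_vhost_enqueue_burst(vid, VIRTIO_RXQ, mbufs, nb);
 *	// data is copied into guest buffers; the caller still owns and
 *	// frees mbufs[0..nb-1]
 *
 * Returns the number of packets actually enqueued.
 */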
1607uint16_t
1608rte_vhost_enqueue_burst(int vid, uint16_t queue_id,
1609 struct rte_mbuf **__rte_restrict pkts, uint16_t count)
1610{
1611 struct virtio_net *dev = get_device(vid);
1612
1613 if (!dev)
1614 return 0;
1615
1616 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
1617 VHOST_LOG_DATA(dev->ifname, ERR,
1618 "%s: built-in vhost net backend is disabled.\n",
1619 __func__);
1620 return 0;
1621 }
1622
1623 return virtio_dev_rx(dev, queue_id, pkts, count);
1624}
1625
1626static __rte_always_inline uint16_t
1627async_get_first_inflight_pkt_idx(struct vhost_virtqueue *vq)
1628{
1629 struct vhost_async *async = vq->async;
1630
1631 if (async->pkts_idx >= async->pkts_inflight_n)
1632 return async->pkts_idx - async->pkts_inflight_n;
1633 else
1634 return vq->size - async->pkts_inflight_n + async->pkts_idx;
1635}
1636
1637static __rte_always_inline void
1638store_dma_desc_info_split(struct vring_used_elem *s_ring, struct vring_used_elem *d_ring,
1639 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1640{
1641 size_t elem_size = sizeof(struct vring_used_elem);
1642
1643 if (d_idx + count <= ring_size) {
1644 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
1645 } else {
1646 uint16_t size = ring_size - d_idx;
1647
1648 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1649 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
1650 }
1651}
1652
1653static __rte_always_inline void
1654store_dma_desc_info_packed(struct vring_used_elem_packed *s_ring,
1655 struct vring_used_elem_packed *d_ring,
1656 uint16_t ring_size, uint16_t s_idx, uint16_t d_idx, uint16_t count)
1657{
1658 size_t elem_size = sizeof(struct vring_used_elem_packed);
1659
1660 if (d_idx + count <= ring_size) {
1661 rte_memcpy(d_ring + d_idx, s_ring + s_idx, count * elem_size);
1662 } else {
1663 uint16_t size = ring_size - d_idx;
1664
1665 rte_memcpy(d_ring + d_idx, s_ring + s_idx, size * elem_size);
1666 rte_memcpy(d_ring, s_ring + s_idx + size, (count - size) * elem_size);
1667 }
1668}
1669
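/*
 * Asynchronous enqueue path for split virtqueues: reserve guest buffers and
 * build iovecs, hand the copies to the DMA channel, and park the shadow
 * used entries until completions are polled. Packets whose copies could not
 * be submitted are rolled back.
 */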
1670static __rte_noinline uint32_t
1671virtio_dev_rx_async_submit_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
1672 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
1673 int16_t dma_id, uint16_t vchan_id)
1674{
1675 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1676 uint32_t pkt_idx = 0;
1677 uint16_t num_buffers;
1678 uint16_t avail_head;
1679
1680 struct vhost_async *async = vq->async;
1681 struct async_inflight_info *pkts_info = async->pkts_info;
1682 uint32_t pkt_err = 0;
1683 uint16_t n_xfer;
1684 uint16_t slot_idx = 0;
1685
	/*
	 * The ordering between avail index and desc reads needs to be enforced.
	 */
1689 avail_head = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE);
1690
1691 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
1692
1693 async_iter_reset(async);
1694
1695 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
1696 uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen;
1697 uint16_t nr_vec = 0;
1698
1699 if (unlikely(reserve_avail_buf_split(dev, vq, pkt_len, buf_vec,
1700 &num_buffers, avail_head, &nr_vec) < 0)) {
1701 VHOST_LOG_DATA(dev->ifname, DEBUG,
1702 "failed to get enough desc from vring\n");
1703 vq->shadow_used_idx -= num_buffers;
1704 break;
1705 }
1706
1707 VHOST_LOG_DATA(dev->ifname, DEBUG,
1708 "current index %d | end index %d\n",
1709 vq->last_avail_idx, vq->last_avail_idx + num_buffers);
1710
1711 if (mbuf_to_desc(dev, vq, pkts[pkt_idx], buf_vec, nr_vec, num_buffers, true) < 0) {
1712 vq->shadow_used_idx -= num_buffers;
1713 break;
1714 }
1715
1716 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
1717 pkts_info[slot_idx].descs = num_buffers;
1718 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
1719
1720 vq->last_avail_idx += num_buffers;
1721 }
1722
1723 if (unlikely(pkt_idx == 0))
1724 return 0;
1725
1726 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
1727 async->iov_iter, pkt_idx);
1728
1729 pkt_err = pkt_idx - n_xfer;
1730 if (unlikely(pkt_err)) {
1731 uint16_t num_descs = 0;
1732
1733 VHOST_LOG_DATA(dev->ifname, DEBUG,
1734 "%s: failed to transfer %u packets for queue %u.\n",
1735 __func__, pkt_err, queue_id);

		/* update the number of completed packets */
1738 pkt_idx = n_xfer;

		/* calculate the sum of descriptors to revert */
1741 while (pkt_err-- > 0) {
1742 num_descs += pkts_info[slot_idx & (vq->size - 1)].descs;
1743 slot_idx--;
1744 }
1745
		/* recover the shadow used ring and the available ring */
1747 vq->shadow_used_idx -= num_descs;
1748 vq->last_avail_idx -= num_descs;
1749 }
1750
	/* keep used descriptors */
1752 if (likely(vq->shadow_used_idx)) {
1753 uint16_t to = async->desc_idx_split & (vq->size - 1);
1754
1755 store_dma_desc_info_split(vq->shadow_used_split,
1756 async->descs_split, vq->size, 0, to,
1757 vq->shadow_used_idx);
1758
1759 async->desc_idx_split += vq->shadow_used_idx;
1760
1761 async->pkts_idx += pkt_idx;
1762 if (async->pkts_idx >= vq->size)
1763 async->pkts_idx -= vq->size;
1764
1765 async->pkts_inflight_n += pkt_idx;
1766 vq->shadow_used_idx = 0;
1767 }
1768
1769 return pkt_idx;
1770}
1771
1772
1773static __rte_always_inline int
1774vhost_enqueue_async_packed(struct virtio_net *dev,
1775 struct vhost_virtqueue *vq,
1776 struct rte_mbuf *pkt,
1777 struct buf_vector *buf_vec,
1778 uint16_t *nr_descs,
1779 uint16_t *nr_buffers)
1780{
1781 uint16_t nr_vec = 0;
1782 uint16_t avail_idx = vq->last_avail_idx;
1783 uint16_t max_tries, tries = 0;
1784 uint16_t buf_id = 0;
1785 uint32_t len = 0;
1786 uint16_t desc_count = 0;
1787 uint32_t size = pkt->pkt_len + sizeof(struct virtio_net_hdr_mrg_rxbuf);
1788 uint32_t buffer_len[vq->size];
1789 uint16_t buffer_buf_id[vq->size];
1790 uint16_t buffer_desc_count[vq->size];
1791
1792 if (rxvq_is_mergeable(dev))
1793 max_tries = vq->size - 1;
1794 else
1795 max_tries = 1;
1796
1797 while (size > 0) {
		/*
		 * If we tried all available ring items and still
		 * can't get enough buffers, something abnormal
		 * has happened.
		 */
1803 if (unlikely(++tries > max_tries))
1804 return -1;
1805
1806 if (unlikely(fill_vec_buf_packed(dev, vq,
1807 avail_idx, &desc_count,
1808 buf_vec, &nr_vec,
1809 &buf_id, &len,
1810 VHOST_ACCESS_RW) < 0))
1811 return -1;
1812
1813 len = RTE_MIN(len, size);
1814 size -= len;
1815
1816 buffer_len[*nr_buffers] = len;
1817 buffer_buf_id[*nr_buffers] = buf_id;
1818 buffer_desc_count[*nr_buffers] = desc_count;
1819 *nr_buffers += 1;
1820 *nr_descs += desc_count;
1821 avail_idx += desc_count;
1822 if (avail_idx >= vq->size)
1823 avail_idx -= vq->size;
1824 }
1825
1826 if (unlikely(mbuf_to_desc(dev, vq, pkt, buf_vec, nr_vec, *nr_buffers, true) < 0))
1827 return -1;
1828
1829 vhost_shadow_enqueue_packed(vq, buffer_len, buffer_buf_id, buffer_desc_count, *nr_buffers);
1830
1831 return 0;
1832}
1833
1834static __rte_always_inline int16_t
1835virtio_dev_rx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
1836 struct rte_mbuf *pkt, uint16_t *nr_descs, uint16_t *nr_buffers)
1837{
1838 struct buf_vector buf_vec[BUF_VECTOR_MAX];
1839
1840 if (unlikely(vhost_enqueue_async_packed(dev, vq, pkt, buf_vec,
1841 nr_descs, nr_buffers) < 0)) {
1842 VHOST_LOG_DATA(dev->ifname, DEBUG, "failed to get enough desc from vring\n");
1843 return -1;
1844 }
1845
1846 VHOST_LOG_DATA(dev->ifname, DEBUG,
1847 "current index %d | end index %d\n",
1848 vq->last_avail_idx, vq->last_avail_idx + *nr_descs);
1849
1850 return 0;
1851}
1852
1853static __rte_always_inline void
1854dma_error_handler_packed(struct vhost_virtqueue *vq, uint16_t slot_idx,
1855 uint32_t nr_err, uint32_t *pkt_idx)
1856{
1857 uint16_t descs_err = 0;
1858 uint16_t buffers_err = 0;
1859 struct async_inflight_info *pkts_info = vq->async->pkts_info;
1860
1861 *pkt_idx -= nr_err;
1862
1863 while (nr_err-- > 0) {
1864 descs_err += pkts_info[slot_idx % vq->size].descs;
1865 buffers_err += pkts_info[slot_idx % vq->size].nr_buffers;
1866 slot_idx--;
1867 }
1868
1869 if (vq->last_avail_idx >= descs_err) {
1870 vq->last_avail_idx -= descs_err;
1871 } else {
1872 vq->last_avail_idx = vq->last_avail_idx + vq->size - descs_err;
1873 vq->avail_wrap_counter ^= 1;
1874 }
1875
1876 vq->shadow_used_idx -= buffers_err;
1877}
1878
1879static __rte_noinline uint32_t
1880virtio_dev_rx_async_submit_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
1881 uint16_t queue_id, struct rte_mbuf **pkts, uint32_t count,
1882 int16_t dma_id, uint16_t vchan_id)
1883{
1884 uint32_t pkt_idx = 0;
1885 uint32_t remained = count;
1886 uint16_t n_xfer;
1887 uint16_t num_buffers;
1888 uint16_t num_descs;
1889
1890 struct vhost_async *async = vq->async;
1891 struct async_inflight_info *pkts_info = async->pkts_info;
1892 uint32_t pkt_err = 0;
1893 uint16_t slot_idx = 0;
1894
1895 do {
1896 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
1897
1898 num_buffers = 0;
1899 num_descs = 0;
1900 if (unlikely(virtio_dev_rx_async_packed(dev, vq, pkts[pkt_idx],
1901 &num_descs, &num_buffers) < 0))
1902 break;
1903
1904 slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
1905
1906 pkts_info[slot_idx].descs = num_descs;
1907 pkts_info[slot_idx].nr_buffers = num_buffers;
1908 pkts_info[slot_idx].mbuf = pkts[pkt_idx];
1909
1910 pkt_idx++;
1911 remained--;
1912 vq_inc_last_avail_packed(vq, num_descs);
1913 } while (pkt_idx < count);
1914
1915 if (unlikely(pkt_idx == 0))
1916 return 0;
1917
1918 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
1919 async->iov_iter, pkt_idx);
1920
1921 async_iter_reset(async);
1922
1923 pkt_err = pkt_idx - n_xfer;
1924 if (unlikely(pkt_err)) {
1925 VHOST_LOG_DATA(dev->ifname, DEBUG,
1926 "%s: failed to transfer %u packets for queue %u.\n",
1927 __func__, pkt_err, queue_id);
1928 dma_error_handler_packed(vq, slot_idx, pkt_err, &pkt_idx);
1929 }
1930
1931 if (likely(vq->shadow_used_idx)) {
		/* keep used descriptors */
1933 store_dma_desc_info_packed(vq->shadow_used_packed, async->buffers_packed,
1934 vq->size, 0, async->buffer_idx_packed,
1935 vq->shadow_used_idx);
1936
1937 async->buffer_idx_packed += vq->shadow_used_idx;
1938 if (async->buffer_idx_packed >= vq->size)
1939 async->buffer_idx_packed -= vq->size;
1940
1941 async->pkts_idx += pkt_idx;
1942 if (async->pkts_idx >= vq->size)
1943 async->pkts_idx -= vq->size;
1944
1945 vq->shadow_used_idx = 0;
1946 async->pkts_inflight_n += pkt_idx;
1947 }
1948
1949 return pkt_idx;
1950}
1951
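/*
 * Write the used-ring entries of DMA-completed packets back to the split
 * virtqueue, copying from the async descriptor ring in up to two chunks to
 * handle wrap-around.
 */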
1952static __rte_always_inline void
1953write_back_completed_descs_split(struct vhost_virtqueue *vq, uint16_t n_descs)
1954{
1955 struct vhost_async *async = vq->async;
1956 uint16_t nr_left = n_descs;
1957 uint16_t nr_copy;
1958 uint16_t to, from;
1959
1960 do {
1961 from = async->last_desc_idx_split & (vq->size - 1);
1962 nr_copy = nr_left + from <= vq->size ? nr_left : vq->size - from;
1963 to = vq->last_used_idx & (vq->size - 1);
1964
1965 if (to + nr_copy <= vq->size) {
1966 rte_memcpy(&vq->used->ring[to], &async->descs_split[from],
1967 nr_copy * sizeof(struct vring_used_elem));
1968 } else {
1969 uint16_t size = vq->size - to;
1970
1971 rte_memcpy(&vq->used->ring[to], &async->descs_split[from],
1972 size * sizeof(struct vring_used_elem));
1973 rte_memcpy(&vq->used->ring[0], &async->descs_split[from + size],
1974 (nr_copy - size) * sizeof(struct vring_used_elem));
1975 }
1976
1977 async->last_desc_idx_split += nr_copy;
1978 vq->last_used_idx += nr_copy;
1979 nr_left -= nr_copy;
1980 } while (nr_left > 0);
1981}
1982
1983static __rte_always_inline void
1984write_back_completed_descs_packed(struct vhost_virtqueue *vq,
1985 uint16_t n_buffers)
1986{
1987 struct vhost_async *async = vq->async;
1988 uint16_t from = async->last_buffer_idx_packed;
1989 uint16_t used_idx = vq->last_used_idx;
1990 uint16_t head_idx = vq->last_used_idx;
1991 uint16_t head_flags = 0;
1992 uint16_t i;
1993
	/* Split loop in two to save memory barriers */
1995 for (i = 0; i < n_buffers; i++) {
1996 vq->desc_packed[used_idx].id = async->buffers_packed[from].id;
1997 vq->desc_packed[used_idx].len = async->buffers_packed[from].len;
1998
1999 used_idx += async->buffers_packed[from].count;
2000 if (used_idx >= vq->size)
2001 used_idx -= vq->size;
2002
2003 from++;
2004 if (from >= vq->size)
2005 from = 0;
2006 }
2007
	/* The ordering for storing desc flags needs to be enforced. */
2009 rte_atomic_thread_fence(__ATOMIC_RELEASE);
2010
2011 from = async->last_buffer_idx_packed;
2012
2013 for (i = 0; i < n_buffers; i++) {
2014 uint16_t flags;
2015
2016 if (async->buffers_packed[from].len)
2017 flags = VRING_DESC_F_WRITE;
2018 else
2019 flags = 0;
2020
2021 if (vq->used_wrap_counter) {
2022 flags |= VRING_DESC_F_USED;
2023 flags |= VRING_DESC_F_AVAIL;
2024 } else {
2025 flags &= ~VRING_DESC_F_USED;
2026 flags &= ~VRING_DESC_F_AVAIL;
2027 }
2028
2029 if (i > 0) {
2030 vq->desc_packed[vq->last_used_idx].flags = flags;
2031 } else {
2032 head_idx = vq->last_used_idx;
2033 head_flags = flags;
2034 }
2035
2036 vq_inc_last_used_packed(vq, async->buffers_packed[from].count);
2037
2038 from++;
2039 if (from == vq->size)
2040 from = 0;
2041 }
2042
2043 vq->desc_packed[head_idx].flags = head_flags;
2044 async->last_buffer_idx_packed = from;
2045}
2046
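/*
 * Gather packets whose async enqueue copies have completed: poll the DMA
 * channel, walk the in-flight ring in submission order, write back used
 * descriptors (when the ring is accessible) and return the completed mbufs
 * to the caller.
 */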
2047static __rte_always_inline uint16_t
2048vhost_poll_enqueue_completed(struct virtio_net *dev, uint16_t queue_id,
2049 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
2050 uint16_t vchan_id)
2051{
2052 struct vhost_virtqueue *vq = dev->virtqueue[queue_id];
2053 struct vhost_async *async = vq->async;
2054 struct async_inflight_info *pkts_info = async->pkts_info;
2055 uint16_t nr_cpl_pkts = 0;
2056 uint16_t n_descs = 0, n_buffers = 0;
2057 uint16_t start_idx, from, i;
2058
	/* Check completed copies for the given DMA vChannel */
2060 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
2061
2062 start_idx = async_get_first_inflight_pkt_idx(vq);
2063
	/*
	 * Calculate the number of completed packets. Note that there may
	 * be completed packets even if no copies are reported done by the
	 * given DMA vChannel, as a virtqueue may use multiple DMA
	 * vChannels.
	 */
2070 from = start_idx;
2071 while (vq->async->pkts_cmpl_flag[from] && count--) {
2072 vq->async->pkts_cmpl_flag[from] = false;
2073 from++;
2074 if (from >= vq->size)
2075 from -= vq->size;
2076 nr_cpl_pkts++;
2077 }
2078
2079 if (nr_cpl_pkts == 0)
2080 return 0;
2081
2082 for (i = 0; i < nr_cpl_pkts; i++) {
2083 from = (start_idx + i) % vq->size;
2084
2085 n_buffers += pkts_info[from].nr_buffers;
2086
2087 n_descs += pkts_info[from].descs;
2088 pkts[i] = pkts_info[from].mbuf;
2089 }
2090
2091 async->pkts_inflight_n -= nr_cpl_pkts;
2092
2093 if (likely(vq->enabled && vq->access_ok)) {
2094 if (vq_is_packed(dev)) {
2095 write_back_completed_descs_packed(vq, n_buffers);
2096 vhost_vring_call_packed(dev, vq);
2097 } else {
2098 write_back_completed_descs_split(vq, n_descs);
2099 __atomic_add_fetch(&vq->used->idx, n_descs, __ATOMIC_RELEASE);
2100 vhost_vring_call_split(dev, vq);
2101 }
2102 } else {
2103 if (vq_is_packed(dev)) {
2104 async->last_buffer_idx_packed += n_buffers;
2105 if (async->last_buffer_idx_packed >= vq->size)
2106 async->last_buffer_idx_packed -= vq->size;
2107 } else {
2108 async->last_desc_idx_split += n_descs;
2109 }
2110 }
2111
2112 return nr_cpl_pkts;
2113}
2114
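/*
 * Public API: reclaim mbufs whose asynchronous enqueue has finished on the
 * given DMA channel. Typically called periodically from the lcore that
 * calls rte_vhost_submit_enqueue_burst(); the returned mbufs can then be
 * freed, e.g. with rte_pktmbuf_free_bulk(pkts, n).
 */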
2115uint16_t
2116rte_vhost_poll_enqueue_completed(int vid, uint16_t queue_id,
2117 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
2118 uint16_t vchan_id)
2119{
2120 struct virtio_net *dev = get_device(vid);
2121 struct vhost_virtqueue *vq;
2122 uint16_t n_pkts_cpl = 0;
2123
2124 if (unlikely(!dev))
2125 return 0;
2126
2127 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
2128 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2129 VHOST_LOG_DATA(dev->ifname, ERR,
2130 "%s: invalid virtqueue idx %d.\n",
2131 __func__, queue_id);
2132 return 0;
2133 }
2134
2135 if (unlikely(!dma_copy_track[dma_id].vchans ||
2136 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
2137 VHOST_LOG_DATA(dev->ifname, ERR,
2138 "%s: invalid channel %d:%u.\n",
2139 __func__, dma_id, vchan_id);
2140 return 0;
2141 }
2142
2143 vq = dev->virtqueue[queue_id];
2144
2145 if (!rte_spinlock_trylock(&vq->access_lock)) {
2146 VHOST_LOG_DATA(dev->ifname, DEBUG,
2147 "%s: virtqueue %u is busy.\n",
2148 __func__, queue_id);
2149 return 0;
2150 }
2151
2152 if (unlikely(!vq->async)) {
2153 VHOST_LOG_DATA(dev->ifname, ERR,
2154 "%s: async not registered for virtqueue %d.\n",
2155 __func__, queue_id);
2156 goto out;
2157 }
2158
2159 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id, pkts, count, dma_id, vchan_id);
2160
2161 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
2162 vq->stats.inflight_completed += n_pkts_cpl;
2163
2164out:
2165 rte_spinlock_unlock(&vq->access_lock);
2166
2167 return n_pkts_cpl;
2168}
2169
2170uint16_t
2171rte_vhost_clear_queue_thread_unsafe(int vid, uint16_t queue_id,
2172 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
2173 uint16_t vchan_id)
2174{
2175 struct virtio_net *dev = get_device(vid);
2176 struct vhost_virtqueue *vq;
2177 uint16_t n_pkts_cpl = 0;
2178
2179 if (!dev)
2180 return 0;
2181
2182 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
2183 if (unlikely(queue_id >= dev->nr_vring)) {
2184 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n",
2185 __func__, queue_id);
2186 return 0;
2187 }
2188
2189 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
2190 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
2191 __func__, dma_id);
2192 return 0;
2193 }
2194
2195 vq = dev->virtqueue[queue_id];
2196
2197 if (unlikely(!rte_spinlock_is_locked(&vq->access_lock))) {
2198 VHOST_LOG_DATA(dev->ifname, ERR, "%s() called without access lock taken.\n",
2199 __func__);
2200 return 0;
2201 }
2202
2203 if (unlikely(!vq->async)) {
2204 VHOST_LOG_DATA(dev->ifname, ERR,
2205 "%s: async not registered for virtqueue %d.\n",
2206 __func__, queue_id);
2207 return 0;
2208 }
2209
2210 if (unlikely(!dma_copy_track[dma_id].vchans ||
2211 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
2212 VHOST_LOG_DATA(dev->ifname, ERR,
2213 "%s: invalid channel %d:%u.\n",
2214 __func__, dma_id, vchan_id);
2215 return 0;
2216 }
2217
2218 if ((queue_id & 1) == 0)
2219 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id,
2220 pkts, count, dma_id, vchan_id);
2221 else {
2222 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count,
2223 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS);
2224 }
2225
2226 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
2227 vq->stats.inflight_completed += n_pkts_cpl;
2228
2229 return n_pkts_cpl;
2230}
2231
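/*
 * rte_vhost_clear_queue() is the lock-taking counterpart of the thread-unsafe
 * variant above: both drain in-flight async copies, with even queue indexes
 * handled as enqueue (Rx) completions and odd indexes as dequeue (Tx)
 * completions, matching the (queue_id & 1) dispatch below. A sketch of
 * draining a queue before changing its state, assuming the application uses
 * rte_vhost_async_get_inflight() to know when to stop:
 *
 *     while (rte_vhost_async_get_inflight(vid, queue_id) > 0)
 *             rte_vhost_clear_queue(vid, queue_id, pkts, MAX_PKT_BURST,
 *                             dma_id, 0);
 */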
2232uint16_t
2233rte_vhost_clear_queue(int vid, uint16_t queue_id, struct rte_mbuf **pkts,
2234 uint16_t count, int16_t dma_id, uint16_t vchan_id)
2235{
2236 struct virtio_net *dev = get_device(vid);
2237 struct vhost_virtqueue *vq;
2238 uint16_t n_pkts_cpl = 0;
2239
2240 if (!dev)
2241 return 0;
2242
2243 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
2244 if (unlikely(queue_id >= dev->nr_vring)) {
2245 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %u.\n",
2246 __func__, queue_id);
2247 return 0;
2248 }
2249
2250 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
2251 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
2252 __func__, dma_id);
2253 return 0;
2254 }
2255
2256 vq = dev->virtqueue[queue_id];
2257
2258 if (!rte_spinlock_trylock(&vq->access_lock)) {
2259 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: virtqueue %u is busy.\n",
2260 __func__, queue_id);
2261 return 0;
2262 }
2263
2264 if (unlikely(!vq->async)) {
2265 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %u.\n",
2266 __func__, queue_id);
2267 goto out_access_unlock;
2268 }
2269
2270 if (unlikely(!dma_copy_track[dma_id].vchans ||
2271 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
2272 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
2273 __func__, dma_id, vchan_id);
2274 goto out_access_unlock;
2275 }
2276
2277 if ((queue_id & 1) == 0)
2278 n_pkts_cpl = vhost_poll_enqueue_completed(dev, queue_id,
2279 pkts, count, dma_id, vchan_id);
2280 else {
2281 n_pkts_cpl = async_poll_dequeue_completed(dev, vq, pkts, count,
2282 dma_id, vchan_id, dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS);
2283 }
2284
2285 vhost_queue_stats_update(dev, vq, pkts, n_pkts_cpl);
2286 vq->stats.inflight_completed += n_pkts_cpl;
2287
2288out_access_unlock:
2289 rte_spinlock_unlock(&vq->access_lock);
2290
2291 return n_pkts_cpl;
2292}
2293
2294static __rte_always_inline uint32_t
2295virtio_dev_rx_async_submit(struct virtio_net *dev, uint16_t queue_id,
2296 struct rte_mbuf **pkts, uint32_t count, int16_t dma_id, uint16_t vchan_id)
2297{
2298 struct vhost_virtqueue *vq;
2299 uint32_t nb_tx = 0;
2300
2301 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
2302 if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
2303 VHOST_LOG_DATA(dev->ifname, ERR,
2304 "%s: invalid virtqueue idx %d.\n",
2305 __func__, queue_id);
2306 return 0;
2307 }
2308
2309 if (unlikely(!dma_copy_track[dma_id].vchans ||
2310 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
2311 VHOST_LOG_DATA(dev->ifname, ERR,
2312 "%s: invalid channel %d:%u.\n",
2313 __func__, dma_id, vchan_id);
2314 return 0;
2315 }
2316
2317 vq = dev->virtqueue[queue_id];
2318
2319 rte_spinlock_lock(&vq->access_lock);
2320
2321 if (unlikely(!vq->enabled || !vq->async))
2322 goto out_access_unlock;
2323
2324 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2325 vhost_user_iotlb_rd_lock(vq);
2326
2327 if (unlikely(!vq->access_ok))
2328 if (unlikely(vring_translate(dev, vq) < 0))
2329 goto out;
2330
2331 count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
2332 if (count == 0)
2333 goto out;
2334
2335 if (vq_is_packed(dev))
2336 nb_tx = virtio_dev_rx_async_submit_packed(dev, vq, queue_id,
2337 pkts, count, dma_id, vchan_id);
2338 else
2339 nb_tx = virtio_dev_rx_async_submit_split(dev, vq, queue_id,
2340 pkts, count, dma_id, vchan_id);
2341
2342 vq->stats.inflight_submitted += nb_tx;
2343
2344out:
2345 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
2346 vhost_user_iotlb_rd_unlock(vq);
2347
2348out_access_unlock:
2349 rte_spinlock_unlock(&vq->access_lock);
2350
2351 return nb_tx;
2352}
2353
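/*
 * Note: mbufs handed to rte_vhost_submit_enqueue_burst() are still in use by
 * the DMA engine after it returns; the caller must not free or reuse them
 * until rte_vhost_poll_enqueue_completed() hands them back.
 */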
2354uint16_t
2355rte_vhost_submit_enqueue_burst(int vid, uint16_t queue_id,
2356 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
2357 uint16_t vchan_id)
2358{
2359 struct virtio_net *dev = get_device(vid);
2360
2361 if (!dev)
2362 return 0;
2363
2364 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
2365 VHOST_LOG_DATA(dev->ifname, ERR,
2366 "%s: built-in vhost net backend is disabled.\n",
2367 __func__);
2368 return 0;
2369 }
2370
2371 return virtio_dev_rx_async_submit(dev, queue_id, pkts, count, dma_id, vchan_id);
2372}
2373
2374static inline bool
2375virtio_net_with_host_offload(struct virtio_net *dev)
2376{
2377 if (dev->features &
2378 ((1ULL << VIRTIO_NET_F_CSUM) |
2379 (1ULL << VIRTIO_NET_F_HOST_ECN) |
2380 (1ULL << VIRTIO_NET_F_HOST_TSO4) |
2381 (1ULL << VIRTIO_NET_F_HOST_TSO6) |
2382 (1ULL << VIRTIO_NET_F_HOST_UFO)))
2383 return true;
2384
2385 return false;
2386}
2387
2388static int
2389parse_headers(struct rte_mbuf *m, uint8_t *l4_proto)
2390{
2391 struct rte_ipv4_hdr *ipv4_hdr;
2392 struct rte_ipv6_hdr *ipv6_hdr;
2393 struct rte_ether_hdr *eth_hdr;
2394 uint16_t ethertype;
2395 uint16_t data_len = rte_pktmbuf_data_len(m);
2396
2397 if (data_len < sizeof(struct rte_ether_hdr))
2398 return -EINVAL;
2399
2400 eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
2401
2402 m->l2_len = sizeof(struct rte_ether_hdr);
2403 ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
2404
2405 if (ethertype == RTE_ETHER_TYPE_VLAN) {
2406 if (data_len < sizeof(struct rte_ether_hdr) +
2407 sizeof(struct rte_vlan_hdr))
2408 goto error;
2409
2410 struct rte_vlan_hdr *vlan_hdr =
2411 (struct rte_vlan_hdr *)(eth_hdr + 1);
2412
2413 m->l2_len += sizeof(struct rte_vlan_hdr);
2414 ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
2415 }
2416
2417 switch (ethertype) {
2418 case RTE_ETHER_TYPE_IPV4:
2419 if (data_len < m->l2_len + sizeof(struct rte_ipv4_hdr))
2420 goto error;
2421 ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
2422 m->l2_len);
2423 m->l3_len = rte_ipv4_hdr_len(ipv4_hdr);
2424 if (data_len < m->l2_len + m->l3_len)
2425 goto error;
2426 m->ol_flags |= RTE_MBUF_F_TX_IPV4;
2427 *l4_proto = ipv4_hdr->next_proto_id;
2428 break;
2429 case RTE_ETHER_TYPE_IPV6:
2430 if (data_len < m->l2_len + sizeof(struct rte_ipv6_hdr))
2431 goto error;
2432 ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
2433 m->l2_len);
2434 m->l3_len = sizeof(struct rte_ipv6_hdr);
2435 m->ol_flags |= RTE_MBUF_F_TX_IPV6;
2436 *l4_proto = ipv6_hdr->proto;
2437 break;
2438 default:
2439 /* a valid L3 header is needed for further L4 parsing */
2440 goto error;
2441 }
2442
2443 /* both CSUM and GSO need a valid L4 header */
2444 switch (*l4_proto) {
2445 case IPPROTO_TCP:
2446 if (data_len < m->l2_len + m->l3_len +
2447 sizeof(struct rte_tcp_hdr))
2448 goto error;
2449 break;
2450 case IPPROTO_UDP:
2451 if (data_len < m->l2_len + m->l3_len +
2452 sizeof(struct rte_udp_hdr))
2453 goto error;
2454 break;
2455 case IPPROTO_SCTP:
2456 if (data_len < m->l2_len + m->l3_len +
2457 sizeof(struct rte_sctp_hdr))
2458 goto error;
2459 break;
2460 default:
2461 goto error;
2462 }
2463
2464 return 0;
2465
2466error:
2467 m->l2_len = 0;
2468 m->l3_len = 0;
2469 m->ol_flags = 0;
2470 return -EINVAL;
2471}
2472
2473static __rte_always_inline void
2474vhost_dequeue_offload_legacy(struct virtio_net *dev, struct virtio_net_hdr *hdr,
2475 struct rte_mbuf *m)
2476{
2477 uint8_t l4_proto = 0;
2478 struct rte_tcp_hdr *tcp_hdr = NULL;
2479 uint16_t tcp_len;
2480 uint16_t data_len = rte_pktmbuf_data_len(m);
2481
2482 if (parse_headers(m, &l4_proto) < 0)
2483 return;
2484
2485 if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2486 if (hdr->csum_start == (m->l2_len + m->l3_len)) {
2487 switch (hdr->csum_offset) {
2488 case (offsetof(struct rte_tcp_hdr, cksum)):
2489 if (l4_proto != IPPROTO_TCP)
2490 goto error;
2491 m->ol_flags |= RTE_MBUF_F_TX_TCP_CKSUM;
2492 break;
2493 case (offsetof(struct rte_udp_hdr, dgram_cksum)):
2494 if (l4_proto != IPPROTO_UDP)
2495 goto error;
2496 m->ol_flags |= RTE_MBUF_F_TX_UDP_CKSUM;
2497 break;
2498 case (offsetof(struct rte_sctp_hdr, cksum)):
2499 if (l4_proto != IPPROTO_SCTP)
2500 goto error;
2501 m->ol_flags |= RTE_MBUF_F_TX_SCTP_CKSUM;
2502 break;
2503 default:
2504 goto error;
2505 }
2506 } else {
2507 goto error;
2508 }
2509 }
2510
2511 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2512 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2513 case VIRTIO_NET_HDR_GSO_TCPV4:
2514 case VIRTIO_NET_HDR_GSO_TCPV6:
2515 if (l4_proto != IPPROTO_TCP)
2516 goto error;
2517 tcp_hdr = rte_pktmbuf_mtod_offset(m,
2518 struct rte_tcp_hdr *,
2519 m->l2_len + m->l3_len);
2520 tcp_len = (tcp_hdr->data_off & 0xf0) >> 2;
2521 if (data_len < m->l2_len + m->l3_len + tcp_len)
2522 goto error;
2523 m->ol_flags |= RTE_MBUF_F_TX_TCP_SEG;
2524 m->tso_segsz = hdr->gso_size;
2525 m->l4_len = tcp_len;
2526 break;
2527 case VIRTIO_NET_HDR_GSO_UDP:
2528 if (l4_proto != IPPROTO_UDP)
2529 goto error;
2530 m->ol_flags |= RTE_MBUF_F_TX_UDP_SEG;
2531 m->tso_segsz = hdr->gso_size;
2532 m->l4_len = sizeof(struct rte_udp_hdr);
2533 break;
2534 default:
2535 VHOST_LOG_DATA(dev->ifname, WARNING,
2536 "unsupported gso type %u.\n",
2537 hdr->gso_type);
2538 goto error;
2539 }
2540 }
2541 return;
2542
2543error:
2544 m->l2_len = 0;
2545 m->l3_len = 0;
2546 m->ol_flags = 0;
2547}
2548
2549static __rte_always_inline void
2550vhost_dequeue_offload(struct virtio_net *dev, struct virtio_net_hdr *hdr,
2551 struct rte_mbuf *m, bool legacy_ol_flags)
2552{
2553 struct rte_net_hdr_lens hdr_lens;
2554 int l4_supported = 0;
2555 uint32_t ptype;
2556
2557 if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE)
2558 return;
2559
2560 if (legacy_ol_flags) {
2561 vhost_dequeue_offload_legacy(dev, hdr, m);
2562 return;
2563 }
2564
2565 m->ol_flags |= RTE_MBUF_F_RX_IP_CKSUM_UNKNOWN;
2566
2567 ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);
2568 m->packet_type = ptype;
2569 if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||
2570 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||
2571 (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)
2572 l4_supported = 1;
2573
2574 /*
2575 * According to the Virtio spec, the device only needs to act on
2576 * VIRTIO_NET_HDR_F_NEEDS_CSUM in the transmission path.
2577 * When the flag is set, the guest left the L4 checksum unset: only
2578 * the pseudo-header checksum is filled in, and the full checksum is
2579 * expected to be written at csum_start + csum_offset.
2580 *
2581 * If the checksum location falls within a recognized L4 header,
2582 * report RTE_MBUF_F_RX_L4_CKSUM_NONE so the application knows the
2583 * packet data is valid but the L4 checksum is not computed.
2584 * For an unrecognized protocol or a tunnel, complete the checksum
2585 * in software below so the packet can be consumed as-is.
2586 */
2587 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
2588 uint32_t hdrlen;
2589
2590 hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;
2591 if (hdr->csum_start <= hdrlen && l4_supported != 0) {
2592 m->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_NONE;
2593 } else {
2594 /*
2595 * Unknown protocol or tunnel: compute the checksum in software.
2596 * The checksum field is assumed to live in the first mbuf segment.
2597 * For SCTP this is wrong (it is a CRC, not a checksum), but there
2598 * is nothing better that can be done here.
2599 */
2600 uint16_t csum = 0, off;
2601
2602 if (rte_raw_cksum_mbuf(m, hdr->csum_start,
2603 rte_pktmbuf_pkt_len(m) - hdr->csum_start, &csum) < 0)
2604 return;
2605 if (likely(csum != 0xffff))
2606 csum = ~csum;
2607 off = hdr->csum_offset + hdr->csum_start;
2608 if (rte_pktmbuf_data_len(m) >= off + 1)
2609 *rte_pktmbuf_mtod_offset(m, uint16_t *, off) = csum;
2610 }
2611 }
2612
2613 if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
2614 if (hdr->gso_size == 0)
2615 return;
2616
2617 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2618 case VIRTIO_NET_HDR_GSO_TCPV4:
2619 case VIRTIO_NET_HDR_GSO_TCPV6:
2620 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_TCP)
2621 break;
2622 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
2623 m->tso_segsz = hdr->gso_size;
2624 break;
2625 case VIRTIO_NET_HDR_GSO_UDP:
2626 if ((ptype & RTE_PTYPE_L4_MASK) != RTE_PTYPE_L4_UDP)
2627 break;
2628 m->ol_flags |= RTE_MBUF_F_RX_LRO | RTE_MBUF_F_RX_L4_CKSUM_NONE;
2629 m->tso_segsz = hdr->gso_size;
2630 break;
2631 default:
2632 break;
2633 }
2634 }
2635}
2636
2637static __rte_noinline void
2638copy_vnet_hdr_from_desc(struct virtio_net_hdr *hdr,
2639 struct buf_vector *buf_vec)
2640{
2641 uint64_t len;
2642 uint64_t remain = sizeof(struct virtio_net_hdr);
2643 uint64_t src;
2644 uint64_t dst = (uint64_t)(uintptr_t)hdr;
2645
2646 while (remain) {
2647 len = RTE_MIN(remain, buf_vec->buf_len);
2648 src = buf_vec->buf_addr;
2649 rte_memcpy((void *)(uintptr_t)dst,
2650 (void *)(uintptr_t)src, len);
2651
2652 remain -= len;
2653 dst += len;
2654 buf_vec++;
2655 }
2656}
2657
2658static __rte_always_inline int
2659desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq,
2660 struct buf_vector *buf_vec, uint16_t nr_vec,
2661 struct rte_mbuf *m, struct rte_mempool *mbuf_pool,
2662 bool legacy_ol_flags, uint16_t slot_idx, bool is_async)
2663{
2664 uint32_t buf_avail, buf_offset, buf_len;
2665 uint64_t buf_addr, buf_iova;
2666 uint32_t mbuf_avail, mbuf_offset;
2667 uint32_t cpy_len;
2668 struct rte_mbuf *cur = m, *prev = m;
2669 struct virtio_net_hdr tmp_hdr;
2670 struct virtio_net_hdr *hdr = NULL;
2671
2672 uint16_t vec_idx = 0;
2673 struct vhost_async *async = vq->async;
2674 struct async_inflight_info *pkts_info;
2675
2676 buf_addr = buf_vec[vec_idx].buf_addr;
2677 buf_iova = buf_vec[vec_idx].buf_iova;
2678 buf_len = buf_vec[vec_idx].buf_len;
2679
2680 if (unlikely(buf_len < dev->vhost_hlen && nr_vec <= 1))
2681 return -1;
2682
2683 if (virtio_net_with_host_offload(dev)) {
2684 if (unlikely(buf_len < sizeof(struct virtio_net_hdr))) {
2685 /*
2686 * No luck, the virtio-net header doesn't fit in a contiguous
2687 * virtual area; gather it from the buffer vector instead.
2688 */
2689 copy_vnet_hdr_from_desc(&tmp_hdr, buf_vec);
2690 hdr = &tmp_hdr;
2691 } else {
2692 hdr = (struct virtio_net_hdr *)((uintptr_t)buf_addr);
2693 }
2694 }
2695
2696 /*
2697 * A virtio driver normally uses at least 2 descriptors for a Tx
2698 * packet: the first carries the virtio-net header, the others
2699 * carry the packet data.
2700 */
2701 if (unlikely(buf_len < dev->vhost_hlen)) {
2702 buf_offset = dev->vhost_hlen - buf_len;
2703 vec_idx++;
2704 buf_addr = buf_vec[vec_idx].buf_addr;
2705 buf_iova = buf_vec[vec_idx].buf_iova;
2706 buf_len = buf_vec[vec_idx].buf_len;
2707 buf_avail = buf_len - buf_offset;
2708 } else if (buf_len == dev->vhost_hlen) {
2709 if (unlikely(++vec_idx >= nr_vec))
2710 goto error;
2711 buf_addr = buf_vec[vec_idx].buf_addr;
2712 buf_iova = buf_vec[vec_idx].buf_iova;
2713 buf_len = buf_vec[vec_idx].buf_len;
2714
2715 buf_offset = 0;
2716 buf_avail = buf_len;
2717 } else {
2718 buf_offset = dev->vhost_hlen;
2719 buf_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
2720 }
2721
2722 PRINT_PACKET(dev,
2723 (uintptr_t)(buf_addr + buf_offset),
2724 (uint32_t)buf_avail, 0);
2725
2726 mbuf_offset = 0;
2727 mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
2728
2729 if (is_async) {
2730 pkts_info = async->pkts_info;
2731 if (async_iter_initialize(dev, async))
2732 return -1;
2733 }
2734
2735 while (1) {
2736 cpy_len = RTE_MIN(buf_avail, mbuf_avail);
2737
2738 if (is_async) {
2739 if (async_fill_seg(dev, vq, cur, mbuf_offset,
2740 buf_iova + buf_offset, cpy_len, false) < 0)
2741 goto error;
2742 } else if (likely(hdr && cur == m)) {
2743 rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset),
2744 (void *)((uintptr_t)(buf_addr + buf_offset)),
2745 cpy_len);
2746 } else {
2747 sync_fill_seg(dev, vq, cur, mbuf_offset,
2748 buf_addr + buf_offset,
2749 buf_iova + buf_offset, cpy_len, false);
2750 }
2751
2752 mbuf_avail -= cpy_len;
2753 mbuf_offset += cpy_len;
2754 buf_avail -= cpy_len;
2755 buf_offset += cpy_len;
2756
2757 /* The current descriptor buffer is exhausted: move to the next one */
2758 if (buf_avail == 0) {
2759 if (++vec_idx >= nr_vec)
2760 break;
2761
2762 buf_addr = buf_vec[vec_idx].buf_addr;
2763 buf_iova = buf_vec[vec_idx].buf_iova;
2764 buf_len = buf_vec[vec_idx].buf_len;
2765
2766 buf_offset = 0;
2767 buf_avail = buf_len;
2768
2769 PRINT_PACKET(dev, (uintptr_t)buf_addr,
2770 (uint32_t)buf_avail, 0);
2771 }
2772
2773 /*
2774 * The current mbuf is full: allocate a new one to hold the
2775 * remaining data.
2776 */
2777 if (mbuf_avail == 0) {
2778 cur = rte_pktmbuf_alloc(mbuf_pool);
2779 if (unlikely(cur == NULL)) {
2780 VHOST_LOG_DATA(dev->ifname, ERR,
2781 "failed to allocate memory for mbuf.\n");
2782 goto error;
2783 }
2784
2785 prev->next = cur;
2786 prev->data_len = mbuf_offset;
2787 m->nb_segs += 1;
2788 m->pkt_len += mbuf_offset;
2789 prev = cur;
2790
2791 mbuf_offset = 0;
2792 mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM;
2793 }
2794 }
2795
2796 prev->data_len = mbuf_offset;
2797 m->pkt_len += mbuf_offset;
2798
2799 if (is_async) {
2800 async_iter_finalize(async);
2801 if (hdr)
2802 pkts_info[slot_idx].nethdr = *hdr;
2803 } else if (hdr) {
2804 vhost_dequeue_offload(dev, hdr, m, legacy_ol_flags);
2805 }
2806
2807 return 0;
2808error:
2809 if (is_async)
2810 async_iter_cancel(async);
2811
2812 return -1;
2813}
2814
2815static void
2816virtio_dev_extbuf_free(void *addr __rte_unused, void *opaque)
2817{
2818 rte_free(opaque);
2819}
2820
2821static int
2822virtio_dev_extbuf_alloc(struct virtio_net *dev, struct rte_mbuf *pkt, uint32_t size)
2823{
2824 struct rte_mbuf_ext_shared_info *shinfo = NULL;
2825 uint32_t total_len = RTE_PKTMBUF_HEADROOM + size;
2826 uint16_t buf_len;
2827 rte_iova_t iova;
2828 void *buf;
2829
2830 total_len += sizeof(*shinfo) + sizeof(uintptr_t);
2831 total_len = RTE_ALIGN_CEIL(total_len, sizeof(uintptr_t));
2832
2833 if (unlikely(total_len > UINT16_MAX))
2834 return -ENOSPC;
2835
2836 buf_len = total_len;
2837 buf = rte_malloc(NULL, buf_len, RTE_CACHE_LINE_SIZE);
2838 if (unlikely(buf == NULL))
2839 return -ENOMEM;
2840
2841 /* Initialize shinfo */
2842 shinfo = rte_pktmbuf_ext_shinfo_init_helper(buf, &buf_len,
2843 virtio_dev_extbuf_free, buf);
2844 if (unlikely(shinfo == NULL)) {
2845 rte_free(buf);
2846 VHOST_LOG_DATA(dev->ifname, ERR, "failed to init shinfo\n");
2847 return -1;
2848 }
2849
2850 iova = rte_malloc_virt2iova(buf);
2851 rte_pktmbuf_attach_extbuf(pkt, buf, iova, buf_len, shinfo);
2852 rte_pktmbuf_reset_headroom(pkt);
2853
2854 return 0;
2855}
2856
2857/*
2858 * Prepare a pktmbuf able to hold data_len bytes of packet data.
2859 */
2860static __rte_always_inline int
2861virtio_dev_pktmbuf_prep(struct virtio_net *dev, struct rte_mbuf *pkt,
2862 uint32_t data_len)
2863{
2864 if (rte_pktmbuf_tailroom(pkt) >= data_len)
2865 return 0;
2866
2867 /* attach an external buffer if supported */
2868 if (dev->extbuf && !virtio_dev_extbuf_alloc(dev, pkt, data_len))
2869 return 0;
2870
2871 /* check if chained buffers are allowed */
2872 if (!dev->linearbuf)
2873 return 0;
2874
2875 return -1;
2876}
2877
2878__rte_always_inline
2879static uint16_t
2880virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
2881 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
2882 bool legacy_ol_flags)
2883{
2884 uint16_t i;
2885 uint16_t avail_entries;
2886 uint16_t dropped = 0;
2887 static bool allocerr_warned;
2888
2889 /*
2890 * The ordering between avail index and
2891 * desc reads needs to be enforced.
2892 */
2893 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
2894 vq->last_avail_idx;
2895 if (avail_entries == 0)
2896 return 0;
2897
2898 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
2899
2900 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s\n", __func__);
2901
2902 count = RTE_MIN(count, MAX_PKT_BURST);
2903 count = RTE_MIN(count, avail_entries);
2904 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count);
2905
2906 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
2907 return 0;
2908
2909 for (i = 0; i < count; i++) {
2910 struct buf_vector buf_vec[BUF_VECTOR_MAX];
2911 uint16_t head_idx;
2912 uint32_t buf_len;
2913 uint16_t nr_vec = 0;
2914 int err;
2915
2916 if (unlikely(fill_vec_buf_split(dev, vq,
2917 vq->last_avail_idx + i,
2918 &nr_vec, buf_vec,
2919 &head_idx, &buf_len,
2920 VHOST_ACCESS_RO) < 0))
2921 break;
2922
2923 update_shadow_used_ring_split(vq, head_idx, 0);
2924
2925 err = virtio_dev_pktmbuf_prep(dev, pkts[i], buf_len);
2926 if (unlikely(err)) {
2927 /*
2928 * mbuf allocation fails for jumbo packets when external
2929 * buffer allocation is not allowed and linear buffer
2930 * is required. Drop this packet.
2931 */
2932 if (!allocerr_warned) {
2933 VHOST_LOG_DATA(dev->ifname, ERR,
2934 "failed mbuf alloc of size %d from %s.\n",
2935 buf_len, mbuf_pool->name);
2936 allocerr_warned = true;
2937 }
2938 dropped += 1;
2939 i++;
2940 break;
2941 }
2942
2943 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts[i],
2944 mbuf_pool, legacy_ol_flags, 0, false);
2945 if (unlikely(err)) {
2946 if (!allocerr_warned) {
2947 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n");
2948 allocerr_warned = true;
2949 }
2950 dropped += 1;
2951 i++;
2952 break;
2953 }
2954
2955 }
2956
2957 if (unlikely(dropped || i < count))
2958 rte_pktmbuf_free_bulk(&pkts[i - dropped], count - i + dropped);
2959
2960 vq->last_avail_idx += i;
2961
2962 do_data_copy_dequeue(vq);
2963 if (unlikely(i < count))
2964 vq->shadow_used_idx = i;
2965 if (likely(vq->shadow_used_idx)) {
2966 flush_shadow_used_ring_split(dev, vq);
2967 vhost_vring_call_split(dev, vq);
2968 }
2969
2970 return (i - dropped);
2971}
2972
2973__rte_noinline
2974static uint16_t
2975virtio_dev_tx_split_legacy(struct virtio_net *dev,
2976 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2977 struct rte_mbuf **pkts, uint16_t count)
2978{
2979 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, true);
2980}
2981
2982__rte_noinline
2983static uint16_t
2984virtio_dev_tx_split_compliant(struct virtio_net *dev,
2985 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
2986 struct rte_mbuf **pkts, uint16_t count)
2987{
2988 return virtio_dev_tx_split(dev, vq, mbuf_pool, pkts, count, false);
2989}
2990
2991static __rte_always_inline int
2992vhost_reserve_avail_batch_packed(struct virtio_net *dev,
2993 struct vhost_virtqueue *vq,
2994 struct rte_mbuf **pkts,
2995 uint16_t avail_idx,
2996 uintptr_t *desc_addrs,
2997 uint16_t *ids)
2998{
2999 bool wrap = vq->avail_wrap_counter;
3000 struct vring_packed_desc *descs = vq->desc_packed;
3001 uint64_t lens[PACKED_BATCH_SIZE];
3002 uint64_t buf_lens[PACKED_BATCH_SIZE];
3003 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3004 uint16_t flags, i;
3005
3006 if (unlikely(avail_idx & PACKED_BATCH_MASK))
3007 return -1;
3008 if (unlikely((avail_idx + PACKED_BATCH_SIZE) > vq->size))
3009 return -1;
3010
3011 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3012 flags = descs[avail_idx + i].flags;
3013 if (unlikely((wrap != !!(flags & VRING_DESC_F_AVAIL)) ||
3014 (wrap == !!(flags & VRING_DESC_F_USED)) ||
3015 (flags & PACKED_DESC_SINGLE_DEQUEUE_FLAG)))
3016 return -1;
3017 }
3018
3019 rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
3020
3021 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3022 lens[i] = descs[avail_idx + i].len;
3023
3024 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3025 desc_addrs[i] = vhost_iova_to_vva(dev, vq,
3026 descs[avail_idx + i].addr,
3027 &lens[i], VHOST_ACCESS_RW);
3028 }
3029
3030 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3031 if (unlikely(!desc_addrs[i]))
3032 return -1;
3033 if (unlikely((lens[i] != descs[avail_idx + i].len)))
3034 return -1;
3035 }
3036
3037 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3038 if (virtio_dev_pktmbuf_prep(dev, pkts[i], lens[i]))
3039 goto err;
3040 }
3041
3042 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3043 buf_lens[i] = pkts[i]->buf_len - pkts[i]->data_off;
3044 /* the descriptor length includes the virtio-net header: the payload must fit in the mbuf */
3045 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3046 if (unlikely(buf_lens[i] < (lens[i] - buf_offset)))
3047 goto err;
3048 }
3049
3050 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3051 pkts[i]->pkt_len = lens[i] - buf_offset;
3052 pkts[i]->data_len = pkts[i]->pkt_len;
3053 ids[i] = descs[avail_idx + i].id;
3054 }
3055
3056 return 0;
3057
3058err:
3059 return -1;
3060}
3061
3062static __rte_always_inline int
3063virtio_dev_tx_batch_packed(struct virtio_net *dev,
3064 struct vhost_virtqueue *vq,
3065 struct rte_mbuf **pkts,
3066 bool legacy_ol_flags)
3067{
3068 uint16_t avail_idx = vq->last_avail_idx;
3069 uint32_t buf_offset = sizeof(struct virtio_net_hdr_mrg_rxbuf);
3070 struct virtio_net_hdr *hdr;
3071 uintptr_t desc_addrs[PACKED_BATCH_SIZE];
3072 uint16_t ids[PACKED_BATCH_SIZE];
3073 uint16_t i;
3074
3075 if (vhost_reserve_avail_batch_packed(dev, vq, pkts, avail_idx,
3076 desc_addrs, ids))
3077 return -1;
3078
3079 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3080 rte_prefetch0((void *)(uintptr_t)desc_addrs[i]);
3081
3082 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)
3083 rte_memcpy(rte_pktmbuf_mtod_offset(pkts[i], void *, 0),
3084 (void *)(uintptr_t)(desc_addrs[i] + buf_offset),
3085 pkts[i]->pkt_len);
3086
3087 if (virtio_net_with_host_offload(dev)) {
3088 vhost_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {
3089 hdr = (struct virtio_net_hdr *)(desc_addrs[i]);
3090 vhost_dequeue_offload(dev, hdr, pkts[i], legacy_ol_flags);
3091 }
3092 }
3093
3094 if (virtio_net_is_inorder(dev))
3095 vhost_shadow_dequeue_batch_packed_inorder(vq,
3096 ids[PACKED_BATCH_SIZE - 1]);
3097 else
3098 vhost_shadow_dequeue_batch_packed(dev, vq, ids);
3099
3100 vq_inc_last_avail_packed(vq, PACKED_BATCH_SIZE);
3101
3102 return 0;
3103}
3104
3105static __rte_always_inline int
3106vhost_dequeue_single_packed(struct virtio_net *dev,
3107 struct vhost_virtqueue *vq,
3108 struct rte_mempool *mbuf_pool,
3109 struct rte_mbuf *pkts,
3110 uint16_t *buf_id,
3111 uint16_t *desc_count,
3112 bool legacy_ol_flags)
3113{
3114 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3115 uint32_t buf_len;
3116 uint16_t nr_vec = 0;
3117 int err;
3118 static bool allocerr_warned;
3119
3120 if (unlikely(fill_vec_buf_packed(dev, vq,
3121 vq->last_avail_idx, desc_count,
3122 buf_vec, &nr_vec,
3123 buf_id, &buf_len,
3124 VHOST_ACCESS_RO) < 0))
3125 return -1;
3126
3127 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3128 if (!allocerr_warned) {
3129 VHOST_LOG_DATA(dev->ifname, ERR,
3130 "failed mbuf alloc of size %d from %s.\n",
3131 buf_len, mbuf_pool->name);
3132 allocerr_warned = true;
3133 }
3134 return -1;
3135 }
3136
3137 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts,
3138 mbuf_pool, legacy_ol_flags, 0, false);
3139 if (unlikely(err)) {
3140 if (!allocerr_warned) {
3141 VHOST_LOG_DATA(dev->ifname, ERR, "failed to copy desc to mbuf.\n");
3142 allocerr_warned = true;
3143 }
3144 return -1;
3145 }
3146
3147 return 0;
3148}
3149
3150static __rte_always_inline int
3151virtio_dev_tx_single_packed(struct virtio_net *dev,
3152 struct vhost_virtqueue *vq,
3153 struct rte_mempool *mbuf_pool,
3154 struct rte_mbuf *pkts,
3155 bool legacy_ol_flags)
3156{
3157
3158 uint16_t buf_id, desc_count = 0;
3159 int ret;
3160
3161 ret = vhost_dequeue_single_packed(dev, vq, mbuf_pool, pkts, &buf_id,
3162 &desc_count, legacy_ol_flags);
3163
3164 if (likely(desc_count > 0)) {
3165 if (virtio_net_is_inorder(dev))
3166 vhost_shadow_dequeue_single_packed_inorder(vq, buf_id,
3167 desc_count);
3168 else
3169 vhost_shadow_dequeue_single_packed(vq, buf_id,
3170 desc_count);
3171
3172 vq_inc_last_avail_packed(vq, desc_count);
3173 }
3174
3175 return ret;
3176}
3177
3178__rte_always_inline
3179static uint16_t
3180virtio_dev_tx_packed(struct virtio_net *dev,
3181 struct vhost_virtqueue *__rte_restrict vq,
3182 struct rte_mempool *mbuf_pool,
3183 struct rte_mbuf **__rte_restrict pkts,
3184 uint32_t count,
3185 bool legacy_ol_flags)
3186{
3187 uint32_t pkt_idx = 0;
3188
3189 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts, count))
3190 return 0;
3191 /* try the batch path first, fall back to single-descriptor dequeue */
3192 do {
3193 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
3194
3195 if (count - pkt_idx >= PACKED_BATCH_SIZE) {
3196 if (!virtio_dev_tx_batch_packed(dev, vq,
3197 &pkts[pkt_idx],
3198 legacy_ol_flags)) {
3199 pkt_idx += PACKED_BATCH_SIZE;
3200 continue;
3201 }
3202 }
3203
3204 if (virtio_dev_tx_single_packed(dev, vq, mbuf_pool,
3205 pkts[pkt_idx],
3206 legacy_ol_flags))
3207 break;
3208 pkt_idx++;
3209 } while (pkt_idx < count);
3210
3211 if (pkt_idx != count)
3212 rte_pktmbuf_free_bulk(&pkts[pkt_idx], count - pkt_idx);
3213
3214 if (vq->shadow_used_idx) {
3215 do_data_copy_dequeue(vq);
3216
3217 vhost_flush_dequeue_shadow_packed(dev, vq);
3218 vhost_vring_call_packed(dev, vq);
3219 }
3220
3221 return pkt_idx;
3222}
3223
3224__rte_noinline
3225static uint16_t
3226virtio_dev_tx_packed_legacy(struct virtio_net *dev,
3227 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3228 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3229{
3230 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, true);
3231}
3232
3233__rte_noinline
3234static uint16_t
3235virtio_dev_tx_packed_compliant(struct virtio_net *dev,
3236 struct vhost_virtqueue *__rte_restrict vq, struct rte_mempool *mbuf_pool,
3237 struct rte_mbuf **__rte_restrict pkts, uint32_t count)
3238{
3239 return virtio_dev_tx_packed(dev, vq, mbuf_pool, pkts, count, false);
3240}
3241
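/*
 * Synchronous dequeue path. A typical call (a sketch): packets the guest
 * transmitted on virtqueue "q * VIRTIO_QNUM + VIRTIO_TXQ" are copied into
 * mbufs allocated from mbuf_pool:
 *
 *     nb_rx = rte_vhost_dequeue_burst(vid, q * VIRTIO_QNUM + VIRTIO_TXQ,
 *                     mbuf_pool, pkts, MAX_PKT_BURST);
 *
 * Only odd virtqueue indexes are accepted here, which is what the
 * is_valid_virt_queue_idx(queue_id, 1, ...) check below enforces.
 */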
3242uint16_t
3243rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
3244 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
3245{
3246 struct virtio_net *dev;
3247 struct rte_mbuf *rarp_mbuf = NULL;
3248 struct vhost_virtqueue *vq;
3249 int16_t success = 1;
3250
3251 dev = get_device(vid);
3252 if (!dev)
3253 return 0;
3254
3255 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
3256 VHOST_LOG_DATA(dev->ifname, ERR,
3257 "%s: built-in vhost net backend is disabled.\n",
3258 __func__);
3259 return 0;
3260 }
3261
3262 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
3263 VHOST_LOG_DATA(dev->ifname, ERR,
3264 "%s: invalid virtqueue idx %d.\n",
3265 __func__, queue_id);
3266 return 0;
3267 }
3268
3269 vq = dev->virtqueue[queue_id];
3270
3271 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3272 return 0;
3273
3274 if (unlikely(!vq->enabled)) {
3275 count = 0;
3276 goto out_access_unlock;
3277 }
3278
3279 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3280 vhost_user_iotlb_rd_lock(vq);
3281
3282 if (unlikely(!vq->access_ok))
3283 if (unlikely(vring_translate(dev, vq) < 0)) {
3284 count = 0;
3285 goto out;
3286 }
3287
3288 /*
3289 * Construct a RARP broadcast packet and inject it at the head of
3290 * the "pkts" array, so that it looks like the guest itself sent it.
3291 * The broadcast_rarp flag is set when the VHOST_USER_SEND_RARP
3292 * message is received, typically after live migration, so switches
3293 * can refresh their MAC learning tables.
3294 *
3295 * broadcast_rarp shares a cacheline in the virtio_net structure with
3296 * fields accessed during enqueue, and __atomic_compare_exchange_n
3297 * performs a write when it does the compare and exchange. This
3298 * could result in false sharing between enqueue and dequeue.
3299 *
3300 * Prevent unnecessary false sharing by reading broadcast_rarp first
3301 * and only doing the compare and exchange if the read indicates it
3302 * is likely to be set.
3303 */
3304 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3305 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3306 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3307
3308 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3309 if (rarp_mbuf == NULL) {
3310 VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
3311 count = 0;
3312 goto out;
3313 }
3314 /*
3315 * Inject it at the head of the "pkts" array, so that the switch's
3316 * MAC learning table gets updated first.
3317 */
3318 pkts[0] = rarp_mbuf;
3319 vhost_queue_stats_update(dev, vq, pkts, 1);
3320 pkts++;
3321 count -= 1;
3322 }
3323
3324 if (vq_is_packed(dev)) {
3325 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3326 count = virtio_dev_tx_packed_legacy(dev, vq, mbuf_pool, pkts, count);
3327 else
3328 count = virtio_dev_tx_packed_compliant(dev, vq, mbuf_pool, pkts, count);
3329 } else {
3330 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3331 count = virtio_dev_tx_split_legacy(dev, vq, mbuf_pool, pkts, count);
3332 else
3333 count = virtio_dev_tx_split_compliant(dev, vq, mbuf_pool, pkts, count);
3334 }
3335
3336 vhost_queue_stats_update(dev, vq, pkts, count);
3337
3338out:
3339 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3340 vhost_user_iotlb_rd_unlock(vq);
3341
3342out_access_unlock:
3343 rte_spinlock_unlock(&vq->access_lock);
3344
3345 if (unlikely(rarp_mbuf != NULL))
3346 count += 1;
3347
3348 return count;
3349}
3350
3351static __rte_always_inline uint16_t
3352async_poll_dequeue_completed(struct virtio_net *dev, struct vhost_virtqueue *vq,
3353 struct rte_mbuf **pkts, uint16_t count, int16_t dma_id,
3354 uint16_t vchan_id, bool legacy_ol_flags)
3355{
3356 uint16_t start_idx, from, i;
3357 uint16_t nr_cpl_pkts = 0;
3358 struct async_inflight_info *pkts_info = vq->async->pkts_info;
3359
3360 vhost_async_dma_check_completed(dev, dma_id, vchan_id, VHOST_DMA_MAX_COPY_COMPLETE);
3361
3362 start_idx = async_get_first_inflight_pkt_idx(vq);
3363
3364 from = start_idx;
3365 while (vq->async->pkts_cmpl_flag[from] && count--) {
3366 vq->async->pkts_cmpl_flag[from] = false;
3367 from = (from + 1) % vq->size;
3368 nr_cpl_pkts++;
3369 }
3370
3371 if (nr_cpl_pkts == 0)
3372 return 0;
3373
3374 for (i = 0; i < nr_cpl_pkts; i++) {
3375 from = (start_idx + i) % vq->size;
3376 pkts[i] = pkts_info[from].mbuf;
3377
3378 if (virtio_net_with_host_offload(dev))
3379 vhost_dequeue_offload(dev, &pkts_info[from].nethdr, pkts[i],
3380 legacy_ol_flags);
3381 }
3382
3383 /* write back completed descs to used ring and update used idx */
3384 if (vq_is_packed(dev)) {
3385 write_back_completed_descs_packed(vq, nr_cpl_pkts);
3386 vhost_vring_call_packed(dev, vq);
3387 } else {
3388 write_back_completed_descs_split(vq, nr_cpl_pkts);
3389 __atomic_add_fetch(&vq->used->idx, nr_cpl_pkts, __ATOMIC_RELEASE);
3390 vhost_vring_call_split(dev, vq);
3391 }
3392 vq->async->pkts_inflight_n -= nr_cpl_pkts;
3393
3394 return nr_cpl_pkts;
3395}
3396
3397static __rte_always_inline uint16_t
3398virtio_dev_tx_async_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
3399 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
3400 int16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
3401{
3402 static bool allocerr_warned;
3403 bool dropped = false;
3404 uint16_t avail_entries;
3405 uint16_t pkt_idx, slot_idx = 0;
3406 uint16_t nr_done_pkts = 0;
3407 uint16_t pkt_err = 0;
3408 uint16_t n_xfer;
3409 struct vhost_async *async = vq->async;
3410 struct async_inflight_info *pkts_info = async->pkts_info;
3411 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
3412 uint16_t pkts_size = count;
3413
3414 /*
3415 * The ordering between avail index and
3416 * desc reads needs to be enforced.
3417 */
3418 avail_entries = __atomic_load_n(&vq->avail->idx, __ATOMIC_ACQUIRE) -
3419 vq->last_avail_idx;
3420 if (avail_entries == 0)
3421 goto out;
3422
3423 rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
3424
3425 async_iter_reset(async);
3426
3427 count = RTE_MIN(count, MAX_PKT_BURST);
3428 count = RTE_MIN(count, avail_entries);
3429 VHOST_LOG_DATA(dev->ifname, DEBUG, "about to dequeue %u buffers\n", count);
3430
3431 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
3432 goto out;
3433
3434 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
3435 uint16_t head_idx = 0;
3436 uint16_t nr_vec = 0;
3437 uint16_t to;
3438 uint32_t buf_len;
3439 int err;
3440 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3441 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
3442
3443 if (unlikely(fill_vec_buf_split(dev, vq, vq->last_avail_idx,
3444 &nr_vec, buf_vec,
3445 &head_idx, &buf_len,
3446 VHOST_ACCESS_RO) < 0)) {
3447 dropped = true;
3448 break;
3449 }
3450
3451 err = virtio_dev_pktmbuf_prep(dev, pkt, buf_len);
3452 if (unlikely(err)) {
3453 /*
3454 * mbuf allocation fails for jumbo packets when external
3455 * buffer allocation is not allowed and linear buffer
3456 * is required. Drop this packet.
3457 */
3458 if (!allocerr_warned) {
3459 VHOST_LOG_DATA(dev->ifname, ERR,
3460 "%s: Failed mbuf alloc of size %d from %s\n",
3461 __func__, buf_len, mbuf_pool->name);
3462 allocerr_warned = true;
3463 }
3464 dropped = true;
3465 break;
3466 }
3467
3468 slot_idx = (async->pkts_idx + pkt_idx) & (vq->size - 1);
3469 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkt, mbuf_pool,
3470 legacy_ol_flags, slot_idx, true);
3471 if (unlikely(err)) {
3472 if (!allocerr_warned) {
3473 VHOST_LOG_DATA(dev->ifname, ERR,
3474 "%s: Failed to offload copies to async channel.\n",
3475 __func__);
3476 allocerr_warned = true;
3477 }
3478 dropped = true;
3479 break;
3480 }
3481
3482 pkts_info[slot_idx].mbuf = pkt;
3483
3484 /* store used descs */
3485 to = async->desc_idx_split & (vq->size - 1);
3486 async->descs_split[to].id = head_idx;
3487 async->descs_split[to].len = 0;
3488 async->desc_idx_split++;
3489
3490 vq->last_avail_idx++;
3491 }
3492
3493 if (unlikely(dropped))
3494 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
3495
3496 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
3497 async->iov_iter, pkt_idx);
3498
3499 async->pkts_inflight_n += n_xfer;
3500
3501 pkt_err = pkt_idx - n_xfer;
3502 if (unlikely(pkt_err)) {
3503 VHOST_LOG_DATA(dev->ifname, DEBUG, "%s: failed to transfer data.\n",
3504 __func__);
3505
3506 pkt_idx = n_xfer;
3507 /* recover available ring */
3508 vq->last_avail_idx -= pkt_err;
3509
3510 /*
3511 * Recover async channel copy related structures and free the
3512 * pktmbufs of the error packets.
3513 */
3514 async->desc_idx_split -= pkt_err;
3515 while (pkt_err-- > 0) {
3516 rte_pktmbuf_free(pkts_info[slot_idx & (vq->size - 1)].mbuf);
3517 slot_idx--;
3518 }
3519 }
3520
3521 async->pkts_idx += pkt_idx;
3522 if (async->pkts_idx >= vq->size)
3523 async->pkts_idx -= vq->size;
3524
3525out:
3526 /* DMA device may serve other queues, unconditionally check completed copies */
3527 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, pkts_size,
3528 dma_id, vchan_id, legacy_ol_flags);
3529
3530 return nr_done_pkts;
3531}
3532
3533__rte_noinline
3534static uint16_t
3535virtio_dev_tx_async_split_legacy(struct virtio_net *dev,
3536 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3537 struct rte_mbuf **pkts, uint16_t count,
3538 int16_t dma_id, uint16_t vchan_id)
3539{
3540 return virtio_dev_tx_async_split(dev, vq, mbuf_pool,
3541 pkts, count, dma_id, vchan_id, true);
3542}
3543
3544__rte_noinline
3545static uint16_t
3546virtio_dev_tx_async_split_compliant(struct virtio_net *dev,
3547 struct vhost_virtqueue *vq, struct rte_mempool *mbuf_pool,
3548 struct rte_mbuf **pkts, uint16_t count,
3549 int16_t dma_id, uint16_t vchan_id)
3550{
3551 return virtio_dev_tx_async_split(dev, vq, mbuf_pool,
3552 pkts, count, dma_id, vchan_id, false);
3553}
3554
3555static __rte_always_inline void
3556vhost_async_shadow_dequeue_single_packed(struct vhost_virtqueue *vq, uint16_t buf_id)
3557{
3558 struct vhost_async *async = vq->async;
3559 uint16_t idx = async->buffer_idx_packed;
3560
3561 async->buffers_packed[idx].id = buf_id;
3562 async->buffers_packed[idx].len = 0;
3563 async->buffers_packed[idx].count = 1;
3564
3565 async->buffer_idx_packed++;
3566 if (async->buffer_idx_packed >= vq->size)
3567 async->buffer_idx_packed -= vq->size;
3568
3569}
3570
3571static __rte_always_inline int
3572virtio_dev_tx_async_single_packed(struct virtio_net *dev,
3573 struct vhost_virtqueue *vq,
3574 struct rte_mempool *mbuf_pool,
3575 struct rte_mbuf *pkts,
3576 uint16_t slot_idx,
3577 bool legacy_ol_flags)
3578{
3579 int err;
3580 uint16_t buf_id, desc_count = 0;
3581 uint16_t nr_vec = 0;
3582 uint32_t buf_len;
3583 struct buf_vector buf_vec[BUF_VECTOR_MAX];
3584 static bool allocerr_warned;
3585
3586 if (unlikely(fill_vec_buf_packed(dev, vq, vq->last_avail_idx, &desc_count,
3587 buf_vec, &nr_vec, &buf_id, &buf_len,
3588 VHOST_ACCESS_RO) < 0))
3589 return -1;
3590
3591 if (unlikely(virtio_dev_pktmbuf_prep(dev, pkts, buf_len))) {
3592 if (!allocerr_warned) {
3593 VHOST_LOG_DATA(dev->ifname, ERR, "Failed mbuf alloc of size %d from %s.\n",
3594 buf_len, mbuf_pool->name);
3595
3596 allocerr_warned = true;
3597 }
3598 return -1;
3599 }
3600
3601 err = desc_to_mbuf(dev, vq, buf_vec, nr_vec, pkts, mbuf_pool,
3602 legacy_ol_flags, slot_idx, true);
3603 if (unlikely(err)) {
3604 rte_pktmbuf_free(pkts);
3605 if (!allocerr_warned) {
3606 VHOST_LOG_DATA(dev->ifname, ERR, "Failed to copy desc to mbuf.\n");
3607 allocerr_warned = true;
3608 }
3609 return -1;
3610 }
3611
3612 /* update the async shadow ring with the dequeued descriptor */
3613 vhost_async_shadow_dequeue_single_packed(vq, buf_id);
3614
3615 return err;
3616}
3617
3618static __rte_always_inline uint16_t
3619virtio_dev_tx_async_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
3620 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
3621 uint16_t count, uint16_t dma_id, uint16_t vchan_id, bool legacy_ol_flags)
3622{
3623 uint16_t pkt_idx;
3624 uint16_t slot_idx = 0;
3625 uint16_t nr_done_pkts = 0;
3626 uint16_t pkt_err = 0;
3627 uint32_t n_xfer;
3628 struct vhost_async *async = vq->async;
3629 struct async_inflight_info *pkts_info = async->pkts_info;
3630 struct rte_mbuf *pkts_prealloc[MAX_PKT_BURST];
3631
3632 VHOST_LOG_DATA(dev->ifname, DEBUG, "(%d) about to dequeue %u buffers\n", dev->vid, count);
3633
3634 async_iter_reset(async);
3635
3636 if (rte_pktmbuf_alloc_bulk(mbuf_pool, pkts_prealloc, count))
3637 goto out;
3638
3639 for (pkt_idx = 0; pkt_idx < count; pkt_idx++) {
3640 struct rte_mbuf *pkt = pkts_prealloc[pkt_idx];
3641
3642 rte_prefetch0(&vq->desc_packed[vq->last_avail_idx]);
3643
3644 slot_idx = (async->pkts_idx + pkt_idx) % vq->size;
3645 if (unlikely(virtio_dev_tx_async_single_packed(dev, vq, mbuf_pool, pkt,
3646 slot_idx, legacy_ol_flags))) {
3647 rte_pktmbuf_free_bulk(&pkts_prealloc[pkt_idx], count - pkt_idx);
3648 break;
3649 }
3650
3651 pkts_info[slot_idx].mbuf = pkt;
3652
3653 vq_inc_last_avail_packed(vq, 1);
3654
3655 }
3656
3657 n_xfer = vhost_async_dma_transfer(dev, vq, dma_id, vchan_id, async->pkts_idx,
3658 async->iov_iter, pkt_idx);
3659
3660 async->pkts_inflight_n += n_xfer;
3661
3662 pkt_err = pkt_idx - n_xfer;
3663
3664 if (unlikely(pkt_err)) {
3665 pkt_idx -= pkt_err;
3666
3667 /*
3668 * Recover DMA-copy related structures and free pktmbufs of the DMA-error packets.
3669 */
3670 if (async->buffer_idx_packed >= pkt_err)
3671 async->buffer_idx_packed -= pkt_err;
3672 else
3673 async->buffer_idx_packed += vq->size - pkt_err;
3674
3675 while (pkt_err-- > 0) {
3676 rte_pktmbuf_free(pkts_info[slot_idx % vq->size].mbuf);
3677 slot_idx--;
3678 }
3679
3680 /* recover available ring */
3681 if (vq->last_avail_idx >= pkt_err) {
3682 vq->last_avail_idx -= pkt_err;
3683 } else {
3684 vq->last_avail_idx += vq->size - pkt_err;
3685 vq->avail_wrap_counter ^= 1;
3686 }
3687 }
3688
3689 async->pkts_idx += pkt_idx;
3690 if (async->pkts_idx >= vq->size)
3691 async->pkts_idx -= vq->size;
3692
3693out:
3694 nr_done_pkts = async_poll_dequeue_completed(dev, vq, pkts, count,
3695 dma_id, vchan_id, legacy_ol_flags);
3696
3697 return nr_done_pkts;
3698}
3699
3700__rte_noinline
3701static uint16_t
3702virtio_dev_tx_async_packed_legacy(struct virtio_net *dev, struct vhost_virtqueue *vq,
3703 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
3704 uint16_t count, uint16_t dma_id, uint16_t vchan_id)
3705{
3706 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
3707 pkts, count, dma_id, vchan_id, true);
3708}
3709
3710__rte_noinline
3711static uint16_t
3712virtio_dev_tx_async_packed_compliant(struct virtio_net *dev, struct vhost_virtqueue *vq,
3713 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts,
3714 uint16_t count, uint16_t dma_id, uint16_t vchan_id)
3715{
3716 return virtio_dev_tx_async_packed(dev, vq, mbuf_pool,
3717 pkts, count, dma_id, vchan_id, false);
3718}
3719
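/*
 * Async counterpart of rte_vhost_dequeue_burst(): only packets whose DMA
 * copies have completed are returned, and *nr_inflight reports how many
 * packets are still owned by the DMA device afterwards. A sketch:
 *
 *     nb_rx = rte_vhost_async_try_dequeue_burst(vid,
 *                     q * VIRTIO_QNUM + VIRTIO_TXQ, mbuf_pool, pkts,
 *                     MAX_PKT_BURST, &nr_inflight, dma_id, 0);
 */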
3720uint16_t
3721rte_vhost_async_try_dequeue_burst(int vid, uint16_t queue_id,
3722 struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count,
3723 int *nr_inflight, int16_t dma_id, uint16_t vchan_id)
3724{
3725 struct virtio_net *dev;
3726 struct rte_mbuf *rarp_mbuf = NULL;
3727 struct vhost_virtqueue *vq;
3728 int16_t success = 1;
3729
3730 dev = get_device(vid);
3731 if (!dev || !nr_inflight)
3732 return 0;
3733
3734 *nr_inflight = -1;
3735
3736 if (unlikely(!(dev->flags & VIRTIO_DEV_BUILTIN_VIRTIO_NET))) {
3737 VHOST_LOG_DATA(dev->ifname, ERR, "%s: built-in vhost net backend is disabled.\n",
3738 __func__);
3739 return 0;
3740 }
3741
3742 if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->nr_vring))) {
3743 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid virtqueue idx %d.\n",
3744 __func__, queue_id);
3745 return 0;
3746 }
3747
3748 if (unlikely(dma_id < 0 || dma_id >= RTE_DMADEV_DEFAULT_MAX)) {
3749 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid dma id %d.\n",
3750 __func__, dma_id);
3751 return 0;
3752 }
3753
3754 if (unlikely(!dma_copy_track[dma_id].vchans ||
3755 !dma_copy_track[dma_id].vchans[vchan_id].pkts_cmpl_flag_addr)) {
3756 VHOST_LOG_DATA(dev->ifname, ERR, "%s: invalid channel %d:%u.\n",
3757 __func__, dma_id, vchan_id);
3758 return 0;
3759 }
3760
3761 vq = dev->virtqueue[queue_id];
3762
3763 if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
3764 return 0;
3765
3766 if (unlikely(vq->enabled == 0)) {
3767 count = 0;
3768 goto out_access_unlock;
3769 }
3770
3771 if (unlikely(!vq->async)) {
3772 VHOST_LOG_DATA(dev->ifname, ERR, "%s: async not registered for queue id %d.\n",
3773 __func__, queue_id);
3774 count = 0;
3775 goto out_access_unlock;
3776 }
3777
3778 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3779 vhost_user_iotlb_rd_lock(vq);
3780
3781 if (unlikely(vq->access_ok == 0))
3782 if (unlikely(vring_translate(dev, vq) < 0)) {
3783 count = 0;
3784 goto out;
3785 }
3786
3787 /*
3788 * Construct a RARP broadcast packet and inject it at the head of
3789 * the "pkts" array, so that it looks like the guest itself sent it.
3790 * The broadcast_rarp flag is set when the VHOST_USER_SEND_RARP
3791 * message is received, typically after live migration, so switches
3792 * can refresh their MAC learning tables.
3793 *
3794 * broadcast_rarp shares a cacheline in the virtio_net structure with
3795 * fields accessed during enqueue, and __atomic_compare_exchange_n
3796 * performs a write when it does the compare and exchange. This
3797 * could result in false sharing between enqueue and dequeue.
3798 *
3799 * Prevent unnecessary false sharing by reading broadcast_rarp first
3800 * and only doing the compare and exchange if the read indicates it
3801 * is likely to be set.
3802 */
3803 if (unlikely(__atomic_load_n(&dev->broadcast_rarp, __ATOMIC_ACQUIRE) &&
3804 __atomic_compare_exchange_n(&dev->broadcast_rarp,
3805 &success, 0, 0, __ATOMIC_RELEASE, __ATOMIC_RELAXED))) {
3806
3807 rarp_mbuf = rte_net_make_rarp_packet(mbuf_pool, &dev->mac);
3808 if (rarp_mbuf == NULL) {
3809 VHOST_LOG_DATA(dev->ifname, ERR, "failed to make RARP packet.\n");
3810 count = 0;
3811 goto out;
3812 }
3813 /*
3814 * Inject it at the head of the "pkts" array, so that the switch's
3815 * MAC learning table gets updated first.
3816 */
3817 pkts[0] = rarp_mbuf;
3818 vhost_queue_stats_update(dev, vq, pkts, 1);
3819 pkts++;
3820 count -= 1;
3821 }
3822
3823 if (vq_is_packed(dev)) {
3824 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3825 count = virtio_dev_tx_async_packed_legacy(dev, vq, mbuf_pool,
3826 pkts, count, dma_id, vchan_id);
3827 else
3828 count = virtio_dev_tx_async_packed_compliant(dev, vq, mbuf_pool,
3829 pkts, count, dma_id, vchan_id);
3830 } else {
3831 if (dev->flags & VIRTIO_DEV_LEGACY_OL_FLAGS)
3832 count = virtio_dev_tx_async_split_legacy(dev, vq, mbuf_pool,
3833 pkts, count, dma_id, vchan_id);
3834 else
3835 count = virtio_dev_tx_async_split_compliant(dev, vq, mbuf_pool,
3836 pkts, count, dma_id, vchan_id);
3837 }
3838
3839 *nr_inflight = vq->async->pkts_inflight_n;
3840 vhost_queue_stats_update(dev, vq, pkts, count);
3841
3842out:
3843 if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
3844 vhost_user_iotlb_rd_unlock(vq);
3845
3846out_access_unlock:
3847 rte_spinlock_unlock(&vq->access_lock);
3848
3849 if (unlikely(rarp_mbuf != NULL))
3850 count += 1;
3851
3852 return count;
3853}
3854