#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <stdbool.h>
#include <sys/epoll.h>

#include <rte_mbuf.h>
#include <ethdev_driver.h>
#include <ethdev_vdev.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_net.h>
#include <bus_vdev_driver.h>
#include <rte_kvargs.h>
#include <rte_vhost.h>
#include <rte_spinlock.h>

#include "rte_eth_vhost.h"

RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE);

#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)

enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};

#define ETH_VHOST_IFACE_ARG "iface"
#define ETH_VHOST_QUEUES_ARG "queues"
#define ETH_VHOST_CLIENT_ARG "client"
#define ETH_VHOST_IOMMU_SUPPORT "iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT "postcopy-support"
#define ETH_VHOST_VIRTIO_NET_F_HOST_TSO "tso"
#define ETH_VHOST_LINEAR_BUF "linear-buffer"
#define ETH_VHOST_EXT_BUF "ext-buffer"
#define ETH_VHOST_LEGACY_OL_FLAGS "legacy-ol-flags"
#define VHOST_MAX_PKT_BURST 32

static const char *valid_arguments[] = {
	ETH_VHOST_IFACE_ARG,
	ETH_VHOST_QUEUES_ARG,
	ETH_VHOST_CLIENT_ARG,
	ETH_VHOST_IOMMU_SUPPORT,
	ETH_VHOST_POSTCOPY_SUPPORT,
	ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
	ETH_VHOST_LINEAR_BUF,
	ETH_VHOST_EXT_BUF,
	ETH_VHOST_LEGACY_OL_FLAGS,
	NULL
};

static struct rte_ether_addr base_eth_addr = {
	.addr_bytes = {
		0x56 /* V */,
		0x48 /* H */,
		0x4F /* O */,
		0x53 /* S */,
		0x54 /* T */,
		0x00
	}
};

struct vhost_stats {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t missed_pkts;
};

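/*
 * Per-queue state. The allow_queuing/while_queuing pair implements a
 * lockless handshake with the control path: the rx/tx burst functions
 * only run while allow_queuing is set, and raise while_queuing for the
 * duration of a burst so that update_queuing_status() can wait for
 * in-flight bursts to drain before the vhost device goes away.
 */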
struct vhost_queue {
	int vid;
	rte_atomic32_t allow_queuing;
	rte_atomic32_t while_queuing;
	struct pmd_internal *internal;
	struct rte_mempool *mb_pool;
	uint16_t port;
	uint16_t virtqueue_id;
	struct vhost_stats stats;
	int intr_enable;
	rte_spinlock_t intr_lock;
};

struct pmd_internal {
	rte_atomic32_t dev_attached;
	char *iface_name;
	uint64_t flags;
	uint64_t disable_flags;
	uint64_t features;
	uint16_t max_queues;
	int vid;
	rte_atomic32_t started;
	bool vlan_strip;
	bool rx_sw_csum;
	bool tx_sw_csum;
};

struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct rte_eth_link pmd_link = {
	.link_speed = 10000,
	.link_duplex = RTE_ETH_LINK_FULL_DUPLEX,
	.link_status = RTE_ETH_LINK_DOWN
};

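/*
 * Per-port vring state. cur[] holds the current enable state of each
 * vring as reported by vhost; seen[] tracks what the application has
 * already consumed through rte_eth_vhost_get_queue_event().
 */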
struct rte_vhost_vring_state {
	rte_spinlock_t lock;

	bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
	bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
	unsigned int index;
	unsigned int max_vring;
};

static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];

static int
vhost_dev_xstats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	int ret, i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		ret = rte_vhost_vring_stats_reset(vq->vid, vq->virtqueue_id);
		if (ret < 0)
			return ret;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		ret = rte_vhost_vring_stats_reset(vq->vid, vq->virtqueue_id);
		if (ret < 0)
			return ret;
	}

	return 0;
}

static int
vhost_dev_xstats_get_names(struct rte_eth_dev *dev,
		struct rte_eth_xstat_name *xstats_names,
		unsigned int limit)
{
	struct rte_vhost_stat_name *name;
	struct vhost_queue *vq;
	int ret, i, count = 0, nstats = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, NULL, 0);
		if (ret < 0)
			return ret;

		nstats += ret;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id, NULL, 0);
		if (ret < 0)
			return ret;

		nstats += ret;
	}

	if (!xstats_names || limit < (unsigned int)nstats)
		return nstats;

	name = calloc(nstats, sizeof(*name));
	if (!name)
		return -1;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id,
				name + count, nstats - count);
		if (ret < 0) {
			free(name);
			return ret;
		}

		count += ret;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		ret = rte_vhost_vring_stats_get_names(vq->vid, vq->virtqueue_id,
				name + count, nstats - count);
		if (ret < 0) {
			free(name);
			return ret;
		}

		count += ret;
	}

	for (i = 0; i < count; i++)
		strncpy(xstats_names[i].name, name[i].name, RTE_ETH_XSTATS_NAME_SIZE);

	free(name);

	return count;
}

static int
vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
		unsigned int n)
{
	struct rte_vhost_stat *stats;
	struct vhost_queue *vq;
	int ret, i, count = 0, nstats = 0;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, NULL, 0);
		if (ret < 0)
			return ret;

		nstats += ret;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id, NULL, 0);
		if (ret < 0)
			return ret;

		nstats += ret;
	}

	if (!xstats || n < (unsigned int)nstats)
		return nstats;

	stats = calloc(nstats, sizeof(*stats));
	if (!stats)
		return -1;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id,
				stats + count, nstats - count);
		if (ret < 0) {
			free(stats);
			return ret;
		}

		count += ret;
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		ret = rte_vhost_vring_stats_get(vq->vid, vq->virtqueue_id,
				stats + count, nstats - count);
		if (ret < 0) {
			free(stats);
			return ret;
		}

		count += ret;
	}

	for (i = 0; i < count; i++) {
		xstats[i].id = stats[i].id;
		xstats[i].value = stats[i].value;
	}

	free(stats);

	return nstats;
}

static void
vhost_dev_csum_configure(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &eth_dev->data->dev_conf.rxmode;
	const struct rte_eth_txmode *txmode = &eth_dev->data->dev_conf.txmode;

	internal->rx_sw_csum = false;
	internal->tx_sw_csum = false;

	/* SW checksum is not compatible with legacy mode */
	if (!(internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS))
		return;

	if (internal->features & (1ULL << VIRTIO_NET_F_CSUM)) {
		if (!(rxmode->offloads &
				(RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM))) {
			VHOST_LOG(NOTICE, "Rx csum will be done in SW, may impact performance.\n");
			internal->rx_sw_csum = true;
		}
	}

	if (!(internal->features & (1ULL << VIRTIO_NET_F_GUEST_CSUM))) {
		if (txmode->offloads &
				(RTE_ETH_TX_OFFLOAD_UDP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM)) {
			VHOST_LOG(NOTICE, "Tx csum will be done in SW, may impact performance.\n");
			internal->tx_sw_csum = true;
		}
	}
}

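/*
 * Compute the L4 checksum in software for a packet the application
 * asked to offload on Tx, needed when the guest did not negotiate
 * VIRTIO_NET_F_GUEST_CSUM and so cannot finish the checksum itself.
 */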
static void
vhost_dev_tx_sw_csum(struct rte_mbuf *mbuf)
{
	uint32_t hdr_len;
	uint16_t csum = 0, csum_offset;

	switch (mbuf->ol_flags & RTE_MBUF_F_TX_L4_MASK) {
	case RTE_MBUF_F_TX_L4_NO_CKSUM:
		return;
	case RTE_MBUF_F_TX_TCP_CKSUM:
		csum_offset = offsetof(struct rte_tcp_hdr, cksum);
		break;
	case RTE_MBUF_F_TX_UDP_CKSUM:
		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum);
		break;
	default:
		/* Unsupported packet type */
		return;
	}

	hdr_len = mbuf->l2_len + mbuf->l3_len;
	csum_offset += hdr_len;

	/* Prepare the pseudo-header checksum */
	if (rte_net_intel_cksum_prepare(mbuf) < 0)
		return;

	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0)
		return;

	csum = ~csum;

	/* See RFC768: a UDP checksum of zero means "no checksum" */
	if (unlikely((mbuf->packet_type & RTE_PTYPE_L4_UDP) && csum == 0))
		csum = 0xffff;

	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;

	mbuf->ol_flags &= ~RTE_MBUF_F_TX_L4_MASK;
	mbuf->ol_flags |= RTE_MBUF_F_TX_L4_NO_CKSUM;
}

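/*
 * Compute the L4 checksum in software for a packet received from the
 * guest with an unchecksummed payload (RTE_MBUF_F_RX_L4_CKSUM_NONE),
 * so the application always gets a packet with a valid checksum.
 */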
static void
vhost_dev_rx_sw_csum(struct rte_mbuf *mbuf)
{
	struct rte_net_hdr_lens hdr_lens;
	uint32_t ptype, hdr_len;
	uint16_t csum = 0, csum_offset;

	/* Return early if the L4 checksum was not offloaded */
	if ((mbuf->ol_flags & RTE_MBUF_F_RX_L4_CKSUM_MASK) != RTE_MBUF_F_RX_L4_CKSUM_NONE)
		return;

	ptype = rte_net_get_ptype(mbuf, &hdr_lens, RTE_PTYPE_ALL_MASK);

	hdr_len = hdr_lens.l2_len + hdr_lens.l3_len;

	switch (ptype & RTE_PTYPE_L4_MASK) {
	case RTE_PTYPE_L4_TCP:
		csum_offset = offsetof(struct rte_tcp_hdr, cksum) + hdr_len;
		break;
	case RTE_PTYPE_L4_UDP:
		csum_offset = offsetof(struct rte_udp_hdr, dgram_cksum) + hdr_len;
		break;
	default:
		/* Unsupported packet type */
		return;
	}

	/* The pseudo-header checksum is already performed, as per Virtio spec */
	if (rte_raw_cksum_mbuf(mbuf, hdr_len, rte_pktmbuf_pkt_len(mbuf) - hdr_len, &csum) < 0)
		return;

	csum = ~csum;

	/* See RFC768: a UDP checksum of zero means "no checksum" */
	if (unlikely((ptype & RTE_PTYPE_L4_UDP) && csum == 0))
		csum = 0xffff;

	if (rte_pktmbuf_data_len(mbuf) >= csum_offset + 1)
		*rte_pktmbuf_mtod_offset(mbuf, uint16_t *, csum_offset) = csum;

	mbuf->ol_flags &= ~RTE_MBUF_F_RX_L4_CKSUM_MASK;
	mbuf->ol_flags |= RTE_MBUF_F_RX_L4_CKSUM_GOOD;
}

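/*
 * Rx burst: dequeue packets the guest placed on its TX virtqueue and
 * hand them to the application, applying VLAN stripping and software
 * checksum as configured.
 */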
static uint16_t
eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_rx = 0;
	uint16_t nb_receive = nb_bufs;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	/* Dequeue packets from guest TX queue */
	while (nb_receive) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_receive,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
						  r->mb_pool, &bufs[nb_rx],
						  num);

		nb_rx += nb_pkts;
		nb_receive -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	r->stats.pkts += nb_rx;

	for (i = 0; likely(i < nb_rx); i++) {
		bufs[i]->port = r->port;
		bufs[i]->vlan_tci = 0;

		if (r->internal->vlan_strip)
			rte_vlan_strip(bufs[i]);

		if (r->internal->rx_sw_csum)
			vhost_dev_rx_sw_csum(bufs[i]);

		r->stats.bytes += bufs[i]->pkt_len;
	}

out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_rx;
}

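/*
 * Tx burst: enqueue packets on the guest's RX virtqueue. Successfully
 * sent mbufs are freed here (vhost copies the data); unsent ones are
 * left to the caller and accounted as missed packets.
 */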
static uint16_t
eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
{
	struct vhost_queue *r = q;
	uint16_t i, nb_tx = 0;
	uint16_t nb_send = 0;
	uint64_t nb_bytes = 0;
	uint64_t nb_missed = 0;

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		return 0;

	rte_atomic32_set(&r->while_queuing, 1);

	if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
		goto out;

	for (i = 0; i < nb_bufs; i++) {
		struct rte_mbuf *m = bufs[i];

		/* Do VLAN tag insertion */
		if (m->ol_flags & RTE_MBUF_F_TX_VLAN) {
			int error = rte_vlan_insert(&m);
			if (unlikely(error)) {
				rte_pktmbuf_free(m);
				continue;
			}
		}

		if (r->internal->tx_sw_csum)
			vhost_dev_tx_sw_csum(m);

		bufs[nb_send] = m;
		++nb_send;
	}

	/* Enqueue packets to guest RX queue */
	while (nb_send) {
		uint16_t nb_pkts;
		uint16_t num = (uint16_t)RTE_MIN(nb_send,
						 VHOST_MAX_PKT_BURST);

		nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
						  &bufs[nb_tx], num);

		nb_tx += nb_pkts;
		nb_send -= nb_pkts;
		if (nb_pkts < num)
			break;
	}

	for (i = 0; likely(i < nb_tx); i++)
		nb_bytes += bufs[i]->pkt_len;

	nb_missed = nb_bufs - nb_tx;

	r->stats.pkts += nb_tx;
	r->stats.bytes += nb_bytes;
	r->stats.missed_pkts += nb_missed;

	for (i = 0; likely(i < nb_tx); i++)
		rte_pktmbuf_free(bufs[i]);
out:
	rte_atomic32_set(&r->while_queuing, 0);

	return nb_tx;
}

static inline struct internal_list *
find_internal_resource(char *ifname)
{
	int found = 0;
	struct internal_list *list;
	struct pmd_internal *internal;

	if (ifname == NULL)
		return NULL;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		internal = list->eth_dev->data->dev_private;
		if (!strcmp(internal->iface_name, ifname)) {
			found = 1;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	if (!found)
		return NULL;

	return list;
}

static int
eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
{
	struct rte_intr_handle *handle = eth_dev->intr_handle;
	struct rte_epoll_event rev, *elist;
	int epfd, ret;

	if (handle == NULL)
		return 0;

	elist = rte_intr_elist_index_get(handle, rxq_idx);
	if (rte_intr_efds_index_get(handle, rxq_idx) == elist->fd)
		return 0;

	VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
			rxq_idx);

	if (elist->fd != -1)
		VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
				elist->fd);

	/*
	 * First remove the stale epoll event registered for the old
	 * kickfd, then install a new one for the current kickfd. This
	 * may be solved with a proper API in the future.
	 */
	epfd = elist->epfd;
	rev = *elist;
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
			elist);
	if (ret) {
		VHOST_LOG(ERR, "Delete epoll event failed.\n");
		return ret;
	}

	rev.fd = rte_intr_efds_index_get(handle, rxq_idx);
	if (rte_intr_elist_index_set(handle, rxq_idx, rev))
		return -rte_errno;

	elist = rte_intr_elist_index_get(handle, rxq_idx);
	ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd, elist);
	if (ret) {
		VHOST_LOG(ERR, "Add epoll event failed.\n");
		return ret;
	}

	return 0;
}

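/*
 * Rx interrupt control. An ethdev Rx queue qid maps to the guest's TX
 * vring, index (qid << 1) + 1; enabling the interrupt means asking
 * vhost to signal us through that vring's kickfd.
 */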
static int
eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int old_intr_enable, ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	rte_spinlock_lock(&vq->intr_lock);
	old_intr_enable = vq->intr_enable;
	vq->intr_enable = 1;
	ret = eth_vhost_update_intr(dev, qid);
	rte_spinlock_unlock(&vq->intr_lock);

	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
		vq->intr_enable = old_intr_enable;
		return ret;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
	rte_wmb();

	return ret;
}

static int
eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int ret = 0;

	vq = dev->data->rx_queues[qid];
	if (!vq) {
		VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
		return -1;
	}

	ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
	if (ret < 0) {
		VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
		return ret;
	}
	VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
	rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
	rte_wmb();

	vq->intr_enable = 0;

	return 0;
}

static void
eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
{
	struct rte_intr_handle *intr_handle = dev->intr_handle;

	if (intr_handle != NULL) {
		rte_intr_vec_list_free(intr_handle);
		rte_intr_instance_free(intr_handle);
	}
	dev->intr_handle = NULL;
}

static int
eth_vhost_install_intr(struct rte_eth_dev *dev)
{
	struct rte_vhost_vring vring;
	struct vhost_queue *vq;
	int nb_rxq = dev->data->nb_rx_queues;
	int i;
	int ret;

	/* uninstall previous existing intr */
	if (dev->intr_handle != NULL)
		eth_vhost_uninstall_intr(dev);

	dev->intr_handle = rte_intr_instance_alloc(RTE_INTR_INSTANCE_F_PRIVATE);
	if (dev->intr_handle == NULL) {
		VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
		return -ENOMEM;
	}
	if (rte_intr_efd_counter_size_set(dev->intr_handle, sizeof(uint64_t)))
		return -rte_errno;

	if (rte_intr_vec_list_alloc(dev->intr_handle, NULL, nb_rxq)) {
		VHOST_LOG(ERR,
			"Failed to allocate memory for interrupt vector\n");
		rte_intr_instance_free(dev->intr_handle);
		return -ENOMEM;
	}

	VHOST_LOG(INFO, "Prepare intr vec\n");
	for (i = 0; i < nb_rxq; i++) {
		if (rte_intr_vec_list_index_set(dev->intr_handle, i, RTE_INTR_VEC_RXTX_OFFSET + i))
			return -rte_errno;
		if (rte_intr_efds_index_set(dev->intr_handle, i, -1))
			return -rte_errno;
		vq = dev->data->rx_queues[i];
		if (!vq) {
			VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
			continue;
		}

		ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
		if (ret < 0) {
			VHOST_LOG(INFO,
				"Failed to get rxq-%d's vring, skip!\n", i);
			continue;
		}

		if (vring.kickfd < 0) {
			VHOST_LOG(INFO,
				"rxq-%d's kickfd is invalid, skip!\n", i);
			continue;
		}

		if (rte_intr_efds_index_set(dev->intr_handle, i, vring.kickfd))
			continue;
		VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
	}

	if (rte_intr_nb_efd_set(dev->intr_handle, nb_rxq))
		return -rte_errno;

	if (rte_intr_max_intr_set(dev->intr_handle, nb_rxq + 1))
		return -rte_errno;

	if (rte_intr_type_set(dev->intr_handle, RTE_INTR_HANDLE_VDEV))
		return -rte_errno;

	return 0;
}

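/*
 * Open or close the Rx/Tx gates of every queue depending on whether
 * the port is started and a vhost device is attached. With
 * wait_queuing set, also wait for in-flight bursts to finish before
 * returning.
 */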
static void
update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing)
{
	struct pmd_internal *internal = dev->data->dev_private;
	struct vhost_queue *vq;
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int allow_queuing = 1;

	if (!dev->data->rx_queues || !dev->data->tx_queues)
		return;

	if (rte_atomic32_read(&internal->started) == 0 ||
	    rte_atomic32_read(&internal->dev_attached) == 0)
		allow_queuing = 0;

	state = vring_states[dev->data->port_id];

	/* Wait until rx/tx_pkt_burst stops accessing vhost device */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		vq = dev->data->rx_queues[i];
		if (vq == NULL)
			continue;
		if (allow_queuing && state->cur[vq->virtqueue_id])
			rte_atomic32_set(&vq->allow_queuing, 1);
		else
			rte_atomic32_set(&vq->allow_queuing, 0);
		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		vq = dev->data->tx_queues[i];
		if (vq == NULL)
			continue;
		if (allow_queuing && state->cur[vq->virtqueue_id])
			rte_atomic32_set(&vq->allow_queuing, 1);
		else
			rte_atomic32_set(&vq->allow_queuing, 0);
		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
			rte_pause();
	}
}

static void
queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
{
	struct vhost_queue *vq;
	int i;

	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
		vq = eth_dev->data->rx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
	for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
		vq = eth_dev->data->tx_queues[i];
		if (!vq)
			continue;
		vq->vid = internal->vid;
		vq->internal = internal;
		vq->port = eth_dev->data->port_id;
	}
}

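/*
 * vhost-user callback: a new device (frontend connection) is ready.
 * Bind it to the matching ethdev, set up queues and interrupts, and
 * report link up.
 */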
static int
new_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	struct pmd_internal *internal;
	struct rte_eth_conf *dev_conf;
	unsigned int i;
	char ifname[PATH_MAX];
#ifdef RTE_LIBRTE_VHOST_NUMA
	int newnode;
#endif

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;
	dev_conf = &eth_dev->data->dev_conf;

#ifdef RTE_LIBRTE_VHOST_NUMA
	newnode = rte_vhost_get_numa_node(vid);
	if (newnode >= 0)
		eth_dev->data->numa_node = newnode;
#endif

	if (rte_vhost_get_negotiated_features(vid, &internal->features)) {
		VHOST_LOG(ERR, "Failed to get device features\n");
		return -1;
	}

	internal->vid = vid;
	if (rte_atomic32_read(&internal->started) == 1) {
		queue_setup(eth_dev, internal);

		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	} else {
		VHOST_LOG(INFO, "RX/TX queues do not exist yet\n");
	}

	for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
		rte_vhost_enable_guest_notification(vid, i, 0);

	rte_vhost_get_mtu(vid, &eth_dev->data->mtu);

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_UP;

	vhost_dev_csum_configure(eth_dev);

	rte_atomic32_set(&internal->dev_attached, 1);
	update_queuing_status(eth_dev, false);

	VHOST_LOG(INFO, "Vhost device %d created\n", vid);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);

	return 0;
}

static void
destroy_device(int vid)
{
	struct rte_eth_dev *eth_dev;
	struct pmd_internal *internal;
	struct vhost_queue *vq;
	struct internal_list *list;
	char ifname[PATH_MAX];
	unsigned int i;
	struct rte_vhost_vring_state *state;

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return;
	}
	eth_dev = list->eth_dev;
	internal = eth_dev->data->dev_private;

	rte_atomic32_set(&internal->dev_attached, 0);
	update_queuing_status(eth_dev, true);

	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;

	if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
			vq = eth_dev->data->rx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
		for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
			vq = eth_dev->data->tx_queues[i];
			if (!vq)
				continue;
			vq->vid = -1;
		}
	}

	state = vring_states[eth_dev->data->port_id];
	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		state->cur[i] = false;
		state->seen[i] = false;
	}
	state->max_vring = 0;
	rte_spinlock_unlock(&state->lock);

	VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
	eth_vhost_uninstall_intr(eth_dev);

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
}

static int
vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
{
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct vhost_queue *vq;
	struct rte_vhost_vring vring;
	int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
	int ret = 0;

	/*
	 * The vring kickfd may be changed after the new device notification.
	 * Update it when the vring state is updated.
	 */
	if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
	    rte_atomic32_read(&internal->dev_attached) &&
	    rte_atomic32_read(&internal->started) &&
	    dev_conf->intr_conf.rxq) {
		ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
		if (ret) {
			VHOST_LOG(ERR, "Failed to get vring %d information.\n",
					vring_id);
			return ret;
		}

		if (rte_intr_efds_index_set(eth_dev->intr_handle, rx_idx,
					    vring.kickfd))
			return -rte_errno;

		vq = eth_dev->data->rx_queues[rx_idx];
		if (!vq) {
			VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
			return -1;
		}

		rte_spinlock_lock(&vq->intr_lock);
		if (vq->intr_enable)
			ret = eth_vhost_update_intr(eth_dev, rx_idx);
		rte_spinlock_unlock(&vq->intr_lock);
	}

	return ret;
}

static int
vring_state_changed(int vid, uint16_t vring, int enable)
{
	struct rte_vhost_vring_state *state;
	struct rte_eth_dev *eth_dev;
	struct internal_list *list;
	char ifname[PATH_MAX];

	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
	list = find_internal_resource(ifname);
	if (list == NULL) {
		VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
		return -1;
	}

	eth_dev = list->eth_dev;

	state = vring_states[eth_dev->data->port_id];

	if (enable && vring_conf_update(vid, eth_dev, vring))
		VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
			  (int)vring);

	rte_spinlock_lock(&state->lock);
	if (state->cur[vring] == enable) {
		rte_spinlock_unlock(&state->lock);
		return 0;
	}
	state->cur[vring] = enable;
	state->max_vring = RTE_MAX(vring, state->max_vring);
	rte_spinlock_unlock(&state->lock);

	update_queuing_status(eth_dev, false);

	VHOST_LOG(INFO, "vring%u is %s\n",
		  vring, enable ? "enabled" : "disabled");

	rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);

	return 0;
}

static struct rte_vhost_device_ops vhost_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
	.vring_state_changed = vring_state_changed,
};

static int
vhost_driver_setup(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct internal_list *list = NULL;
	struct rte_vhost_vring_state *vring_state = NULL;
	unsigned int numa_node = eth_dev->device->numa_node;
	const char *name = eth_dev->device->name;

	/* Don't try to setup again if it has already been done. */
	list = find_internal_resource(internal->iface_name);
	if (list)
		return 0;

	list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
	if (list == NULL)
		return -1;

	vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
					 0, numa_node);
	if (vring_state == NULL)
		goto free_list;

	list->eth_dev = eth_dev;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_INSERT_TAIL(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);

	rte_spinlock_init(&vring_state->lock);
	vring_states[eth_dev->data->port_id] = vring_state;

	if (rte_vhost_driver_register(internal->iface_name, internal->flags))
		goto list_remove;

	if (internal->disable_flags) {
		if (rte_vhost_driver_disable_features(internal->iface_name,
						      internal->disable_flags))
			goto drv_unreg;
	}

	if (rte_vhost_driver_callback_register(internal->iface_name,
					       &vhost_ops) < 0) {
		VHOST_LOG(ERR, "Can't register callbacks\n");
		goto drv_unreg;
	}

	if (rte_vhost_driver_start(internal->iface_name) < 0) {
		VHOST_LOG(ERR, "Failed to start driver for %s\n",
			  internal->iface_name);
		goto drv_unreg;
	}

	return 0;

drv_unreg:
	rte_vhost_driver_unregister(internal->iface_name);
list_remove:
	vring_states[eth_dev->data->port_id] = NULL;
	pthread_mutex_lock(&internal_list_lock);
	TAILQ_REMOVE(&internal_list, list, next);
	pthread_mutex_unlock(&internal_list_lock);
	rte_free(vring_state);
free_list:
	rte_free(list);

	return -1;
}

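/*
 * A minimal usage sketch (assuming an application-side handler for
 * RTE_ETH_EVENT_QUEUE_STATE): drain all pending queue state events.
 *
 *	struct rte_eth_vhost_queue_event ev;
 *
 *	while (rte_eth_vhost_get_queue_event(port_id, &ev) == 0)
 *		printf("queue %u (%s) is now %s\n", ev.queue_id,
 *			ev.rx ? "rx" : "tx",
 *			ev.enable ? "enabled" : "disabled");
 */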
int
rte_eth_vhost_get_queue_event(uint16_t port_id,
		struct rte_eth_vhost_queue_event *event)
{
	struct rte_vhost_vring_state *state;
	unsigned int i;
	int idx;

	if (port_id >= RTE_MAX_ETHPORTS) {
		VHOST_LOG(ERR, "Invalid port id\n");
		return -1;
	}

	state = vring_states[port_id];
	if (!state) {
		VHOST_LOG(ERR, "Unused port\n");
		return -1;
	}

	rte_spinlock_lock(&state->lock);
	for (i = 0; i <= state->max_vring; i++) {
		idx = state->index++ % (state->max_vring + 1);

		if (state->cur[idx] != state->seen[idx]) {
			state->seen[idx] = state->cur[idx];
			event->queue_id = idx / 2;
			/* Odd vrings are the guest's TX side, i.e. our Rx queues */
			event->rx = idx & 1;
			event->enable = state->cur[idx];
			rte_spinlock_unlock(&state->lock);
			return 0;
		}
	}
	rte_spinlock_unlock(&state->lock);

	return -1;
}

int
rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
{
	struct internal_list *list;
	struct rte_eth_dev *eth_dev;
	struct vhost_queue *vq;
	int vid = -1;

	if (!rte_eth_dev_is_valid_port(port_id))
		return -1;

	pthread_mutex_lock(&internal_list_lock);

	TAILQ_FOREACH(list, &internal_list, next) {
		eth_dev = list->eth_dev;
		if (eth_dev->data->port_id == port_id) {
			vq = eth_dev->data->rx_queues[0];
			if (vq)
				vid = vq->vid;
			break;
		}
	}

	pthread_mutex_unlock(&internal_list_lock);

	return vid;
}

static int
eth_dev_configure(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;
	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	/* NOTE: the same process has to operate a vhost interface
	 * from beginning to end (from eth_dev configure to eth_dev close).
	 * It is user's responsibility at the moment.
	 */
	if (vhost_driver_setup(dev) < 0)
		return -1;

	internal->vlan_strip = !!(rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_STRIP);

	vhost_dev_csum_configure(dev);

	return 0;
}

static int
eth_dev_start(struct rte_eth_dev *eth_dev)
{
	struct pmd_internal *internal = eth_dev->data->dev_private;
	struct rte_eth_conf *dev_conf = &eth_dev->data->dev_conf;

	queue_setup(eth_dev, internal);

	if (rte_atomic32_read(&internal->dev_attached) == 1) {
		if (dev_conf->intr_conf.rxq) {
			if (eth_vhost_install_intr(eth_dev) < 0) {
				VHOST_LOG(INFO,
					"Failed to install interrupt handler.\n");
				return -1;
			}
		}
	}

	rte_atomic32_set(&internal->started, 1);
	update_queuing_status(eth_dev, false);

	return 0;
}

static int
eth_dev_stop(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal = dev->data->dev_private;

	dev->data->dev_started = 0;
	rte_atomic32_set(&internal->started, 0);
	update_queuing_status(dev, true);

	return 0;
}

static int
eth_dev_close(struct rte_eth_dev *dev)
{
	struct pmd_internal *internal;
	struct internal_list *list;
	unsigned int i;
	int ret;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	internal = dev->data->dev_private;
	if (!internal)
		return 0;

	ret = eth_dev_stop(dev);

	list = find_internal_resource(internal->iface_name);
	if (list) {
		rte_vhost_driver_unregister(internal->iface_name);
		pthread_mutex_lock(&internal_list_lock);
		TAILQ_REMOVE(&internal_list, list, next);
		pthread_mutex_unlock(&internal_list_lock);
		rte_free(list);
	}

	if (dev->data->rx_queues)
		for (i = 0; i < dev->data->nb_rx_queues; i++)
			rte_free(dev->data->rx_queues[i]);

	if (dev->data->tx_queues)
		for (i = 0; i < dev->data->nb_tx_queues; i++)
			rte_free(dev->data->tx_queues[i]);

	rte_free(internal->iface_name);
	rte_free(internal);

	dev->data->dev_private = NULL;

	rte_free(vring_states[dev->data->port_id]);
	vring_states[dev->data->port_id] = NULL;

	return ret;
}

static int
eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		   uint16_t nb_rx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_rxconf *rx_conf __rte_unused,
		   struct rte_mempool *mb_pool)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
		return -ENOMEM;
	}

	vq->mb_pool = mb_pool;
	vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->rx_queues[rx_queue_id] = vq;

	return 0;
}

static int
eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		   uint16_t nb_tx_desc __rte_unused,
		   unsigned int socket_id,
		   const struct rte_eth_txconf *tx_conf __rte_unused)
{
	struct vhost_queue *vq;

	vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
			RTE_CACHE_LINE_SIZE, socket_id);
	if (vq == NULL) {
		VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
		return -ENOMEM;
	}

	vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
	rte_spinlock_init(&vq->intr_lock);
	dev->data->tx_queues[tx_queue_id] = vq;

	return 0;
}

static int
eth_dev_info(struct rte_eth_dev *dev,
	     struct rte_eth_dev_info *dev_info)
{
	struct pmd_internal *internal;

	internal = dev->data->dev_private;
	if (internal == NULL) {
		VHOST_LOG(ERR, "Invalid device specified\n");
		return -ENODEV;
	}

	dev_info->max_mac_addrs = 1;
	dev_info->max_rx_pktlen = (uint32_t)-1;
	dev_info->max_rx_queues = internal->max_queues;
	dev_info->max_tx_queues = internal->max_queues;
	dev_info->min_rx_bufsize = 0;

	dev_info->tx_offload_capa = RTE_ETH_TX_OFFLOAD_MULTI_SEGS |
				RTE_ETH_TX_OFFLOAD_VLAN_INSERT;
	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
		dev_info->tx_offload_capa |= RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
			RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
	}

	dev_info->rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP;
	if (internal->flags & RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS) {
		dev_info->rx_offload_capa |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
			RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
	}

	return 0;
}

static int
eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	unsigned int i;
	unsigned long rx_total = 0, tx_total = 0, tx_missed_total = 0;
	unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
	struct vhost_queue *vq;

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		stats->q_ipackets[i] = vq->stats.pkts;
		rx_total += stats->q_ipackets[i];

		stats->q_ibytes[i] = vq->stats.bytes;
		rx_total_bytes += stats->q_ibytes[i];
	}

	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
			i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		stats->q_opackets[i] = vq->stats.pkts;
		tx_missed_total += vq->stats.missed_pkts;
		tx_total += stats->q_opackets[i];

		stats->q_obytes[i] = vq->stats.bytes;
		tx_total_bytes += stats->q_obytes[i];
	}

	stats->ipackets = rx_total;
	stats->opackets = tx_total;
	/* Packets the guest could not accept are reported as output errors */
	stats->oerrors = tx_missed_total;
	stats->ibytes = rx_total_bytes;
	stats->obytes = tx_total_bytes;

	return 0;
}

static int
eth_stats_reset(struct rte_eth_dev *dev)
{
	struct vhost_queue *vq;
	unsigned int i;

	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		if (dev->data->rx_queues[i] == NULL)
			continue;
		vq = dev->data->rx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
	}
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		if (dev->data->tx_queues[i] == NULL)
			continue;
		vq = dev->data->tx_queues[i];
		vq->stats.pkts = 0;
		vq->stats.bytes = 0;
		vq->stats.missed_pkts = 0;
	}

	return 0;
}

static void
eth_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	rte_free(dev->data->rx_queues[qid]);
}

static void
eth_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
{
	rte_free(dev->data->tx_queues[qid]);
}

static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	/*
	 * vHost does not hang onto mbufs: eth_vhost_tx() copies packet data
	 * and releases the mbuf, so there is nothing to clean up.
	 */
	return 0;
}

static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}

static uint32_t
eth_rx_queue_count(void *rx_queue)
{
	struct vhost_queue *vq;

	vq = rx_queue;
	if (vq == NULL)
		return 0;

	return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
}

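/*
 * Power-monitor support: the opaque[] slots carry the value, mask and
 * match flag obtained from rte_vhost_get_monitor_addr(). The callback
 * tells the power library whether entering the power-optimized state
 * should proceed (0) or be aborted because work is pending (-1).
 */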
#define CLB_VAL_IDX 0
#define CLB_MSK_IDX 1
#define CLB_MATCH_IDX 2
static int
vhost_monitor_callback(const uint64_t value,
		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
{
	const uint64_t m = opaque[CLB_MSK_IDX];
	const uint64_t v = opaque[CLB_VAL_IDX];
	const uint64_t c = opaque[CLB_MATCH_IDX];

	if (c)
		return (value & m) == v ? -1 : 0;
	else
		return (value & m) == v ? 0 : -1;
}

static int
vhost_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
{
	struct vhost_queue *vq = rx_queue;
	struct rte_vhost_power_monitor_cond vhost_pmc;
	int ret;

	if (vq == NULL)
		return -EINVAL;
	ret = rte_vhost_get_monitor_addr(vq->vid, vq->virtqueue_id,
			&vhost_pmc);
	if (ret < 0)
		return -EINVAL;
	pmc->addr = vhost_pmc.addr;
	pmc->opaque[CLB_VAL_IDX] = vhost_pmc.val;
	pmc->opaque[CLB_MSK_IDX] = vhost_pmc.mask;
	pmc->opaque[CLB_MATCH_IDX] = vhost_pmc.match;
	pmc->size = vhost_pmc.size;
	pmc->fn = vhost_monitor_callback;

	return 0;
}

static const struct eth_dev_ops ops = {
	.dev_start = eth_dev_start,
	.dev_stop = eth_dev_stop,
	.dev_close = eth_dev_close,
	.dev_configure = eth_dev_configure,
	.dev_infos_get = eth_dev_info,
	.rx_queue_setup = eth_rx_queue_setup,
	.tx_queue_setup = eth_tx_queue_setup,
	.rx_queue_release = eth_rx_queue_release,
	.tx_queue_release = eth_tx_queue_release,
	.tx_done_cleanup = eth_tx_done_cleanup,
	.link_update = eth_link_update,
	.stats_get = eth_stats_get,
	.stats_reset = eth_stats_reset,
	.xstats_reset = vhost_dev_xstats_reset,
	.xstats_get = vhost_dev_xstats_get,
	.xstats_get_names = vhost_dev_xstats_get_names,
	.rx_queue_intr_enable = eth_rxq_intr_enable,
	.rx_queue_intr_disable = eth_rxq_intr_disable,
	.get_monitor_addr = vhost_get_monitor_addr,
};

static int
eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
		     int16_t queues, const unsigned int numa_node, uint64_t flags,
		     uint64_t disable_flags)
{
	const char *name = rte_vdev_device_name(dev);
	struct rte_eth_dev_data *data;
	struct pmd_internal *internal = NULL;
	struct rte_eth_dev *eth_dev = NULL;
	struct rte_ether_addr *eth_addr = NULL;

	VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
		  numa_node);

	/* reserve an ethdev entry */
	eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
	if (eth_dev == NULL)
		goto error;
	data = eth_dev->data;

	eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
	if (eth_addr == NULL)
		goto error;
	data->mac_addrs = eth_addr;
	*eth_addr = base_eth_addr;
	eth_addr->addr_bytes[5] = eth_dev->data->port_id;

	/* now put it all together
	 * - store queue data in internal,
	 * - point eth_dev_data to internal,
	 * - and point eth_dev structure to new eth_dev_data structure
	 */
	internal = eth_dev->data->dev_private;
	internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
						 0, numa_node);
	if (internal->iface_name == NULL)
		goto error;
	strcpy(internal->iface_name, iface_name);

	data->nb_rx_queues = queues;
	data->nb_tx_queues = queues;
	internal->max_queues = queues;
	internal->vid = -1;
	internal->flags = flags;
	internal->disable_flags = disable_flags;
	data->dev_link = pmd_link;
	data->dev_flags = RTE_ETH_DEV_INTR_LSC |
				RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
	data->promiscuous = 1;
	data->all_multicast = 1;

	eth_dev->dev_ops = &ops;
	eth_dev->rx_queue_count = eth_rx_queue_count;

	/* finally assign rx and tx ops */
	eth_dev->rx_pkt_burst = eth_vhost_rx;
	eth_dev->tx_pkt_burst = eth_vhost_tx;

	rte_eth_dev_probing_finish(eth_dev);
	return 0;

error:
	if (internal)
		rte_free(internal->iface_name);
	rte_eth_dev_release_port(eth_dev);

	return -1;
}

static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **iface_name = extra_args;

	if (value == NULL)
		return -1;

	*iface_name = value;

	return 0;
}

static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	*n = (uint16_t)strtoul(value, NULL, 0);
	if (*n == USHRT_MAX && errno == ERANGE)
		return -1;

	return 0;
}

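/*
 * Probe a vhost vdev. Typical invocation (a sketch; the socket path is
 * only an example):
 *
 *	--vdev 'net_vhost0,iface=/tmp/sock0,queues=1,client=1'
 */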
static int
rte_pmd_vhost_probe(struct rte_vdev_device *dev)
{
	struct rte_kvargs *kvlist = NULL;
	int ret = 0;
	char *iface_name;
	uint16_t queues;
	uint64_t flags = RTE_VHOST_USER_NET_STATS_ENABLE;
	uint64_t disable_flags = 0;
	int client_mode = 0;
	int iommu_support = 0;
	int postcopy_support = 0;
	int tso = 0;
	int linear_buf = 0;
	int ext_buf = 0;
	int legacy_ol_flags = 0;
	struct rte_eth_dev *eth_dev;
	const char *name = rte_vdev_device_name(dev);

	VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);

	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
		eth_dev = rte_eth_dev_attach_secondary(name);
		if (!eth_dev) {
			VHOST_LOG(ERR, "Failed to probe %s\n", name);
			return -1;
		}
		eth_dev->rx_pkt_burst = eth_vhost_rx;
		eth_dev->tx_pkt_burst = eth_vhost_tx;
		eth_dev->dev_ops = &ops;
		if (dev->device.numa_node == SOCKET_ID_ANY)
			dev->device.numa_node = rte_socket_id();
		eth_dev->device = &dev->device;
		rte_eth_dev_probing_finish(eth_dev);
		return 0;
	}

	kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
	if (kvlist == NULL)
		return -1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
					 &open_iface, &iface_name);
		if (ret < 0)
			goto out_free;
	} else {
		ret = -1;
		goto out_free;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
					 &open_int, &queues);
		if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
			goto out_free;
	} else
		queues = 1;

	if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
					 &open_int, &client_mode);
		if (ret < 0)
			goto out_free;

		if (client_mode)
			flags |= RTE_VHOST_USER_CLIENT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
					 &open_int, &iommu_support);
		if (ret < 0)
			goto out_free;

		if (iommu_support)
			flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
		ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
					 &open_int, &postcopy_support);
		if (ret < 0)
			goto out_free;

		if (postcopy_support)
			flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
				&open_int, &tso);
		if (ret < 0)
			goto out_free;
	}

	if (tso == 0) {
		disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
		disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_LINEAR_BUF,
				&open_int, &linear_buf);
		if (ret < 0)
			goto out_free;

		if (linear_buf == 1)
			flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_EXT_BUF,
				&open_int, &ext_buf);
		if (ret < 0)
			goto out_free;

		if (ext_buf == 1)
			flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
	}

	if (rte_kvargs_count(kvlist, ETH_VHOST_LEGACY_OL_FLAGS) == 1) {
		ret = rte_kvargs_process(kvlist,
				ETH_VHOST_LEGACY_OL_FLAGS,
				&open_int, &legacy_ol_flags);
		if (ret < 0)
			goto out_free;
	}

	if (legacy_ol_flags == 0)
		flags |= RTE_VHOST_USER_NET_COMPLIANT_OL_FLAGS;

	if (dev->device.numa_node == SOCKET_ID_ANY)
		dev->device.numa_node = rte_socket_id();

	ret = eth_dev_vhost_create(dev, iface_name, queues,
				   dev->device.numa_node, flags, disable_flags);
	if (ret == -1)
		VHOST_LOG(ERR, "Failed to create %s\n", name);

out_free:
	rte_kvargs_free(kvlist);
	return ret;
}

static int
rte_pmd_vhost_remove(struct rte_vdev_device *dev)
{
	const char *name;
	struct rte_eth_dev *eth_dev = NULL;

	name = rte_vdev_device_name(dev);
	VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);

	/* find an ethdev entry */
	eth_dev = rte_eth_dev_allocated(name);
	if (eth_dev == NULL)
		return 0;

	eth_dev_close(eth_dev);
	rte_eth_dev_release_port(eth_dev);

	return 0;
}

static struct rte_vdev_driver pmd_vhost_drv = {
	.probe = rte_pmd_vhost_probe,
	.remove = rte_pmd_vhost_remove,
};

RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1> "
	"tso=<0|1> "
	"linear-buffer=<0|1> "
	"ext-buffer=<0|1> "
	"legacy-ol-flags=<0|1>");