1
2
3
4
5#include <unistd.h>
6#include <pthread.h>
7#include <stdbool.h>
8#include <sys/epoll.h>
9
10#include <rte_mbuf.h>
11#include <ethdev_driver.h>
12#include <ethdev_vdev.h>
13#include <rte_malloc.h>
14#include <rte_memcpy.h>
15#include <rte_bus_vdev.h>
16#include <rte_kvargs.h>
17#include <rte_vhost.h>
18#include <rte_spinlock.h>
19
20#include "rte_eth_vhost.h"
21
/* Log type of this driver, registered with NOTICE as its default level. */
RTE_LOG_REGISTER_DEFAULT(vhost_logtype, NOTICE);

/* Log a message at RTE_LOG_<level> under the driver's own log type. */
#define VHOST_LOG(level, ...) \
	rte_log(RTE_LOG_ ## level, vhost_logtype, __VA_ARGS__)
26
/* Ring position inside one virtio queue pair, plus the rings-per-pair count. */
enum {
	VIRTIO_RXQ,
	VIRTIO_TXQ,
	VIRTIO_QNUM
};

/* Key names of the device arguments listed in valid_arguments[]. */
#define ETH_VHOST_IFACE_ARG		"iface"
#define ETH_VHOST_QUEUES_ARG		"queues"
#define ETH_VHOST_CLIENT_ARG		"client"
#define ETH_VHOST_IOMMU_SUPPORT		"iommu-support"
#define ETH_VHOST_POSTCOPY_SUPPORT	"postcopy-support"
#define ETH_VHOST_VIRTIO_NET_F_HOST_TSO	"tso"
#define ETH_VHOST_LINEAR_BUF		"linear-buffer"
#define ETH_VHOST_EXT_BUF		"ext-buffer"

/* Largest chunk passed to one vhost enqueue/dequeue call by the datapath. */
#define VHOST_MAX_PKT_BURST 32
38
39static const char *valid_arguments[] = {
40 ETH_VHOST_IFACE_ARG,
41 ETH_VHOST_QUEUES_ARG,
42 ETH_VHOST_CLIENT_ARG,
43 ETH_VHOST_IOMMU_SUPPORT,
44 ETH_VHOST_POSTCOPY_SUPPORT,
45 ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
46 ETH_VHOST_LINEAR_BUF,
47 ETH_VHOST_EXT_BUF,
48 NULL
49};
50
51static struct rte_ether_addr base_eth_addr = {
52 .addr_bytes = {
53 0x56 ,
54 0x48 ,
55 0x4F ,
56 0x53 ,
57 0x54 ,
58 0x00
59 }
60};
61
/*
 * Indexes into vhost_stats.xstats[].
 *
 * The packet-size buckets must keep their current order and values:
 * vhost_update_packet_xstats() computes the index of the 65..1023-byte
 * buckets arithmetically from the packet length instead of naming them.
 */
enum vhost_xstats_pkts {
	/* packet-size buckets */
	VHOST_UNDERSIZE_PKT = 0,
	VHOST_64_PKT,
	VHOST_65_TO_127_PKT,
	VHOST_128_TO_255_PKT,
	VHOST_256_TO_511_PKT,
	VHOST_512_TO_1023_PKT,
	VHOST_1024_TO_1522_PKT,
	VHOST_1523_TO_MAX_PKT,
	/* destination address class */
	VHOST_BROADCAST_PKT,
	VHOST_MULTICAST_PKT,
	VHOST_UNICAST_PKT,
	/* totals */
	VHOST_PKT,
	VHOST_BYTE,
	VHOST_MISSED_PKT,
	/* error counters */
	VHOST_ERRORS_PKT,
	VHOST_ERRORS_FRAGMENTED,
	VHOST_ERRORS_JABBER,
	VHOST_UNKNOWN_PROTOCOL,
	/* number of counters; sizes vhost_stats.xstats[] */
	VHOST_XSTATS_MAX,
};
83
84struct vhost_stats {
85 uint64_t pkts;
86 uint64_t bytes;
87 uint64_t missed_pkts;
88 uint64_t xstats[VHOST_XSTATS_MAX];
89};
90
91struct vhost_queue {
92 int vid;
93 rte_atomic32_t allow_queuing;
94 rte_atomic32_t while_queuing;
95 struct pmd_internal *internal;
96 struct rte_mempool *mb_pool;
97 uint16_t port;
98 uint16_t virtqueue_id;
99 struct vhost_stats stats;
100 int intr_enable;
101 rte_spinlock_t intr_lock;
102};
103
104struct pmd_internal {
105 rte_atomic32_t dev_attached;
106 char *iface_name;
107 uint64_t flags;
108 uint64_t disable_flags;
109 uint16_t max_queues;
110 int vid;
111 rte_atomic32_t started;
112 uint8_t vlan_strip;
113};
114
/* Node of the global list of ports that have a registered vhost driver. */
struct internal_list {
	TAILQ_ENTRY(internal_list) next;
	struct rte_eth_dev *eth_dev;
};

TAILQ_HEAD(internal_list_head, internal_list);

/* List of registered ports, searched by interface name. */
static struct internal_list_head internal_list =
	TAILQ_HEAD_INITIALIZER(internal_list);

/* Serialises insertion, removal and traversal of internal_list. */
static pthread_mutex_t internal_list_lock = PTHREAD_MUTEX_INITIALIZER;
125
126static struct rte_eth_link pmd_link = {
127 .link_speed = 10000,
128 .link_duplex = ETH_LINK_FULL_DUPLEX,
129 .link_status = ETH_LINK_DOWN
130};
131
132struct rte_vhost_vring_state {
133 rte_spinlock_t lock;
134
135 bool cur[RTE_MAX_QUEUES_PER_PORT * 2];
136 bool seen[RTE_MAX_QUEUES_PER_PORT * 2];
137 unsigned int index;
138 unsigned int max_vring;
139};
140
141static struct rte_vhost_vring_state *vring_states[RTE_MAX_ETHPORTS];
142
/* Size of the name buffer in a vhost_xstats_name_off entry. */
#define VHOST_XSTATS_NAME_SIZE 64

/* Maps an extended statistic name to the counter's byte offset in vhost_queue. */
struct vhost_xstats_name_off {
	char name[VHOST_XSTATS_NAME_SIZE];	/* name without rx_/tx_ prefix */
	uint64_t offset;			/* offsetof() into struct vhost_queue */
};
149
150
151static const struct vhost_xstats_name_off vhost_rxport_stat_strings[] = {
152 {"good_packets",
153 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
154 {"total_bytes",
155 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
156 {"missed_pkts",
157 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
158 {"broadcast_packets",
159 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
160 {"multicast_packets",
161 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
162 {"unicast_packets",
163 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
164 {"undersize_packets",
165 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
166 {"size_64_packets",
167 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
168 {"size_65_to_127_packets",
169 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
170 {"size_128_to_255_packets",
171 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
172 {"size_256_to_511_packets",
173 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
174 {"size_512_to_1023_packets",
175 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
176 {"size_1024_to_1522_packets",
177 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
178 {"size_1523_to_max_packets",
179 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
180 {"errors_with_bad_CRC",
181 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
182 {"fragmented_errors",
183 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_FRAGMENTED])},
184 {"jabber_errors",
185 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_JABBER])},
186 {"unknown_protos_packets",
187 offsetof(struct vhost_queue, stats.xstats[VHOST_UNKNOWN_PROTOCOL])},
188};
189
190
191static const struct vhost_xstats_name_off vhost_txport_stat_strings[] = {
192 {"good_packets",
193 offsetof(struct vhost_queue, stats.xstats[VHOST_PKT])},
194 {"total_bytes",
195 offsetof(struct vhost_queue, stats.xstats[VHOST_BYTE])},
196 {"missed_pkts",
197 offsetof(struct vhost_queue, stats.xstats[VHOST_MISSED_PKT])},
198 {"broadcast_packets",
199 offsetof(struct vhost_queue, stats.xstats[VHOST_BROADCAST_PKT])},
200 {"multicast_packets",
201 offsetof(struct vhost_queue, stats.xstats[VHOST_MULTICAST_PKT])},
202 {"unicast_packets",
203 offsetof(struct vhost_queue, stats.xstats[VHOST_UNICAST_PKT])},
204 {"undersize_packets",
205 offsetof(struct vhost_queue, stats.xstats[VHOST_UNDERSIZE_PKT])},
206 {"size_64_packets",
207 offsetof(struct vhost_queue, stats.xstats[VHOST_64_PKT])},
208 {"size_65_to_127_packets",
209 offsetof(struct vhost_queue, stats.xstats[VHOST_65_TO_127_PKT])},
210 {"size_128_to_255_packets",
211 offsetof(struct vhost_queue, stats.xstats[VHOST_128_TO_255_PKT])},
212 {"size_256_to_511_packets",
213 offsetof(struct vhost_queue, stats.xstats[VHOST_256_TO_511_PKT])},
214 {"size_512_to_1023_packets",
215 offsetof(struct vhost_queue, stats.xstats[VHOST_512_TO_1023_PKT])},
216 {"size_1024_to_1522_packets",
217 offsetof(struct vhost_queue, stats.xstats[VHOST_1024_TO_1522_PKT])},
218 {"size_1523_to_max_packets",
219 offsetof(struct vhost_queue, stats.xstats[VHOST_1523_TO_MAX_PKT])},
220 {"errors_with_bad_CRC",
221 offsetof(struct vhost_queue, stats.xstats[VHOST_ERRORS_PKT])},
222};
223
/* Number of entries in vhost_rxport_stat_strings[]. */
#define VHOST_NB_XSTATS_RXPORT \
	(sizeof(vhost_rxport_stat_strings) / \
	 sizeof(vhost_rxport_stat_strings[0]))

/* Number of entries in vhost_txport_stat_strings[]. */
#define VHOST_NB_XSTATS_TXPORT \
	(sizeof(vhost_txport_stat_strings) / \
	 sizeof(vhost_txport_stat_strings[0]))
229
230static int
231vhost_dev_xstats_reset(struct rte_eth_dev *dev)
232{
233 struct vhost_queue *vq = NULL;
234 unsigned int i = 0;
235
236 for (i = 0; i < dev->data->nb_rx_queues; i++) {
237 vq = dev->data->rx_queues[i];
238 if (!vq)
239 continue;
240 memset(&vq->stats, 0, sizeof(vq->stats));
241 }
242 for (i = 0; i < dev->data->nb_tx_queues; i++) {
243 vq = dev->data->tx_queues[i];
244 if (!vq)
245 continue;
246 memset(&vq->stats, 0, sizeof(vq->stats));
247 }
248
249 return 0;
250}
251
252static int
253vhost_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
254 struct rte_eth_xstat_name *xstats_names,
255 unsigned int limit __rte_unused)
256{
257 unsigned int t = 0;
258 int count = 0;
259 int nstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
260
261 if (!xstats_names)
262 return nstats;
263 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
264 snprintf(xstats_names[count].name,
265 sizeof(xstats_names[count].name),
266 "rx_%s", vhost_rxport_stat_strings[t].name);
267 count++;
268 }
269 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
270 snprintf(xstats_names[count].name,
271 sizeof(xstats_names[count].name),
272 "tx_%s", vhost_txport_stat_strings[t].name);
273 count++;
274 }
275 return count;
276}
277
278static int
279vhost_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
280 unsigned int n)
281{
282 unsigned int i;
283 unsigned int t;
284 unsigned int count = 0;
285 struct vhost_queue *vq = NULL;
286 unsigned int nxstats = VHOST_NB_XSTATS_RXPORT + VHOST_NB_XSTATS_TXPORT;
287
288 if (n < nxstats)
289 return nxstats;
290
291 for (t = 0; t < VHOST_NB_XSTATS_RXPORT; t++) {
292 xstats[count].value = 0;
293 for (i = 0; i < dev->data->nb_rx_queues; i++) {
294 vq = dev->data->rx_queues[i];
295 if (!vq)
296 continue;
297 xstats[count].value +=
298 *(uint64_t *)(((char *)vq)
299 + vhost_rxport_stat_strings[t].offset);
300 }
301 xstats[count].id = count;
302 count++;
303 }
304 for (t = 0; t < VHOST_NB_XSTATS_TXPORT; t++) {
305 xstats[count].value = 0;
306 for (i = 0; i < dev->data->nb_tx_queues; i++) {
307 vq = dev->data->tx_queues[i];
308 if (!vq)
309 continue;
310 xstats[count].value +=
311 *(uint64_t *)(((char *)vq)
312 + vhost_txport_stat_strings[t].offset);
313 }
314 xstats[count].id = count;
315 count++;
316 }
317 return count;
318}
319
320static inline void
321vhost_count_xcast_packets(struct vhost_queue *vq,
322 struct rte_mbuf *mbuf)
323{
324 struct rte_ether_addr *ea = NULL;
325 struct vhost_stats *pstats = &vq->stats;
326
327 ea = rte_pktmbuf_mtod(mbuf, struct rte_ether_addr *);
328 if (rte_is_multicast_ether_addr(ea)) {
329 if (rte_is_broadcast_ether_addr(ea))
330 pstats->xstats[VHOST_BROADCAST_PKT]++;
331 else
332 pstats->xstats[VHOST_MULTICAST_PKT]++;
333 } else {
334 pstats->xstats[VHOST_UNICAST_PKT]++;
335 }
336}
337
338static void
339vhost_update_packet_xstats(struct vhost_queue *vq, struct rte_mbuf **bufs,
340 uint16_t count, uint64_t nb_bytes,
341 uint64_t nb_missed)
342{
343 uint32_t pkt_len = 0;
344 uint64_t i = 0;
345 uint64_t index;
346 struct vhost_stats *pstats = &vq->stats;
347
348 pstats->xstats[VHOST_BYTE] += nb_bytes;
349 pstats->xstats[VHOST_MISSED_PKT] += nb_missed;
350 pstats->xstats[VHOST_UNICAST_PKT] += nb_missed;
351
352 for (i = 0; i < count ; i++) {
353 pstats->xstats[VHOST_PKT]++;
354 pkt_len = bufs[i]->pkt_len;
355 if (pkt_len == 64) {
356 pstats->xstats[VHOST_64_PKT]++;
357 } else if (pkt_len > 64 && pkt_len < 1024) {
358 index = (sizeof(pkt_len) * 8)
359 - __builtin_clz(pkt_len) - 5;
360 pstats->xstats[index]++;
361 } else {
362 if (pkt_len < 64)
363 pstats->xstats[VHOST_UNDERSIZE_PKT]++;
364 else if (pkt_len <= 1522)
365 pstats->xstats[VHOST_1024_TO_1522_PKT]++;
366 else if (pkt_len > 1522)
367 pstats->xstats[VHOST_1523_TO_MAX_PKT]++;
368 }
369 vhost_count_xcast_packets(vq, bufs[i]);
370 }
371}
372
373static uint16_t
374eth_vhost_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
375{
376 struct vhost_queue *r = q;
377 uint16_t i, nb_rx = 0;
378 uint16_t nb_receive = nb_bufs;
379 uint64_t nb_bytes = 0;
380
381 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
382 return 0;
383
384 rte_atomic32_set(&r->while_queuing, 1);
385
386 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
387 goto out;
388
389
390 while (nb_receive) {
391 uint16_t nb_pkts;
392 uint16_t num = (uint16_t)RTE_MIN(nb_receive,
393 VHOST_MAX_PKT_BURST);
394
395 nb_pkts = rte_vhost_dequeue_burst(r->vid, r->virtqueue_id,
396 r->mb_pool, &bufs[nb_rx],
397 num);
398
399 nb_rx += nb_pkts;
400 nb_receive -= nb_pkts;
401 if (nb_pkts < num)
402 break;
403 }
404
405 r->stats.pkts += nb_rx;
406
407 for (i = 0; likely(i < nb_rx); i++) {
408 bufs[i]->port = r->port;
409 bufs[i]->vlan_tci = 0;
410
411 if (r->internal->vlan_strip)
412 rte_vlan_strip(bufs[i]);
413
414 nb_bytes += bufs[i]->pkt_len;
415 }
416
417 r->stats.bytes += nb_bytes;
418 vhost_update_packet_xstats(r, bufs, nb_rx, nb_bytes, 0);
419
420out:
421 rte_atomic32_set(&r->while_queuing, 0);
422
423 return nb_rx;
424}
425
426static uint16_t
427eth_vhost_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
428{
429 struct vhost_queue *r = q;
430 uint16_t i, nb_tx = 0;
431 uint16_t nb_send = 0;
432 uint64_t nb_bytes = 0;
433 uint64_t nb_missed = 0;
434
435 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
436 return 0;
437
438 rte_atomic32_set(&r->while_queuing, 1);
439
440 if (unlikely(rte_atomic32_read(&r->allow_queuing) == 0))
441 goto out;
442
443 for (i = 0; i < nb_bufs; i++) {
444 struct rte_mbuf *m = bufs[i];
445
446
447 if (m->ol_flags & PKT_TX_VLAN_PKT) {
448 int error = rte_vlan_insert(&m);
449 if (unlikely(error)) {
450 rte_pktmbuf_free(m);
451 continue;
452 }
453 }
454
455 bufs[nb_send] = m;
456 ++nb_send;
457 }
458
459
460 while (nb_send) {
461 uint16_t nb_pkts;
462 uint16_t num = (uint16_t)RTE_MIN(nb_send,
463 VHOST_MAX_PKT_BURST);
464
465 nb_pkts = rte_vhost_enqueue_burst(r->vid, r->virtqueue_id,
466 &bufs[nb_tx], num);
467
468 nb_tx += nb_pkts;
469 nb_send -= nb_pkts;
470 if (nb_pkts < num)
471 break;
472 }
473
474 for (i = 0; likely(i < nb_tx); i++)
475 nb_bytes += bufs[i]->pkt_len;
476
477 nb_missed = nb_bufs - nb_tx;
478
479 r->stats.pkts += nb_tx;
480 r->stats.bytes += nb_bytes;
481 r->stats.missed_pkts += nb_bufs - nb_tx;
482
483 vhost_update_packet_xstats(r, bufs, nb_tx, nb_bytes, nb_missed);
484
485
486
487
488
489 for (i = nb_tx; i < nb_bufs; i++)
490 vhost_count_xcast_packets(r, bufs[i]);
491
492 for (i = 0; likely(i < nb_tx); i++)
493 rte_pktmbuf_free(bufs[i]);
494out:
495 rte_atomic32_set(&r->while_queuing, 0);
496
497 return nb_tx;
498}
499
500static inline struct internal_list *
501find_internal_resource(char *ifname)
502{
503 int found = 0;
504 struct internal_list *list;
505 struct pmd_internal *internal;
506
507 if (ifname == NULL)
508 return NULL;
509
510 pthread_mutex_lock(&internal_list_lock);
511
512 TAILQ_FOREACH(list, &internal_list, next) {
513 internal = list->eth_dev->data->dev_private;
514 if (!strcmp(internal->iface_name, ifname)) {
515 found = 1;
516 break;
517 }
518 }
519
520 pthread_mutex_unlock(&internal_list_lock);
521
522 if (!found)
523 return NULL;
524
525 return list;
526}
527
528static int
529eth_vhost_update_intr(struct rte_eth_dev *eth_dev, uint16_t rxq_idx)
530{
531 struct rte_intr_handle *handle = eth_dev->intr_handle;
532 struct rte_epoll_event rev;
533 int epfd, ret;
534
535 if (!handle)
536 return 0;
537
538 if (handle->efds[rxq_idx] == handle->elist[rxq_idx].fd)
539 return 0;
540
541 VHOST_LOG(INFO, "kickfd for rxq-%d was changed, updating handler.\n",
542 rxq_idx);
543
544 if (handle->elist[rxq_idx].fd != -1)
545 VHOST_LOG(ERR, "Unexpected previous kickfd value (Got %d, expected -1).\n",
546 handle->elist[rxq_idx].fd);
547
548
549
550
551
552
553 epfd = handle->elist[rxq_idx].epfd;
554 rev = handle->elist[rxq_idx];
555 ret = rte_epoll_ctl(epfd, EPOLL_CTL_DEL, rev.fd,
556 &handle->elist[rxq_idx]);
557 if (ret) {
558 VHOST_LOG(ERR, "Delete epoll event failed.\n");
559 return ret;
560 }
561
562 rev.fd = handle->efds[rxq_idx];
563 handle->elist[rxq_idx] = rev;
564 ret = rte_epoll_ctl(epfd, EPOLL_CTL_ADD, rev.fd,
565 &handle->elist[rxq_idx]);
566 if (ret) {
567 VHOST_LOG(ERR, "Add epoll event failed.\n");
568 return ret;
569 }
570
571 return 0;
572}
573
574static int
575eth_rxq_intr_enable(struct rte_eth_dev *dev, uint16_t qid)
576{
577 struct rte_vhost_vring vring;
578 struct vhost_queue *vq;
579 int old_intr_enable, ret = 0;
580
581 vq = dev->data->rx_queues[qid];
582 if (!vq) {
583 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
584 return -1;
585 }
586
587 rte_spinlock_lock(&vq->intr_lock);
588 old_intr_enable = vq->intr_enable;
589 vq->intr_enable = 1;
590 ret = eth_vhost_update_intr(dev, qid);
591 rte_spinlock_unlock(&vq->intr_lock);
592
593 if (ret < 0) {
594 VHOST_LOG(ERR, "Failed to update rxq%d's intr\n", qid);
595 vq->intr_enable = old_intr_enable;
596 return ret;
597 }
598
599 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
600 if (ret < 0) {
601 VHOST_LOG(ERR, "Failed to get rxq%d's vring\n", qid);
602 return ret;
603 }
604 VHOST_LOG(INFO, "Enable interrupt for rxq%d\n", qid);
605 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 1);
606 rte_wmb();
607
608 return ret;
609}
610
611static int
612eth_rxq_intr_disable(struct rte_eth_dev *dev, uint16_t qid)
613{
614 struct rte_vhost_vring vring;
615 struct vhost_queue *vq;
616 int ret = 0;
617
618 vq = dev->data->rx_queues[qid];
619 if (!vq) {
620 VHOST_LOG(ERR, "rxq%d is not setup yet\n", qid);
621 return -1;
622 }
623
624 ret = rte_vhost_get_vhost_vring(vq->vid, (qid << 1) + 1, &vring);
625 if (ret < 0) {
626 VHOST_LOG(ERR, "Failed to get rxq%d's vring", qid);
627 return ret;
628 }
629 VHOST_LOG(INFO, "Disable interrupt for rxq%d\n", qid);
630 rte_vhost_enable_guest_notification(vq->vid, (qid << 1) + 1, 0);
631 rte_wmb();
632
633 vq->intr_enable = 0;
634
635 return 0;
636}
637
638static void
639eth_vhost_uninstall_intr(struct rte_eth_dev *dev)
640{
641 struct rte_intr_handle *intr_handle = dev->intr_handle;
642
643 if (intr_handle) {
644 if (intr_handle->intr_vec)
645 free(intr_handle->intr_vec);
646 free(intr_handle);
647 }
648
649 dev->intr_handle = NULL;
650}
651
652static int
653eth_vhost_install_intr(struct rte_eth_dev *dev)
654{
655 struct rte_vhost_vring vring;
656 struct vhost_queue *vq;
657 int nb_rxq = dev->data->nb_rx_queues;
658 int i;
659 int ret;
660
661
662 if (dev->intr_handle)
663 eth_vhost_uninstall_intr(dev);
664
665 dev->intr_handle = malloc(sizeof(*dev->intr_handle));
666 if (!dev->intr_handle) {
667 VHOST_LOG(ERR, "Fail to allocate intr_handle\n");
668 return -ENOMEM;
669 }
670 memset(dev->intr_handle, 0, sizeof(*dev->intr_handle));
671
672 dev->intr_handle->efd_counter_size = sizeof(uint64_t);
673
674 dev->intr_handle->intr_vec =
675 malloc(nb_rxq * sizeof(dev->intr_handle->intr_vec[0]));
676
677 if (!dev->intr_handle->intr_vec) {
678 VHOST_LOG(ERR,
679 "Failed to allocate memory for interrupt vector\n");
680 free(dev->intr_handle);
681 return -ENOMEM;
682 }
683
684 VHOST_LOG(INFO, "Prepare intr vec\n");
685 for (i = 0; i < nb_rxq; i++) {
686 dev->intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + i;
687 dev->intr_handle->efds[i] = -1;
688 vq = dev->data->rx_queues[i];
689 if (!vq) {
690 VHOST_LOG(INFO, "rxq-%d not setup yet, skip!\n", i);
691 continue;
692 }
693
694 ret = rte_vhost_get_vhost_vring(vq->vid, (i << 1) + 1, &vring);
695 if (ret < 0) {
696 VHOST_LOG(INFO,
697 "Failed to get rxq-%d's vring, skip!\n", i);
698 continue;
699 }
700
701 if (vring.kickfd < 0) {
702 VHOST_LOG(INFO,
703 "rxq-%d's kickfd is invalid, skip!\n", i);
704 continue;
705 }
706 dev->intr_handle->efds[i] = vring.kickfd;
707 VHOST_LOG(INFO, "Installed intr vec for rxq-%d\n", i);
708 }
709
710 dev->intr_handle->nb_efd = nb_rxq;
711 dev->intr_handle->max_intr = nb_rxq + 1;
712 dev->intr_handle->type = RTE_INTR_HANDLE_VDEV;
713
714 return 0;
715}
716
717static void
718update_queuing_status(struct rte_eth_dev *dev)
719{
720 struct pmd_internal *internal = dev->data->dev_private;
721 struct vhost_queue *vq;
722 unsigned int i;
723 int allow_queuing = 1;
724
725 if (!dev->data->rx_queues || !dev->data->tx_queues)
726 return;
727
728 if (rte_atomic32_read(&internal->started) == 0 ||
729 rte_atomic32_read(&internal->dev_attached) == 0)
730 allow_queuing = 0;
731
732
733 for (i = 0; i < dev->data->nb_rx_queues; i++) {
734 vq = dev->data->rx_queues[i];
735 if (vq == NULL)
736 continue;
737 rte_atomic32_set(&vq->allow_queuing, allow_queuing);
738 while (rte_atomic32_read(&vq->while_queuing))
739 rte_pause();
740 }
741
742 for (i = 0; i < dev->data->nb_tx_queues; i++) {
743 vq = dev->data->tx_queues[i];
744 if (vq == NULL)
745 continue;
746 rte_atomic32_set(&vq->allow_queuing, allow_queuing);
747 while (rte_atomic32_read(&vq->while_queuing))
748 rte_pause();
749 }
750}
751
752static void
753queue_setup(struct rte_eth_dev *eth_dev, struct pmd_internal *internal)
754{
755 struct vhost_queue *vq;
756 int i;
757
758 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
759 vq = eth_dev->data->rx_queues[i];
760 if (!vq)
761 continue;
762 vq->vid = internal->vid;
763 vq->internal = internal;
764 vq->port = eth_dev->data->port_id;
765 }
766 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
767 vq = eth_dev->data->tx_queues[i];
768 if (!vq)
769 continue;
770 vq->vid = internal->vid;
771 vq->internal = internal;
772 vq->port = eth_dev->data->port_id;
773 }
774}
775
776static int
777new_device(int vid)
778{
779 struct rte_eth_dev *eth_dev;
780 struct internal_list *list;
781 struct pmd_internal *internal;
782 struct rte_eth_conf *dev_conf;
783 unsigned i;
784 char ifname[PATH_MAX];
785#ifdef RTE_LIBRTE_VHOST_NUMA
786 int newnode;
787#endif
788
789 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
790 list = find_internal_resource(ifname);
791 if (list == NULL) {
792 VHOST_LOG(INFO, "Invalid device name: %s\n", ifname);
793 return -1;
794 }
795
796 eth_dev = list->eth_dev;
797 internal = eth_dev->data->dev_private;
798 dev_conf = ð_dev->data->dev_conf;
799
800#ifdef RTE_LIBRTE_VHOST_NUMA
801 newnode = rte_vhost_get_numa_node(vid);
802 if (newnode >= 0)
803 eth_dev->data->numa_node = newnode;
804#endif
805
806 internal->vid = vid;
807 if (rte_atomic32_read(&internal->started) == 1) {
808 queue_setup(eth_dev, internal);
809
810 if (dev_conf->intr_conf.rxq) {
811 if (eth_vhost_install_intr(eth_dev) < 0) {
812 VHOST_LOG(INFO,
813 "Failed to install interrupt handler.");
814 return -1;
815 }
816 }
817 } else {
818 VHOST_LOG(INFO, "RX/TX queues not exist yet\n");
819 }
820
821 for (i = 0; i < rte_vhost_get_vring_num(vid); i++)
822 rte_vhost_enable_guest_notification(vid, i, 0);
823
824 rte_vhost_get_mtu(vid, ð_dev->data->mtu);
825
826 eth_dev->data->dev_link.link_status = ETH_LINK_UP;
827
828 rte_atomic32_set(&internal->dev_attached, 1);
829 update_queuing_status(eth_dev);
830
831 VHOST_LOG(INFO, "Vhost device %d created\n", vid);
832
833 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
834
835 return 0;
836}
837
838static void
839destroy_device(int vid)
840{
841 struct rte_eth_dev *eth_dev;
842 struct pmd_internal *internal;
843 struct vhost_queue *vq;
844 struct internal_list *list;
845 char ifname[PATH_MAX];
846 unsigned i;
847 struct rte_vhost_vring_state *state;
848
849 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
850 list = find_internal_resource(ifname);
851 if (list == NULL) {
852 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
853 return;
854 }
855 eth_dev = list->eth_dev;
856 internal = eth_dev->data->dev_private;
857
858 rte_atomic32_set(&internal->dev_attached, 0);
859 update_queuing_status(eth_dev);
860
861 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
862
863 if (eth_dev->data->rx_queues && eth_dev->data->tx_queues) {
864 for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
865 vq = eth_dev->data->rx_queues[i];
866 if (!vq)
867 continue;
868 vq->vid = -1;
869 }
870 for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
871 vq = eth_dev->data->tx_queues[i];
872 if (!vq)
873 continue;
874 vq->vid = -1;
875 }
876 }
877
878 state = vring_states[eth_dev->data->port_id];
879 rte_spinlock_lock(&state->lock);
880 for (i = 0; i <= state->max_vring; i++) {
881 state->cur[i] = false;
882 state->seen[i] = false;
883 }
884 state->max_vring = 0;
885 rte_spinlock_unlock(&state->lock);
886
887 VHOST_LOG(INFO, "Vhost device %d destroyed\n", vid);
888 eth_vhost_uninstall_intr(eth_dev);
889
890 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL);
891}
892
893static int
894vring_conf_update(int vid, struct rte_eth_dev *eth_dev, uint16_t vring_id)
895{
896 struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf;
897 struct pmd_internal *internal = eth_dev->data->dev_private;
898 struct vhost_queue *vq;
899 struct rte_vhost_vring vring;
900 int rx_idx = vring_id % 2 ? (vring_id - 1) >> 1 : -1;
901 int ret = 0;
902
903
904
905
906
907 if (rx_idx >= 0 && rx_idx < eth_dev->data->nb_rx_queues &&
908 rte_atomic32_read(&internal->dev_attached) &&
909 rte_atomic32_read(&internal->started) &&
910 dev_conf->intr_conf.rxq) {
911 ret = rte_vhost_get_vhost_vring(vid, vring_id, &vring);
912 if (ret) {
913 VHOST_LOG(ERR, "Failed to get vring %d information.\n",
914 vring_id);
915 return ret;
916 }
917 eth_dev->intr_handle->efds[rx_idx] = vring.kickfd;
918
919 vq = eth_dev->data->rx_queues[rx_idx];
920 if (!vq) {
921 VHOST_LOG(ERR, "rxq%d is not setup yet\n", rx_idx);
922 return -1;
923 }
924
925 rte_spinlock_lock(&vq->intr_lock);
926 if (vq->intr_enable)
927 ret = eth_vhost_update_intr(eth_dev, rx_idx);
928 rte_spinlock_unlock(&vq->intr_lock);
929 }
930
931 return ret;
932}
933
934static int
935vring_state_changed(int vid, uint16_t vring, int enable)
936{
937 struct rte_vhost_vring_state *state;
938 struct rte_eth_dev *eth_dev;
939 struct internal_list *list;
940 char ifname[PATH_MAX];
941
942 rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
943 list = find_internal_resource(ifname);
944 if (list == NULL) {
945 VHOST_LOG(ERR, "Invalid interface name: %s\n", ifname);
946 return -1;
947 }
948
949 eth_dev = list->eth_dev;
950
951 state = vring_states[eth_dev->data->port_id];
952
953 if (enable && vring_conf_update(vid, eth_dev, vring))
954 VHOST_LOG(INFO, "Failed to update vring-%d configuration.\n",
955 (int)vring);
956
957 rte_spinlock_lock(&state->lock);
958 if (state->cur[vring] == enable) {
959 rte_spinlock_unlock(&state->lock);
960 return 0;
961 }
962 state->cur[vring] = enable;
963 state->max_vring = RTE_MAX(vring, state->max_vring);
964 rte_spinlock_unlock(&state->lock);
965
966 VHOST_LOG(INFO, "vring%u is %s\n",
967 vring, enable ? "enabled" : "disabled");
968
969 rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_QUEUE_STATE, NULL);
970
971 return 0;
972}
973
974static struct vhost_device_ops vhost_ops = {
975 .new_device = new_device,
976 .destroy_device = destroy_device,
977 .vring_state_changed = vring_state_changed,
978};
979
980static int
981vhost_driver_setup(struct rte_eth_dev *eth_dev)
982{
983 struct pmd_internal *internal = eth_dev->data->dev_private;
984 struct internal_list *list = NULL;
985 struct rte_vhost_vring_state *vring_state = NULL;
986 unsigned int numa_node = eth_dev->device->numa_node;
987 const char *name = eth_dev->device->name;
988
989
990 list = find_internal_resource(internal->iface_name);
991 if (list)
992 return 0;
993
994 list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node);
995 if (list == NULL)
996 return -1;
997
998 vring_state = rte_zmalloc_socket(name, sizeof(*vring_state),
999 0, numa_node);
1000 if (vring_state == NULL)
1001 goto free_list;
1002
1003 list->eth_dev = eth_dev;
1004 pthread_mutex_lock(&internal_list_lock);
1005 TAILQ_INSERT_TAIL(&internal_list, list, next);
1006 pthread_mutex_unlock(&internal_list_lock);
1007
1008 rte_spinlock_init(&vring_state->lock);
1009 vring_states[eth_dev->data->port_id] = vring_state;
1010
1011 if (rte_vhost_driver_register(internal->iface_name, internal->flags))
1012 goto list_remove;
1013
1014 if (internal->disable_flags) {
1015 if (rte_vhost_driver_disable_features(internal->iface_name,
1016 internal->disable_flags))
1017 goto drv_unreg;
1018 }
1019
1020 if (rte_vhost_driver_callback_register(internal->iface_name,
1021 &vhost_ops) < 0) {
1022 VHOST_LOG(ERR, "Can't register callbacks\n");
1023 goto drv_unreg;
1024 }
1025
1026 if (rte_vhost_driver_start(internal->iface_name) < 0) {
1027 VHOST_LOG(ERR, "Failed to start driver for %s\n",
1028 internal->iface_name);
1029 goto drv_unreg;
1030 }
1031
1032 return 0;
1033
1034drv_unreg:
1035 rte_vhost_driver_unregister(internal->iface_name);
1036list_remove:
1037 vring_states[eth_dev->data->port_id] = NULL;
1038 pthread_mutex_lock(&internal_list_lock);
1039 TAILQ_REMOVE(&internal_list, list, next);
1040 pthread_mutex_unlock(&internal_list_lock);
1041 rte_free(vring_state);
1042free_list:
1043 rte_free(list);
1044
1045 return -1;
1046}
1047
1048int
1049rte_eth_vhost_get_queue_event(uint16_t port_id,
1050 struct rte_eth_vhost_queue_event *event)
1051{
1052 struct rte_vhost_vring_state *state;
1053 unsigned int i;
1054 int idx;
1055
1056 if (port_id >= RTE_MAX_ETHPORTS) {
1057 VHOST_LOG(ERR, "Invalid port id\n");
1058 return -1;
1059 }
1060
1061 state = vring_states[port_id];
1062 if (!state) {
1063 VHOST_LOG(ERR, "Unused port\n");
1064 return -1;
1065 }
1066
1067 rte_spinlock_lock(&state->lock);
1068 for (i = 0; i <= state->max_vring; i++) {
1069 idx = state->index++ % (state->max_vring + 1);
1070
1071 if (state->cur[idx] != state->seen[idx]) {
1072 state->seen[idx] = state->cur[idx];
1073 event->queue_id = idx / 2;
1074 event->rx = idx & 1;
1075 event->enable = state->cur[idx];
1076 rte_spinlock_unlock(&state->lock);
1077 return 0;
1078 }
1079 }
1080 rte_spinlock_unlock(&state->lock);
1081
1082 return -1;
1083}
1084
1085int
1086rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
1087{
1088 struct internal_list *list;
1089 struct rte_eth_dev *eth_dev;
1090 struct vhost_queue *vq;
1091 int vid = -1;
1092
1093 if (!rte_eth_dev_is_valid_port(port_id))
1094 return -1;
1095
1096 pthread_mutex_lock(&internal_list_lock);
1097
1098 TAILQ_FOREACH(list, &internal_list, next) {
1099 eth_dev = list->eth_dev;
1100 if (eth_dev->data->port_id == port_id) {
1101 vq = eth_dev->data->rx_queues[0];
1102 if (vq) {
1103 vid = vq->vid;
1104 }
1105 break;
1106 }
1107 }
1108
1109 pthread_mutex_unlock(&internal_list_lock);
1110
1111 return vid;
1112}
1113
1114static int
1115eth_dev_configure(struct rte_eth_dev *dev)
1116{
1117 struct pmd_internal *internal = dev->data->dev_private;
1118 const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
1119
1120
1121
1122
1123
1124 if (vhost_driver_setup(dev) < 0)
1125 return -1;
1126
1127 internal->vlan_strip = !!(rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
1128
1129 return 0;
1130}
1131
1132static int
1133eth_dev_start(struct rte_eth_dev *eth_dev)
1134{
1135 struct pmd_internal *internal = eth_dev->data->dev_private;
1136 struct rte_eth_conf *dev_conf = ð_dev->data->dev_conf;
1137
1138 queue_setup(eth_dev, internal);
1139
1140 if (rte_atomic32_read(&internal->dev_attached) == 1) {
1141 if (dev_conf->intr_conf.rxq) {
1142 if (eth_vhost_install_intr(eth_dev) < 0) {
1143 VHOST_LOG(INFO,
1144 "Failed to install interrupt handler.");
1145 return -1;
1146 }
1147 }
1148 }
1149
1150 rte_atomic32_set(&internal->started, 1);
1151 update_queuing_status(eth_dev);
1152
1153 return 0;
1154}
1155
1156static int
1157eth_dev_stop(struct rte_eth_dev *dev)
1158{
1159 struct pmd_internal *internal = dev->data->dev_private;
1160
1161 dev->data->dev_started = 0;
1162 rte_atomic32_set(&internal->started, 0);
1163 update_queuing_status(dev);
1164
1165 return 0;
1166}
1167
1168static int
1169eth_dev_close(struct rte_eth_dev *dev)
1170{
1171 struct pmd_internal *internal;
1172 struct internal_list *list;
1173 unsigned int i, ret;
1174
1175 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
1176 return 0;
1177
1178 internal = dev->data->dev_private;
1179 if (!internal)
1180 return 0;
1181
1182 ret = eth_dev_stop(dev);
1183
1184 list = find_internal_resource(internal->iface_name);
1185 if (list) {
1186 rte_vhost_driver_unregister(internal->iface_name);
1187 pthread_mutex_lock(&internal_list_lock);
1188 TAILQ_REMOVE(&internal_list, list, next);
1189 pthread_mutex_unlock(&internal_list_lock);
1190 rte_free(list);
1191 }
1192
1193 if (dev->data->rx_queues)
1194 for (i = 0; i < dev->data->nb_rx_queues; i++)
1195 rte_free(dev->data->rx_queues[i]);
1196
1197 if (dev->data->tx_queues)
1198 for (i = 0; i < dev->data->nb_tx_queues; i++)
1199 rte_free(dev->data->tx_queues[i]);
1200
1201 rte_free(internal->iface_name);
1202 rte_free(internal);
1203
1204 dev->data->dev_private = NULL;
1205
1206 rte_free(vring_states[dev->data->port_id]);
1207 vring_states[dev->data->port_id] = NULL;
1208
1209 return ret;
1210}
1211
1212static int
1213eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
1214 uint16_t nb_rx_desc __rte_unused,
1215 unsigned int socket_id,
1216 const struct rte_eth_rxconf *rx_conf __rte_unused,
1217 struct rte_mempool *mb_pool)
1218{
1219 struct vhost_queue *vq;
1220
1221 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1222 RTE_CACHE_LINE_SIZE, socket_id);
1223 if (vq == NULL) {
1224 VHOST_LOG(ERR, "Failed to allocate memory for rx queue\n");
1225 return -ENOMEM;
1226 }
1227
1228 vq->mb_pool = mb_pool;
1229 vq->virtqueue_id = rx_queue_id * VIRTIO_QNUM + VIRTIO_TXQ;
1230 rte_spinlock_init(&vq->intr_lock);
1231 dev->data->rx_queues[rx_queue_id] = vq;
1232
1233 return 0;
1234}
1235
1236static int
1237eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
1238 uint16_t nb_tx_desc __rte_unused,
1239 unsigned int socket_id,
1240 const struct rte_eth_txconf *tx_conf __rte_unused)
1241{
1242 struct vhost_queue *vq;
1243
1244 vq = rte_zmalloc_socket(NULL, sizeof(struct vhost_queue),
1245 RTE_CACHE_LINE_SIZE, socket_id);
1246 if (vq == NULL) {
1247 VHOST_LOG(ERR, "Failed to allocate memory for tx queue\n");
1248 return -ENOMEM;
1249 }
1250
1251 vq->virtqueue_id = tx_queue_id * VIRTIO_QNUM + VIRTIO_RXQ;
1252 rte_spinlock_init(&vq->intr_lock);
1253 dev->data->tx_queues[tx_queue_id] = vq;
1254
1255 return 0;
1256}
1257
1258static int
1259eth_dev_info(struct rte_eth_dev *dev,
1260 struct rte_eth_dev_info *dev_info)
1261{
1262 struct pmd_internal *internal;
1263
1264 internal = dev->data->dev_private;
1265 if (internal == NULL) {
1266 VHOST_LOG(ERR, "Invalid device specified\n");
1267 return -ENODEV;
1268 }
1269
1270 dev_info->max_mac_addrs = 1;
1271 dev_info->max_rx_pktlen = (uint32_t)-1;
1272 dev_info->max_rx_queues = internal->max_queues;
1273 dev_info->max_tx_queues = internal->max_queues;
1274 dev_info->min_rx_bufsize = 0;
1275
1276 dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
1277 DEV_TX_OFFLOAD_VLAN_INSERT;
1278 dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
1279
1280 return 0;
1281}
1282
1283static int
1284eth_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
1285{
1286 unsigned i;
1287 unsigned long rx_total = 0, tx_total = 0;
1288 unsigned long rx_total_bytes = 0, tx_total_bytes = 0;
1289 struct vhost_queue *vq;
1290
1291 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1292 i < dev->data->nb_rx_queues; i++) {
1293 if (dev->data->rx_queues[i] == NULL)
1294 continue;
1295 vq = dev->data->rx_queues[i];
1296 stats->q_ipackets[i] = vq->stats.pkts;
1297 rx_total += stats->q_ipackets[i];
1298
1299 stats->q_ibytes[i] = vq->stats.bytes;
1300 rx_total_bytes += stats->q_ibytes[i];
1301 }
1302
1303 for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS &&
1304 i < dev->data->nb_tx_queues; i++) {
1305 if (dev->data->tx_queues[i] == NULL)
1306 continue;
1307 vq = dev->data->tx_queues[i];
1308 stats->q_opackets[i] = vq->stats.pkts;
1309 tx_total += stats->q_opackets[i];
1310
1311 stats->q_obytes[i] = vq->stats.bytes;
1312 tx_total_bytes += stats->q_obytes[i];
1313 }
1314
1315 stats->ipackets = rx_total;
1316 stats->opackets = tx_total;
1317 stats->ibytes = rx_total_bytes;
1318 stats->obytes = tx_total_bytes;
1319
1320 return 0;
1321}
1322
1323static int
1324eth_stats_reset(struct rte_eth_dev *dev)
1325{
1326 struct vhost_queue *vq;
1327 unsigned i;
1328
1329 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1330 if (dev->data->rx_queues[i] == NULL)
1331 continue;
1332 vq = dev->data->rx_queues[i];
1333 vq->stats.pkts = 0;
1334 vq->stats.bytes = 0;
1335 }
1336 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1337 if (dev->data->tx_queues[i] == NULL)
1338 continue;
1339 vq = dev->data->tx_queues[i];
1340 vq->stats.pkts = 0;
1341 vq->stats.bytes = 0;
1342 vq->stats.missed_pkts = 0;
1343 }
1344
1345 return 0;
1346}
1347
/*
 * Queue release callback, installed for both the RX and the TX side.
 * The queue object is handed straight to rte_free().
 */
static void
eth_queue_release(void *q)
{
	rte_free(q);
}
1353
/*
 * tx_done_cleanup callback.
 *
 * Neither argument is used and no work is performed; the callback only
 * reports success.
 * NOTE(review): presumably the TX path keeps no mbufs that would need
 * cleaning up - confirm against eth_vhost_tx().
 */
static int
eth_tx_done_cleanup(void *txq __rte_unused, uint32_t free_cnt __rte_unused)
{
	return 0;
}
1363
/*
 * link_update callback.
 *
 * Both arguments are ignored and nothing is updated here; the callback
 * unconditionally returns 0.
 */
static int
eth_link_update(struct rte_eth_dev *dev __rte_unused,
		int wait_to_complete __rte_unused)
{
	return 0;
}
1370
1371static uint32_t
1372eth_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1373{
1374 struct vhost_queue *vq;
1375
1376 vq = dev->data->rx_queues[rx_queue_id];
1377 if (vq == NULL)
1378 return 0;
1379
1380 return rte_vhost_rx_queue_count(vq->vid, vq->virtqueue_id);
1381}
1382
1383static const struct eth_dev_ops ops = {
1384 .dev_start = eth_dev_start,
1385 .dev_stop = eth_dev_stop,
1386 .dev_close = eth_dev_close,
1387 .dev_configure = eth_dev_configure,
1388 .dev_infos_get = eth_dev_info,
1389 .rx_queue_setup = eth_rx_queue_setup,
1390 .tx_queue_setup = eth_tx_queue_setup,
1391 .rx_queue_release = eth_queue_release,
1392 .tx_queue_release = eth_queue_release,
1393 .tx_done_cleanup = eth_tx_done_cleanup,
1394 .link_update = eth_link_update,
1395 .stats_get = eth_stats_get,
1396 .stats_reset = eth_stats_reset,
1397 .xstats_reset = vhost_dev_xstats_reset,
1398 .xstats_get = vhost_dev_xstats_get,
1399 .xstats_get_names = vhost_dev_xstats_get_names,
1400 .rx_queue_intr_enable = eth_rxq_intr_enable,
1401 .rx_queue_intr_disable = eth_rxq_intr_disable,
1402};
1403
1404static int
1405eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
1406 int16_t queues, const unsigned int numa_node, uint64_t flags,
1407 uint64_t disable_flags)
1408{
1409 const char *name = rte_vdev_device_name(dev);
1410 struct rte_eth_dev_data *data;
1411 struct pmd_internal *internal = NULL;
1412 struct rte_eth_dev *eth_dev = NULL;
1413 struct rte_ether_addr *eth_addr = NULL;
1414
1415 VHOST_LOG(INFO, "Creating VHOST-USER backend on numa socket %u\n",
1416 numa_node);
1417
1418
1419 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
1420 if (eth_dev == NULL)
1421 goto error;
1422 data = eth_dev->data;
1423
1424 eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
1425 if (eth_addr == NULL)
1426 goto error;
1427 data->mac_addrs = eth_addr;
1428 *eth_addr = base_eth_addr;
1429 eth_addr->addr_bytes[5] = eth_dev->data->port_id;
1430
1431
1432
1433
1434
1435
1436 internal = eth_dev->data->dev_private;
1437 internal->iface_name = rte_malloc_socket(name, strlen(iface_name) + 1,
1438 0, numa_node);
1439 if (internal->iface_name == NULL)
1440 goto error;
1441 strcpy(internal->iface_name, iface_name);
1442
1443 data->nb_rx_queues = queues;
1444 data->nb_tx_queues = queues;
1445 internal->max_queues = queues;
1446 internal->vid = -1;
1447 internal->flags = flags;
1448 internal->disable_flags = disable_flags;
1449 data->dev_link = pmd_link;
1450 data->dev_flags = RTE_ETH_DEV_INTR_LSC |
1451 RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;
1452 data->promiscuous = 1;
1453 data->all_multicast = 1;
1454
1455 eth_dev->dev_ops = &ops;
1456 eth_dev->rx_queue_count = eth_rx_queue_count;
1457
1458
1459 eth_dev->rx_pkt_burst = eth_vhost_rx;
1460 eth_dev->tx_pkt_burst = eth_vhost_tx;
1461
1462 rte_eth_dev_probing_finish(eth_dev);
1463 return 0;
1464
1465error:
1466 if (internal)
1467 rte_free(internal->iface_name);
1468 rte_eth_dev_release_port(eth_dev);
1469
1470 return -1;
1471}
1472
/*
 * kvargs handler for the "iface" argument.
 *
 * Stores the value pointer itself (no copy is made) into the
 * const char * that extra_args points to.
 *
 * Returns 0 on success, -1 when no value was supplied.
 */
static inline int
open_iface(const char *key __rte_unused, const char *value, void *extra_args)
{
	const char **out = extra_args;

	if (value != NULL) {
		*out = value;
		return 0;
	}

	return -1;
}
1485
/*
 * kvargs handler for numeric arguments.
 *
 * Parses value with strtoul() (base auto-detected: decimal, 0x hex or
 * leading-0 octal) and stores the result in the uint16_t that extra_args
 * points to. The destination is written only on success.
 *
 * Returns:
 *   0        on success
 *   -EINVAL  if value or extra_args is NULL, or value is not a number
 *            (no digits, or trailing characters after the digits)
 *   -1       if the number does not fit in 16 bits
 */
static inline int
open_int(const char *key __rte_unused, const char *value, void *extra_args)
{
	uint16_t *n = extra_args;
	unsigned long parsed;
	char *end = NULL;

	if (value == NULL || extra_args == NULL)
		return -EINVAL;

	/*
	 * strtoul() only sets errno on failure, so clear it first; a stale
	 * ERANGE from an earlier call must not fail a valid value.
	 */
	errno = 0;
	parsed = strtoul(value, &end, 0);

	/* Reject input with no digits or with trailing garbage. */
	if (end == value || *end != '\0')
		return -EINVAL;

	/* Reject values that would be silently truncated to 16 bits. */
	if (errno == ERANGE || parsed > USHRT_MAX)
		return -1;

	*n = (uint16_t)parsed;

	return 0;
}
1500
1501static int
1502rte_pmd_vhost_probe(struct rte_vdev_device *dev)
1503{
1504 struct rte_kvargs *kvlist = NULL;
1505 int ret = 0;
1506 char *iface_name;
1507 uint16_t queues;
1508 uint64_t flags = 0;
1509 uint64_t disable_flags = 0;
1510 int client_mode = 0;
1511 int iommu_support = 0;
1512 int postcopy_support = 0;
1513 int tso = 0;
1514 int linear_buf = 0;
1515 int ext_buf = 0;
1516 struct rte_eth_dev *eth_dev;
1517 const char *name = rte_vdev_device_name(dev);
1518
1519 VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);
1520
1521 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
1522 eth_dev = rte_eth_dev_attach_secondary(name);
1523 if (!eth_dev) {
1524 VHOST_LOG(ERR, "Failed to probe %s\n", name);
1525 return -1;
1526 }
1527 eth_dev->rx_pkt_burst = eth_vhost_rx;
1528 eth_dev->tx_pkt_burst = eth_vhost_tx;
1529 eth_dev->dev_ops = &ops;
1530 if (dev->device.numa_node == SOCKET_ID_ANY)
1531 dev->device.numa_node = rte_socket_id();
1532 eth_dev->device = &dev->device;
1533 rte_eth_dev_probing_finish(eth_dev);
1534 return 0;
1535 }
1536
1537 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments);
1538 if (kvlist == NULL)
1539 return -1;
1540
1541 if (rte_kvargs_count(kvlist, ETH_VHOST_IFACE_ARG) == 1) {
1542 ret = rte_kvargs_process(kvlist, ETH_VHOST_IFACE_ARG,
1543 &open_iface, &iface_name);
1544 if (ret < 0)
1545 goto out_free;
1546 } else {
1547 ret = -1;
1548 goto out_free;
1549 }
1550
1551 if (rte_kvargs_count(kvlist, ETH_VHOST_QUEUES_ARG) == 1) {
1552 ret = rte_kvargs_process(kvlist, ETH_VHOST_QUEUES_ARG,
1553 &open_int, &queues);
1554 if (ret < 0 || queues > RTE_MAX_QUEUES_PER_PORT)
1555 goto out_free;
1556
1557 } else
1558 queues = 1;
1559
1560 if (rte_kvargs_count(kvlist, ETH_VHOST_CLIENT_ARG) == 1) {
1561 ret = rte_kvargs_process(kvlist, ETH_VHOST_CLIENT_ARG,
1562 &open_int, &client_mode);
1563 if (ret < 0)
1564 goto out_free;
1565
1566 if (client_mode)
1567 flags |= RTE_VHOST_USER_CLIENT;
1568 }
1569
1570 if (rte_kvargs_count(kvlist, ETH_VHOST_IOMMU_SUPPORT) == 1) {
1571 ret = rte_kvargs_process(kvlist, ETH_VHOST_IOMMU_SUPPORT,
1572 &open_int, &iommu_support);
1573 if (ret < 0)
1574 goto out_free;
1575
1576 if (iommu_support)
1577 flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
1578 }
1579
1580 if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
1581 ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
1582 &open_int, &postcopy_support);
1583 if (ret < 0)
1584 goto out_free;
1585
1586 if (postcopy_support)
1587 flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
1588 }
1589
1590 if (rte_kvargs_count(kvlist, ETH_VHOST_VIRTIO_NET_F_HOST_TSO) == 1) {
1591 ret = rte_kvargs_process(kvlist,
1592 ETH_VHOST_VIRTIO_NET_F_HOST_TSO,
1593 &open_int, &tso);
1594 if (ret < 0)
1595 goto out_free;
1596
1597 if (tso == 0) {
1598 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO4);
1599 disable_flags |= (1ULL << VIRTIO_NET_F_HOST_TSO6);
1600 }
1601 }
1602
1603 if (rte_kvargs_count(kvlist, ETH_VHOST_LINEAR_BUF) == 1) {
1604 ret = rte_kvargs_process(kvlist,
1605 ETH_VHOST_LINEAR_BUF,
1606 &open_int, &linear_buf);
1607 if (ret < 0)
1608 goto out_free;
1609
1610 if (linear_buf == 1)
1611 flags |= RTE_VHOST_USER_LINEARBUF_SUPPORT;
1612 }
1613
1614 if (rte_kvargs_count(kvlist, ETH_VHOST_EXT_BUF) == 1) {
1615 ret = rte_kvargs_process(kvlist,
1616 ETH_VHOST_EXT_BUF,
1617 &open_int, &ext_buf);
1618 if (ret < 0)
1619 goto out_free;
1620
1621 if (ext_buf == 1)
1622 flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
1623 }
1624
1625 if (dev->device.numa_node == SOCKET_ID_ANY)
1626 dev->device.numa_node = rte_socket_id();
1627
1628 ret = eth_dev_vhost_create(dev, iface_name, queues,
1629 dev->device.numa_node, flags, disable_flags);
1630 if (ret == -1)
1631 VHOST_LOG(ERR, "Failed to create %s\n", name);
1632
1633out_free:
1634 rte_kvargs_free(kvlist);
1635 return ret;
1636}
1637
1638static int
1639rte_pmd_vhost_remove(struct rte_vdev_device *dev)
1640{
1641 const char *name;
1642 struct rte_eth_dev *eth_dev = NULL;
1643
1644 name = rte_vdev_device_name(dev);
1645 VHOST_LOG(INFO, "Un-Initializing pmd_vhost for %s\n", name);
1646
1647
1648 eth_dev = rte_eth_dev_allocated(name);
1649 if (eth_dev == NULL)
1650 return 0;
1651
1652 eth_dev_close(eth_dev);
1653 rte_eth_dev_release_port(eth_dev);
1654
1655 return 0;
1656}
1657
1658static struct rte_vdev_driver pmd_vhost_drv = {
1659 .probe = rte_pmd_vhost_probe,
1660 .remove = rte_pmd_vhost_remove,
1661};
1662
/*
 * Driver registration: the driver is registered as "net_vhost", with
 * "eth_vhost" as an alias. The parameter string lists the same keys
 * as the valid_arguments[] table used when parsing the device arguments.
 */
RTE_PMD_REGISTER_VDEV(net_vhost, pmd_vhost_drv);
RTE_PMD_REGISTER_ALIAS(net_vhost, eth_vhost);
RTE_PMD_REGISTER_PARAM_STRING(net_vhost,
	"iface=<ifc> "
	"queues=<int> "
	"client=<0|1> "
	"iommu-support=<0|1> "
	"postcopy-support=<0|1> "
	"tso=<0|1> "
	"linear-buffer=<0|1> "
	"ext-buffer=<0|1>");
1674