qemu/net/vhost-vdpa.c
/*
 * vhost-vdpa.c
 *
 * Copyright(c) 2017-2018 Intel Corporation.
 * Copyright(c) 2020 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "clients.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/vhost-vdpa.h"
#include "hw/virtio/vhost-vdpa.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
#include "qemu/option.h"
#include "qapi/error.h"
#include <linux/vhost.h>
#include <sys/ioctl.h>
#include <err.h>
#include "standard-headers/linux/virtio_net.h"
#include "monitor/monitor.h"
#include "hw/virtio/vhost.h"

/* TODO: add multiqueue support here */
typedef struct VhostVDPAState {
    NetClientState nc;
    struct vhost_vdpa vhost_vdpa;
    VHostNetState *vhost_net;

    /* Control commands shadow buffers */
    void *cvq_cmd_out_buffer;
    virtio_net_ctrl_ack *status;

    bool started;
} VhostVDPAState;

const int vdpa_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_F_ANY_LAYOUT,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_CSUM,
    VIRTIO_NET_F_GUEST_CSUM,
    VIRTIO_NET_F_GSO,
    VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_TSO6,
    VIRTIO_NET_F_GUEST_ECN,
    VIRTIO_NET_F_GUEST_UFO,
    VIRTIO_NET_F_HOST_TSO4,
    VIRTIO_NET_F_HOST_TSO6,
    VIRTIO_NET_F_HOST_ECN,
    VIRTIO_NET_F_HOST_UFO,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_NET_F_MTU,
    VIRTIO_NET_F_CTRL_RX,
    VIRTIO_NET_F_CTRL_RX_EXTRA,
    VIRTIO_NET_F_CTRL_VLAN,
    VIRTIO_NET_F_CTRL_MAC_ADDR,
    VIRTIO_NET_F_RSS,
    VIRTIO_NET_F_MQ,
    VIRTIO_NET_F_CTRL_VQ,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_F_RING_RESET,
    VIRTIO_NET_F_HASH_REPORT,
    VIRTIO_NET_F_GUEST_ANNOUNCE,
    VIRTIO_NET_F_STATUS,
    VHOST_INVALID_FEATURE_BIT
};

/** Supported device specific feature bits with SVQ */
static const uint64_t vdpa_svq_device_features =
    BIT_ULL(VIRTIO_NET_F_CSUM) |
    BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
    BIT_ULL(VIRTIO_NET_F_MTU) |
    BIT_ULL(VIRTIO_NET_F_MAC) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
    BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
    BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
    BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
    BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
    BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
    BIT_ULL(VIRTIO_NET_F_STATUS) |
    BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
    BIT_ULL(VIRTIO_NET_F_MQ) |
    BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
    BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
    BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
    BIT_ULL(VIRTIO_NET_F_STANDBY);

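/* Return the VHostNetState backing this vhost-vdpa net client */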
VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    return s->vhost_net;
}

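/* Verify that the vDPA device is a network device (VIRTIO_ID_NET) */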
static int vhost_vdpa_net_check_device_id(struct vhost_net *net)
{
    uint32_t device_id;
    int ret;
    struct vhost_dev *hdev;

    hdev = (struct vhost_dev *)&net->dev;
    ret = hdev->vhost_ops->vhost_get_device_id(hdev, &device_id);
    if (ret) {
        /* Don't inspect device_id if the backend call failed */
        return ret;
    }
    if (device_id != VIRTIO_ID_NET) {
        return -ENOTSUP;
    }
    return 0;
}

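/*
 * Initialize the vhost-net backend for a range of virtqueues and check the
 * device class.
 */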
static int vhost_vdpa_add(NetClientState *ncs, void *be,
                          int queue_pair_index, int nvqs)
{
    VhostNetOptions options;
    struct vhost_net *net = NULL;
    VhostVDPAState *s;
    int ret;

    options.backend_type = VHOST_BACKEND_TYPE_VDPA;
    assert(ncs->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, ncs);
    options.net_backend = ncs;
    options.opaque      = be;
    options.busyloop_timeout = 0;
    options.nvqs = nvqs;

    net = vhost_net_init(&options);
    if (!net) {
        error_report("failed to init vhost_net for queue");
        goto err_init;
    }
    s->vhost_net = net;
    ret = vhost_vdpa_net_check_device_id(net);
    if (ret) {
        goto err_check;
    }
    return 0;
err_check:
    vhost_net_cleanup(net);
    g_free(net);
err_init:
    return -1;
}

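/*
 * Free the shadow CVQ buffers, the IOVA tree (only on the client owning the
 * last queues), the vhost-net state and the device fd.
 */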
static void vhost_vdpa_cleanup(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_dev *dev = &s->vhost_net->dev;

    qemu_vfree(s->cvq_cmd_out_buffer);
    qemu_vfree(s->status);
    if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
        g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
    }
    if (s->vhost_net) {
        vhost_net_cleanup(s->vhost_net);
        g_free(s->vhost_net);
        s->vhost_net = NULL;
    }
    if (s->vhost_vdpa.device_fd >= 0) {
        qemu_close(s->vhost_vdpa.device_fd);
        s->vhost_vdpa.device_fd = -1;
    }
}

static bool vhost_vdpa_has_vnet_hdr(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    return true;
}

static bool vhost_vdpa_has_ufo(NetClientState *nc)
{
    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    uint64_t features = 0;
    features |= (1ULL << VIRTIO_NET_F_HOST_UFO);
    features = vhost_net_get_features(s->vhost_net, features);
    return !!(features & (1ULL << VIRTIO_NET_F_HOST_UFO));
}

static bool vhost_vdpa_check_peer_type(NetClientState *nc, ObjectClass *oc,
                                       Error **errp)
{
    const char *driver = object_class_get_name(oc);

    if (!g_str_has_prefix(driver, "virtio-net-")) {
        error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
        return false;
    }

    return true;
}

/** Dummy receive in case qemu falls back to userland tap networking */
static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf,
                                  size_t size)
{
    return size;
}

static NetClientInfo net_vhost_vdpa_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

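/* Unmap a shadow CVQ buffer from the device's IOVA space */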
static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
{
    VhostIOVATree *tree = v->iova_tree;
    DMAMap needle = {
        /*
         * No need to specify size or to look for more translations since
         * this contiguous chunk was allocated by us.
         */
        .translated_addr = (hwaddr)(uintptr_t)addr,
    };
    const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
    int r;

    if (unlikely(!map)) {
        error_report("Cannot locate expected map");
        return;
    }

    r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
    if (unlikely(r != 0)) {
        error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
    }

    vhost_iova_tree_remove(tree, *map);
}

static size_t vhost_vdpa_net_cvq_cmd_len(void)
{
    /*
     * MAC_TABLE_SET is the ctrl command that produces the longest out buffer.
     * The in buffer is always 1 byte, so it always fits here.
     */
    return sizeof(struct virtio_net_ctrl_hdr) +
           2 * sizeof(struct virtio_net_ctrl_mac) +
           MAC_TABLE_ENTRIES * ETH_ALEN;
}

static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
{
    return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
}

/** Map CVQ buffer. */
static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size,
                                  bool write)
{
    DMAMap map = {};
    int r;

    map.translated_addr = (hwaddr)(uintptr_t)buf;
    map.size = size - 1;
    map.perm = write ? IOMMU_RW : IOMMU_RO;
    r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
    if (unlikely(r != IOVA_OK)) {
        error_report("Cannot map injected element");
        return r;
    }

    r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
                           !write);
    if (unlikely(r < 0)) {
        goto dma_map_err;
    }

    return 0;

dma_map_err:
    vhost_iova_tree_remove(v->iova_tree, map);
    return r;
}

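/* Map the shadow CVQ command and status buffers at device start */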
static int vhost_vdpa_net_cvq_start(NetClientState *nc)
{
    VhostVDPAState *s;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    s = DO_UPCAST(VhostVDPAState, nc, nc);
    if (!s->vhost_vdpa.shadow_vqs_enabled) {
        return 0;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer,
                               vhost_vdpa_net_cvq_cmd_page_len(), false);
    if (unlikely(r < 0)) {
        return r;
    }

    r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->status,
                               vhost_vdpa_net_cvq_cmd_page_len(), true);
    if (unlikely(r < 0)) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
    }

    return r;
}

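/* Unmap the shadow CVQ buffers at device stop */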
static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (s->vhost_vdpa.shadow_vqs_enabled) {
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
        vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
    }
}

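/*
 * Send a command to the device through the shadow CVQ and poll until it is
 * used. Returns the length written by the device or a negative errno.
 */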
static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len,
                                      size_t in_len)
{
    /* Buffers for the device */
    const struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
        .iov_len = out_len,
    };
    const struct iovec in = {
        .iov_base = s->status,
        .iov_len = sizeof(virtio_net_ctrl_ack),
    };
    VhostShadowVirtqueue *svq = g_ptr_array_index(s->vhost_vdpa.shadow_vqs, 0);
    int r;

    r = vhost_svq_add(svq, &out, 1, &in, 1, NULL);
    if (unlikely(r != 0)) {
        if (unlikely(r == -ENOSPC)) {
            qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
                          __func__);
        }
        return r;
    }

    /*
     * We can poll here since we've had the BQL from the time we sent the
     * descriptor. Also, we need to take the answer before SVQ pulls by
     * itself, when the BQL is released.
     */
    return vhost_svq_poll(svq);
}

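/* Compose a control command in the shadow out buffer and send it */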
static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class,
                                       uint8_t cmd, const void *data,
                                       size_t data_size)
{
    const struct virtio_net_ctrl_hdr ctrl = {
        .class = class,
        .cmd = cmd,
    };

    assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl));

    memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl));
    memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size);

    return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size,
                                  sizeof(virtio_net_ctrl_ack));
}

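/*
 * Restore the MAC address at device start, if VIRTIO_NET_F_CTRL_MAC_ADDR
 * was negotiated.
 */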
static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n)
{
    uint64_t features = n->parent_obj.guest_features;
    if (features & BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR)) {
        ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC,
                                                  VIRTIO_NET_CTRL_MAC_ADDR_SET,
                                                  n->mac, sizeof(n->mac));
        if (unlikely(dev_written < 0)) {
            return dev_written;
        }

        return *s->status != VIRTIO_NET_OK;
    }

    return 0;
}

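/*
 * Restore the number of active queue pairs at device start, if
 * VIRTIO_NET_F_MQ was negotiated.
 */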
static int vhost_vdpa_net_load_mq(VhostVDPAState *s,
                                  const VirtIONet *n)
{
    struct virtio_net_ctrl_mq mq;
    uint64_t features = n->parent_obj.guest_features;
    ssize_t dev_written;

    if (!(features & BIT_ULL(VIRTIO_NET_F_MQ))) {
        return 0;
    }

    mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs);
    dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ,
                                          VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq,
                                          sizeof(mq));
    if (unlikely(dev_written < 0)) {
        return dev_written;
    }

    return *s->status != VIRTIO_NET_OK;
}

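/*
 * Replay the virtio-net control-plane state (MAC, multiqueue) through the
 * shadow CVQ at device start.
 */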
static int vhost_vdpa_net_load(NetClientState *nc)
{
    VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
    struct vhost_vdpa *v = &s->vhost_vdpa;
    const VirtIONet *n;
    int r;

    assert(nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA);

    if (!v->shadow_vqs_enabled) {
        return 0;
    }

    n = VIRTIO_NET(v->dev->vdev);
    r = vhost_vdpa_net_load_mac(s, n);
    if (unlikely(r)) {
        return r;
    }
    r = vhost_vdpa_net_load_mq(s, n);
    if (unlikely(r)) {
        return r;
    }

    return 0;
}

static NetClientInfo net_vhost_vdpa_cvq_info = {
    .type = NET_CLIENT_DRIVER_VHOST_VDPA,
    .size = sizeof(VhostVDPAState),
    .receive = vhost_vdpa_receive,
    .start = vhost_vdpa_net_cvq_start,
    .load = vhost_vdpa_net_load,
    .stop = vhost_vdpa_net_cvq_stop,
    .cleanup = vhost_vdpa_cleanup,
    .has_vnet_hdr = vhost_vdpa_has_vnet_hdr,
    .has_ufo = vhost_vdpa_has_ufo,
    .check_peer_type = vhost_vdpa_check_peer_type,
};

/**
 * Validate and copy control virtqueue commands.
 *
 * Following QEMU guidelines, we offer a copy of the buffers to the device to
 * prevent TOCTOU bugs.
 */
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
                                            VirtQueueElement *elem,
                                            void *opaque)
{
    VhostVDPAState *s = opaque;
    size_t in_len;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    /* Out buffer sent to both the vdpa device and the device model */
    struct iovec out = {
        .iov_base = s->cvq_cmd_out_buffer,
    };
    /* In buffer used for the device model */
    const struct iovec in = {
        .iov_base = &status,
        .iov_len = sizeof(status),
    };
    ssize_t dev_written = -EINVAL;

    out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0,
                             s->cvq_cmd_out_buffer,
                             vhost_vdpa_net_cvq_cmd_len());
    dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status));
    if (unlikely(dev_written < 0)) {
        goto out;
    }

    if (unlikely(dev_written < sizeof(status))) {
        error_report("Insufficient written data (%zu)", dev_written);
        goto out;
    }

    if (*s->status != VIRTIO_NET_OK) {
        /* Still push the element back so it is not leaked */
        goto out;
    }

    status = VIRTIO_NET_ERR;
    virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, &out, 1);
    if (status != VIRTIO_NET_OK) {
        error_report("Bad CVQ processing in model");
    }

out:
    in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
                          sizeof(status));
    if (unlikely(in_len < sizeof(status))) {
        error_report("Bad device CVQ written length");
    }
    vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
    g_free(elem);
    return dev_written < 0 ? dev_written : 0;
}

static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
    .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
};

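/*
 * Create a net client for one datapath queue pair, or for the control
 * virtqueue. CVQ clients also get page-aligned shadow buffers for control
 * commands and their status reply.
 */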
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
                                           const char *device,
                                           const char *name,
                                           int vdpa_device_fd,
                                           int queue_pair_index,
                                           int nvqs,
                                           bool is_datapath,
                                           bool svq,
                                           VhostIOVATree *iova_tree)
{
    NetClientState *nc = NULL;
    VhostVDPAState *s;
    int ret = 0;
    assert(name);
    if (is_datapath) {
        nc = qemu_new_net_client(&net_vhost_vdpa_info, peer, device,
                                 name);
    } else {
        nc = qemu_new_net_control_client(&net_vhost_vdpa_cvq_info, peer,
                                         device, name);
    }
    qemu_set_info_str(nc, TYPE_VHOST_VDPA);
    s = DO_UPCAST(VhostVDPAState, nc, nc);

    s->vhost_vdpa.device_fd = vdpa_device_fd;
    s->vhost_vdpa.index = queue_pair_index;
    s->vhost_vdpa.shadow_vqs_enabled = svq;
    s->vhost_vdpa.iova_tree = iova_tree;
    if (!is_datapath) {
        s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
                                            vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
        s->status = qemu_memalign(qemu_real_host_page_size(),
                                  vhost_vdpa_net_cvq_cmd_page_len());
        memset(s->status, 0, vhost_vdpa_net_cvq_cmd_page_len());

        s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
        s->vhost_vdpa.shadow_vq_ops_opaque = s;
    }
    ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
    if (ret) {
        qemu_del_net_client(nc);
        return NULL;
    }
    return nc;
}

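/* Query the valid IOVA range from the vhost-vdpa device */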
static int vhost_vdpa_get_iova_range(int fd,
                                     struct vhost_vdpa_iova_range *iova_range)
{
    int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);

    return ret < 0 ? -errno : 0;
}

static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
{
    int ret = ioctl(fd, VHOST_GET_FEATURES, features);
    if (unlikely(ret < 0)) {
        error_setg_errno(errp, errno,
                         "Failed to query features from vhost-vDPA device");
    }
    return ret;
}

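/*
 * Return the number of queue pairs the device supports: read from the config
 * space if VIRTIO_NET_F_MQ is offered, else 1. Also reports through has_cvq
 * whether the device offers a control virtqueue.
 */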
static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
                                          int *has_cvq, Error **errp)
{
    unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
    g_autofree struct vhost_vdpa_config *config = NULL;
    __virtio16 *max_queue_pairs;
    int ret;

    if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
        *has_cvq = 1;
    } else {
        *has_cvq = 0;
    }

    if (features & (1 << VIRTIO_NET_F_MQ)) {
        config = g_malloc0(config_size + sizeof(*max_queue_pairs));
        config->off = offsetof(struct virtio_net_config, max_virtqueue_pairs);
        config->len = sizeof(*max_queue_pairs);

        ret = ioctl(fd, VHOST_VDPA_GET_CONFIG, config);
        if (ret) {
            /* ioctl returns -1 on failure; report the real error, negated */
            ret = -errno;
            error_setg(errp, "Failed to get config from vhost-vDPA device");
            return ret;
        }

        max_queue_pairs = (__virtio16 *)&config->buf;

        return lduw_le_p(max_queue_pairs);
    }

    return 1;
}

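/*
 * Entry point for "-netdev vhost-vdpa": open (or receive) the vhost-vdpa
 * character device and create one net client per queue pair, plus one for
 * the control virtqueue when the device offers one,
 * e.g. -netdev vhost-vdpa,id=vdpa0,vhostdev=/dev/vhost-vdpa-0
 */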
int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
                        NetClientState *peer, Error **errp)
{
    const NetdevVhostVDPAOptions *opts;
    uint64_t features;
    int vdpa_device_fd;
    g_autofree NetClientState **ncs = NULL;
    g_autoptr(VhostIOVATree) iova_tree = NULL;
    NetClientState *nc;
    int queue_pairs, r, i = 0, has_cvq = 0;

    assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
    opts = &netdev->u.vhost_vdpa;
    if (!opts->has_vhostdev && !opts->has_vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: neither vhostdev= nor vhostfd= was specified");
        return -1;
    }

    if (opts->has_vhostdev && opts->has_vhostfd) {
        error_setg(errp,
                   "vhost-vdpa: vhostdev= and vhostfd= are mutually exclusive");
        return -1;
    }

    if (opts->has_vhostdev) {
        vdpa_device_fd = qemu_open(opts->vhostdev, O_RDWR, errp);
        if (vdpa_device_fd == -1) {
            return -errno;
        }
    } else {
        /* has_vhostfd */
        vdpa_device_fd = monitor_fd_param(monitor_cur(), opts->vhostfd, errp);
        if (vdpa_device_fd == -1) {
            error_prepend(errp, "vhost-vdpa: unable to parse vhostfd: ");
            return -1;
        }
    }

    r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
    if (unlikely(r < 0)) {
        goto err;
    }

    queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
                                                 &has_cvq, errp);
    if (queue_pairs < 0) {
        qemu_close(vdpa_device_fd);
        return queue_pairs;
    }

    if (opts->x_svq) {
        struct vhost_vdpa_iova_range iova_range;

        uint64_t invalid_dev_features =
            features & ~vdpa_svq_device_features &
            /* Transport features are all accepted at this point */
            ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
                             VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);

        if (invalid_dev_features) {
            error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
                       invalid_dev_features);
            goto err_svq;
        }

        vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
        iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
    }

    ncs = g_malloc0(sizeof(*ncs) * queue_pairs);

    for (i = 0; i < queue_pairs; i++) {
        ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                     vdpa_device_fd, i, 2, true, opts->x_svq,
                                     iova_tree);
        if (!ncs[i]) {
            goto err;
        }
    }

    if (has_cvq) {
        nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
                                 vdpa_device_fd, i, 1, false,
                                 opts->x_svq, iova_tree);
        if (!nc) {
            goto err;
        }
    }

    /* iova_tree ownership belongs to last NetClientState */
    g_steal_pointer(&iova_tree);
    return 0;

err:
    if (i) {
        for (i--; i >= 0; i--) {
            qemu_del_net_client(ncs[i]);
        }
    }

err_svq:
    qemu_close(vdpa_device_fd);

    return -1;
}