qemu/hw/net/vhost_net.c
<<
>>
Prefs
   1/*
   2 * vhost-net support
   3 *
   4 * Copyright Red Hat, Inc. 2010
   5 *
   6 * Authors:
   7 *  Michael S. Tsirkin <mst@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2.  See
  10 * the COPYING file in the top-level directory.
  11 *
  12 * Contributions after 2012-01-13 are licensed under the terms of the
  13 * GNU GPL, version 2 or (at your option) any later version.
  14 */
  15
  16#include "qemu/osdep.h"
  17#include "net/net.h"
  18#include "net/tap.h"
  19#include "net/vhost-user.h"
  20#include "net/vhost-vdpa.h"
  21
  22#include "standard-headers/linux/vhost_types.h"
  23#include "hw/virtio/virtio-net.h"
  24#include "net/vhost_net.h"
  25#include "qapi/error.h"
  26#include "qemu/error-report.h"
  27#include "qemu/main-loop.h"
  28
  29#include <sys/socket.h>
  30#include <net/if.h>
  31#include <netinet/in.h>
  32
  33
  34#include "standard-headers/linux/virtio_ring.h"
  35#include "hw/virtio/vhost.h"
  36#include "hw/virtio/virtio-bus.h"
  37
  38
  39/* Features supported by host kernel. */
  40static const int kernel_feature_bits[] = {
  41    VIRTIO_F_NOTIFY_ON_EMPTY,
  42    VIRTIO_RING_F_INDIRECT_DESC,
  43    VIRTIO_RING_F_EVENT_IDX,
  44    VIRTIO_NET_F_MRG_RXBUF,
  45    VIRTIO_F_VERSION_1,
  46    VIRTIO_NET_F_MTU,
  47    VIRTIO_F_IOMMU_PLATFORM,
  48    VIRTIO_F_RING_PACKED,
  49    VIRTIO_NET_F_HASH_REPORT,
  50    VHOST_INVALID_FEATURE_BIT
  51};
  52
  53/* Features supported by others. */
  54static const int user_feature_bits[] = {
  55    VIRTIO_F_NOTIFY_ON_EMPTY,
  56    VIRTIO_RING_F_INDIRECT_DESC,
  57    VIRTIO_RING_F_EVENT_IDX,
  58
  59    VIRTIO_F_ANY_LAYOUT,
  60    VIRTIO_F_VERSION_1,
  61    VIRTIO_NET_F_CSUM,
  62    VIRTIO_NET_F_GUEST_CSUM,
  63    VIRTIO_NET_F_GSO,
  64    VIRTIO_NET_F_GUEST_TSO4,
  65    VIRTIO_NET_F_GUEST_TSO6,
  66    VIRTIO_NET_F_GUEST_ECN,
  67    VIRTIO_NET_F_GUEST_UFO,
  68    VIRTIO_NET_F_HOST_TSO4,
  69    VIRTIO_NET_F_HOST_TSO6,
  70    VIRTIO_NET_F_HOST_ECN,
  71    VIRTIO_NET_F_HOST_UFO,
  72    VIRTIO_NET_F_MRG_RXBUF,
  73    VIRTIO_NET_F_MTU,
  74    VIRTIO_F_IOMMU_PLATFORM,
  75    VIRTIO_F_RING_PACKED,
  76    VIRTIO_NET_F_RSS,
  77    VIRTIO_NET_F_HASH_REPORT,
  78
  79    /* This bit implies RARP isn't sent by QEMU out of band */
  80    VIRTIO_NET_F_GUEST_ANNOUNCE,
  81
  82    VIRTIO_NET_F_MQ,
  83
  84    VHOST_INVALID_FEATURE_BIT
  85};
  86
  87static const int *vhost_net_get_feature_bits(struct vhost_net *net)
  88{
  89    const int *feature_bits = 0;
  90
  91    switch (net->nc->info->type) {
  92    case NET_CLIENT_DRIVER_TAP:
  93        feature_bits = kernel_feature_bits;
  94        break;
  95    case NET_CLIENT_DRIVER_VHOST_USER:
  96        feature_bits = user_feature_bits;
  97        break;
  98#ifdef CONFIG_VHOST_NET_VDPA
  99    case NET_CLIENT_DRIVER_VHOST_VDPA:
 100        feature_bits = vdpa_feature_bits;
 101        break;
 102#endif
 103    default:
 104        error_report("Feature bits not defined for this type: %d",
 105                net->nc->info->type);
 106        break;
 107    }
 108
 109    return feature_bits;
 110}
 111
 112uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
 113{
 114    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
 115            features);
 116}
 117int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
 118                         uint32_t config_len)
 119{
 120    return vhost_dev_get_config(&net->dev, config, config_len, NULL);
 121}
 122int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
 123                         uint32_t offset, uint32_t size, uint32_t flags)
 124{
 125    return vhost_dev_set_config(&net->dev, data, offset, size, flags);
 126}
 127
 128void vhost_net_ack_features(struct vhost_net *net, uint64_t features)
 129{
 130    net->dev.acked_features = net->dev.backend_features;
 131    vhost_ack_features(&net->dev, vhost_net_get_feature_bits(net), features);
 132}
 133
 134uint64_t vhost_net_get_max_queues(VHostNetState *net)
 135{
 136    return net->dev.max_queues;
 137}
 138
 139uint64_t vhost_net_get_acked_features(VHostNetState *net)
 140{
 141    return net->dev.acked_features;
 142}
 143
 144static int vhost_net_get_fd(NetClientState *backend)
 145{
 146    switch (backend->info->type) {
 147    case NET_CLIENT_DRIVER_TAP:
 148        return tap_get_fd(backend);
 149    default:
 150        fprintf(stderr, "vhost-net requires tap backend\n");
 151        return -ENOSYS;
 152    }
 153}
 154
 155struct vhost_net *vhost_net_init(VhostNetOptions *options)
 156{
 157    int r;
 158    bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL;
 159    struct vhost_net *net = g_new0(struct vhost_net, 1);
 160    uint64_t features = 0;
 161    Error *local_err = NULL;
 162
 163    if (!options->net_backend) {
 164        fprintf(stderr, "vhost-net requires net backend to be setup\n");
 165        goto fail;
 166    }
 167    net->nc = options->net_backend;
 168    net->dev.nvqs = options->nvqs;
 169
 170    net->dev.max_queues = 1;
 171    net->dev.vqs = net->vqs;
 172
 173    if (backend_kernel) {
 174        r = vhost_net_get_fd(options->net_backend);
 175        if (r < 0) {
 176            goto fail;
 177        }
 178        net->dev.backend_features = qemu_has_vnet_hdr(options->net_backend)
 179            ? 0 : (1ULL << VHOST_NET_F_VIRTIO_NET_HDR);
 180        net->backend = r;
 181        net->dev.protocol_features = 0;
 182    } else {
 183        net->dev.backend_features = 0;
 184        net->dev.protocol_features = 0;
 185        net->backend = -1;
 186
 187        /* vhost-user needs vq_index to initiate a specific queue pair */
 188        net->dev.vq_index = net->nc->queue_index * net->dev.nvqs;
 189    }
 190
 191    r = vhost_dev_init(&net->dev, options->opaque,
 192                       options->backend_type, options->busyloop_timeout,
 193                       &local_err);
 194    if (r < 0) {
 195        error_report_err(local_err);
 196        goto fail;
 197    }
 198    if (backend_kernel) {
 199        if (!qemu_has_vnet_hdr_len(options->net_backend,
 200                               sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
 201            net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF);
 202        }
 203        if (~net->dev.features & net->dev.backend_features) {
 204            fprintf(stderr, "vhost lacks feature mask %" PRIu64
 205                   " for backend\n",
 206                   (uint64_t)(~net->dev.features & net->dev.backend_features));
 207            goto fail;
 208        }
 209    }
 210
 211    /* Set sane init value. Override when guest acks. */
 212#ifdef CONFIG_VHOST_NET_USER
 213    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 214        features = vhost_user_get_acked_features(net->nc);
 215        if (~net->dev.features & features) {
 216            fprintf(stderr, "vhost lacks feature mask %" PRIu64
 217                    " for backend\n",
 218                    (uint64_t)(~net->dev.features & features));
 219            goto fail;
 220        }
 221    }
 222#endif
 223
 224    vhost_net_ack_features(net, features);
 225
 226    return net;
 227
 228fail:
 229    vhost_dev_cleanup(&net->dev);
 230    g_free(net);
 231    return NULL;
 232}
 233
 234static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index,
 235                                   int vq_index_end)
 236{
 237    net->dev.vq_index = vq_index;
 238    net->dev.vq_index_end = vq_index_end;
 239}
 240
 241static int vhost_net_start_one(struct vhost_net *net,
 242                               VirtIODevice *dev)
 243{
 244    struct vhost_vring_file file = { };
 245    int r;
 246
 247    r = vhost_dev_enable_notifiers(&net->dev, dev);
 248    if (r < 0) {
 249        goto fail_notifiers;
 250    }
 251
 252    r = vhost_dev_start(&net->dev, dev);
 253    if (r < 0) {
 254        goto fail_start;
 255    }
 256
 257    if (net->nc->info->poll) {
 258        net->nc->info->poll(net->nc, false);
 259    }
 260
 261    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 262        qemu_set_fd_handler(net->backend, NULL, NULL, NULL);
 263        file.fd = net->backend;
 264        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
 265            if (!virtio_queue_enabled(dev, net->dev.vq_index +
 266                                      file.index)) {
 267                /* Queue might not be ready for start */
 268                continue;
 269            }
 270            r = vhost_net_set_backend(&net->dev, &file);
 271            if (r < 0) {
 272                r = -errno;
 273                goto fail;
 274            }
 275        }
 276    }
 277    return 0;
 278fail:
 279    file.fd = -1;
 280    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 281        while (file.index-- > 0) {
 282            if (!virtio_queue_enabled(dev, net->dev.vq_index +
 283                                      file.index)) {
 284                /* Queue might not be ready for start */
 285                continue;
 286            }
 287            int r = vhost_net_set_backend(&net->dev, &file);
 288            assert(r >= 0);
 289        }
 290    }
 291    if (net->nc->info->poll) {
 292        net->nc->info->poll(net->nc, true);
 293    }
 294    vhost_dev_stop(&net->dev, dev);
 295fail_start:
 296    vhost_dev_disable_notifiers(&net->dev, dev);
 297fail_notifiers:
 298    return r;
 299}
 300
 301static void vhost_net_stop_one(struct vhost_net *net,
 302                               VirtIODevice *dev)
 303{
 304    struct vhost_vring_file file = { .fd = -1 };
 305
 306    if (net->nc->info->type == NET_CLIENT_DRIVER_TAP) {
 307        for (file.index = 0; file.index < net->dev.nvqs; ++file.index) {
 308            int r = vhost_net_set_backend(&net->dev, &file);
 309            assert(r >= 0);
 310        }
 311    }
 312    if (net->nc->info->poll) {
 313        net->nc->info->poll(net->nc, true);
 314    }
 315    vhost_dev_stop(&net->dev, dev);
 316    vhost_dev_disable_notifiers(&net->dev, dev);
 317}
 318
 319int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
 320                    int data_queue_pairs, int cvq)
 321{
 322    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
 323    VirtioBusState *vbus = VIRTIO_BUS(qbus);
 324    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
 325    int total_notifiers = data_queue_pairs * 2 + cvq;
 326    VirtIONet *n = VIRTIO_NET(dev);
 327    int nvhosts = data_queue_pairs + cvq;
 328    struct vhost_net *net;
 329    int r, e, i, index_end = data_queue_pairs * 2;
 330    NetClientState *peer;
 331
 332    if (cvq) {
 333        index_end += 1;
 334    }
 335
 336    if (!k->set_guest_notifiers) {
 337        error_report("binding does not support guest notifiers");
 338        return -ENOSYS;
 339    }
 340
 341    for (i = 0; i < nvhosts; i++) {
 342
 343        if (i < data_queue_pairs) {
 344            peer = qemu_get_peer(ncs, i);
 345        } else { /* Control Virtqueue */
 346            peer = qemu_get_peer(ncs, n->max_queue_pairs);
 347        }
 348
 349        net = get_vhost_net(peer);
 350        vhost_net_set_vq_index(net, i * 2, index_end);
 351
 352        /* Suppress the masking guest notifiers on vhost user
 353         * because vhost user doesn't interrupt masking/unmasking
 354         * properly.
 355         */
 356        if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
 357            dev->use_guest_notifier_mask = false;
 358        }
 359     }
 360
 361    r = k->set_guest_notifiers(qbus->parent, total_notifiers, true);
 362    if (r < 0) {
 363        error_report("Error binding guest notifier: %d", -r);
 364        goto err;
 365    }
 366
 367    for (i = 0; i < nvhosts; i++) {
 368        if (i < data_queue_pairs) {
 369            peer = qemu_get_peer(ncs, i);
 370        } else {
 371            peer = qemu_get_peer(ncs, n->max_queue_pairs);
 372        }
 373        r = vhost_net_start_one(get_vhost_net(peer), dev);
 374
 375        if (r < 0) {
 376            goto err_start;
 377        }
 378
 379        if (peer->vring_enable) {
 380            /* restore vring enable state */
 381            r = vhost_set_vring_enable(peer, peer->vring_enable);
 382
 383            if (r < 0) {
 384                goto err_start;
 385            }
 386        }
 387    }
 388
 389    return 0;
 390
 391err_start:
 392    while (--i >= 0) {
 393        peer = qemu_get_peer(ncs , i);
 394        vhost_net_stop_one(get_vhost_net(peer), dev);
 395    }
 396    e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
 397    if (e < 0) {
 398        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
 399        fflush(stderr);
 400    }
 401err:
 402    return r;
 403}
 404
 405void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
 406                    int data_queue_pairs, int cvq)
 407{
 408    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev)));
 409    VirtioBusState *vbus = VIRTIO_BUS(qbus);
 410    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
 411    VirtIONet *n = VIRTIO_NET(dev);
 412    NetClientState *peer;
 413    int total_notifiers = data_queue_pairs * 2 + cvq;
 414    int nvhosts = data_queue_pairs + cvq;
 415    int i, r;
 416
 417    for (i = 0; i < nvhosts; i++) {
 418        if (i < data_queue_pairs) {
 419            peer = qemu_get_peer(ncs, i);
 420        } else {
 421            peer = qemu_get_peer(ncs, n->max_queue_pairs);
 422        }
 423        vhost_net_stop_one(get_vhost_net(peer), dev);
 424    }
 425
 426    r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
 427    if (r < 0) {
 428        fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
 429        fflush(stderr);
 430    }
 431    assert(r >= 0);
 432}
 433
 434void vhost_net_cleanup(struct vhost_net *net)
 435{
 436    vhost_dev_cleanup(&net->dev);
 437}
 438
 439int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
 440{
 441    const VhostOps *vhost_ops = net->dev.vhost_ops;
 442
 443    assert(vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
 444    assert(vhost_ops->vhost_migration_done);
 445
 446    return vhost_ops->vhost_migration_done(&net->dev, mac_addr);
 447}
 448
 449bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
 450{
 451    return vhost_virtqueue_pending(&net->dev, idx);
 452}
 453
 454void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
 455                              int idx, bool mask)
 456{
 457    vhost_virtqueue_mask(&net->dev, dev, idx, mask);
 458}
 459
 460VHostNetState *get_vhost_net(NetClientState *nc)
 461{
 462    VHostNetState *vhost_net = 0;
 463
 464    if (!nc) {
 465        return 0;
 466    }
 467
 468    switch (nc->info->type) {
 469    case NET_CLIENT_DRIVER_TAP:
 470        vhost_net = tap_get_vhost_net(nc);
 471        break;
 472#ifdef CONFIG_VHOST_NET_USER
 473    case NET_CLIENT_DRIVER_VHOST_USER:
 474        vhost_net = vhost_user_get_vhost_net(nc);
 475        assert(vhost_net);
 476        break;
 477#endif
 478#ifdef CONFIG_VHOST_NET_VDPA
 479    case NET_CLIENT_DRIVER_VHOST_VDPA:
 480        vhost_net = vhost_vdpa_get_vhost_net(nc);
 481        assert(vhost_net);
 482        break;
 483#endif
 484    default:
 485        break;
 486    }
 487
 488    return vhost_net;
 489}
 490
 491int vhost_set_vring_enable(NetClientState *nc, int enable)
 492{
 493    VHostNetState *net = get_vhost_net(nc);
 494    const VhostOps *vhost_ops = net->dev.vhost_ops;
 495
 496    nc->vring_enable = enable;
 497
 498    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
 499        return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
 500    }
 501
 502    return 0;
 503}
 504
 505int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu)
 506{
 507    const VhostOps *vhost_ops = net->dev.vhost_ops;
 508
 509    if (!vhost_ops->vhost_net_set_mtu) {
 510        return 0;
 511    }
 512
 513    return vhost_ops->vhost_net_set_mtu(&net->dev, mtu);
 514}
 515