qemu/contrib/vhost-user-blk/vhost-user-blk.c
<<
>>
Prefs
   1/*
   2 * vhost-user-blk sample application
   3 *
   4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
   5 *
   6 * Author:
   7 *  Changpeng Liu <changpeng.liu@intel.com>
   8 *
   9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
  10 * implementation by:
  11 *  Felipe Franciosi <felipe@nutanix.com>
  12 *  Anthony Liguori <aliguori@us.ibm.com>
  13 *
  14 * This work is licensed under the terms of the GNU GPL, version 2 only.
  15 * See the COPYING file in the top-level directory.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "standard-headers/linux/virtio_blk.h"
  20#include "contrib/libvhost-user/libvhost-user-glib.h"
  21#include "contrib/libvhost-user/libvhost-user.h"
  22
  23#if defined(__linux__)
  24#include <linux/fs.h>
  25#include <sys/ioctl.h>
  26#endif
  27
  28enum {
  29    VHOST_USER_BLK_MAX_QUEUES = 8,
  30};
  31
  32struct virtio_blk_inhdr {
  33    unsigned char status;
  34};
  35
  36/* vhost user block device */
  37typedef struct VubDev {
  38    VugDev parent;
  39    int blk_fd;
  40    struct virtio_blk_config blkcfg;
  41    bool enable_ro;
  42    char *blk_name;
  43    GMainLoop *loop;
  44} VubDev;
  45
  46typedef struct VubReq {
  47    VuVirtqElement *elem;
  48    int64_t sector_num;
  49    size_t size;
  50    struct virtio_blk_inhdr *in;
  51    struct virtio_blk_outhdr *out;
  52    VubDev *vdev_blk;
  53    struct VuVirtq *vq;
  54} VubReq;
  55
  56/* refer util/iov.c */
  57static size_t vub_iov_size(const struct iovec *iov,
  58                              const unsigned int iov_cnt)
  59{
  60    size_t len;
  61    unsigned int i;
  62
  63    len = 0;
  64    for (i = 0; i < iov_cnt; i++) {
  65        len += iov[i].iov_len;
  66    }
  67    return len;
  68}
  69
  70static size_t vub_iov_to_buf(const struct iovec *iov,
  71                             const unsigned int iov_cnt, void *buf)
  72{
  73    size_t len;
  74    unsigned int i;
  75
  76    len = 0;
  77    for (i = 0; i < iov_cnt; i++) {
  78        memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
  79        len += iov[i].iov_len;
  80    }
  81    return len;
  82}
  83
  84static void vub_panic_cb(VuDev *vu_dev, const char *buf)
  85{
  86    VugDev *gdev;
  87    VubDev *vdev_blk;
  88
  89    assert(vu_dev);
  90
  91    gdev = container_of(vu_dev, VugDev, parent);
  92    vdev_blk = container_of(gdev, VubDev, parent);
  93    if (buf) {
  94        g_warning("vu_panic: %s", buf);
  95    }
  96
  97    g_main_loop_quit(vdev_blk->loop);
  98}
  99
 100static void vub_req_complete(VubReq *req)
 101{
 102    VugDev *gdev = &req->vdev_blk->parent;
 103    VuDev *vu_dev = &gdev->parent;
 104
 105    /* IO size with 1 extra status byte */
 106    vu_queue_push(vu_dev, req->vq, req->elem,
 107                  req->size + 1);
 108    vu_queue_notify(vu_dev, req->vq);
 109
 110    if (req->elem) {
 111        free(req->elem);
 112    }
 113
 114    g_free(req);
 115}
 116
 117static int vub_open(const char *file_name, bool wce)
 118{
 119    int fd;
 120    int flags = O_RDWR;
 121
 122    if (!wce) {
 123        flags |= O_DIRECT;
 124    }
 125
 126    fd = open(file_name, flags);
 127    if (fd < 0) {
 128        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
 129                strerror(errno));
 130        return -1;
 131    }
 132
 133    return fd;
 134}
 135
 136static ssize_t
 137vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 138{
 139    VubDev *vdev_blk = req->vdev_blk;
 140    ssize_t rc;
 141
 142    if (!iovcnt) {
 143        fprintf(stderr, "Invalid Read IOV count\n");
 144        return -1;
 145    }
 146
 147    req->size = vub_iov_size(iov, iovcnt);
 148    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 149    if (rc < 0) {
 150        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 151                vdev_blk->blk_name, req->sector_num, req->size,
 152                strerror(errno));
 153        return -1;
 154    }
 155
 156    return rc;
 157}
 158
 159static ssize_t
 160vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 161{
 162    VubDev *vdev_blk = req->vdev_blk;
 163    ssize_t rc;
 164
 165    if (!iovcnt) {
 166        fprintf(stderr, "Invalid Write IOV count\n");
 167        return -1;
 168    }
 169
 170    req->size = vub_iov_size(iov, iovcnt);
 171    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 172    if (rc < 0) {
 173        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 174                vdev_blk->blk_name, req->sector_num, req->size,
 175                strerror(errno));
 176        return -1;
 177    }
 178
 179    return rc;
 180}
 181
 182static int
 183vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
 184                         uint32_t type)
 185{
 186    struct virtio_blk_discard_write_zeroes *desc;
 187    ssize_t size;
 188    void *buf;
 189
 190    size = vub_iov_size(iov, iovcnt);
 191    if (size != sizeof(*desc)) {
 192        fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
 193        return -1;
 194    }
 195    buf = g_new0(char, size);
 196    vub_iov_to_buf(iov, iovcnt, buf);
 197
 198    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 199    VubDev *vdev_blk = req->vdev_blk;
 200    desc = (struct virtio_blk_discard_write_zeroes *)buf;
 201    uint64_t range[2] = { le64toh(desc->sector) << 9,
 202                          le32toh(desc->num_sectors) << 9 };
 203    if (type == VIRTIO_BLK_T_DISCARD) {
 204        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
 205            g_free(buf);
 206            return 0;
 207        }
 208    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
 209        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
 210            g_free(buf);
 211            return 0;
 212        }
 213    }
 214    #endif
 215
 216    g_free(buf);
 217    return -1;
 218}
 219
 220static void
 221vub_flush(VubReq *req)
 222{
 223    VubDev *vdev_blk = req->vdev_blk;
 224
 225    fdatasync(vdev_blk->blk_fd);
 226}
 227
 228static int vub_virtio_process_req(VubDev *vdev_blk,
 229                                     VuVirtq *vq)
 230{
 231    VugDev *gdev = &vdev_blk->parent;
 232    VuDev *vu_dev = &gdev->parent;
 233    VuVirtqElement *elem;
 234    uint32_t type;
 235    unsigned in_num;
 236    unsigned out_num;
 237    VubReq *req;
 238
 239    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
 240    if (!elem) {
 241        return -1;
 242    }
 243
 244    /* refer to hw/block/virtio_blk.c */
 245    if (elem->out_num < 1 || elem->in_num < 1) {
 246        fprintf(stderr, "virtio-blk request missing headers\n");
 247        free(elem);
 248        return -1;
 249    }
 250
 251    req = g_new0(VubReq, 1);
 252    req->vdev_blk = vdev_blk;
 253    req->vq = vq;
 254    req->elem = elem;
 255
 256    in_num = elem->in_num;
 257    out_num = elem->out_num;
 258
 259    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
 260    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
 261        fprintf(stderr, "Invalid outhdr size\n");
 262        goto err;
 263    }
 264    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
 265    out_num--;
 266
 267    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
 268        fprintf(stderr, "Invalid inhdr size\n");
 269        goto err;
 270    }
 271    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
 272    in_num--;
 273
 274    type = le32toh(req->out->type);
 275    switch (type & ~VIRTIO_BLK_T_BARRIER) {
 276    case VIRTIO_BLK_T_IN:
 277    case VIRTIO_BLK_T_OUT: {
 278        ssize_t ret = 0;
 279        bool is_write = type & VIRTIO_BLK_T_OUT;
 280        req->sector_num = le64toh(req->out->sector);
 281        if (is_write) {
 282            ret  = vub_writev(req, &elem->out_sg[1], out_num);
 283        } else {
 284            ret = vub_readv(req, &elem->in_sg[0], in_num);
 285        }
 286        if (ret >= 0) {
 287            req->in->status = VIRTIO_BLK_S_OK;
 288        } else {
 289            req->in->status = VIRTIO_BLK_S_IOERR;
 290        }
 291        vub_req_complete(req);
 292        break;
 293    }
 294    case VIRTIO_BLK_T_FLUSH:
 295        vub_flush(req);
 296        req->in->status = VIRTIO_BLK_S_OK;
 297        vub_req_complete(req);
 298        break;
 299    case VIRTIO_BLK_T_GET_ID: {
 300        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
 301                          VIRTIO_BLK_ID_BYTES);
 302        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 303        req->in->status = VIRTIO_BLK_S_OK;
 304        req->size = elem->in_sg[0].iov_len;
 305        vub_req_complete(req);
 306        break;
 307    }
 308    case VIRTIO_BLK_T_DISCARD:
 309    case VIRTIO_BLK_T_WRITE_ZEROES: {
 310        int rc;
 311        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
 312        if (rc == 0) {
 313            req->in->status = VIRTIO_BLK_S_OK;
 314        } else {
 315            req->in->status = VIRTIO_BLK_S_IOERR;
 316        }
 317        vub_req_complete(req);
 318        break;
 319    }
 320    default:
 321        req->in->status = VIRTIO_BLK_S_UNSUPP;
 322        vub_req_complete(req);
 323        break;
 324    }
 325
 326    return 0;
 327
 328err:
 329    free(elem);
 330    g_free(req);
 331    return -1;
 332}
 333
 334static void vub_process_vq(VuDev *vu_dev, int idx)
 335{
 336    VugDev *gdev;
 337    VubDev *vdev_blk;
 338    VuVirtq *vq;
 339    int ret;
 340
 341    gdev = container_of(vu_dev, VugDev, parent);
 342    vdev_blk = container_of(gdev, VubDev, parent);
 343    assert(vdev_blk);
 344
 345    vq = vu_get_queue(vu_dev, idx);
 346    assert(vq);
 347
 348    while (1) {
 349        ret = vub_virtio_process_req(vdev_blk, vq);
 350        if (ret) {
 351            break;
 352        }
 353    }
 354}
 355
 356static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 357{
 358    VuVirtq *vq;
 359
 360    assert(vu_dev);
 361
 362    vq = vu_get_queue(vu_dev, idx);
 363    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
 364}
 365
 366static uint64_t
 367vub_get_features(VuDev *dev)
 368{
 369    uint64_t features;
 370    VugDev *gdev;
 371    VubDev *vdev_blk;
 372
 373    gdev = container_of(dev, VugDev, parent);
 374    vdev_blk = container_of(gdev, VubDev, parent);
 375
 376    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
 377               1ull << VIRTIO_BLK_F_SEG_MAX |
 378               1ull << VIRTIO_BLK_F_TOPOLOGY |
 379               1ull << VIRTIO_BLK_F_BLK_SIZE |
 380               1ull << VIRTIO_BLK_F_FLUSH |
 381               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 382               1ull << VIRTIO_BLK_F_DISCARD |
 383               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
 384               #endif
 385               1ull << VIRTIO_BLK_F_CONFIG_WCE |
 386               1ull << VIRTIO_F_VERSION_1 |
 387               1ull << VHOST_USER_F_PROTOCOL_FEATURES;
 388
 389    if (vdev_blk->enable_ro) {
 390        features |= 1ull << VIRTIO_BLK_F_RO;
 391    }
 392
 393    return features;
 394}
 395
 396static uint64_t
 397vub_get_protocol_features(VuDev *dev)
 398{
 399    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
 400           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
 401}
 402
 403static int
 404vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 405{
 406    VugDev *gdev;
 407    VubDev *vdev_blk;
 408
 409    gdev = container_of(vu_dev, VugDev, parent);
 410    vdev_blk = container_of(gdev, VubDev, parent);
 411    memcpy(config, &vdev_blk->blkcfg, len);
 412
 413    return 0;
 414}
 415
 416static int
 417vub_set_config(VuDev *vu_dev, const uint8_t *data,
 418               uint32_t offset, uint32_t size, uint32_t flags)
 419{
 420    VugDev *gdev;
 421    VubDev *vdev_blk;
 422    uint8_t wce;
 423    int fd;
 424
 425    /* don't support live migration */
 426    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
 427        return -1;
 428    }
 429
 430    gdev = container_of(vu_dev, VugDev, parent);
 431    vdev_blk = container_of(gdev, VubDev, parent);
 432
 433    if (offset != offsetof(struct virtio_blk_config, wce) ||
 434        size != 1) {
 435        return -1;
 436    }
 437
 438    wce = *data;
 439    if (wce == vdev_blk->blkcfg.wce) {
 440        /* Do nothing as same with old configuration */
 441        return 0;
 442    }
 443
 444    vdev_blk->blkcfg.wce = wce;
 445    fprintf(stdout, "Write Cache Policy Changed\n");
 446    if (vdev_blk->blk_fd >= 0) {
 447        close(vdev_blk->blk_fd);
 448        vdev_blk->blk_fd = -1;
 449    }
 450
 451    fd = vub_open(vdev_blk->blk_name, wce);
 452    if (fd < 0) {
 453        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
 454        vdev_blk->blk_fd = -1;
 455        return -1;
 456    }
 457    vdev_blk->blk_fd = fd;
 458
 459    return 0;
 460}
 461
 462static const VuDevIface vub_iface = {
 463    .get_features = vub_get_features,
 464    .queue_set_started = vub_queue_set_started,
 465    .get_protocol_features = vub_get_protocol_features,
 466    .get_config = vub_get_config,
 467    .set_config = vub_set_config,
 468};
 469
 470static int unix_sock_new(char *unix_fn)
 471{
 472    int sock;
 473    struct sockaddr_un un;
 474    size_t len;
 475
 476    assert(unix_fn);
 477
 478    sock = socket(AF_UNIX, SOCK_STREAM, 0);
 479    if (sock <= 0) {
 480        perror("socket");
 481        return -1;
 482    }
 483
 484    un.sun_family = AF_UNIX;
 485    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
 486    len = sizeof(un.sun_family) + strlen(un.sun_path);
 487
 488    (void)unlink(unix_fn);
 489    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
 490        perror("bind");
 491        goto fail;
 492    }
 493
 494    if (listen(sock, 1) < 0) {
 495        perror("listen");
 496        goto fail;
 497    }
 498
 499    return sock;
 500
 501fail:
 502    (void)close(sock);
 503
 504    return -1;
 505}
 506
 507static void vub_free(struct VubDev *vdev_blk)
 508{
 509    if (!vdev_blk) {
 510        return;
 511    }
 512
 513    g_main_loop_unref(vdev_blk->loop);
 514    if (vdev_blk->blk_fd >= 0) {
 515        close(vdev_blk->blk_fd);
 516    }
 517    g_free(vdev_blk);
 518}
 519
 520static uint32_t
 521vub_get_blocksize(int fd)
 522{
 523    uint32_t blocksize = 512;
 524
 525#if defined(__linux__) && defined(BLKSSZGET)
 526    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
 527        return blocksize;
 528    }
 529#endif
 530
 531    return blocksize;
 532}
 533
 534static void
 535vub_initialize_config(int fd, struct virtio_blk_config *config)
 536{
 537    off64_t capacity;
 538
 539    capacity = lseek64(fd, 0, SEEK_END);
 540    config->capacity = capacity >> 9;
 541    config->blk_size = vub_get_blocksize(fd);
 542    config->size_max = 65536;
 543    config->seg_max = 128 - 2;
 544    config->min_io_size = 1;
 545    config->opt_io_size = 1;
 546    config->num_queues = 1;
 547    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 548    config->max_discard_sectors = 32768;
 549    config->max_discard_seg = 1;
 550    config->discard_sector_alignment = config->blk_size >> 9;
 551    config->max_write_zeroes_sectors = 32768;
 552    config->max_write_zeroes_seg = 1;
 553    #endif
 554}
 555
 556static VubDev *
 557vub_new(char *blk_file)
 558{
 559    VubDev *vdev_blk;
 560
 561    vdev_blk = g_new0(VubDev, 1);
 562    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
 563    vdev_blk->blk_fd = vub_open(blk_file, 0);
 564    if (vdev_blk->blk_fd  < 0) {
 565        fprintf(stderr, "Error to open block device %s\n", blk_file);
 566        vub_free(vdev_blk);
 567        return NULL;
 568    }
 569    vdev_blk->enable_ro = false;
 570    vdev_blk->blkcfg.wce = 0;
 571    vdev_blk->blk_name = blk_file;
 572
 573    /* fill virtio_blk_config with block parameters */
 574    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
 575
 576    return vdev_blk;
 577}
 578
 579int main(int argc, char **argv)
 580{
 581    int opt;
 582    char *unix_socket = NULL;
 583    char *blk_file = NULL;
 584    bool enable_ro = false;
 585    int lsock = -1, csock = -1;
 586    VubDev *vdev_blk = NULL;
 587
 588    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
 589        switch (opt) {
 590        case 'b':
 591            blk_file = g_strdup(optarg);
 592            break;
 593        case 's':
 594            unix_socket = g_strdup(optarg);
 595            break;
 596        case 'r':
 597            enable_ro = true;
 598            break;
 599        case 'h':
 600        default:
 601            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 602                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 603            return 0;
 604        }
 605    }
 606
 607    if (!unix_socket || !blk_file) {
 608        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 609               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 610        return -1;
 611    }
 612
 613    lsock = unix_sock_new(unix_socket);
 614    if (lsock < 0) {
 615        goto err;
 616    }
 617
 618    csock = accept(lsock, (void *)0, (void *)0);
 619    if (csock < 0) {
 620        fprintf(stderr, "Accept error %s\n", strerror(errno));
 621        goto err;
 622    }
 623
 624    vdev_blk = vub_new(blk_file);
 625    if (!vdev_blk) {
 626        goto err;
 627    }
 628    if (enable_ro) {
 629        vdev_blk->enable_ro = true;
 630    }
 631
 632    if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
 633                  vub_panic_cb, &vub_iface)) {
 634        fprintf(stderr, "Failed to initialized libvhost-user-glib\n");
 635        goto err;
 636    }
 637
 638    g_main_loop_run(vdev_blk->loop);
 639
 640    vug_deinit(&vdev_blk->parent);
 641
 642err:
 643    vub_free(vdev_blk);
 644    if (csock >= 0) {
 645        close(csock);
 646    }
 647    if (lsock >= 0) {
 648        close(lsock);
 649    }
 650    g_free(unix_socket);
 651    g_free(blk_file);
 652
 653    return 0;
 654}
 655