/* qemu/contrib/vhost-user-blk/vhost-user-blk.c */
   1/*
   2 * vhost-user-blk sample application
   3 *
   4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
   5 *
   6 * Author:
   7 *  Changpeng Liu <changpeng.liu@intel.com>
   8 *
   9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
  10 * implementation by:
  11 *  Felipe Franciosi <felipe@nutanix.com>
  12 *  Anthony Liguori <aliguori@us.ibm.com>
  13 *
  14 * This work is licensed under the terms of the GNU GPL, version 2 only.
  15 * See the COPYING file in the top-level directory.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "standard-headers/linux/virtio_blk.h"
  20#include "contrib/libvhost-user/libvhost-user-glib.h"
  21#include "contrib/libvhost-user/libvhost-user.h"
  22
  23#if defined(__linux__)
  24#include <linux/fs.h>
  25#include <sys/ioctl.h>
  26#endif
  27
  28struct virtio_blk_inhdr {
  29    unsigned char status;
  30};
  31
  32/* vhost user block device */
  33typedef struct VubDev {
  34    VugDev parent;
  35    int blk_fd;
  36    struct virtio_blk_config blkcfg;
  37    bool enable_ro;
  38    char *blk_name;
  39    GMainLoop *loop;
  40} VubDev;
  41
  42typedef struct VubReq {
  43    VuVirtqElement *elem;
  44    int64_t sector_num;
  45    size_t size;
  46    struct virtio_blk_inhdr *in;
  47    struct virtio_blk_outhdr *out;
  48    VubDev *vdev_blk;
  49    struct VuVirtq *vq;
  50} VubReq;
  51
  52/* refer util/iov.c */
  53static size_t vub_iov_size(const struct iovec *iov,
  54                              const unsigned int iov_cnt)
  55{
  56    size_t len;
  57    unsigned int i;
  58
  59    len = 0;
  60    for (i = 0; i < iov_cnt; i++) {
  61        len += iov[i].iov_len;
  62    }
  63    return len;
  64}
  65
  66static size_t vub_iov_to_buf(const struct iovec *iov,
  67                             const unsigned int iov_cnt, void *buf)
  68{
  69    size_t len;
  70    unsigned int i;
  71
  72    len = 0;
  73    for (i = 0; i < iov_cnt; i++) {
  74        memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
  75        len += iov[i].iov_len;
  76    }
  77    return len;
  78}
  79
  80static void vub_panic_cb(VuDev *vu_dev, const char *buf)
  81{
  82    VugDev *gdev;
  83    VubDev *vdev_blk;
  84
  85    assert(vu_dev);
  86
  87    gdev = container_of(vu_dev, VugDev, parent);
  88    vdev_blk = container_of(gdev, VubDev, parent);
  89    if (buf) {
  90        g_warning("vu_panic: %s", buf);
  91    }
  92
  93    g_main_loop_quit(vdev_blk->loop);
  94}
  95
  96static void vub_req_complete(VubReq *req)
  97{
  98    VugDev *gdev = &req->vdev_blk->parent;
  99    VuDev *vu_dev = &gdev->parent;
 100
 101    /* IO size with 1 extra status byte */
 102    vu_queue_push(vu_dev, req->vq, req->elem,
 103                  req->size + 1);
 104    vu_queue_notify(vu_dev, req->vq);
 105
 106    if (req->elem) {
 107        free(req->elem);
 108    }
 109
 110    g_free(req);
 111}
 112
 113static int vub_open(const char *file_name, bool wce)
 114{
 115    int fd;
 116    int flags = O_RDWR;
 117
 118    if (!wce) {
 119        flags |= O_DIRECT;
 120    }
 121
 122    fd = open(file_name, flags);
 123    if (fd < 0) {
 124        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
 125                strerror(errno));
 126        return -1;
 127    }
 128
 129    return fd;
 130}
 131
 132static ssize_t
 133vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 134{
 135    VubDev *vdev_blk = req->vdev_blk;
 136    ssize_t rc;
 137
 138    if (!iovcnt) {
 139        fprintf(stderr, "Invalid Read IOV count\n");
 140        return -1;
 141    }
 142
 143    req->size = vub_iov_size(iov, iovcnt);
 144    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 145    if (rc < 0) {
 146        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 147                vdev_blk->blk_name, req->sector_num, req->size,
 148                strerror(errno));
 149        return -1;
 150    }
 151
 152    return rc;
 153}
 154
 155static ssize_t
 156vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 157{
 158    VubDev *vdev_blk = req->vdev_blk;
 159    ssize_t rc;
 160
 161    if (!iovcnt) {
 162        fprintf(stderr, "Invalid Write IOV count\n");
 163        return -1;
 164    }
 165
 166    req->size = vub_iov_size(iov, iovcnt);
 167    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 168    if (rc < 0) {
 169        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 170                vdev_blk->blk_name, req->sector_num, req->size,
 171                strerror(errno));
 172        return -1;
 173    }
 174
 175    return rc;
 176}
 177
 178static int
 179vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
 180                         uint32_t type)
 181{
 182    struct virtio_blk_discard_write_zeroes *desc;
 183    ssize_t size;
 184    void *buf;
 185
 186    size = vub_iov_size(iov, iovcnt);
 187    if (size != sizeof(*desc)) {
 188        fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
 189        return -1;
 190    }
 191    buf = g_new0(char, size);
 192    vub_iov_to_buf(iov, iovcnt, buf);
 193
 194    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 195    VubDev *vdev_blk = req->vdev_blk;
 196    desc = (struct virtio_blk_discard_write_zeroes *)buf;
 197    uint64_t range[2] = { le64toh(desc->sector) << 9,
 198                          le32toh(desc->num_sectors) << 9 };
 199    if (type == VIRTIO_BLK_T_DISCARD) {
 200        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
 201            g_free(buf);
 202            return 0;
 203        }
 204    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
 205        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
 206            g_free(buf);
 207            return 0;
 208        }
 209    }
 210    #endif
 211
 212    g_free(buf);
 213    return -1;
 214}
 215
 216static void
 217vub_flush(VubReq *req)
 218{
 219    VubDev *vdev_blk = req->vdev_blk;
 220
 221    fdatasync(vdev_blk->blk_fd);
 222}
 223
 224static int vub_virtio_process_req(VubDev *vdev_blk,
 225                                     VuVirtq *vq)
 226{
 227    VugDev *gdev = &vdev_blk->parent;
 228    VuDev *vu_dev = &gdev->parent;
 229    VuVirtqElement *elem;
 230    uint32_t type;
 231    unsigned in_num;
 232    unsigned out_num;
 233    VubReq *req;
 234
 235    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
 236    if (!elem) {
 237        return -1;
 238    }
 239
 240    /* refer to hw/block/virtio_blk.c */
 241    if (elem->out_num < 1 || elem->in_num < 1) {
 242        fprintf(stderr, "virtio-blk request missing headers\n");
 243        free(elem);
 244        return -1;
 245    }
 246
 247    req = g_new0(VubReq, 1);
 248    req->vdev_blk = vdev_blk;
 249    req->vq = vq;
 250    req->elem = elem;
 251
 252    in_num = elem->in_num;
 253    out_num = elem->out_num;
 254
 255    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
 256    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
 257        fprintf(stderr, "Invalid outhdr size\n");
 258        goto err;
 259    }
 260    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
 261    out_num--;
 262
 263    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
 264        fprintf(stderr, "Invalid inhdr size\n");
 265        goto err;
 266    }
 267    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
 268    in_num--;
 269
 270    type = le32toh(req->out->type);
 271    switch (type & ~VIRTIO_BLK_T_BARRIER) {
 272    case VIRTIO_BLK_T_IN:
 273    case VIRTIO_BLK_T_OUT: {
 274        ssize_t ret = 0;
 275        bool is_write = type & VIRTIO_BLK_T_OUT;
 276        req->sector_num = le64toh(req->out->sector);
 277        if (is_write) {
 278            ret  = vub_writev(req, &elem->out_sg[1], out_num);
 279        } else {
 280            ret = vub_readv(req, &elem->in_sg[0], in_num);
 281        }
 282        if (ret >= 0) {
 283            req->in->status = VIRTIO_BLK_S_OK;
 284        } else {
 285            req->in->status = VIRTIO_BLK_S_IOERR;
 286        }
 287        vub_req_complete(req);
 288        break;
 289    }
 290    case VIRTIO_BLK_T_FLUSH:
 291        vub_flush(req);
 292        req->in->status = VIRTIO_BLK_S_OK;
 293        vub_req_complete(req);
 294        break;
 295    case VIRTIO_BLK_T_GET_ID: {
 296        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
 297                          VIRTIO_BLK_ID_BYTES);
 298        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 299        req->in->status = VIRTIO_BLK_S_OK;
 300        req->size = elem->in_sg[0].iov_len;
 301        vub_req_complete(req);
 302        break;
 303    }
 304    case VIRTIO_BLK_T_DISCARD:
 305    case VIRTIO_BLK_T_WRITE_ZEROES: {
 306        int rc;
 307        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
 308        if (rc == 0) {
 309            req->in->status = VIRTIO_BLK_S_OK;
 310        } else {
 311            req->in->status = VIRTIO_BLK_S_IOERR;
 312        }
 313        vub_req_complete(req);
 314        break;
 315    }
 316    default:
 317        req->in->status = VIRTIO_BLK_S_UNSUPP;
 318        vub_req_complete(req);
 319        break;
 320    }
 321
 322    return 0;
 323
 324err:
 325    free(elem);
 326    g_free(req);
 327    return -1;
 328}
 329
 330static void vub_process_vq(VuDev *vu_dev, int idx)
 331{
 332    VugDev *gdev;
 333    VubDev *vdev_blk;
 334    VuVirtq *vq;
 335    int ret;
 336
 337    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
 338        fprintf(stderr, "VQ Index out of range: %d\n", idx);
 339        vub_panic_cb(vu_dev, NULL);
 340        return;
 341    }
 342
 343    gdev = container_of(vu_dev, VugDev, parent);
 344    vdev_blk = container_of(gdev, VubDev, parent);
 345    assert(vdev_blk);
 346
 347    vq = vu_get_queue(vu_dev, idx);
 348    assert(vq);
 349
 350    while (1) {
 351        ret = vub_virtio_process_req(vdev_blk, vq);
 352        if (ret) {
 353            break;
 354        }
 355    }
 356}
 357
 358static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 359{
 360    VuVirtq *vq;
 361
 362    assert(vu_dev);
 363
 364    vq = vu_get_queue(vu_dev, idx);
 365    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
 366}
 367
 368static uint64_t
 369vub_get_features(VuDev *dev)
 370{
 371    uint64_t features;
 372    VugDev *gdev;
 373    VubDev *vdev_blk;
 374
 375    gdev = container_of(dev, VugDev, parent);
 376    vdev_blk = container_of(gdev, VubDev, parent);
 377
 378    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
 379               1ull << VIRTIO_BLK_F_SEG_MAX |
 380               1ull << VIRTIO_BLK_F_TOPOLOGY |
 381               1ull << VIRTIO_BLK_F_BLK_SIZE |
 382               1ull << VIRTIO_BLK_F_FLUSH |
 383               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 384               1ull << VIRTIO_BLK_F_DISCARD |
 385               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
 386               #endif
 387               1ull << VIRTIO_BLK_F_CONFIG_WCE |
 388               1ull << VIRTIO_F_VERSION_1 |
 389               1ull << VHOST_USER_F_PROTOCOL_FEATURES;
 390
 391    if (vdev_blk->enable_ro) {
 392        features |= 1ull << VIRTIO_BLK_F_RO;
 393    }
 394
 395    return features;
 396}
 397
 398static uint64_t
 399vub_get_protocol_features(VuDev *dev)
 400{
 401    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
 402}
 403
 404static int
 405vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 406{
 407    VugDev *gdev;
 408    VubDev *vdev_blk;
 409
 410    gdev = container_of(vu_dev, VugDev, parent);
 411    vdev_blk = container_of(gdev, VubDev, parent);
 412    memcpy(config, &vdev_blk->blkcfg, len);
 413
 414    return 0;
 415}
 416
 417static int
 418vub_set_config(VuDev *vu_dev, const uint8_t *data,
 419               uint32_t offset, uint32_t size, uint32_t flags)
 420{
 421    VugDev *gdev;
 422    VubDev *vdev_blk;
 423    uint8_t wce;
 424    int fd;
 425
 426    /* don't support live migration */
 427    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
 428        return -1;
 429    }
 430
 431    gdev = container_of(vu_dev, VugDev, parent);
 432    vdev_blk = container_of(gdev, VubDev, parent);
 433
 434    if (offset != offsetof(struct virtio_blk_config, wce) ||
 435        size != 1) {
 436        return -1;
 437    }
 438
 439    wce = *data;
 440    if (wce == vdev_blk->blkcfg.wce) {
 441        /* Do nothing as same with old configuration */
 442        return 0;
 443    }
 444
 445    vdev_blk->blkcfg.wce = wce;
 446    fprintf(stdout, "Write Cache Policy Changed\n");
 447    if (vdev_blk->blk_fd >= 0) {
 448        close(vdev_blk->blk_fd);
 449        vdev_blk->blk_fd = -1;
 450    }
 451
 452    fd = vub_open(vdev_blk->blk_name, wce);
 453    if (fd < 0) {
 454        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
 455        vdev_blk->blk_fd = -1;
 456        return -1;
 457    }
 458    vdev_blk->blk_fd = fd;
 459
 460    return 0;
 461}
 462
 463static const VuDevIface vub_iface = {
 464    .get_features = vub_get_features,
 465    .queue_set_started = vub_queue_set_started,
 466    .get_protocol_features = vub_get_protocol_features,
 467    .get_config = vub_get_config,
 468    .set_config = vub_set_config,
 469};
 470
 471static int unix_sock_new(char *unix_fn)
 472{
 473    int sock;
 474    struct sockaddr_un un;
 475    size_t len;
 476
 477    assert(unix_fn);
 478
 479    sock = socket(AF_UNIX, SOCK_STREAM, 0);
 480    if (sock <= 0) {
 481        perror("socket");
 482        return -1;
 483    }
 484
 485    un.sun_family = AF_UNIX;
 486    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
 487    len = sizeof(un.sun_family) + strlen(un.sun_path);
 488
 489    (void)unlink(unix_fn);
 490    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
 491        perror("bind");
 492        goto fail;
 493    }
 494
 495    if (listen(sock, 1) < 0) {
 496        perror("listen");
 497        goto fail;
 498    }
 499
 500    return sock;
 501
 502fail:
 503    (void)close(sock);
 504
 505    return -1;
 506}
 507
 508static void vub_free(struct VubDev *vdev_blk)
 509{
 510    if (!vdev_blk) {
 511        return;
 512    }
 513
 514    g_main_loop_unref(vdev_blk->loop);
 515    if (vdev_blk->blk_fd >= 0) {
 516        close(vdev_blk->blk_fd);
 517    }
 518    g_free(vdev_blk);
 519}
 520
 521static uint32_t
 522vub_get_blocksize(int fd)
 523{
 524    uint32_t blocksize = 512;
 525
 526#if defined(__linux__) && defined(BLKSSZGET)
 527    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
 528        return blocksize;
 529    }
 530#endif
 531
 532    return blocksize;
 533}
 534
 535static void
 536vub_initialize_config(int fd, struct virtio_blk_config *config)
 537{
 538    off64_t capacity;
 539
 540    capacity = lseek64(fd, 0, SEEK_END);
 541    config->capacity = capacity >> 9;
 542    config->blk_size = vub_get_blocksize(fd);
 543    config->size_max = 65536;
 544    config->seg_max = 128 - 2;
 545    config->min_io_size = 1;
 546    config->opt_io_size = 1;
 547    config->num_queues = 1;
 548    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 549    config->max_discard_sectors = 32768;
 550    config->max_discard_seg = 1;
 551    config->discard_sector_alignment = config->blk_size >> 9;
 552    config->max_write_zeroes_sectors = 32768;
 553    config->max_write_zeroes_seg = 1;
 554    #endif
 555}
 556
 557static VubDev *
 558vub_new(char *blk_file)
 559{
 560    VubDev *vdev_blk;
 561
 562    vdev_blk = g_new0(VubDev, 1);
 563    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
 564    vdev_blk->blk_fd = vub_open(blk_file, 0);
 565    if (vdev_blk->blk_fd  < 0) {
 566        fprintf(stderr, "Error to open block device %s\n", blk_file);
 567        vub_free(vdev_blk);
 568        return NULL;
 569    }
 570    vdev_blk->enable_ro = false;
 571    vdev_blk->blkcfg.wce = 0;
 572    vdev_blk->blk_name = blk_file;
 573
 574    /* fill virtio_blk_config with block parameters */
 575    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
 576
 577    return vdev_blk;
 578}
 579
 580int main(int argc, char **argv)
 581{
 582    int opt;
 583    char *unix_socket = NULL;
 584    char *blk_file = NULL;
 585    bool enable_ro = false;
 586    int lsock = -1, csock = -1;
 587    VubDev *vdev_blk = NULL;
 588
 589    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
 590        switch (opt) {
 591        case 'b':
 592            blk_file = g_strdup(optarg);
 593            break;
 594        case 's':
 595            unix_socket = g_strdup(optarg);
 596            break;
 597        case 'r':
 598            enable_ro = true;
 599            break;
 600        case 'h':
 601        default:
 602            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 603                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 604            return 0;
 605        }
 606    }
 607
 608    if (!unix_socket || !blk_file) {
 609        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 610               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 611        return -1;
 612    }
 613
 614    lsock = unix_sock_new(unix_socket);
 615    if (lsock < 0) {
 616        goto err;
 617    }
 618
 619    csock = accept(lsock, (void *)0, (void *)0);
 620    if (csock < 0) {
 621        fprintf(stderr, "Accept error %s\n", strerror(errno));
 622        goto err;
 623    }
 624
 625    vdev_blk = vub_new(blk_file);
 626    if (!vdev_blk) {
 627        goto err;
 628    }
 629    if (enable_ro) {
 630        vdev_blk->enable_ro = true;
 631    }
 632
 633    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
 634
 635    g_main_loop_run(vdev_blk->loop);
 636
 637    vug_deinit(&vdev_blk->parent);
 638
 639err:
 640    vub_free(vdev_blk);
 641    if (csock >= 0) {
 642        close(csock);
 643    }
 644    if (lsock >= 0) {
 645        close(lsock);
 646    }
 647    g_free(unix_socket);
 648    g_free(blk_file);
 649
 650    return 0;
 651}
 652