qemu/contrib/vhost-user-blk/vhost-user-blk.c
<<
>>
Prefs
   1/*
   2 * vhost-user-blk sample application
   3 *
   4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
   5 *
   6 * Author:
   7 *  Changpeng Liu <changpeng.liu@intel.com>
   8 *
   9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
  10 * implementation by:
  11 *  Felipe Franciosi <felipe@nutanix.com>
  12 *  Anthony Liguori <aliguori@us.ibm.com>
  13 *
  14 * This work is licensed under the terms of the GNU GPL, version 2 only.
  15 * See the COPYING file in the top-level directory.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "standard-headers/linux/virtio_blk.h"
  20#include "libvhost-user-glib.h"
  21
  22#if defined(__linux__)
  23#include <linux/fs.h>
  24#include <sys/ioctl.h>
  25#endif
  26
  27enum {
  28    VHOST_USER_BLK_MAX_QUEUES = 8,
  29};
  30
  31struct virtio_blk_inhdr {
  32    unsigned char status;
  33};
  34
  35/* vhost user block device */
  36typedef struct VubDev {
  37    VugDev parent;
  38    int blk_fd;
  39    struct virtio_blk_config blkcfg;
  40    bool enable_ro;
  41    char *blk_name;
  42    GMainLoop *loop;
  43} VubDev;
  44
  45typedef struct VubReq {
  46    VuVirtqElement *elem;
  47    int64_t sector_num;
  48    size_t size;
  49    struct virtio_blk_inhdr *in;
  50    struct virtio_blk_outhdr *out;
  51    VubDev *vdev_blk;
  52    struct VuVirtq *vq;
  53} VubReq;
  54
  55/* refer util/iov.c */
  56static size_t vub_iov_size(const struct iovec *iov,
  57                              const unsigned int iov_cnt)
  58{
  59    size_t len;
  60    unsigned int i;
  61
  62    len = 0;
  63    for (i = 0; i < iov_cnt; i++) {
  64        len += iov[i].iov_len;
  65    }
  66    return len;
  67}
  68
  69static size_t vub_iov_to_buf(const struct iovec *iov,
  70                             const unsigned int iov_cnt, void *buf)
  71{
  72    size_t len;
  73    unsigned int i;
  74
  75    len = 0;
  76    for (i = 0; i < iov_cnt; i++) {
  77        memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
  78        len += iov[i].iov_len;
  79    }
  80    return len;
  81}
  82
  83static void vub_panic_cb(VuDev *vu_dev, const char *buf)
  84{
  85    VugDev *gdev;
  86    VubDev *vdev_blk;
  87
  88    assert(vu_dev);
  89
  90    gdev = container_of(vu_dev, VugDev, parent);
  91    vdev_blk = container_of(gdev, VubDev, parent);
  92    if (buf) {
  93        g_warning("vu_panic: %s", buf);
  94    }
  95
  96    g_main_loop_quit(vdev_blk->loop);
  97}
  98
  99static void vub_req_complete(VubReq *req)
 100{
 101    VugDev *gdev = &req->vdev_blk->parent;
 102    VuDev *vu_dev = &gdev->parent;
 103
 104    /* IO size with 1 extra status byte */
 105    vu_queue_push(vu_dev, req->vq, req->elem,
 106                  req->size + 1);
 107    vu_queue_notify(vu_dev, req->vq);
 108
 109    g_free(req->elem);
 110    g_free(req);
 111}
 112
 113static int vub_open(const char *file_name, bool wce)
 114{
 115    int fd;
 116    int flags = O_RDWR;
 117
 118    if (!wce) {
 119        flags |= O_DIRECT;
 120    }
 121
 122    fd = open(file_name, flags);
 123    if (fd < 0) {
 124        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
 125                strerror(errno));
 126        return -1;
 127    }
 128
 129    return fd;
 130}
 131
 132static ssize_t
 133vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 134{
 135    VubDev *vdev_blk = req->vdev_blk;
 136    ssize_t rc;
 137
 138    if (!iovcnt) {
 139        fprintf(stderr, "Invalid Read IOV count\n");
 140        return -1;
 141    }
 142
 143    req->size = vub_iov_size(iov, iovcnt);
 144    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 145    if (rc < 0) {
 146        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
 147                vdev_blk->blk_name, req->sector_num, req->size,
 148                strerror(errno));
 149        return -1;
 150    }
 151
 152    return rc;
 153}
 154
 155static ssize_t
 156vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 157{
 158    VubDev *vdev_blk = req->vdev_blk;
 159    ssize_t rc;
 160
 161    if (!iovcnt) {
 162        fprintf(stderr, "Invalid Write IOV count\n");
 163        return -1;
 164    }
 165
 166    req->size = vub_iov_size(iov, iovcnt);
 167    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 168    if (rc < 0) {
 169        fprintf(stderr, "%s, Sector %"PRIu64", Size %zu failed with %s\n",
 170                vdev_blk->blk_name, req->sector_num, req->size,
 171                strerror(errno));
 172        return -1;
 173    }
 174
 175    return rc;
 176}
 177
 178static int
 179vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
 180                         uint32_t type)
 181{
 182    struct virtio_blk_discard_write_zeroes *desc;
 183    ssize_t size;
 184    void *buf;
 185
 186    size = vub_iov_size(iov, iovcnt);
 187    if (size != sizeof(*desc)) {
 188        fprintf(stderr, "Invalid size %zd, expect %zd\n", size, sizeof(*desc));
 189        return -1;
 190    }
 191    buf = g_new0(char, size);
 192    vub_iov_to_buf(iov, iovcnt, buf);
 193
 194    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 195    VubDev *vdev_blk = req->vdev_blk;
 196    desc = (struct virtio_blk_discard_write_zeroes *)buf;
 197    uint64_t range[2] = { le64toh(desc->sector) << 9,
 198                          le32toh(desc->num_sectors) << 9 };
 199    if (type == VIRTIO_BLK_T_DISCARD) {
 200        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
 201            g_free(buf);
 202            return 0;
 203        }
 204    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
 205        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
 206            g_free(buf);
 207            return 0;
 208        }
 209    }
 210    #endif
 211
 212    g_free(buf);
 213    return -1;
 214}
 215
 216static void
 217vub_flush(VubReq *req)
 218{
 219    VubDev *vdev_blk = req->vdev_blk;
 220
 221    fdatasync(vdev_blk->blk_fd);
 222}
 223
 224static int vub_virtio_process_req(VubDev *vdev_blk,
 225                                     VuVirtq *vq)
 226{
 227    VugDev *gdev = &vdev_blk->parent;
 228    VuDev *vu_dev = &gdev->parent;
 229    VuVirtqElement *elem;
 230    uint32_t type;
 231    unsigned in_num;
 232    unsigned out_num;
 233    VubReq *req;
 234
 235    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
 236    if (!elem) {
 237        return -1;
 238    }
 239
 240    /* refer to hw/block/virtio_blk.c */
 241    if (elem->out_num < 1 || elem->in_num < 1) {
 242        fprintf(stderr, "virtio-blk request missing headers\n");
 243        g_free(elem);
 244        return -1;
 245    }
 246
 247    req = g_new0(VubReq, 1);
 248    req->vdev_blk = vdev_blk;
 249    req->vq = vq;
 250    req->elem = elem;
 251
 252    in_num = elem->in_num;
 253    out_num = elem->out_num;
 254
 255    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
 256    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
 257        fprintf(stderr, "Invalid outhdr size\n");
 258        goto err;
 259    }
 260    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
 261    out_num--;
 262
 263    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
 264        fprintf(stderr, "Invalid inhdr size\n");
 265        goto err;
 266    }
 267    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
 268    in_num--;
 269
 270    type = le32toh(req->out->type);
 271    switch (type & ~VIRTIO_BLK_T_BARRIER) {
 272    case VIRTIO_BLK_T_IN:
 273    case VIRTIO_BLK_T_OUT: {
 274        ssize_t ret = 0;
 275        bool is_write = type & VIRTIO_BLK_T_OUT;
 276        req->sector_num = le64toh(req->out->sector);
 277        if (is_write) {
 278            ret  = vub_writev(req, &elem->out_sg[1], out_num);
 279        } else {
 280            ret = vub_readv(req, &elem->in_sg[0], in_num);
 281        }
 282        if (ret >= 0) {
 283            req->in->status = VIRTIO_BLK_S_OK;
 284        } else {
 285            req->in->status = VIRTIO_BLK_S_IOERR;
 286        }
 287        vub_req_complete(req);
 288        break;
 289    }
 290    case VIRTIO_BLK_T_FLUSH:
 291        vub_flush(req);
 292        req->in->status = VIRTIO_BLK_S_OK;
 293        vub_req_complete(req);
 294        break;
 295    case VIRTIO_BLK_T_GET_ID: {
 296        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
 297                          VIRTIO_BLK_ID_BYTES);
 298        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 299        req->in->status = VIRTIO_BLK_S_OK;
 300        req->size = elem->in_sg[0].iov_len;
 301        vub_req_complete(req);
 302        break;
 303    }
 304    case VIRTIO_BLK_T_DISCARD:
 305    case VIRTIO_BLK_T_WRITE_ZEROES: {
 306        int rc;
 307        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
 308        if (rc == 0) {
 309            req->in->status = VIRTIO_BLK_S_OK;
 310        } else {
 311            req->in->status = VIRTIO_BLK_S_IOERR;
 312        }
 313        vub_req_complete(req);
 314        break;
 315    }
 316    default:
 317        req->in->status = VIRTIO_BLK_S_UNSUPP;
 318        vub_req_complete(req);
 319        break;
 320    }
 321
 322    return 0;
 323
 324err:
 325    g_free(elem);
 326    g_free(req);
 327    return -1;
 328}
 329
 330static void vub_process_vq(VuDev *vu_dev, int idx)
 331{
 332    VugDev *gdev;
 333    VubDev *vdev_blk;
 334    VuVirtq *vq;
 335    int ret;
 336
 337    gdev = container_of(vu_dev, VugDev, parent);
 338    vdev_blk = container_of(gdev, VubDev, parent);
 339    assert(vdev_blk);
 340
 341    vq = vu_get_queue(vu_dev, idx);
 342    assert(vq);
 343
 344    while (1) {
 345        ret = vub_virtio_process_req(vdev_blk, vq);
 346        if (ret) {
 347            break;
 348        }
 349    }
 350}
 351
 352static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 353{
 354    VuVirtq *vq;
 355
 356    assert(vu_dev);
 357
 358    vq = vu_get_queue(vu_dev, idx);
 359    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
 360}
 361
 362static uint64_t
 363vub_get_features(VuDev *dev)
 364{
 365    uint64_t features;
 366    VugDev *gdev;
 367    VubDev *vdev_blk;
 368
 369    gdev = container_of(dev, VugDev, parent);
 370    vdev_blk = container_of(gdev, VubDev, parent);
 371
 372    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
 373               1ull << VIRTIO_BLK_F_SEG_MAX |
 374               1ull << VIRTIO_BLK_F_TOPOLOGY |
 375               1ull << VIRTIO_BLK_F_BLK_SIZE |
 376               1ull << VIRTIO_BLK_F_FLUSH |
 377               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 378               1ull << VIRTIO_BLK_F_DISCARD |
 379               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
 380               #endif
 381               1ull << VIRTIO_BLK_F_CONFIG_WCE;
 382
 383    if (vdev_blk->enable_ro) {
 384        features |= 1ull << VIRTIO_BLK_F_RO;
 385    }
 386
 387    return features;
 388}
 389
 390static uint64_t
 391vub_get_protocol_features(VuDev *dev)
 392{
 393    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
 394           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
 395}
 396
 397static int
 398vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 399{
 400    VugDev *gdev;
 401    VubDev *vdev_blk;
 402
 403    if (len > sizeof(struct virtio_blk_config)) {
 404        return -1;
 405    }
 406
 407    gdev = container_of(vu_dev, VugDev, parent);
 408    vdev_blk = container_of(gdev, VubDev, parent);
 409    memcpy(config, &vdev_blk->blkcfg, len);
 410
 411    return 0;
 412}
 413
 414static int
 415vub_set_config(VuDev *vu_dev, const uint8_t *data,
 416               uint32_t offset, uint32_t size, uint32_t flags)
 417{
 418    VugDev *gdev;
 419    VubDev *vdev_blk;
 420    uint8_t wce;
 421    int fd;
 422
 423    /* don't support live migration */
 424    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
 425        return -1;
 426    }
 427
 428    gdev = container_of(vu_dev, VugDev, parent);
 429    vdev_blk = container_of(gdev, VubDev, parent);
 430
 431    if (offset != offsetof(struct virtio_blk_config, wce) ||
 432        size != 1) {
 433        return -1;
 434    }
 435
 436    wce = *data;
 437    if (wce == vdev_blk->blkcfg.wce) {
 438        /* Do nothing as same with old configuration */
 439        return 0;
 440    }
 441
 442    vdev_blk->blkcfg.wce = wce;
 443    fprintf(stdout, "Write Cache Policy Changed\n");
 444    if (vdev_blk->blk_fd >= 0) {
 445        close(vdev_blk->blk_fd);
 446        vdev_blk->blk_fd = -1;
 447    }
 448
 449    fd = vub_open(vdev_blk->blk_name, wce);
 450    if (fd < 0) {
 451        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
 452        vdev_blk->blk_fd = -1;
 453        return -1;
 454    }
 455    vdev_blk->blk_fd = fd;
 456
 457    return 0;
 458}
 459
 460static const VuDevIface vub_iface = {
 461    .get_features = vub_get_features,
 462    .queue_set_started = vub_queue_set_started,
 463    .get_protocol_features = vub_get_protocol_features,
 464    .get_config = vub_get_config,
 465    .set_config = vub_set_config,
 466};
 467
 468static int unix_sock_new(char *unix_fn)
 469{
 470    int sock;
 471    struct sockaddr_un un;
 472    size_t len;
 473
 474    assert(unix_fn);
 475
 476    sock = socket(AF_UNIX, SOCK_STREAM, 0);
 477    if (sock < 0) {
 478        perror("socket");
 479        return -1;
 480    }
 481
 482    un.sun_family = AF_UNIX;
 483    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
 484    len = sizeof(un.sun_family) + strlen(un.sun_path);
 485
 486    (void)unlink(unix_fn);
 487    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
 488        perror("bind");
 489        goto fail;
 490    }
 491
 492    if (listen(sock, 1) < 0) {
 493        perror("listen");
 494        goto fail;
 495    }
 496
 497    return sock;
 498
 499fail:
 500    (void)close(sock);
 501
 502    return -1;
 503}
 504
 505static void vub_free(struct VubDev *vdev_blk)
 506{
 507    if (!vdev_blk) {
 508        return;
 509    }
 510
 511    g_main_loop_unref(vdev_blk->loop);
 512    if (vdev_blk->blk_fd >= 0) {
 513        close(vdev_blk->blk_fd);
 514    }
 515    g_free(vdev_blk);
 516}
 517
 518static uint32_t
 519vub_get_blocksize(int fd)
 520{
 521    uint32_t blocksize = 512;
 522
 523#if defined(__linux__) && defined(BLKSSZGET)
 524    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
 525        return blocksize;
 526    }
 527#endif
 528
 529    return blocksize;
 530}
 531
 532static void
 533vub_initialize_config(int fd, struct virtio_blk_config *config)
 534{
 535    off64_t capacity;
 536
 537    capacity = lseek64(fd, 0, SEEK_END);
 538    config->capacity = capacity >> 9;
 539    config->blk_size = vub_get_blocksize(fd);
 540    config->size_max = 65536;
 541    config->seg_max = 128 - 2;
 542    config->min_io_size = 1;
 543    config->opt_io_size = 1;
 544    config->num_queues = 1;
 545    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 546    config->max_discard_sectors = 32768;
 547    config->max_discard_seg = 1;
 548    config->discard_sector_alignment = config->blk_size >> 9;
 549    config->max_write_zeroes_sectors = 32768;
 550    config->max_write_zeroes_seg = 1;
 551    #endif
 552}
 553
 554static VubDev *
 555vub_new(char *blk_file)
 556{
 557    VubDev *vdev_blk;
 558
 559    vdev_blk = g_new0(VubDev, 1);
 560    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
 561    vdev_blk->blk_fd = vub_open(blk_file, 0);
 562    if (vdev_blk->blk_fd  < 0) {
 563        fprintf(stderr, "Error to open block device %s\n", blk_file);
 564        vub_free(vdev_blk);
 565        return NULL;
 566    }
 567    vdev_blk->enable_ro = false;
 568    vdev_blk->blkcfg.wce = 0;
 569    vdev_blk->blk_name = blk_file;
 570
 571    /* fill virtio_blk_config with block parameters */
 572    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
 573
 574    return vdev_blk;
 575}
 576
 577static int opt_fdnum = -1;
 578static char *opt_socket_path;
 579static char *opt_blk_file;
 580static gboolean opt_print_caps;
 581static gboolean opt_read_only;
 582
 583static GOptionEntry entries[] = {
 584    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
 585      "Print capabilities", NULL },
 586    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
 587      "Use inherited fd socket", "FDNUM" },
 588    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
 589      "Use UNIX socket path", "PATH" },
 590    {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
 591     "block device or file path", "PATH"},
 592    { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
 593      "Enable read-only", NULL },
 594    { NULL, },
 595};
 596
 597int main(int argc, char **argv)
 598{
 599    int lsock = -1, csock = -1;
 600    VubDev *vdev_blk = NULL;
 601    GError *error = NULL;
 602    GOptionContext *context;
 603
 604    context = g_option_context_new(NULL);
 605    g_option_context_add_main_entries(context, entries, NULL);
 606    if (!g_option_context_parse(context, &argc, &argv, &error)) {
 607        g_printerr("Option parsing failed: %s\n", error->message);
 608        exit(EXIT_FAILURE);
 609    }
 610    if (opt_print_caps) {
 611        g_print("{\n");
 612        g_print("  \"type\": \"block\",\n");
 613        g_print("  \"features\": [\n");
 614        g_print("    \"read-only\",\n");
 615        g_print("    \"blk-file\"\n");
 616        g_print("  ]\n");
 617        g_print("}\n");
 618        exit(EXIT_SUCCESS);
 619    }
 620
 621    if (!opt_blk_file) {
 622        g_print("%s\n", g_option_context_get_help(context, true, NULL));
 623        exit(EXIT_FAILURE);
 624    }
 625
 626    if (opt_socket_path) {
 627        lsock = unix_sock_new(opt_socket_path);
 628        if (lsock < 0) {
 629            exit(EXIT_FAILURE);
 630        }
 631    } else if (opt_fdnum < 0) {
 632        g_print("%s\n", g_option_context_get_help(context, true, NULL));
 633        exit(EXIT_FAILURE);
 634    } else {
 635        lsock = opt_fdnum;
 636    }
 637
 638    csock = accept(lsock, NULL, NULL);
 639    if (csock < 0) {
 640        g_printerr("Accept error %s\n", strerror(errno));
 641        exit(EXIT_FAILURE);
 642    }
 643
 644    vdev_blk = vub_new(opt_blk_file);
 645    if (!vdev_blk) {
 646        exit(EXIT_FAILURE);
 647    }
 648    if (opt_read_only) {
 649        vdev_blk->enable_ro = true;
 650    }
 651
 652    if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
 653                  vub_panic_cb, &vub_iface)) {
 654        g_printerr("Failed to initialize libvhost-user-glib\n");
 655        exit(EXIT_FAILURE);
 656    }
 657
 658    g_main_loop_run(vdev_blk->loop);
 659    g_main_loop_unref(vdev_blk->loop);
 660    g_option_context_free(context);
 661    vug_deinit(&vdev_blk->parent);
 662    vub_free(vdev_blk);
 663    if (csock >= 0) {
 664        close(csock);
 665    }
 666    if (lsock >= 0) {
 667        close(lsock);
 668    }
 669    g_free(opt_socket_path);
 670    g_free(opt_blk_file);
 671
 672    return 0;
 673}
 674