qemu/contrib/vhost-user-blk/vhost-user-blk.c
<<
>>
Prefs
   1/*
   2 * vhost-user-blk sample application
   3 *
   4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
   5 *
   6 * Author:
   7 *  Changpeng Liu <changpeng.liu@intel.com>
   8 *
   9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
  10 * implementation by:
  11 *  Felipe Franciosi <felipe@nutanix.com>
  12 *  Anthony Liguori <aliguori@us.ibm.com>
  13 *
  14 * This work is licensed under the terms of the GNU GPL, version 2 only.
  15 * See the COPYING file in the top-level directory.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "standard-headers/linux/virtio_blk.h"
  20#include "libvhost-user-glib.h"
  21
  22#if defined(__linux__)
  23#include <linux/fs.h>
  24#include <sys/ioctl.h>
  25#endif
  26
  27enum {
  28    VHOST_USER_BLK_MAX_QUEUES = 8,
  29};
  30
  31struct virtio_blk_inhdr {
  32    unsigned char status;
  33};
  34
  35/* vhost user block device */
  36typedef struct VubDev {
  37    VugDev parent;
  38    int blk_fd;
  39    struct virtio_blk_config blkcfg;
  40    bool enable_ro;
  41    char *blk_name;
  42    GMainLoop *loop;
  43} VubDev;
  44
  45typedef struct VubReq {
  46    VuVirtqElement *elem;
  47    int64_t sector_num;
  48    size_t size;
  49    struct virtio_blk_inhdr *in;
  50    struct virtio_blk_outhdr *out;
  51    VubDev *vdev_blk;
  52    struct VuVirtq *vq;
  53} VubReq;
  54
  55/* refer util/iov.c */
  56static size_t vub_iov_size(const struct iovec *iov,
  57                              const unsigned int iov_cnt)
  58{
  59    size_t len;
  60    unsigned int i;
  61
  62    len = 0;
  63    for (i = 0; i < iov_cnt; i++) {
  64        len += iov[i].iov_len;
  65    }
  66    return len;
  67}
  68
  69static size_t vub_iov_to_buf(const struct iovec *iov,
  70                             const unsigned int iov_cnt, void *buf)
  71{
  72    size_t len;
  73    unsigned int i;
  74
  75    len = 0;
  76    for (i = 0; i < iov_cnt; i++) {
  77        memcpy(buf + len,  iov[i].iov_base, iov[i].iov_len);
  78        len += iov[i].iov_len;
  79    }
  80    return len;
  81}
  82
  83static void vub_panic_cb(VuDev *vu_dev, const char *buf)
  84{
  85    VugDev *gdev;
  86    VubDev *vdev_blk;
  87
  88    assert(vu_dev);
  89
  90    gdev = container_of(vu_dev, VugDev, parent);
  91    vdev_blk = container_of(gdev, VubDev, parent);
  92    if (buf) {
  93        g_warning("vu_panic: %s", buf);
  94    }
  95
  96    g_main_loop_quit(vdev_blk->loop);
  97}
  98
  99static void vub_req_complete(VubReq *req)
 100{
 101    VugDev *gdev = &req->vdev_blk->parent;
 102    VuDev *vu_dev = &gdev->parent;
 103
 104    /* IO size with 1 extra status byte */
 105    vu_queue_push(vu_dev, req->vq, req->elem,
 106                  req->size + 1);
 107    vu_queue_notify(vu_dev, req->vq);
 108
 109    if (req->elem) {
 110        free(req->elem);
 111    }
 112
 113    g_free(req);
 114}
 115
 116static int vub_open(const char *file_name, bool wce)
 117{
 118    int fd;
 119    int flags = O_RDWR;
 120
 121    if (!wce) {
 122        flags |= O_DIRECT;
 123    }
 124
 125    fd = open(file_name, flags);
 126    if (fd < 0) {
 127        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
 128                strerror(errno));
 129        return -1;
 130    }
 131
 132    return fd;
 133}
 134
 135static ssize_t
 136vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 137{
 138    VubDev *vdev_blk = req->vdev_blk;
 139    ssize_t rc;
 140
 141    if (!iovcnt) {
 142        fprintf(stderr, "Invalid Read IOV count\n");
 143        return -1;
 144    }
 145
 146    req->size = vub_iov_size(iov, iovcnt);
 147    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 148    if (rc < 0) {
 149        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 150                vdev_blk->blk_name, req->sector_num, req->size,
 151                strerror(errno));
 152        return -1;
 153    }
 154
 155    return rc;
 156}
 157
 158static ssize_t
 159vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 160{
 161    VubDev *vdev_blk = req->vdev_blk;
 162    ssize_t rc;
 163
 164    if (!iovcnt) {
 165        fprintf(stderr, "Invalid Write IOV count\n");
 166        return -1;
 167    }
 168
 169    req->size = vub_iov_size(iov, iovcnt);
 170    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 171    if (rc < 0) {
 172        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 173                vdev_blk->blk_name, req->sector_num, req->size,
 174                strerror(errno));
 175        return -1;
 176    }
 177
 178    return rc;
 179}
 180
 181static int
 182vub_discard_write_zeroes(VubReq *req, struct iovec *iov, uint32_t iovcnt,
 183                         uint32_t type)
 184{
 185    struct virtio_blk_discard_write_zeroes *desc;
 186    ssize_t size;
 187    void *buf;
 188
 189    size = vub_iov_size(iov, iovcnt);
 190    if (size != sizeof(*desc)) {
 191        fprintf(stderr, "Invalid size %ld, expect %ld\n", size, sizeof(*desc));
 192        return -1;
 193    }
 194    buf = g_new0(char, size);
 195    vub_iov_to_buf(iov, iovcnt, buf);
 196
 197    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 198    VubDev *vdev_blk = req->vdev_blk;
 199    desc = (struct virtio_blk_discard_write_zeroes *)buf;
 200    uint64_t range[2] = { le64toh(desc->sector) << 9,
 201                          le32toh(desc->num_sectors) << 9 };
 202    if (type == VIRTIO_BLK_T_DISCARD) {
 203        if (ioctl(vdev_blk->blk_fd, BLKDISCARD, range) == 0) {
 204            g_free(buf);
 205            return 0;
 206        }
 207    } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
 208        if (ioctl(vdev_blk->blk_fd, BLKZEROOUT, range) == 0) {
 209            g_free(buf);
 210            return 0;
 211        }
 212    }
 213    #endif
 214
 215    g_free(buf);
 216    return -1;
 217}
 218
 219static void
 220vub_flush(VubReq *req)
 221{
 222    VubDev *vdev_blk = req->vdev_blk;
 223
 224    fdatasync(vdev_blk->blk_fd);
 225}
 226
 227static int vub_virtio_process_req(VubDev *vdev_blk,
 228                                     VuVirtq *vq)
 229{
 230    VugDev *gdev = &vdev_blk->parent;
 231    VuDev *vu_dev = &gdev->parent;
 232    VuVirtqElement *elem;
 233    uint32_t type;
 234    unsigned in_num;
 235    unsigned out_num;
 236    VubReq *req;
 237
 238    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
 239    if (!elem) {
 240        return -1;
 241    }
 242
 243    /* refer to hw/block/virtio_blk.c */
 244    if (elem->out_num < 1 || elem->in_num < 1) {
 245        fprintf(stderr, "virtio-blk request missing headers\n");
 246        free(elem);
 247        return -1;
 248    }
 249
 250    req = g_new0(VubReq, 1);
 251    req->vdev_blk = vdev_blk;
 252    req->vq = vq;
 253    req->elem = elem;
 254
 255    in_num = elem->in_num;
 256    out_num = elem->out_num;
 257
 258    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
 259    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
 260        fprintf(stderr, "Invalid outhdr size\n");
 261        goto err;
 262    }
 263    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
 264    out_num--;
 265
 266    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
 267        fprintf(stderr, "Invalid inhdr size\n");
 268        goto err;
 269    }
 270    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
 271    in_num--;
 272
 273    type = le32toh(req->out->type);
 274    switch (type & ~VIRTIO_BLK_T_BARRIER) {
 275    case VIRTIO_BLK_T_IN:
 276    case VIRTIO_BLK_T_OUT: {
 277        ssize_t ret = 0;
 278        bool is_write = type & VIRTIO_BLK_T_OUT;
 279        req->sector_num = le64toh(req->out->sector);
 280        if (is_write) {
 281            ret  = vub_writev(req, &elem->out_sg[1], out_num);
 282        } else {
 283            ret = vub_readv(req, &elem->in_sg[0], in_num);
 284        }
 285        if (ret >= 0) {
 286            req->in->status = VIRTIO_BLK_S_OK;
 287        } else {
 288            req->in->status = VIRTIO_BLK_S_IOERR;
 289        }
 290        vub_req_complete(req);
 291        break;
 292    }
 293    case VIRTIO_BLK_T_FLUSH:
 294        vub_flush(req);
 295        req->in->status = VIRTIO_BLK_S_OK;
 296        vub_req_complete(req);
 297        break;
 298    case VIRTIO_BLK_T_GET_ID: {
 299        size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
 300                          VIRTIO_BLK_ID_BYTES);
 301        snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 302        req->in->status = VIRTIO_BLK_S_OK;
 303        req->size = elem->in_sg[0].iov_len;
 304        vub_req_complete(req);
 305        break;
 306    }
 307    case VIRTIO_BLK_T_DISCARD:
 308    case VIRTIO_BLK_T_WRITE_ZEROES: {
 309        int rc;
 310        rc = vub_discard_write_zeroes(req, &elem->out_sg[1], out_num, type);
 311        if (rc == 0) {
 312            req->in->status = VIRTIO_BLK_S_OK;
 313        } else {
 314            req->in->status = VIRTIO_BLK_S_IOERR;
 315        }
 316        vub_req_complete(req);
 317        break;
 318    }
 319    default:
 320        req->in->status = VIRTIO_BLK_S_UNSUPP;
 321        vub_req_complete(req);
 322        break;
 323    }
 324
 325    return 0;
 326
 327err:
 328    free(elem);
 329    g_free(req);
 330    return -1;
 331}
 332
 333static void vub_process_vq(VuDev *vu_dev, int idx)
 334{
 335    VugDev *gdev;
 336    VubDev *vdev_blk;
 337    VuVirtq *vq;
 338    int ret;
 339
 340    gdev = container_of(vu_dev, VugDev, parent);
 341    vdev_blk = container_of(gdev, VubDev, parent);
 342    assert(vdev_blk);
 343
 344    vq = vu_get_queue(vu_dev, idx);
 345    assert(vq);
 346
 347    while (1) {
 348        ret = vub_virtio_process_req(vdev_blk, vq);
 349        if (ret) {
 350            break;
 351        }
 352    }
 353}
 354
 355static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 356{
 357    VuVirtq *vq;
 358
 359    assert(vu_dev);
 360
 361    vq = vu_get_queue(vu_dev, idx);
 362    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
 363}
 364
 365static uint64_t
 366vub_get_features(VuDev *dev)
 367{
 368    uint64_t features;
 369    VugDev *gdev;
 370    VubDev *vdev_blk;
 371
 372    gdev = container_of(dev, VugDev, parent);
 373    vdev_blk = container_of(gdev, VubDev, parent);
 374
 375    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
 376               1ull << VIRTIO_BLK_F_SEG_MAX |
 377               1ull << VIRTIO_BLK_F_TOPOLOGY |
 378               1ull << VIRTIO_BLK_F_BLK_SIZE |
 379               1ull << VIRTIO_BLK_F_FLUSH |
 380               #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 381               1ull << VIRTIO_BLK_F_DISCARD |
 382               1ull << VIRTIO_BLK_F_WRITE_ZEROES |
 383               #endif
 384               1ull << VIRTIO_BLK_F_CONFIG_WCE;
 385
 386    if (vdev_blk->enable_ro) {
 387        features |= 1ull << VIRTIO_BLK_F_RO;
 388    }
 389
 390    return features;
 391}
 392
 393static uint64_t
 394vub_get_protocol_features(VuDev *dev)
 395{
 396    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
 397           1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
 398}
 399
 400static int
 401vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 402{
 403    VugDev *gdev;
 404    VubDev *vdev_blk;
 405
 406    if (len > sizeof(struct virtio_blk_config)) {
 407        return -1;
 408    }
 409
 410    gdev = container_of(vu_dev, VugDev, parent);
 411    vdev_blk = container_of(gdev, VubDev, parent);
 412    memcpy(config, &vdev_blk->blkcfg, len);
 413
 414    return 0;
 415}
 416
 417static int
 418vub_set_config(VuDev *vu_dev, const uint8_t *data,
 419               uint32_t offset, uint32_t size, uint32_t flags)
 420{
 421    VugDev *gdev;
 422    VubDev *vdev_blk;
 423    uint8_t wce;
 424    int fd;
 425
 426    /* don't support live migration */
 427    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
 428        return -1;
 429    }
 430
 431    gdev = container_of(vu_dev, VugDev, parent);
 432    vdev_blk = container_of(gdev, VubDev, parent);
 433
 434    if (offset != offsetof(struct virtio_blk_config, wce) ||
 435        size != 1) {
 436        return -1;
 437    }
 438
 439    wce = *data;
 440    if (wce == vdev_blk->blkcfg.wce) {
 441        /* Do nothing as same with old configuration */
 442        return 0;
 443    }
 444
 445    vdev_blk->blkcfg.wce = wce;
 446    fprintf(stdout, "Write Cache Policy Changed\n");
 447    if (vdev_blk->blk_fd >= 0) {
 448        close(vdev_blk->blk_fd);
 449        vdev_blk->blk_fd = -1;
 450    }
 451
 452    fd = vub_open(vdev_blk->blk_name, wce);
 453    if (fd < 0) {
 454        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
 455        vdev_blk->blk_fd = -1;
 456        return -1;
 457    }
 458    vdev_blk->blk_fd = fd;
 459
 460    return 0;
 461}
 462
 463static const VuDevIface vub_iface = {
 464    .get_features = vub_get_features,
 465    .queue_set_started = vub_queue_set_started,
 466    .get_protocol_features = vub_get_protocol_features,
 467    .get_config = vub_get_config,
 468    .set_config = vub_set_config,
 469};
 470
 471static int unix_sock_new(char *unix_fn)
 472{
 473    int sock;
 474    struct sockaddr_un un;
 475    size_t len;
 476
 477    assert(unix_fn);
 478
 479    sock = socket(AF_UNIX, SOCK_STREAM, 0);
 480    if (sock < 0) {
 481        perror("socket");
 482        return -1;
 483    }
 484
 485    un.sun_family = AF_UNIX;
 486    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
 487    len = sizeof(un.sun_family) + strlen(un.sun_path);
 488
 489    (void)unlink(unix_fn);
 490    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
 491        perror("bind");
 492        goto fail;
 493    }
 494
 495    if (listen(sock, 1) < 0) {
 496        perror("listen");
 497        goto fail;
 498    }
 499
 500    return sock;
 501
 502fail:
 503    (void)close(sock);
 504
 505    return -1;
 506}
 507
 508static void vub_free(struct VubDev *vdev_blk)
 509{
 510    if (!vdev_blk) {
 511        return;
 512    }
 513
 514    g_main_loop_unref(vdev_blk->loop);
 515    if (vdev_blk->blk_fd >= 0) {
 516        close(vdev_blk->blk_fd);
 517    }
 518    g_free(vdev_blk);
 519}
 520
 521static uint32_t
 522vub_get_blocksize(int fd)
 523{
 524    uint32_t blocksize = 512;
 525
 526#if defined(__linux__) && defined(BLKSSZGET)
 527    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
 528        return blocksize;
 529    }
 530#endif
 531
 532    return blocksize;
 533}
 534
 535static void
 536vub_initialize_config(int fd, struct virtio_blk_config *config)
 537{
 538    off64_t capacity;
 539
 540    capacity = lseek64(fd, 0, SEEK_END);
 541    config->capacity = capacity >> 9;
 542    config->blk_size = vub_get_blocksize(fd);
 543    config->size_max = 65536;
 544    config->seg_max = 128 - 2;
 545    config->min_io_size = 1;
 546    config->opt_io_size = 1;
 547    config->num_queues = 1;
 548    #if defined(__linux__) && defined(BLKDISCARD) && defined(BLKZEROOUT)
 549    config->max_discard_sectors = 32768;
 550    config->max_discard_seg = 1;
 551    config->discard_sector_alignment = config->blk_size >> 9;
 552    config->max_write_zeroes_sectors = 32768;
 553    config->max_write_zeroes_seg = 1;
 554    #endif
 555}
 556
 557static VubDev *
 558vub_new(char *blk_file)
 559{
 560    VubDev *vdev_blk;
 561
 562    vdev_blk = g_new0(VubDev, 1);
 563    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
 564    vdev_blk->blk_fd = vub_open(blk_file, 0);
 565    if (vdev_blk->blk_fd  < 0) {
 566        fprintf(stderr, "Error to open block device %s\n", blk_file);
 567        vub_free(vdev_blk);
 568        return NULL;
 569    }
 570    vdev_blk->enable_ro = false;
 571    vdev_blk->blkcfg.wce = 0;
 572    vdev_blk->blk_name = blk_file;
 573
 574    /* fill virtio_blk_config with block parameters */
 575    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
 576
 577    return vdev_blk;
 578}
 579
 580static int opt_fdnum = -1;
 581static char *opt_socket_path;
 582static char *opt_blk_file;
 583static gboolean opt_print_caps;
 584static gboolean opt_read_only;
 585
 586static GOptionEntry entries[] = {
 587    { "print-capabilities", 'c', 0, G_OPTION_ARG_NONE, &opt_print_caps,
 588      "Print capabilities", NULL },
 589    { "fd", 'f', 0, G_OPTION_ARG_INT, &opt_fdnum,
 590      "Use inherited fd socket", "FDNUM" },
 591    { "socket-path", 's', 0, G_OPTION_ARG_FILENAME, &opt_socket_path,
 592      "Use UNIX socket path", "PATH" },
 593    {"blk-file", 'b', 0, G_OPTION_ARG_FILENAME, &opt_blk_file,
 594     "block device or file path", "PATH"},
 595    { "read-only", 'r', 0, G_OPTION_ARG_NONE, &opt_read_only,
 596      "Enable read-only", NULL }
 597};
 598
 599int main(int argc, char **argv)
 600{
 601    int lsock = -1, csock = -1;
 602    VubDev *vdev_blk = NULL;
 603    GError *error = NULL;
 604    GOptionContext *context;
 605
 606    context = g_option_context_new(NULL);
 607    g_option_context_add_main_entries(context, entries, NULL);
 608    if (!g_option_context_parse(context, &argc, &argv, &error)) {
 609        g_printerr("Option parsing failed: %s\n", error->message);
 610        exit(EXIT_FAILURE);
 611    }
 612    if (opt_print_caps) {
 613        g_print("{\n");
 614        g_print("  \"type\": \"block\",\n");
 615        g_print("  \"features\": [\n");
 616        g_print("    \"read-only\",\n");
 617        g_print("    \"blk-file\"\n");
 618        g_print("  ]\n");
 619        g_print("}\n");
 620        exit(EXIT_SUCCESS);
 621    }
 622
 623    if (!opt_blk_file) {
 624        g_print("%s\n", g_option_context_get_help(context, true, NULL));
 625        exit(EXIT_FAILURE);
 626    }
 627
 628    if (opt_socket_path) {
 629        lsock = unix_sock_new(opt_socket_path);
 630        if (lsock < 0) {
 631            exit(EXIT_FAILURE);
 632        }
 633    } else if (opt_fdnum < 0) {
 634        g_print("%s\n", g_option_context_get_help(context, true, NULL));
 635        exit(EXIT_FAILURE);
 636    } else {
 637        lsock = opt_fdnum;
 638    }
 639
 640    csock = accept(lsock, NULL, NULL);
 641    if (csock < 0) {
 642        g_printerr("Accept error %s\n", strerror(errno));
 643        exit(EXIT_FAILURE);
 644    }
 645
 646    vdev_blk = vub_new(opt_blk_file);
 647    if (!vdev_blk) {
 648        exit(EXIT_FAILURE);
 649    }
 650    if (opt_read_only) {
 651        vdev_blk->enable_ro = true;
 652    }
 653
 654    if (!vug_init(&vdev_blk->parent, VHOST_USER_BLK_MAX_QUEUES, csock,
 655                  vub_panic_cb, &vub_iface)) {
 656        g_printerr("Failed to initialize libvhost-user-glib\n");
 657        exit(EXIT_FAILURE);
 658    }
 659
 660    g_main_loop_run(vdev_blk->loop);
 661    g_main_loop_unref(vdev_blk->loop);
 662    g_option_context_free(context);
 663    vug_deinit(&vdev_blk->parent);
 664    vub_free(vdev_blk);
 665    if (csock >= 0) {
 666        close(csock);
 667    }
 668    if (lsock >= 0) {
 669        close(lsock);
 670    }
 671    g_free(opt_socket_path);
 672    g_free(opt_blk_file);
 673
 674    return 0;
 675}
 676