qemu/contrib/vhost-user-blk/vhost-user-blk.c
<<
>>
Prefs
   1/*
   2 * vhost-user-blk sample application
   3 *
   4 * Copyright (c) 2017 Intel Corporation. All rights reserved.
   5 *
   6 * Author:
   7 *  Changpeng Liu <changpeng.liu@intel.com>
   8 *
   9 * This work is based on the "vhost-user-scsi" sample and "virtio-blk" driver
  10 * implementation by:
  11 *  Felipe Franciosi <felipe@nutanix.com>
  12 *  Anthony Liguori <aliguori@us.ibm.com>
  13 *
  14 * This work is licensed under the terms of the GNU GPL, version 2 only.
  15 * See the COPYING file in the top-level directory.
  16 */
  17
#include "qemu/osdep.h"
#include "standard-headers/linux/virtio_blk.h"
#include "contrib/libvhost-user/libvhost-user-glib.h"
#include "contrib/libvhost-user/libvhost-user.h"

#if defined(__linux__)
#include <linux/fs.h>
#include <sys/ioctl.h>
#endif

#include <glib.h>
  24
/*
 * Trailer of a virtio-blk request: a single status byte.  The request
 * handler locates it in the last device-writable buffer of the element
 * and stores one of the VIRTIO_BLK_S_* codes into it.
 */
struct virtio_blk_inhdr {
    unsigned char status;
};
  28
/*
 * vhost user block device
 *
 * Per-device state.  The libvhost-user callbacks only receive the inner
 * VuDev pointer and recover this structure with container_of() through
 * the embedded 'parent' member.
 */
typedef struct VubDev {
    VugDev parent;                   /* glib-integrated vhost-user device */
    int blk_fd;                      /* fd of the backing file, -1 if closed */
    struct virtio_blk_config blkcfg; /* config space returned by get_config */
    bool enable_ro;                  /* advertise VIRTIO_BLK_F_RO when true */
    char *blk_name;                  /* backing file path; pointer is borrowed
                                      * from the caller of vub_new() */
    GMainLoop *loop;                 /* main loop; quit by vub_panic_cb() */
} VubDev;
  38
/*
 * State of one in-flight virtio-blk request.  Allocated per popped
 * element and released, together with the element, by vub_req_complete().
 */
typedef struct VubReq {
    VuVirtqElement *elem;           /* element popped from the virtqueue */
    int64_t sector_num;             /* start sector; byte offset is * 512 */
    size_t size;                    /* payload length, excludes status byte */
    struct virtio_blk_inhdr *in;    /* status byte in the last in_sg buffer */
    struct virtio_blk_outhdr *out;  /* request header in out_sg[0] */
    VubDev *vdev_blk;               /* owning device */
    struct VuVirtq *vq;             /* queue the element was popped from */
} VubReq;
  48
  49/* refer util/iov.c */
  50static size_t vub_iov_size(const struct iovec *iov,
  51                              const unsigned int iov_cnt)
  52{
  53    size_t len;
  54    unsigned int i;
  55
  56    len = 0;
  57    for (i = 0; i < iov_cnt; i++) {
  58        len += iov[i].iov_len;
  59    }
  60    return len;
  61}
  62
  63static void vub_panic_cb(VuDev *vu_dev, const char *buf)
  64{
  65    VugDev *gdev;
  66    VubDev *vdev_blk;
  67
  68    assert(vu_dev);
  69
  70    gdev = container_of(vu_dev, VugDev, parent);
  71    vdev_blk = container_of(gdev, VubDev, parent);
  72    if (buf) {
  73        g_warning("vu_panic: %s", buf);
  74    }
  75
  76    g_main_loop_quit(vdev_blk->loop);
  77}
  78
  79static void vub_req_complete(VubReq *req)
  80{
  81    VugDev *gdev = &req->vdev_blk->parent;
  82    VuDev *vu_dev = &gdev->parent;
  83
  84    /* IO size with 1 extra status byte */
  85    vu_queue_push(vu_dev, req->vq, req->elem,
  86                  req->size + 1);
  87    vu_queue_notify(vu_dev, req->vq);
  88
  89    if (req->elem) {
  90        free(req->elem);
  91    }
  92
  93    g_free(req);
  94}
  95
  96static int vub_open(const char *file_name, bool wce)
  97{
  98    int fd;
  99    int flags = O_RDWR;
 100
 101    if (!wce) {
 102        flags |= O_DIRECT;
 103    }
 104
 105    fd = open(file_name, flags);
 106    if (fd < 0) {
 107        fprintf(stderr, "Cannot open file %s, %s\n", file_name,
 108                strerror(errno));
 109        return -1;
 110    }
 111
 112    return fd;
 113}
 114
 115static ssize_t
 116vub_readv(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 117{
 118    VubDev *vdev_blk = req->vdev_blk;
 119    ssize_t rc;
 120
 121    if (!iovcnt) {
 122        fprintf(stderr, "Invalid Read IOV count\n");
 123        return -1;
 124    }
 125
 126    req->size = vub_iov_size(iov, iovcnt);
 127    rc = preadv(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 128    if (rc < 0) {
 129        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 130                vdev_blk->blk_name, req->sector_num, req->size,
 131                strerror(errno));
 132        return -1;
 133    }
 134
 135    return rc;
 136}
 137
 138static ssize_t
 139vub_writev(VubReq *req, struct iovec *iov, uint32_t iovcnt)
 140{
 141    VubDev *vdev_blk = req->vdev_blk;
 142    ssize_t rc;
 143
 144    if (!iovcnt) {
 145        fprintf(stderr, "Invalid Write IOV count\n");
 146        return -1;
 147    }
 148
 149    req->size = vub_iov_size(iov, iovcnt);
 150    rc = pwritev(vdev_blk->blk_fd, iov, iovcnt, req->sector_num * 512);
 151    if (rc < 0) {
 152        fprintf(stderr, "%s, Sector %"PRIu64", Size %lu failed with %s\n",
 153                vdev_blk->blk_name, req->sector_num, req->size,
 154                strerror(errno));
 155        return -1;
 156    }
 157
 158    return rc;
 159}
 160
 161static void
 162vub_flush(VubReq *req)
 163{
 164    VubDev *vdev_blk = req->vdev_blk;
 165
 166    fdatasync(vdev_blk->blk_fd);
 167}
 168
 169static int vub_virtio_process_req(VubDev *vdev_blk,
 170                                     VuVirtq *vq)
 171{
 172    VugDev *gdev = &vdev_blk->parent;
 173    VuDev *vu_dev = &gdev->parent;
 174    VuVirtqElement *elem;
 175    uint32_t type;
 176    unsigned in_num;
 177    unsigned out_num;
 178    VubReq *req;
 179
 180    elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) + sizeof(VubReq));
 181    if (!elem) {
 182        return -1;
 183    }
 184
 185    /* refer to hw/block/virtio_blk.c */
 186    if (elem->out_num < 1 || elem->in_num < 1) {
 187        fprintf(stderr, "virtio-blk request missing headers\n");
 188        free(elem);
 189        return -1;
 190    }
 191
 192    req = g_new0(VubReq, 1);
 193    req->vdev_blk = vdev_blk;
 194    req->vq = vq;
 195    req->elem = elem;
 196
 197    in_num = elem->in_num;
 198    out_num = elem->out_num;
 199
 200    /* don't support VIRTIO_F_ANY_LAYOUT and virtio 1.0 only */
 201    if (elem->out_sg[0].iov_len < sizeof(struct virtio_blk_outhdr)) {
 202        fprintf(stderr, "Invalid outhdr size\n");
 203        goto err;
 204    }
 205    req->out = (struct virtio_blk_outhdr *)elem->out_sg[0].iov_base;
 206    out_num--;
 207
 208    if (elem->in_sg[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
 209        fprintf(stderr, "Invalid inhdr size\n");
 210        goto err;
 211    }
 212    req->in = (struct virtio_blk_inhdr *)elem->in_sg[in_num - 1].iov_base;
 213    in_num--;
 214
 215    type = le32toh(req->out->type);
 216    switch (type & ~(VIRTIO_BLK_T_OUT | VIRTIO_BLK_T_BARRIER)) {
 217        case VIRTIO_BLK_T_IN: {
 218            ssize_t ret = 0;
 219            bool is_write = type & VIRTIO_BLK_T_OUT;
 220            req->sector_num = le64toh(req->out->sector);
 221            if (is_write) {
 222                ret  = vub_writev(req, &elem->out_sg[1], out_num);
 223            } else {
 224                ret = vub_readv(req, &elem->in_sg[0], in_num);
 225            }
 226            if (ret >= 0) {
 227                req->in->status = VIRTIO_BLK_S_OK;
 228            } else {
 229                req->in->status = VIRTIO_BLK_S_IOERR;
 230            }
 231            vub_req_complete(req);
 232            break;
 233        }
 234        case VIRTIO_BLK_T_FLUSH: {
 235            vub_flush(req);
 236            req->in->status = VIRTIO_BLK_S_OK;
 237            vub_req_complete(req);
 238            break;
 239        }
 240        case VIRTIO_BLK_T_GET_ID: {
 241            size_t size = MIN(vub_iov_size(&elem->in_sg[0], in_num),
 242                              VIRTIO_BLK_ID_BYTES);
 243            snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
 244            req->in->status = VIRTIO_BLK_S_OK;
 245            req->size = elem->in_sg[0].iov_len;
 246            vub_req_complete(req);
 247            break;
 248        }
 249        default: {
 250            req->in->status = VIRTIO_BLK_S_UNSUPP;
 251            vub_req_complete(req);
 252            break;
 253        }
 254    }
 255
 256    return 0;
 257
 258err:
 259    free(elem);
 260    g_free(req);
 261    return -1;
 262}
 263
 264static void vub_process_vq(VuDev *vu_dev, int idx)
 265{
 266    VugDev *gdev;
 267    VubDev *vdev_blk;
 268    VuVirtq *vq;
 269    int ret;
 270
 271    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
 272        fprintf(stderr, "VQ Index out of range: %d\n", idx);
 273        vub_panic_cb(vu_dev, NULL);
 274        return;
 275    }
 276
 277    gdev = container_of(vu_dev, VugDev, parent);
 278    vdev_blk = container_of(gdev, VubDev, parent);
 279    assert(vdev_blk);
 280
 281    vq = vu_get_queue(vu_dev, idx);
 282    assert(vq);
 283
 284    while (1) {
 285        ret = vub_virtio_process_req(vdev_blk, vq);
 286        if (ret) {
 287            break;
 288        }
 289    }
 290}
 291
 292static void vub_queue_set_started(VuDev *vu_dev, int idx, bool started)
 293{
 294    VuVirtq *vq;
 295
 296    assert(vu_dev);
 297
 298    vq = vu_get_queue(vu_dev, idx);
 299    vu_set_queue_handler(vu_dev, vq, started ? vub_process_vq : NULL);
 300}
 301
 302static uint64_t
 303vub_get_features(VuDev *dev)
 304{
 305    uint64_t features;
 306    VugDev *gdev;
 307    VubDev *vdev_blk;
 308
 309    gdev = container_of(dev, VugDev, parent);
 310    vdev_blk = container_of(gdev, VubDev, parent);
 311
 312    features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
 313               1ull << VIRTIO_BLK_F_SEG_MAX |
 314               1ull << VIRTIO_BLK_F_TOPOLOGY |
 315               1ull << VIRTIO_BLK_F_BLK_SIZE |
 316               1ull << VIRTIO_BLK_F_FLUSH |
 317               1ull << VIRTIO_BLK_F_CONFIG_WCE |
 318               1ull << VIRTIO_F_VERSION_1 |
 319               1ull << VHOST_USER_F_PROTOCOL_FEATURES;
 320
 321    if (vdev_blk->enable_ro) {
 322        features |= 1ull << VIRTIO_BLK_F_RO;
 323    }
 324
 325    return features;
 326}
 327
 328static uint64_t
 329vub_get_protocol_features(VuDev *dev)
 330{
 331    return 1ull << VHOST_USER_PROTOCOL_F_CONFIG;
 332}
 333
 334static int
 335vub_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
 336{
 337    VugDev *gdev;
 338    VubDev *vdev_blk;
 339
 340    gdev = container_of(vu_dev, VugDev, parent);
 341    vdev_blk = container_of(gdev, VubDev, parent);
 342    memcpy(config, &vdev_blk->blkcfg, len);
 343
 344    return 0;
 345}
 346
 347static int
 348vub_set_config(VuDev *vu_dev, const uint8_t *data,
 349               uint32_t offset, uint32_t size, uint32_t flags)
 350{
 351    VugDev *gdev;
 352    VubDev *vdev_blk;
 353    uint8_t wce;
 354    int fd;
 355
 356    /* don't support live migration */
 357    if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
 358        return -1;
 359    }
 360
 361    gdev = container_of(vu_dev, VugDev, parent);
 362    vdev_blk = container_of(gdev, VubDev, parent);
 363
 364    if (offset != offsetof(struct virtio_blk_config, wce) ||
 365        size != 1) {
 366        return -1;
 367    }
 368
 369    wce = *data;
 370    if (wce == vdev_blk->blkcfg.wce) {
 371        /* Do nothing as same with old configuration */
 372        return 0;
 373    }
 374
 375    vdev_blk->blkcfg.wce = wce;
 376    fprintf(stdout, "Write Cache Policy Changed\n");
 377    if (vdev_blk->blk_fd >= 0) {
 378        close(vdev_blk->blk_fd);
 379        vdev_blk->blk_fd = -1;
 380    }
 381
 382    fd = vub_open(vdev_blk->blk_name, wce);
 383    if (fd < 0) {
 384        fprintf(stderr, "Error to open block device %s\n", vdev_blk->blk_name);
 385        vdev_blk->blk_fd = -1;
 386        return -1;
 387    }
 388    vdev_blk->blk_fd = fd;
 389
 390    return 0;
 391}
 392
 393static const VuDevIface vub_iface = {
 394    .get_features = vub_get_features,
 395    .queue_set_started = vub_queue_set_started,
 396    .get_protocol_features = vub_get_protocol_features,
 397    .get_config = vub_get_config,
 398    .set_config = vub_set_config,
 399};
 400
 401static int unix_sock_new(char *unix_fn)
 402{
 403    int sock;
 404    struct sockaddr_un un;
 405    size_t len;
 406
 407    assert(unix_fn);
 408
 409    sock = socket(AF_UNIX, SOCK_STREAM, 0);
 410    if (sock <= 0) {
 411        perror("socket");
 412        return -1;
 413    }
 414
 415    un.sun_family = AF_UNIX;
 416    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
 417    len = sizeof(un.sun_family) + strlen(un.sun_path);
 418
 419    (void)unlink(unix_fn);
 420    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
 421        perror("bind");
 422        goto fail;
 423    }
 424
 425    if (listen(sock, 1) < 0) {
 426        perror("listen");
 427        goto fail;
 428    }
 429
 430    return sock;
 431
 432fail:
 433    (void)close(sock);
 434
 435    return -1;
 436}
 437
 438static void vub_free(struct VubDev *vdev_blk)
 439{
 440    if (!vdev_blk) {
 441        return;
 442    }
 443
 444    g_main_loop_unref(vdev_blk->loop);
 445    if (vdev_blk->blk_fd >= 0) {
 446        close(vdev_blk->blk_fd);
 447    }
 448    g_free(vdev_blk);
 449}
 450
 451static uint32_t
 452vub_get_blocksize(int fd)
 453{
 454    uint32_t blocksize = 512;
 455
 456#if defined(__linux__) && defined(BLKSSZGET)
 457    if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
 458        return blocklen;
 459    }
 460#endif
 461
 462    return blocksize;
 463}
 464
 465static void
 466vub_initialize_config(int fd, struct virtio_blk_config *config)
 467{
 468    off64_t capacity;
 469
 470    capacity = lseek64(fd, 0, SEEK_END);
 471    config->capacity = capacity >> 9;
 472    config->blk_size = vub_get_blocksize(fd);
 473    config->size_max = 65536;
 474    config->seg_max = 128 - 2;
 475    config->min_io_size = 1;
 476    config->opt_io_size = 1;
 477    config->num_queues = 1;
 478}
 479
 480static VubDev *
 481vub_new(char *blk_file)
 482{
 483    VubDev *vdev_blk;
 484
 485    vdev_blk = g_new0(VubDev, 1);
 486    vdev_blk->loop = g_main_loop_new(NULL, FALSE);
 487    vdev_blk->blk_fd = vub_open(blk_file, 0);
 488    if (vdev_blk->blk_fd  < 0) {
 489        fprintf(stderr, "Error to open block device %s\n", blk_file);
 490        vub_free(vdev_blk);
 491        return NULL;
 492    }
 493    vdev_blk->enable_ro = false;
 494    vdev_blk->blkcfg.wce = 0;
 495    vdev_blk->blk_name = blk_file;
 496
 497    /* fill virtio_blk_config with block parameters */
 498    vub_initialize_config(vdev_blk->blk_fd, &vdev_blk->blkcfg);
 499
 500    return vdev_blk;
 501}
 502
 503int main(int argc, char **argv)
 504{
 505    int opt;
 506    char *unix_socket = NULL;
 507    char *blk_file = NULL;
 508    bool enable_ro = false;
 509    int lsock = -1, csock = -1;
 510    VubDev *vdev_blk = NULL;
 511
 512    while ((opt = getopt(argc, argv, "b:rs:h")) != -1) {
 513        switch (opt) {
 514        case 'b':
 515            blk_file = g_strdup(optarg);
 516            break;
 517        case 's':
 518            unix_socket = g_strdup(optarg);
 519            break;
 520        case 'r':
 521            enable_ro = true;
 522            break;
 523        case 'h':
 524        default:
 525            printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 526                   " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 527            return 0;
 528        }
 529    }
 530
 531    if (!unix_socket || !blk_file) {
 532        printf("Usage: %s [ -b block device or file, -s UNIX domain socket"
 533               " | -r Enable read-only ] | [ -h ]\n", argv[0]);
 534        return -1;
 535    }
 536
 537    lsock = unix_sock_new(unix_socket);
 538    if (lsock < 0) {
 539        goto err;
 540    }
 541
 542    csock = accept(lsock, (void *)0, (void *)0);
 543    if (csock < 0) {
 544        fprintf(stderr, "Accept error %s\n", strerror(errno));
 545        goto err;
 546    }
 547
 548    vdev_blk = vub_new(blk_file);
 549    if (!vdev_blk) {
 550        goto err;
 551    }
 552    if (enable_ro) {
 553        vdev_blk->enable_ro = true;
 554    }
 555
 556    vug_init(&vdev_blk->parent, csock, vub_panic_cb, &vub_iface);
 557
 558    g_main_loop_run(vdev_blk->loop);
 559
 560    vug_deinit(&vdev_blk->parent);
 561
 562err:
 563    vub_free(vdev_blk);
 564    if (csock >= 0) {
 565        close(csock);
 566    }
 567    if (lsock >= 0) {
 568        close(lsock);
 569    }
 570    g_free(unix_socket);
 571    g_free(blk_file);
 572
 573    return 0;
 574}
 575