linux/drivers/block/virtio_blk.c
<<
>>
Prefs
   1//#define DEBUG
   2#include <linux/spinlock.h>
   3#include <linux/slab.h>
   4#include <linux/blkdev.h>
   5#include <linux/hdreg.h>
   6#include <linux/virtio.h>
   7#include <linux/virtio_blk.h>
   8#include <linux/scatterlist.h>
   9
  10#define PART_BITS 4
  11
  12static int major, index;
  13
  14struct virtio_blk
  15{
  16        spinlock_t lock;
  17
  18        struct virtio_device *vdev;
  19        struct virtqueue *vq;
  20
  21        /* The disk structure for the kernel. */
  22        struct gendisk *disk;
  23
  24        /* Request tracking. */
  25        struct list_head reqs;
  26
  27        mempool_t *pool;
  28
  29        /* What host tells us, plus 2 for header & tailer. */
  30        unsigned int sg_elems;
  31
  32        /* Scatterlist: can be too big for stack. */
  33        struct scatterlist sg[/*sg_elems*/];
  34};
  35
  36struct virtblk_req
  37{
  38        struct list_head list;
  39        struct request *req;
  40        struct virtio_blk_outhdr out_hdr;
  41        struct virtio_scsi_inhdr in_hdr;
  42        u8 status;
  43};
  44
  45static void blk_done(struct virtqueue *vq)
  46{
  47        struct virtio_blk *vblk = vq->vdev->priv;
  48        struct virtblk_req *vbr;
  49        unsigned int len;
  50        unsigned long flags;
  51
  52        spin_lock_irqsave(&vblk->lock, flags);
  53        while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
  54                int error;
  55
  56                switch (vbr->status) {
  57                case VIRTIO_BLK_S_OK:
  58                        error = 0;
  59                        break;
  60                case VIRTIO_BLK_S_UNSUPP:
  61                        error = -ENOTTY;
  62                        break;
  63                default:
  64                        error = -EIO;
  65                        break;
  66                }
  67
  68                switch (vbr->req->cmd_type) {
  69                case REQ_TYPE_BLOCK_PC:
  70                        vbr->req->resid_len = vbr->in_hdr.residual;
  71                        vbr->req->sense_len = vbr->in_hdr.sense_len;
  72                        vbr->req->errors = vbr->in_hdr.errors;
  73                        break;
  74                case REQ_TYPE_SPECIAL:
  75                        vbr->req->errors = (error != 0);
  76                        break;
  77                default:
  78                        break;
  79                }
  80
  81                __blk_end_request_all(vbr->req, error);
  82                list_del(&vbr->list);
  83                mempool_free(vbr, vblk->pool);
  84        }
  85        /* In case queue is stopped waiting for more buffers. */
  86        blk_start_queue(vblk->disk->queue);
  87        spin_unlock_irqrestore(&vblk->lock, flags);
  88}
  89
  90static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
  91                   struct request *req)
  92{
  93        unsigned long num, out = 0, in = 0;
  94        struct virtblk_req *vbr;
  95
  96        vbr = mempool_alloc(vblk->pool, GFP_ATOMIC);
  97        if (!vbr)
  98                /* When another request finishes we'll try again. */
  99                return false;
 100
 101        vbr->req = req;
 102
 103        if (req->cmd_flags & REQ_FLUSH) {
 104                vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 105                vbr->out_hdr.sector = 0;
 106                vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 107        } else {
 108                switch (req->cmd_type) {
 109                case REQ_TYPE_FS:
 110                        vbr->out_hdr.type = 0;
 111                        vbr->out_hdr.sector = blk_rq_pos(vbr->req);
 112                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 113                        break;
 114                case REQ_TYPE_BLOCK_PC:
 115                        vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 116                        vbr->out_hdr.sector = 0;
 117                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 118                        break;
 119                case REQ_TYPE_SPECIAL:
 120                        vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
 121                        vbr->out_hdr.sector = 0;
 122                        vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 123                        break;
 124                default:
 125                        /* We don't put anything else in the queue. */
 126                        BUG();
 127                }
 128        }
 129
 130        sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
 131
 132        /*
 133         * If this is a packet command we need a couple of additional headers.
 134         * Behind the normal outhdr we put a segment with the scsi command
 135         * block, and before the normal inhdr we put the sense data and the
 136         * inhdr with additional status information before the normal inhdr.
 137         */
 138        if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
 139                sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
 140
 141        num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
 142
 143        if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
 144                sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
 145                sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
 146                           sizeof(vbr->in_hdr));
 147        }
 148
 149        sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
 150                   sizeof(vbr->status));
 151
 152        if (num) {
 153                if (rq_data_dir(vbr->req) == WRITE) {
 154                        vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
 155                        out += num;
 156                } else {
 157                        vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 158                        in += num;
 159                }
 160        }
 161
 162        if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr) < 0) {
 163                mempool_free(vbr, vblk->pool);
 164                return false;
 165        }
 166
 167        list_add_tail(&vbr->list, &vblk->reqs);
 168        return true;
 169}
 170
 171static void do_virtblk_request(struct request_queue *q)
 172{
 173        struct virtio_blk *vblk = q->queuedata;
 174        struct request *req;
 175        unsigned int issued = 0;
 176
 177        while ((req = blk_peek_request(q)) != NULL) {
 178                BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 179
 180                /* If this request fails, stop queue and wait for something to
 181                   finish to restart it. */
 182                if (!do_req(q, vblk, req)) {
 183                        blk_stop_queue(q);
 184                        break;
 185                }
 186                blk_start_request(req);
 187                issued++;
 188        }
 189
 190        if (issued)
 191                virtqueue_kick(vblk->vq);
 192}
 193
 194/* return id (s/n) string for *disk to *id_str
 195 */
 196static int virtblk_get_id(struct gendisk *disk, char *id_str)
 197{
 198        struct virtio_blk *vblk = disk->private_data;
 199        struct request *req;
 200        struct bio *bio;
 201        int err;
 202
 203        bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
 204                           GFP_KERNEL);
 205        if (IS_ERR(bio))
 206                return PTR_ERR(bio);
 207
 208        req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
 209        if (IS_ERR(req)) {
 210                bio_put(bio);
 211                return PTR_ERR(req);
 212        }
 213
 214        req->cmd_type = REQ_TYPE_SPECIAL;
 215        err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
 216        blk_put_request(req);
 217
 218        return err;
 219}
 220
 221static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
 222                             unsigned int cmd, unsigned long data)
 223{
 224        struct gendisk *disk = bdev->bd_disk;
 225        struct virtio_blk *vblk = disk->private_data;
 226
 227        /*
 228         * Only allow the generic SCSI ioctls if the host can support it.
 229         */
 230        if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
 231                return -ENOTTY;
 232
 233        return scsi_cmd_ioctl(disk->queue, disk, mode, cmd,
 234                              (void __user *)data);
 235}
 236
 237/* We provide getgeo only to please some old bootloader/partitioning tools */
 238static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 239{
 240        struct virtio_blk *vblk = bd->bd_disk->private_data;
 241        struct virtio_blk_geometry vgeo;
 242        int err;
 243
 244        /* see if the host passed in geometry config */
 245        err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
 246                                offsetof(struct virtio_blk_config, geometry),
 247                                &vgeo);
 248
 249        if (!err) {
 250                geo->heads = vgeo.heads;
 251                geo->sectors = vgeo.sectors;
 252                geo->cylinders = vgeo.cylinders;
 253        } else {
 254                /* some standard values, similar to sd */
 255                geo->heads = 1 << 6;
 256                geo->sectors = 1 << 5;
 257                geo->cylinders = get_capacity(bd->bd_disk) >> 11;
 258        }
 259        return 0;
 260}
 261
 262static const struct block_device_operations virtblk_fops = {
 263        .ioctl  = virtblk_ioctl,
 264        .owner  = THIS_MODULE,
 265        .getgeo = virtblk_getgeo,
 266};
 267
 268static int index_to_minor(int index)
 269{
 270        return index << PART_BITS;
 271}
 272
 273static ssize_t virtblk_serial_show(struct device *dev,
 274                                struct device_attribute *attr, char *buf)
 275{
 276        struct gendisk *disk = dev_to_disk(dev);
 277        int err;
 278
 279        /* sysfs gives us a PAGE_SIZE buffer */
 280        BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
 281
 282        buf[VIRTIO_BLK_ID_BYTES] = '\0';
 283        err = virtblk_get_id(disk, buf);
 284        if (!err)
 285                return strlen(buf);
 286
 287        if (err == -EIO) /* Unsupported? Make it empty. */
 288                return 0;
 289
 290        return err;
 291}
 292DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
 293
 294static int __devinit virtblk_probe(struct virtio_device *vdev)
 295{
 296        struct virtio_blk *vblk;
 297        struct request_queue *q;
 298        int err;
 299        u64 cap;
 300        u32 v, blk_size, sg_elems, opt_io_size;
 301        u16 min_io_size;
 302        u8 physical_block_exp, alignment_offset;
 303
 304        if (index_to_minor(index) >= 1 << MINORBITS)
 305                return -ENOSPC;
 306
 307        /* We need to know how many segments before we allocate. */
 308        err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
 309                                offsetof(struct virtio_blk_config, seg_max),
 310                                &sg_elems);
 311
 312        /* We need at least one SG element, whatever they say. */
 313        if (err || !sg_elems)
 314                sg_elems = 1;
 315
 316        /* We need an extra sg elements at head and tail. */
 317        sg_elems += 2;
 318        vdev->priv = vblk = kmalloc(sizeof(*vblk) +
 319                                    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
 320        if (!vblk) {
 321                err = -ENOMEM;
 322                goto out;
 323        }
 324
 325        INIT_LIST_HEAD(&vblk->reqs);
 326        spin_lock_init(&vblk->lock);
 327        vblk->vdev = vdev;
 328        vblk->sg_elems = sg_elems;
 329        sg_init_table(vblk->sg, vblk->sg_elems);
 330
 331        /* We expect one virtqueue, for output. */
 332        vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
 333        if (IS_ERR(vblk->vq)) {
 334                err = PTR_ERR(vblk->vq);
 335                goto out_free_vblk;
 336        }
 337
 338        vblk->pool = mempool_create_kmalloc_pool(1,sizeof(struct virtblk_req));
 339        if (!vblk->pool) {
 340                err = -ENOMEM;
 341                goto out_free_vq;
 342        }
 343
 344        /* FIXME: How many partitions?  How long is a piece of string? */
 345        vblk->disk = alloc_disk(1 << PART_BITS);
 346        if (!vblk->disk) {
 347                err = -ENOMEM;
 348                goto out_mempool;
 349        }
 350
 351        q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
 352        if (!q) {
 353                err = -ENOMEM;
 354                goto out_put_disk;
 355        }
 356
 357        q->queuedata = vblk;
 358
 359        if (index < 26) {
 360                sprintf(vblk->disk->disk_name, "vd%c", 'a' + index % 26);
 361        } else if (index < (26 + 1) * 26) {
 362                sprintf(vblk->disk->disk_name, "vd%c%c",
 363                        'a' + index / 26 - 1, 'a' + index % 26);
 364        } else {
 365                const unsigned int m1 = (index / 26 - 1) / 26 - 1;
 366                const unsigned int m2 = (index / 26 - 1) % 26;
 367                const unsigned int m3 =  index % 26;
 368                sprintf(vblk->disk->disk_name, "vd%c%c%c",
 369                        'a' + m1, 'a' + m2, 'a' + m3);
 370        }
 371
 372        vblk->disk->major = major;
 373        vblk->disk->first_minor = index_to_minor(index);
 374        vblk->disk->private_data = vblk;
 375        vblk->disk->fops = &virtblk_fops;
 376        vblk->disk->driverfs_dev = &vdev->dev;
 377        index++;
 378
 379        /* configure queue flush support */
 380        if (virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH))
 381                blk_queue_flush(q, REQ_FLUSH);
 382
 383        /* If disk is read-only in the host, the guest should obey */
 384        if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
 385                set_disk_ro(vblk->disk, 1);
 386
 387        /* Host must always specify the capacity. */
 388        vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
 389                          &cap, sizeof(cap));
 390
 391        /* If capacity is too big, truncate with warning. */
 392        if ((sector_t)cap != cap) {
 393                dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
 394                         (unsigned long long)cap);
 395                cap = (sector_t)-1;
 396        }
 397        set_capacity(vblk->disk, cap);
 398
 399        /* We can handle whatever the host told us to handle. */
 400        blk_queue_max_segments(q, vblk->sg_elems-2);
 401
 402        /* No need to bounce any requests */
 403        blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
 404
 405        /* No real sector limit. */
 406        blk_queue_max_hw_sectors(q, -1U);
 407
 408        /* Host can optionally specify maximum segment size and number of
 409         * segments. */
 410        err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
 411                                offsetof(struct virtio_blk_config, size_max),
 412                                &v);
 413        if (!err)
 414                blk_queue_max_segment_size(q, v);
 415        else
 416                blk_queue_max_segment_size(q, -1U);
 417
 418        /* Host can optionally specify the block size of the device */
 419        err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
 420                                offsetof(struct virtio_blk_config, blk_size),
 421                                &blk_size);
 422        if (!err)
 423                blk_queue_logical_block_size(q, blk_size);
 424        else
 425                blk_size = queue_logical_block_size(q);
 426
 427        /* Use topology information if available */
 428        err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
 429                        offsetof(struct virtio_blk_config, physical_block_exp),
 430                        &physical_block_exp);
 431        if (!err && physical_block_exp)
 432                blk_queue_physical_block_size(q,
 433                                blk_size * (1 << physical_block_exp));
 434
 435        err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
 436                        offsetof(struct virtio_blk_config, alignment_offset),
 437                        &alignment_offset);
 438        if (!err && alignment_offset)
 439                blk_queue_alignment_offset(q, blk_size * alignment_offset);
 440
 441        err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
 442                        offsetof(struct virtio_blk_config, min_io_size),
 443                        &min_io_size);
 444        if (!err && min_io_size)
 445                blk_queue_io_min(q, blk_size * min_io_size);
 446
 447        err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
 448                        offsetof(struct virtio_blk_config, opt_io_size),
 449                        &opt_io_size);
 450        if (!err && opt_io_size)
 451                blk_queue_io_opt(q, blk_size * opt_io_size);
 452
 453
 454        add_disk(vblk->disk);
 455        err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
 456        if (err)
 457                goto out_del_disk;
 458
 459        return 0;
 460
 461out_del_disk:
 462        del_gendisk(vblk->disk);
 463        blk_cleanup_queue(vblk->disk->queue);
 464out_put_disk:
 465        put_disk(vblk->disk);
 466out_mempool:
 467        mempool_destroy(vblk->pool);
 468out_free_vq:
 469        vdev->config->del_vqs(vdev);
 470out_free_vblk:
 471        kfree(vblk);
 472out:
 473        return err;
 474}
 475
 476static void __devexit virtblk_remove(struct virtio_device *vdev)
 477{
 478        struct virtio_blk *vblk = vdev->priv;
 479
 480        /* Nothing should be pending. */
 481        BUG_ON(!list_empty(&vblk->reqs));
 482
 483        /* Stop all the virtqueues. */
 484        vdev->config->reset(vdev);
 485
 486        del_gendisk(vblk->disk);
 487        blk_cleanup_queue(vblk->disk->queue);
 488        put_disk(vblk->disk);
 489        mempool_destroy(vblk->pool);
 490        vdev->config->del_vqs(vdev);
 491        kfree(vblk);
 492}
 493
 494static const struct virtio_device_id id_table[] = {
 495        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
 496        { 0 },
 497};
 498
 499static unsigned int features[] = {
 500        VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
 501        VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
 502        VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY
 503};
 504
 505/*
 506 * virtio_blk causes spurious section mismatch warning by
 507 * simultaneously referring to a __devinit and a __devexit function.
 508 * Use __refdata to avoid this warning.
 509 */
 510static struct virtio_driver __refdata virtio_blk = {
 511        .feature_table = features,
 512        .feature_table_size = ARRAY_SIZE(features),
 513        .driver.name =  KBUILD_MODNAME,
 514        .driver.owner = THIS_MODULE,
 515        .id_table =     id_table,
 516        .probe =        virtblk_probe,
 517        .remove =       __devexit_p(virtblk_remove),
 518};
 519
 520static int __init init(void)
 521{
 522        major = register_blkdev(0, "virtblk");
 523        if (major < 0)
 524                return major;
 525        return register_virtio_driver(&virtio_blk);
 526}
 527
 528static void __exit fini(void)
 529{
 530        unregister_blkdev(major, "virtblk");
 531        unregister_virtio_driver(&virtio_blk);
 532}
 533module_init(init);
 534module_exit(fini);
 535
 536MODULE_DEVICE_TABLE(virtio, id_table);
 537MODULE_DESCRIPTION("Virtio block driver");
 538MODULE_LICENSE("GPL");
 539