linux/drivers/block/virtio_blk.c
// SPDX-License-Identifier: GPL-2.0-only
//#define DEBUG
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <linux/idr.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-virtio.h>
#include <linux/numa.h>
#include <uapi/linux/virtio_ring.h>

#define PART_BITS 4
#define VQ_NAME_LEN 16
#define MAX_DISCARD_SEGMENTS 256u

static int major;
static DEFINE_IDA(vd_index_ida);

static struct workqueue_struct *virtblk_wq;

struct virtio_blk_vq {
        struct virtqueue *vq;
        spinlock_t lock;
        char name[VQ_NAME_LEN];
} ____cacheline_aligned_in_smp;

struct virtio_blk {
        /*
         * This mutex must be held by anything that may run after
         * virtblk_remove() sets vblk->vdev to NULL.
         *
         * blk-mq, virtqueue processing, and sysfs attribute code paths are
         * shut down before vblk->vdev is set to NULL and therefore do not need
         * to hold this mutex.
         */
        struct mutex vdev_mutex;
        struct virtio_device *vdev;

        /* The disk structure for the kernel. */
        struct gendisk *disk;

        /* Block layer tags. */
        struct blk_mq_tag_set tag_set;

        /* Process context for config space updates */
        struct work_struct config_work;

        /*
         * Tracks references from block_device_operations open/release and
         * virtio_driver probe/remove so this object can be freed once no
         * longer in use.
         */
        refcount_t refs;
        /* What the host tells us, plus 2 for the header and status trailer. */
        unsigned int sg_elems;

        /* Ida index - used to track minor number allocations. */
        int index;

        /* num of vqs */
        int num_vqs;
        struct virtio_blk_vq *vqs;
};

struct virtblk_req {
        struct virtio_blk_outhdr out_hdr;
        u8 status;
        struct scatterlist sg[];
};

static inline blk_status_t virtblk_result(struct virtblk_req *vbr)
{
        switch (vbr->status) {
        case VIRTIO_BLK_S_OK:
                return BLK_STS_OK;
        case VIRTIO_BLK_S_UNSUPP:
                return BLK_STS_NOTSUPP;
        default:
                return BLK_STS_IOERR;
        }
}

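/*
 * Buffer layout for a request on the virtqueue: a driver-to-device
 * header, an optional data scatterlist, and a one-byte status the
 * device writes back.  For a read, the data buffers are device-writable:
 *
 *   sgs[0] = out_hdr  (driver -> device)
 *   sgs[1] = data_sg  (device -> driver for IN, driver -> device for OUT)
 *   sgs[2] = status   (device -> driver)
 */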
static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr,
                struct scatterlist *data_sg, bool have_data)
{
        struct scatterlist hdr, status, *sgs[3];
        unsigned int num_out = 0, num_in = 0;

        sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
        sgs[num_out++] = &hdr;

        if (have_data) {
                if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT))
                        sgs[num_out++] = data_sg;
                else
                        sgs[num_out + num_in++] = data_sg;
        }

        sg_init_one(&status, &vbr->status, sizeof(vbr->status));
        sgs[num_out + num_in++] = &status;

        return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}

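/*
 * DISCARD and WRITE ZEROES requests carry their ranges as a data
 * payload: an array of struct virtio_blk_discard_write_zeroes, one
 * entry per range, attached to the request as a special payload so
 * that blk_rq_map_sg() picks it up like ordinary data.
 */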
static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap)
{
        unsigned short segments = blk_rq_nr_discard_segments(req);
        unsigned short n = 0;
        struct virtio_blk_discard_write_zeroes *range;
        struct bio *bio;
        u32 flags = 0;

        if (unmap)
                flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP;

        range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
        if (!range)
                return -ENOMEM;

        /*
         * A single max discard segment means multi-range discard isn't
         * supported, and the block layer merges contiguous bios just as
         * it does for normal read/write requests.  So we can't rely on
         * the bios to retrieve the per-range information.
         */
        if (queue_max_discard_segments(req->q) == 1) {
                range[0].flags = cpu_to_le32(flags);
                range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req));
                range[0].sector = cpu_to_le64(blk_rq_pos(req));
                n = 1;
        } else {
                __rq_for_each_bio(bio, req) {
                        u64 sector = bio->bi_iter.bi_sector;
                        u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT;

                        range[n].flags = cpu_to_le32(flags);
                        range[n].num_sectors = cpu_to_le32(num_sectors);
                        range[n].sector = cpu_to_le64(sector);
                        n++;
                }
        }

        WARN_ON_ONCE(n != segments);

        req->special_vec.bv_page = virt_to_page(range);
        req->special_vec.bv_offset = offset_in_page(range);
        req->special_vec.bv_len = sizeof(*range) * segments;
        req->rq_flags |= RQF_SPECIAL_PAYLOAD;

        return 0;
}

static inline void virtblk_request_done(struct request *req)
{
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);

        if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
                kfree(page_address(req->special_vec.bv_page) +
                      req->special_vec.bv_offset);
        }

        blk_mq_end_request(req, virtblk_result(vbr));
}

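/*
 * Completion callback.  Callbacks are disabled while buffers are
 * reaped and re-enabled afterwards; virtqueue_enable_cb() returning
 * false means more buffers arrived in the meantime, so the loop runs
 * again rather than risk missing a completion.
 */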
static void virtblk_done(struct virtqueue *vq)
{
        struct virtio_blk *vblk = vq->vdev->priv;
        bool req_done = false;
        int qid = vq->index;
        struct virtblk_req *vbr;
        unsigned long flags;
        unsigned int len;

        spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
        do {
                virtqueue_disable_cb(vq);
                while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
                        struct request *req = blk_mq_rq_from_pdu(vbr);

                        if (likely(!blk_should_fake_timeout(req->q)))
                                blk_mq_complete_request(req);
                        req_done = true;
                }
                if (unlikely(virtqueue_is_broken(vq)))
                        break;
        } while (!virtqueue_enable_cb(vq));

        /* In case queue is stopped waiting for more buffers. */
        if (req_done)
                blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
        spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
}

static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
        struct virtio_blk *vblk = hctx->queue->queuedata;
        struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num];
        bool kick;

        spin_lock_irq(&vq->lock);
        kick = virtqueue_kick_prepare(vq->vq);
        spin_unlock_irq(&vq->lock);

        if (kick)
                virtqueue_notify(vq->vq);
}

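/*
 * blk-mq ->queue_rq hook: translate the request into a virtio-blk
 * command header, map its segments, add it to the hardware queue's
 * virtqueue under the vq lock, and kick the device once the batch is
 * complete (bd->last).
 */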
static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
                           const struct blk_mq_queue_data *bd)
{
        struct virtio_blk *vblk = hctx->queue->queuedata;
        struct request *req = bd->rq;
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
        unsigned long flags;
        unsigned int num;
        int qid = hctx->queue_num;
        int err;
        bool notify = false;
        bool unmap = false;
        u32 type;

        BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);

        switch (req_op(req)) {
        case REQ_OP_READ:
        case REQ_OP_WRITE:
                type = 0;
                break;
        case REQ_OP_FLUSH:
                type = VIRTIO_BLK_T_FLUSH;
                break;
        case REQ_OP_DISCARD:
                type = VIRTIO_BLK_T_DISCARD;
                break;
        case REQ_OP_WRITE_ZEROES:
                type = VIRTIO_BLK_T_WRITE_ZEROES;
                unmap = !(req->cmd_flags & REQ_NOUNMAP);
                break;
        case REQ_OP_DRV_IN:
                type = VIRTIO_BLK_T_GET_ID;
                break;
        default:
                WARN_ON_ONCE(1);
                return BLK_STS_IOERR;
        }

        vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type);
        vbr->out_hdr.sector = type ?
                0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req));
        vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req));

        blk_mq_start_request(req);

        if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) {
                err = virtblk_setup_discard_write_zeroes(req, unmap);
                if (err)
                        return BLK_STS_RESOURCE;
        }

        num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
        if (num) {
                if (rq_data_dir(req) == WRITE)
                        vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT);
                else
                        vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN);
        }

        spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
        err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
        if (err) {
                virtqueue_kick(vblk->vqs[qid].vq);
                /* Don't stop the queue if -ENOMEM: we may have failed to
                 * bounce the buffer due to global resource outage.
                 */
                if (err == -ENOSPC)
                        blk_mq_stop_hw_queue(hctx);
                spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
                switch (err) {
                case -ENOSPC:
                        return BLK_STS_DEV_RESOURCE;
                case -ENOMEM:
                        return BLK_STS_RESOURCE;
                default:
                        return BLK_STS_IOERR;
                }
        }

        if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
                notify = true;
        spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);

        if (notify)
                virtqueue_notify(vblk->vqs[qid].vq);
        return BLK_STS_OK;
}

/* Return the disk's ID (serial number) string in *id_str. */
static int virtblk_get_id(struct gendisk *disk, char *id_str)
{
        struct virtio_blk *vblk = disk->private_data;
        struct request_queue *q = vblk->disk->queue;
        struct request *req;
        int err;

        req = blk_get_request(q, REQ_OP_DRV_IN, 0);
        if (IS_ERR(req))
                return PTR_ERR(req);

        err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL);
        if (err)
                goto out;

        blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
        err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req)));
out:
        blk_put_request(req);
        return err;
}

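/*
 * One reference is held from probe until remove, plus one per opener;
 * the last put frees the device structure and releases its index.
 */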
static void virtblk_get(struct virtio_blk *vblk)
{
        refcount_inc(&vblk->refs);
}

static void virtblk_put(struct virtio_blk *vblk)
{
        if (refcount_dec_and_test(&vblk->refs)) {
                ida_simple_remove(&vd_index_ida, vblk->index);
                mutex_destroy(&vblk->vdev_mutex);
                kfree(vblk);
        }
}

static int virtblk_open(struct block_device *bd, fmode_t mode)
{
        struct virtio_blk *vblk = bd->bd_disk->private_data;
        int ret = 0;

        mutex_lock(&vblk->vdev_mutex);

        if (vblk->vdev)
                virtblk_get(vblk);
        else
                ret = -ENXIO;

        mutex_unlock(&vblk->vdev_mutex);
        return ret;
}

static void virtblk_release(struct gendisk *disk, fmode_t mode)
{
        struct virtio_blk *vblk = disk->private_data;

        virtblk_put(vblk);
}

/* We provide getgeo only to please some old bootloader/partitioning tools */
static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
{
        struct virtio_blk *vblk = bd->bd_disk->private_data;
        int ret = 0;

        mutex_lock(&vblk->vdev_mutex);

        if (!vblk->vdev) {
                ret = -ENXIO;
                goto out;
        }

        /* see if the host passed in geometry config */
        if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) {
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.cylinders, &geo->cylinders);
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.heads, &geo->heads);
                virtio_cread(vblk->vdev, struct virtio_blk_config,
                             geometry.sectors, &geo->sectors);
        } else {
                /* some standard values, similar to sd */
                geo->heads = 1 << 6;
                geo->sectors = 1 << 5;
                geo->cylinders = get_capacity(bd->bd_disk) >> 11;
        }
out:
        mutex_unlock(&vblk->vdev_mutex);
        return ret;
}

static const struct block_device_operations virtblk_fops = {
        .owner  = THIS_MODULE,
        .open = virtblk_open,
        .release = virtblk_release,
        .getgeo = virtblk_getgeo,
};

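/*
 * Each disk claims a block of 1 << PART_BITS (16) minor numbers:
 * e.g. index 0 covers minors 0-15, index 1 covers minors 16-31.
 */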
static int index_to_minor(int index)
{
        return index << PART_BITS;
}

static int minor_to_index(int minor)
{
        return minor >> PART_BITS;
}

static ssize_t serial_show(struct device *dev,
                           struct device_attribute *attr, char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        int err;

        /* sysfs gives us a PAGE_SIZE buffer */
        BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);

        buf[VIRTIO_BLK_ID_BYTES] = '\0';
        err = virtblk_get_id(disk, buf);
        if (!err)
                return strlen(buf);

        if (err == -EIO) /* Unsupported? Make it empty. */
                return 0;

        return err;
}

static DEVICE_ATTR_RO(serial);

/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
{
        struct virtio_device *vdev = vblk->vdev;
        struct request_queue *q = vblk->disk->queue;
        char cap_str_2[10], cap_str_10[10];
        unsigned long long nblocks;
        u64 capacity;

        /* Host must always specify the capacity. */
        virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);

        /* If capacity is too big, truncate with warning. */
        if ((sector_t)capacity != capacity) {
                dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
                         (unsigned long long)capacity);
                capacity = (sector_t)-1;
        }

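        /* capacity is in 512-byte sectors; convert to logical blocks */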
        nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9);

        string_get_size(nblocks, queue_logical_block_size(q),
                        STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
        string_get_size(nblocks, queue_logical_block_size(q),
                        STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));

        dev_notice(&vdev->dev,
                   "[%s] %s%llu %d-byte logical blocks (%s/%s)\n",
                   vblk->disk->disk_name,
                   resize ? "new size: " : "",
                   nblocks,
                   queue_logical_block_size(q),
                   cap_str_10,
                   cap_str_2);

        set_capacity_revalidate_and_notify(vblk->disk, capacity, true);
}

static void virtblk_config_changed_work(struct work_struct *work)
{
        struct virtio_blk *vblk =
                container_of(work, struct virtio_blk, config_work);

        virtblk_update_capacity(vblk, true);
}

static void virtblk_config_changed(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        queue_work(virtblk_wq, &vblk->config_work);
}

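/*
 * Negotiate the number of request virtqueues: use the device's
 * num_queues when VIRTIO_BLK_F_MQ is offered, fall back to a single
 * queue otherwise, and never create more queues than CPUs.
 */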
static int init_vq(struct virtio_blk *vblk)
{
        int err;
        int i;
        vq_callback_t **callbacks;
        const char **names;
        struct virtqueue **vqs;
        unsigned short num_vqs;
        struct virtio_device *vdev = vblk->vdev;
        struct irq_affinity desc = { 0, };

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
                                   struct virtio_blk_config, num_queues,
                                   &num_vqs);
        if (err)
                num_vqs = 1;

        num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs);

        vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL);
        if (!vblk->vqs)
                return -ENOMEM;

        names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL);
        callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL);
        vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL);
        if (!names || !callbacks || !vqs) {
                err = -ENOMEM;
                goto out;
        }

        for (i = 0; i < num_vqs; i++) {
                callbacks[i] = virtblk_done;
                snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
                names[i] = vblk->vqs[i].name;
        }

        /* Discover virtqueues and write information to configuration.  */
        err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc);
        if (err)
                goto out;

        for (i = 0; i < num_vqs; i++) {
                spin_lock_init(&vblk->vqs[i].lock);
                vblk->vqs[i].vq = vqs[i];
        }
        vblk->num_vqs = num_vqs;

out:
        kfree(vqs);
        kfree(callbacks);
        kfree(names);
        if (err)
                kfree(vblk->vqs);
        return err;
}

/*
 * Legacy naming scheme used for virtio devices.  We are stuck with it for
 * virtio blk but don't ever use it for any new driver.
 */
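/*
 * The scheme is bijective base-26: index 0 -> "vda", 25 -> "vdz",
 * 26 -> "vdaa", 701 -> "vdzz", 702 -> "vdaaa".
 */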
static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
{
        const int base = 'z' - 'a' + 1;
        char *begin = buf + strlen(prefix);
        char *end = buf + buflen;
        char *p;
        int unit;

        p = end - 1;
        *p = '\0';
        unit = base;
        do {
                if (p == begin)
                        return -EINVAL;
                *--p = 'a' + (index % unit);
                index = (index / unit) - 1;
        } while (index >= 0);

        memmove(begin, p, end - p);
        memcpy(buf, prefix, strlen(prefix));

        return 0;
}

static int virtblk_get_cache_mode(struct virtio_device *vdev)
{
        u8 writeback;
        int err;

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE,
                                   struct virtio_blk_config, wce,
                                   &writeback);

        /*
         * If WCE is not configurable and flush is not available,
         * assume no writeback cache is in use.
         */
        if (err)
                writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH);

        return writeback;
}

static void virtblk_update_cache_mode(struct virtio_device *vdev)
{
        u8 writeback = virtblk_get_cache_mode(vdev);
        struct virtio_blk *vblk = vdev->priv;

        blk_queue_write_cache(vblk->disk->queue, writeback, false);
        revalidate_disk(vblk->disk);
}

static const char *const virtblk_cache_types[] = {
        "write through", "write back"
};

static ssize_t
cache_type_store(struct device *dev, struct device_attribute *attr,
                 const char *buf, size_t count)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct virtio_blk *vblk = disk->private_data;
        struct virtio_device *vdev = vblk->vdev;
        int i;

        BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
        i = sysfs_match_string(virtblk_cache_types, buf);
        if (i < 0)
                return i;

        virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i);
        virtblk_update_cache_mode(vdev);
        return count;
}

static ssize_t
cache_type_show(struct device *dev, struct device_attribute *attr, char *buf)
{
        struct gendisk *disk = dev_to_disk(dev);
        struct virtio_blk *vblk = disk->private_data;
        u8 writeback = virtblk_get_cache_mode(vblk->vdev);

        BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
        return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
}

static DEVICE_ATTR_RW(cache_type);

static struct attribute *virtblk_attrs[] = {
        &dev_attr_serial.attr,
        &dev_attr_cache_type.attr,
        NULL,
};

static umode_t virtblk_attrs_are_visible(struct kobject *kobj,
                struct attribute *a, int n)
{
        struct device *dev = container_of(kobj, struct device, kobj);
        struct gendisk *disk = dev_to_disk(dev);
        struct virtio_blk *vblk = disk->private_data;
        struct virtio_device *vdev = vblk->vdev;

        if (a == &dev_attr_cache_type.attr &&
            !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
                return S_IRUGO;

        return a->mode;
}

static const struct attribute_group virtblk_attr_group = {
        .attrs = virtblk_attrs,
        .is_visible = virtblk_attrs_are_visible,
};

static const struct attribute_group *virtblk_attr_groups[] = {
        &virtblk_attr_group,
        NULL,
};

static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
                unsigned int hctx_idx, unsigned int numa_node)
{
        struct virtio_blk *vblk = set->driver_data;
        struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq);

        sg_init_table(vbr->sg, vblk->sg_elems);
        return 0;
}

static int virtblk_map_queues(struct blk_mq_tag_set *set)
{
        struct virtio_blk *vblk = set->driver_data;

        return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT],
                                        vblk->vdev, 0);
}

static const struct blk_mq_ops virtio_mq_ops = {
        .queue_rq       = virtio_queue_rq,
        .commit_rqs     = virtio_commit_rqs,
        .complete       = virtblk_request_done,
        .init_request   = virtblk_init_request,
        .map_queues     = virtblk_map_queues,
};

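/*
 * 0 (the default) means "size the queue from the virtqueue" in
 * virtblk_probe(); any other value overrides the ring-derived depth.
 */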
static unsigned int virtblk_queue_depth;
module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);

static int virtblk_probe(struct virtio_device *vdev)
{
        struct virtio_blk *vblk;
        struct request_queue *q;
        int err, index;

        u32 v, blk_size, max_size, sg_elems, opt_io_size;
        u16 min_io_size;
        u8 physical_block_exp, alignment_offset;

        if (!vdev->config->get) {
                dev_err(&vdev->dev, "%s failure: config access disabled\n",
                        __func__);
                return -EINVAL;
        }

        err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
                             GFP_KERNEL);
        if (err < 0)
                goto out;
        index = err;

        /* We need to know how many segments before we allocate. */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX,
                                   struct virtio_blk_config, seg_max,
                                   &sg_elems);

        /* We need at least one SG element, whatever they say. */
        if (err || !sg_elems)
                sg_elems = 1;

        /* We need an extra sg element at the head and one at the tail. */
        sg_elems += 2;
        vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
        if (!vblk) {
                err = -ENOMEM;
                goto out_free_index;
        }

        /* This reference is dropped in virtblk_remove(). */
        refcount_set(&vblk->refs, 1);
        mutex_init(&vblk->vdev_mutex);

        vblk->vdev = vdev;
        vblk->sg_elems = sg_elems;

        INIT_WORK(&vblk->config_work, virtblk_config_changed_work);

        err = init_vq(vblk);
        if (err)
                goto out_free_vblk;

        /* FIXME: How many partitions?  How long is a piece of string? */
        vblk->disk = alloc_disk(1 << PART_BITS);
        if (!vblk->disk) {
                err = -ENOMEM;
                goto out_free_vq;
        }

        /* Default queue sizing is to fill the ring. */
        if (!virtblk_queue_depth) {
                virtblk_queue_depth = vblk->vqs[0].vq->num_free;
                /* ... but without indirect descs, we use 2 descs per req */
                if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
                        virtblk_queue_depth /= 2;
        }

        memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
        vblk->tag_set.ops = &virtio_mq_ops;
        vblk->tag_set.queue_depth = virtblk_queue_depth;
        vblk->tag_set.numa_node = NUMA_NO_NODE;
        vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        vblk->tag_set.cmd_size =
                sizeof(struct virtblk_req) +
                sizeof(struct scatterlist) * sg_elems;
        vblk->tag_set.driver_data = vblk;
        vblk->tag_set.nr_hw_queues = vblk->num_vqs;

        err = blk_mq_alloc_tag_set(&vblk->tag_set);
        if (err)
                goto out_put_disk;

        q = blk_mq_init_queue(&vblk->tag_set);
        if (IS_ERR(q)) {
                err = -ENOMEM;
                goto out_free_tags;
        }
        vblk->disk->queue = q;

        q->queuedata = vblk;

        virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);

        vblk->disk->major = major;
        vblk->disk->first_minor = index_to_minor(index);
        vblk->disk->private_data = vblk;
        vblk->disk->fops = &virtblk_fops;
        vblk->disk->flags |= GENHD_FL_EXT_DEVT;
        vblk->index = index;

        /* configure queue flush support */
        virtblk_update_cache_mode(vdev);

        /* If disk is read-only in the host, the guest should obey */
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
                set_disk_ro(vblk->disk, 1);

        /* We can handle whatever the host told us to handle. */
        blk_queue_max_segments(q, vblk->sg_elems-2);

        /* No real sector limit. */
        blk_queue_max_hw_sectors(q, -1U);

        max_size = virtio_max_dma_size(vdev);

        /* Host can optionally specify maximum segment size and number of
         * segments. */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX,
                                   struct virtio_blk_config, size_max, &v);
        if (!err)
                max_size = min(max_size, v);

        blk_queue_max_segment_size(q, max_size);

        /* Host can optionally specify the block size of the device */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE,
                                   struct virtio_blk_config, blk_size,
                                   &blk_size);
        if (!err)
                blk_queue_logical_block_size(q, blk_size);
        else
                blk_size = queue_logical_block_size(q);

        /* Use topology information if available */
        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, physical_block_exp,
                                   &physical_block_exp);
        if (!err && physical_block_exp)
                blk_queue_physical_block_size(q,
                                blk_size * (1 << physical_block_exp));

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, alignment_offset,
                                   &alignment_offset);
        if (!err && alignment_offset)
                blk_queue_alignment_offset(q, blk_size * alignment_offset);

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, min_io_size,
                                   &min_io_size);
        if (!err && min_io_size)
                blk_queue_io_min(q, blk_size * min_io_size);

        err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY,
                                   struct virtio_blk_config, opt_io_size,
                                   &opt_io_size);
        if (!err && opt_io_size)
                blk_queue_io_opt(q, blk_size * opt_io_size);

        if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
                q->limits.discard_granularity = blk_size;

                virtio_cread(vdev, struct virtio_blk_config,
                             discard_sector_alignment, &v);
                q->limits.discard_alignment = v ? v << SECTOR_SHIFT : 0;

                virtio_cread(vdev, struct virtio_blk_config,
                             max_discard_sectors, &v);
                blk_queue_max_discard_sectors(q, v ? v : UINT_MAX);

                virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
                             &v);
                blk_queue_max_discard_segments(q,
                                               min_not_zero(v,
                                                            MAX_DISCARD_SEGMENTS));

                blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
        }

        if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
                virtio_cread(vdev, struct virtio_blk_config,
                             max_write_zeroes_sectors, &v);
                blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX);
        }

        virtblk_update_capacity(vblk, false);
        virtio_device_ready(vdev);

        device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups);
        return 0;

out_free_tags:
        blk_mq_free_tag_set(&vblk->tag_set);
out_put_disk:
        put_disk(vblk->disk);
out_free_vq:
        vdev->config->del_vqs(vdev);
        kfree(vblk->vqs);
out_free_vblk:
        kfree(vblk);
out_free_index:
        ida_simple_remove(&vd_index_ida, index);
out:
        return err;
}

static void virtblk_remove(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        /* Make sure no work handler is accessing the device. */
        flush_work(&vblk->config_work);

        del_gendisk(vblk->disk);
        blk_cleanup_queue(vblk->disk->queue);

        blk_mq_free_tag_set(&vblk->tag_set);

        mutex_lock(&vblk->vdev_mutex);

        /* Stop all the virtqueues. */
        vdev->config->reset(vdev);

        /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */
        vblk->vdev = NULL;

        put_disk(vblk->disk);
        vdev->config->del_vqs(vdev);
        kfree(vblk->vqs);

        mutex_unlock(&vblk->vdev_mutex);

        virtblk_put(vblk);
}

#ifdef CONFIG_PM_SLEEP
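/*
 * Suspend/resume: freeze resets the device and deletes the virtqueues
 * with the blk-mq queue quiesced; restore recreates the virtqueues
 * and unquiesces the queue once the device is ready again.
 */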
static int virtblk_freeze(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;

        /* Ensure we don't receive any more interrupts */
        vdev->config->reset(vdev);

        /* Make sure no work handler is accessing the device. */
        flush_work(&vblk->config_work);

        blk_mq_quiesce_queue(vblk->disk->queue);

        vdev->config->del_vqs(vdev);
        return 0;
}

static int virtblk_restore(struct virtio_device *vdev)
{
        struct virtio_blk *vblk = vdev->priv;
        int ret;

        ret = init_vq(vdev->priv);
        if (ret)
                return ret;

        virtio_device_ready(vdev);

        blk_mq_unquiesce_queue(vblk->disk->queue);
        return 0;
}
#endif

static const struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static unsigned int features_legacy[] = {
        VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
        VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
        VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
        VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};
static unsigned int features[] = {
        VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
        VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
        VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
        VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES,
};

static struct virtio_driver virtio_blk = {
        .feature_table                  = features,
        .feature_table_size             = ARRAY_SIZE(features),
        .feature_table_legacy           = features_legacy,
        .feature_table_size_legacy      = ARRAY_SIZE(features_legacy),
        .driver.name                    = KBUILD_MODNAME,
        .driver.owner                   = THIS_MODULE,
        .id_table                       = id_table,
        .probe                          = virtblk_probe,
        .remove                         = virtblk_remove,
        .config_changed                 = virtblk_config_changed,
#ifdef CONFIG_PM_SLEEP
        .freeze                         = virtblk_freeze,
        .restore                        = virtblk_restore,
#endif
};

static int __init init(void)
{
        int error;

        virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
        if (!virtblk_wq)
                return -ENOMEM;

        major = register_blkdev(0, "virtblk");
        if (major < 0) {
                error = major;
                goto out_destroy_workqueue;
        }

        error = register_virtio_driver(&virtio_blk);
        if (error)
                goto out_unregister_blkdev;
        return 0;

out_unregister_blkdev:
        unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
        destroy_workqueue(virtblk_wq);
        return error;
}

static void __exit fini(void)
{
        unregister_virtio_driver(&virtio_blk);
        unregister_blkdev(major, "virtblk");
        destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);

MODULE_DEVICE_TABLE(virtio, id_table);
MODULE_DESCRIPTION("Virtio block driver");
MODULE_LICENSE("GPL");