linux/drivers/nvdimm/blk.c
/*
 * NVDIMM Block Window Driver
 * Copyright (c) 2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/genhd.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/nd.h>
#include <linux/sizes.h>
#include "nd.h"

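/*
 * struct nd_blk_device - per-namespace driver state
 * @queue: request queue servicing this disk
 * @disk: gendisk exposed to the block layer
 * @nsblk: the BLK-mode namespace backing this disk
 * @ndbr: BLK region that provides the do_io() aperture routine
 * @disk_size: raw namespace capacity in bytes
 * @sector_size: logical block size exposed to the block layer (512 or 4096)
 * @internal_lbasize: on-media LBA size (data sector plus metadata), aligned up
 */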
struct nd_blk_device {
	struct request_queue *queue;
	struct gendisk *disk;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_region *ndbr;
	size_t disk_size;
	u32 sector_size;
	u32 internal_lbasize;
};

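/*
 * The namespace lbasize covers both the data sector and any trailing
 * integrity metadata, so the per-sector metadata size is simply the
 * difference.
 */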
static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
{
	return blk_dev->nsblk->lbasize - blk_dev->sector_size;
}

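/*
 * Translate a namespace-relative offset to a device offset.  A BLK
 * namespace may be backed by several discontiguous DPA ranges, so walk
 * them in order until the one containing ns_offset is found.  Returns
 * SIZE_MAX if the request falls outside the namespace or straddles a
 * resource boundary.
 */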
static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
				resource_size_t ns_offset, unsigned int len)
{
	int i;

	for (i = 0; i < nsblk->num_resources; i++) {
		if (ns_offset < resource_size(nsblk->res[i])) {
			if (ns_offset + len > resource_size(nsblk->res[i])) {
				dev_WARN_ONCE(&nsblk->common.dev, 1,
					"illegal request\n");
				return SIZE_MAX;
			}
			return nsblk->res[i]->start + ns_offset;
		}
		ns_offset -= resource_size(nsblk->res[i]);
	}

	dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
	return SIZE_MAX;
}

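/*
 * Each internal LBA stores the data sector first, followed by its
 * integrity metadata.  nd_blk_rw_integrity() transfers the metadata
 * portion between the bio's integrity payload and the media, one
 * integrity bvec at a time.
 */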
#ifdef CONFIG_BLK_DEV_INTEGRITY
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
				struct bio_integrity_payload *bip, u64 lba,
				int rw)
{
	unsigned int len = nd_blk_meta_size(blk_dev);
	resource_size_t dev_offset, ns_offset;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_region *ndbr;
	int err = 0;

	nsblk = blk_dev->nsblk;
	ndbr = blk_dev->ndbr;
	ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size;
	dev_offset = to_dev_offset(nsblk, ns_offset, len);
	if (dev_offset == SIZE_MAX)
		return -EIO;

	while (len) {
		unsigned int cur_len;
		struct bio_vec bv;
		void *iobuf;

		bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
		/*
		 * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
		 * .bv_offset already adjusted for iter->bi_bvec_done, and we
		 * can use those directly.
		 */

		cur_len = min(len, bv.bv_len);
		iobuf = kmap_atomic(bv.bv_page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
				cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		len -= cur_len;
		dev_offset += cur_len;
		bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len);
	}

	return err;
}

#else /* CONFIG_BLK_DEV_INTEGRITY */
static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev,
				struct bio_integrity_payload *bip, u64 lba,
				int rw)
{
	return 0;
}
#endif

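/*
 * Transfer one data bvec.  With an integrity payload attached, the
 * transfer must proceed sector by sector so the matching metadata can
 * be moved after each data sector; without one, the whole bvec can go
 * through do_io() in a single call.
 */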
static int nd_blk_do_bvec(struct nd_blk_device *blk_dev,
			struct bio_integrity_payload *bip, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t dev_offset, ns_offset;
	int err = 0;
	void *iobuf;
	u64 lba;

	while (len) {
		unsigned int cur_len;

		/*
		 * If we don't have an integrity payload, we don't have to
		 * split the bvec into sectors, as this would cause unnecessary
		 * Block Window setup/move steps.  The do_io routine is capable
		 * of handling len <= PAGE_SIZE.
		 */
		cur_len = bip ? min(len, blk_dev->sector_size) : len;

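		/*
		 * Convert the 512-byte sector index to an internal LBA and
		 * from there to a byte offset within the namespace.
		 */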
		lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size);
		ns_offset = lba * blk_dev->internal_lbasize;
		dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len);
		if (dev_offset == SIZE_MAX)
			return -EIO;

		iobuf = kmap_atomic(page);
		err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
		kunmap_atomic(iobuf);
		if (err)
			return err;

		if (bip) {
			err = nd_blk_rw_integrity(blk_dev, bip, lba, rw);
			if (err)
				return err;
		}
		len -= cur_len;
		off += cur_len;
		sector += blk_dev->sector_size >> SECTOR_SHIFT;
	}

	return err;
}

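/*
 * Entry point for all I/O to the disk.  Walks the bio segment by
 * segment, performing each data (and, if present, integrity) transfer
 * synchronously through the block window aperture.
 */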
static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct gendisk *disk = bdev->bd_disk;
	struct bio_integrity_payload *bip;
	struct nd_blk_device *blk_dev;
	struct bvec_iter iter;
	unsigned long start;
	struct bio_vec bvec;
	int err = 0, rw;
	bool do_acct;

	/*
	 * bio_integrity_enabled also checks if the bio already has an
	 * integrity payload attached. If it does, we *don't* do a
	 * bio_integrity_prep here - the payload has been generated by
	 * another kernel subsystem, and we just pass it through.
	 */
	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
		bio->bi_error = -EIO;
		goto out;
	}

	bip = bio_integrity(bio);
	blk_dev = disk->private_data;
	rw = bio_data_dir(bio);
	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		unsigned int len = bvec.bv_len;

		BUG_ON(len > PAGE_SIZE);
		err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len,
					bvec.bv_offset, rw, iter.bi_sector);
		if (err) {
			dev_info(&blk_dev->nsblk->common.dev,
					"io error in %s sector %lld, len %d\n",
					(rw == READ) ? "READ" : "WRITE",
					(unsigned long long) iter.bi_sector, len);
			bio->bi_error = err;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

 out:
	bio_endio(bio);
	return BLK_QC_T_NONE;
}

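/*
 * Byte-granularity access for agents that claim the namespace (e.g.
 * the BTT), wired up as ndns->rw_bytes in nd_blk_probe().
 */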
static int nd_blk_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *iobuf, size_t n, int rw)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim);
	struct nd_namespace_blk *nsblk = blk_dev->nsblk;
	struct nd_blk_region *ndbr = blk_dev->ndbr;
	resource_size_t dev_offset;

	if (unlikely(offset + n > blk_dev->disk_size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	dev_offset = to_dev_offset(nsblk, offset, n);
	if (dev_offset == SIZE_MAX)
		return -EIO;

	return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
}

static const struct block_device_operations nd_blk_fops = {
	.owner = THIS_MODULE,
	.revalidate_disk = nvdimm_revalidate_disk,
};

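/*
 * Allocate the queue and gendisk and publish the namespace as a block
 * device.  Only whole internal LBAs are exposed, so the advertised
 * capacity may be slightly less than the raw namespace size.
 */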
static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
		struct nd_blk_device *blk_dev)
{
	resource_size_t available_disk_size;
	struct gendisk *disk;
	u64 internal_nlba;

	internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize);
	available_disk_size = internal_nlba * blk_dev->sector_size;

	blk_dev->queue = blk_alloc_queue(GFP_KERNEL);
	if (!blk_dev->queue)
		return -ENOMEM;

	blk_queue_make_request(blk_dev->queue, nd_blk_make_request);
	blk_queue_max_hw_sectors(blk_dev->queue, UINT_MAX);
	blk_queue_bounce_limit(blk_dev->queue, BLK_BOUNCE_ANY);
	blk_queue_logical_block_size(blk_dev->queue, blk_dev->sector_size);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, blk_dev->queue);

	disk = blk_dev->disk = alloc_disk(0);
	if (!disk) {
		blk_cleanup_queue(blk_dev->queue);
		return -ENOMEM;
	}

	disk->driverfs_dev	= &ndns->dev;
	disk->first_minor	= 0;
	disk->fops		= &nd_blk_fops;
	disk->private_data	= blk_dev;
	disk->queue		= blk_dev->queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
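	/*
	 * Register the disk with zero capacity so no I/O can reach it
	 * before the integrity profile, if one is needed, has been set
	 * up; the real capacity is published below.
	 */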
	set_capacity(disk, 0);
	add_disk(disk);

	if (nd_blk_meta_size(blk_dev)) {
		int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev));

		if (rc) {
			del_gendisk(disk);
			put_disk(disk);
			blk_cleanup_queue(blk_dev->queue);
			return rc;
		}
	}

	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
	revalidate_disk(disk);
	return 0;
}

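/*
 * Bind to a BLK namespace: size the disk, select the logical sector
 * size, and either hand the namespace to the BTT or attach a raw
 * gendisk directly.
 */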
static int nd_blk_probe(struct device *dev)
{
	struct nd_namespace_common *ndns;
	struct nd_namespace_blk *nsblk;
	struct nd_blk_device *blk_dev;
	int rc;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	blk_dev = kzalloc(sizeof(*blk_dev), GFP_KERNEL);
	if (!blk_dev)
		return -ENOMEM;

	nsblk = to_nd_namespace_blk(&ndns->dev);
	blk_dev->disk_size = nvdimm_namespace_capacity(ndns);
	blk_dev->ndbr = to_nd_blk_region(dev->parent);
	blk_dev->nsblk = nsblk;
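	/*
	 * Round the on-media LBA size (sector + metadata) up to
	 * INT_LBASIZE_ALIGNMENT so every internal LBA starts on an
	 * aligned boundary, and expose 4K logical sectors when the
	 * namespace lbasize permits, otherwise 512 bytes.
	 */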
	blk_dev->internal_lbasize = roundup(nsblk->lbasize,
						INT_LBASIZE_ALIGNMENT);
	blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 4096 : 512);
	dev_set_drvdata(dev, blk_dev);

	ndns->rw_bytes = nd_blk_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, blk_dev) == 0) {
		/* we'll come back as btt-blk */
		rc = -ENXIO;
	} else
		rc = nd_blk_attach_disk(ndns, blk_dev);
	if (rc)
		kfree(blk_dev);
	return rc;
}

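/* Tear down everything nd_blk_attach_disk() set up. */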
static void nd_blk_detach_disk(struct nd_blk_device *blk_dev)
{
	del_gendisk(blk_dev->disk);
	put_disk(blk_dev->disk);
	blk_cleanup_queue(blk_dev->queue);
}

static int nd_blk_remove(struct device *dev)
{
	struct nd_blk_device *blk_dev = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	else
		nd_blk_detach_disk(blk_dev);
	kfree(blk_dev);

	return 0;
}

static struct nd_device_driver nd_blk_driver = {
	.probe = nd_blk_probe,
	.remove = nd_blk_remove,
	.drv = {
		.name = "nd_blk",
	},
	.type = ND_DRIVER_NAMESPACE_BLK,
};

static int __init nd_blk_init(void)
{
	return nd_driver_register(&nd_blk_driver);
}

static void __exit nd_blk_exit(void)
{
	driver_unregister(&nd_blk_driver.drv);
}

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
module_init(nd_blk_init);
module_exit(nd_blk_exit);