linux/block/blk-zoned.c
<<
>>
Prefs
   1/*
   2 * Zoned block device handling
   3 *
   4 * Copyright (c) 2015, Hannes Reinecke
   5 * Copyright (c) 2015, SUSE Linux GmbH
   6 *
   7 * Copyright (c) 2016, Damien Le Moal
   8 * Copyright (c) 2016, Western Digital
   9 */
  10
  11#include <linux/kernel.h>
  12#include <linux/module.h>
  13#include <linux/rbtree.h>
  14#include <linux/blkdev.h>
  15
  16static inline sector_t blk_zone_start(struct request_queue *q,
  17                                      sector_t sector)
  18{
  19        sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
  20
  21        return sector & ~zone_mask;
  22}
  23
  24/*
  25 * Return true if a request is a write requests that needs zone write locking.
  26 */
  27bool blk_req_needs_zone_write_lock(struct request *rq)
  28{
  29        if (!rq->q->seq_zones_wlock)
  30                return false;
  31
  32        if (blk_rq_is_passthrough(rq))
  33                return false;
  34
  35        switch (req_op(rq)) {
  36        case REQ_OP_WRITE_ZEROES:
  37        case REQ_OP_WRITE_SAME:
  38        case REQ_OP_WRITE:
  39                return blk_rq_zone_is_seq(rq);
  40        default:
  41                return false;
  42        }
  43}
  44EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
  45
  46void __blk_req_zone_write_lock(struct request *rq)
  47{
  48        if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
  49                                          rq->q->seq_zones_wlock)))
  50                return;
  51
  52        WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
  53        rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
  54}
  55EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
  56
  57void __blk_req_zone_write_unlock(struct request *rq)
  58{
  59        rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
  60        if (rq->q->seq_zones_wlock)
  61                WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
  62                                                 rq->q->seq_zones_wlock));
  63}
  64EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
  65
  66/*
  67 * Check that a zone report belongs to the partition.
  68 * If yes, fix its start sector and write pointer, copy it in the
  69 * zone information array and return true. Return false otherwise.
  70 */
  71static bool blkdev_report_zone(struct block_device *bdev,
  72                               struct blk_zone *rep,
  73                               struct blk_zone *zone)
  74{
  75        sector_t offset = get_start_sect(bdev);
  76
  77        if (rep->start < offset)
  78                return false;
  79
  80        rep->start -= offset;
  81        if (rep->start + rep->len > bdev->bd_part->nr_sects)
  82                return false;
  83
  84        if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
  85                rep->wp = rep->start + rep->len;
  86        else
  87                rep->wp -= offset;
  88        memcpy(zone, rep, sizeof(struct blk_zone));
  89
  90        return true;
  91}
  92
  93/**
  94 * blkdev_report_zones - Get zones information
  95 * @bdev:       Target block device
  96 * @sector:     Sector from which to report zones
  97 * @zones:      Array of zone structures where to return the zones information
  98 * @nr_zones:   Number of zone structures in the zone array
  99 * @gfp_mask:   Memory allocation flags (for bio_alloc)
 100 *
 101 * Description:
 102 *    Get zone information starting from the zone containing @sector.
 103 *    The number of zone information reported may be less than the number
 104 *    requested by @nr_zones. The number of zones actually reported is
 105 *    returned in @nr_zones.
 106 */
 107int blkdev_report_zones(struct block_device *bdev,
 108                        sector_t sector,
 109                        struct blk_zone *zones,
 110                        unsigned int *nr_zones,
 111                        gfp_t gfp_mask)
 112{
 113        struct request_queue *q = bdev_get_queue(bdev);
 114        struct blk_zone_report_hdr *hdr;
 115        unsigned int nrz = *nr_zones;
 116        struct page *page;
 117        unsigned int nr_rep;
 118        size_t rep_bytes;
 119        unsigned int nr_pages;
 120        struct bio *bio;
 121        struct bio_vec *bv;
 122        unsigned int i, n, nz;
 123        unsigned int ofst;
 124        void *addr;
 125        int ret;
 126
 127        if (!q)
 128                return -ENXIO;
 129
 130        if (!blk_queue_is_zoned(q))
 131                return -EOPNOTSUPP;
 132
 133        if (!nrz)
 134                return 0;
 135
 136        if (sector > bdev->bd_part->nr_sects) {
 137                *nr_zones = 0;
 138                return 0;
 139        }
 140
 141        /*
 142         * The zone report has a header. So make room for it in the
 143         * payload. Also make sure that the report fits in a single BIO
 144         * that will not be split down the stack.
 145         */
 146        rep_bytes = sizeof(struct blk_zone_report_hdr) +
 147                sizeof(struct blk_zone) * nrz;
 148        rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
 149        if (rep_bytes > (queue_max_sectors(q) << 9))
 150                rep_bytes = queue_max_sectors(q) << 9;
 151
 152        nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
 153                         rep_bytes >> PAGE_SHIFT);
 154        nr_pages = min_t(unsigned int, nr_pages,
 155                         queue_max_segments(q));
 156
 157        bio = bio_alloc(gfp_mask, nr_pages);
 158        if (!bio)
 159                return -ENOMEM;
 160
 161        bio_set_dev(bio, bdev);
 162        bio->bi_iter.bi_sector = blk_zone_start(q, sector);
 163        bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
 164
 165        for (i = 0; i < nr_pages; i++) {
 166                page = alloc_page(gfp_mask);
 167                if (!page) {
 168                        ret = -ENOMEM;
 169                        goto out;
 170                }
 171                if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
 172                        __free_page(page);
 173                        break;
 174                }
 175        }
 176
 177        if (i == 0)
 178                ret = -ENOMEM;
 179        else
 180                ret = submit_bio_wait(bio);
 181        if (ret)
 182                goto out;
 183
 184        /*
 185         * Process the report result: skip the header and go through the
 186         * reported zones to fixup and fixup the zone information for
 187         * partitions. At the same time, return the zone information into
 188         * the zone array.
 189         */
 190        n = 0;
 191        nz = 0;
 192        nr_rep = 0;
 193        bio_for_each_segment_all(bv, bio, i) {
 194
 195                if (!bv->bv_page)
 196                        break;
 197
 198                addr = kmap_atomic(bv->bv_page);
 199
 200                /* Get header in the first page */
 201                ofst = 0;
 202                if (!nr_rep) {
 203                        hdr = (struct blk_zone_report_hdr *) addr;
 204                        nr_rep = hdr->nr_zones;
 205                        ofst = sizeof(struct blk_zone_report_hdr);
 206                }
 207
 208                /* Fixup and report zones */
 209                while (ofst < bv->bv_len &&
 210                       n < nr_rep && nz < nrz) {
 211                        if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
 212                                nz++;
 213                        ofst += sizeof(struct blk_zone);
 214                        n++;
 215                }
 216
 217                kunmap_atomic(addr);
 218
 219                if (n >= nr_rep || nz >= nrz)
 220                        break;
 221
 222        }
 223
 224        *nr_zones = nz;
 225out:
 226        bio_for_each_segment_all(bv, bio, i)
 227                __free_page(bv->bv_page);
 228        bio_put(bio);
 229
 230        return ret;
 231}
 232EXPORT_SYMBOL_GPL(blkdev_report_zones);
 233
 234/**
 235 * blkdev_reset_zones - Reset zones write pointer
 236 * @bdev:       Target block device
 237 * @sector:     Start sector of the first zone to reset
 238 * @nr_sectors: Number of sectors, at least the length of one zone
 239 * @gfp_mask:   Memory allocation flags (for bio_alloc)
 240 *
 241 * Description:
 242 *    Reset the write pointer of the zones contained in the range
 243 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 244 *    is valid, but the specified range should not contain conventional zones.
 245 */
 246int blkdev_reset_zones(struct block_device *bdev,
 247                       sector_t sector, sector_t nr_sectors,
 248                       gfp_t gfp_mask)
 249{
 250        struct request_queue *q = bdev_get_queue(bdev);
 251        sector_t zone_sectors;
 252        sector_t end_sector = sector + nr_sectors;
 253        struct bio *bio;
 254        int ret;
 255
 256        if (!q)
 257                return -ENXIO;
 258
 259        if (!blk_queue_is_zoned(q))
 260                return -EOPNOTSUPP;
 261
 262        if (end_sector > bdev->bd_part->nr_sects)
 263                /* Out of range */
 264                return -EINVAL;
 265
 266        /* Check alignment (handle eventual smaller last zone) */
 267        zone_sectors = blk_queue_zone_sectors(q);
 268        if (sector & (zone_sectors - 1))
 269                return -EINVAL;
 270
 271        if ((nr_sectors & (zone_sectors - 1)) &&
 272            end_sector != bdev->bd_part->nr_sects)
 273                return -EINVAL;
 274
 275        while (sector < end_sector) {
 276
 277                bio = bio_alloc(gfp_mask, 0);
 278                bio->bi_iter.bi_sector = sector;
 279                bio_set_dev(bio, bdev);
 280                bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
 281
 282                ret = submit_bio_wait(bio);
 283                bio_put(bio);
 284
 285                if (ret)
 286                        return ret;
 287
 288                sector += zone_sectors;
 289
 290                /* This may take a while, so be nice to others */
 291                cond_resched();
 292
 293        }
 294
 295        return 0;
 296}
 297EXPORT_SYMBOL_GPL(blkdev_reset_zones);
 298
 299/*
 300 * BLKREPORTZONE ioctl processing.
 301 * Called from blkdev_ioctl.
 302 */
 303int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 304                              unsigned int cmd, unsigned long arg)
 305{
 306        void __user *argp = (void __user *)arg;
 307        struct request_queue *q;
 308        struct blk_zone_report rep;
 309        struct blk_zone *zones;
 310        int ret;
 311
 312        if (!argp)
 313                return -EINVAL;
 314
 315        q = bdev_get_queue(bdev);
 316        if (!q)
 317                return -ENXIO;
 318
 319        if (!blk_queue_is_zoned(q))
 320                return -ENOTTY;
 321
 322        if (!capable(CAP_SYS_ADMIN))
 323                return -EACCES;
 324
 325        if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
 326                return -EFAULT;
 327
 328        if (!rep.nr_zones)
 329                return -EINVAL;
 330
 331        zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
 332        if (!zones)
 333                return -ENOMEM;
 334
 335        ret = blkdev_report_zones(bdev, rep.sector,
 336                                  zones, &rep.nr_zones,
 337                                  GFP_KERNEL);
 338        if (ret)
 339                goto out;
 340
 341        if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
 342                ret = -EFAULT;
 343                goto out;
 344        }
 345
 346        if (rep.nr_zones) {
 347                if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
 348                                 sizeof(struct blk_zone) * rep.nr_zones))
 349                        ret = -EFAULT;
 350        }
 351
 352 out:
 353        kfree(zones);
 354
 355        return ret;
 356}
 357
 358/*
 359 * BLKRESETZONE ioctl processing.
 360 * Called from blkdev_ioctl.
 361 */
 362int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 363                             unsigned int cmd, unsigned long arg)
 364{
 365        void __user *argp = (void __user *)arg;
 366        struct request_queue *q;
 367        struct blk_zone_range zrange;
 368
 369        if (!argp)
 370                return -EINVAL;
 371
 372        q = bdev_get_queue(bdev);
 373        if (!q)
 374                return -ENXIO;
 375
 376        if (!blk_queue_is_zoned(q))
 377                return -ENOTTY;
 378
 379        if (!capable(CAP_SYS_ADMIN))
 380                return -EACCES;
 381
 382        if (!(mode & FMODE_WRITE))
 383                return -EBADF;
 384
 385        if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
 386                return -EFAULT;
 387
 388        return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
 389                                  GFP_KERNEL);
 390}
 391