linux/drivers/md/dm-zoned-target.c
/*
 * Copyright (C) 2017 Western Digital Corporation or its affiliates.
 *
 * This file is released under the GPL.
 */

#include "dm-zoned.h"

#include <linux/module.h>

#define DM_MSG_PREFIX           "zoned"

#define DMZ_MIN_BIOS            8192

/*
 * Zone BIO context.
 */
struct dmz_bioctx {
        struct dmz_target       *target;
        struct dm_zone          *zone;
        struct bio              *bio;
        atomic_t                ref;
        blk_status_t            status;
};
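
/*
 * Note (descriptive, not part of the original source): one dmz_bioctx lives
 * in the per-BIO data area reserved by dmz_ctr() through ti->per_io_data_size
 * and is recovered with dm_per_bio_data() wherever it is needed in this file.
 */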

/*
 * Chunk work descriptor.
 */
struct dm_chunk_work {
        struct work_struct      work;
        atomic_t                refcount;
        struct dmz_target       *target;
        unsigned int            chunk;
        struct bio_list         bio_list;
};

/*
 * Target descriptor.
 */
struct dmz_target {
        struct dm_dev           *ddev;

        unsigned long           flags;

        /* Zoned block device information */
        struct dmz_dev          *dev;

        /* For metadata handling */
        struct dmz_metadata     *metadata;

        /* For reclaim */
        struct dmz_reclaim      *reclaim;

        /* For chunk work */
        struct mutex            chunk_lock;
        struct radix_tree_root  chunk_rxtree;
        struct workqueue_struct *chunk_wq;

        /* For cloned BIOs to zones */
        struct bio_set          *bio_set;

        /* For flush */
        spinlock_t              flush_lock;
        struct bio_list         flush_list;
        struct delayed_work     flush_work;
        struct workqueue_struct *flush_wq;
};

/*
 * Flush interval (in jiffies; 10 seconds).
 */
#define DMZ_FLUSH_PERIOD        (10 * HZ)
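
/*
 * Illustrative only (not from the original source): since HZ is the number
 * of jiffies per second, an equivalent, unit-explicit definition would be:
 *
 *      #define DMZ_FLUSH_PERIOD        msecs_to_jiffies(10000)
 */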

/*
 * Target BIO completion.
 */
static inline void dmz_bio_endio(struct bio *bio, blk_status_t status)
{
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

        if (bioctx->status == BLK_STS_OK && status != BLK_STS_OK)
                bioctx->status = status;
        bio_endio(bio);
}
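
/*
 * Note (descriptive, not part of the original source): every submission path
 * takes a reference on bioctx->ref (see dmz_submit_read_bio() and
 * dmz_submit_write_bio()). The bio_endio() call above funnels into
 * dmz_end_io() below, which completes the target BIO only once all of those
 * references have been dropped.
 */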

/*
 * Partial clone read BIO completion callback. This terminates the
 * target BIO when there are no more references to its context.
 */
static void dmz_read_bio_end_io(struct bio *bio)
{
        struct dmz_bioctx *bioctx = bio->bi_private;
        blk_status_t status = bio->bi_status;

        bio_put(bio);
        dmz_bio_endio(bioctx->bio, status);
}

/*
 * Issue a read BIO to a zone. The issued BIO may cover only part of the
 * original target BIO.
 */
static int dmz_submit_read_bio(struct dmz_target *dmz, struct dm_zone *zone,
                               struct bio *bio, sector_t chunk_block,
                               unsigned int nr_blocks)
{
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
        sector_t sector;
        struct bio *clone;

        /* BIO remap sector */
        sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);

        /* If the read is not partial, there is no need to clone the BIO */
        if (nr_blocks == dmz_bio_blocks(bio)) {
                /* Setup and submit the BIO */
                bio->bi_iter.bi_sector = sector;
                atomic_inc(&bioctx->ref);
                generic_make_request(bio);
                return 0;
        }

        /* Partial BIO: we need to clone the BIO */
        clone = bio_clone_fast(bio, GFP_NOIO, dmz->bio_set);
        if (!clone)
                return -ENOMEM;

        /* Setup the clone */
        clone->bi_iter.bi_sector = sector;
        clone->bi_iter.bi_size = dmz_blk2sect(nr_blocks) << SECTOR_SHIFT;
        clone->bi_end_io = dmz_read_bio_end_io;
        clone->bi_private = bioctx;

        bio_advance(bio, clone->bi_iter.bi_size);

        /* Submit the clone */
        atomic_inc(&bioctx->ref);
        generic_make_request(clone);

        return 0;
}
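
/*
 * Note (descriptive, not part of the original source): on the partial-read
 * path above, the clone covers only the first nr_blocks blocks. bio_advance()
 * moves the parent BIO's iterator past that range so that dmz_handle_read()
 * can keep mapping the remainder, while dmz_read_bio_end_io() forwards the
 * clone's completion status to the target BIO.
 */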

/*
 * Zero out pages of discarded blocks accessed by a read BIO.
 */
static void dmz_handle_read_zero(struct dmz_target *dmz, struct bio *bio,
                                 sector_t chunk_block, unsigned int nr_blocks)
{
        unsigned int size = nr_blocks << DMZ_BLOCK_SHIFT;

        /* Zero nr_blocks blocks at the BIO's current position */
        swap(bio->bi_iter.bi_size, size);
        zero_fill_bio(bio);
        swap(bio->bi_iter.bi_size, size);

        bio_advance(bio, size);
}
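
/*
 * Note (descriptive, not part of the original source): zero_fill_bio() zeroes
 * bi_iter.bi_size bytes starting at the BIO's current position, so bi_size is
 * temporarily swapped down to the range being cleared and restored before
 * advancing past it.
 */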

/*
 * Process a read BIO.
 */
static int dmz_handle_read(struct dmz_target *dmz, struct dm_zone *zone,
                           struct bio *bio)
{
        sector_t chunk_block = dmz_chunk_block(dmz->dev, dmz_bio_block(bio));
        unsigned int nr_blocks = dmz_bio_blocks(bio);
        sector_t end_block = chunk_block + nr_blocks;
        struct dm_zone *rzone, *bzone;
        int ret;

        /* Reads into unmapped chunks need only zero the BIO buffer */
        if (!zone) {
                zero_fill_bio(bio);
                return 0;
        }

        dmz_dev_debug(dmz->dev, "READ chunk %llu -> %s zone %u, block %llu, %u blocks",
                      (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
                      (dmz_is_rnd(zone) ? "RND" : "SEQ"),
                      dmz_id(dmz->metadata, zone),
                      (unsigned long long)chunk_block, nr_blocks);

        /* Check block validity to determine the read location */
        bzone = zone->bzone;
        while (chunk_block < end_block) {
                nr_blocks = 0;
                if (dmz_is_rnd(zone) || chunk_block < zone->wp_block) {
                        /* Test block validity in the data zone */
                        ret = dmz_block_valid(dmz->metadata, zone, chunk_block);
                        if (ret < 0)
                                return ret;
                        if (ret > 0) {
                                /* Read data zone blocks */
                                nr_blocks = ret;
                                rzone = zone;
                        }
                }

                /*
                 * No valid blocks found in the data zone.
                 * Check the buffer zone, if there is one.
                 */
                if (!nr_blocks && bzone) {
                        ret = dmz_block_valid(dmz->metadata, bzone, chunk_block);
                        if (ret < 0)
                                return ret;
                        if (ret > 0) {
                                /* Read buffer zone blocks */
                                nr_blocks = ret;
                                rzone = bzone;
                        }
                }

                if (nr_blocks) {
                        /* Valid blocks found: read them */
                        nr_blocks = min_t(unsigned int, nr_blocks, end_block - chunk_block);
                        ret = dmz_submit_read_bio(dmz, rzone, bio, chunk_block, nr_blocks);
                        if (ret)
                                return ret;
                        chunk_block += nr_blocks;
                } else {
                        /* No valid block: zero out the current BIO block */
                        dmz_handle_read_zero(dmz, bio, chunk_block, 1);
                        chunk_block++;
                }
        }

        return 0;
}

/*
 * Issue a write BIO to a zone.
 */
static void dmz_submit_write_bio(struct dmz_target *dmz, struct dm_zone *zone,
                                 struct bio *bio, sector_t chunk_block,
                                 unsigned int nr_blocks)
{
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

        /* Setup and submit the BIO */
        bio_set_dev(bio, dmz->dev->bdev);
        bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
        atomic_inc(&bioctx->ref);
        generic_make_request(bio);

        if (dmz_is_seq(zone))
                zone->wp_block += nr_blocks;
}

/*
 * Write blocks directly in a data zone, at the write pointer.
 * If a buffer zone is assigned, invalidate the blocks written
 * in place.
 */
static int dmz_handle_direct_write(struct dmz_target *dmz,
                                   struct dm_zone *zone, struct bio *bio,
                                   sector_t chunk_block,
                                   unsigned int nr_blocks)
{
        struct dmz_metadata *zmd = dmz->metadata;
        struct dm_zone *bzone = zone->bzone;
        int ret;

        if (dmz_is_readonly(zone))
                return -EROFS;

        /* Submit write */
        dmz_submit_write_bio(dmz, zone, bio, chunk_block, nr_blocks);

        /*
         * Validate the blocks in the data zone and invalidate
         * in the buffer zone, if there is one.
         */
        ret = dmz_validate_blocks(zmd, zone, chunk_block, nr_blocks);
        if (ret == 0 && bzone)
                ret = dmz_invalidate_blocks(zmd, bzone, chunk_block, nr_blocks);

        return ret;
}

/*
 * Write blocks in the buffer zone of @zone.
 * If no buffer zone is assigned yet, get one.
 * Called with @zone write locked.
 */
static int dmz_handle_buffered_write(struct dmz_target *dmz,
                                     struct dm_zone *zone, struct bio *bio,
                                     sector_t chunk_block,
                                     unsigned int nr_blocks)
{
        struct dmz_metadata *zmd = dmz->metadata;
        struct dm_zone *bzone;
        int ret;

        /* Get the buffer zone. One will be allocated if needed */
        bzone = dmz_get_chunk_buffer(zmd, zone);
        if (!bzone)
                return -ENOSPC;

        if (dmz_is_readonly(bzone))
                return -EROFS;

        /* Submit write */
        dmz_submit_write_bio(dmz, bzone, bio, chunk_block, nr_blocks);

        /*
         * Validate the blocks in the buffer zone
         * and invalidate in the data zone.
         */
        ret = dmz_validate_blocks(zmd, bzone, chunk_block, nr_blocks);
        if (ret == 0 && chunk_block < zone->wp_block)
                ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);

        return ret;
}

/*
 * Process a write BIO.
 */
static int dmz_handle_write(struct dmz_target *dmz, struct dm_zone *zone,
                            struct bio *bio)
{
        sector_t chunk_block = dmz_chunk_block(dmz->dev, dmz_bio_block(bio));
        unsigned int nr_blocks = dmz_bio_blocks(bio);

        if (!zone)
                return -ENOSPC;

        dmz_dev_debug(dmz->dev, "WRITE chunk %llu -> %s zone %u, block %llu, %u blocks",
                      (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
                      (dmz_is_rnd(zone) ? "RND" : "SEQ"),
                      dmz_id(dmz->metadata, zone),
                      (unsigned long long)chunk_block, nr_blocks);

        if (dmz_is_rnd(zone) || chunk_block == zone->wp_block) {
                /*
                 * zone is a random zone or it is a sequential zone
                 * and the BIO is aligned to the zone write pointer:
                 * direct write the zone.
                 */
                return dmz_handle_direct_write(dmz, zone, bio, chunk_block, nr_blocks);
        }

        /*
         * This is an unaligned write in a sequential zone:
         * use buffered write.
         */
        return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);
}
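
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * the write-path decision above expressed as a standalone predicate. A write
 * is buffered only when the zone is sequential and the BIO does not start
 * exactly at the zone write pointer.
 */
static inline bool example_write_needs_buffer(struct dm_zone *zone,
                                              sector_t chunk_block)
{
        /* Mirrors the test at the top of dmz_handle_write() */
        return !dmz_is_rnd(zone) && chunk_block != zone->wp_block;
}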

/*
 * Process a discard BIO.
 */
static int dmz_handle_discard(struct dmz_target *dmz, struct dm_zone *zone,
                              struct bio *bio)
{
        struct dmz_metadata *zmd = dmz->metadata;
        sector_t block = dmz_bio_block(bio);
        unsigned int nr_blocks = dmz_bio_blocks(bio);
        sector_t chunk_block = dmz_chunk_block(dmz->dev, block);
        int ret = 0;

        /* For unmapped chunks, there is nothing to do */
        if (!zone)
                return 0;

        if (dmz_is_readonly(zone))
                return -EROFS;

        dmz_dev_debug(dmz->dev, "DISCARD chunk %llu -> zone %u, block %llu, %u blocks",
                      (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
                      dmz_id(zmd, zone),
                      (unsigned long long)chunk_block, nr_blocks);

        /*
         * Invalidate blocks in the data zone and its
         * buffer zone if one is mapped.
         */
        if (dmz_is_rnd(zone) || chunk_block < zone->wp_block)
                ret = dmz_invalidate_blocks(zmd, zone, chunk_block, nr_blocks);
        if (ret == 0 && zone->bzone)
                ret = dmz_invalidate_blocks(zmd, zone->bzone,
                                            chunk_block, nr_blocks);
        return ret;
}

/*
 * Process a BIO.
 */
static void dmz_handle_bio(struct dmz_target *dmz, struct dm_chunk_work *cw,
                           struct bio *bio)
{
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
        struct dmz_metadata *zmd = dmz->metadata;
        struct dm_zone *zone;
        int ret;

        /*
         * Write may trigger a zone allocation. So make sure the
         * allocation can succeed.
         */
        if (bio_op(bio) == REQ_OP_WRITE)
                dmz_schedule_reclaim(dmz->reclaim);

        dmz_lock_metadata(zmd);

        /*
         * Get the data zone mapping the chunk. There may be no
         * mapping for read and discard. If a mapping is obtained,
         * the zone returned will be set to active state.
         */
        zone = dmz_get_chunk_mapping(zmd, dmz_bio_chunk(dmz->dev, bio),
                                     bio_op(bio));
        if (IS_ERR(zone)) {
                ret = PTR_ERR(zone);
                goto out;
        }

        /* Process the BIO */
        if (zone) {
                dmz_activate_zone(zone);
                bioctx->zone = zone;
        }

        switch (bio_op(bio)) {
        case REQ_OP_READ:
                ret = dmz_handle_read(dmz, zone, bio);
                break;
        case REQ_OP_WRITE:
                ret = dmz_handle_write(dmz, zone, bio);
                break;
        case REQ_OP_DISCARD:
        case REQ_OP_WRITE_ZEROES:
                ret = dmz_handle_discard(dmz, zone, bio);
                break;
        default:
                dmz_dev_err(dmz->dev, "Unsupported BIO operation 0x%x",
                            bio_op(bio));
                ret = -EIO;
        }

        /*
         * Release the chunk mapping. This will check that the mapping
         * is still valid, that is, that the zone used still has valid blocks.
         */
        if (zone)
                dmz_put_chunk_mapping(zmd, zone);
out:
        dmz_bio_endio(bio, errno_to_blk_status(ret));

        dmz_unlock_metadata(zmd);
}

/*
 * Increment a chunk reference counter.
 */
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
        atomic_inc(&cw->refcount);
}

/*
 * Decrement a chunk work reference count and
 * free it if it becomes 0.
 */
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
        if (atomic_dec_and_test(&cw->refcount)) {
                WARN_ON(!bio_list_empty(&cw->bio_list));
                radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
                kfree(cw);
        }
}

/*
 * Chunk BIO work function.
 */
static void dmz_chunk_work(struct work_struct *work)
{
        struct dm_chunk_work *cw = container_of(work, struct dm_chunk_work, work);
        struct dmz_target *dmz = cw->target;
        struct bio *bio;

        mutex_lock(&dmz->chunk_lock);

        /* Process the chunk BIOs */
        while ((bio = bio_list_pop(&cw->bio_list))) {
                mutex_unlock(&dmz->chunk_lock);
                dmz_handle_bio(dmz, cw, bio);
                mutex_lock(&dmz->chunk_lock);
                dmz_put_chunk_work(cw);
        }

        /* Queueing the work incremented the work refcount */
        dmz_put_chunk_work(cw);

        mutex_unlock(&dmz->chunk_lock);
}
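
/*
 * Note (descriptive, not part of the original source): reference pairing for
 * a chunk work item. dmz_queue_chunk_work() takes one reference per queued
 * BIO plus one when the work is actually queued; dmz_chunk_work() drops one
 * reference per processed BIO and then the queueing reference, and
 * dmz_put_chunk_work() removes the item from the radix tree and frees it
 * once the count reaches zero.
 */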

/*
 * Flush work.
 */
static void dmz_flush_work(struct work_struct *work)
{
        struct dmz_target *dmz = container_of(work, struct dmz_target, flush_work.work);
        struct bio *bio;
        int ret;

        /* Flush dirty metadata blocks */
        ret = dmz_flush_metadata(dmz->metadata);

        /* Process queued flush requests */
        while (1) {
                spin_lock(&dmz->flush_lock);
                bio = bio_list_pop(&dmz->flush_list);
                spin_unlock(&dmz->flush_lock);

                if (!bio)
                        break;

                dmz_bio_endio(bio, errno_to_blk_status(ret));
        }

        queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
}

/*
 * Get a chunk work and start it to process a new BIO.
 * If the BIO chunk has no work yet, create one.
 */
static void dmz_queue_chunk_work(struct dmz_target *dmz, struct bio *bio)
{
        unsigned int chunk = dmz_bio_chunk(dmz->dev, bio);
        struct dm_chunk_work *cw;

        mutex_lock(&dmz->chunk_lock);

        /* Get the BIO chunk work. If one is not active yet, create one */
        cw = radix_tree_lookup(&dmz->chunk_rxtree, chunk);
        if (!cw) {
                int ret;

                /* Create a new chunk work */
                cw = kmalloc(sizeof(struct dm_chunk_work), GFP_NOIO);
                if (!cw)
                        goto out;

                INIT_WORK(&cw->work, dmz_chunk_work);
                atomic_set(&cw->refcount, 0);
                cw->target = dmz;
                cw->chunk = chunk;
                bio_list_init(&cw->bio_list);

                ret = radix_tree_insert(&dmz->chunk_rxtree, chunk, cw);
                if (unlikely(ret)) {
                        kfree(cw);
                        cw = NULL;
                        goto out;
                }
        }

        bio_list_add(&cw->bio_list, bio);
        dmz_get_chunk_work(cw);

        if (queue_work(dmz->chunk_wq, &cw->work))
                dmz_get_chunk_work(cw);
out:
        mutex_unlock(&dmz->chunk_lock);
}
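
/*
 * Note (descriptive, not part of the original source): because all BIOs that
 * target the same chunk funnel through the single dm_chunk_work found in the
 * radix tree, operations on one chunk are handled in submission order, while
 * different chunks can be processed concurrently on the unbound chunk
 * workqueue.
 */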

/*
 * Process a new BIO.
 */
static int dmz_map(struct dm_target *ti, struct bio *bio)
{
        struct dmz_target *dmz = ti->private;
        struct dmz_dev *dev = dmz->dev;
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
        sector_t sector = bio->bi_iter.bi_sector;
        unsigned int nr_sectors = bio_sectors(bio);
        sector_t chunk_sector;

        dmz_dev_debug(dev, "BIO op %d sector %llu + %u => chunk %llu, block %llu, %u blocks",
                      bio_op(bio), (unsigned long long)sector, nr_sectors,
                      (unsigned long long)dmz_bio_chunk(dmz->dev, bio),
                      (unsigned long long)dmz_chunk_block(dmz->dev, dmz_bio_block(bio)),
                      (unsigned int)dmz_bio_blocks(bio));

        bio_set_dev(bio, dev->bdev);

        if (!nr_sectors && bio_op(bio) != REQ_OP_WRITE)
                return DM_MAPIO_REMAPPED;

        /* The BIO should be block aligned */
        if ((nr_sectors & DMZ_BLOCK_SECTORS_MASK) || (sector & DMZ_BLOCK_SECTORS_MASK))
                return DM_MAPIO_KILL;

        /* Initialize the BIO context */
        bioctx->target = dmz;
        bioctx->zone = NULL;
        bioctx->bio = bio;
        atomic_set(&bioctx->ref, 1);
        bioctx->status = BLK_STS_OK;

        /* Set the BIO pending in the flush list */
        if (!nr_sectors && bio_op(bio) == REQ_OP_WRITE) {
                spin_lock(&dmz->flush_lock);
                bio_list_add(&dmz->flush_list, bio);
                spin_unlock(&dmz->flush_lock);
                mod_delayed_work(dmz->flush_wq, &dmz->flush_work, 0);
                return DM_MAPIO_SUBMITTED;
        }

        /* Split zone BIOs to fit entirely into a zone */
        chunk_sector = sector & (dev->zone_nr_sectors - 1);
        if (chunk_sector + nr_sectors > dev->zone_nr_sectors)
                dm_accept_partial_bio(bio, dev->zone_nr_sectors - chunk_sector);

        /* Now ready to handle this BIO */
        dmz_reclaim_bio_acc(dmz->reclaim);
        dmz_queue_chunk_work(dmz, bio);

        return DM_MAPIO_SUBMITTED;
}
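
/*
 * Illustrative sketch (not part of the driver; the helper name is made up):
 * how a target sector decomposes into the chunk number and in-chunk block
 * offset used above. This mirrors the dmz_bio_chunk()/dmz_chunk_block()
 * helpers declared in dm-zoned.h, assuming the power-of-two zone size that
 * the shift fields of struct dmz_dev already imply.
 */
static inline void example_decompose_sector(struct dmz_dev *dev,
                                            sector_t sector,
                                            unsigned int *chunk,
                                            sector_t *chunk_block)
{
        /* One chunk maps to one zone, so the chunk index is the zone index */
        *chunk = sector >> dev->zone_nr_sectors_shift;
        /* In-zone offset, converted from 512-byte sectors to 4 KB blocks */
        *chunk_block = (sector & (dev->zone_nr_sectors - 1)) >>
                (DMZ_BLOCK_SHIFT - SECTOR_SHIFT);
}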

/*
 * Completed target BIO processing.
 */
static int dmz_end_io(struct dm_target *ti, struct bio *bio, blk_status_t *error)
{
        struct dmz_bioctx *bioctx = dm_per_bio_data(bio, sizeof(struct dmz_bioctx));

        if (bioctx->status == BLK_STS_OK && *error)
                bioctx->status = *error;

        if (!atomic_dec_and_test(&bioctx->ref))
                return DM_ENDIO_INCOMPLETE;

        /* Done */
        bio->bi_status = bioctx->status;

        if (bioctx->zone) {
                struct dm_zone *zone = bioctx->zone;

                if (*error && bio_op(bio) == REQ_OP_WRITE) {
                        if (dmz_is_seq(zone))
                                set_bit(DMZ_SEQ_WRITE_ERR, &zone->flags);
                }
                dmz_deactivate_zone(zone);
        }

        return DM_ENDIO_DONE;
}

/*
 * Get zoned device information.
 */
static int dmz_get_zoned_device(struct dm_target *ti, char *path)
{
        struct dmz_target *dmz = ti->private;
        struct request_queue *q;
        struct dmz_dev *dev;
        sector_t aligned_capacity;
        int ret;

        /* Get the target device */
        ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev);
        if (ret) {
                ti->error = "Get target device failed";
                dmz->ddev = NULL;
                return ret;
        }

        dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL);
        if (!dev) {
                ret = -ENOMEM;
                goto err;
        }

        dev->bdev = dmz->ddev->bdev;
        (void)bdevname(dev->bdev, dev->name);

        if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) {
                ti->error = "Not a zoned block device";
                ret = -EINVAL;
                goto err;
        }

        q = bdev_get_queue(dev->bdev);
        dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT;
        aligned_capacity = dev->capacity & ~(blk_queue_zone_sectors(q) - 1);
        if (ti->begin ||
            ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) {
                ti->error = "Partial mapping not supported";
                ret = -EINVAL;
                goto err;
        }

        dev->zone_nr_sectors = blk_queue_zone_sectors(q);
        dev->zone_nr_sectors_shift = ilog2(dev->zone_nr_sectors);

        dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
        dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);

        dev->nr_zones = (dev->capacity + dev->zone_nr_sectors - 1)
                >> dev->zone_nr_sectors_shift;

        dmz->dev = dev;

        return 0;
err:
        dm_put_device(ti, dmz->ddev);
        kfree(dev);

        return ret;
}

/*
 * Cleanup zoned device information.
 */
static void dmz_put_zoned_device(struct dm_target *ti)
{
        struct dmz_target *dmz = ti->private;

        dm_put_device(ti, dmz->ddev);
        kfree(dmz->dev);
        dmz->dev = NULL;
}

/*
 * Setup target.
 */
static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        struct dmz_target *dmz;
        struct dmz_dev *dev;
        int ret;

        /* Check arguments */
        if (argc != 1) {
                ti->error = "Invalid argument count";
                return -EINVAL;
        }

        /* Allocate and initialize the target descriptor */
        dmz = kzalloc(sizeof(struct dmz_target), GFP_KERNEL);
        if (!dmz) {
                ti->error = "Unable to allocate the zoned target descriptor";
                return -ENOMEM;
        }
        ti->private = dmz;

        /* Get the target zoned block device */
        ret = dmz_get_zoned_device(ti, argv[0]);
        if (ret) {
                dmz->ddev = NULL;
                goto err;
        }

        /* Initialize metadata */
        dev = dmz->dev;
        ret = dmz_ctr_metadata(dev, &dmz->metadata);
        if (ret) {
                ti->error = "Metadata initialization failed";
                goto err_dev;
        }

        /* Set target (no write same support) */
        ti->max_io_len = dev->zone_nr_sectors;
        ti->num_flush_bios = 1;
        ti->num_discard_bios = 1;
        ti->num_write_zeroes_bios = 1;
        ti->per_io_data_size = sizeof(struct dmz_bioctx);
        ti->flush_supported = true;
        ti->discards_supported = true;
        ti->split_discard_bios = true;

        /* The exposed capacity is the number of chunks that can be mapped */
        ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;

        /* Zone BIO */
        dmz->bio_set = bioset_create(DMZ_MIN_BIOS, 0, 0);
        if (!dmz->bio_set) {
                ti->error = "Create BIO set failed";
                ret = -ENOMEM;
                goto err_meta;
        }

        /* Chunk BIO work */
        mutex_init(&dmz->chunk_lock);
        INIT_RADIX_TREE(&dmz->chunk_rxtree, GFP_KERNEL);
        dmz->chunk_wq = alloc_workqueue("dmz_cwq_%s", WQ_MEM_RECLAIM | WQ_UNBOUND,
                                        0, dev->name);
        if (!dmz->chunk_wq) {
                ti->error = "Create chunk workqueue failed";
                ret = -ENOMEM;
                goto err_bio;
        }

        /* Flush work */
        spin_lock_init(&dmz->flush_lock);
        bio_list_init(&dmz->flush_list);
        INIT_DELAYED_WORK(&dmz->flush_work, dmz_flush_work);
        dmz->flush_wq = alloc_ordered_workqueue("dmz_fwq_%s", WQ_MEM_RECLAIM,
                                                dev->name);
        if (!dmz->flush_wq) {
                ti->error = "Create flush workqueue failed";
                ret = -ENOMEM;
                goto err_cwq;
        }
        mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);

        /* Initialize reclaim */
        ret = dmz_ctr_reclaim(dev, dmz->metadata, &dmz->reclaim);
        if (ret) {
                ti->error = "Zone reclaim initialization failed";
                goto err_fwq;
        }

        dmz_dev_info(dev, "Target device: %llu 512-byte logical sectors (%llu blocks)",
                     (unsigned long long)ti->len,
                     (unsigned long long)dmz_sect2blk(ti->len));

        return 0;
err_fwq:
        destroy_workqueue(dmz->flush_wq);
err_cwq:
        destroy_workqueue(dmz->chunk_wq);
err_bio:
        bioset_free(dmz->bio_set);
err_meta:
        dmz_dtr_metadata(dmz->metadata);
err_dev:
        dmz_put_zoned_device(ti);
err:
        kfree(dmz);

        return ret;
}
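
/*
 * Usage sketch (an assumption based on the constructor above, not taken from
 * this file): the target takes a single argument, the backing zoned block
 * device, and is normally created over the whole device, e.g.:
 *
 *      echo "0 $(blockdev --getsz /dev/sdX) zoned /dev/sdX" | \
 *              dmsetup create dmz-sdX
 *
 * dmz_ctr() then shrinks ti->len to the capacity actually usable for data,
 * i.e. the number of mappable chunks times the zone size.
 */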

/*
 * Cleanup target.
 */
static void dmz_dtr(struct dm_target *ti)
{
        struct dmz_target *dmz = ti->private;

        flush_workqueue(dmz->chunk_wq);
        destroy_workqueue(dmz->chunk_wq);

        dmz_dtr_reclaim(dmz->reclaim);

        cancel_delayed_work_sync(&dmz->flush_work);
        destroy_workqueue(dmz->flush_wq);

        (void) dmz_flush_metadata(dmz->metadata);

        dmz_dtr_metadata(dmz->metadata);

        bioset_free(dmz->bio_set);

        dmz_put_zoned_device(ti);

        kfree(dmz);
}

/*
 * Setup target request queue limits.
 */
static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
        struct dmz_target *dmz = ti->private;
        unsigned int chunk_sectors = dmz->dev->zone_nr_sectors;

        limits->logical_block_size = DMZ_BLOCK_SIZE;
        limits->physical_block_size = DMZ_BLOCK_SIZE;

        blk_limits_io_min(limits, DMZ_BLOCK_SIZE);
        blk_limits_io_opt(limits, DMZ_BLOCK_SIZE);

        limits->discard_alignment = DMZ_BLOCK_SIZE;
        limits->discard_granularity = DMZ_BLOCK_SIZE;
        limits->max_discard_sectors = chunk_sectors;
        limits->max_hw_discard_sectors = chunk_sectors;
        limits->max_write_zeroes_sectors = chunk_sectors;

        /* FS hint to try to align to the device zone size */
        limits->chunk_sectors = chunk_sectors;
        limits->max_sectors = chunk_sectors;

        /* We are exposing a drive-managed zoned block device */
        limits->zoned = BLK_ZONED_NONE;
}

/*
 * Pass on ioctl to the backend device.
 */
static int dmz_prepare_ioctl(struct dm_target *ti,
                             struct block_device **bdev, fmode_t *mode)
{
        struct dmz_target *dmz = ti->private;

        *bdev = dmz->dev->bdev;

        return 0;
}

/*
 * Stop works on suspend.
 */
static void dmz_suspend(struct dm_target *ti)
{
        struct dmz_target *dmz = ti->private;

        flush_workqueue(dmz->chunk_wq);
        dmz_suspend_reclaim(dmz->reclaim);
        cancel_delayed_work_sync(&dmz->flush_work);
}

/*
 * Restart works on resume or if suspend failed.
 */
static void dmz_resume(struct dm_target *ti)
{
        struct dmz_target *dmz = ti->private;

        queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
        dmz_resume_reclaim(dmz->reclaim);
}

static int dmz_iterate_devices(struct dm_target *ti,
                               iterate_devices_callout_fn fn, void *data)
{
        struct dmz_target *dmz = ti->private;
        struct dmz_dev *dev = dmz->dev;
        sector_t capacity = dev->capacity & ~(dev->zone_nr_sectors - 1);

        return fn(ti, dmz->ddev, 0, capacity, data);
}

static struct target_type dmz_type = {
        .name            = "zoned",
        .version         = {1, 0, 0},
        .features        = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
        .module          = THIS_MODULE,
        .ctr             = dmz_ctr,
        .dtr             = dmz_dtr,
        .map             = dmz_map,
        .end_io          = dmz_end_io,
        .io_hints        = dmz_io_hints,
        .prepare_ioctl   = dmz_prepare_ioctl,
        .postsuspend     = dmz_suspend,
        .resume          = dmz_resume,
        .iterate_devices = dmz_iterate_devices,
};

static int __init dmz_init(void)
{
        return dm_register_target(&dmz_type);
}

static void __exit dmz_exit(void)
{
        dm_unregister_target(&dmz_type);
}

module_init(dmz_init);
module_exit(dmz_exit);

MODULE_DESCRIPTION(DM_NAME " target for zoned block devices");
MODULE_AUTHOR("Damien Le Moal <damien.lemoal@wdc.com>");
MODULE_LICENSE("GPL");