linux/drivers/md/dm-rq.c
/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

/*
 * One of these is allocated per request.
 */
struct dm_rq_target_io {
        struct mapped_device *md;
        struct dm_target *ti;
        struct request *orig, *clone;
        struct kthread_work work;
        blk_status_t error;
        union map_info info;
        struct dm_stats_aux stats_aux;
        unsigned long duration_jiffies;
        unsigned n_sectors;
        unsigned completed;
};

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Number of reserved IOs in request-based DM's mempools, set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS      256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

unsigned dm_get_reserved_rq_based_ios(void)
{
        return __dm_get_module_param(&reserved_rq_based_ios,
                                     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
        return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
        return __dm_get_module_param(&dm_mq_queue_depth,
                                     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
        return queue_is_mq(md->queue);
}

void dm_start_queue(struct request_queue *q)
{
        blk_mq_unquiesce_queue(q);
        blk_mq_kick_requeue_list(q);
}

void dm_stop_queue(struct request_queue *q)
{
        blk_mq_quiesce_queue(q);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
        struct dm_rq_target_io *tio = info->tio;
        unsigned int nr_bytes = info->orig->bi_iter.bi_size;
        blk_status_t error = clone->bi_status;
        bool is_last = !clone->bi_next;

        bio_put(clone);

        if (tio->error)
                /*
                 * An error has already been detected on the request.
                 * Once an error has occurred, just let the clone's
                 * ->end_io() handle the remainder.
                 */
                return;
        else if (error) {
                /*
                 * Don't report the error to the upper layer yet.
                 * The error handling decision is made by the target driver,
                 * when the request is completed.
                 */
                tio->error = error;
                goto exit;
        }

        /*
         * I/O for the bio successfully completed.
         * Report the data completion to the upper layer.
         */
        tio->completed += nr_bytes;

        /*
         * Update the original request.
         * Do not use blk_mq_end_request() here, because it may complete
         * the original request before the clone, and break the ordering.
         */
        if (is_last)
 exit:
                blk_update_request(tio->orig, BLK_STS_OK, tio->completed);
}

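/*
 * The tio lives in the request's blk-mq per-driver data (PDU); its size is
 * set via tag_set->cmd_size in dm_mq_init_request_queue().
 */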
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
        return blk_mq_rq_to_pdu(rq);
}

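/*
 * Account a completed request in dm-stats, if stats are in use.
 * dm_start_request() stored the start time in tio->duration_jiffies.
 */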
static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies - tio->duration_jiffies;
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, true,
                                    tio->duration_jiffies, &tio->stats_aux);
        }
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed by dm_put() at the end of this function.
 * Alternatively, do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md)
{
        /*
         * dm_put() must be at the end of this function. See the comment above.
         */
        dm_put(md);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, blk_status_t error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;

        blk_rq_unprep_clone(clone);
        tio->ti->type->release_clone_rq(clone, NULL);

        rq_end_stats(md, rq);
        blk_mq_end_request(rq, error);
        rq_completed(md);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
        blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
        __dm_mq_kick_requeue_list(md->queue, 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
        blk_mq_requeue_request(rq, false);
        __dm_mq_kick_requeue_list(rq->q, msecs);
}

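/*
 * Release any existing clone and requeue the original request, optionally
 * after a 100ms delay.  Drops the md reference taken in dm_start_request().
 */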
static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        unsigned long delay_ms = delay_requeue ? 100 : 0;

        rq_end_stats(md, rq);
        if (tio->clone) {
                blk_rq_unprep_clone(tio->clone);
                tio->ti->type->release_clone_rq(tio->clone, NULL);
        }

        dm_mq_delay_requeue_request(rq, delay_ms);
        rq_completed(md);
}

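/*
 * Decide how to finish a completed clone: consult the target's rq_end_io()
 * hook (if the request was mapped), disable operations the lower device
 * rejects as unsupported, then complete, requeue or leave the original
 * request as directed by the target.
 */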
static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
        int r = DM_ENDIO_DONE;
        struct dm_rq_target_io *tio = clone->end_io_data;
        dm_request_endio_fn rq_end_io = NULL;

        if (tio->ti) {
                rq_end_io = tio->ti->type->rq_end_io;

                if (mapped && rq_end_io)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }

        if (unlikely(error == BLK_STS_TARGET)) {
                if (req_op(clone) == REQ_OP_DISCARD &&
                    !clone->q->limits.max_discard_sectors)
                        disable_discard(tio->md);
                else if (req_op(clone) == REQ_OP_WRITE_SAME &&
                         !clone->q->limits.max_write_same_sectors)
                        disable_write_same(tio->md);
                else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
                         !clone->q->limits.max_write_zeroes_sectors)
                        disable_write_zeroes(tio->md);
        }

        switch (r) {
        case DM_ENDIO_DONE:
                /* The target wants to complete the I/O */
                dm_end_request(clone, error);
                break;
        case DM_ENDIO_INCOMPLETE:
                /* The target will handle the I/O */
                return;
        case DM_ENDIO_REQUEUE:
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(tio, false);
                break;
        case DM_ENDIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        default:
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
        }
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
        bool mapped = true;
        struct dm_rq_target_io *tio = tio_from_request(rq);
        struct request *clone = tio->clone;

        if (!clone) {
                struct mapped_device *md = tio->md;

                rq_end_stats(md, rq);
                blk_mq_end_request(rq, tio->error);
                rq_completed(md);
                return;
        }

        if (rq->rq_flags & RQF_FAILED)
                mapped = false;

        dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, blk_status_t error)
{
        struct dm_rq_target_io *tio = tio_from_request(rq);

        tio->error = error;
        if (likely(!blk_should_fake_timeout(rq->q)))
                blk_mq_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * The target's rq_end_io() function isn't called.
 * This may be used when the target's clone_and_map_rq() function fails.
 */
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
        rq->rq_flags |= RQF_FAILED;
        dm_complete_request(rq, error);
}

static void end_clone_request(struct request *clone, blk_status_t error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;

        dm_complete_request(tio->orig, error);
}

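/*
 * Insert the prepared clone into the underlying device's queue.  On errors
 * other than BLK_STS_RESOURCE/BLK_STS_DEV_RESOURCE the original request is
 * completed with that error; resource errors are left to the caller, which
 * requeues the original request.
 */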
static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
        blk_status_t r;

        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;

        clone->start_time_ns = ktime_get_ns();
        r = blk_insert_cloned_request(clone->q, clone);
        if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
        return r;
}

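/*
 * Called by blk_rq_prep_clone() for each cloned bio: link the clone bio's
 * dm_rq_clone_bio_info back to the original bio and tio, and route its
 * completion through end_clone_bio().
 */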
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
                                 void *data)
{
        struct dm_rq_target_io *tio = data;
        struct dm_rq_clone_bio_info *info =
                container_of(bio, struct dm_rq_clone_bio_info, clone);

        info->orig = bio_orig;
        info->tio = tio;
        bio->bi_end_io = end_clone_bio;

        return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
                       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
        int r;

        r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
                              dm_rq_bio_constructor, tio);
        if (r)
                return r;

        clone->end_io = end_clone_request;
        clone->end_io_data = tio;

        tio->clone = clone;

        return 0;
}

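/*
 * Reset the per-request tio before each use.  tio->ti is filled in later by
 * the caller, once the target is known.
 */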
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
                     struct mapped_device *md)
{
        tio->md = md;
        tio->ti = NULL;
        tio->clone = NULL;
        tio->orig = rq;
        tio->error = 0;
        tio->completed = 0;
        /*
         * Avoid initializing info for blk-mq; it passes
         * target-specific data through info.ptr
         * (see: dm_mq_init_request)
         */
        if (!md->init_tio_pdu)
                memset(&tio->info, 0, sizeof(tio->info));
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
        int r;
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        struct request *clone = NULL;
        blk_status_t ret;

        r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
                break;
        case DM_MAPIO_REMAPPED:
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone, &tio->info);
                        return DM_MAPIO_REQUEUE;
                }

                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                ret = dm_dispatch_clone_request(clone, rq);
                if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                        blk_rq_unprep_clone(clone);
                        blk_mq_cleanup_rq(clone);
                        tio->ti->type->release_clone_rq(clone, &tio->info);
                        tio->clone = NULL;
                        return DM_MAPIO_REQUEUE;
                }
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                break;
        case DM_MAPIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        case DM_MAPIO_KILL:
                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, BLK_STS_IOERR);
                break;
        default:
                DMWARN("unimplemented target map return value: %d", r);
                BUG();
        }

        return r;
}

/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
        return sprintf(buf, "%u\n", 0);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
                                                     const char *buf, size_t count)
{
        return count;
}

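/*
 * Start the original request: mark it in flight for blk-mq, record dm-stats
 * start data, and take an md reference that rq_completed() drops later.
 */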
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
        blk_mq_start_request(orig);

        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies;
                tio->n_sectors = blk_rq_sectors(orig);
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, false, 0,
                                    &tio->stats_aux);
        }

        /*
         * Hold an md reference here for the in-flight I/O.
         * We can't rely on the reference count held by the device opener,
         * because the device may be closed during request completion,
         * once all bios are completed.
         * See the comment in rq_completed() too.
         */
        dm_get(md);
}

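/*
 * .init_request: runs once per preallocated request when the tag set is
 * created, not per I/O, so only set up state that never changes.
 */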
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
                              unsigned int hctx_idx, unsigned int numa_node)
{
        struct mapped_device *md = set->driver_data;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

        /*
         * Must initialize md member of tio, otherwise it won't
         * be available in dm_mq_queue_rq.
         */
        tio->md = md;

        if (md->init_tio_pdu) {
                /* target-specific per-io data is immediately after the tio */
                tio->info.ptr = tio + 1;
        }

        return 0;
}

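/*
 * .queue_rq: map an incoming request through the (normally immutable) target
 * and dispatch the resulting clone.  BLK_STS_RESOURCE is returned to have
 * blk-mq retry when the device is suspending, the target is busy, or the
 * mapping must be requeued.
 */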
static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
        struct mapped_device *md = tio->md;
        struct dm_target *ti = md->immutable_target;

        /*
         * blk-mq's unquiesce may be triggered by outside events, such as
         * an elevator switch or an nr_requests update, so a request may
         * arrive during suspend; simply ask blk-mq to requeue it.
         */
        if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)))
                return BLK_STS_RESOURCE;

        if (unlikely(!ti)) {
                int srcu_idx;
                struct dm_table *map = dm_get_live_table(md, &srcu_idx);

                ti = dm_table_find_target(map, 0);
                dm_put_live_table(md, srcu_idx);
        }

        if (ti->type->busy && ti->type->busy(ti))
                return BLK_STS_RESOURCE;

        dm_start_request(md, rq);

        /* Init tio using md established in .init_request */
        init_tio(tio, rq, md);

        /*
         * Establish tio->ti before calling map_request().
         */
        tio->ti = ti;

        /* Direct call is fine since .queue_rq allows allocations */
        if (map_request(tio) == DM_MAPIO_REQUEUE) {
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md);
                return BLK_STS_RESOURCE;
        }

        return BLK_STS_OK;
}

static const struct blk_mq_ops dm_mq_ops = {
        .queue_rq = dm_mq_queue_rq,
        .complete = dm_softirq_done,
        .init_request = dm_mq_init_request,
};

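/*
 * Allocate and configure the blk-mq tag set for a request-based mapped
 * device and attach it to the already allocated md->queue.
 */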
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
        struct dm_target *immutable_tgt;
        int err;

        md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
        if (!md->tag_set)
                return -ENOMEM;

        md->tag_set->ops = &dm_mq_ops;
        md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
        md->tag_set->numa_node = md->numa_node_id;
        md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
        md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
        md->tag_set->driver_data = md;

        md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
        immutable_tgt = dm_table_get_immutable_target(t);
        if (immutable_tgt && immutable_tgt->per_io_data_size) {
                /* any target-specific per-io data is immediately after the tio */
                md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
                md->init_tio_pdu = true;
        }

        err = blk_mq_alloc_tag_set(md->tag_set);
        if (err)
                goto out_kfree_tag_set;

        err = blk_mq_init_allocated_queue(md->tag_set, md->queue);
        if (err)
                goto out_tag_set;
        return 0;

out_tag_set:
        blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
        kfree(md->tag_set);
        md->tag_set = NULL;

        return err;
}

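/*
 * Undo dm_mq_init_request_queue(): free the tag set, if one was allocated.
 */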
void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
        if (md->tag_set) {
                blk_mq_free_tag_set(md->tag_set);
                kfree(md->tag_set);
                md->tag_set = NULL;
        }
}

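/*
 * Module parameters.  These are part of the dm_mod module, so they can
 * typically be set at load time (e.g. dm_mod.dm_mq_queue_depth=4096 on the
 * kernel command line) or, being writable, adjusted later under
 * /sys/module/dm_mod/parameters/.
 */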
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

/* Unused, but preserved for userspace compatibility */
static bool use_blk_mq = true;
module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
 603