linux/drivers/md/dm-rq.c
/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * The number of I/Os reserved in request-based DM's mempools, as set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS      256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

unsigned dm_get_reserved_rq_based_ios(void)
{
        return __dm_get_module_param(&reserved_rq_based_ios,
                                     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

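/*
 * Like dm_get_reserved_rq_based_ios() above, the getters below clamp the
 * user-supplied module parameters to a sane range via __dm_get_module_param():
 * a value of 0 falls back to the built-in default and anything above the cap
 * is reduced to it.
 */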
static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
        return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
        return __dm_get_module_param(&dm_mq_queue_depth,
                                     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
        return queue_is_mq(md->queue);
}

void dm_start_queue(struct request_queue *q)
{
        blk_mq_unquiesce_queue(q);
        blk_mq_kick_requeue_list(q);
}

void dm_stop_queue(struct request_queue *q)
{
        if (blk_mq_queue_stopped(q))
                return;

        blk_mq_quiesce_queue(q);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
        struct dm_rq_clone_bio_info *info =
                container_of(clone, struct dm_rq_clone_bio_info, clone);
        struct dm_rq_target_io *tio = info->tio;
        unsigned int nr_bytes = info->orig->bi_iter.bi_size;
        blk_status_t error = clone->bi_status;
        bool is_last = !clone->bi_next;

        bio_put(clone);

        if (tio->error)
                /*
                 * An error has already been detected on the request.
                 * Once an error has occurred, just let clone->end_io()
                 * handle the remainder.
                 */
                return;
        else if (error) {
                /*
                 * Don't report the error to the upper layer yet.
                 * The error handling decision is made by the target driver
                 * when the request is completed.
                 */
                tio->error = error;
                goto exit;
        }

        /*
         * I/O for the bio successfully completed.
         * Report the data completion to the upper layer.
         */
        tio->completed += nr_bytes;

        /*
         * Update the original request.
         * Do not use blk_end_request() here, because it may complete
         * the original request before the clone, and break the ordering.
         */
        if (is_last)
 exit:
                blk_update_request(tio->orig, BLK_STS_OK, tio->completed);
}

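/*
 * The dm_rq_target_io lives in the blk-mq per-request payload (PDU) that is
 * allocated immediately after struct request itself; see
 * dm_mq_init_request_queue() below, where cmd_size is set.
 */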
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
        return blk_mq_rq_to_pdu(rq);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies - tio->duration_jiffies;
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, true,
                                    tio->duration_jiffies, &tio->stats_aux);
        }
}

/*
 * Don't touch any member of the md after calling this function, because the
 * md may be freed by the dm_put() at the end of this function.
 * Alternatively, take a reference with dm_get() before calling this function
 * and drop it with dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
        /* nudge anyone waiting on suspend queue */
        if (unlikely(wq_has_sleeper(&md->wait)))
                wake_up(&md->wait);

        /*
         * dm_put() must be at the end of this function. See the comment above.
         */
        dm_put(md);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, blk_status_t error)
{
        int rw = rq_data_dir(clone);
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;

        blk_rq_unprep_clone(clone);
        tio->ti->type->release_clone_rq(clone);

        rq_end_stats(md, rq);
        blk_mq_end_request(rq, error);
        rq_completed(md, rw, true);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
        blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
        __dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
        blk_mq_requeue_request(rq, false);
        __dm_mq_kick_requeue_list(rq->q, msecs);
}

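/*
 * Unwind the clone (if one was set up) and put the original request back on
 * the requeue list, optionally after a short delay to avoid a tight requeue
 * loop while the target is still busy.
 */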
static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        int rw = rq_data_dir(rq);
        unsigned long delay_ms = delay_requeue ? 100 : 0;

        rq_end_stats(md, rq);
        if (tio->clone) {
                blk_rq_unprep_clone(tio->clone);
                tio->ti->type->release_clone_rq(tio->clone);
        }

        dm_mq_delay_requeue_request(rq, delay_ms);
        rq_completed(md, rw, false);
}

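/*
 * Consult the target's optional rq_end_io() hook, then complete, requeue or
 * leave the original request to the target according to the DM_ENDIO_* value
 * returned.
 */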
static void dm_done(struct request *clone, blk_status_t error, bool mapped)
{
        int r = DM_ENDIO_DONE;
        struct dm_rq_target_io *tio = clone->end_io_data;
        dm_request_endio_fn rq_end_io = NULL;

        if (tio->ti) {
                rq_end_io = tio->ti->type->rq_end_io;

                if (mapped && rq_end_io)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }

        if (unlikely(error == BLK_STS_TARGET)) {
                if (req_op(clone) == REQ_OP_WRITE_SAME &&
                    !clone->q->limits.max_write_same_sectors)
                        disable_write_same(tio->md);
                if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
                    !clone->q->limits.max_write_zeroes_sectors)
                        disable_write_zeroes(tio->md);
        }

        switch (r) {
        case DM_ENDIO_DONE:
                /* The target wants to complete the I/O */
                dm_end_request(clone, error);
                break;
        case DM_ENDIO_INCOMPLETE:
                /* The target will handle the I/O */
                return;
        case DM_ENDIO_REQUEUE:
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(tio, false);
                break;
        case DM_ENDIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        default:
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
        }
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
        bool mapped = true;
        struct dm_rq_target_io *tio = tio_from_request(rq);
        struct request *clone = tio->clone;
        int rw;

        if (!clone) {
                struct mapped_device *md = tio->md;

                rq_end_stats(md, rq);
                rw = rq_data_dir(rq);
                blk_mq_end_request(rq, tio->error);
                rq_completed(md, rw, false);
                return;
        }

        if (rq->rq_flags & RQF_FAILED)
                mapped = false;

        dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, blk_status_t error)
{
        struct dm_rq_target_io *tio = tio_from_request(rq);

        tio->error = error;
        blk_mq_complete_request(rq);
}

/*
 * Complete the not-mapped clone and the original request with the error
 * status through softirq context.
 * The target's rq_end_io() function isn't called.
 * This may be used when the target's clone_and_map_rq() function fails.
 */
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
        rq->rq_flags |= RQF_FAILED;
        dm_complete_request(rq, error);
}

static void end_clone_request(struct request *clone, blk_status_t error)
{
        struct dm_rq_target_io *tio = clone->end_io_data;

        dm_complete_request(tio->orig, error);
}

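/*
 * Hand the prepared clone to the underlying device's request queue.  On a
 * resource-level failure (BLK_STS_RESOURCE or BLK_STS_DEV_RESOURCE) the
 * caller is expected to requeue the original request; any other error
 * completes the original request immediately.
 */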
static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
        blk_status_t r;

        if (blk_queue_io_stat(clone->q))
                clone->rq_flags |= RQF_IO_STAT;

        clone->start_time_ns = ktime_get_ns();
        r = blk_insert_cloned_request(clone->q, clone);
        if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
                /* must complete clone in terms of original request */
                dm_complete_request(rq, r);
        return r;
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
                                 void *data)
{
        struct dm_rq_target_io *tio = data;
        struct dm_rq_clone_bio_info *info =
                container_of(bio, struct dm_rq_clone_bio_info, clone);

        info->orig = bio_orig;
        info->tio = tio;
        bio->bi_end_io = end_clone_bio;

        return 0;
}

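/*
 * Clone the original request's bios out of the md's bioset and wire up the
 * completion hooks so that both partial (per-bio) and final completions are
 * propagated back to the original request.
 */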
static int setup_clone(struct request *clone, struct request *rq,
                       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
        int r;

        r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
                              dm_rq_bio_constructor, tio);
        if (r)
                return r;

        clone->end_io = end_clone_request;
        clone->end_io_data = tio;

        tio->clone = clone;

        return 0;
}

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
                     struct mapped_device *md)
{
        tio->md = md;
        tio->ti = NULL;
        tio->clone = NULL;
        tio->orig = rq;
        tio->error = 0;
        tio->completed = 0;
        /*
         * Avoid initializing info for blk-mq; it passes
         * target-specific data through info.ptr
         * (see: dm_mq_init_request)
         */
        if (!md->init_tio_pdu)
                memset(&tio->info, 0, sizeof(tio->info));
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
        int r;
        struct dm_target *ti = tio->ti;
        struct mapped_device *md = tio->md;
        struct request *rq = tio->orig;
        struct request *clone = NULL;
        blk_status_t ret;

        r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
                break;
        case DM_MAPIO_REMAPPED:
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
                        return DM_MAPIO_REQUEUE;
                }

                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                ret = dm_dispatch_clone_request(clone, rq);
                if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
                        blk_rq_unprep_clone(clone);
                        tio->ti->type->release_clone_rq(clone);
                        tio->clone = NULL;
                        return DM_MAPIO_REQUEUE;
                }
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                break;
        case DM_MAPIO_DELAY_REQUEUE:
                /* The target wants to requeue the I/O after a delay */
                dm_requeue_original_request(tio, true);
                break;
        case DM_MAPIO_KILL:
                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, BLK_STS_IOERR);
                break;
        default:
                DMWARN("unimplemented target map return value: %d", r);
                BUG();
        }

        return r;
}

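/*
 * For reference, a minimal, hypothetical sketch of the clone_and_map_rq()
 * hook a request-based target supplies, loosely modeled on dm-mpath: the
 * target allocates a clone on the underlying device's queue and returns
 * DM_MAPIO_REMAPPED so map_request() above dispatches it, or asks for a
 * delayed requeue when the allocation fails.  Names such as
 * struct example_target and et->dev are illustrative only:
 *
 *	static int example_clone_and_map_rq(struct dm_target *ti,
 *					    struct request *rq,
 *					    union map_info *map_context,
 *					    struct request **__clone)
 *	{
 *		struct example_target *et = ti->private;
 *		struct request_queue *q = bdev_get_queue(et->dev->bdev);
 *		struct request *clone;
 *
 *		clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
 *					BLK_MQ_REQ_NOWAIT);
 *		if (IS_ERR(clone))
 *			return DM_MAPIO_DELAY_REQUEUE;
 *		clone->bio = clone->biotail = NULL;
 *		*__clone = clone;
 *		return DM_MAPIO_REMAPPED;
 *	}
 *
 * A matching release_clone_rq() would simply blk_put_request() the clone.
 */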

/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
        return sprintf(buf, "%u\n", 0);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
                                                     const char *buf, size_t count)
{
        return count;
}

static void dm_start_request(struct mapped_device *md, struct request *orig)
{
        blk_mq_start_request(orig);

        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies;
                tio->n_sectors = blk_rq_sectors(orig);
                dm_stats_account_io(&md->stats, rq_data_dir(orig),
                                    blk_rq_pos(orig), tio->n_sectors, false, 0,
                                    &tio->stats_aux);
        }

        /*
         * Hold the md reference here for the in-flight I/O.
         * We can't rely on the reference count held by the device opener,
         * because the device may be closed during request completion,
         * when all bios are completed.
         * See the comment in rq_completed() too.
         */
        dm_get(md);
}

static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
                              unsigned int hctx_idx, unsigned int numa_node)
{
        struct mapped_device *md = set->driver_data;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

        /*
         * The md member of tio must be initialized here, otherwise it
         * won't be available in dm_mq_queue_rq().
         */
        tio->md = md;

        if (md->init_tio_pdu) {
                /* target-specific per-io data is immediately after the tio */
                tio->info.ptr = tio + 1;
        }

        return 0;
}

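/*
 * blk-mq dispatch entry point: resolve the target, initialize the per-request
 * tio and hand the request to map_request().  Returning BLK_STS_RESOURCE
 * (when the target is busy or the map results in DM_MAPIO_REQUEUE) asks the
 * block layer to retry the request later.
 */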
static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
                          const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
        struct mapped_device *md = tio->md;
        struct dm_target *ti = md->immutable_target;

        if (unlikely(!ti)) {
                int srcu_idx;
                struct dm_table *map = dm_get_live_table(md, &srcu_idx);

                ti = dm_table_find_target(map, 0);
                dm_put_live_table(md, srcu_idx);
        }

        if (ti->type->busy && ti->type->busy(ti))
                return BLK_STS_RESOURCE;

        dm_start_request(md, rq);

        /* Init tio using md established in .init_request */
        init_tio(tio, rq, md);

        /*
         * Establish tio->ti before calling map_request().
         */
        tio->ti = ti;

        /* Direct call is fine since .queue_rq allows allocations */
        if (map_request(tio) == DM_MAPIO_REQUEUE) {
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md, rq_data_dir(rq), false);
                return BLK_STS_RESOURCE;
        }

        return BLK_STS_OK;
}

static const struct blk_mq_ops dm_mq_ops = {
        .queue_rq = dm_mq_queue_rq,
        .complete = dm_softirq_done,
        .init_request = dm_mq_init_request,
};

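/*
 * Allocate and register the blk-mq tag set for a request-based mapped device
 * and bind it to md->queue.  Any target-specific per-io data is carved out of
 * the same per-request payload, immediately behind the dm_rq_target_io.
 */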
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
        struct request_queue *q;
        struct dm_target *immutable_tgt;
        int err;

        md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
        if (!md->tag_set)
                return -ENOMEM;

        md->tag_set->ops = &dm_mq_ops;
        md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
        md->tag_set->numa_node = md->numa_node_id;
        md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
        md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
        md->tag_set->driver_data = md;

        md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
        immutable_tgt = dm_table_get_immutable_target(t);
        if (immutable_tgt && immutable_tgt->per_io_data_size) {
                /* any target-specific per-io data is immediately after the tio */
                md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
                md->init_tio_pdu = true;
        }

        err = blk_mq_alloc_tag_set(md->tag_set);
        if (err)
                goto out_kfree_tag_set;

        q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
        if (IS_ERR(q)) {
                err = PTR_ERR(q);
                goto out_tag_set;
        }

        return 0;

out_tag_set:
        blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
        kfree(md->tag_set);

        return err;
}

void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
        if (md->tag_set) {
                blk_mq_free_tag_set(md->tag_set);
                kfree(md->tag_set);
        }
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

/* Unused, but preserved for userspace compatibility */
static bool use_blk_mq = true;
module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
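
/*
 * Note: the parameters above are root-writable at runtime (S_IWUSR).  When
 * device-mapper is built as the dm_mod module they are expected to appear
 * under /sys/module/dm_mod/parameters/; the exact path depends on how dm is
 * built into the kernel.
 */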