linux/drivers/md/dm-rq.c
/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS	256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);

bool dm_use_blk_mq_default(void)
{
	return use_blk_mq;
}

bool dm_use_blk_mq(struct mapped_device *md)
{
	return md->use_blk_mq;
}
EXPORT_SYMBOL_GPL(dm_use_blk_mq);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
	return __dm_get_module_param(&dm_mq_queue_depth,
				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

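/*
 * Queue start/stop helpers: dm_start_queue() and dm_stop_queue() pick the
 * .request_fn or blk-mq variant based on whether the queue has mq_ops.
 */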
static void dm_old_start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_start_queue(struct request_queue *q)
{
	blk_mq_unquiesce_queue(q);
	blk_mq_kick_requeue_list(q);
}

void dm_start_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_start_queue(q);
	else
		dm_mq_start_queue(q);
}

static void dm_old_stop_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (!blk_queue_stopped(q))
		blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_stop_queue(struct request_queue *q)
{
	if (blk_mq_queue_stopped(q))
		return;

	blk_mq_quiesce_queue(q);
}

void dm_stop_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_stop_queue(q);
	else
		dm_mq_stop_queue(q);
}

static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
						gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_old_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_old_clone_request(struct mapped_device *md,
					       gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_old_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone, int error)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	unsigned int nr_bytes = info->orig->bi_size;
	bool is_last = !clone->bi_next;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io()
		 * handle the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't notify the upper layer of the error yet.
		 * The error handling decision is made by the target driver
		 * when the request is completed.
		 */
		tio->error = error;
		goto exit;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Notify the upper layer of the completed data.
	 */
	tio->completed += nr_bytes;

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	if (is_last)
exit:
		blk_update_request(tio->orig, 0, tio->completed);
}

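/*
 * The tio lives in the blk-mq PDU for dm-mq requests and in rq->special
 * for old .request_fn requests.
 */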
static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

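/* Account the completed request in dm-stats, if statistics are enabled. */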
static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies - tio->duration_jiffies;
		dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
				    tio->n_sectors, true, tio->duration_jiffies,
				    &tio->stats_aux);
	}
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Or do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	struct request_queue *q = md->queue;
	unsigned long flags;

	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (!q->mq_ops && run_queue) {
		spin_lock_irqsave(q->queue_lock, flags);
		blk_run_queue_async(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}

	/*
	 * dm_put() must be at the end of this function. See the comment above.
	 */
	dm_put(md);
}

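/*
 * Unprep a clone and return it to wherever it came from: the target for
 * clones stacked on blk-mq device(s), the md's mempool otherwise.  The tio
 * is also freed on the old .request_fn path.
 */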
static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;

	blk_rq_unprep_clone(clone);

	/*
	 * It is possible for a clone_old_rq() allocated clone to
	 * get passed in -- it may not yet have a request_queue.
	 * This is known to occur if the error target replaces
	 * a multipath target that has a request_fn queue stacked
	 * on blk-mq queue(s).
	 */
	if (clone->q && clone->q->mq_ops)
		/* stacked on blk-mq queue(s) */
		tio->ti->type->release_clone_rq(clone, NULL);
	else if (!md->queue->mq_ops)
		/* request_fn queue stacked on request_fn queue(s) */
		free_old_clone_request(md, clone);

	if (!md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	rq_end_stats(md, rq);
	if (!rq->q->mq_ops)
		blk_end_request_all(rq, error);
	else
		blk_mq_end_request(rq, error);
	rq_completed(md, rw, true);
}

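/*
 * Release the resources attached to the original request during prep
 * (the clone and/or tio), e.g. before requeueing it.
 */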
static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!rq->q->mq_ops) {
		rq->special = NULL;
		rq->cmd_flags &= ~REQ_DONTPREP;
	}

	if (clone)
		free_rq_clone(clone);
	else if (!tio->md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Requeue the original request of a clone.
 */
static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	blk_delay_queue(q, delay_ms);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
	blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
	blk_mq_requeue_request(rq, false);
	__dm_mq_kick_requeue_list(rq->q, msecs);
}

static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	int rw = rq_data_dir(rq);
	unsigned long delay_ms = delay_requeue ? 100 : 0;

	rq_end_stats(md, rq);
	dm_unprep_request(rq);

	if (!rq->q->mq_ops)
		dm_old_requeue_request(rq, delay_ms);
	else
		dm_mq_delay_requeue_request(rq, delay_ms);

	rq_completed(md, rw, false);
}

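/*
 * Let the target inspect a completed clone (via its rq_end_io hook) and
 * then complete, requeue, or leave the original request pending as asked.
 */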
static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO)) {
		if ((clone->cmd_flags & REQ_DISCARD) &&
		    !clone->q->limits.max_discard_sectors)
			disable_discard(tio->md);
		else if ((clone->cmd_flags & REQ_WRITE_SAME) &&
			 !clone->q->limits.max_write_same_sectors)
			disable_write_same(tio->md);
	}

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(tio, false);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;
	int rw;

	if (!clone) {
		rq_end_stats(tio->md, rq);
		rw = rq_data_dir(rq);
		if (!rq->q->mq_ops) {
			blk_end_request_all(rq, tio->error);
			rq_completed(tio->md, rw, false);
			free_old_rq_tio(tio);
		} else {
			blk_mq_end_request(rq, tio->error);
			rq_completed(tio->md, rw, false);
		}
		return;
	}

	if (rq->cmd_flags & REQ_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	if (!rq->q->mq_ops)
		blk_complete_request(rq);
	else
		blk_mq_complete_request(rq, error);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->cmd_flags |= REQ_FAILED;
	dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (in the case of .request_fn)
 */
static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		/*
		 * Just clean up the bookkeeping of the queue in which the
		 * clone was dispatched.
		 * The clone is *NOT* actually freed here because it was
		 * allocated from dm's own mempool (REQ_ALLOCED isn't set).
		 */
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock.  Otherwise, deadlock could occur because:
	 *     - another request may be submitted by the upper level driver
	 *       of the stacking during the completion
	 *     - the submission which requires queue lock may be done
	 *       against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

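/*
 * Hand the clone to the underlying device's request queue.  A hard failure
 * from blk_insert_cloned_request() (anything other than OK or a BUSY
 * status) completes the original request with that error; BUSY results
 * are returned for the caller to requeue.
 */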
static int dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->cmd_flags |= REQ_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r != BLK_MQ_RQ_QUEUE_OK && r != BLK_MQ_RQ_QUEUE_BUSY &&
			r != BLK_MQ_RQ_QUEUE_DEV_BUSY)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
	return r;
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

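/*
 * Prepare @clone as a copy of the original request @rq: clone its bios,
 * copy the command/sense fields and point the completion handlers back at
 * this tio.
 */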
static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->buffer = rq->buffer;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	/*
	 * Create clone for use with .request_fn request_queue
	 */
	struct request *clone;

	clone = alloc_old_clone_request(md, gfp_mask);
	if (!clone)
		return NULL;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		free_old_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

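/*
 * Basic initialization of a tio for the given original request.
 */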
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	tio->completed = 0;
	/*
	 * Avoid initializing info for blk-mq; it passes
	 * target-specific data through info.ptr
	 * (see: dm_mq_init_request)
	 */
	if (!md->init_tio_pdu)
		memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		init_kthread_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
					       struct mapped_device *md,
					       gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_old_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	/*
	 * Must clone a request if this .request_fn DM device
	 * is stacked on .request_fn device(s).
	 */
	if (!dm_table_all_blk_mq_devices(table)) {
		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_old_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->cmd_flags |= REQ_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	struct request *clone = NULL;
	int ret;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
		if (r == DM_MAPIO_DELAY_REQUEUE)
			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r == DM_MAPIO_REMAPPED &&
		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone, &tio->info);
			return DM_MAPIO_REQUEUE;
		}
	}
check_again:
	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		ret = dm_dispatch_clone_request(clone, rq);
		if (ret == BLK_MQ_RQ_QUEUE_BUSY || ret == BLK_MQ_RQ_QUEUE_DEV_BUSY) {
			blk_rq_unprep_clone(clone);
			blk_mq_cleanup_rq(clone);
			tio->ti->type->release_clone_rq(clone, &tio->info);
			tio->clone = NULL;
			if (!rq->q->mq_ops)
				r = DM_MAPIO_DELAY_REQUEUE;
			else
				r = DM_MAPIO_REQUEUE;
			goto check_again;
		}
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		break;
	case DM_MAPIO_DELAY_REQUEUE:
		/* The target wants to requeue the I/O after a delay */
		dm_requeue_original_request(tio, true);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
	}

	return r;
}

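/*
 * Mark the original request as started and account it as in flight; also
 * record the state used by the sequential-merge heuristic and dm-stats.
 * The md reference taken here is dropped in rq_completed().
 */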
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
				    tio->n_sectors, false, 0, &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count held by the device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);

	if (map_request(tio) == DM_MAPIO_REQUEUE)
		dm_requeue_original_request(tio, false);
}

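/*
 * sysfs interface for the .request_fn sequential I/O merge deadline, in
 * microseconds.  0 (the default) disables the merge heuristic.
 */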
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for old request-based dm.
 * Called with the queue lock held.
 */
static void dm_old_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_target *ti = md->immutable_target;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos = 0;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		if (unlikely(!map)) {
			dm_put_live_table(md, srcu_idx);
			return;
		}
		ti = dm_table_find_target(map, pos);
		dm_put_live_table(md, srcu_idx);
	}

	/*
	 * For suspend, check blk_queue_stopped() and increment ->pending
	 * under the same queue_lock acquisition, so that the number of
	 * in-flight I/Os does not increase after the queue has been
	 * stopped in dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			return;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (!(rq->cmd_flags & REQ_FLUSH))
			pos = blk_rq_pos(rq);

		if ((dm_old_request_peeked_before_merge_deadline(md) &&
		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
		    (ti->type->busy && ti->type->busy(ti))) {
			blk_delay_queue(q, 10);
			return;
		}

		dm_start_request(md, rq);

		tio = tio_from_request(rq);
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		queue_kthread_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}
}

/*
 * Fully initialize a .request_fn request-based queue.
 */
int dm_old_init_request_queue(struct mapped_device *md)
{
	/* Fully initialize the queue */
	md->queue->request_fn = dm_old_request_fn;
	if (blk_init_allocated_queue(md->queue) < 0)
		return -EINVAL;

	/* disable dm_old_request_fn's merge heuristic by default */
	md->seq_rq_merge_deadline_usecs = 0;

	dm_init_normal_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_old_prep_fn);

	/* Initialize the request-based DM worker thread */
	init_kthread_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
	if (IS_ERR(md->kworker_task)) {
		int error = PTR_ERR(md->kworker_task);
		md->kworker_task = NULL;
		return error;
	}

	return 0;
}

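/*
 * blk-mq .init_request callback: runs once for each pre-allocated request
 * when the tag set is created.
 */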
static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct mapped_device *md = set->driver_data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	if (md->init_tio_pdu) {
		/* target-specific per-io data is immediately after the tio */
		tio->info.ptr = tio + 1;
	}

	return 0;
}

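/*
 * blk-mq .queue_rq callback: map and dispatch one original request.
 */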
static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	struct dm_target *ti = md->immutable_target;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, 0);
		dm_put_live_table(md, srcu_idx);
	}

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before calling map_request().
	 */
	tio->ti = ti;

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before requeuing */
		rq_end_stats(md, rq);
		rq_completed(md, rq_data_dir(rq), false);
		return BLK_MQ_RQ_QUEUE_BUSY;
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q;
	struct dm_target *immutable_tgt;
	int err;

	if (!dm_table_all_blk_mq_devices(t)) {
		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
		return -EINVAL;
	}

	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
	if (!md->tag_set)
		return -ENOMEM;

	md->tag_set->ops = &dm_mq_ops;
	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
	md->tag_set->numa_node = md->numa_node_id;
	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
	md->tag_set->driver_data = md;

	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
	immutable_tgt = dm_table_get_immutable_target(t);
	if (immutable_tgt && immutable_tgt->per_io_data_size) {
		/* any target-specific per-io data is immediately after the tio */
		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
		md->init_tio_pdu = true;
	}

	err = blk_mq_alloc_tag_set(md->tag_set);
	if (err)
		goto out_kfree_tag_set;

	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	dm_init_md_queue(md);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
	kfree(md->tag_set);

	return err;
}

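/* Free the blk-mq tag set allocated in dm_mq_init_request_queue(). */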
void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
	if (md->tag_set) {
		blk_mq_free_tag_set(md->tag_set);
		kfree(md->tag_set);
	}
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
1006