linux/drivers/md/dm-mpath.c
   1/*
   2 * Copyright (C) 2003 Sistina Software Limited.
   3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
   4 *
   5 * This file is released under the GPL.
   6 */
   7
   8#include <linux/device-mapper.h>
   9
  10#include "dm-rq.h"
  11#include "dm-bio-record.h"
  12#include "dm-path-selector.h"
  13#include "dm-uevent.h"
  14
  15#include <linux/blkdev.h>
  16#include <linux/ctype.h>
  17#include <linux/init.h>
  18#include <linux/mempool.h>
  19#include <linux/module.h>
  20#include <linux/pagemap.h>
  21#include <linux/slab.h>
  22#include <linux/time.h>
  23#include <linux/workqueue.h>
  24#include <linux/delay.h>
  25#include <scsi/scsi_dh.h>
  26#include <linux/atomic.h>
  27#include <linux/blk-mq.h>
  28
  29#define DM_MSG_PREFIX "multipath"
  30#define DM_PG_INIT_DELAY_MSECS 2000
  31#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
  32
  33/* Path properties */
  34struct pgpath {
  35        struct list_head list;
  36
  37        struct priority_group *pg;      /* Owning PG */
  38        unsigned fail_count;            /* Cumulative failure count */
  39
  40        struct dm_path path;
  41        struct delayed_work activate_path;
  42
  43        bool is_active:1;               /* Path status */
  44};
  45
  46#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
  47
  48/*
  49 * Paths are grouped into Priority Groups and numbered from 1 upwards.
  50 * Each has a path selector which controls which path gets used.
  51 */
  52struct priority_group {
  53        struct list_head list;
  54
  55        struct multipath *m;            /* Owning multipath instance */
  56        struct path_selector ps;
  57
  58        unsigned pg_num;                /* Reference number */
  59        unsigned nr_pgpaths;            /* Number of paths in PG */
  60        struct list_head pgpaths;
  61
  62        bool bypassed:1;                /* Temporarily bypass this PG? */
  63};
  64
  65/* Multipath context */
  66struct multipath {
  67        unsigned long flags;            /* Multipath state flags */
  68
  69        spinlock_t lock;
  70        enum dm_queue_mode queue_mode;
  71
  72        struct pgpath *current_pgpath;
  73        struct priority_group *current_pg;
  74        struct priority_group *next_pg; /* Switch to this PG if set */
  75
  76        atomic_t nr_valid_paths;        /* Total number of usable paths */
  77        unsigned nr_priority_groups;
  78        struct list_head priority_groups;
  79
  80        const char *hw_handler_name;
  81        char *hw_handler_params;
  82        wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
  83        unsigned pg_init_retries;       /* Number of times to retry pg_init */
  84        unsigned pg_init_delay_msecs;   /* Number of msecs before pg_init retry */
  85        atomic_t pg_init_in_progress;   /* Only one pg_init allowed at once */
  86        atomic_t pg_init_count;         /* Number of times pg_init called */
  87
  88        struct mutex work_mutex;
  89        struct work_struct trigger_event;
  90        struct dm_target *ti;
  91
  92        struct work_struct process_queued_bios;
  93        struct bio_list queued_bios;
  94};
  95
  96/*
  97 * Context information attached to each io we process.
  98 */
  99struct dm_mpath_io {
 100        struct pgpath *pgpath;
 101        size_t nr_bytes;
 102};
 103
 104typedef int (*action_fn) (struct pgpath *pgpath);
 105
 106static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 107static void trigger_event(struct work_struct *work);
 108static void activate_or_offline_path(struct pgpath *pgpath);
 109static void activate_path_work(struct work_struct *work);
 110static void process_queued_bios(struct work_struct *work);
 111
 112/*-----------------------------------------------
 113 * Multipath state flags.
 114 *-----------------------------------------------*/
 115
 116#define MPATHF_QUEUE_IO 0                       /* Must we queue all I/O? */
 117#define MPATHF_QUEUE_IF_NO_PATH 1               /* Queue I/O if last path fails? */
 118#define MPATHF_SAVED_QUEUE_IF_NO_PATH 2         /* Saved state during suspension */
 119#define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3     /* If there's already a hw_handler present, don't change it. */
 120#define MPATHF_PG_INIT_DISABLED 4               /* pg_init is not currently allowed */
 121#define MPATHF_PG_INIT_REQUIRED 5               /* pg_init needs calling? */
 122#define MPATHF_PG_INIT_DELAY_RETRY 6            /* Delay pg_init retry? */
 123
 124/*-----------------------------------------------
 125 * Allocation routines
 126 *-----------------------------------------------*/
 127
 128static struct pgpath *alloc_pgpath(void)
 129{
 130        struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 131
 132        if (!pgpath)
 133                return NULL;
 134
 135        pgpath->is_active = true;
 136
 137        return pgpath;
 138}
 139
 140static void free_pgpath(struct pgpath *pgpath)
 141{
 142        kfree(pgpath);
 143}
 144
 145static struct priority_group *alloc_priority_group(void)
 146{
 147        struct priority_group *pg;
 148
 149        pg = kzalloc(sizeof(*pg), GFP_KERNEL);
 150
 151        if (pg)
 152                INIT_LIST_HEAD(&pg->pgpaths);
 153
 154        return pg;
 155}
 156
 157static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
 158{
 159        struct pgpath *pgpath, *tmp;
 160
 161        list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
 162                list_del(&pgpath->list);
 163                dm_put_device(ti, pgpath->path.dev);
 164                free_pgpath(pgpath);
 165        }
 166}
 167
 168static void free_priority_group(struct priority_group *pg,
 169                                struct dm_target *ti)
 170{
 171        struct path_selector *ps = &pg->ps;
 172
 173        if (ps->type) {
 174                ps->type->destroy(ps);
 175                dm_put_path_selector(ps->type);
 176        }
 177
 178        free_pgpaths(&pg->pgpaths, ti);
 179        kfree(pg);
 180}
 181
 182static struct multipath *alloc_multipath(struct dm_target *ti)
 183{
 184        struct multipath *m;
 185
 186        m = kzalloc(sizeof(*m), GFP_KERNEL);
 187        if (m) {
 188                INIT_LIST_HEAD(&m->priority_groups);
 189                spin_lock_init(&m->lock);
 190                atomic_set(&m->nr_valid_paths, 0);
 191                INIT_WORK(&m->trigger_event, trigger_event);
 192                mutex_init(&m->work_mutex);
 193
 194                m->queue_mode = DM_TYPE_NONE;
 195
 196                m->ti = ti;
 197                ti->private = m;
 198        }
 199
 200        return m;
 201}
 202
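/*
 * Second-stage initialisation, once the queue_mode is known (i.e. after
 * the feature arguments have been parsed): default to request-based,
 * set up the bio requeue worker for bio-based, and initialise the
 * pg_init bookkeeping used when a scsi_dh is attached.
 */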
 203static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
 204{
 205        if (m->queue_mode == DM_TYPE_NONE) {
 206                m->queue_mode = DM_TYPE_REQUEST_BASED;
 207        } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
 208                INIT_WORK(&m->process_queued_bios, process_queued_bios);
 209                /*
 210                 * bio-based doesn't support any direct scsi_dh management;
 211                 * it just discovers if a scsi_dh is attached.
 212                 */
 213                set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
 214        }
 215
 216        dm_table_set_type(ti->table, m->queue_mode);
 217
 218        /*
 219         * Init fields that are only used when a scsi_dh is attached
 220         * - must do this unconditionally (really doesn't hurt non-SCSI uses)
 221         */
 222        set_bit(MPATHF_QUEUE_IO, &m->flags);
 223        atomic_set(&m->pg_init_in_progress, 0);
 224        atomic_set(&m->pg_init_count, 0);
 225        m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
 226        init_waitqueue_head(&m->pg_init_wait);
 227
 228        return 0;
 229}
 230
 231static void free_multipath(struct multipath *m)
 232{
 233        struct priority_group *pg, *tmp;
 234
 235        list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
 236                list_del(&pg->list);
 237                free_priority_group(pg, m->ti);
 238        }
 239
 240        kfree(m->hw_handler_name);
 241        kfree(m->hw_handler_params);
 242        mutex_destroy(&m->work_mutex);
 243        kfree(m);
 244}
 245
 246static struct dm_mpath_io *get_mpio(union map_info *info)
 247{
 248        return info->ptr;
 249}
 250
 251static size_t multipath_per_bio_data_size(void)
 252{
 253        return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details);
 254}
 255
 256static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
 257{
 258        return dm_per_bio_data(bio, multipath_per_bio_data_size());
 259}
 260
 261static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio)
 262{
 263        /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
 264        void *bio_details = mpio + 1;
 265        return bio_details;
 266}
 267
 268static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
 269{
 270        struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
 271        struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
 272
 273        mpio->nr_bytes = bio->bi_iter.bi_size;
 274        mpio->pgpath = NULL;
 275        *mpio_p = mpio;
 276
 277        dm_bio_record(bio_details, bio);
 278}
 279
 280/*-----------------------------------------------
 281 * Path selection
 282 *-----------------------------------------------*/
 283
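/*
 * Kick off pg_init (hardware handler activation) on every active path in
 * the current PG.  Caller must hold m->lock.  Returns the number of path
 * activations now in flight, or 0 if pg_init was not started.
 */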
 284static int __pg_init_all_paths(struct multipath *m)
 285{
 286        struct pgpath *pgpath;
 287        unsigned long pg_init_delay = 0;
 288
 289        lockdep_assert_held(&m->lock);
 290
 291        if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
 292                return 0;
 293
 294        atomic_inc(&m->pg_init_count);
 295        clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
 296
 297        /* Check here to reset pg_init_required */
 298        if (!m->current_pg)
 299                return 0;
 300
 301        if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags))
 302                pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
 303                                                 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
 304        list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
 305                /* Skip failed paths */
 306                if (!pgpath->is_active)
 307                        continue;
 308                if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
 309                                       pg_init_delay))
 310                        atomic_inc(&m->pg_init_in_progress);
 311        }
 312        return atomic_read(&m->pg_init_in_progress);
 313}
 314
 315static int pg_init_all_paths(struct multipath *m)
 316{
 317        int ret;
 318        unsigned long flags;
 319
 320        spin_lock_irqsave(&m->lock, flags);
 321        ret = __pg_init_all_paths(m);
 322        spin_unlock_irqrestore(&m->lock, flags);
 323
 324        return ret;
 325}
 326
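/*
 * Make @pg the current priority group.  If a hardware handler is
 * configured the PG must be initialised first, so flag pg_init as
 * required and queue I/O until it has completed.  Also resets the
 * pg_init retry count.
 */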
 327static void __switch_pg(struct multipath *m, struct priority_group *pg)
 328{
 329        m->current_pg = pg;
 330
 331        /* Must we initialise the PG first, and queue I/O till it's ready? */
 332        if (m->hw_handler_name) {
 333                set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
 334                set_bit(MPATHF_QUEUE_IO, &m->flags);
 335        } else {
 336                clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
 337                clear_bit(MPATHF_QUEUE_IO, &m->flags);
 338        }
 339
 340        atomic_set(&m->pg_init_count, 0);
 341}
 342
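/*
 * Ask the PG's path selector for a path.  If this moves us to a
 * different PG, update current_pgpath and switch PGs under the lock.
 */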
 343static struct pgpath *choose_path_in_pg(struct multipath *m,
 344                                        struct priority_group *pg,
 345                                        size_t nr_bytes)
 346{
 347        unsigned long flags;
 348        struct dm_path *path;
 349        struct pgpath *pgpath;
 350
 351        path = pg->ps.type->select_path(&pg->ps, nr_bytes);
 352        if (!path)
 353                return ERR_PTR(-ENXIO);
 354
 355        pgpath = path_to_pgpath(path);
 356
 357        if (unlikely(READ_ONCE(m->current_pg) != pg)) {
 358                /* Only update current_pgpath if pg changed */
 359                spin_lock_irqsave(&m->lock, flags);
 360                m->current_pgpath = pgpath;
 361                __switch_pg(m, pg);
 362                spin_unlock_irqrestore(&m->lock, flags);
 363        }
 364
 365        return pgpath;
 366}
 367
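/*
 * Select the path to use for an I/O of @nr_bytes: honour a requested PG
 * switch first, stay with the current PG while it still has usable
 * paths, and only then search the remaining PGs (bypassed ones last).
 * Returns NULL if no usable path exists.
 */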
 368static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
 369{
 370        unsigned long flags;
 371        struct priority_group *pg;
 372        struct pgpath *pgpath;
 373        unsigned bypassed = 1;
 374
 375        if (!atomic_read(&m->nr_valid_paths)) {
 376                clear_bit(MPATHF_QUEUE_IO, &m->flags);
 377                goto failed;
 378        }
 379
 380        /* Were we instructed to switch PG? */
 381        if (READ_ONCE(m->next_pg)) {
 382                spin_lock_irqsave(&m->lock, flags);
 383                pg = m->next_pg;
 384                if (!pg) {
 385                        spin_unlock_irqrestore(&m->lock, flags);
 386                        goto check_current_pg;
 387                }
 388                m->next_pg = NULL;
 389                spin_unlock_irqrestore(&m->lock, flags);
 390                pgpath = choose_path_in_pg(m, pg, nr_bytes);
 391                if (!IS_ERR_OR_NULL(pgpath))
 392                        return pgpath;
 393        }
 394
 395        /* Don't change PG until it has no remaining paths */
 396check_current_pg:
 397        pg = READ_ONCE(m->current_pg);
 398        if (pg) {
 399                pgpath = choose_path_in_pg(m, pg, nr_bytes);
 400                if (!IS_ERR_OR_NULL(pgpath))
 401                        return pgpath;
 402        }
 403
 404        /*
 405         * Loop through priority groups until we find a valid path.
 406         * First time we skip PGs marked 'bypassed'.
 407         * Second time we only try the ones we skipped, but set
 408         * pg_init_delay_retry so we do not hammer controllers.
 409         */
 410        do {
 411                list_for_each_entry(pg, &m->priority_groups, list) {
 412                        if (pg->bypassed == !!bypassed)
 413                                continue;
 414                        pgpath = choose_path_in_pg(m, pg, nr_bytes);
 415                        if (!IS_ERR_OR_NULL(pgpath)) {
 416                                if (!bypassed)
 417                                        set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
 418                                return pgpath;
 419                        }
 420                }
 421        } while (bypassed--);
 422
 423failed:
 424        spin_lock_irqsave(&m->lock, flags);
 425        m->current_pgpath = NULL;
 426        m->current_pg = NULL;
 427        spin_unlock_irqrestore(&m->lock, flags);
 428
 429        return NULL;
 430}
 431
 432/*
 433 * dm_report_EIO() is a macro instead of a function to make pr_debug()
 434 * report the function name and line number of the function from which
 435 * it has been invoked.
 436 */
 437#define dm_report_EIO(m)                                                \
 438do {                                                                    \
 439        struct mapped_device *md = dm_table_get_md((m)->ti->table);     \
 440                                                                        \
 441        pr_debug("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d\n", \
 442                 dm_device_name(md),                                    \
 443                 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags),        \
 444                 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags),  \
 445                 dm_noflush_suspending((m)->ti));                       \
 446} while (0)
 447
 448/*
 449 * Check whether bios must be queued in the device-mapper core rather
 450 * than here in the target.
 451 *
 452 * If MPATHF_QUEUE_IF_NO_PATH and MPATHF_SAVED_QUEUE_IF_NO_PATH hold
 453 * the same value then we are not between multipath_presuspend()
 454 * and multipath_resume() calls and we have no need to check
 455 * for the DMF_NOFLUSH_SUSPENDING flag.
 456 */
 457static bool __must_push_back(struct multipath *m, unsigned long flags)
 458{
 459        return ((test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) !=
 460                 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &flags)) &&
 461                dm_noflush_suspending(m->ti));
 462}
 463
 464/*
  465 * The following functions use READ_ONCE to take a consistent snapshot of
  466 * m->flags without taking the spinlock.
 467 */
 468static bool must_push_back_rq(struct multipath *m)
 469{
 470        unsigned long flags = READ_ONCE(m->flags);
 471        return test_bit(MPATHF_QUEUE_IF_NO_PATH, &flags) || __must_push_back(m, flags);
 472}
 473
 474static bool must_push_back_bio(struct multipath *m)
 475{
 476        unsigned long flags = READ_ONCE(m->flags);
 477        return __must_push_back(m, flags);
 478}
 479
 480/*
 481 * Map cloned requests (request-based multipath)
 482 */
 483static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 484                                   union map_info *map_context,
 485                                   struct request **__clone)
 486{
 487        struct multipath *m = ti->private;
 488        size_t nr_bytes = blk_rq_bytes(rq);
 489        struct pgpath *pgpath;
 490        struct block_device *bdev;
 491        struct dm_mpath_io *mpio = get_mpio(map_context);
 492        struct request_queue *q;
 493        struct request *clone;
 494
 495        /* Do we need to select a new pgpath? */
 496        pgpath = READ_ONCE(m->current_pgpath);
 497        if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags))
 498                pgpath = choose_pgpath(m, nr_bytes);
 499
 500        if (!pgpath) {
 501                if (must_push_back_rq(m))
 502                        return DM_MAPIO_DELAY_REQUEUE;
 503                dm_report_EIO(m);       /* Failed */
 504                return DM_MAPIO_KILL;
 505        } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
 506                   test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
 507                pg_init_all_paths(m);
 508                return DM_MAPIO_DELAY_REQUEUE;
 509        }
 510
 511        mpio->pgpath = pgpath;
 512        mpio->nr_bytes = nr_bytes;
 513
 514        bdev = pgpath->path.dev->bdev;
 515        q = bdev_get_queue(bdev);
 516        clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
 517                        BLK_MQ_REQ_NOWAIT);
 518        if (IS_ERR(clone)) {
 519                /* EBUSY, ENODEV or EWOULDBLOCK: requeue */
 520                if (blk_queue_dying(q)) {
 521                        atomic_inc(&m->pg_init_in_progress);
 522                        activate_or_offline_path(pgpath);
 523                        return DM_MAPIO_DELAY_REQUEUE;
 524                }
 525
 526                /*
 527                 * blk-mq's SCHED_RESTART can cover this requeue, so we
 528                 * needn't deal with it by DELAY_REQUEUE. More importantly,
 529                 * we have to return DM_MAPIO_REQUEUE so that blk-mq can
 530                 * get the queue busy feedback (via BLK_STS_RESOURCE),
 531                 * otherwise I/O merging can suffer.
 532                 */
 533                return DM_MAPIO_REQUEUE;
 534        }
 535        clone->bio = clone->biotail = NULL;
 536        clone->rq_disk = bdev->bd_disk;
 537        clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
 538        *__clone = clone;
 539
 540        if (pgpath->pg->ps.type->start_io)
 541                pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
 542                                              &pgpath->path,
 543                                              nr_bytes);
 544        return DM_MAPIO_REMAPPED;
 545}
 546
 547static void multipath_release_clone(struct request *clone,
 548                                    union map_info *map_context)
 549{
 550        if (unlikely(map_context)) {
 551                /*
 552                 * non-NULL map_context means caller is still map
 553                 * method; must undo multipath_clone_and_map()
 554                 */
 555                struct dm_mpath_io *mpio = get_mpio(map_context);
 556                struct pgpath *pgpath = mpio->pgpath;
 557
 558                if (pgpath && pgpath->pg->ps.type->end_io)
 559                        pgpath->pg->ps.type->end_io(&pgpath->pg->ps,
 560                                                    &pgpath->path,
 561                                                    mpio->nr_bytes);
 562        }
 563
 564        blk_put_request(clone);
 565}
 566
 567/*
 568 * Map cloned bios (bio-based multipath)
 569 */
 570
 571static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
 572{
 573        struct pgpath *pgpath;
 574        unsigned long flags;
 575        bool queue_io;
 576
 577        /* Do we need to select a new pgpath? */
 578        pgpath = READ_ONCE(m->current_pgpath);
 579        queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
 580        if (!pgpath || !queue_io)
 581                pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 582
 583        if ((pgpath && queue_io) ||
 584            (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
 585                /* Queue for the daemon to resubmit */
 586                spin_lock_irqsave(&m->lock, flags);
 587                bio_list_add(&m->queued_bios, bio);
 588                spin_unlock_irqrestore(&m->lock, flags);
 589
 590                /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
 591                if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
 592                        pg_init_all_paths(m);
 593                else if (!queue_io)
 594                        queue_work(kmultipathd, &m->process_queued_bios);
 595
 596                return ERR_PTR(-EAGAIN);
 597        }
 598
 599        return pgpath;
 600}
 601
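/*
 * Variant of __map_bio() used when no hardware handler is configured,
 * so there is never a pg_init to wait for and bios only need queueing
 * when no path is available at all.
 */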
 602static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
 603{
 604        struct pgpath *pgpath;
 605        unsigned long flags;
 606
 607        /* Do we need to select a new pgpath? */
 608        /*
 609         * FIXME: currently only switching path if no path (due to failure, etc)
 610         * - which negates the point of using a path selector
 611         */
 612        pgpath = READ_ONCE(m->current_pgpath);
 613        if (!pgpath)
 614                pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
 615
 616        if (!pgpath) {
 617                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
 618                        /* Queue for the daemon to resubmit */
 619                        spin_lock_irqsave(&m->lock, flags);
 620                        bio_list_add(&m->queued_bios, bio);
 621                        spin_unlock_irqrestore(&m->lock, flags);
 622                        queue_work(kmultipathd, &m->process_queued_bios);
 623
 624                        return ERR_PTR(-EAGAIN);
 625                }
 626                return NULL;
 627        }
 628
 629        return pgpath;
 630}
 631
 632static int __multipath_map_bio(struct multipath *m, struct bio *bio,
 633                               struct dm_mpath_io *mpio)
 634{
 635        struct pgpath *pgpath;
 636
 637        if (!m->hw_handler_name)
 638                pgpath = __map_bio_fast(m, bio);
 639        else
 640                pgpath = __map_bio(m, bio);
 641
 642        if (IS_ERR(pgpath))
 643                return DM_MAPIO_SUBMITTED;
 644
 645        if (!pgpath) {
 646                if (must_push_back_bio(m))
 647                        return DM_MAPIO_REQUEUE;
 648                dm_report_EIO(m);
 649                return DM_MAPIO_KILL;
 650        }
 651
 652        mpio->pgpath = pgpath;
 653
 654        bio->bi_status = 0;
 655        bio_set_dev(bio, pgpath->path.dev->bdev);
 656        bio->bi_opf |= REQ_FAILFAST_TRANSPORT;
 657
 658        if (pgpath->pg->ps.type->start_io)
 659                pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
 660                                              &pgpath->path,
 661                                              mpio->nr_bytes);
 662        return DM_MAPIO_REMAPPED;
 663}
 664
 665static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
 666{
 667        struct multipath *m = ti->private;
 668        struct dm_mpath_io *mpio = NULL;
 669
 670        multipath_init_per_bio_data(bio, &mpio);
 671        return __multipath_map_bio(m, bio, mpio);
 672}
 673
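/*
 * Re-issue I/O held back while paths were unavailable: kick the blk-mq
 * requeue list for request-based multipath, or schedule the bio requeue
 * worker for bio-based.
 */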
 674static void process_queued_io_list(struct multipath *m)
 675{
 676        if (m->queue_mode == DM_TYPE_REQUEST_BASED)
 677                dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
 678        else if (m->queue_mode == DM_TYPE_BIO_BASED)
 679                queue_work(kmultipathd, &m->process_queued_bios);
 680}
 681
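/*
 * Worker that drains m->queued_bios: each bio has its saved details
 * restored and is then remapped via __multipath_map_bio() and either
 * resubmitted, requeued or errored according to the result.
 */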
 682static void process_queued_bios(struct work_struct *work)
 683{
 684        int r;
 685        unsigned long flags;
 686        struct bio *bio;
 687        struct bio_list bios;
 688        struct blk_plug plug;
 689        struct multipath *m =
 690                container_of(work, struct multipath, process_queued_bios);
 691
 692        bio_list_init(&bios);
 693
 694        spin_lock_irqsave(&m->lock, flags);
 695
 696        if (bio_list_empty(&m->queued_bios)) {
 697                spin_unlock_irqrestore(&m->lock, flags);
 698                return;
 699        }
 700
 701        bio_list_merge(&bios, &m->queued_bios);
 702        bio_list_init(&m->queued_bios);
 703
 704        spin_unlock_irqrestore(&m->lock, flags);
 705
 706        blk_start_plug(&plug);
 707        while ((bio = bio_list_pop(&bios))) {
 708                struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
 709                dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
 710                r = __multipath_map_bio(m, bio, mpio);
 711                switch (r) {
 712                case DM_MAPIO_KILL:
 713                        bio->bi_status = BLK_STS_IOERR;
 714                        bio_endio(bio);
 715                        break;
 716                case DM_MAPIO_REQUEUE:
 717                        bio->bi_status = BLK_STS_DM_REQUEUE;
 718                        bio_endio(bio);
 719                        break;
 720                case DM_MAPIO_REMAPPED:
 721                        generic_make_request(bio);
 722                        break;
 723                case DM_MAPIO_SUBMITTED:
 724                        break;
 725                default:
 726                        WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r);
 727                }
 728        }
 729        blk_finish_plug(&plug);
 730}
 731
 732/*
 733 * If we run out of usable paths, should we queue I/O or error it?
 734 */
 735static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
 736                            bool save_old_value)
 737{
 738        unsigned long flags;
 739
 740        spin_lock_irqsave(&m->lock, flags);
 741        assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags,
 742                   (save_old_value && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) ||
 743                   (!save_old_value && queue_if_no_path));
 744        assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path);
 745        spin_unlock_irqrestore(&m->lock, flags);
 746
 747        if (!queue_if_no_path) {
 748                dm_table_run_md_queue_async(m->ti->table);
 749                process_queued_io_list(m);
 750        }
 751
 752        return 0;
 753}
 754
 755/*
 756 * An event is triggered whenever a path is taken out of use.
 757 * Includes path failure and PG bypass.
 758 */
 759static void trigger_event(struct work_struct *work)
 760{
 761        struct multipath *m =
 762                container_of(work, struct multipath, trigger_event);
 763
 764        dm_table_event(m->ti->table);
 765}
 766
 767/*-----------------------------------------------------------------
 768 * Constructor/argument parsing:
 769 * <#multipath feature args> [<arg>]*
 770 * <#hw_handler args> [hw_handler [<arg>]*]
 771 * <#priority groups>
 772 * <initial priority group>
 773 *     [<selector> <#selector args> [<arg>]*
 774 *      <#paths> <#per-path selector args>
 775 *         [<path> [<arg>]* ]+ ]+
 776 *---------------------------------------------------------------*/
 777static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
 778                               struct dm_target *ti)
 779{
 780        int r;
 781        struct path_selector_type *pst;
 782        unsigned ps_argc;
 783
 784        static const struct dm_arg _args[] = {
 785                {0, 1024, "invalid number of path selector args"},
 786        };
 787
 788        pst = dm_get_path_selector(dm_shift_arg(as));
 789        if (!pst) {
 790                ti->error = "unknown path selector type";
 791                return -EINVAL;
 792        }
 793
 794        r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
 795        if (r) {
 796                dm_put_path_selector(pst);
 797                return -EINVAL;
 798        }
 799
 800        r = pst->create(&pg->ps, ps_argc, as->argv);
 801        if (r) {
 802                dm_put_path_selector(pst);
 803                ti->error = "path selector constructor failed";
 804                return r;
 805        }
 806
 807        pg->ps.type = pst;
 808        dm_consume_args(as, ps_argc);
 809
 810        return 0;
 811}
 812
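/*
 * Attach the configured SCSI device handler to this path's request
 * queue (or retain an already-attached one) and apply any handler
 * parameters.
 */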
 813static int setup_scsi_dh(struct block_device *bdev, struct multipath *m,
 814                         const char **attached_handler_name, char **error)
 815{
 816        struct request_queue *q = bdev_get_queue(bdev);
 817        int r;
 818
 819        if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
 820retain:
 821                if (*attached_handler_name) {
 822                        /*
 823                         * Clear any hw_handler_params associated with a
 824                         * handler that isn't already attached.
 825                         */
 826                        if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) {
 827                                kfree(m->hw_handler_params);
 828                                m->hw_handler_params = NULL;
 829                        }
 830
 831                        /*
 832                         * Reset hw_handler_name to match the attached handler
 833                         *
 834                         * NB. This modifies the table line to show the actual
  835                         * handler instead of the one specified in the original table.
 836                         */
 837                        kfree(m->hw_handler_name);
 838                        m->hw_handler_name = *attached_handler_name;
 839                        *attached_handler_name = NULL;
 840                }
 841        }
 842
 843        if (m->hw_handler_name) {
 844                r = scsi_dh_attach(q, m->hw_handler_name);
 845                if (r == -EBUSY) {
 846                        char b[BDEVNAME_SIZE];
 847
 848                        printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
 849                               bdevname(bdev, b));
 850                        goto retain;
 851                }
 852                if (r < 0) {
 853                        *error = "error attaching hardware handler";
 854                        return r;
 855                }
 856
 857                if (m->hw_handler_params) {
 858                        r = scsi_dh_set_params(q, m->hw_handler_params);
 859                        if (r < 0) {
 860                                *error = "unable to set hardware handler parameters";
 861                                return r;
 862                        }
 863                }
 864        }
 865
 866        return 0;
 867}
 868
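/*
 * Parse a single path argument: open the device, set up its hardware
 * handler if one is configured or already attached, and register the
 * path with the PG's path selector.
 */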
 869static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
 870                                 struct dm_target *ti)
 871{
 872        int r;
 873        struct pgpath *p;
 874        struct multipath *m = ti->private;
 875        struct request_queue *q;
 876        const char *attached_handler_name = NULL;
 877
 878        /* we need at least a path arg */
 879        if (as->argc < 1) {
 880                ti->error = "no device given";
 881                return ERR_PTR(-EINVAL);
 882        }
 883
 884        p = alloc_pgpath();
 885        if (!p)
 886                return ERR_PTR(-ENOMEM);
 887
 888        r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
 889                          &p->path.dev);
 890        if (r) {
 891                ti->error = "error getting device";
 892                goto bad;
 893        }
 894
 895        q = bdev_get_queue(p->path.dev->bdev);
 896        attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
 897        if (attached_handler_name || m->hw_handler_name) {
 898                INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
 899                r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error);
 900                kfree(attached_handler_name);
 901                if (r) {
 902                        dm_put_device(ti, p->path.dev);
 903                        goto bad;
 904                }
 905        }
 906
 907        r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
 908        if (r) {
 909                dm_put_device(ti, p->path.dev);
 910                goto bad;
 911        }
 912
 913        return p;
 914 bad:
 915        free_pgpath(p);
 916        return ERR_PTR(r);
 917}
 918
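/*
 * Parse one priority group:
 *     <selector> <#selector args> [<arg>]*
 *     <#paths> <#per-path selector args> [<path> [<arg>]* ]+
 */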
 919static struct priority_group *parse_priority_group(struct dm_arg_set *as,
 920                                                   struct multipath *m)
 921{
 922        static const struct dm_arg _args[] = {
 923                {1, 1024, "invalid number of paths"},
 924                {0, 1024, "invalid number of selector args"}
 925        };
 926
 927        int r;
 928        unsigned i, nr_selector_args, nr_args;
 929        struct priority_group *pg;
 930        struct dm_target *ti = m->ti;
 931
 932        if (as->argc < 2) {
 933                as->argc = 0;
 934                ti->error = "not enough priority group arguments";
 935                return ERR_PTR(-EINVAL);
 936        }
 937
 938        pg = alloc_priority_group();
 939        if (!pg) {
 940                ti->error = "couldn't allocate priority group";
 941                return ERR_PTR(-ENOMEM);
 942        }
 943        pg->m = m;
 944
 945        r = parse_path_selector(as, pg, ti);
 946        if (r)
 947                goto bad;
 948
 949        /*
 950         * read the paths
 951         */
 952        r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
 953        if (r)
 954                goto bad;
 955
 956        r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
 957        if (r)
 958                goto bad;
 959
 960        nr_args = 1 + nr_selector_args;
 961        for (i = 0; i < pg->nr_pgpaths; i++) {
 962                struct pgpath *pgpath;
 963                struct dm_arg_set path_args;
 964
 965                if (as->argc < nr_args) {
 966                        ti->error = "not enough path parameters";
 967                        r = -EINVAL;
 968                        goto bad;
 969                }
 970
 971                path_args.argc = nr_args;
 972                path_args.argv = as->argv;
 973
 974                pgpath = parse_path(&path_args, &pg->ps, ti);
 975                if (IS_ERR(pgpath)) {
 976                        r = PTR_ERR(pgpath);
 977                        goto bad;
 978                }
 979
 980                pgpath->pg = pg;
 981                list_add_tail(&pgpath->list, &pg->pgpaths);
 982                dm_consume_args(as, nr_args);
 983        }
 984
 985        return pg;
 986
 987 bad:
 988        free_priority_group(pg, ti);
 989        return ERR_PTR(r);
 990}
 991
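/*
 * Parse "<#hw_handler args> [hw_handler [<arg>]*]".  The handler name is
 * stored in m->hw_handler_name and any remaining arguments are packed
 * into m->hw_handler_params as a NUL-separated argument count followed
 * by the arguments themselves.
 */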
 992static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
 993{
 994        unsigned hw_argc;
 995        int ret;
 996        struct dm_target *ti = m->ti;
 997
 998        static const struct dm_arg _args[] = {
 999                {0, 1024, "invalid number of hardware handler args"},
1000        };
1001
1002        if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
1003                return -EINVAL;
1004
1005        if (!hw_argc)
1006                return 0;
1007
1008        if (m->queue_mode == DM_TYPE_BIO_BASED) {
1009                dm_consume_args(as, hw_argc);
1010                DMERR("bio-based multipath doesn't allow hardware handler args");
1011                return 0;
1012        }
1013
1014        m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
1015        if (!m->hw_handler_name)
1016                return -EINVAL;
1017
1018        if (hw_argc > 1) {
1019                char *p;
1020                int i, j, len = 4;
1021
1022                for (i = 0; i <= hw_argc - 2; i++)
1023                        len += strlen(as->argv[i]) + 1;
1024                p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
1025                if (!p) {
1026                        ti->error = "memory allocation failed";
1027                        ret = -ENOMEM;
1028                        goto fail;
1029                }
1030                j = sprintf(p, "%d", hw_argc - 1);
1031                for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1)
1032                        j = sprintf(p, "%s", as->argv[i]);
1033        }
1034        dm_consume_args(as, hw_argc - 1);
1035
1036        return 0;
1037fail:
1038        kfree(m->hw_handler_name);
1039        m->hw_handler_name = NULL;
1040        return ret;
1041}
1042
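/*
 * Parse the optional feature arguments: queue_if_no_path,
 * retain_attached_hw_handler, pg_init_retries <n>,
 * pg_init_delay_msecs <n> and queue_mode <bio|rq|mq>.
 */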
1043static int parse_features(struct dm_arg_set *as, struct multipath *m)
1044{
1045        int r;
1046        unsigned argc;
1047        struct dm_target *ti = m->ti;
1048        const char *arg_name;
1049
1050        static const struct dm_arg _args[] = {
1051                {0, 8, "invalid number of feature args"},
1052                {1, 50, "pg_init_retries must be between 1 and 50"},
1053                {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
1054        };
1055
1056        r = dm_read_arg_group(_args, as, &argc, &ti->error);
1057        if (r)
1058                return -EINVAL;
1059
1060        if (!argc)
1061                return 0;
1062
1063        do {
1064                arg_name = dm_shift_arg(as);
1065                argc--;
1066
1067                if (!strcasecmp(arg_name, "queue_if_no_path")) {
1068                        r = queue_if_no_path(m, true, false);
1069                        continue;
1070                }
1071
1072                if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
1073                        set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
1074                        continue;
1075                }
1076
1077                if (!strcasecmp(arg_name, "pg_init_retries") &&
1078                    (argc >= 1)) {
1079                        r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
1080                        argc--;
1081                        continue;
1082                }
1083
1084                if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
1085                    (argc >= 1)) {
1086                        r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
1087                        argc--;
1088                        continue;
1089                }
1090
1091                if (!strcasecmp(arg_name, "queue_mode") &&
1092                    (argc >= 1)) {
1093                        const char *queue_mode_name = dm_shift_arg(as);
1094
1095                        if (!strcasecmp(queue_mode_name, "bio"))
1096                                m->queue_mode = DM_TYPE_BIO_BASED;
1097                        else if (!strcasecmp(queue_mode_name, "rq") ||
1098                                 !strcasecmp(queue_mode_name, "mq"))
1099                                m->queue_mode = DM_TYPE_REQUEST_BASED;
1100                        else {
1101                                ti->error = "Unknown 'queue_mode' requested";
1102                                r = -EINVAL;
1103                        }
1104                        argc--;
1105                        continue;
1106                }
1107
1108                ti->error = "Unrecognised multipath feature request";
1109                r = -EINVAL;
1110        } while (argc && !r);
1111
1112        return r;
1113}
1114
1115static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
1116{
1117        /* target arguments */
1118        static const struct dm_arg _args[] = {
1119                {0, 1024, "invalid number of priority groups"},
1120                {0, 1024, "invalid initial priority group number"},
1121        };
1122
1123        int r;
1124        struct multipath *m;
1125        struct dm_arg_set as;
1126        unsigned pg_count = 0;
1127        unsigned next_pg_num;
1128
1129        as.argc = argc;
1130        as.argv = argv;
1131
1132        m = alloc_multipath(ti);
1133        if (!m) {
1134                ti->error = "can't allocate multipath";
1135                return -EINVAL;
1136        }
1137
1138        r = parse_features(&as, m);
1139        if (r)
1140                goto bad;
1141
1142        r = alloc_multipath_stage2(ti, m);
1143        if (r)
1144                goto bad;
1145
1146        r = parse_hw_handler(&as, m);
1147        if (r)
1148                goto bad;
1149
1150        r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
1151        if (r)
1152                goto bad;
1153
1154        r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
1155        if (r)
1156                goto bad;
1157
1158        if ((!m->nr_priority_groups && next_pg_num) ||
1159            (m->nr_priority_groups && !next_pg_num)) {
1160                ti->error = "invalid initial priority group";
1161                r = -EINVAL;
1162                goto bad;
1163        }
1164
1165        /* parse the priority groups */
1166        while (as.argc) {
1167                struct priority_group *pg;
1168                unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths);
1169
1170                pg = parse_priority_group(&as, m);
1171                if (IS_ERR(pg)) {
1172                        r = PTR_ERR(pg);
1173                        goto bad;
1174                }
1175
1176                nr_valid_paths += pg->nr_pgpaths;
1177                atomic_set(&m->nr_valid_paths, nr_valid_paths);
1178
1179                list_add_tail(&pg->list, &m->priority_groups);
1180                pg_count++;
1181                pg->pg_num = pg_count;
1182                if (!--next_pg_num)
1183                        m->next_pg = pg;
1184        }
1185
1186        if (pg_count != m->nr_priority_groups) {
1187                ti->error = "priority group count mismatch";
1188                r = -EINVAL;
1189                goto bad;
1190        }
1191
1192        ti->num_flush_bios = 1;
1193        ti->num_discard_bios = 1;
1194        ti->num_write_same_bios = 1;
1195        ti->num_write_zeroes_bios = 1;
1196        if (m->queue_mode == DM_TYPE_BIO_BASED)
1197                ti->per_io_data_size = multipath_per_bio_data_size();
1198        else
1199                ti->per_io_data_size = sizeof(struct dm_mpath_io);
1200
1201        return 0;
1202
1203 bad:
1204        free_multipath(m);
1205        return r;
1206}
1207
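/*
 * Sleep until every outstanding pg_init has completed, i.e. until
 * pg_init_in_progress drops to zero.
 */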
1208static void multipath_wait_for_pg_init_completion(struct multipath *m)
1209{
1210        DEFINE_WAIT(wait);
1211
1212        while (1) {
1213                prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE);
1214
1215                if (!atomic_read(&m->pg_init_in_progress))
1216                        break;
1217
1218                io_schedule();
1219        }
1220        finish_wait(&m->pg_init_wait, &wait);
1221}
1222
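/*
 * Quiesce all asynchronous work: temporarily disable new pg_init
 * submissions, wait for in-flight path activations to finish, then
 * flush the queued-bio and trigger_event work items.
 */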
1223static void flush_multipath_work(struct multipath *m)
1224{
1225        if (m->hw_handler_name) {
1226                set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1227                smp_mb__after_atomic();
1228
1229                if (atomic_read(&m->pg_init_in_progress))
1230                        flush_workqueue(kmpath_handlerd);
1231                multipath_wait_for_pg_init_completion(m);
1232
1233                clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
1234                smp_mb__after_atomic();
1235        }
1236
1237        if (m->queue_mode == DM_TYPE_BIO_BASED)
1238                flush_work(&m->process_queued_bios);
1239        flush_work(&m->trigger_event);
1240}
1241
1242static void multipath_dtr(struct dm_target *ti)
1243{
1244        struct multipath *m = ti->private;
1245
1246        flush_multipath_work(m);
1247        free_multipath(m);
1248}
1249
1250/*
1251 * Take a path out of use.
1252 */
1253static int fail_path(struct pgpath *pgpath)
1254{
1255        unsigned long flags;
1256        struct multipath *m = pgpath->pg->m;
1257
1258        spin_lock_irqsave(&m->lock, flags);
1259
1260        if (!pgpath->is_active)
1261                goto out;
1262
1263        DMWARN("Failing path %s.", pgpath->path.dev->name);
1264
1265        pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
1266        pgpath->is_active = false;
1267        pgpath->fail_count++;
1268
1269        atomic_dec(&m->nr_valid_paths);
1270
1271        if (pgpath == m->current_pgpath)
1272                m->current_pgpath = NULL;
1273
1274        dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
1275                       pgpath->path.dev->name, atomic_read(&m->nr_valid_paths));
1276
1277        schedule_work(&m->trigger_event);
1278
1279out:
1280        spin_unlock_irqrestore(&m->lock, flags);
1281
1282        return 0;
1283}
1284
1285/*
1286 * Reinstate a previously-failed path
1287 */
1288static int reinstate_path(struct pgpath *pgpath)
1289{
1290        int r = 0, run_queue = 0;
1291        unsigned long flags;
1292        struct multipath *m = pgpath->pg->m;
1293        unsigned nr_valid_paths;
1294
1295        spin_lock_irqsave(&m->lock, flags);
1296
1297        if (pgpath->is_active)
1298                goto out;
1299
1300        DMWARN("Reinstating path %s.", pgpath->path.dev->name);
1301
1302        r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1303        if (r)
1304                goto out;
1305
1306        pgpath->is_active = true;
1307
1308        nr_valid_paths = atomic_inc_return(&m->nr_valid_paths);
1309        if (nr_valid_paths == 1) {
1310                m->current_pgpath = NULL;
1311                run_queue = 1;
1312        } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
1313                if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
1314                        atomic_inc(&m->pg_init_in_progress);
1315        }
1316
1317        dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1318                       pgpath->path.dev->name, nr_valid_paths);
1319
1320        schedule_work(&m->trigger_event);
1321
1322out:
1323        spin_unlock_irqrestore(&m->lock, flags);
1324        if (run_queue) {
1325                dm_table_run_md_queue_async(m->ti->table);
1326                process_queued_io_list(m);
1327        }
1328
1329        return r;
1330}
1331
1332/*
1333 * Fail or reinstate all paths that match the provided struct dm_dev.
1334 */
1335static int action_dev(struct multipath *m, struct dm_dev *dev,
1336                      action_fn action)
1337{
1338        int r = -EINVAL;
1339        struct pgpath *pgpath;
1340        struct priority_group *pg;
1341
1342        list_for_each_entry(pg, &m->priority_groups, list) {
1343                list_for_each_entry(pgpath, &pg->pgpaths, list) {
1344                        if (pgpath->path.dev == dev)
1345                                r = action(pgpath);
1346                }
1347        }
1348
1349        return r;
1350}
1351
1352/*
1353 * Temporarily try to avoid having to use the specified PG
1354 */
1355static void bypass_pg(struct multipath *m, struct priority_group *pg,
1356                      bool bypassed)
1357{
1358        unsigned long flags;
1359
1360        spin_lock_irqsave(&m->lock, flags);
1361
1362        pg->bypassed = bypassed;
1363        m->current_pgpath = NULL;
1364        m->current_pg = NULL;
1365
1366        spin_unlock_irqrestore(&m->lock, flags);
1367
1368        schedule_work(&m->trigger_event);
1369}
1370
1371/*
1372 * Switch to using the specified PG from the next I/O that gets mapped
1373 */
1374static int switch_pg_num(struct multipath *m, const char *pgstr)
1375{
1376        struct priority_group *pg;
1377        unsigned pgnum;
1378        unsigned long flags;
1379        char dummy;
1380
1381        if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
1382            !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
1383                DMWARN("invalid PG number supplied to switch_pg_num");
1384                return -EINVAL;
1385        }
1386
1387        spin_lock_irqsave(&m->lock, flags);
1388        list_for_each_entry(pg, &m->priority_groups, list) {
1389                pg->bypassed = false;
1390                if (--pgnum)
1391                        continue;
1392
1393                m->current_pgpath = NULL;
1394                m->current_pg = NULL;
1395                m->next_pg = pg;
1396        }
1397        spin_unlock_irqrestore(&m->lock, flags);
1398
1399        schedule_work(&m->trigger_event);
1400        return 0;
1401}
1402
1403/*
1404 * Set/clear bypassed status of a PG.
1405 * PGs are numbered upwards from 1 in the order they were declared.
1406 */
1407static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
1408{
1409        struct priority_group *pg;
1410        unsigned pgnum;
1411        char dummy;
1412
1413        if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
1414            !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) {
1415                DMWARN("invalid PG number supplied to bypass_pg");
1416                return -EINVAL;
1417        }
1418
1419        list_for_each_entry(pg, &m->priority_groups, list) {
1420                if (!--pgnum)
1421                        break;
1422        }
1423
1424        bypass_pg(m, pg, bypassed);
1425        return 0;
1426}
1427
1428/*
1429 * Should we retry pg_init immediately?
1430 */
1431static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1432{
1433        unsigned long flags;
1434        bool limit_reached = false;
1435
1436        spin_lock_irqsave(&m->lock, flags);
1437
1438        if (atomic_read(&m->pg_init_count) <= m->pg_init_retries &&
1439            !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags))
1440                set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
1441        else
1442                limit_reached = true;
1443
1444        spin_unlock_irqrestore(&m->lock, flags);
1445
1446        return limit_reached;
1447}
1448
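/*
 * Completion callback for scsi_dh_activate().  Interpret the SCSI DH
 * status (fail the path, bypass the PG or schedule a retry) and, once
 * the last outstanding activation has finished, either re-run pg_init
 * or release any queued I/O and wake up waiters.
 */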
1449static void pg_init_done(void *data, int errors)
1450{
1451        struct pgpath *pgpath = data;
1452        struct priority_group *pg = pgpath->pg;
1453        struct multipath *m = pg->m;
1454        unsigned long flags;
1455        bool delay_retry = false;
1456
1457        /* device or driver problems */
1458        switch (errors) {
1459        case SCSI_DH_OK:
1460                break;
1461        case SCSI_DH_NOSYS:
1462                if (!m->hw_handler_name) {
1463                        errors = 0;
1464                        break;
1465                }
1466                DMERR("Could not failover the device: Handler scsi_dh_%s "
1467                      "Error %d.", m->hw_handler_name, errors);
1468                /*
1469                 * Fail path for now, so we do not ping pong
1470                 */
1471                fail_path(pgpath);
1472                break;
1473        case SCSI_DH_DEV_TEMP_BUSY:
1474                /*
1475                 * Probably doing something like FW upgrade on the
1476                 * controller so try the other pg.
1477                 */
1478                bypass_pg(m, pg, true);
1479                break;
1480        case SCSI_DH_RETRY:
1481                /* Wait before retrying. */
 1482                delay_retry = true;
1483                /* fall through */
1484        case SCSI_DH_IMM_RETRY:
1485        case SCSI_DH_RES_TEMP_UNAVAIL:
1486                if (pg_init_limit_reached(m, pgpath))
1487                        fail_path(pgpath);
1488                errors = 0;
1489                break;
1490        case SCSI_DH_DEV_OFFLINED:
1491        default:
1492                /*
1493                 * We probably do not want to fail the path for a device
1494                 * error, but this is what the old dm did. In future
1495                 * patches we can do more advanced handling.
1496                 */
1497                fail_path(pgpath);
1498        }
1499
1500        spin_lock_irqsave(&m->lock, flags);
1501        if (errors) {
1502                if (pgpath == m->current_pgpath) {
1503                        DMERR("Could not failover device. Error %d.", errors);
1504                        m->current_pgpath = NULL;
1505                        m->current_pg = NULL;
1506                }
1507        } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
1508                pg->bypassed = false;
1509
1510        if (atomic_dec_return(&m->pg_init_in_progress) > 0)
 1511                /* Activations of other paths are still ongoing */
1512                goto out;
1513
1514        if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
1515                if (delay_retry)
1516                        set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1517                else
1518                        clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags);
1519
1520                if (__pg_init_all_paths(m))
1521                        goto out;
1522        }
1523        clear_bit(MPATHF_QUEUE_IO, &m->flags);
1524
1525        process_queued_io_list(m);
1526
1527        /*
1528         * Wake up any thread waiting to suspend.
1529         */
1530        wake_up(&m->pg_init_wait);
1531
1532out:
1533        spin_unlock_irqrestore(&m->lock, flags);
1534}
1535
1536static void activate_or_offline_path(struct pgpath *pgpath)
1537{
1538        struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1539
1540        if (pgpath->is_active && !blk_queue_dying(q))
1541                scsi_dh_activate(q, pg_init_done, pgpath);
1542        else
1543                pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
1544}
1545
1546static void activate_path_work(struct work_struct *work)
1547{
1548        struct pgpath *pgpath =
1549                container_of(work, struct pgpath, activate_path.work);
1550
1551        activate_or_offline_path(pgpath);
1552}
1553
1554static int multipath_end_io(struct dm_target *ti, struct request *clone,
1555                            blk_status_t error, union map_info *map_context)
1556{
1557        struct dm_mpath_io *mpio = get_mpio(map_context);
1558        struct pgpath *pgpath = mpio->pgpath;
1559        int r = DM_ENDIO_DONE;
1560
1561        /*
1562         * We don't queue any clone request inside the multipath target
1563         * during end I/O handling, since those clone requests don't have
1564         * bio clones.  If we queue them inside the multipath target,
1565         * we need to make bio clones, that requires memory allocation.
1566         * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests
1567         *  don't have bio clones.)
1568         * Instead of queueing the clone request here, we queue the original
1569         * request into dm core, which will remake a clone request and
1570         * clone bios for it and resubmit it later.
1571         */
1572        if (error && blk_path_error(error)) {
1573                struct multipath *m = ti->private;
1574
1575                if (error == BLK_STS_RESOURCE)
1576                        r = DM_ENDIO_DELAY_REQUEUE;
1577                else
1578                        r = DM_ENDIO_REQUEUE;
1579
1580                if (pgpath)
1581                        fail_path(pgpath);
1582
1583                if (atomic_read(&m->nr_valid_paths) == 0 &&
1584                    !must_push_back_rq(m)) {
1585                        if (error == BLK_STS_IOERR)
1586                                dm_report_EIO(m);
1587                        /* complete with the original error */
1588                        r = DM_ENDIO_DONE;
1589                }
1590        }
1591
1592        if (pgpath) {
1593                struct path_selector *ps = &pgpath->pg->ps;
1594
1595                if (ps->type->end_io)
1596                        ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1597        }
1598
1599        return r;
1600}
1601
1602static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
1603                                blk_status_t *error)
1604{
1605        struct multipath *m = ti->private;
1606        struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
1607        struct pgpath *pgpath = mpio->pgpath;
1608        unsigned long flags;
1609        int r = DM_ENDIO_DONE;
1610
1611        if (!*error || !blk_path_error(*error))
1612                goto done;
1613
1614        if (pgpath)
1615                fail_path(pgpath);
1616
1617        if (atomic_read(&m->nr_valid_paths) == 0 &&
1618            !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
1619                if (must_push_back_bio(m)) {
1620                        r = DM_ENDIO_REQUEUE;
1621                } else {
1622                        dm_report_EIO(m);
1623                        *error = BLK_STS_IOERR;
1624                }
1625                goto done;
1626        }
1627
1628        spin_lock_irqsave(&m->lock, flags);
1629        bio_list_add(&m->queued_bios, clone);
1630        spin_unlock_irqrestore(&m->lock, flags);
1631        if (!test_bit(MPATHF_QUEUE_IO, &m->flags))
1632                queue_work(kmultipathd, &m->process_queued_bios);
1633
1634        r = DM_ENDIO_INCOMPLETE;
1635done:
1636        if (pgpath) {
1637                struct path_selector *ps = &pgpath->pg->ps;
1638
1639                if (ps->type->end_io)
1640                        ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1641        }
1642
1643        return r;
1644}
1645
1646/*
1647 * Suspend can't complete until all the I/O is processed, so if
1648 * the last path fails we must error any remaining I/O.
1649 * Note that if freeze_bdev fails while suspending, the
1650 * queue_if_no_path state is lost - userspace should reset it.
1651 */
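/*
 * Illustrative example only (hypothetical map name "mpatha"): after such a
 * failed suspend, userspace would typically restore the setting with
 * something like
 *
 *   dmsetup message mpatha 0 "queue_if_no_path"
 *
 * which is handled by multipath_message() below.
 */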
1652static void multipath_presuspend(struct dm_target *ti)
1653{
1654        struct multipath *m = ti->private;
1655
1656        queue_if_no_path(m, false, true);
1657}
1658
1659static void multipath_postsuspend(struct dm_target *ti)
1660{
1661        struct multipath *m = ti->private;
1662
1663        mutex_lock(&m->work_mutex);
1664        flush_multipath_work(m);
1665        mutex_unlock(&m->work_mutex);
1666}
1667
1668/*
1669 * Restore the queue_if_no_path setting.
1670 */
1671static void multipath_resume(struct dm_target *ti)
1672{
1673        struct multipath *m = ti->private;
1674        unsigned long flags;
1675
1676        spin_lock_irqsave(&m->lock, flags);
1677        assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags,
1678                   test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
1679        spin_unlock_irqrestore(&m->lock, flags);
1680}
1681
1682/*
1683 * Info output has the following format:
1684 * num_multipath_feature_args [multipath_feature_args]*
1685 * num_handler_status_args [handler_status_args]*
1686 * num_groups init_group_number
1687 *            [A|D|E num_ps_status_args [ps_status_args]*
1688 *             num_paths num_selector_args
1689 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1690 *
1691 * Table output has the following format (identical to the constructor string):
1692 * num_feature_args [features_args]*
1693 * num_handler_args hw_handler [hw_handler_args]*
1694 * num_groups init_group_number
1695 *     [priority selector-name num_ps_args [ps_args]*
1696 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1697 */
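/*
 * Illustrative example only (hypothetical two-path map in a single
 * priority group, no features, no hardware handler, and a path selector
 * that takes one table arg and emits one info arg per path, e.g.
 * queue-length).  Only the target-emitted portion is shown; dmsetup
 * prepends the segment start, length and target name:
 *
 *   Table: 0 0 1 1 queue-length 0 2 1 8:64 1000 8:48 1000
 *   Info:  2 0 0 0 1 1 E 0 2 1 8:64 A 0 62 8:48 A 0 61
 */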
1698static void multipath_status(struct dm_target *ti, status_type_t type,
1699                             unsigned status_flags, char *result, unsigned maxlen)
1700{
1701        int sz = 0;
1702        unsigned long flags;
1703        struct multipath *m = ti->private;
1704        struct priority_group *pg;
1705        struct pgpath *p;
1706        unsigned pg_num;
1707        char state;
1708
1709        spin_lock_irqsave(&m->lock, flags);
1710
1711        /* Features */
1712        if (type == STATUSTYPE_INFO)
1713                DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags),
1714                       atomic_read(&m->pg_init_count));
1715        else {
1716                DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) +
1717                              (m->pg_init_retries > 0) * 2 +
1718                              (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
1719                              test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) +
1720                              (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2);
1721
1722                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
1723                        DMEMIT("queue_if_no_path ");
1724                if (m->pg_init_retries)
1725                        DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1726                if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
1727                        DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
1728                if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags))
1729                        DMEMIT("retain_attached_hw_handler ");
1730                if (m->queue_mode != DM_TYPE_REQUEST_BASED) {
1731                        switch(m->queue_mode) {
1732                        case DM_TYPE_BIO_BASED:
1733                                DMEMIT("queue_mode bio ");
1734                                break;
1735                        default:
1736                                WARN_ON_ONCE(true);
1737                                break;
1738                        }
1739                }
1740        }
1741
1742        if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1743                DMEMIT("0 ");
1744        else
1745                DMEMIT("1 %s ", m->hw_handler_name);
1746
1747        DMEMIT("%u ", m->nr_priority_groups);
1748
1749        if (m->next_pg)
1750                pg_num = m->next_pg->pg_num;
1751        else if (m->current_pg)
1752                pg_num = m->current_pg->pg_num;
1753        else
1754                pg_num = (m->nr_priority_groups ? 1 : 0);
1755
1756        DMEMIT("%u ", pg_num);
1757
1758        switch (type) {
1759        case STATUSTYPE_INFO:
1760                list_for_each_entry(pg, &m->priority_groups, list) {
1761                        if (pg->bypassed)
1762                                state = 'D';    /* Disabled */
1763                        else if (pg == m->current_pg)
1764                                state = 'A';    /* Currently Active */
1765                        else
1766                                state = 'E';    /* Enabled */
1767
1768                        DMEMIT("%c ", state);
1769
1770                        if (pg->ps.type->status)
1771                                sz += pg->ps.type->status(&pg->ps, NULL, type,
1772                                                          result + sz,
1773                                                          maxlen - sz);
1774                        else
1775                                DMEMIT("0 ");
1776
1777                        DMEMIT("%u %u ", pg->nr_pgpaths,
1778                               pg->ps.type->info_args);
1779
1780                        list_for_each_entry(p, &pg->pgpaths, list) {
1781                                DMEMIT("%s %s %u ", p->path.dev->name,
1782                                       p->is_active ? "A" : "F",
1783                                       p->fail_count);
1784                                if (pg->ps.type->status)
1785                                        sz += pg->ps.type->status(&pg->ps,
1786                                              &p->path, type, result + sz,
1787                                              maxlen - sz);
1788                        }
1789                }
1790                break;
1791
1792        case STATUSTYPE_TABLE:
1793                list_for_each_entry(pg, &m->priority_groups, list) {
1794                        DMEMIT("%s ", pg->ps.type->name);
1795
1796                        if (pg->ps.type->status)
1797                                sz += pg->ps.type->status(&pg->ps, NULL, type,
1798                                                          result + sz,
1799                                                          maxlen - sz);
1800                        else
1801                                DMEMIT("0 ");
1802
1803                        DMEMIT("%u %u ", pg->nr_pgpaths,
1804                               pg->ps.type->table_args);
1805
1806                        list_for_each_entry(p, &pg->pgpaths, list) {
1807                                DMEMIT("%s ", p->path.dev->name);
1808                                if (pg->ps.type->status)
1809                                        sz += pg->ps.type->status(&pg->ps,
1810                                              &p->path, type, result + sz,
1811                                              maxlen - sz);
1812                        }
1813                }
1814                break;
1815        }
1816
1817        spin_unlock_irqrestore(&m->lock, flags);
1818}
1819
1820static int multipath_message(struct dm_target *ti, unsigned argc, char **argv,
1821                             char *result, unsigned maxlen)
1822{
1823        int r = -EINVAL;
1824        struct dm_dev *dev;
1825        struct multipath *m = ti->private;
1826        action_fn action;
1827
1828        mutex_lock(&m->work_mutex);
1829
1830        if (dm_suspended(ti)) {
1831                r = -EBUSY;
1832                goto out;
1833        }
1834
1835        if (argc == 1) {
1836                if (!strcasecmp(argv[0], "queue_if_no_path")) {
1837                        r = queue_if_no_path(m, true, false);
1838                        goto out;
1839                } else if (!strcasecmp(argv[0], "fail_if_no_path")) {
1840                        r = queue_if_no_path(m, false, false);
1841                        goto out;
1842                }
1843        }
1844
1845        if (argc != 2) {
1846                DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
1847                goto out;
1848        }
1849
1850        if (!strcasecmp(argv[0], "disable_group")) {
1851                r = bypass_pg_num(m, argv[1], true);
1852                goto out;
1853        } else if (!strcasecmp(argv[0], "enable_group")) {
1854                r = bypass_pg_num(m, argv[1], false);
1855                goto out;
1856        } else if (!strcasecmp(argv[0], "switch_group")) {
1857                r = switch_pg_num(m, argv[1]);
1858                goto out;
1859        } else if (!strcasecmp(argv[0], "reinstate_path"))
1860                action = reinstate_path;
1861        else if (!strcasecmp(argv[0], "fail_path"))
1862                action = fail_path;
1863        else {
1864                DMWARN("Unrecognised multipath message received: %s", argv[0]);
1865                goto out;
1866        }
1867
1868        r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
1869        if (r) {
1870                DMWARN("message: error getting device %s",
1871                       argv[1]);
1872                goto out;
1873        }
1874
1875        r = action_dev(m, dev, action);
1876
1877        dm_put_device(ti, dev);
1878
1879out:
1880        mutex_unlock(&m->work_mutex);
1881        return r;
1882}
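/*
 * Illustrative usage only (hypothetical map name "mpatha" and path
 * device 8:32); these messages are normally issued via dmsetup or
 * multipathd, e.g.:
 *
 *   dmsetup message mpatha 0 "fail_path 8:32"
 *   dmsetup message mpatha 0 "reinstate_path 8:32"
 *   dmsetup message mpatha 0 "switch_group 2"
 *   dmsetup message mpatha 0 "fail_if_no_path"
 */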
1883
1884static int multipath_prepare_ioctl(struct dm_target *ti,
1885                                   struct block_device **bdev)
1886{
1887        struct multipath *m = ti->private;
1888        struct pgpath *current_pgpath;
1889        int r;
1890
1891        current_pgpath = READ_ONCE(m->current_pgpath);
1892        if (!current_pgpath)
1893                current_pgpath = choose_pgpath(m, 0);
1894
1895        if (current_pgpath) {
1896                if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) {
1897                        *bdev = current_pgpath->path.dev->bdev;
1898                        r = 0;
1899                } else {
1900                        /* pg_init has not started or completed */
1901                        r = -ENOTCONN;
1902                }
1903        } else {
1904                /* No path is available */
1905                if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
1906                        r = -ENOTCONN;
1907                else
1908                        r = -EIO;
1909        }
1910
1911        if (r == -ENOTCONN) {
1912                if (!READ_ONCE(m->current_pg)) {
1913                        /* Path status changed, redo selection */
1914                        (void) choose_pgpath(m, 0);
1915                }
1916                if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
1917                        pg_init_all_paths(m);
1918                dm_table_run_md_queue_async(m->ti->table);
1919                process_queued_io_list(m);
1920        }
1921
1922        /*
1923         * Only pass ioctls through if the device sizes match exactly.
1924         */
1925        if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT)
1926                return 1;
1927        return r;
1928}
1929
1930static int multipath_iterate_devices(struct dm_target *ti,
1931                                     iterate_devices_callout_fn fn, void *data)
1932{
1933        struct multipath *m = ti->private;
1934        struct priority_group *pg;
1935        struct pgpath *p;
1936        int ret = 0;
1937
1938        list_for_each_entry(pg, &m->priority_groups, list) {
1939                list_for_each_entry(p, &pg->pgpaths, list) {
1940                        ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1941                        if (ret)
1942                                goto out;
1943                }
1944        }
1945
1946out:
1947        return ret;
1948}
1949
1950static int pgpath_busy(struct pgpath *pgpath)
1951{
1952        struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1953
1954        return blk_lld_busy(q);
1955}
1956
1957/*
1958 * We return "busy" only when we can map I/Os but the underlying devices
1959 * are busy (so even if we map I/Os now, the I/Os will wait on
1960 * the underlying queue).
1961 * In other words, if we want to kill I/Os or queue them inside us
1962 * due to map unavailability, we don't return "busy".  Otherwise,
1963 * dm core won't give us the I/Os and we can't do what we want.
1964 */
1965static int multipath_busy(struct dm_target *ti)
1966{
1967        bool busy = false, has_active = false;
1968        struct multipath *m = ti->private;
1969        struct priority_group *pg, *next_pg;
1970        struct pgpath *pgpath;
1971
1972        /* pg_init in progress */
1973        if (atomic_read(&m->pg_init_in_progress))
1974                return true;
1975
1976        /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
1977        if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
1978                return (m->queue_mode != DM_TYPE_REQUEST_BASED);
1979
1980        /* Guess which priority_group will be used at the next mapping time */
1981        pg = READ_ONCE(m->current_pg);
1982        next_pg = READ_ONCE(m->next_pg);
1983        if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg))
1984                pg = next_pg;
1985
1986        if (!pg) {
1987                /*
1988                 * We don't know which pg will be used at the next mapping time.
1989                 * We don't call choose_pgpath() here to avoid triggering
1990                 * pg_init just by a busy check.
1991                 * So we don't know whether the underlying devices we will be using
1992                 * at the next mapping time are busy or not. Just try mapping.
1993                 */
1994                return busy;
1995        }
1996
1997        /*
1998         * If there is at least one non-busy active path, the path selector
1999         * will be able to select it, so we consider such a pg as not busy.
2000         */
2001        busy = true;
2002        list_for_each_entry(pgpath, &pg->pgpaths, list) {
2003                if (pgpath->is_active) {
2004                        has_active = true;
2005                        if (!pgpath_busy(pgpath)) {
2006                                busy = false;
2007                                break;
2008                        }
2009                }
2010        }
2011
2012        if (!has_active) {
2013                /*
2014                 * No active path in this pg, so this pg won't be used and
2015                 * the current_pg will be changed at the next mapping time.
2016                 * We need to try mapping to determine it.
2017                 */
2018                busy = false;
2019        }
2020
2021        return busy;
2022}
2023
2024/*-----------------------------------------------------------------
2025 * Module setup
2026 *---------------------------------------------------------------*/
2027static struct target_type multipath_target = {
2028        .name = "multipath",
2029        .version = {1, 13, 0},
2030        .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
2031                    DM_TARGET_PASSES_INTEGRITY,
2032        .module = THIS_MODULE,
2033        .ctr = multipath_ctr,
2034        .dtr = multipath_dtr,
2035        .clone_and_map_rq = multipath_clone_and_map,
2036        .release_clone_rq = multipath_release_clone,
2037        .rq_end_io = multipath_end_io,
2038        .map = multipath_map_bio,
2039        .end_io = multipath_end_io_bio,
2040        .presuspend = multipath_presuspend,
2041        .postsuspend = multipath_postsuspend,
2042        .resume = multipath_resume,
2043        .status = multipath_status,
2044        .message = multipath_message,
2045        .prepare_ioctl = multipath_prepare_ioctl,
2046        .iterate_devices = multipath_iterate_devices,
2047        .busy = multipath_busy,
2048};
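/*
 * Illustrative example only (hypothetical device numbers and size): a
 * two-path round-robin map could be loaded with something like
 *
 *   echo "0 1048576 multipath 0 0 1 1 round-robin 0 2 1 8:16 1 8:32 1" | \
 *       dmsetup create mpatha
 *
 * i.e. no feature args, no hardware handler, one priority group with two
 * paths and a per-path repeat_count of 1.
 */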
2049
2050static int __init dm_multipath_init(void)
2051{
2052        int r;
2053
2054        kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
2055        if (!kmultipathd) {
2056                DMERR("failed to create workqueue kmpathd");
2057                r = -ENOMEM;
2058                goto bad_alloc_kmultipathd;
2059        }
2060
2061        /*
2062         * A separate workqueue is used to handle the device handlers
2063         * to avoid overloading the existing workqueue. Overloading the
2064         * old workqueue would also create a bottleneck in the
2065         * storage hardware device activation path.
2066         */
2067        kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
2068                                                  WQ_MEM_RECLAIM);
2069        if (!kmpath_handlerd) {
2070                DMERR("failed to create workqueue kmpath_handlerd");
2071                r = -ENOMEM;
2072                goto bad_alloc_kmpath_handlerd;
2073        }
2074
2075        r = dm_register_target(&multipath_target);
2076        if (r < 0) {
2077                DMERR("request-based register failed %d", r);
2078                r = -EINVAL;
2079                goto bad_register_target;
2080        }
2081
2082        return 0;
2083
2084bad_register_target:
2085        destroy_workqueue(kmpath_handlerd);
2086bad_alloc_kmpath_handlerd:
2087        destroy_workqueue(kmultipathd);
2088bad_alloc_kmultipathd:
2089        return r;
2090}
2091
2092static void __exit dm_multipath_exit(void)
2093{
2094        destroy_workqueue(kmpath_handlerd);
2095        destroy_workqueue(kmultipathd);
2096
2097        dm_unregister_target(&multipath_target);
2098}
2099
2100module_init(dm_multipath_init);
2101module_exit(dm_multipath_exit);
2102
2103MODULE_DESCRIPTION(DM_NAME " multipath target");
2104MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
2105MODULE_LICENSE("GPL");
2106