linux/drivers/md/dm-mpath.c
   1/*
   2 * Copyright (C) 2003 Sistina Software Limited.
   3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
   4 *
   5 * This file is released under the GPL.
   6 */
   7
   8#include <linux/device-mapper.h>
   9
  10#include "dm-path-selector.h"
  11#include "dm-uevent.h"
  12
  13#include <linux/ctype.h>
  14#include <linux/init.h>
  15#include <linux/mempool.h>
  16#include <linux/module.h>
  17#include <linux/pagemap.h>
  18#include <linux/slab.h>
  19#include <linux/time.h>
  20#include <linux/workqueue.h>
  21#include <scsi/scsi_dh.h>
  22#include <asm/atomic.h>
  23
  24#define DM_MSG_PREFIX "multipath"
  25#define MESG_STR(x) x, sizeof(x)
  26#define DM_PG_INIT_DELAY_MSECS 2000
  27#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
  28
  29/* Path properties */
  30struct pgpath {
  31        struct list_head list;
  32
  33        struct priority_group *pg;      /* Owning PG */
  34        unsigned is_active;             /* Path status */
  35        unsigned fail_count;            /* Cumulative failure count */
  36
  37        struct dm_path path;
  38        struct delayed_work activate_path;
  39};
  40
  41#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
  42
  43/*
   44 * Paths are grouped into Priority Groups, which are numbered from 1 upwards.
   45 * Each PG has a path selector which controls which path gets used.
  46 */
  47struct priority_group {
  48        struct list_head list;
  49
  50        struct multipath *m;            /* Owning multipath instance */
  51        struct path_selector ps;
  52
  53        unsigned pg_num;                /* Reference number */
  54        unsigned bypassed;              /* Temporarily bypass this PG? */
  55
  56        unsigned nr_pgpaths;            /* Number of paths in PG */
  57        struct list_head pgpaths;
  58};
  59
  60/* Multipath context */
  61struct multipath {
  62        struct list_head list;
  63        struct dm_target *ti;
  64
  65        spinlock_t lock;
  66
  67        const char *hw_handler_name;
  68        char *hw_handler_params;
  69
  70        unsigned nr_priority_groups;
  71        struct list_head priority_groups;
  72
  73        wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
  74
  75        unsigned pg_init_required;      /* pg_init needs calling? */
  76        unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
  77        unsigned pg_init_delay_retry;   /* Delay pg_init retry? */
  78
  79        unsigned nr_valid_paths;        /* Total number of usable paths */
  80        struct pgpath *current_pgpath;
  81        struct priority_group *current_pg;
  82        struct priority_group *next_pg; /* Switch to this PG if set */
  83        unsigned repeat_count;          /* I/Os left before calling PS again */
  84
  85        unsigned queue_io;              /* Must we queue all I/O? */
  86        unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
  87        unsigned saved_queue_if_no_path;/* Saved state during suspension */
  88        unsigned pg_init_retries;       /* Number of times to retry pg_init */
  89        unsigned pg_init_count;         /* Number of times pg_init called */
  90        unsigned pg_init_delay_msecs;   /* Number of msecs before pg_init retry */
  91
  92        struct work_struct process_queued_ios;
  93        struct list_head queued_ios;
  94        unsigned queue_size;
  95
  96        struct work_struct trigger_event;
  97
  98        /*
  99         * We must use a mempool of dm_mpath_io structs so that we
  100         * can resubmit requests on error.
 101         */
 102        mempool_t *mpio_pool;
 103
 104        struct mutex work_mutex;
 105};
 106
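/*
 * Illustrative object layout (a sketch inferred from the structures above,
 * not taken from kernel documentation): one multipath target owns a list of
 * priority groups, and each priority group owns a list of paths.
 *
 *   struct multipath
 *     priority_groups:
 *       struct priority_group (pg_num 1)
 *         pgpaths: struct pgpath -> dm_path (e.g. /dev/sda)
 *                  struct pgpath -> dm_path (e.g. /dev/sdb)
 *       struct priority_group (pg_num 2)
 *         pgpaths: struct pgpath -> dm_path (e.g. /dev/sdc)
 */
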
 107/*
  108 * Context information attached to each request we process.
 109 */
 110struct dm_mpath_io {
 111        struct pgpath *pgpath;
 112        size_t nr_bytes;
 113};
 114
 115typedef int (*action_fn) (struct pgpath *pgpath);
 116
 117#define MIN_IOS 256     /* Mempool size */
 118
 119static struct kmem_cache *_mpio_cache;
 120
 121static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
 122static void process_queued_ios(struct work_struct *work);
 123static void trigger_event(struct work_struct *work);
 124static void activate_path(struct work_struct *work);
 125
 126
 127/*-----------------------------------------------
 128 * Allocation routines
 129 *-----------------------------------------------*/
 130
 131static struct pgpath *alloc_pgpath(void)
 132{
 133        struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
 134
 135        if (pgpath) {
 136                pgpath->is_active = 1;
 137                INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
 138        }
 139
 140        return pgpath;
 141}
 142
 143static void free_pgpath(struct pgpath *pgpath)
 144{
 145        kfree(pgpath);
 146}
 147
 148static struct priority_group *alloc_priority_group(void)
 149{
 150        struct priority_group *pg;
 151
 152        pg = kzalloc(sizeof(*pg), GFP_KERNEL);
 153
 154        if (pg)
 155                INIT_LIST_HEAD(&pg->pgpaths);
 156
 157        return pg;
 158}
 159
 160static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
 161{
 162        struct pgpath *pgpath, *tmp;
 163        struct multipath *m = ti->private;
 164
 165        list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
 166                list_del(&pgpath->list);
 167                if (m->hw_handler_name)
 168                        scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
 169                dm_put_device(ti, pgpath->path.dev);
 170                free_pgpath(pgpath);
 171        }
 172}
 173
 174static void free_priority_group(struct priority_group *pg,
 175                                struct dm_target *ti)
 176{
 177        struct path_selector *ps = &pg->ps;
 178
 179        if (ps->type) {
 180                ps->type->destroy(ps);
 181                dm_put_path_selector(ps->type);
 182        }
 183
 184        free_pgpaths(&pg->pgpaths, ti);
 185        kfree(pg);
 186}
 187
 188static struct multipath *alloc_multipath(struct dm_target *ti)
 189{
 190        struct multipath *m;
 191
 192        m = kzalloc(sizeof(*m), GFP_KERNEL);
 193        if (m) {
 194                INIT_LIST_HEAD(&m->priority_groups);
 195                INIT_LIST_HEAD(&m->queued_ios);
 196                spin_lock_init(&m->lock);
 197                m->queue_io = 1;
 198                m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
 199                INIT_WORK(&m->process_queued_ios, process_queued_ios);
 200                INIT_WORK(&m->trigger_event, trigger_event);
 201                init_waitqueue_head(&m->pg_init_wait);
 202                mutex_init(&m->work_mutex);
 203                m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
 204                if (!m->mpio_pool) {
 205                        kfree(m);
 206                        return NULL;
 207                }
 208                m->ti = ti;
 209                ti->private = m;
 210        }
 211
 212        return m;
 213}
 214
 215static void free_multipath(struct multipath *m)
 216{
 217        struct priority_group *pg, *tmp;
 218
 219        list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
 220                list_del(&pg->list);
 221                free_priority_group(pg, m->ti);
 222        }
 223
 224        kfree(m->hw_handler_name);
 225        kfree(m->hw_handler_params);
 226        mempool_destroy(m->mpio_pool);
 227        kfree(m);
 228}
 229
 230
 231/*-----------------------------------------------
 232 * Path selection
 233 *-----------------------------------------------*/
 234
 235static void __pg_init_all_paths(struct multipath *m)
 236{
 237        struct pgpath *pgpath;
 238        unsigned long pg_init_delay = 0;
 239
 240        m->pg_init_count++;
 241        m->pg_init_required = 0;
 242        if (m->pg_init_delay_retry)
 243                pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
 244                                                 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
 245        list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
 246                /* Skip failed paths */
 247                if (!pgpath->is_active)
 248                        continue;
 249                if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
 250                                       pg_init_delay))
 251                        m->pg_init_in_progress++;
 252        }
 253}
 254
 255static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
 256{
 257        m->current_pg = pgpath->pg;
 258
 259        /* Must we initialise the PG first, and queue I/O till it's ready? */
 260        if (m->hw_handler_name) {
 261                m->pg_init_required = 1;
 262                m->queue_io = 1;
 263        } else {
 264                m->pg_init_required = 0;
 265                m->queue_io = 0;
 266        }
 267
 268        m->pg_init_count = 0;
 269}
 270
 271static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
 272                               size_t nr_bytes)
 273{
 274        struct dm_path *path;
 275
 276        path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
 277        if (!path)
 278                return -ENXIO;
 279
 280        m->current_pgpath = path_to_pgpath(path);
 281
 282        if (m->current_pg != pg)
 283                __switch_pg(m, m->current_pgpath);
 284
 285        return 0;
 286}
 287
 288static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
 289{
 290        struct priority_group *pg;
 291        unsigned bypassed = 1;
 292
 293        if (!m->nr_valid_paths)
 294                goto failed;
 295
 296        /* Were we instructed to switch PG? */
 297        if (m->next_pg) {
 298                pg = m->next_pg;
 299                m->next_pg = NULL;
 300                if (!__choose_path_in_pg(m, pg, nr_bytes))
 301                        return;
 302        }
 303
 304        /* Don't change PG until it has no remaining paths */
 305        if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
 306                return;
 307
 308        /*
 309         * Loop through priority groups until we find a valid path.
 310         * First time we skip PGs marked 'bypassed'.
 311         * Second time we only try the ones we skipped.
 312         */
 313        do {
 314                list_for_each_entry(pg, &m->priority_groups, list) {
 315                        if (pg->bypassed == bypassed)
 316                                continue;
 317                        if (!__choose_path_in_pg(m, pg, nr_bytes))
 318                                return;
 319                }
 320        } while (bypassed--);
 321
 322failed:
 323        m->current_pgpath = NULL;
 324        m->current_pg = NULL;
 325}
 326
 327/*
 328 * Check whether bios must be queued in the device-mapper core rather
 329 * than here in the target.
 330 *
 331 * m->lock must be held on entry.
 332 *
 333 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 334 * same value then we are not between multipath_presuspend()
 335 * and multipath_resume() calls and we have no need to check
 336 * for the DMF_NOFLUSH_SUSPENDING flag.
 337 */
 338static int __must_push_back(struct multipath *m)
 339{
 340        return (m->queue_if_no_path != m->saved_queue_if_no_path &&
 341                dm_noflush_suspending(m->ti));
 342}
 343
 344static int map_io(struct multipath *m, struct request *clone,
 345                  struct dm_mpath_io *mpio, unsigned was_queued)
 346{
 347        int r = DM_MAPIO_REMAPPED;
 348        size_t nr_bytes = blk_rq_bytes(clone);
 349        unsigned long flags;
 350        struct pgpath *pgpath;
 351        struct block_device *bdev;
 352
 353        spin_lock_irqsave(&m->lock, flags);
 354
 355        /* Do we need to select a new pgpath? */
 356        if (!m->current_pgpath ||
 357            (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
 358                __choose_pgpath(m, nr_bytes);
 359
 360        pgpath = m->current_pgpath;
 361
 362        if (was_queued)
 363                m->queue_size--;
 364
 365        if ((pgpath && m->queue_io) ||
 366            (!pgpath && m->queue_if_no_path)) {
 367                /* Queue for the daemon to resubmit */
 368                list_add_tail(&clone->queuelist, &m->queued_ios);
 369                m->queue_size++;
 370                if ((m->pg_init_required && !m->pg_init_in_progress) ||
 371                    !m->queue_io)
 372                        queue_work(kmultipathd, &m->process_queued_ios);
 373                pgpath = NULL;
 374                r = DM_MAPIO_SUBMITTED;
 375        } else if (pgpath) {
 376                bdev = pgpath->path.dev->bdev;
 377                clone->q = bdev_get_queue(bdev);
 378                clone->rq_disk = bdev->bd_disk;
 379        } else if (__must_push_back(m))
 380                r = DM_MAPIO_REQUEUE;
 381        else
 382                r = -EIO;       /* Failed */
 383
 384        mpio->pgpath = pgpath;
 385        mpio->nr_bytes = nr_bytes;
 386
 387        if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
 388                pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
 389                                              nr_bytes);
 390
 391        spin_unlock_irqrestore(&m->lock, flags);
 392
 393        return r;
 394}
 395
 396/*
 397 * If we run out of usable paths, should we queue I/O or error it?
 398 */
 399static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
 400                            unsigned save_old_value)
 401{
 402        unsigned long flags;
 403
 404        spin_lock_irqsave(&m->lock, flags);
 405
 406        if (save_old_value)
 407                m->saved_queue_if_no_path = m->queue_if_no_path;
 408        else
 409                m->saved_queue_if_no_path = queue_if_no_path;
 410        m->queue_if_no_path = queue_if_no_path;
 411        if (!m->queue_if_no_path && m->queue_size)
 412                queue_work(kmultipathd, &m->process_queued_ios);
 413
 414        spin_unlock_irqrestore(&m->lock, flags);
 415
 416        return 0;
 417}
 418
 419/*-----------------------------------------------------------------
 420 * The multipath daemon is responsible for resubmitting queued ios.
 421 *---------------------------------------------------------------*/
 422
 423static void dispatch_queued_ios(struct multipath *m)
 424{
 425        int r;
 426        unsigned long flags;
 427        struct dm_mpath_io *mpio;
 428        union map_info *info;
 429        struct request *clone, *n;
 430        LIST_HEAD(cl);
 431
 432        spin_lock_irqsave(&m->lock, flags);
 433        list_splice_init(&m->queued_ios, &cl);
 434        spin_unlock_irqrestore(&m->lock, flags);
 435
 436        list_for_each_entry_safe(clone, n, &cl, queuelist) {
 437                list_del_init(&clone->queuelist);
 438
 439                info = dm_get_rq_mapinfo(clone);
 440                mpio = info->ptr;
 441
 442                r = map_io(m, clone, mpio, 1);
 443                if (r < 0) {
 444                        mempool_free(mpio, m->mpio_pool);
 445                        dm_kill_unmapped_request(clone, r);
 446                } else if (r == DM_MAPIO_REMAPPED)
 447                        dm_dispatch_request(clone);
 448                else if (r == DM_MAPIO_REQUEUE) {
 449                        mempool_free(mpio, m->mpio_pool);
 450                        dm_requeue_unmapped_request(clone);
 451                }
 452        }
 453}
 454
 455static void process_queued_ios(struct work_struct *work)
 456{
 457        struct multipath *m =
 458                container_of(work, struct multipath, process_queued_ios);
 459        struct pgpath *pgpath = NULL;
 460        unsigned must_queue = 1;
 461        unsigned long flags;
 462
 463        spin_lock_irqsave(&m->lock, flags);
 464
 465        if (!m->queue_size)
 466                goto out;
 467
 468        if (!m->current_pgpath)
 469                __choose_pgpath(m, 0);
 470
 471        pgpath = m->current_pgpath;
 472
 473        if ((pgpath && !m->queue_io) ||
 474            (!pgpath && !m->queue_if_no_path))
 475                must_queue = 0;
 476
 477        if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
 478                __pg_init_all_paths(m);
 479
 480out:
 481        spin_unlock_irqrestore(&m->lock, flags);
 482        if (!must_queue)
 483                dispatch_queued_ios(m);
 484}
 485
 486/*
 487 * An event is triggered whenever a path is taken out of use.
 488 * Includes path failure and PG bypass.
 489 */
 490static void trigger_event(struct work_struct *work)
 491{
 492        struct multipath *m =
 493                container_of(work, struct multipath, trigger_event);
 494
 495        dm_table_event(m->ti->table);
 496}
 497
 498/*-----------------------------------------------------------------
 499 * Constructor/argument parsing:
 500 * <#multipath feature args> [<arg>]*
 501 * <#hw_handler args> [hw_handler [<arg>]*]
 502 * <#priority groups>
 503 * <initial priority group>
 504 *     [<selector> <#selector args> [<arg>]*
 505 *      <#paths> <#per-path selector args>
 506 *         [<path> [<arg>]* ]+ ]+
 507 *---------------------------------------------------------------*/
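/*
 * A minimal illustrative table line matching the grammar above (not taken
 * from this file; the device numbers and the "round-robin" selector, whose
 * single per-path argument is assumed here to be a repeat count, are only
 * examples):
 *
 *   0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. no feature args, no hardware handler, one priority group (also the
 * initial group), selector "round-robin" with no selector args, and two
 * paths each carrying one per-path selector arg.
 */
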
 508struct param {
 509        unsigned min;
 510        unsigned max;
 511        char *error;
 512};
 513
 514static int read_param(struct param *param, char *str, unsigned *v, char **error)
 515{
 516        if (!str ||
 517            (sscanf(str, "%u", v) != 1) ||
 518            (*v < param->min) ||
 519            (*v > param->max)) {
 520                *error = param->error;
 521                return -EINVAL;
 522        }
 523
 524        return 0;
 525}
 526
 527struct arg_set {
 528        unsigned argc;
 529        char **argv;
 530};
 531
 532static char *shift(struct arg_set *as)
 533{
 534        char *r;
 535
 536        if (as->argc) {
 537                as->argc--;
 538                r = *as->argv;
 539                as->argv++;
 540                return r;
 541        }
 542
 543        return NULL;
 544}
 545
 546static void consume(struct arg_set *as, unsigned n)
 547{
  548        BUG_ON(as->argc < n);
 549        as->argc -= n;
 550        as->argv += n;
 551}
 552
 553static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
 554                               struct dm_target *ti)
 555{
 556        int r;
 557        struct path_selector_type *pst;
 558        unsigned ps_argc;
 559
 560        static struct param _params[] = {
 561                {0, 1024, "invalid number of path selector args"},
 562        };
 563
 564        pst = dm_get_path_selector(shift(as));
 565        if (!pst) {
 566                ti->error = "unknown path selector type";
 567                return -EINVAL;
 568        }
 569
 570        r = read_param(_params, shift(as), &ps_argc, &ti->error);
 571        if (r) {
 572                dm_put_path_selector(pst);
 573                return -EINVAL;
 574        }
 575
 576        if (ps_argc > as->argc) {
 577                dm_put_path_selector(pst);
 578                ti->error = "not enough arguments for path selector";
 579                return -EINVAL;
 580        }
 581
 582        r = pst->create(&pg->ps, ps_argc, as->argv);
 583        if (r) {
 584                dm_put_path_selector(pst);
 585                ti->error = "path selector constructor failed";
 586                return r;
 587        }
 588
 589        pg->ps.type = pst;
 590        consume(as, ps_argc);
 591
 592        return 0;
 593}
 594
 595static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
 596                               struct dm_target *ti)
 597{
 598        int r;
 599        struct pgpath *p;
 600        struct multipath *m = ti->private;
 601
 602        /* we need at least a path arg */
 603        if (as->argc < 1) {
 604                ti->error = "no device given";
 605                return ERR_PTR(-EINVAL);
 606        }
 607
 608        p = alloc_pgpath();
 609        if (!p)
 610                return ERR_PTR(-ENOMEM);
 611
 612        r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
 613                          &p->path.dev);
 614        if (r) {
 615                ti->error = "error getting device";
 616                goto bad;
 617        }
 618
 619        if (m->hw_handler_name) {
 620                struct request_queue *q = bdev_get_queue(p->path.dev->bdev);
 621
 622                r = scsi_dh_attach(q, m->hw_handler_name);
 623                if (r == -EBUSY) {
 624                        /*
  625                         * Already attached to a different hw_handler,
  626                         * try to reattach with the correct one.
 627                         */
 628                        scsi_dh_detach(q);
 629                        r = scsi_dh_attach(q, m->hw_handler_name);
 630                }
 631
 632                if (r < 0) {
 633                        ti->error = "error attaching hardware handler";
 634                        dm_put_device(ti, p->path.dev);
 635                        goto bad;
 636                }
 637
 638                if (m->hw_handler_params) {
 639                        r = scsi_dh_set_params(q, m->hw_handler_params);
 640                        if (r < 0) {
 641                                ti->error = "unable to set hardware "
 642                                                        "handler parameters";
 643                                scsi_dh_detach(q);
 644                                dm_put_device(ti, p->path.dev);
 645                                goto bad;
 646                        }
 647                }
 648        }
 649
 650        r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
 651        if (r) {
 652                dm_put_device(ti, p->path.dev);
 653                goto bad;
 654        }
 655
 656        return p;
 657
 658 bad:
 659        free_pgpath(p);
 660        return ERR_PTR(r);
 661}
 662
 663static struct priority_group *parse_priority_group(struct arg_set *as,
 664                                                   struct multipath *m)
 665{
 666        static struct param _params[] = {
 667                {1, 1024, "invalid number of paths"},
 668                {0, 1024, "invalid number of selector args"}
 669        };
 670
 671        int r;
 672        unsigned i, nr_selector_args, nr_params;
 673        struct priority_group *pg;
 674        struct dm_target *ti = m->ti;
 675
 676        if (as->argc < 2) {
 677                as->argc = 0;
 678                ti->error = "not enough priority group arguments";
 679                return ERR_PTR(-EINVAL);
 680        }
 681
 682        pg = alloc_priority_group();
 683        if (!pg) {
 684                ti->error = "couldn't allocate priority group";
 685                return ERR_PTR(-ENOMEM);
 686        }
 687        pg->m = m;
 688
 689        r = parse_path_selector(as, pg, ti);
 690        if (r)
 691                goto bad;
 692
 693        /*
 694         * read the paths
 695         */
 696        r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
 697        if (r)
 698                goto bad;
 699
 700        r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
 701        if (r)
 702                goto bad;
 703
 704        nr_params = 1 + nr_selector_args;
 705        for (i = 0; i < pg->nr_pgpaths; i++) {
 706                struct pgpath *pgpath;
 707                struct arg_set path_args;
 708
 709                if (as->argc < nr_params) {
 710                        ti->error = "not enough path parameters";
 711                        r = -EINVAL;
 712                        goto bad;
 713                }
 714
 715                path_args.argc = nr_params;
 716                path_args.argv = as->argv;
 717
 718                pgpath = parse_path(&path_args, &pg->ps, ti);
 719                if (IS_ERR(pgpath)) {
 720                        r = PTR_ERR(pgpath);
 721                        goto bad;
 722                }
 723
 724                pgpath->pg = pg;
 725                list_add_tail(&pgpath->list, &pg->pgpaths);
 726                consume(as, nr_params);
 727        }
 728
 729        return pg;
 730
 731 bad:
 732        free_priority_group(pg, ti);
 733        return ERR_PTR(r);
 734}
 735
 736static int parse_hw_handler(struct arg_set *as, struct multipath *m)
 737{
 738        unsigned hw_argc;
 739        int ret;
 740        struct dm_target *ti = m->ti;
 741
 742        static struct param _params[] = {
 743                {0, 1024, "invalid number of hardware handler args"},
 744        };
 745
 746        if (read_param(_params, shift(as), &hw_argc, &ti->error))
 747                return -EINVAL;
 748
 749        if (!hw_argc)
 750                return 0;
 751
 752        if (hw_argc > as->argc) {
 753                ti->error = "not enough arguments for hardware handler";
 754                return -EINVAL;
 755        }
 756
 757        m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
 758        request_module("scsi_dh_%s", m->hw_handler_name);
 759        if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
 760                ti->error = "unknown hardware handler type";
 761                ret = -EINVAL;
 762                goto fail;
 763        }
 764
 765        if (hw_argc > 1) {
 766                char *p;
 767                int i, j, len = 4;
 768
 769                for (i = 0; i <= hw_argc - 2; i++)
 770                        len += strlen(as->argv[i]) + 1;
 771                p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
 772                if (!p) {
 773                        ti->error = "memory allocation failed";
 774                        ret = -ENOMEM;
 775                        goto fail;
 776                }
 777                j = sprintf(p, "%d", hw_argc - 1);
  778                for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
 779                        j = sprintf(p, "%s", as->argv[i]);
 780        }
 781        consume(as, hw_argc - 1);
 782
 783        return 0;
 784fail:
 785        kfree(m->hw_handler_name);
 786        m->hw_handler_name = NULL;
 787        return ret;
 788}
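
/*
 * Illustrative hw_handler argument strings accepted by parse_hw_handler()
 * above (handler names are examples; a matching scsi_dh module must exist):
 *
 *   "0"            - no hardware handler
 *   "1 alua"       - attach scsi_dh_alua, no handler parameters
 *   "3 foo p1 p2"  - hypothetical handler "foo" with two parameters; the
 *                    parameters are packed into hw_handler_params as the
 *                    NUL-separated strings "2", "p1", "p2".
 */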
 789
 790static int parse_features(struct arg_set *as, struct multipath *m)
 791{
 792        int r;
 793        unsigned argc;
 794        struct dm_target *ti = m->ti;
 795        const char *param_name;
 796
 797        static struct param _params[] = {
 798                {0, 5, "invalid number of feature args"},
 799                {1, 50, "pg_init_retries must be between 1 and 50"},
 800                {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
 801        };
 802
 803        r = read_param(_params, shift(as), &argc, &ti->error);
 804        if (r)
 805                return -EINVAL;
 806
 807        if (!argc)
 808                return 0;
 809
 810        do {
 811                param_name = shift(as);
 812                argc--;
 813
 814                if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
 815                        r = queue_if_no_path(m, 1, 0);
 816                        continue;
 817                }
 818
 819                if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
 820                    (argc >= 1)) {
 821                        r = read_param(_params + 1, shift(as),
 822                                       &m->pg_init_retries, &ti->error);
 823                        argc--;
 824                        continue;
 825                }
 826
 827                if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
 828                    (argc >= 1)) {
 829                        r = read_param(_params + 2, shift(as),
 830                                       &m->pg_init_delay_msecs, &ti->error);
 831                        argc--;
 832                        continue;
 833                }
 834
 835                ti->error = "Unrecognised multipath feature request";
 836                r = -EINVAL;
 837        } while (argc && !r);
 838
 839        return r;
 840}
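
/*
 * Illustrative feature argument strings accepted by parse_features() above
 * (the numeric values are examples within the documented ranges):
 *
 *   "0"                                     - no features
 *   "1 queue_if_no_path"                    - queue I/O when no path is usable
 *   "3 queue_if_no_path pg_init_retries 5"
 *   "2 pg_init_delay_msecs 2000"
 */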
 841
 842static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 843                         char **argv)
 844{
 845        /* target parameters */
 846        static struct param _params[] = {
 847                {1, 1024, "invalid number of priority groups"},
 848                {1, 1024, "invalid initial priority group number"},
 849        };
 850
 851        int r;
 852        struct multipath *m;
 853        struct arg_set as;
 854        unsigned pg_count = 0;
 855        unsigned next_pg_num;
 856
 857        as.argc = argc;
 858        as.argv = argv;
 859
 860        m = alloc_multipath(ti);
 861        if (!m) {
 862                ti->error = "can't allocate multipath";
 863                return -EINVAL;
 864        }
 865
 866        r = parse_features(&as, m);
 867        if (r)
 868                goto bad;
 869
 870        r = parse_hw_handler(&as, m);
 871        if (r)
 872                goto bad;
 873
 874        r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
 875        if (r)
 876                goto bad;
 877
 878        r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
 879        if (r)
 880                goto bad;
 881
 882        /* parse the priority groups */
 883        while (as.argc) {
 884                struct priority_group *pg;
 885
 886                pg = parse_priority_group(&as, m);
 887                if (IS_ERR(pg)) {
 888                        r = PTR_ERR(pg);
 889                        goto bad;
 890                }
 891
 892                m->nr_valid_paths += pg->nr_pgpaths;
 893                list_add_tail(&pg->list, &m->priority_groups);
 894                pg_count++;
 895                pg->pg_num = pg_count;
 896                if (!--next_pg_num)
 897                        m->next_pg = pg;
 898        }
 899
 900        if (pg_count != m->nr_priority_groups) {
 901                ti->error = "priority group count mismatch";
 902                r = -EINVAL;
 903                goto bad;
 904        }
 905
 906        ti->num_flush_requests = 1;
 907        ti->num_discard_requests = 1;
 908
 909        return 0;
 910
 911 bad:
 912        free_multipath(m);
 913        return r;
 914}
 915
 916static void multipath_wait_for_pg_init_completion(struct multipath *m)
 917{
 918        DECLARE_WAITQUEUE(wait, current);
 919        unsigned long flags;
 920
 921        add_wait_queue(&m->pg_init_wait, &wait);
 922
 923        while (1) {
 924                set_current_state(TASK_UNINTERRUPTIBLE);
 925
 926                spin_lock_irqsave(&m->lock, flags);
 927                if (!m->pg_init_in_progress) {
 928                        spin_unlock_irqrestore(&m->lock, flags);
 929                        break;
 930                }
 931                spin_unlock_irqrestore(&m->lock, flags);
 932
 933                io_schedule();
 934        }
 935        set_current_state(TASK_RUNNING);
 936
 937        remove_wait_queue(&m->pg_init_wait, &wait);
 938}
 939
 940static void flush_multipath_work(struct multipath *m)
 941{
 942        flush_workqueue(kmpath_handlerd);
 943        multipath_wait_for_pg_init_completion(m);
 944        flush_workqueue(kmultipathd);
 945        flush_work_sync(&m->trigger_event);
 946}
 947
 948static void multipath_dtr(struct dm_target *ti)
 949{
 950        struct multipath *m = ti->private;
 951
 952        flush_multipath_work(m);
 953        free_multipath(m);
 954}
 955
 956/*
 957 * Map cloned requests
 958 */
 959static int multipath_map(struct dm_target *ti, struct request *clone,
 960                         union map_info *map_context)
 961{
 962        int r;
 963        struct dm_mpath_io *mpio;
 964        struct multipath *m = (struct multipath *) ti->private;
 965
 966        mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
 967        if (!mpio)
 968                /* ENOMEM, requeue */
 969                return DM_MAPIO_REQUEUE;
 970        memset(mpio, 0, sizeof(*mpio));
 971
 972        map_context->ptr = mpio;
 973        clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
 974        r = map_io(m, clone, mpio, 0);
 975        if (r < 0 || r == DM_MAPIO_REQUEUE)
 976                mempool_free(mpio, m->mpio_pool);
 977
 978        return r;
 979}
 980
 981/*
 982 * Take a path out of use.
 983 */
 984static int fail_path(struct pgpath *pgpath)
 985{
 986        unsigned long flags;
 987        struct multipath *m = pgpath->pg->m;
 988
 989        spin_lock_irqsave(&m->lock, flags);
 990
 991        if (!pgpath->is_active)
 992                goto out;
 993
 994        DMWARN("Failing path %s.", pgpath->path.dev->name);
 995
 996        pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
 997        pgpath->is_active = 0;
 998        pgpath->fail_count++;
 999
1000        m->nr_valid_paths--;
1001
1002        if (pgpath == m->current_pgpath)
1003                m->current_pgpath = NULL;
1004
1005        dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
1006                      pgpath->path.dev->name, m->nr_valid_paths);
1007
1008        schedule_work(&m->trigger_event);
1009
1010out:
1011        spin_unlock_irqrestore(&m->lock, flags);
1012
1013        return 0;
1014}
1015
1016/*
1017 * Reinstate a previously-failed path
1018 */
1019static int reinstate_path(struct pgpath *pgpath)
1020{
1021        int r = 0;
1022        unsigned long flags;
1023        struct multipath *m = pgpath->pg->m;
1024
1025        spin_lock_irqsave(&m->lock, flags);
1026
1027        if (pgpath->is_active)
1028                goto out;
1029
1030        if (!pgpath->pg->ps.type->reinstate_path) {
1031                DMWARN("Reinstate path not supported by path selector %s",
1032                       pgpath->pg->ps.type->name);
1033                r = -EINVAL;
1034                goto out;
1035        }
1036
1037        r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
1038        if (r)
1039                goto out;
1040
1041        pgpath->is_active = 1;
1042
1043        if (!m->nr_valid_paths++ && m->queue_size) {
1044                m->current_pgpath = NULL;
1045                queue_work(kmultipathd, &m->process_queued_ios);
1046        } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
1047                if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
1048                        m->pg_init_in_progress++;
1049        }
1050
1051        dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
1052                      pgpath->path.dev->name, m->nr_valid_paths);
1053
1054        schedule_work(&m->trigger_event);
1055
1056out:
1057        spin_unlock_irqrestore(&m->lock, flags);
1058
1059        return r;
1060}
1061
1062/*
1063 * Fail or reinstate all paths that match the provided struct dm_dev.
1064 */
1065static int action_dev(struct multipath *m, struct dm_dev *dev,
1066                      action_fn action)
1067{
1068        int r = 0;
1069        struct pgpath *pgpath;
1070        struct priority_group *pg;
1071
1072        list_for_each_entry(pg, &m->priority_groups, list) {
1073                list_for_each_entry(pgpath, &pg->pgpaths, list) {
1074                        if (pgpath->path.dev == dev)
1075                                r = action(pgpath);
1076                }
1077        }
1078
1079        return r;
1080}
1081
1082/*
1083 * Temporarily try to avoid having to use the specified PG
1084 */
1085static void bypass_pg(struct multipath *m, struct priority_group *pg,
1086                      int bypassed)
1087{
1088        unsigned long flags;
1089
1090        spin_lock_irqsave(&m->lock, flags);
1091
1092        pg->bypassed = bypassed;
1093        m->current_pgpath = NULL;
1094        m->current_pg = NULL;
1095
1096        spin_unlock_irqrestore(&m->lock, flags);
1097
1098        schedule_work(&m->trigger_event);
1099}
1100
1101/*
1102 * Switch to using the specified PG from the next I/O that gets mapped
1103 */
1104static int switch_pg_num(struct multipath *m, const char *pgstr)
1105{
1106        struct priority_group *pg;
1107        unsigned pgnum;
1108        unsigned long flags;
1109
1110        if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1111            (pgnum > m->nr_priority_groups)) {
1112                DMWARN("invalid PG number supplied to switch_pg_num");
1113                return -EINVAL;
1114        }
1115
1116        spin_lock_irqsave(&m->lock, flags);
1117        list_for_each_entry(pg, &m->priority_groups, list) {
1118                pg->bypassed = 0;
1119                if (--pgnum)
1120                        continue;
1121
1122                m->current_pgpath = NULL;
1123                m->current_pg = NULL;
1124                m->next_pg = pg;
1125        }
1126        spin_unlock_irqrestore(&m->lock, flags);
1127
1128        schedule_work(&m->trigger_event);
1129        return 0;
1130}
1131
1132/*
1133 * Set/clear bypassed status of a PG.
1134 * PGs are numbered upwards from 1 in the order they were declared.
1135 */
1136static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
1137{
1138        struct priority_group *pg;
1139        unsigned pgnum;
1140
1141        if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
1142            (pgnum > m->nr_priority_groups)) {
1143                DMWARN("invalid PG number supplied to bypass_pg");
1144                return -EINVAL;
1145        }
1146
1147        list_for_each_entry(pg, &m->priority_groups, list) {
1148                if (!--pgnum)
1149                        break;
1150        }
1151
1152        bypass_pg(m, pg, bypassed);
1153        return 0;
1154}
1155
1156/*
1157 * Should we retry pg_init immediately?
1158 */
1159static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
1160{
1161        unsigned long flags;
1162        int limit_reached = 0;
1163
1164        spin_lock_irqsave(&m->lock, flags);
1165
1166        if (m->pg_init_count <= m->pg_init_retries)
1167                m->pg_init_required = 1;
1168        else
1169                limit_reached = 1;
1170
1171        spin_unlock_irqrestore(&m->lock, flags);
1172
1173        return limit_reached;
1174}
1175
1176static void pg_init_done(void *data, int errors)
1177{
1178        struct pgpath *pgpath = data;
1179        struct priority_group *pg = pgpath->pg;
1180        struct multipath *m = pg->m;
1181        unsigned long flags;
1182        unsigned delay_retry = 0;
1183
1184        /* device or driver problems */
1185        switch (errors) {
1186        case SCSI_DH_OK:
1187                break;
1188        case SCSI_DH_NOSYS:
1189                if (!m->hw_handler_name) {
1190                        errors = 0;
1191                        break;
1192                }
1193                DMERR("Could not failover the device: Handler scsi_dh_%s "
1194                      "Error %d.", m->hw_handler_name, errors);
1195                /*
 1196                 * Fail path for now, so we do not ping-pong
1197                 */
1198                fail_path(pgpath);
1199                break;
1200        case SCSI_DH_DEV_TEMP_BUSY:
1201                /*
1202                 * Probably doing something like FW upgrade on the
1203                 * controller so try the other pg.
1204                 */
1205                bypass_pg(m, pg, 1);
1206                break;
1207        case SCSI_DH_RETRY:
1208                /* Wait before retrying. */
1209                delay_retry = 1;
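                     /* fall through */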
1210        case SCSI_DH_IMM_RETRY:
1211        case SCSI_DH_RES_TEMP_UNAVAIL:
1212                if (pg_init_limit_reached(m, pgpath))
1213                        fail_path(pgpath);
1214                errors = 0;
1215                break;
1216        default:
1217                /*
1218                 * We probably do not want to fail the path for a device
1219                 * error, but this is what the old dm did. In future
1220                 * patches we can do more advanced handling.
1221                 */
1222                fail_path(pgpath);
1223        }
1224
1225        spin_lock_irqsave(&m->lock, flags);
1226        if (errors) {
1227                if (pgpath == m->current_pgpath) {
1228                        DMERR("Could not failover device. Error %d.", errors);
1229                        m->current_pgpath = NULL;
1230                        m->current_pg = NULL;
1231                }
1232        } else if (!m->pg_init_required)
1233                pg->bypassed = 0;
1234
1235        if (--m->pg_init_in_progress)
 1236                /* Activations of other paths are still ongoing */
1237                goto out;
1238
1239        if (!m->pg_init_required)
1240                m->queue_io = 0;
1241
1242        m->pg_init_delay_retry = delay_retry;
1243        queue_work(kmultipathd, &m->process_queued_ios);
1244
1245        /*
1246         * Wake up any thread waiting to suspend.
1247         */
1248        wake_up(&m->pg_init_wait);
1249
1250out:
1251        spin_unlock_irqrestore(&m->lock, flags);
1252}
1253
1254static void activate_path(struct work_struct *work)
1255{
1256        struct pgpath *pgpath =
1257                container_of(work, struct pgpath, activate_path.work);
1258
1259        scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
1260                                pg_init_done, pgpath);
1261}
1262
1263/*
1264 * end_io handling
1265 */
1266static int do_end_io(struct multipath *m, struct request *clone,
1267                     int error, struct dm_mpath_io *mpio)
1268{
1269        /*
1270         * We don't queue any clone request inside the multipath target
1271         * during end I/O handling, since those clone requests don't have
1272         * bio clones.  If we queue them inside the multipath target,
 1273         * we need to make bio clones, which requires memory allocation.
1274         * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
1275         *  don't have bio clones.)
1276         * Instead of queueing the clone request here, we queue the original
1277         * request into dm core, which will remake a clone request and
1278         * clone bios for it and resubmit it later.
1279         */
1280        int r = DM_ENDIO_REQUEUE;
1281        unsigned long flags;
1282
1283        if (!error && !clone->errors)
1284                return 0;       /* I/O complete */
1285
1286        if (error == -EOPNOTSUPP)
1287                return error;
1288
1289        if (clone->cmd_flags & REQ_DISCARD)
1290                /*
1291                 * Pass all discard request failures up.
1292                 * FIXME: only fail_path if the discard failed due to a
1293                 * transport problem.  This requires precise understanding
1294                 * of the underlying failure (e.g. the SCSI sense).
1295                 */
1296                return error;
1297
1298        if (mpio->pgpath)
1299                fail_path(mpio->pgpath);
1300
1301        spin_lock_irqsave(&m->lock, flags);
1302        if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
1303                r = -EIO;
1304        spin_unlock_irqrestore(&m->lock, flags);
1305
1306        return r;
1307}
1308
1309static int multipath_end_io(struct dm_target *ti, struct request *clone,
1310                            int error, union map_info *map_context)
1311{
1312        struct multipath *m = ti->private;
1313        struct dm_mpath_io *mpio = map_context->ptr;
1314        struct pgpath *pgpath = mpio->pgpath;
1315        struct path_selector *ps;
1316        int r;
1317
1318        r  = do_end_io(m, clone, error, mpio);
1319        if (pgpath) {
1320                ps = &pgpath->pg->ps;
1321                if (ps->type->end_io)
1322                        ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
1323        }
1324        mempool_free(mpio, m->mpio_pool);
1325
1326        return r;
1327}
1328
1329/*
 1330 * Suspend can't complete until all the I/O is processed, so if
1331 * the last path fails we must error any remaining I/O.
1332 * Note that if the freeze_bdev fails while suspending, the
1333 * queue_if_no_path state is lost - userspace should reset it.
1334 */
1335static void multipath_presuspend(struct dm_target *ti)
1336{
1337        struct multipath *m = (struct multipath *) ti->private;
1338
1339        queue_if_no_path(m, 0, 1);
1340}
1341
1342static void multipath_postsuspend(struct dm_target *ti)
1343{
1344        struct multipath *m = ti->private;
1345
1346        mutex_lock(&m->work_mutex);
1347        flush_multipath_work(m);
1348        mutex_unlock(&m->work_mutex);
1349}
1350
1351/*
1352 * Restore the queue_if_no_path setting.
1353 */
1354static void multipath_resume(struct dm_target *ti)
1355{
1356        struct multipath *m = (struct multipath *) ti->private;
1357        unsigned long flags;
1358
1359        spin_lock_irqsave(&m->lock, flags);
1360        m->queue_if_no_path = m->saved_queue_if_no_path;
1361        spin_unlock_irqrestore(&m->lock, flags);
1362}
1363
1364/*
1365 * Info output has the following format:
1366 * num_multipath_feature_args [multipath_feature_args]*
1367 * num_handler_status_args [handler_status_args]*
1368 * num_groups init_group_number
1369 *            [A|D|E num_ps_status_args [ps_status_args]*
1370 *             num_paths num_selector_args
1371 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
1372 *
1373 * Table output has the following format (identical to the constructor string):
1374 * num_feature_args [features_args]*
1375 * num_handler_args hw_handler [hw_handler_args]*
1376 * num_groups init_group_number
 1377 *     [selector-name num_ps_args [ps_args]*
1378 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
1379 */
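/*
 * Illustrative STATUSTYPE_INFO line for a two-path map, assuming a path
 * selector that reports no status args of its own (the selector portion of
 * the output is selector-specific):
 *
 *   2 0 0 0 1 1 A 0 2 0 8:16 A 0 8:32 A 0
 *
 * i.e. queue_size 0 and pg_init_count 0, no handler status, one group,
 * group 1 active, two paths (8:16 and 8:32) both active with fail_count 0.
 * The STATUSTYPE_TABLE output mirrors the constructor line documented
 * earlier in this file.
 */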
1380static int multipath_status(struct dm_target *ti, status_type_t type,
1381                            char *result, unsigned int maxlen)
1382{
1383        int sz = 0;
1384        unsigned long flags;
1385        struct multipath *m = (struct multipath *) ti->private;
1386        struct priority_group *pg;
1387        struct pgpath *p;
1388        unsigned pg_num;
1389        char state;
1390
1391        spin_lock_irqsave(&m->lock, flags);
1392
1393        /* Features */
1394        if (type == STATUSTYPE_INFO)
1395                DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
1396        else {
1397                DMEMIT("%u ", m->queue_if_no_path +
1398                              (m->pg_init_retries > 0) * 2 +
1399                              (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2);
1400                if (m->queue_if_no_path)
1401                        DMEMIT("queue_if_no_path ");
1402                if (m->pg_init_retries)
1403                        DMEMIT("pg_init_retries %u ", m->pg_init_retries);
1404                if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
1405                        DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
1406        }
1407
1408        if (!m->hw_handler_name || type == STATUSTYPE_INFO)
1409                DMEMIT("0 ");
1410        else
1411                DMEMIT("1 %s ", m->hw_handler_name);
1412
1413        DMEMIT("%u ", m->nr_priority_groups);
1414
1415        if (m->next_pg)
1416                pg_num = m->next_pg->pg_num;
1417        else if (m->current_pg)
1418                pg_num = m->current_pg->pg_num;
1419        else
 1420                pg_num = 1;
1421
1422        DMEMIT("%u ", pg_num);
1423
1424        switch (type) {
1425        case STATUSTYPE_INFO:
1426                list_for_each_entry(pg, &m->priority_groups, list) {
1427                        if (pg->bypassed)
1428                                state = 'D';    /* Disabled */
1429                        else if (pg == m->current_pg)
1430                                state = 'A';    /* Currently Active */
1431                        else
1432                                state = 'E';    /* Enabled */
1433
1434                        DMEMIT("%c ", state);
1435
1436                        if (pg->ps.type->status)
1437                                sz += pg->ps.type->status(&pg->ps, NULL, type,
1438                                                          result + sz,
1439                                                          maxlen - sz);
1440                        else
1441                                DMEMIT("0 ");
1442
1443                        DMEMIT("%u %u ", pg->nr_pgpaths,
1444                               pg->ps.type->info_args);
1445
1446                        list_for_each_entry(p, &pg->pgpaths, list) {
1447                                DMEMIT("%s %s %u ", p->path.dev->name,
1448                                       p->is_active ? "A" : "F",
1449                                       p->fail_count);
1450                                if (pg->ps.type->status)
1451                                        sz += pg->ps.type->status(&pg->ps,
1452                                              &p->path, type, result + sz,
1453                                              maxlen - sz);
1454                        }
1455                }
1456                break;
1457
1458        case STATUSTYPE_TABLE:
1459                list_for_each_entry(pg, &m->priority_groups, list) {
1460                        DMEMIT("%s ", pg->ps.type->name);
1461
1462                        if (pg->ps.type->status)
1463                                sz += pg->ps.type->status(&pg->ps, NULL, type,
1464                                                          result + sz,
1465                                                          maxlen - sz);
1466                        else
1467                                DMEMIT("0 ");
1468
1469                        DMEMIT("%u %u ", pg->nr_pgpaths,
1470                               pg->ps.type->table_args);
1471
1472                        list_for_each_entry(p, &pg->pgpaths, list) {
1473                                DMEMIT("%s ", p->path.dev->name);
1474                                if (pg->ps.type->status)
1475                                        sz += pg->ps.type->status(&pg->ps,
1476                                              &p->path, type, result + sz,
1477                                              maxlen - sz);
1478                        }
1479                }
1480                break;
1481        }
1482
1483        spin_unlock_irqrestore(&m->lock, flags);
1484
1485        return 0;
1486}
1487
1488static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
1489{
1490        int r = -EINVAL;
1491        struct dm_dev *dev;
1492        struct multipath *m = (struct multipath *) ti->private;
1493        action_fn action;
1494
1495        mutex_lock(&m->work_mutex);
1496
1497        if (dm_suspended(ti)) {
1498                r = -EBUSY;
1499                goto out;
1500        }
1501
1502        if (argc == 1) {
1503                if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
1504                        r = queue_if_no_path(m, 1, 0);
1505                        goto out;
1506                } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
1507                        r = queue_if_no_path(m, 0, 0);
1508                        goto out;
1509                }
1510        }
1511
1512        if (argc != 2) {
1513                DMWARN("Unrecognised multipath message received.");
1514                goto out;
1515        }
1516
1517        if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
1518                r = bypass_pg_num(m, argv[1], 1);
1519                goto out;
1520        } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
1521                r = bypass_pg_num(m, argv[1], 0);
1522                goto out;
1523        } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
1524                r = switch_pg_num(m, argv[1]);
1525                goto out;
1526        } else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
1527                action = reinstate_path;
1528        else if (!strnicmp(argv[0], MESG_STR("fail_path")))
1529                action = fail_path;
1530        else {
1531                DMWARN("Unrecognised multipath message received.");
1532                goto out;
1533        }
1534
1535        r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
1536        if (r) {
1537                DMWARN("message: error getting device %s",
1538                       argv[1]);
1539                goto out;
1540        }
1541
1542        r = action_dev(m, dev, action);
1543
1544        dm_put_device(ti, dev);
1545
1546out:
1547        mutex_unlock(&m->work_mutex);
1548        return r;
1549}
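
/*
 * Illustrative userspace invocations of the messages handled above (the map
 * name "mpatha" and device "8:32" are only examples; messages are sent as
 * "dmsetup message <map> <sector> <msg>", typically with sector 0 for this
 * target):
 *
 *   dmsetup message mpatha 0 queue_if_no_path
 *   dmsetup message mpatha 0 fail_if_no_path
 *   dmsetup message mpatha 0 switch_group 2
 *   dmsetup message mpatha 0 disable_group 1
 *   dmsetup message mpatha 0 enable_group 1
 *   dmsetup message mpatha 0 fail_path 8:32
 *   dmsetup message mpatha 0 reinstate_path 8:32
 */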
1550
1551static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
1552                           unsigned long arg)
1553{
1554        struct multipath *m = (struct multipath *) ti->private;
1555        struct block_device *bdev = NULL;
1556        fmode_t mode = 0;
1557        unsigned long flags;
1558        int r = 0;
1559
1560        spin_lock_irqsave(&m->lock, flags);
1561
1562        if (!m->current_pgpath)
1563                __choose_pgpath(m, 0);
1564
1565        if (m->current_pgpath) {
1566                bdev = m->current_pgpath->path.dev->bdev;
1567                mode = m->current_pgpath->path.dev->mode;
1568        }
1569
1570        if (m->queue_io)
1571                r = -EAGAIN;
1572        else if (!bdev)
1573                r = -EIO;
1574
1575        spin_unlock_irqrestore(&m->lock, flags);
1576
1577        return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
1578}
1579
1580static int multipath_iterate_devices(struct dm_target *ti,
1581                                     iterate_devices_callout_fn fn, void *data)
1582{
1583        struct multipath *m = ti->private;
1584        struct priority_group *pg;
1585        struct pgpath *p;
1586        int ret = 0;
1587
1588        list_for_each_entry(pg, &m->priority_groups, list) {
1589                list_for_each_entry(p, &pg->pgpaths, list) {
1590                        ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
1591                        if (ret)
1592                                goto out;
1593                }
1594        }
1595
1596out:
1597        return ret;
1598}
1599
1600static int __pgpath_busy(struct pgpath *pgpath)
1601{
1602        struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
1603
1604        return dm_underlying_device_busy(q);
1605}
1606
1607/*
 1608 * We return "busy" only when we can map I/Os but underlying devices
1609 * are busy (so even if we map I/Os now, the I/Os will wait on
1610 * the underlying queue).
1611 * In other words, if we want to kill I/Os or queue them inside us
1612 * due to map unavailability, we don't return "busy".  Otherwise,
1613 * dm core won't give us the I/Os and we can't do what we want.
1614 */
1615static int multipath_busy(struct dm_target *ti)
1616{
1617        int busy = 0, has_active = 0;
1618        struct multipath *m = ti->private;
1619        struct priority_group *pg;
1620        struct pgpath *pgpath;
1621        unsigned long flags;
1622
1623        spin_lock_irqsave(&m->lock, flags);
1624
1625        /* Guess which priority_group will be used at next mapping time */
1626        if (unlikely(!m->current_pgpath && m->next_pg))
1627                pg = m->next_pg;
1628        else if (likely(m->current_pg))
1629                pg = m->current_pg;
1630        else
1631                /*
1632                 * We don't know which pg will be used at next mapping time.
 1633                 * We don't call __choose_pgpath() here to avoid triggering
1634                 * pg_init just by busy checking.
1635                 * So we don't know whether underlying devices we will be using
1636                 * at next mapping time are busy or not. Just try mapping.
1637                 */
1638                goto out;
1639
1640        /*
 1641         * If there is at least one non-busy active path, the path selector
 1642         * will be able to select it, so we consider such a pg as not busy.
1643         */
1644        busy = 1;
1645        list_for_each_entry(pgpath, &pg->pgpaths, list)
1646                if (pgpath->is_active) {
1647                        has_active = 1;
1648
1649                        if (!__pgpath_busy(pgpath)) {
1650                                busy = 0;
1651                                break;
1652                        }
1653                }
1654
1655        if (!has_active)
1656                /*
1657                 * No active path in this pg, so this pg won't be used and
1658                 * the current_pg will be changed at next mapping time.
1659                 * We need to try mapping to determine it.
1660                 */
1661                busy = 0;
1662
1663out:
1664        spin_unlock_irqrestore(&m->lock, flags);
1665
1666        return busy;
1667}
1668
1669/*-----------------------------------------------------------------
1670 * Module setup
1671 *---------------------------------------------------------------*/
1672static struct target_type multipath_target = {
1673        .name = "multipath",
1674        .version = {1, 2, 0},
1675        .module = THIS_MODULE,
1676        .ctr = multipath_ctr,
1677        .dtr = multipath_dtr,
1678        .map_rq = multipath_map,
1679        .rq_end_io = multipath_end_io,
1680        .presuspend = multipath_presuspend,
1681        .postsuspend = multipath_postsuspend,
1682        .resume = multipath_resume,
1683        .status = multipath_status,
1684        .message = multipath_message,
1685        .ioctl  = multipath_ioctl,
1686        .iterate_devices = multipath_iterate_devices,
1687        .busy = multipath_busy,
1688};
1689
1690static int __init dm_multipath_init(void)
1691{
1692        int r;
1693
 1694        /* allocate a slab for the dm_mpath_io structs */
1695        _mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
1696        if (!_mpio_cache)
1697                return -ENOMEM;
1698
1699        r = dm_register_target(&multipath_target);
1700        if (r < 0) {
1701                DMERR("register failed %d", r);
1702                kmem_cache_destroy(_mpio_cache);
1703                return -EINVAL;
1704        }
1705
1706        kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
1707        if (!kmultipathd) {
1708                DMERR("failed to create workqueue kmpathd");
1709                dm_unregister_target(&multipath_target);
1710                kmem_cache_destroy(_mpio_cache);
1711                return -ENOMEM;
1712        }
1713
1714        /*
1715         * A separate workqueue is used to handle the device handlers
 1716         * to avoid overloading the existing workqueue. Overloading the
1717         * old workqueue would also create a bottleneck in the
1718         * path of the storage hardware device activation.
1719         */
1720        kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
1721                                                  WQ_MEM_RECLAIM);
1722        if (!kmpath_handlerd) {
1723                DMERR("failed to create workqueue kmpath_handlerd");
1724                destroy_workqueue(kmultipathd);
1725                dm_unregister_target(&multipath_target);
1726                kmem_cache_destroy(_mpio_cache);
1727                return -ENOMEM;
1728        }
1729
1730        DMINFO("version %u.%u.%u loaded",
1731               multipath_target.version[0], multipath_target.version[1],
1732               multipath_target.version[2]);
1733
1734        return r;
1735}
1736
1737static void __exit dm_multipath_exit(void)
1738{
1739        destroy_workqueue(kmpath_handlerd);
1740        destroy_workqueue(kmultipathd);
1741
1742        dm_unregister_target(&multipath_target);
1743        kmem_cache_destroy(_mpio_cache);
1744}
1745
1746module_init(dm_multipath_init);
1747module_exit(dm_multipath_exit);
1748
1749MODULE_DESCRIPTION(DM_NAME " multipath target");
1750MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
1751MODULE_LICENSE("GPL");
1752