linux/drivers/scsi/device_handler/scsi_dh_alua.c
<<
>>
Prefs
   1/*
   2 * Generic SCSI-3 ALUA SCSI Device Handler
   3 *
   4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
   5 * All rights reserved.
   6 *
   7 * This program is free software; you can redistribute it and/or modify
   8 * it under the terms of the GNU General Public License as published by
   9 * the Free Software Foundation; either version 2 of the License, or
  10 * (at your option) any later version.
  11 *
  12 * This program is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 * GNU General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU General Public License
  18 * along with this program; if not, write to the Free Software
  19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20 *
  21 */
  22#include <linux/slab.h>
  23#include <linux/delay.h>
  24#include <linux/module.h>
  25#include <asm/unaligned.h>
  26#include <scsi/scsi.h>
  27#include <scsi/scsi_proto.h>
  28#include <scsi/scsi_dbg.h>
  29#include <scsi/scsi_eh.h>
  30#include <scsi/scsi_dh.h>
  31
/* Name used as prefix in log messages, and handler version string */
#define ALUA_DH_NAME "alua"
#define ALUA_DH_VER "2.0"

/*
 * Bits of the "supported states" byte (byte 1) of an RTPG target port
 * group descriptor; decoded when alua_rtpg() logs the group state.
 */
#define TPGS_SUPPORT_NONE               0x00
#define TPGS_SUPPORT_OPTIMIZED          0x01
#define TPGS_SUPPORT_NONOPTIMIZED       0x02
#define TPGS_SUPPORT_STANDBY            0x04
#define TPGS_SUPPORT_UNAVAILABLE        0x08
#define TPGS_SUPPORT_LBA_DEPENDENT      0x10
#define TPGS_SUPPORT_OFFLINE            0x40
#define TPGS_SUPPORT_TRANSITION         0x80

/*
 * Format field in byte 4 of the RTPG response; RTPG_FMT_EXT_HDR marks a
 * response carrying the extended header (descriptors then start at byte 8
 * instead of byte 4, and byte 5 holds the transition timeout).
 */
#define RTPG_FMT_MASK                   0x70
#define RTPG_FMT_EXT_HDR                0x10

/* TPGS modes as compared against scsi_device_tpgs(); may be OR-ed together */
#define TPGS_MODE_UNINITIALIZED          -1
#define TPGS_MODE_NONE                  0x0
#define TPGS_MODE_IMPLICIT              0x1
#define TPGS_MODE_EXPLICIT              0x2

/* Initial RTPG response buffer size in bytes */
#define ALUA_RTPG_SIZE                  128
/* Command / default transition timeout in seconds (multiplied by HZ on use) */
#define ALUA_FAILOVER_TIMEOUT           60
/* Retry count handed to the SCSI command submission helpers */
#define ALUA_FAILOVER_RETRIES           5
/* NOTE(review): not referenced in the visible part of the file */
#define ALUA_RTPG_DELAY_MSECS           5

/* device handler flags */
#define ALUA_OPTIMIZE_STPG              0x01
#define ALUA_RTPG_EXT_HDR_UNSUPP        0x02
/* State machine flags */
#define ALUA_PG_RUN_RTPG                0x10
#define ALUA_PG_RUN_STPG                0x20
#define ALUA_PG_RUNNING                 0x40
  64
/*
 * Module parameter; when non-zero, newly allocated port groups get
 * ALUA_OPTIMIZE_STPG set in their flags (see alua_alloc_pg()).
 */
static uint optimize_stpg;
module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");

/* All known port groups, linked via alua_port_group.node */
static LIST_HEAD(port_group_list);
/* Held around lookups in and modifications of port_group_list */
static DEFINE_SPINLOCK(port_group_lock);
/* Workqueue on which the per-group rtpg_work is queued */
static struct workqueue_struct *kaluad_wq;
  72
/*
 * struct alua_port_group - state of one target port group of one device
 *
 * Instances are reference counted, kept on the global port_group_list and
 * shared by every alua_dh_data attached to the same group.
 */
struct alua_port_group {
        struct kref             kref;           /* released via release_port_group() */
        struct rcu_head         rcu;            /* used by kfree_rcu() on release */
        struct list_head        node;           /* link in port_group_list */
        struct list_head        dh_list;        /* attached alua_dh_data entries */
        unsigned char           device_id_str[256]; /* filled by scsi_vpd_lun_id() */
        int                     device_id_len;  /* 0 if no device id was found */
        int                     group_id;       /* target port group identifier */
        int                     tpgs;           /* TPGS_MODE_* bits of the device */
        int                     state;          /* SCSI_ACCESS_STATE_* from RTPG */
        int                     pref;           /* preferred bit from RTPG */
        unsigned                flags;          /* ALUA_* handler and state machine flags */
        unsigned char           transition_tmo; /* transition timeout in seconds */
        unsigned long           expiry;         /* jiffies deadline; 0 when not armed */
        unsigned long           interval;       /* requeue delay in seconds */
        struct delayed_work     rtpg_work;      /* runs alua_rtpg_work() */
        spinlock_t              lock;           /* taken around state/flags/dh_list updates */
        struct list_head        rtpg_list;      /* NOTE(review): users not visible here */
        struct scsi_device      *rtpg_sdev;     /* device the queued work operates on */
};
  93
/*
 * struct alua_dh_data - per-scsi_device handler data
 */
struct alua_dh_data {
        struct list_head        node;           /* link in alua_port_group.dh_list */
        struct alua_port_group __rcu *pg;       /* current port group, RCU protected */
        int                     group_id;       /* NOTE(review): not used in the visible code */
        spinlock_t              pg_lock;        /* held while updating @pg */
        struct scsi_device      *sdev;          /* owning device; expected to be non-NULL */
        int                     init_error;     /* NOTE(review): users not visible here */
        struct mutex            init_mutex;     /* NOTE(review): users not visible here */
};
 103
/*
 * struct alua_queue_data - completion callback handed to alua_rtpg_queue()
 *
 * NOTE(review): the consumers of this structure are outside the visible part
 * of the file; presumably entries are queued on alua_port_group.rtpg_list.
 */
struct alua_queue_data {
        struct list_head        entry;          /* list linkage */
        activate_complete       callback_fn;    /* callback to invoke */
        void                    *callback_data; /* opaque argument for callback_fn */
};
 109
/* NOTE(review): policy values are not referenced in the visible code */
#define ALUA_POLICY_SWITCH_CURRENT      0
#define ALUA_POLICY_SWITCH_ALL          1

/* Forward declarations for functions defined later in the file */
static void alua_rtpg_work(struct work_struct *work);
static bool alua_rtpg_queue(struct alua_port_group *pg,
                            struct scsi_device *sdev,
                            struct alua_queue_data *qdata, bool force);
static void alua_check(struct scsi_device *sdev, bool force);
 118
 119static void release_port_group(struct kref *kref)
 120{
 121        struct alua_port_group *pg;
 122
 123        pg = container_of(kref, struct alua_port_group, kref);
 124        if (pg->rtpg_sdev)
 125                flush_delayed_work(&pg->rtpg_work);
 126        spin_lock(&port_group_lock);
 127        list_del(&pg->node);
 128        spin_unlock(&port_group_lock);
 129        kfree_rcu(pg, rcu);
 130}
 131
 132/*
 133 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
 134 * @sdev: sdev the command should be sent to
 135 */
 136static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
 137                       int bufflen, struct scsi_sense_hdr *sshdr, int flags)
 138{
 139        u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)];
 140        int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 141                REQ_FAILFAST_DRIVER;
 142
 143        /* Prepare the command. */
 144        memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN));
 145        cdb[0] = MAINTENANCE_IN;
 146        if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
 147                cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
 148        else
 149                cdb[1] = MI_REPORT_TARGET_PGS;
 150        put_unaligned_be32(bufflen, &cdb[6]);
 151
 152        return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL,
 153                        sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
 154                        ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
 155}
 156
 157/*
 158 * submit_stpg - Issue a SET TARGET PORT GROUP command
 159 *
 160 * Currently we're only setting the current target port group state
 161 * to 'active/optimized' and let the array firmware figure out
 162 * the states of the remaining groups.
 163 */
 164static int submit_stpg(struct scsi_device *sdev, int group_id,
 165                       struct scsi_sense_hdr *sshdr)
 166{
 167        u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)];
 168        unsigned char stpg_data[8];
 169        int stpg_len = 8;
 170        int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 171                REQ_FAILFAST_DRIVER;
 172
 173        /* Prepare the data buffer */
 174        memset(stpg_data, 0, stpg_len);
 175        stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
 176        put_unaligned_be16(group_id, &stpg_data[6]);
 177
 178        /* Prepare the command. */
 179        memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT));
 180        cdb[0] = MAINTENANCE_OUT;
 181        cdb[1] = MO_SET_TARGET_PGS;
 182        put_unaligned_be32(stpg_len, &cdb[6]);
 183
 184        return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL,
 185                        sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
 186                        ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
 187}
 188
 189static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
 190                                                int group_id)
 191{
 192        struct alua_port_group *pg;
 193
 194        if (!id_str || !id_size || !strlen(id_str))
 195                return NULL;
 196
 197        list_for_each_entry(pg, &port_group_list, node) {
 198                if (pg->group_id != group_id)
 199                        continue;
 200                if (!pg->device_id_len || pg->device_id_len != id_size)
 201                        continue;
 202                if (strncmp(pg->device_id_str, id_str, id_size))
 203                        continue;
 204                if (!kref_get_unless_zero(&pg->kref))
 205                        continue;
 206                return pg;
 207        }
 208
 209        return NULL;
 210}
 211
 212/*
 213 * alua_alloc_pg - Allocate a new port_group structure
 214 * @sdev: scsi device
 215 * @h: alua device_handler data
 216 * @group_id: port group id
 217 *
 218 * Allocate a new port_group structure for a given
 219 * device.
 220 */
 221static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
 222                                             int group_id, int tpgs)
 223{
 224        struct alua_port_group *pg, *tmp_pg;
 225
 226        pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
 227        if (!pg)
 228                return ERR_PTR(-ENOMEM);
 229
 230        pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
 231                                            sizeof(pg->device_id_str));
 232        if (pg->device_id_len <= 0) {
 233                /*
 234                 * TPGS supported but no device identification found.
 235                 * Generate private device identification.
 236                 */
 237                sdev_printk(KERN_INFO, sdev,
 238                            "%s: No device descriptors found\n",
 239                            ALUA_DH_NAME);
 240                pg->device_id_str[0] = '\0';
 241                pg->device_id_len = 0;
 242        }
 243        pg->group_id = group_id;
 244        pg->tpgs = tpgs;
 245        pg->state = SCSI_ACCESS_STATE_OPTIMAL;
 246        if (optimize_stpg)
 247                pg->flags |= ALUA_OPTIMIZE_STPG;
 248        kref_init(&pg->kref);
 249        INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
 250        INIT_LIST_HEAD(&pg->rtpg_list);
 251        INIT_LIST_HEAD(&pg->node);
 252        INIT_LIST_HEAD(&pg->dh_list);
 253        spin_lock_init(&pg->lock);
 254
 255        spin_lock(&port_group_lock);
 256        tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
 257                                  group_id);
 258        if (tmp_pg) {
 259                spin_unlock(&port_group_lock);
 260                kfree(pg);
 261                return tmp_pg;
 262        }
 263
 264        list_add(&pg->node, &port_group_list);
 265        spin_unlock(&port_group_lock);
 266
 267        return pg;
 268}
 269
 270/*
 271 * alua_check_tpgs - Evaluate TPGS setting
 272 * @sdev: device to be checked
 273 *
 274 * Examine the TPGS setting of the sdev to find out if ALUA
 275 * is supported.
 276 */
 277static int alua_check_tpgs(struct scsi_device *sdev)
 278{
 279        int tpgs = TPGS_MODE_NONE;
 280
 281        /*
 282         * ALUA support for non-disk devices is fraught with
 283         * difficulties, so disable it for now.
 284         */
 285        if (sdev->type != TYPE_DISK) {
 286                sdev_printk(KERN_INFO, sdev,
 287                            "%s: disable for non-disk devices\n",
 288                            ALUA_DH_NAME);
 289                return tpgs;
 290        }
 291
 292        tpgs = scsi_device_tpgs(sdev);
 293        switch (tpgs) {
 294        case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
 295                sdev_printk(KERN_INFO, sdev,
 296                            "%s: supports implicit and explicit TPGS\n",
 297                            ALUA_DH_NAME);
 298                break;
 299        case TPGS_MODE_EXPLICIT:
 300                sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
 301                            ALUA_DH_NAME);
 302                break;
 303        case TPGS_MODE_IMPLICIT:
 304                sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
 305                            ALUA_DH_NAME);
 306                break;
 307        case TPGS_MODE_NONE:
 308                sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
 309                            ALUA_DH_NAME);
 310                break;
 311        default:
 312                sdev_printk(KERN_INFO, sdev,
 313                            "%s: unsupported TPGS setting %d\n",
 314                            ALUA_DH_NAME, tpgs);
 315                tpgs = TPGS_MODE_NONE;
 316                break;
 317        }
 318
 319        return tpgs;
 320}
 321
 322/*
 323 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
 324 * @sdev: device to be checked
 325 *
 326 * Extract the relative target port and the target port group
 327 * descriptor from the list of identificators.
 328 */
 329static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 330                          int tpgs)
 331{
 332        int rel_port = -1, group_id;
 333        struct alua_port_group *pg, *old_pg = NULL;
 334        bool pg_updated = false;
 335        unsigned long flags;
 336
 337        group_id = scsi_vpd_tpg_id(sdev, &rel_port);
 338        if (group_id < 0) {
 339                /*
 340                 * Internal error; TPGS supported but required
 341                 * VPD identification descriptors not present.
 342                 * Disable ALUA support
 343                 */
 344                sdev_printk(KERN_INFO, sdev,
 345                            "%s: No target port descriptors found\n",
 346                            ALUA_DH_NAME);
 347                return SCSI_DH_DEV_UNSUPP;
 348        }
 349
 350        pg = alua_alloc_pg(sdev, group_id, tpgs);
 351        if (IS_ERR(pg)) {
 352                if (PTR_ERR(pg) == -ENOMEM)
 353                        return SCSI_DH_NOMEM;
 354                return SCSI_DH_DEV_UNSUPP;
 355        }
 356        if (pg->device_id_len)
 357                sdev_printk(KERN_INFO, sdev,
 358                            "%s: device %s port group %x rel port %x\n",
 359                            ALUA_DH_NAME, pg->device_id_str,
 360                            group_id, rel_port);
 361        else
 362                sdev_printk(KERN_INFO, sdev,
 363                            "%s: port group %x rel port %x\n",
 364                            ALUA_DH_NAME, group_id, rel_port);
 365
 366        /* Check for existing port group references */
 367        spin_lock(&h->pg_lock);
 368        old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
 369        if (old_pg != pg) {
 370                /* port group has changed. Update to new port group */
 371                if (h->pg) {
 372                        spin_lock_irqsave(&old_pg->lock, flags);
 373                        list_del_rcu(&h->node);
 374                        spin_unlock_irqrestore(&old_pg->lock, flags);
 375                }
 376                rcu_assign_pointer(h->pg, pg);
 377                pg_updated = true;
 378        }
 379
 380        spin_lock_irqsave(&pg->lock, flags);
 381        if (pg_updated)
 382                list_add_rcu(&h->node, &pg->dh_list);
 383        spin_unlock_irqrestore(&pg->lock, flags);
 384
 385        alua_rtpg_queue(rcu_dereference_protected(h->pg,
 386                                                  lockdep_is_held(&h->pg_lock)),
 387                        sdev, NULL, true);
 388        spin_unlock(&h->pg_lock);
 389
 390        if (old_pg)
 391                kref_put(&old_pg->kref, release_port_group);
 392
 393        return SCSI_DH_OK;
 394}
 395
 396static char print_alua_state(unsigned char state)
 397{
 398        switch (state) {
 399        case SCSI_ACCESS_STATE_OPTIMAL:
 400                return 'A';
 401        case SCSI_ACCESS_STATE_ACTIVE:
 402                return 'N';
 403        case SCSI_ACCESS_STATE_STANDBY:
 404                return 'S';
 405        case SCSI_ACCESS_STATE_UNAVAILABLE:
 406                return 'U';
 407        case SCSI_ACCESS_STATE_LBA:
 408                return 'L';
 409        case SCSI_ACCESS_STATE_OFFLINE:
 410                return 'O';
 411        case SCSI_ACCESS_STATE_TRANSITIONING:
 412                return 'T';
 413        default:
 414                return 'X';
 415        }
 416}
 417
 418static int alua_check_sense(struct scsi_device *sdev,
 419                            struct scsi_sense_hdr *sense_hdr)
 420{
 421        switch (sense_hdr->sense_key) {
 422        case NOT_READY:
 423                if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
 424                        /*
 425                         * LUN Not Accessible - ALUA state transition
 426                         */
 427                        alua_check(sdev, false);
 428                        return NEEDS_RETRY;
 429                }
 430                break;
 431        case UNIT_ATTENTION:
 432                if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
 433                        /*
 434                         * Power On, Reset, or Bus Device Reset.
 435                         * Might have obscured a state transition,
 436                         * so schedule a recheck.
 437                         */
 438                        alua_check(sdev, true);
 439                        return ADD_TO_MLQUEUE;
 440                }
 441                if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
 442                        /*
 443                         * Device internal reset
 444                         */
 445                        return ADD_TO_MLQUEUE;
 446                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
 447                        /*
 448                         * Mode Parameters Changed
 449                         */
 450                        return ADD_TO_MLQUEUE;
 451                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
 452                        /*
 453                         * ALUA state changed
 454                         */
 455                        alua_check(sdev, true);
 456                        return ADD_TO_MLQUEUE;
 457                }
 458                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
 459                        /*
 460                         * Implicit ALUA state transition failed
 461                         */
 462                        alua_check(sdev, true);
 463                        return ADD_TO_MLQUEUE;
 464                }
 465                if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
 466                        /*
 467                         * Inquiry data has changed
 468                         */
 469                        return ADD_TO_MLQUEUE;
 470                if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
 471                        /*
 472                         * REPORTED_LUNS_DATA_HAS_CHANGED is reported
 473                         * when switching controllers on targets like
 474                         * Intel Multi-Flex. We can just retry.
 475                         */
 476                        return ADD_TO_MLQUEUE;
 477                break;
 478        }
 479
 480        return SCSI_RETURN_NOT_HANDLED;
 481}
 482
 483/*
 484 * alua_tur - Send a TEST UNIT READY
 485 * @sdev: device to which the TEST UNIT READY command should be send
 486 *
 487 * Send a TEST UNIT READY to @sdev to figure out the device state
 488 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
 489 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
 490 */
 491static int alua_tur(struct scsi_device *sdev)
 492{
 493        struct scsi_sense_hdr sense_hdr;
 494        int retval;
 495
 496        retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
 497                                      ALUA_FAILOVER_RETRIES, &sense_hdr);
 498        if (sense_hdr.sense_key == NOT_READY &&
 499            sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
 500                return SCSI_DH_RETRY;
 501        else if (retval)
 502                return SCSI_DH_IO;
 503        else
 504                return SCSI_DH_OK;
 505}
 506
 507/*
 508 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
 509 * @sdev: the device to be evaluated.
 510 *
 511 * Evaluate the Target Port Group State.
 512 * Returns SCSI_DH_DEV_OFFLINED if the path is
 513 * found to be unusable.
 514 */
 515static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 516{
 517        struct scsi_sense_hdr sense_hdr;
 518        struct alua_port_group *tmp_pg;
 519        int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE;
 520        unsigned char *desc, *buff;
 521        unsigned err, retval;
 522        unsigned int tpg_desc_tbl_off;
 523        unsigned char orig_transition_tmo;
 524        unsigned long flags;
 525
 526        if (!pg->expiry) {
 527                unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
 528
 529                if (pg->transition_tmo)
 530                        transition_tmo = pg->transition_tmo * HZ;
 531
 532                pg->expiry = round_jiffies_up(jiffies + transition_tmo);
 533        }
 534
 535        buff = kzalloc(bufflen, GFP_KERNEL);
 536        if (!buff)
 537                return SCSI_DH_DEV_TEMP_BUSY;
 538
 539 retry:
 540        err = 0;
 541        retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
 542
 543        if (retval) {
 544                if (!scsi_sense_valid(&sense_hdr)) {
 545                        sdev_printk(KERN_INFO, sdev,
 546                                    "%s: rtpg failed, result %d\n",
 547                                    ALUA_DH_NAME, retval);
 548                        kfree(buff);
 549                        if (driver_byte(retval) == DRIVER_ERROR)
 550                                return SCSI_DH_DEV_TEMP_BUSY;
 551                        return SCSI_DH_IO;
 552                }
 553
 554                /*
 555                 * submit_rtpg() has failed on existing arrays
 556                 * when requesting extended header info, and
 557                 * the array doesn't support extended headers,
 558                 * even though it shouldn't according to T10.
 559                 * The retry without rtpg_ext_hdr_req set
 560                 * handles this.
 561                 */
 562                if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
 563                    sense_hdr.sense_key == ILLEGAL_REQUEST &&
 564                    sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
 565                        pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
 566                        goto retry;
 567                }
 568                /*
 569                 * Retry on ALUA state transition or if any
 570                 * UNIT ATTENTION occurred.
 571                 */
 572                if (sense_hdr.sense_key == NOT_READY &&
 573                    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
 574                        err = SCSI_DH_RETRY;
 575                else if (sense_hdr.sense_key == UNIT_ATTENTION)
 576                        err = SCSI_DH_RETRY;
 577                if (err == SCSI_DH_RETRY &&
 578                    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
 579                        sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
 580                                    ALUA_DH_NAME);
 581                        scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 582                        kfree(buff);
 583                        return err;
 584                }
 585                sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
 586                            ALUA_DH_NAME);
 587                scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 588                kfree(buff);
 589                pg->expiry = 0;
 590                return SCSI_DH_IO;
 591        }
 592
 593        len = get_unaligned_be32(&buff[0]) + 4;
 594
 595        if (len > bufflen) {
 596                /* Resubmit with the correct length */
 597                kfree(buff);
 598                bufflen = len;
 599                buff = kmalloc(bufflen, GFP_KERNEL);
 600                if (!buff) {
 601                        sdev_printk(KERN_WARNING, sdev,
 602                                    "%s: kmalloc buffer failed\n",__func__);
 603                        /* Temporary failure, bypass */
 604                        pg->expiry = 0;
 605                        return SCSI_DH_DEV_TEMP_BUSY;
 606                }
 607                goto retry;
 608        }
 609
 610        orig_transition_tmo = pg->transition_tmo;
 611        if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
 612                pg->transition_tmo = buff[5];
 613        else
 614                pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
 615
 616        if (orig_transition_tmo != pg->transition_tmo) {
 617                sdev_printk(KERN_INFO, sdev,
 618                            "%s: transition timeout set to %d seconds\n",
 619                            ALUA_DH_NAME, pg->transition_tmo);
 620                pg->expiry = jiffies + pg->transition_tmo * HZ;
 621        }
 622
 623        if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
 624                tpg_desc_tbl_off = 8;
 625        else
 626                tpg_desc_tbl_off = 4;
 627
 628        for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
 629             k < len;
 630             k += off, desc += off) {
 631                u16 group_id = get_unaligned_be16(&desc[2]);
 632
 633                spin_lock_irqsave(&port_group_lock, flags);
 634                tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
 635                                          group_id);
 636                spin_unlock_irqrestore(&port_group_lock, flags);
 637                if (tmp_pg) {
 638                        if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
 639                                if ((tmp_pg == pg) ||
 640                                    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
 641                                        struct alua_dh_data *h;
 642
 643                                        tmp_pg->state = desc[0] & 0x0f;
 644                                        tmp_pg->pref = desc[0] >> 7;
 645                                        rcu_read_lock();
 646                                        list_for_each_entry_rcu(h,
 647                                                &tmp_pg->dh_list, node) {
 648                                                /* h->sdev should always be valid */
 649                                                BUG_ON(!h->sdev);
 650                                                h->sdev->access_state = desc[0];
 651                                        }
 652                                        rcu_read_unlock();
 653                                }
 654                                if (tmp_pg == pg)
 655                                        valid_states = desc[1];
 656                                spin_unlock_irqrestore(&tmp_pg->lock, flags);
 657                        }
 658                        kref_put(&tmp_pg->kref, release_port_group);
 659                }
 660                off = 8 + (desc[7] * 4);
 661        }
 662
 663        spin_lock_irqsave(&pg->lock, flags);
 664        sdev_printk(KERN_INFO, sdev,
 665                    "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
 666                    ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
 667                    pg->pref ? "preferred" : "non-preferred",
 668                    valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
 669                    valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
 670                    valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
 671                    valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
 672                    valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
 673                    valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
 674                    valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
 675
 676        switch (pg->state) {
 677        case SCSI_ACCESS_STATE_TRANSITIONING:
 678                if (time_before(jiffies, pg->expiry)) {
 679                        /* State transition, retry */
 680                        pg->interval = 2;
 681                        err = SCSI_DH_RETRY;
 682                } else {
 683                        struct alua_dh_data *h;
 684
 685                        /* Transitioning time exceeded, set port to standby */
 686                        err = SCSI_DH_IO;
 687                        pg->state = SCSI_ACCESS_STATE_STANDBY;
 688                        pg->expiry = 0;
 689                        rcu_read_lock();
 690                        list_for_each_entry_rcu(h, &pg->dh_list, node) {
 691                                BUG_ON(!h->sdev);
 692                                h->sdev->access_state =
 693                                        (pg->state & SCSI_ACCESS_STATE_MASK);
 694                                if (pg->pref)
 695                                        h->sdev->access_state |=
 696                                                SCSI_ACCESS_STATE_PREFERRED;
 697                        }
 698                        rcu_read_unlock();
 699                }
 700                break;
 701        case SCSI_ACCESS_STATE_OFFLINE:
 702                /* Path unusable */
 703                err = SCSI_DH_DEV_OFFLINED;
 704                pg->expiry = 0;
 705                break;
 706        default:
 707                /* Useable path if active */
 708                err = SCSI_DH_OK;
 709                pg->expiry = 0;
 710                break;
 711        }
 712        spin_unlock_irqrestore(&pg->lock, flags);
 713        kfree(buff);
 714        return err;
 715}
 716
 717/*
 718 * alua_stpg - Issue a SET TARGET PORT GROUP command
 719 *
 720 * Issue a SET TARGET PORT GROUP command and evaluate the
 721 * response. Returns SCSI_DH_RETRY per default to trigger
 722 * a re-evaluation of the target group state or SCSI_DH_OK
 723 * if no further action needs to be taken.
 724 */
 725static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
 726{
 727        int retval;
 728        struct scsi_sense_hdr sense_hdr;
 729
 730        if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
 731                /* Only implicit ALUA supported, retry */
 732                return SCSI_DH_RETRY;
 733        }
 734        switch (pg->state) {
 735        case SCSI_ACCESS_STATE_OPTIMAL:
 736                return SCSI_DH_OK;
 737        case SCSI_ACCESS_STATE_ACTIVE:
 738                if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
 739                    !pg->pref &&
 740                    (pg->tpgs & TPGS_MODE_IMPLICIT))
 741                        return SCSI_DH_OK;
 742                break;
 743        case SCSI_ACCESS_STATE_STANDBY:
 744        case SCSI_ACCESS_STATE_UNAVAILABLE:
 745                break;
 746        case SCSI_ACCESS_STATE_OFFLINE:
 747                return SCSI_DH_IO;
 748        case SCSI_ACCESS_STATE_TRANSITIONING:
 749                break;
 750        default:
 751                sdev_printk(KERN_INFO, sdev,
 752                            "%s: stpg failed, unhandled TPGS state %d",
 753                            ALUA_DH_NAME, pg->state);
 754                return SCSI_DH_NOSYS;
 755        }
 756        retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
 757
 758        if (retval) {
 759                if (!scsi_sense_valid(&sense_hdr)) {
 760                        sdev_printk(KERN_INFO, sdev,
 761                                    "%s: stpg failed, result %d",
 762                                    ALUA_DH_NAME, retval);
 763                        if (driver_byte(retval) == DRIVER_ERROR)
 764                                return SCSI_DH_DEV_TEMP_BUSY;
 765                } else {
 766                        sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
 767                                    ALUA_DH_NAME);
 768                        scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 769                }
 770        }
 771        /* Retry RTPG */
 772        return SCSI_DH_RETRY;
 773}
 774
/*
 * alua_rtpg_work - delayed-work handler for a port group
 * @work: work item embedded in the port group's rtpg_work
 *
 * Depending on the ALUA_PG_RUN_RTPG / ALUA_PG_RUN_STPG flags of the port
 * group, calls alua_rtpg() and/or alua_stpg() using pg->rtpg_sdev.
 * If a step returns SCSI_DH_RETRY, or ALUA_PG_RUN_RTPG was set again while
 * the lock was dropped, the work requeues itself and returns. Otherwise
 * every entry on pg->rtpg_list is completed with the final status and
 * freed, and one reference each on the device and on the port group is
 * dropped.
 */
static void alua_rtpg_work(struct work_struct *work)
{
	struct alua_port_group *pg =
		container_of(work, struct alua_port_group, rtpg_work.work);
	struct scsi_device *sdev;
	LIST_HEAD(qdata_list);
	int err = SCSI_DH_OK;
	struct alua_queue_data *qdata, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&pg->lock, flags);
	sdev = pg->rtpg_sdev;
	if (!sdev) {
		/*
		 * No device recorded for this run: neither RUN flag is
		 * expected to be set. Just drop a port group reference.
		 */
		WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
		WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
		spin_unlock_irqrestore(&pg->lock, flags);
		kref_put(&pg->kref, release_port_group);
		return;
	}
	pg->flags |= ALUA_PG_RUNNING;
	if (pg->flags & ALUA_PG_RUN_RTPG) {
		/* Snapshot the state while the lock is still held. */
		int state = pg->state;

		pg->flags &= ~ALUA_PG_RUN_RTPG;
		spin_unlock_irqrestore(&pg->lock, flags);
		if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
			if (alua_tur(sdev) == SCSI_DH_RETRY) {
				/*
				 * Clear RUNNING, re-arm the RTPG request and
				 * run again after pg->interval seconds.
				 */
				spin_lock_irqsave(&pg->lock, flags);
				pg->flags &= ~ALUA_PG_RUNNING;
				pg->flags |= ALUA_PG_RUN_RTPG;
				spin_unlock_irqrestore(&pg->lock, flags);
				queue_delayed_work(kaluad_wq, &pg->rtpg_work,
						   pg->interval * HZ);
				return;
			}
			/* Send RTPG on failure or if TUR indicates SUCCESS */
		}
		err = alua_rtpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);
		/*
		 * Requeue if RTPG asked for a retry or if RUN_RTPG was set
		 * again while the lock was not held.
		 */
		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
			pg->flags &= ~ALUA_PG_RUNNING;
			pg->flags |= ALUA_PG_RUN_RTPG;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
		/* RTPG failed without requesting a retry: skip the STPG step. */
		if (err != SCSI_DH_OK)
			pg->flags &= ~ALUA_PG_RUN_STPG;
	}
	if (pg->flags & ALUA_PG_RUN_STPG) {
		pg->flags &= ~ALUA_PG_RUN_STPG;
		spin_unlock_irqrestore(&pg->lock, flags);
		err = alua_stpg(sdev, pg);
		spin_lock_irqsave(&pg->lock, flags);
		if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
			/* Re-arm RTPG; interval is reset, so the delay is zero. */
			pg->flags |= ALUA_PG_RUN_RTPG;
			pg->interval = 0;
			pg->flags &= ~ALUA_PG_RUNNING;
			spin_unlock_irqrestore(&pg->lock, flags);
			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
					   pg->interval * HZ);
			return;
		}
	}

	/*
	 * Done: move the pending requests to a private list and clear
	 * rtpg_sdev under the lock, then run the callbacks unlocked.
	 */
	list_splice_init(&pg->rtpg_list, &qdata_list);
	pg->rtpg_sdev = NULL;
	spin_unlock_irqrestore(&pg->lock, flags);

	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
		list_del(&qdata->entry);
		if (qdata->callback_fn)
			qdata->callback_fn(qdata->callback_data, err);
		kfree(qdata);
	}
	spin_lock_irqsave(&pg->lock, flags);
	pg->flags &= ~ALUA_PG_RUNNING;
	spin_unlock_irqrestore(&pg->lock, flags);
	/* Drop the device and port group references held for this run. */
	scsi_device_put(sdev);
	kref_put(&pg->kref, release_port_group);
}
 857
 858/**
 859 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
 860 *
 861 * Returns true if and only if alua_rtpg_work() will be called asynchronously.
 862 * That function is responsible for calling @qdata->fn().
 863 */
 864static bool alua_rtpg_queue(struct alua_port_group *pg,
 865                            struct scsi_device *sdev,
 866                            struct alua_queue_data *qdata, bool force)
 867{
 868        int start_queue = 0;
 869        unsigned long flags;
 870        if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
 871                return false;
 872
 873        spin_lock_irqsave(&pg->lock, flags);
 874        if (qdata) {
 875                list_add_tail(&qdata->entry, &pg->rtpg_list);
 876                pg->flags |= ALUA_PG_RUN_STPG;
 877                force = true;
 878        }
 879        if (pg->rtpg_sdev == NULL) {
 880                pg->interval = 0;
 881                pg->flags |= ALUA_PG_RUN_RTPG;
 882                kref_get(&pg->kref);
 883                pg->rtpg_sdev = sdev;
 884                start_queue = 1;
 885        } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
 886                pg->flags |= ALUA_PG_RUN_RTPG;
 887                /* Do not queue if the worker is already running */
 888                if (!(pg->flags & ALUA_PG_RUNNING)) {
 889                        kref_get(&pg->kref);
 890                        start_queue = 1;
 891                }
 892        }
 893
 894        spin_unlock_irqrestore(&pg->lock, flags);
 895
 896        if (start_queue) {
 897                if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 898                                msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
 899                        sdev = NULL;
 900                else
 901                        kref_put(&pg->kref, release_port_group);
 902        }
 903        if (sdev)
 904                scsi_device_put(sdev);
 905
 906        return true;
 907}
 908
 909/*
 910 * alua_initialize - Initialize ALUA state
 911 * @sdev: the device to be initialized
 912 *
 913 * For the prep_fn to work correctly we have
 914 * to initialize the ALUA state for the device.
 915 */
 916static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
 917{
 918        int err = SCSI_DH_DEV_UNSUPP, tpgs;
 919
 920        mutex_lock(&h->init_mutex);
 921        tpgs = alua_check_tpgs(sdev);
 922        if (tpgs != TPGS_MODE_NONE)
 923                err = alua_check_vpd(sdev, h, tpgs);
 924        h->init_error = err;
 925        mutex_unlock(&h->init_mutex);
 926        return err;
 927}
 928/*
 929 * alua_set_params - set/unset the optimize flag
 930 * @sdev: device on the path to be activated
 931 * params - parameters in the following format
 932 *      "no_of_params\0param1\0param2\0param3\0...\0"
 933 * For example, to set the flag pass the following parameters
 934 * from multipath.conf
 935 *     hardware_handler        "2 alua 1"
 936 */
 937static int alua_set_params(struct scsi_device *sdev, const char *params)
 938{
 939        struct alua_dh_data *h = sdev->handler_data;
 940        struct alua_port_group *pg = NULL;
 941        unsigned int optimize = 0, argc;
 942        const char *p = params;
 943        int result = SCSI_DH_OK;
 944        unsigned long flags;
 945
 946        if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
 947                return -EINVAL;
 948
 949        while (*p++)
 950                ;
 951        if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
 952                return -EINVAL;
 953
 954        rcu_read_lock();
 955        pg = rcu_dereference(h->pg);
 956        if (!pg) {
 957                rcu_read_unlock();
 958                return -ENXIO;
 959        }
 960        spin_lock_irqsave(&pg->lock, flags);
 961        if (optimize)
 962                pg->flags |= ALUA_OPTIMIZE_STPG;
 963        else
 964                pg->flags &= ~ALUA_OPTIMIZE_STPG;
 965        spin_unlock_irqrestore(&pg->lock, flags);
 966        rcu_read_unlock();
 967
 968        return result;
 969}
 970
 971/*
 972 * alua_activate - activate a path
 973 * @sdev: device on the path to be activated
 974 *
 975 * We're currently switching the port group to be activated only and
 976 * let the array figure out the rest.
 977 * There may be other arrays which require us to switch all port groups
 978 * based on a certain policy. But until we actually encounter them it
 979 * should be okay.
 980 */
 981static int alua_activate(struct scsi_device *sdev,
 982                        activate_complete fn, void *data)
 983{
 984        struct alua_dh_data *h = sdev->handler_data;
 985        int err = SCSI_DH_OK;
 986        struct alua_queue_data *qdata;
 987        struct alua_port_group *pg;
 988
 989        qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
 990        if (!qdata) {
 991                err = SCSI_DH_RES_TEMP_UNAVAIL;
 992                goto out;
 993        }
 994        qdata->callback_fn = fn;
 995        qdata->callback_data = data;
 996
 997        mutex_lock(&h->init_mutex);
 998        rcu_read_lock();
 999        pg = rcu_dereference(h->pg);
1000        if (!pg || !kref_get_unless_zero(&pg->kref)) {
1001                rcu_read_unlock();
1002                kfree(qdata);
1003                err = h->init_error;
1004                mutex_unlock(&h->init_mutex);
1005                goto out;
1006        }
1007        rcu_read_unlock();
1008        mutex_unlock(&h->init_mutex);
1009
1010        if (alua_rtpg_queue(pg, sdev, qdata, true))
1011                fn = NULL;
1012        else
1013                err = SCSI_DH_DEV_OFFLINED;
1014        kref_put(&pg->kref, release_port_group);
1015out:
1016        if (fn)
1017                fn(data, err);
1018        return 0;
1019}
1020
1021/*
1022 * alua_check - check path status
1023 * @sdev: device on the path to be checked
1024 *
1025 * Check the device status
1026 */
1027static void alua_check(struct scsi_device *sdev, bool force)
1028{
1029        struct alua_dh_data *h = sdev->handler_data;
1030        struct alua_port_group *pg;
1031
1032        rcu_read_lock();
1033        pg = rcu_dereference(h->pg);
1034        if (!pg || !kref_get_unless_zero(&pg->kref)) {
1035                rcu_read_unlock();
1036                return;
1037        }
1038        rcu_read_unlock();
1039
1040        alua_rtpg_queue(pg, sdev, NULL, force);
1041        kref_put(&pg->kref, release_port_group);
1042}
1043
1044/*
1045 * alua_prep_fn - request callback
1046 *
1047 * Fail I/O to all paths not in state
1048 * active/optimized or active/non-optimized.
1049 */
1050static int alua_prep_fn(struct scsi_device *sdev, struct request *req)
1051{
1052        struct alua_dh_data *h = sdev->handler_data;
1053        struct alua_port_group *pg;
1054        unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
1055        int ret = BLKPREP_OK;
1056
1057        rcu_read_lock();
1058        pg = rcu_dereference(h->pg);
1059        if (pg)
1060                state = pg->state;
1061        rcu_read_unlock();
1062        if (state == SCSI_ACCESS_STATE_TRANSITIONING)
1063                ret = BLKPREP_DEFER;
1064        else if (state != SCSI_ACCESS_STATE_OPTIMAL &&
1065                 state != SCSI_ACCESS_STATE_ACTIVE &&
1066                 state != SCSI_ACCESS_STATE_LBA) {
1067                ret = BLKPREP_KILL;
1068                req->rq_flags |= RQF_QUIET;
1069        }
1070        return ret;
1071
1072}
1073
1074static void alua_rescan(struct scsi_device *sdev)
1075{
1076        struct alua_dh_data *h = sdev->handler_data;
1077
1078        alua_initialize(sdev, h);
1079}
1080
1081/*
1082 * alua_bus_attach - Attach device handler
1083 * @sdev: device to be attached to
1084 */
1085static int alua_bus_attach(struct scsi_device *sdev)
1086{
1087        struct alua_dh_data *h;
1088        int err, ret = -EINVAL;
1089
1090        h = kzalloc(sizeof(*h) , GFP_KERNEL);
1091        if (!h)
1092                return -ENOMEM;
1093        spin_lock_init(&h->pg_lock);
1094        rcu_assign_pointer(h->pg, NULL);
1095        h->init_error = SCSI_DH_OK;
1096        h->sdev = sdev;
1097        INIT_LIST_HEAD(&h->node);
1098
1099        mutex_init(&h->init_mutex);
1100        err = alua_initialize(sdev, h);
1101        if (err == SCSI_DH_NOMEM)
1102                ret = -ENOMEM;
1103        if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
1104                goto failed;
1105
1106        sdev->handler_data = h;
1107        return 0;
1108failed:
1109        kfree(h);
1110        return ret;
1111}
1112
1113/*
1114 * alua_bus_detach - Detach device handler
1115 * @sdev: device to be detached from
1116 */
1117static void alua_bus_detach(struct scsi_device *sdev)
1118{
1119        struct alua_dh_data *h = sdev->handler_data;
1120        struct alua_port_group *pg;
1121
1122        spin_lock(&h->pg_lock);
1123        pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
1124        rcu_assign_pointer(h->pg, NULL);
1125        h->sdev = NULL;
1126        spin_unlock(&h->pg_lock);
1127        if (pg) {
1128                spin_lock_irq(&pg->lock);
1129                list_del_rcu(&h->node);
1130                spin_unlock_irq(&pg->lock);
1131                kref_put(&pg->kref, release_port_group);
1132        }
1133        sdev->handler_data = NULL;
1134        kfree(h);
1135}
1136
1137static struct scsi_device_handler alua_dh = {
1138        .name = ALUA_DH_NAME,
1139        .module = THIS_MODULE,
1140        .attach = alua_bus_attach,
1141        .detach = alua_bus_detach,
1142        .prep_fn = alua_prep_fn,
1143        .check_sense = alua_check_sense,
1144        .activate = alua_activate,
1145        .rescan = alua_rescan,
1146        .set_params = alua_set_params,
1147};
1148
1149static int __init alua_init(void)
1150{
1151        int r;
1152
1153        kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
1154        if (!kaluad_wq) {
1155                /* Temporary failure, bypass */
1156                return SCSI_DH_DEV_TEMP_BUSY;
1157        }
1158
1159        r = scsi_register_device_handler(&alua_dh);
1160        if (r != 0) {
1161                printk(KERN_ERR "%s: Failed to register scsi device handler",
1162                        ALUA_DH_NAME);
1163                destroy_workqueue(kaluad_wq);
1164        }
1165        return r;
1166}
1167
1168static void __exit alua_exit(void)
1169{
1170        scsi_unregister_device_handler(&alua_dh);
1171        destroy_workqueue(kaluad_wq);
1172}
1173
/* Module entry/exit hooks. */
module_init(alua_init);
module_exit(alua_exit);

/* Module metadata. */
MODULE_DESCRIPTION("DM Multipath ALUA support");
MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
MODULE_LICENSE("GPL");
MODULE_VERSION(ALUA_DH_VER);
1181