linux/drivers/scsi/device_handler/scsi_dh_alua.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 * Generic SCSI-3 ALUA SCSI Device Handler
   4 *
   5 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH.
   6 * All rights reserved.
   7 */
   8#include <linux/slab.h>
   9#include <linux/delay.h>
  10#include <linux/module.h>
  11#include <asm/unaligned.h>
  12#include <scsi/scsi.h>
  13#include <scsi/scsi_proto.h>
  14#include <scsi/scsi_dbg.h>
  15#include <scsi/scsi_eh.h>
  16#include <scsi/scsi_dh.h>
  17
  /* Name and version string of this device handler. */
#define ALUA_DH_NAME "alua"
#define ALUA_DH_VER "2.0"

/*
 * Supported-state bits. They are tested against the valid_states field of
 * a port group, which is filled from byte 1 of the matching RTPG target
 * port group descriptor. TPGS_SUPPORT_ALL is the OR of all individual bits.
 */
#define TPGS_SUPPORT_NONE               0x00
#define TPGS_SUPPORT_OPTIMIZED          0x01
#define TPGS_SUPPORT_NONOPTIMIZED       0x02
#define TPGS_SUPPORT_STANDBY            0x04
#define TPGS_SUPPORT_UNAVAILABLE        0x08
#define TPGS_SUPPORT_LBA_DEPENDENT      0x10
#define TPGS_SUPPORT_OFFLINE            0x40
#define TPGS_SUPPORT_TRANSITION         0x80
#define TPGS_SUPPORT_ALL                0xdf

/*
 * Format field in byte 4 of the RTPG response; RTPG_FMT_EXT_HDR marks a
 * response that carries the extended header.
 */
#define RTPG_FMT_MASK                   0x70
#define RTPG_FMT_EXT_HDR                0x10

/*
 * TPGS modes, compared against the value returned by scsi_device_tpgs().
 * The negative value is parenthesized so that the macro stays a single
 * operand wherever it is expanded.
 */
#define TPGS_MODE_UNINITIALIZED         (-1)
#define TPGS_MODE_NONE                  0x0
#define TPGS_MODE_IMPLICIT              0x1
#define TPGS_MODE_EXPLICIT              0x2

/* Initial RTPG response buffer size in bytes. */
#define ALUA_RTPG_SIZE                  128
/* Command timeout / default transition timeout in seconds (scaled by HZ). */
#define ALUA_FAILOVER_TIMEOUT           60
/* Retry count handed to the SCSI mid layer for RTPG, STPG and TUR. */
#define ALUA_FAILOVER_RETRIES           5
/* Not referenced in the visible part of this file. */
#define ALUA_RTPG_DELAY_MSECS           5
/* Stored in pg->interval while a transition is in progress. */
#define ALUA_RTPG_RETRY_DELAY           2

/* device handler flags */
#define ALUA_OPTIMIZE_STPG              0x01
#define ALUA_RTPG_EXT_HDR_UNSUPP        0x02
/* State machine flags */
#define ALUA_PG_RUN_RTPG                0x10
#define ALUA_PG_RUN_STPG                0x20
#define ALUA_PG_RUNNING                 0x40
  52
  53static uint optimize_stpg;
  54module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR);
  55MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0.");
  56
  57static LIST_HEAD(port_group_list);
  58static DEFINE_SPINLOCK(port_group_lock);
  59static struct workqueue_struct *kaluad_wq;
  60
  61struct alua_port_group {
  62        struct kref             kref;
  63        struct rcu_head         rcu;
  64        struct list_head        node;
  65        struct list_head        dh_list;
  66        unsigned char           device_id_str[256];
  67        int                     device_id_len;
  68        int                     group_id;
  69        int                     tpgs;
  70        int                     state;
  71        int                     pref;
  72        int                     valid_states;
  73        unsigned                flags; /* used for optimizing STPG */
  74        unsigned char           transition_tmo;
  75        unsigned long           expiry;
  76        unsigned long           interval;
  77        struct delayed_work     rtpg_work;
  78        spinlock_t              lock;
  79        struct list_head        rtpg_list;
  80        struct scsi_device      *rtpg_sdev;
  81};
  82
  /*
   * struct alua_dh_data - per-device handler data.
   *
   * node:       entry in the dh_list of the port group the device belongs to.
   * pg:         RCU-managed pointer to that port group; alua_check_vpd()
   *             replaces it while holding pg_lock.
   * group_id:   not referenced in the visible code; presumably the port
   *             group id of the device - TODO confirm.
   * pg_lock:    serialises updates of pg.
   * sdev:       the SCSI device; its access_state is updated from RTPG data.
   * init_error, init_mutex: not referenced in the visible code.
   */
  83struct alua_dh_data {
  84        struct list_head        node;
  85        struct alua_port_group __rcu *pg;
  86        int                     group_id;
  87        spinlock_t              pg_lock;
  88        struct scsi_device      *sdev;
  89        int                     init_error;
  90        struct mutex            init_mutex;
  91};
  92
  /*
   * struct alua_queue_data - list element pairing a completion callback
   * with its argument. alua_rtpg_queue() accepts a pointer to one and
   * alua_rtpg_work() collects them on a local list; NOTE(review): when the
   * callback is actually invoked is outside the visible code - confirm.
   */
  93struct alua_queue_data {
  94        struct list_head        entry;
  95        activate_complete       callback_fn;
  96        void                    *callback_data;
  97};
  98
  /*
   * Switch policy values. They are not referenced in the visible part of
   * the file; presumably they select whether only the current port group or
   * all groups are switched - TODO confirm against the users.
   */
  99#define ALUA_POLICY_SWITCH_CURRENT      0
 100#define ALUA_POLICY_SWITCH_ALL          1
 101
/*
 * Forward declarations for functions that are used before their
 * definition: alua_rtpg_work() is installed as the delayed-work handler in
 * alua_alloc_pg(), alua_rtpg_queue() is called from alua_check_vpd() and
 * alua_check() from alua_check_sense().
 */
 102static void alua_rtpg_work(struct work_struct *work);
 103static bool alua_rtpg_queue(struct alua_port_group *pg,
 104                            struct scsi_device *sdev,
 105                            struct alua_queue_data *qdata, bool force);
 106static void alua_check(struct scsi_device *sdev, bool force);
 107
 108static void release_port_group(struct kref *kref)
 109{
 110        struct alua_port_group *pg;
 111
 112        pg = container_of(kref, struct alua_port_group, kref);
 113        if (pg->rtpg_sdev)
 114                flush_delayed_work(&pg->rtpg_work);
 115        spin_lock(&port_group_lock);
 116        list_del(&pg->node);
 117        spin_unlock(&port_group_lock);
 118        kfree_rcu(pg, rcu);
 119}
 120
 121/*
 122 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command
 123 * @sdev: sdev the command should be sent to
 124 */
 125static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff,
 126                       int bufflen, struct scsi_sense_hdr *sshdr, int flags)
 127{
 128        u8 cdb[MAX_COMMAND_SIZE];
 129        int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 130                REQ_FAILFAST_DRIVER;
 131
 132        /* Prepare the command. */
 133        memset(cdb, 0x0, MAX_COMMAND_SIZE);
 134        cdb[0] = MAINTENANCE_IN;
 135        if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP))
 136                cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT;
 137        else
 138                cdb[1] = MI_REPORT_TARGET_PGS;
 139        put_unaligned_be32(bufflen, &cdb[6]);
 140
 141        return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL,
 142                        sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
 143                        ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
 144}
 145
 146/*
 147 * submit_stpg - Issue a SET TARGET PORT GROUP command
 148 *
 149 * Currently we're only setting the current target port group state
 150 * to 'active/optimized' and let the array firmware figure out
 151 * the states of the remaining groups.
 152 */
 153static int submit_stpg(struct scsi_device *sdev, int group_id,
 154                       struct scsi_sense_hdr *sshdr)
 155{
 156        u8 cdb[MAX_COMMAND_SIZE];
 157        unsigned char stpg_data[8];
 158        int stpg_len = 8;
 159        int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
 160                REQ_FAILFAST_DRIVER;
 161
 162        /* Prepare the data buffer */
 163        memset(stpg_data, 0, stpg_len);
 164        stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL;
 165        put_unaligned_be16(group_id, &stpg_data[6]);
 166
 167        /* Prepare the command. */
 168        memset(cdb, 0x0, MAX_COMMAND_SIZE);
 169        cdb[0] = MAINTENANCE_OUT;
 170        cdb[1] = MO_SET_TARGET_PGS;
 171        put_unaligned_be32(stpg_len, &cdb[6]);
 172
 173        return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL,
 174                        sshdr, ALUA_FAILOVER_TIMEOUT * HZ,
 175                        ALUA_FAILOVER_RETRIES, req_flags, 0, NULL);
 176}
 177
 178static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size,
 179                                                int group_id)
 180{
 181        struct alua_port_group *pg;
 182
 183        if (!id_str || !id_size || !strlen(id_str))
 184                return NULL;
 185
 186        list_for_each_entry(pg, &port_group_list, node) {
 187                if (pg->group_id != group_id)
 188                        continue;
 189                if (!pg->device_id_len || pg->device_id_len != id_size)
 190                        continue;
 191                if (strncmp(pg->device_id_str, id_str, id_size))
 192                        continue;
 193                if (!kref_get_unless_zero(&pg->kref))
 194                        continue;
 195                return pg;
 196        }
 197
 198        return NULL;
 199}
 200
 201/*
 202 * alua_alloc_pg - Allocate a new port_group structure
 203 * @sdev: scsi device
 204 * @group_id: port group id
 205 * @tpgs: target port group settings
 206 *
 207 * Allocate a new port_group structure for a given
 208 * device.
 209 */
 210static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev,
 211                                             int group_id, int tpgs)
 212{
 213        struct alua_port_group *pg, *tmp_pg;
 214
 215        pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
 216        if (!pg)
 217                return ERR_PTR(-ENOMEM);
 218
 219        pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str,
 220                                            sizeof(pg->device_id_str));
 221        if (pg->device_id_len <= 0) {
 222                /*
 223                 * TPGS supported but no device identification found.
 224                 * Generate private device identification.
 225                 */
 226                sdev_printk(KERN_INFO, sdev,
 227                            "%s: No device descriptors found\n",
 228                            ALUA_DH_NAME);
 229                pg->device_id_str[0] = '\0';
 230                pg->device_id_len = 0;
 231        }
 232        pg->group_id = group_id;
 233        pg->tpgs = tpgs;
 234        pg->state = SCSI_ACCESS_STATE_OPTIMAL;
 235        pg->valid_states = TPGS_SUPPORT_ALL;
 236        if (optimize_stpg)
 237                pg->flags |= ALUA_OPTIMIZE_STPG;
 238        kref_init(&pg->kref);
 239        INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work);
 240        INIT_LIST_HEAD(&pg->rtpg_list);
 241        INIT_LIST_HEAD(&pg->node);
 242        INIT_LIST_HEAD(&pg->dh_list);
 243        spin_lock_init(&pg->lock);
 244
 245        spin_lock(&port_group_lock);
 246        tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
 247                                  group_id);
 248        if (tmp_pg) {
 249                spin_unlock(&port_group_lock);
 250                kfree(pg);
 251                return tmp_pg;
 252        }
 253
 254        list_add(&pg->node, &port_group_list);
 255        spin_unlock(&port_group_lock);
 256
 257        return pg;
 258}
 259
 260/*
 261 * alua_check_tpgs - Evaluate TPGS setting
 262 * @sdev: device to be checked
 263 *
 264 * Examine the TPGS setting of the sdev to find out if ALUA
 265 * is supported.
 266 */
 267static int alua_check_tpgs(struct scsi_device *sdev)
 268{
 269        int tpgs = TPGS_MODE_NONE;
 270
 271        /*
 272         * ALUA support for non-disk devices is fraught with
 273         * difficulties, so disable it for now.
 274         */
 275        if (sdev->type != TYPE_DISK) {
 276                sdev_printk(KERN_INFO, sdev,
 277                            "%s: disable for non-disk devices\n",
 278                            ALUA_DH_NAME);
 279                return tpgs;
 280        }
 281
 282        tpgs = scsi_device_tpgs(sdev);
 283        switch (tpgs) {
 284        case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT:
 285                sdev_printk(KERN_INFO, sdev,
 286                            "%s: supports implicit and explicit TPGS\n",
 287                            ALUA_DH_NAME);
 288                break;
 289        case TPGS_MODE_EXPLICIT:
 290                sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n",
 291                            ALUA_DH_NAME);
 292                break;
 293        case TPGS_MODE_IMPLICIT:
 294                sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n",
 295                            ALUA_DH_NAME);
 296                break;
 297        case TPGS_MODE_NONE:
 298                sdev_printk(KERN_INFO, sdev, "%s: not supported\n",
 299                            ALUA_DH_NAME);
 300                break;
 301        default:
 302                sdev_printk(KERN_INFO, sdev,
 303                            "%s: unsupported TPGS setting %d\n",
 304                            ALUA_DH_NAME, tpgs);
 305                tpgs = TPGS_MODE_NONE;
 306                break;
 307        }
 308
 309        return tpgs;
 310}
 311
 312/*
 313 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83
 314 * @sdev: device to be checked
 315 *
 316 * Extract the relative target port and the target port group
 317 * descriptor from the list of identificators.
 318 */
 319static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 320                          int tpgs)
 321{
 322        int rel_port = -1, group_id;
 323        struct alua_port_group *pg, *old_pg = NULL;
 324        bool pg_updated = false;
 325        unsigned long flags;
 326
 327        group_id = scsi_vpd_tpg_id(sdev, &rel_port);
 328        if (group_id < 0) {
 329                /*
 330                 * Internal error; TPGS supported but required
 331                 * VPD identification descriptors not present.
 332                 * Disable ALUA support
 333                 */
 334                sdev_printk(KERN_INFO, sdev,
 335                            "%s: No target port descriptors found\n",
 336                            ALUA_DH_NAME);
 337                return SCSI_DH_DEV_UNSUPP;
 338        }
 339
 340        pg = alua_alloc_pg(sdev, group_id, tpgs);
 341        if (IS_ERR(pg)) {
 342                if (PTR_ERR(pg) == -ENOMEM)
 343                        return SCSI_DH_NOMEM;
 344                return SCSI_DH_DEV_UNSUPP;
 345        }
 346        if (pg->device_id_len)
 347                sdev_printk(KERN_INFO, sdev,
 348                            "%s: device %s port group %x rel port %x\n",
 349                            ALUA_DH_NAME, pg->device_id_str,
 350                            group_id, rel_port);
 351        else
 352                sdev_printk(KERN_INFO, sdev,
 353                            "%s: port group %x rel port %x\n",
 354                            ALUA_DH_NAME, group_id, rel_port);
 355
 356        /* Check for existing port group references */
 357        spin_lock(&h->pg_lock);
 358        old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
 359        if (old_pg != pg) {
 360                /* port group has changed. Update to new port group */
 361                if (h->pg) {
 362                        spin_lock_irqsave(&old_pg->lock, flags);
 363                        list_del_rcu(&h->node);
 364                        spin_unlock_irqrestore(&old_pg->lock, flags);
 365                }
 366                rcu_assign_pointer(h->pg, pg);
 367                pg_updated = true;
 368        }
 369
 370        spin_lock_irqsave(&pg->lock, flags);
 371        if (pg_updated)
 372                list_add_rcu(&h->node, &pg->dh_list);
 373        spin_unlock_irqrestore(&pg->lock, flags);
 374
 375        alua_rtpg_queue(rcu_dereference_protected(h->pg,
 376                                                  lockdep_is_held(&h->pg_lock)),
 377                        sdev, NULL, true);
 378        spin_unlock(&h->pg_lock);
 379
 380        if (old_pg)
 381                kref_put(&old_pg->kref, release_port_group);
 382
 383        return SCSI_DH_OK;
 384}
 385
 386static char print_alua_state(unsigned char state)
 387{
 388        switch (state) {
 389        case SCSI_ACCESS_STATE_OPTIMAL:
 390                return 'A';
 391        case SCSI_ACCESS_STATE_ACTIVE:
 392                return 'N';
 393        case SCSI_ACCESS_STATE_STANDBY:
 394                return 'S';
 395        case SCSI_ACCESS_STATE_UNAVAILABLE:
 396                return 'U';
 397        case SCSI_ACCESS_STATE_LBA:
 398                return 'L';
 399        case SCSI_ACCESS_STATE_OFFLINE:
 400                return 'O';
 401        case SCSI_ACCESS_STATE_TRANSITIONING:
 402                return 'T';
 403        default:
 404                return 'X';
 405        }
 406}
 407
 408static int alua_check_sense(struct scsi_device *sdev,
 409                            struct scsi_sense_hdr *sense_hdr)
 410{
 411        switch (sense_hdr->sense_key) {
 412        case NOT_READY:
 413                if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) {
 414                        /*
 415                         * LUN Not Accessible - ALUA state transition
 416                         */
 417                        alua_check(sdev, false);
 418                        return NEEDS_RETRY;
 419                }
 420                break;
 421        case UNIT_ATTENTION:
 422                if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) {
 423                        /*
 424                         * Power On, Reset, or Bus Device Reset.
 425                         * Might have obscured a state transition,
 426                         * so schedule a recheck.
 427                         */
 428                        alua_check(sdev, true);
 429                        return ADD_TO_MLQUEUE;
 430                }
 431                if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04)
 432                        /*
 433                         * Device internal reset
 434                         */
 435                        return ADD_TO_MLQUEUE;
 436                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01)
 437                        /*
 438                         * Mode Parameters Changed
 439                         */
 440                        return ADD_TO_MLQUEUE;
 441                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) {
 442                        /*
 443                         * ALUA state changed
 444                         */
 445                        alua_check(sdev, true);
 446                        return ADD_TO_MLQUEUE;
 447                }
 448                if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) {
 449                        /*
 450                         * Implicit ALUA state transition failed
 451                         */
 452                        alua_check(sdev, true);
 453                        return ADD_TO_MLQUEUE;
 454                }
 455                if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03)
 456                        /*
 457                         * Inquiry data has changed
 458                         */
 459                        return ADD_TO_MLQUEUE;
 460                if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e)
 461                        /*
 462                         * REPORTED_LUNS_DATA_HAS_CHANGED is reported
 463                         * when switching controllers on targets like
 464                         * Intel Multi-Flex. We can just retry.
 465                         */
 466                        return ADD_TO_MLQUEUE;
 467                break;
 468        }
 469
 470        return SCSI_RETURN_NOT_HANDLED;
 471}
 472
 473/*
 474 * alua_tur - Send a TEST UNIT READY
 475 * @sdev: device to which the TEST UNIT READY command should be send
 476 *
 477 * Send a TEST UNIT READY to @sdev to figure out the device state
 478 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING,
 479 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise.
 480 */
 481static int alua_tur(struct scsi_device *sdev)
 482{
 483        struct scsi_sense_hdr sense_hdr;
 484        int retval;
 485
 486        retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ,
 487                                      ALUA_FAILOVER_RETRIES, &sense_hdr);
 488        if (sense_hdr.sense_key == NOT_READY &&
 489            sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
 490                return SCSI_DH_RETRY;
 491        else if (retval)
 492                return SCSI_DH_IO;
 493        else
 494                return SCSI_DH_OK;
 495}
 496
 497/*
 498 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES
 499 * @sdev: the device to be evaluated.
 500 *
 501 * Evaluate the Target Port Group State.
 502 * Returns SCSI_DH_DEV_OFFLINED if the path is
 503 * found to be unusable.
 504 */
 505static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg)
 506{
 507        struct scsi_sense_hdr sense_hdr;
 508        struct alua_port_group *tmp_pg;
 509        int len, k, off, bufflen = ALUA_RTPG_SIZE;
 510        unsigned char *desc, *buff;
 511        unsigned err, retval;
 512        unsigned int tpg_desc_tbl_off;
 513        unsigned char orig_transition_tmo;
 514        unsigned long flags;
 515
 516        if (!pg->expiry) {
 517                unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ;
 518
 519                if (pg->transition_tmo)
 520                        transition_tmo = pg->transition_tmo * HZ;
 521
 522                pg->expiry = round_jiffies_up(jiffies + transition_tmo);
 523        }
 524
 525        buff = kzalloc(bufflen, GFP_KERNEL);
 526        if (!buff)
 527                return SCSI_DH_DEV_TEMP_BUSY;
 528
 529 retry:
 530        err = 0;
 531        retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags);
 532
 533        if (retval) {
 534                /*
 535                 * Some (broken) implementations have a habit of returning
 536                 * an error during things like firmware update etc.
 537                 * But if the target only supports active/optimized there's
 538                 * not much we can do; it's not that we can switch paths
 539                 * or anything.
 540                 * So ignore any errors to avoid spurious failures during
 541                 * path failover.
 542                 */
 543                if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) {
 544                        sdev_printk(KERN_INFO, sdev,
 545                                    "%s: ignoring rtpg result %d\n",
 546                                    ALUA_DH_NAME, retval);
 547                        kfree(buff);
 548                        return SCSI_DH_OK;
 549                }
 550                if (!scsi_sense_valid(&sense_hdr)) {
 551                        sdev_printk(KERN_INFO, sdev,
 552                                    "%s: rtpg failed, result %d\n",
 553                                    ALUA_DH_NAME, retval);
 554                        kfree(buff);
 555                        if (driver_byte(retval) == DRIVER_ERROR)
 556                                return SCSI_DH_DEV_TEMP_BUSY;
 557                        return SCSI_DH_IO;
 558                }
 559
 560                /*
 561                 * submit_rtpg() has failed on existing arrays
 562                 * when requesting extended header info, and
 563                 * the array doesn't support extended headers,
 564                 * even though it shouldn't according to T10.
 565                 * The retry without rtpg_ext_hdr_req set
 566                 * handles this.
 567                 */
 568                if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) &&
 569                    sense_hdr.sense_key == ILLEGAL_REQUEST &&
 570                    sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) {
 571                        pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP;
 572                        goto retry;
 573                }
 574                /*
 575                 * Retry on ALUA state transition or if any
 576                 * UNIT ATTENTION occurred.
 577                 */
 578                if (sense_hdr.sense_key == NOT_READY &&
 579                    sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a)
 580                        err = SCSI_DH_RETRY;
 581                else if (sense_hdr.sense_key == UNIT_ATTENTION)
 582                        err = SCSI_DH_RETRY;
 583                if (err == SCSI_DH_RETRY &&
 584                    pg->expiry != 0 && time_before(jiffies, pg->expiry)) {
 585                        sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n",
 586                                    ALUA_DH_NAME);
 587                        scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 588                        kfree(buff);
 589                        return err;
 590                }
 591                sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n",
 592                            ALUA_DH_NAME);
 593                scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 594                kfree(buff);
 595                pg->expiry = 0;
 596                return SCSI_DH_IO;
 597        }
 598
 599        len = get_unaligned_be32(&buff[0]) + 4;
 600
 601        if (len > bufflen) {
 602                /* Resubmit with the correct length */
 603                kfree(buff);
 604                bufflen = len;
 605                buff = kmalloc(bufflen, GFP_KERNEL);
 606                if (!buff) {
 607                        sdev_printk(KERN_WARNING, sdev,
 608                                    "%s: kmalloc buffer failed\n",__func__);
 609                        /* Temporary failure, bypass */
 610                        pg->expiry = 0;
 611                        return SCSI_DH_DEV_TEMP_BUSY;
 612                }
 613                goto retry;
 614        }
 615
 616        orig_transition_tmo = pg->transition_tmo;
 617        if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0)
 618                pg->transition_tmo = buff[5];
 619        else
 620                pg->transition_tmo = ALUA_FAILOVER_TIMEOUT;
 621
 622        if (orig_transition_tmo != pg->transition_tmo) {
 623                sdev_printk(KERN_INFO, sdev,
 624                            "%s: transition timeout set to %d seconds\n",
 625                            ALUA_DH_NAME, pg->transition_tmo);
 626                pg->expiry = jiffies + pg->transition_tmo * HZ;
 627        }
 628
 629        if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR)
 630                tpg_desc_tbl_off = 8;
 631        else
 632                tpg_desc_tbl_off = 4;
 633
 634        for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off;
 635             k < len;
 636             k += off, desc += off) {
 637                u16 group_id = get_unaligned_be16(&desc[2]);
 638
 639                spin_lock_irqsave(&port_group_lock, flags);
 640                tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len,
 641                                          group_id);
 642                spin_unlock_irqrestore(&port_group_lock, flags);
 643                if (tmp_pg) {
 644                        if (spin_trylock_irqsave(&tmp_pg->lock, flags)) {
 645                                if ((tmp_pg == pg) ||
 646                                    !(tmp_pg->flags & ALUA_PG_RUNNING)) {
 647                                        struct alua_dh_data *h;
 648
 649                                        tmp_pg->state = desc[0] & 0x0f;
 650                                        tmp_pg->pref = desc[0] >> 7;
 651                                        rcu_read_lock();
 652                                        list_for_each_entry_rcu(h,
 653                                                &tmp_pg->dh_list, node) {
 654                                                /* h->sdev should always be valid */
 655                                                BUG_ON(!h->sdev);
 656                                                h->sdev->access_state = desc[0];
 657                                        }
 658                                        rcu_read_unlock();
 659                                }
 660                                if (tmp_pg == pg)
 661                                        tmp_pg->valid_states = desc[1];
 662                                spin_unlock_irqrestore(&tmp_pg->lock, flags);
 663                        }
 664                        kref_put(&tmp_pg->kref, release_port_group);
 665                }
 666                off = 8 + (desc[7] * 4);
 667        }
 668
 669        spin_lock_irqsave(&pg->lock, flags);
 670        sdev_printk(KERN_INFO, sdev,
 671                    "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n",
 672                    ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state),
 673                    pg->pref ? "preferred" : "non-preferred",
 674                    pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t',
 675                    pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o',
 676                    pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l',
 677                    pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u',
 678                    pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s',
 679                    pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n',
 680                    pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a');
 681
 682        switch (pg->state) {
 683        case SCSI_ACCESS_STATE_TRANSITIONING:
 684                if (time_before(jiffies, pg->expiry)) {
 685                        /* State transition, retry */
 686                        pg->interval = ALUA_RTPG_RETRY_DELAY;
 687                        err = SCSI_DH_RETRY;
 688                } else {
 689                        struct alua_dh_data *h;
 690
 691                        /* Transitioning time exceeded, set port to standby */
 692                        err = SCSI_DH_IO;
 693                        pg->state = SCSI_ACCESS_STATE_STANDBY;
 694                        pg->expiry = 0;
 695                        rcu_read_lock();
 696                        list_for_each_entry_rcu(h, &pg->dh_list, node) {
 697                                BUG_ON(!h->sdev);
 698                                h->sdev->access_state =
 699                                        (pg->state & SCSI_ACCESS_STATE_MASK);
 700                                if (pg->pref)
 701                                        h->sdev->access_state |=
 702                                                SCSI_ACCESS_STATE_PREFERRED;
 703                        }
 704                        rcu_read_unlock();
 705                }
 706                break;
 707        case SCSI_ACCESS_STATE_OFFLINE:
 708                /* Path unusable */
 709                err = SCSI_DH_DEV_OFFLINED;
 710                pg->expiry = 0;
 711                break;
 712        default:
 713                /* Useable path if active */
 714                err = SCSI_DH_OK;
 715                pg->expiry = 0;
 716                break;
 717        }
 718        spin_unlock_irqrestore(&pg->lock, flags);
 719        kfree(buff);
 720        return err;
 721}
 722
 723/*
 724 * alua_stpg - Issue a SET TARGET PORT GROUP command
 725 *
 726 * Issue a SET TARGET PORT GROUP command and evaluate the
 727 * response. Returns SCSI_DH_RETRY per default to trigger
 728 * a re-evaluation of the target group state or SCSI_DH_OK
 729 * if no further action needs to be taken.
 730 */
 731static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
 732{
 733        int retval;
 734        struct scsi_sense_hdr sense_hdr;
 735
 736        if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) {
 737                /* Only implicit ALUA supported, retry */
 738                return SCSI_DH_RETRY;
 739        }
 740        switch (pg->state) {
 741        case SCSI_ACCESS_STATE_OPTIMAL:
 742                return SCSI_DH_OK;
 743        case SCSI_ACCESS_STATE_ACTIVE:
 744                if ((pg->flags & ALUA_OPTIMIZE_STPG) &&
 745                    !pg->pref &&
 746                    (pg->tpgs & TPGS_MODE_IMPLICIT))
 747                        return SCSI_DH_OK;
 748                break;
 749        case SCSI_ACCESS_STATE_STANDBY:
 750        case SCSI_ACCESS_STATE_UNAVAILABLE:
 751                break;
 752        case SCSI_ACCESS_STATE_OFFLINE:
 753                return SCSI_DH_IO;
 754        case SCSI_ACCESS_STATE_TRANSITIONING:
 755                break;
 756        default:
 757                sdev_printk(KERN_INFO, sdev,
 758                            "%s: stpg failed, unhandled TPGS state %d",
 759                            ALUA_DH_NAME, pg->state);
 760                return SCSI_DH_NOSYS;
 761        }
 762        retval = submit_stpg(sdev, pg->group_id, &sense_hdr);
 763
 764        if (retval) {
 765                if (!scsi_sense_valid(&sense_hdr)) {
 766                        sdev_printk(KERN_INFO, sdev,
 767                                    "%s: stpg failed, result %d",
 768                                    ALUA_DH_NAME, retval);
 769                        if (driver_byte(retval) == DRIVER_ERROR)
 770                                return SCSI_DH_DEV_TEMP_BUSY;
 771                } else {
 772                        sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n",
 773                                    ALUA_DH_NAME);
 774                        scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr);
 775                }
 776        }
 777        /* Retry RTPG */
 778        return SCSI_DH_RETRY;
 779}
 780
 781static void alua_rtpg_work(struct work_struct *work)
 782{
 783        struct alua_port_group *pg =
 784                container_of(work, struct alua_port_group, rtpg_work.work);
 785        struct scsi_device *sdev;
 786        LIST_HEAD(qdata_list);
 787        int err = SCSI_DH_OK;
 788        struct alua_queue_data *qdata, *tmp;
 789        unsigned long flags;
 790
 791        spin_lock_irqsave(&pg->lock, flags);
 792        sdev = pg->rtpg_sdev;
 793        if (!sdev) {
 794                WARN_ON(pg->flags & ALUA_PG_RUN_RTPG);
 795                WARN_ON(pg->flags & ALUA_PG_RUN_STPG);
 796                spin_unlock_irqrestore(&pg->lock, flags);
 797                kref_put(&pg->kref, release_port_group);
 798                return;
 799        }
 800        pg->flags |= ALUA_PG_RUNNING;
 801        if (pg->flags & ALUA_PG_RUN_RTPG) {
 802                int state = pg->state;
 803
 804                pg->flags &= ~ALUA_PG_RUN_RTPG;
 805                spin_unlock_irqrestore(&pg->lock, flags);
 806                if (state == SCSI_ACCESS_STATE_TRANSITIONING) {
 807                        if (alua_tur(sdev) == SCSI_DH_RETRY) {
 808                                spin_lock_irqsave(&pg->lock, flags);
 809                                pg->flags &= ~ALUA_PG_RUNNING;
 810                                pg->flags |= ALUA_PG_RUN_RTPG;
 811                                if (!pg->interval)
 812                                        pg->interval = ALUA_RTPG_RETRY_DELAY;
 813                                spin_unlock_irqrestore(&pg->lock, flags);
 814                                queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 815                                                   pg->interval * HZ);
 816                                return;
 817                        }
 818                        /* Send RTPG on failure or if TUR indicates SUCCESS */
 819                }
 820                err = alua_rtpg(sdev, pg);
 821                spin_lock_irqsave(&pg->lock, flags);
 822                if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
 823                        pg->flags &= ~ALUA_PG_RUNNING;
 824                        if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
 825                                pg->interval = ALUA_RTPG_RETRY_DELAY;
 826                        pg->flags |= ALUA_PG_RUN_RTPG;
 827                        spin_unlock_irqrestore(&pg->lock, flags);
 828                        queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 829                                           pg->interval * HZ);
 830                        return;
 831                }
 832                if (err != SCSI_DH_OK)
 833                        pg->flags &= ~ALUA_PG_RUN_STPG;
 834        }
 835        if (pg->flags & ALUA_PG_RUN_STPG) {
 836                pg->flags &= ~ALUA_PG_RUN_STPG;
 837                spin_unlock_irqrestore(&pg->lock, flags);
 838                err = alua_stpg(sdev, pg);
 839                spin_lock_irqsave(&pg->lock, flags);
 840                if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
 841                        pg->flags |= ALUA_PG_RUN_RTPG;
 842                        pg->interval = 0;
 843                        pg->flags &= ~ALUA_PG_RUNNING;
 844                        spin_unlock_irqrestore(&pg->lock, flags);
 845                        queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 846                                           pg->interval * HZ);
 847                        return;
 848                }
 849        }
 850
 851        list_splice_init(&pg->rtpg_list, &qdata_list);
 852        pg->rtpg_sdev = NULL;
 853        spin_unlock_irqrestore(&pg->lock, flags);
 854
 855        list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
 856                list_del(&qdata->entry);
 857                if (qdata->callback_fn)
 858                        qdata->callback_fn(qdata->callback_data, err);
 859                kfree(qdata);
 860        }
 861        spin_lock_irqsave(&pg->lock, flags);
 862        pg->flags &= ~ALUA_PG_RUNNING;
 863        spin_unlock_irqrestore(&pg->lock, flags);
 864        scsi_device_put(sdev);
 865        kref_put(&pg->kref, release_port_group);
 866}
 867
 868/**
 869 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously
 870 * @pg: ALUA port group associated with @sdev.
 871 * @sdev: SCSI device for which to submit an RTPG.
 872 * @qdata: Information about the callback to invoke after the RTPG.
 873 * @force: Whether or not to submit an RTPG if a work item that will submit an
 874 *         RTPG already has been scheduled.
 875 *
 876 * Returns true if and only if alua_rtpg_work() will be called asynchronously.
 877 * That function is responsible for calling @qdata->fn().
 878 */
 879static bool alua_rtpg_queue(struct alua_port_group *pg,
 880                            struct scsi_device *sdev,
 881                            struct alua_queue_data *qdata, bool force)
 882{
 883        int start_queue = 0;
 884        unsigned long flags;
 885        if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
 886                return false;
 887
 888        spin_lock_irqsave(&pg->lock, flags);
 889        if (qdata) {
 890                list_add_tail(&qdata->entry, &pg->rtpg_list);
 891                pg->flags |= ALUA_PG_RUN_STPG;
 892                force = true;
 893        }
 894        if (pg->rtpg_sdev == NULL) {
 895                pg->interval = 0;
 896                pg->flags |= ALUA_PG_RUN_RTPG;
 897                kref_get(&pg->kref);
 898                pg->rtpg_sdev = sdev;
 899                start_queue = 1;
 900        } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
 901                pg->flags |= ALUA_PG_RUN_RTPG;
 902                /* Do not queue if the worker is already running */
 903                if (!(pg->flags & ALUA_PG_RUNNING)) {
 904                        kref_get(&pg->kref);
 905                        start_queue = 1;
 906                }
 907        }
 908
 909        spin_unlock_irqrestore(&pg->lock, flags);
 910
 911        if (start_queue) {
 912                if (queue_delayed_work(kaluad_wq, &pg->rtpg_work,
 913                                msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS)))
 914                        sdev = NULL;
 915                else
 916                        kref_put(&pg->kref, release_port_group);
 917        }
 918        if (sdev)
 919                scsi_device_put(sdev);
 920
 921        return true;
 922}
 923
 924/*
 925 * alua_initialize - Initialize ALUA state
 926 * @sdev: the device to be initialized
 927 *
 928 * For the prep_fn to work correctly we have
 929 * to initialize the ALUA state for the device.
 930 */
 931static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h)
 932{
 933        int err = SCSI_DH_DEV_UNSUPP, tpgs;
 934
 935        mutex_lock(&h->init_mutex);
 936        tpgs = alua_check_tpgs(sdev);
 937        if (tpgs != TPGS_MODE_NONE)
 938                err = alua_check_vpd(sdev, h, tpgs);
 939        h->init_error = err;
 940        mutex_unlock(&h->init_mutex);
 941        return err;
 942}
 943/*
 944 * alua_set_params - set/unset the optimize flag
 945 * @sdev: device on the path to be activated
 946 * params - parameters in the following format
 947 *      "no_of_params\0param1\0param2\0param3\0...\0"
 948 * For example, to set the flag pass the following parameters
 949 * from multipath.conf
 950 *     hardware_handler        "2 alua 1"
 951 */
 952static int alua_set_params(struct scsi_device *sdev, const char *params)
 953{
 954        struct alua_dh_data *h = sdev->handler_data;
 955        struct alua_port_group *pg = NULL;
 956        unsigned int optimize = 0, argc;
 957        const char *p = params;
 958        int result = SCSI_DH_OK;
 959        unsigned long flags;
 960
 961        if ((sscanf(params, "%u", &argc) != 1) || (argc != 1))
 962                return -EINVAL;
 963
 964        while (*p++)
 965                ;
 966        if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1))
 967                return -EINVAL;
 968
 969        rcu_read_lock();
 970        pg = rcu_dereference(h->pg);
 971        if (!pg) {
 972                rcu_read_unlock();
 973                return -ENXIO;
 974        }
 975        spin_lock_irqsave(&pg->lock, flags);
 976        if (optimize)
 977                pg->flags |= ALUA_OPTIMIZE_STPG;
 978        else
 979                pg->flags &= ~ALUA_OPTIMIZE_STPG;
 980        spin_unlock_irqrestore(&pg->lock, flags);
 981        rcu_read_unlock();
 982
 983        return result;
 984}
 985
 986/*
 987 * alua_activate - activate a path
 988 * @sdev: device on the path to be activated
 989 *
 990 * We're currently switching the port group to be activated only and
 991 * let the array figure out the rest.
 992 * There may be other arrays which require us to switch all port groups
 993 * based on a certain policy. But until we actually encounter them it
 994 * should be okay.
 995 */
 996static int alua_activate(struct scsi_device *sdev,
 997                        activate_complete fn, void *data)
 998{
 999        struct alua_dh_data *h = sdev->handler_data;
1000        int err = SCSI_DH_OK;
1001        struct alua_queue_data *qdata;
1002        struct alua_port_group *pg;
1003
1004        qdata = kzalloc(sizeof(*qdata), GFP_KERNEL);
1005        if (!qdata) {
1006                err = SCSI_DH_RES_TEMP_UNAVAIL;
1007                goto out;
1008        }
1009        qdata->callback_fn = fn;
1010        qdata->callback_data = data;
1011
1012        mutex_lock(&h->init_mutex);
1013        rcu_read_lock();
1014        pg = rcu_dereference(h->pg);
1015        if (!pg || !kref_get_unless_zero(&pg->kref)) {
1016                rcu_read_unlock();
1017                kfree(qdata);
1018                err = h->init_error;
1019                mutex_unlock(&h->init_mutex);
1020                goto out;
1021        }
1022        rcu_read_unlock();
1023        mutex_unlock(&h->init_mutex);
1024
1025        if (alua_rtpg_queue(pg, sdev, qdata, true))
1026                fn = NULL;
1027        else
1028                err = SCSI_DH_DEV_OFFLINED;
1029        kref_put(&pg->kref, release_port_group);
1030out:
1031        if (fn)
1032                fn(data, err);
1033        return 0;
1034}
1035
1036/*
1037 * alua_check - check path status
1038 * @sdev: device on the path to be checked
1039 *
1040 * Check the device status
1041 */
1042static void alua_check(struct scsi_device *sdev, bool force)
1043{
1044        struct alua_dh_data *h = sdev->handler_data;
1045        struct alua_port_group *pg;
1046
1047        rcu_read_lock();
1048        pg = rcu_dereference(h->pg);
1049        if (!pg || !kref_get_unless_zero(&pg->kref)) {
1050                rcu_read_unlock();
1051                return;
1052        }
1053        rcu_read_unlock();
1054
1055        alua_rtpg_queue(pg, sdev, NULL, force);
1056        kref_put(&pg->kref, release_port_group);
1057}
1058
1059/*
1060 * alua_prep_fn - request callback
1061 *
1062 * Fail I/O to all paths not in state
1063 * active/optimized or active/non-optimized.
1064 */
1065static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req)
1066{
1067        struct alua_dh_data *h = sdev->handler_data;
1068        struct alua_port_group *pg;
1069        unsigned char state = SCSI_ACCESS_STATE_OPTIMAL;
1070
1071        rcu_read_lock();
1072        pg = rcu_dereference(h->pg);
1073        if (pg)
1074                state = pg->state;
1075        rcu_read_unlock();
1076
1077        switch (state) {
1078        case SCSI_ACCESS_STATE_OPTIMAL:
1079        case SCSI_ACCESS_STATE_ACTIVE:
1080        case SCSI_ACCESS_STATE_LBA:
1081                return BLK_STS_OK;
1082        case SCSI_ACCESS_STATE_TRANSITIONING:
1083                return BLK_STS_RESOURCE;
1084        default:
1085                req->rq_flags |= RQF_QUIET;
1086                return BLK_STS_IOERR;
1087        }
1088}
1089
1090static void alua_rescan(struct scsi_device *sdev)
1091{
1092        struct alua_dh_data *h = sdev->handler_data;
1093
1094        alua_initialize(sdev, h);
1095}
1096
1097/*
1098 * alua_bus_attach - Attach device handler
1099 * @sdev: device to be attached to
1100 */
1101static int alua_bus_attach(struct scsi_device *sdev)
1102{
1103        struct alua_dh_data *h;
1104        int err;
1105
1106        h = kzalloc(sizeof(*h) , GFP_KERNEL);
1107        if (!h)
1108                return SCSI_DH_NOMEM;
1109        spin_lock_init(&h->pg_lock);
1110        rcu_assign_pointer(h->pg, NULL);
1111        h->init_error = SCSI_DH_OK;
1112        h->sdev = sdev;
1113        INIT_LIST_HEAD(&h->node);
1114
1115        mutex_init(&h->init_mutex);
1116        err = alua_initialize(sdev, h);
1117        if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED)
1118                goto failed;
1119
1120        sdev->handler_data = h;
1121        return SCSI_DH_OK;
1122failed:
1123        kfree(h);
1124        return err;
1125}
1126
1127/*
1128 * alua_bus_detach - Detach device handler
1129 * @sdev: device to be detached from
1130 */
1131static void alua_bus_detach(struct scsi_device *sdev)
1132{
1133        struct alua_dh_data *h = sdev->handler_data;
1134        struct alua_port_group *pg;
1135
1136        spin_lock(&h->pg_lock);
1137        pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
1138        rcu_assign_pointer(h->pg, NULL);
1139        h->sdev = NULL;
1140        spin_unlock(&h->pg_lock);
1141        if (pg) {
1142                spin_lock_irq(&pg->lock);
1143                list_del_rcu(&h->node);
1144                spin_unlock_irq(&pg->lock);
1145                kref_put(&pg->kref, release_port_group);
1146        }
1147        sdev->handler_data = NULL;
1148        kfree(h);
1149}
1150
1151static struct scsi_device_handler alua_dh = {
1152        .name = ALUA_DH_NAME,
1153        .module = THIS_MODULE,
1154        .attach = alua_bus_attach,
1155        .detach = alua_bus_detach,
1156        .prep_fn = alua_prep_fn,
1157        .check_sense = alua_check_sense,
1158        .activate = alua_activate,
1159        .rescan = alua_rescan,
1160        .set_params = alua_set_params,
1161};
1162
1163static int __init alua_init(void)
1164{
1165        int r;
1166
1167        kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0);
1168        if (!kaluad_wq)
1169                return -ENOMEM;
1170
1171        r = scsi_register_device_handler(&alua_dh);
1172        if (r != 0) {
1173                printk(KERN_ERR "%s: Failed to register scsi device handler",
1174                        ALUA_DH_NAME);
1175                destroy_workqueue(kaluad_wq);
1176        }
1177        return r;
1178}
1179
1180static void __exit alua_exit(void)
1181{
1182        scsi_unregister_device_handler(&alua_dh);
1183        destroy_workqueue(kaluad_wq);
1184}
1185
1186module_init(alua_init);
1187module_exit(alua_exit);
1188
1189MODULE_DESCRIPTION("DM Multipath ALUA support");
1190MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>");
1191MODULE_LICENSE("GPL");
1192MODULE_VERSION(ALUA_DH_VER);
1193