linux/drivers/scsi/vmw_pvscsi.c
/*
 * Linux driver for VMware's para-virtualized SCSI HBA.
 *
 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>

#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>

#include "vmw_pvscsi.h"

#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"

MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
MODULE_AUTHOR("VMware, Inc.");
MODULE_LICENSE("GPL");
MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);

#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING       8
#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING       1
#define PVSCSI_DEFAULT_QUEUE_DEPTH              254
#define SGL_SIZE                                PAGE_SIZE

struct pvscsi_sg_list {
        struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};

struct pvscsi_ctx {
        /*
         * The index of the context in cmd_map serves as the context ID for a
         * 1-to-1 mapping of completions back to requests.
         */
        struct scsi_cmnd        *cmd;
        struct pvscsi_sg_list   *sgl;
        struct list_head        list;
        dma_addr_t              dataPA;
        dma_addr_t              sensePA;
        dma_addr_t              sglPA;
        struct completion       *abort_cmp;
};

struct pvscsi_adapter {
        char                            *mmioBase;
        u8                              rev;
        bool                            use_msg;
        bool                            use_req_threshold;

        spinlock_t                      hw_lock;

        struct workqueue_struct         *workqueue;
        struct work_struct              work;

        struct PVSCSIRingReqDesc        *req_ring;
        unsigned                        req_pages;
        unsigned                        req_depth;
        dma_addr_t                      reqRingPA;

        struct PVSCSIRingCmpDesc        *cmp_ring;
        unsigned                        cmp_pages;
        dma_addr_t                      cmpRingPA;

        struct PVSCSIRingMsgDesc        *msg_ring;
        unsigned                        msg_pages;
        dma_addr_t                      msgRingPA;

        struct PVSCSIRingsState         *rings_state;
        dma_addr_t                      ringStatePA;

        struct pci_dev                  *dev;
        struct Scsi_Host                *host;

        struct list_head                cmd_pool;
        struct pvscsi_ctx               *cmd_map;
};


/* Command line parameters */
static int pvscsi_ring_pages;
static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
static bool pvscsi_disable_msi;
static bool pvscsi_disable_msix;
static bool pvscsi_use_msg       = true;
static bool pvscsi_use_req_threshold = true;

#define PVSCSI_RW (S_IRUSR | S_IWUSR)

module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
                 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
                 "[up to 16 targets],"
                 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
                 "[for 16+ targets])");

module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
                 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");

module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
                 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");

module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");

module_param_named(use_req_threshold, pvscsi_use_req_threshold,
                   bool, PVSCSI_RW);
MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");

static const struct pci_device_id pvscsi_pci_tbl[] = {
        { PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
        { 0 }
};

MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);

static struct device *
pvscsi_dev(const struct pvscsi_adapter *adapter)
{
        return &(adapter->dev->dev);
}

static struct pvscsi_ctx *
pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
        struct pvscsi_ctx *ctx, *end;

        end = &adapter->cmd_map[adapter->req_depth];
        for (ctx = adapter->cmd_map; ctx < end; ctx++)
                if (ctx->cmd == cmd)
                        return ctx;

        return NULL;
}

static struct pvscsi_ctx *
pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
        struct pvscsi_ctx *ctx;

        if (list_empty(&adapter->cmd_pool))
                return NULL;

        ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
        ctx->cmd = cmd;
        list_del(&ctx->list);

        return ctx;
}

static void pvscsi_release_context(struct pvscsi_adapter *adapter,
                                   struct pvscsi_ctx *ctx)
{
        ctx->cmd = NULL;
        ctx->abort_cmp = NULL;
        list_add(&ctx->list, &adapter->cmd_pool);
}

/*
 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 * non-zero integer. ctx always points to an entry in the cmd_map array,
 * hence the return value is always >= 1.
 */
static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
                              const struct pvscsi_ctx *ctx)
{
        return ctx - adapter->cmd_map + 1;
}

static struct pvscsi_ctx *
pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
{
        return &adapter->cmd_map[context - 1];
}
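
/*
 * Illustration (driver-local note, not from the device spec): context IDs
 * are the cmd_map index plus one, so the entry at index 0 travels on the
 * wire as ID 1 and a context field of 0 can never name a valid request.
 * pvscsi_get_context() simply inverts the mapping:
 *
 *   ctx == &cmd_map[4]                ->  pvscsi_map_context() == 5
 *   pvscsi_get_context(adapter, 5)    ->  &cmd_map[4]
 */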

static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
                             u32 offset, u32 val)
{
        writel(val, adapter->mmioBase + offset);
}

static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
{
        return readl(adapter->mmioBase + offset);
}

static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
{
        return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
}

static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
                                     u32 val)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
{
        u32 intr_bits;

        intr_bits = PVSCSI_INTR_CMPL_MASK;
        if (adapter->use_msg)
                intr_bits |= PVSCSI_INTR_MSG_MASK;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
}

static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
                                  u32 cmd, const void *desc, size_t len)
{
        const u32 *ptr = desc;
        size_t i;

        len /= sizeof(*ptr);
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
        for (i = 0; i < len; i++)
                pvscsi_reg_write(adapter,
                                 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}
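
/*
 * Sketch of the command handshake implemented above (the descriptor
 * length is expected to be a multiple of 4, which holds for the
 * PVSCSICmdDesc* structs; the integer division truncates otherwise):
 * first the command code goes to the COMMAND register, then the
 * descriptor is streamed 32 bits at a time into COMMAND_DATA. An
 * 8-byte descriptor, for example, results in one COMMAND write
 * followed by two COMMAND_DATA writes.
 */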

static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
                             const struct pvscsi_ctx *ctx)
{
        struct PVSCSICmdDescAbortCmd cmd = { 0 };

        cmd.target = ctx->cmd->device->id;
        cmd.context = pvscsi_map_context(adapter, ctx);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
}

static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}

static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
{
        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}

static int scsi_is_rw(unsigned char op)
{
        return op == READ_6  || op == WRITE_6 ||
               op == READ_10 || op == WRITE_10 ||
               op == READ_12 || op == WRITE_12 ||
               op == READ_16 || op == WRITE_16;
}

static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
                           unsigned char op)
{
        if (scsi_is_rw(op)) {
                struct PVSCSIRingsState *s = adapter->rings_state;

                if (!adapter->use_req_threshold ||
                    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
                        pvscsi_kick_rw_io(adapter);
        } else {
                pvscsi_process_request_ring(adapter);
        }
}
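
/*
 * Worked example of the coalescing above (the threshold value is
 * illustrative; the device advertises the real one via rings_state):
 * with reqCallThreshold == 8, READ/WRITE requests are queued silently
 * until at least 8 are outstanding, and only then is the doorbell rung,
 * trading a little latency for fewer MMIO-induced VM exits. Non-R/W
 * commands always kick immediately via the non-R/W doorbell.
 */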

static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
{
        dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
}

static void ll_bus_reset(const struct pvscsi_adapter *adapter)
{
        dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
}

static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
{
        struct PVSCSICmdDescResetDevice cmd = { 0 };

        dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);

        cmd.target = target;

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
                              &cmd, sizeof(cmd));
}

static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
                             struct scatterlist *sg, unsigned count)
{
        unsigned i;
        struct PVSCSISGElement *sge;

        BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);

        sge = &ctx->sgl->sge[0];
        for (i = 0; i < count; i++, sg = sg_next(sg)) {
                sge[i].addr   = sg_dma_address(sg);
                sge[i].length = sg_dma_len(sg);
                sge[i].flags  = 0;
        }
}

/*
 * Map all data buffers for a command into PCI space and
 * set up the scatter/gather list if needed.
 */
static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
                              struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
                              struct PVSCSIRingReqDesc *e)
{
        unsigned count;
        unsigned bufflen = scsi_bufflen(cmd);
        struct scatterlist *sg;

        e->dataLen = bufflen;
        e->dataAddr = 0;
        if (bufflen == 0)
                return 0;

        sg = scsi_sglist(cmd);
        count = scsi_sg_count(cmd);
        if (count != 0) {
                int segs = scsi_dma_map(cmd);

                if (segs == -ENOMEM) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
                        return -ENOMEM;
                } else if (segs > 1) {
                        pvscsi_create_sg(ctx, sg, segs);

                        e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
                        ctx->sglPA = dma_map_single(&adapter->dev->dev,
                                        ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
                        if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
                                scmd_printk(KERN_ERR, cmd,
                                            "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
                                scsi_dma_unmap(cmd);
                                ctx->sglPA = 0;
                                return -ENOMEM;
                        }
                        e->dataAddr = ctx->sglPA;
                } else
                        e->dataAddr = sg_dma_address(sg);
        } else {
                /*
                 * In case there is no S/G list, scsi_sglist points
                 * directly to the buffer.
                 */
                ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
                                             cmd->sc_data_direction);
                if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
                        return -ENOMEM;
                }
                e->dataAddr = ctx->dataPA;
        }

        return 0;
}
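
/*
 * Summary of the data-path cases handled above (driver-local note):
 *
 *   bufflen == 0          -> no data, e->dataAddr stays 0
 *   1 mapped segment      -> e->dataAddr is the segment's bus address
 *   > 1 mapped segments   -> the segments are copied into ctx->sgl, the
 *                            page holding that list is itself DMA-mapped,
 *                            and e->dataAddr points at the list with
 *                            PVSCSI_FLAG_CMD_WITH_SG_LIST set
 *   no scatterlist at all -> the flat buffer is DMA-mapped directly
 */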

/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
        if (cmd->sense_buffer)
                cmd->sense_buffer[0] = 0;
}

static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
                                 struct pvscsi_ctx *ctx)
{
        struct scsi_cmnd *cmd;
        unsigned bufflen;

        cmd = ctx->cmd;
        bufflen = scsi_bufflen(cmd);

        if (bufflen != 0) {
                unsigned count = scsi_sg_count(cmd);

                if (count != 0) {
                        scsi_dma_unmap(cmd);
                        if (ctx->sglPA) {
                                dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
                                                 SGL_SIZE, DMA_TO_DEVICE);
                                ctx->sglPA = 0;
                        }
                } else
                        dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
                                         bufflen, cmd->sc_data_direction);
        }
        if (cmd->sense_buffer)
                dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                                 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
}

static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
        adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
                        &adapter->ringStatePA, GFP_KERNEL);
        if (!adapter->rings_state)
                return -ENOMEM;

        adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
                                 pvscsi_ring_pages);
        adapter->req_depth = adapter->req_pages
                                        * PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
        adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
                        GFP_KERNEL);
        if (!adapter->req_ring)
                return -ENOMEM;

        adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
                                 pvscsi_ring_pages);
        adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
                        GFP_KERNEL);
        if (!adapter->cmp_ring)
                return -ENOMEM;

        BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
        BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
        BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

        if (!adapter->use_msg)
                return 0;

        adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
                                 pvscsi_msg_ring_pages);
        adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
                        adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
                        GFP_KERNEL);
        if (!adapter->msg_ring)
                return -ENOMEM;
        BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

        return 0;
}

static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
        struct PVSCSICmdDescSetupRings cmd = { 0 };
        dma_addr_t base;
        unsigned i;

        cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
        cmd.reqRingNumPages = adapter->req_pages;
        cmd.cmpRingNumPages = adapter->cmp_pages;

        base = adapter->reqRingPA;
        for (i = 0; i < adapter->req_pages; i++) {
                cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
                base += PAGE_SIZE;
        }

        base = adapter->cmpRingPA;
        for (i = 0; i < adapter->cmp_pages; i++) {
                cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
                base += PAGE_SIZE;
        }

        memset(adapter->rings_state, 0, PAGE_SIZE);
        memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
        memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
                              &cmd, sizeof(cmd));

        if (adapter->use_msg) {
                struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };

                cmd_msg.numPages = adapter->msg_pages;

                base = adapter->msgRingPA;
                for (i = 0; i < adapter->msg_pages; i++) {
                        cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
                        base += PAGE_SIZE;
                }
                memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);

                pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
                                      &cmd_msg, sizeof(cmd_msg));
        }
}
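
/*
 * PPN arithmetic used above, for reference: a physical page number is
 * the page-aligned bus address shifted right by PAGE_SHIFT. With 4 KiB
 * pages (PAGE_SHIFT == 12), a ring page at 0x12345000 is handed to the
 * device as PPN 0x12345; the BUG_ON alignment checks in
 * pvscsi_allocate_rings() guarantee no low bits are lost by the shift.
 */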

static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
        if (!sdev->tagged_supported)
                qdepth = 1;
        return scsi_change_queue_depth(sdev, qdepth);
}

/*
 * Pull a completion descriptor off and pass the completion back
 * to the SCSI mid layer.
 */
static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
                                    const struct PVSCSIRingCmpDesc *e)
{
        struct pvscsi_ctx *ctx;
        struct scsi_cmnd *cmd;
        struct completion *abort_cmp;
        u32 btstat = e->hostStatus;
        u32 sdstat = e->scsiStatus;

        ctx = pvscsi_get_context(adapter, e->context);
        cmd = ctx->cmd;
        abort_cmp = ctx->abort_cmp;
        pvscsi_unmap_buffers(adapter, ctx);
        if (sdstat != SAM_STAT_CHECK_CONDITION)
                pvscsi_patch_sense(cmd);
        pvscsi_release_context(adapter, ctx);
        if (abort_cmp) {
                /*
                 * The command was requested to be aborted. Just signal that
                 * the request completed and swallow the actual cmd completion
                 * here. The abort handler will post a completion for this
                 * command indicating that it got successfully aborted.
                 */
                complete(abort_cmp);
                return;
        }

        cmd->result = 0;
        if (sdstat != SAM_STAT_GOOD &&
            (btstat == BTSTAT_SUCCESS ||
             btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
             btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
                if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
                        cmd->result = (DID_RESET << 16);
                } else {
                        cmd->result = (DID_OK << 16) | sdstat;
                }
        } else
                switch (btstat) {
                case BTSTAT_SUCCESS:
                case BTSTAT_LINKED_COMMAND_COMPLETED:
                case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
                        /*
                         * Commands like INQUIRY may transfer less data than
                         * requested by the initiator via bufflen. Set residual
                         * count to make upper layer aware of the actual amount
                         * of data returned.
                         */
                        scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
                        cmd->result = (DID_OK << 16);
                        break;

                case BTSTAT_DATARUN:
                case BTSTAT_DATA_UNDERRUN:
                        /* Report residual data in underruns */
                        scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
                        cmd->result = (DID_ERROR << 16);
                        break;

                case BTSTAT_SELTIMEO:
                        /* Our emulation returns this for non-connected devs */
                        cmd->result = (DID_BAD_TARGET << 16);
                        break;

                case BTSTAT_LUNMISMATCH:
                case BTSTAT_TAGREJECT:
                case BTSTAT_BADMSG:
                case BTSTAT_HAHARDWARE:
                case BTSTAT_INVPHASE:
                case BTSTAT_HATIMEOUT:
                case BTSTAT_NORESPONSE:
                case BTSTAT_DISCONNECT:
                case BTSTAT_HASOFTWARE:
                case BTSTAT_BUSFREE:
                case BTSTAT_SENSFAILED:
                        cmd->result |= (DID_ERROR << 16);
                        break;

                case BTSTAT_SENTRST:
                case BTSTAT_RECVRST:
                case BTSTAT_BUSRESET:
                        cmd->result = (DID_RESET << 16);
                        break;

                case BTSTAT_ABORTQUEUE:
                        cmd->result = (DID_BUS_BUSY << 16);
                        break;

                case BTSTAT_SCSIPARITY:
                        cmd->result = (DID_PARITY << 16);
                        break;

                default:
                        cmd->result = (DID_ERROR << 16);
                        scmd_printk(KERN_DEBUG, cmd,
                                    "Unknown completion status: 0x%x\n",
                                    btstat);
        }

        dev_dbg(&cmd->device->sdev_gendev,
                "cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
                cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);

        cmd->scsi_done(cmd);
}
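
/*
 * Note on the cmd->result encoding used above (the standard SCSI
 * midlayer convention of this kernel era): the DID_* host byte lives in
 * bits 16-23 and the SCSI status byte in bits 0-7, so e.g.
 * (DID_OK << 16) | SAM_STAT_CHECK_CONDITION reports a transport-level
 * success carrying a check condition for the midlayer to act on.
 */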

/*
 * Barrier usage: since the PVSCSI device is emulated, there are cases
 * where we want to serialize some accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on x86, which has
 * strong memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
        u32 cmp_entries = s->cmpNumEntriesLog2;

        while (s->cmpConsIdx != s->cmpProdIdx) {
                struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
                                                      MASK(cmp_entries));
                /*
                 * This barrier() ensures that *e is not dereferenced while
                 * the device emulation still writes data into the slot.
                 * Since the device emulation advances s->cmpProdIdx only after
                 * updating the slot we want to check it first.
                 */
                barrier();
                pvscsi_complete_request(adapter, e);
                /*
                 * This barrier() ensures that the compiler doesn't reorder the
                 * write to s->cmpConsIdx before the read of (*e) inside
                 * pvscsi_complete_request. Otherwise, device emulation may
                 * overwrite *e before we had a chance to read it.
                 */
                barrier();
                s->cmpConsIdx++;
        }
}
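
/*
 * Ring indexing, for reference (MASK() is assumed to expand to
 * (1 << n) - 1, per its use with the *NumEntriesLog2 fields): the
 * prod/cons indices are free-running u32 counters and a slot is
 * selected by masking. With cmpNumEntriesLog2 == 9 the ring has 512
 * entries, MASK(9) == 511, and index 513 maps to slot 513 & 511 == 1;
 * unsigned wraparound at 2^32 keeps both the emptiness test and the
 * masking correct.
 */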

/*
 * Translate a Linux SCSI request into a request ring entry.
 */
static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
                             struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{
        struct PVSCSIRingsState *s;
        struct PVSCSIRingReqDesc *e;
        struct scsi_device *sdev;
        u32 req_entries;

        s = adapter->rings_state;
        sdev = cmd->device;
        req_entries = s->reqNumEntriesLog2;

        /*
         * If this condition holds, we might have room on the request ring, but
         * we might not have room on the completion ring for the response.
         * However, we have already ruled out this possibility - we would not
         * have successfully allocated a context if it were true, since we only
         * have one context per request entry.  Check for it anyway, since it
         * would be a serious bug.
         */
        if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
                scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
                            "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
                            s->reqProdIdx, s->cmpConsIdx);
                return -1;
        }

        e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));

        e->bus    = sdev->channel;
        e->target = sdev->id;
        memset(e->lun, 0, sizeof(e->lun));
        e->lun[1] = sdev->lun;

        if (cmd->sense_buffer) {
                ctx->sensePA = dma_map_single(&adapter->dev->dev,
                                cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
                                DMA_FROM_DEVICE);
                if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
                        scmd_printk(KERN_DEBUG, cmd,
                                    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
                        ctx->sensePA = 0;
                        return -ENOMEM;
                }
                e->senseAddr = ctx->sensePA;
                e->senseLen = SCSI_SENSE_BUFFERSIZE;
        } else {
                e->senseLen  = 0;
                e->senseAddr = 0;
        }
        e->cdbLen   = cmd->cmd_len;
        e->vcpuHint = smp_processor_id();
        memcpy(e->cdb, cmd->cmnd, e->cdbLen);

        e->tag = SIMPLE_QUEUE_TAG;

        if (cmd->sc_data_direction == DMA_FROM_DEVICE)
                e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
        else if (cmd->sc_data_direction == DMA_TO_DEVICE)
                e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
        else if (cmd->sc_data_direction == DMA_NONE)
                e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
        else
                e->flags = 0;

        if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
                if (cmd->sense_buffer) {
                        dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
                                         SCSI_SENSE_BUFFERSIZE,
                                         DMA_FROM_DEVICE);
                        ctx->sensePA = 0;
                }
                return -ENOMEM;
        }

        e->context = pvscsi_map_context(adapter, ctx);

        barrier();

        s->reqProdIdx++;

        return 0;
}
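
/*
 * The fullness check above works across index wraparound because both
 * counters are unsigned: reqProdIdx - cmpConsIdx is the number of
 * requests issued but not yet consumed as completions, e.g.
 * reqProdIdx == 5 and cmpConsIdx == 0xfffffffe gives 7 outstanding
 * entries, which is compared against the ring size (1 << req_entries).
 */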

static int pvscsi_queue_lck(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        struct pvscsi_ctx *ctx;
        unsigned long flags;
        unsigned char op;

        spin_lock_irqsave(&adapter->hw_lock, flags);

        ctx = pvscsi_acquire_context(adapter, cmd);
        if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
                if (ctx)
                        pvscsi_release_context(adapter, ctx);
                spin_unlock_irqrestore(&adapter->hw_lock, flags);
                return SCSI_MLQUEUE_HOST_BUSY;
        }

        cmd->scsi_done = done;
        op = cmd->cmnd[0];

        dev_dbg(&cmd->device->sdev_gendev,
                "queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        pvscsi_kick_io(adapter, op);

        return 0;
}

static DEF_SCSI_QCMD(pvscsi_queue)

static int pvscsi_abort(struct scsi_cmnd *cmd)
{
        struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
        struct pvscsi_ctx *ctx;
        unsigned long flags;
        int result = SUCCESS;
        DECLARE_COMPLETION_ONSTACK(abort_cmp);
        int done;

        scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
                    adapter->host->host_no, cmd);

        spin_lock_irqsave(&adapter->hw_lock, flags);

        /*
         * Poll the completion ring first - we might be trying to abort
         * a command that is waiting to be dispatched in the completion ring.
         */
        pvscsi_process_completion_ring(adapter);

        /*
         * If there is no context for the command, it either already succeeded
         * or else was never properly issued.  Not our problem.
         */
        ctx = pvscsi_find_context(adapter, cmd);
        if (!ctx) {
                scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
                goto out;
        }

        /*
         * Mark that the command has been requested to be aborted and issue
         * the abort.
         */
        ctx->abort_cmp = &abort_cmp;

        pvscsi_abort_cmd(adapter, ctx);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        /* Wait for 2 secs for the completion. */
        done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
        spin_lock_irqsave(&adapter->hw_lock, flags);

        if (!done) {
                /*
                 * Failed to abort the command, unmark the fact that it
                 * was requested to be aborted.
                 */
                ctx->abort_cmp = NULL;
                result = FAILED;
                scmd_printk(KERN_DEBUG, cmd,
                            "Failed to get completion for aborted cmd %p\n",
                            cmd);
                goto out;
        }

        /*
         * Successfully aborted the command.
         */
        cmd->result = (DID_ABORT << 16);
        cmd->scsi_done(cmd);

out:
        spin_unlock_irqrestore(&adapter->hw_lock, flags);
        return result;
}

/*
 * Abort all outstanding requests.  This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between context field passed to emulation and our
 * request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
        unsigned i;

        for (i = 0; i < adapter->req_depth; i++) {
                struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
                struct scsi_cmnd *cmd = ctx->cmd;
                if (cmd) {
                        scmd_printk(KERN_ERR, cmd,
                                    "Forced reset on cmd %p\n", cmd);
                        pvscsi_unmap_buffers(adapter, ctx);
                        pvscsi_patch_sense(cmd);
                        pvscsi_release_context(adapter, ctx);
                        cmd->result = (DID_RESET << 16);
                        cmd->scsi_done(cmd);
                }
        }
}

static int pvscsi_host_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;
        bool use_msg;

        scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");

        spin_lock_irqsave(&adapter->hw_lock, flags);

        use_msg = adapter->use_msg;

        if (use_msg) {
                adapter->use_msg = false;
                spin_unlock_irqrestore(&adapter->hw_lock, flags);

                /*
                 * Now that we know that the ISR won't add more work on the
                 * workqueue we can safely flush any outstanding work.
                 */
                flush_workqueue(adapter->workqueue);
                spin_lock_irqsave(&adapter->hw_lock, flags);
        }

        /*
         * We're going to tear down the entire ring structure and set it back
         * up, so stall new requests until all completions are flushed and
         * the rings are back in place.
         */

        pvscsi_process_request_ring(adapter);

        ll_adapter_reset(adapter);

        /*
         * Now process any completions.  Note we do this AFTER adapter reset,
         * which is strange, but stops races where completions get posted
         * between processing the ring and issuing the reset.  The backend will
         * not touch the ring memory after reset, so the immediately pre-reset
         * completion ring state is still valid.
         */
        pvscsi_process_completion_ring(adapter);

        pvscsi_reset_all(adapter);
        adapter->use_msg = use_msg;
        pvscsi_setup_all_rings(adapter);
        pvscsi_unmask_intr(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;

        scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

        /*
         * We don't want to queue new requests for this bus after
         * flushing all pending requests to emulation, since new
         * requests could then sneak in during this bus reset phase,
         * so take the lock now.
         */
        spin_lock_irqsave(&adapter->hw_lock, flags);

        pvscsi_process_request_ring(adapter);
        ll_bus_reset(adapter);
        pvscsi_process_completion_ring(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct pvscsi_adapter *adapter = shost_priv(host);
        unsigned long flags;

        scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
                    host->host_no, cmd->device->id);

        /*
         * We don't want to queue new requests for this device after flushing
         * all pending requests to emulation, since new requests could then
         * sneak in during this device reset phase, so take the lock now.
         */
        spin_lock_irqsave(&adapter->hw_lock, flags);

        pvscsi_process_request_ring(adapter);
        ll_device_reset(adapter, cmd->device->id);
        pvscsi_process_completion_ring(adapter);

        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return SUCCESS;
}

static struct scsi_host_template pvscsi_template;

static const char *pvscsi_info(struct Scsi_Host *host)
{
        struct pvscsi_adapter *adapter = shost_priv(host);
        static char buf[256];

        sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
                "%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
                adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
                pvscsi_template.cmd_per_lun);

        return buf;
}

static struct scsi_host_template pvscsi_template = {
        .module                         = THIS_MODULE,
        .name                           = "VMware PVSCSI Host Adapter",
        .proc_name                      = "vmw_pvscsi",
        .info                           = pvscsi_info,
        .queuecommand                   = pvscsi_queue,
        .this_id                        = -1,
        .sg_tablesize                   = PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
        .dma_boundary                   = UINT_MAX,
        .max_sectors                    = 0xffff,
        .change_queue_depth             = pvscsi_change_queue_depth,
        .eh_abort_handler               = pvscsi_abort,
        .eh_device_reset_handler        = pvscsi_device_reset,
        .eh_bus_reset_handler           = pvscsi_bus_reset,
        .eh_host_reset_handler          = pvscsi_host_reset,
};

static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
                               const struct PVSCSIRingMsgDesc *e)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct Scsi_Host *host = adapter->host;
        struct scsi_device *sdev;

        printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u)\n",
               e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

        BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

        if (e->type == PVSCSI_MSG_DEV_ADDED) {
                struct PVSCSIMsgDescDevStatusChanged *desc;
                desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

                printk(KERN_INFO
                       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
                       desc->bus, desc->target, desc->lun[1]);

                if (!scsi_host_get(host))
                        return;

                sdev = scsi_device_lookup(host, desc->bus, desc->target,
                                          desc->lun[1]);
                if (sdev) {
                        printk(KERN_INFO "vmw_pvscsi: device already exists\n");
                        scsi_device_put(sdev);
                } else
                        scsi_add_device(adapter->host, desc->bus,
                                        desc->target, desc->lun[1]);

                scsi_host_put(host);
        } else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
                struct PVSCSIMsgDescDevStatusChanged *desc;
                desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

                printk(KERN_INFO
                       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
                       desc->bus, desc->target, desc->lun[1]);

                if (!scsi_host_get(host))
                        return;

                sdev = scsi_device_lookup(host, desc->bus, desc->target,
                                          desc->lun[1]);
                if (sdev) {
                        scsi_remove_device(sdev);
                        scsi_device_put(sdev);
                } else
                        printk(KERN_INFO
                               "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
                               desc->bus, desc->target, desc->lun[1]);

                scsi_host_put(host);
        }
}

static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;

        return s->msgProdIdx != s->msgConsIdx;
}

static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
        struct PVSCSIRingsState *s = adapter->rings_state;
        struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
        u32 msg_entries = s->msgNumEntriesLog2;

        while (pvscsi_msg_pending(adapter)) {
                struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
                                                      MASK(msg_entries));

                barrier();
                pvscsi_process_msg(adapter, e);
                barrier();
                s->msgConsIdx++;
        }
}

static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
        struct pvscsi_adapter *adapter;

        adapter = container_of(data, struct pvscsi_adapter, work);

        pvscsi_process_msg_ring(adapter);
}

static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
        char name[32];

        if (!pvscsi_use_msg)
                return 0;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
                         PVSCSI_CMD_SETUP_MSG_RING);

        if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
                return 0;

        snprintf(name, sizeof(name),
                 "vmw_pvscsi_wq_%u", adapter->host->host_no);

        adapter->workqueue = create_singlethread_workqueue(name);
        if (!adapter->workqueue) {
                printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
                return 0;
        }
        INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

        return 1;
}

static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
                                       bool enable)
{
        u32 val;

        if (!pvscsi_use_req_threshold)
                return false;

        pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
                         PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
        val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
        if (val == -1) {
                printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
                return false;
        } else {
                struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
                cmd_msg.enable = enable;
                printk(KERN_INFO
                       "vmw_pvscsi: %sabling reqCallThreshold\n",
                        enable ? "en" : "dis");
                pvscsi_write_cmd_desc(adapter,
                                      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
                                      &cmd_msg, sizeof(cmd_msg));
                return pvscsi_reg_read(adapter,
                                       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
        }
}

static irqreturn_t pvscsi_isr(int irq, void *devp)
{
        struct pvscsi_adapter *adapter = devp;
        unsigned long flags;

        spin_lock_irqsave(&adapter->hw_lock, flags);
        pvscsi_process_completion_ring(adapter);
        if (adapter->use_msg && pvscsi_msg_pending(adapter))
                queue_work(adapter->workqueue, &adapter->work);
        spin_unlock_irqrestore(&adapter->hw_lock, flags);

        return IRQ_HANDLED;
}

static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
{
        struct pvscsi_adapter *adapter = devp;
        u32 val = pvscsi_read_intr_status(adapter);

        if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
                return IRQ_NONE;
        pvscsi_write_intr_status(devp, val);
        return pvscsi_isr(irq, devp);
}
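
/*
 * Why two ISRs (driver-local note): MSI and MSI-X vectors are exclusive
 * to this device, so pvscsi_isr() can skip reading INTR_STATUS
 * entirely. Legacy INTx may be shared with other devices, so
 * pvscsi_shared_isr() first checks that the interrupt is really ours,
 * acknowledges it by writing the status back, and only then dispatches
 * to the common handler; otherwise it returns IRQ_NONE.
 */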

static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
{
        struct pvscsi_ctx *ctx = adapter->cmd_map;
        unsigned i;

        for (i = 0; i < adapter->req_depth; ++i, ++ctx)
                free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
}

static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
{
        free_irq(pci_irq_vector(adapter->dev, 0), adapter);
        pci_free_irq_vectors(adapter->dev);
}

static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
        if (adapter->workqueue)
                destroy_workqueue(adapter->workqueue);

        if (adapter->mmioBase)
                pci_iounmap(adapter->dev, adapter->mmioBase);

        pci_release_regions(adapter->dev);

        if (adapter->cmd_map) {
                pvscsi_free_sgls(adapter);
                kfree(adapter->cmd_map);
        }

        if (adapter->rings_state)
                dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
                                    adapter->rings_state, adapter->ringStatePA);

        if (adapter->req_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->req_pages * PAGE_SIZE,
                                    adapter->req_ring, adapter->reqRingPA);

        if (adapter->cmp_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->cmp_pages * PAGE_SIZE,
                                    adapter->cmp_ring, adapter->cmpRingPA);

        if (adapter->msg_ring)
                dma_free_coherent(&adapter->dev->dev,
                                    adapter->msg_pages * PAGE_SIZE,
                                    adapter->msg_ring, adapter->msgRingPA);
}

/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
        struct pvscsi_ctx *ctx;
        int i;

        ctx = adapter->cmd_map;
        BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);

        for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
                ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
                                                    get_order(SGL_SIZE));
                ctx->sglPA = 0;
                BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
                if (!ctx->sgl) {
                        for (; i >= 0; --i, --ctx) {
                                free_pages((unsigned long)ctx->sgl,
                                           get_order(SGL_SIZE));
                                ctx->sgl = NULL;
                        }
                        return -ENOMEM;
                }
        }

        return 0;
}

/*
 * Query the device, fetch the config info and return the
 * maximum number of targets on the adapter. On any failure,
 * return the default of 16.
 */
static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{
        struct PVSCSICmdDescConfigCmd cmd;
        struct PVSCSIConfigPageHeader *header;
        struct device *dev;
        dma_addr_t configPagePA;
        void *config_page;
        u32 numPhys = 16;

        dev = pvscsi_dev(adapter);
        config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
                        &configPagePA, GFP_KERNEL);
        if (!config_page) {
                dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
                goto exit;
        }
        BUG_ON(configPagePA & ~PAGE_MASK);

        /* Fetch config info from the device. */
        cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
        cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
        cmd.cmpAddr = configPagePA;
        cmd._pad = 0;

        /*
         * Mark the completion page header with error values. If the device
         * completes the command successfully, it sets the status values to
         * indicate success.
         */
        header = config_page;
        memset(header, 0, sizeof *header);
        header->hostStatus = BTSTAT_INVPARAM;
        header->scsiStatus = SDSTAT_CHECK;

        pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof cmd);

        if (header->hostStatus == BTSTAT_SUCCESS &&
            header->scsiStatus == SDSTAT_GOOD) {
                struct PVSCSIConfigPageController *config;

                config = config_page;
                numPhys = config->numPhys;
        } else
                dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
                         header->hostStatus, header->scsiStatus);
        dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
                          configPagePA);
exit:
        return numPhys;
}

static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
        struct pvscsi_adapter *adapter;
        struct pvscsi_adapter adapter_temp;
        struct Scsi_Host *host = NULL;
        unsigned int i;
        int error;
        u32 max_id;

        error = -ENODEV;

        if (pci_enable_device(pdev))
                return error;

        if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
                printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
        } else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
                printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
        } else {
                printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
                goto out_disable_device;
        }

        /*
         * Let's use a temp pvscsi_adapter struct until we find the number of
         * targets on the adapter; after that we switch to the real
         * allocated struct.
         */
        adapter = &adapter_temp;
        memset(adapter, 0, sizeof(*adapter));
        adapter->dev  = pdev;
        adapter->rev = pdev->revision;

        if (pci_request_regions(pdev, "vmw_pvscsi")) {
                printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
                goto out_disable_device;
        }

        for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
                if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
                        continue;

                if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
                        continue;

                break;
        }

        if (i == DEVICE_COUNT_RESOURCE) {
                printk(KERN_ERR
                       "vmw_pvscsi: adapter has no suitable MMIO region\n");
                goto out_release_resources_and_disable;
        }

        adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);

        if (!adapter->mmioBase) {
                printk(KERN_ERR
                       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
                       i, PVSCSI_MEM_SPACE_SIZE);
                goto out_release_resources_and_disable;
        }

        pci_set_master(pdev);

        /*
         * Ask the device for max number of targets before deciding the
         * default pvscsi_ring_pages value.
         */
        max_id = pvscsi_get_max_targets(adapter);
        printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);

        if (pvscsi_ring_pages == 0)
                /*
                 * Set the right default value. Up to 16 it is 8, above it is
                 * max.
                 */
                pvscsi_ring_pages = (max_id > 16) ?
                        PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
                        PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
        printk(KERN_INFO
               "vmw_pvscsi: setting ring_pages to %d\n",
               pvscsi_ring_pages);

        pvscsi_template.can_queue =
                min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
                PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
        pvscsi_template.cmd_per_lun =
                min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
        host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
        if (!host) {
                printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
                goto out_release_resources_and_disable;
        }

        /*
         * Let's use the real pvscsi_adapter struct here onwards.
         */
        adapter = shost_priv(host);
        memset(adapter, 0, sizeof(*adapter));
        adapter->dev  = pdev;
        adapter->host = host;
        /*
         * Copy back what we already have to the allocated adapter struct.
         */
        adapter->rev = adapter_temp.rev;
        adapter->mmioBase = adapter_temp.mmioBase;

        spin_lock_init(&adapter->hw_lock);
        host->max_channel = 0;
        host->max_lun     = 1;
        host->max_cmd_len = 16;
        host->max_id      = max_id;

        pci_set_drvdata(pdev, host);

        ll_adapter_reset(adapter);

        adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);

        error = pvscsi_allocate_rings(adapter);
        if (error) {
                printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
                goto out_release_resources;
        }

        /*
         * From this point on we should reset the adapter if anything goes
         * wrong.
         */
        pvscsi_setup_all_rings(adapter);

        adapter->cmd_map = kcalloc(adapter->req_depth,
                                   sizeof(struct pvscsi_ctx), GFP_KERNEL);
        if (!adapter->cmd_map) {
                printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
                error = -ENOMEM;
                goto out_reset_adapter;
        }

        INIT_LIST_HEAD(&adapter->cmd_pool);
        for (i = 0; i < adapter->req_depth; i++) {
                struct pvscsi_ctx *ctx = adapter->cmd_map + i;
                list_add(&ctx->list, &adapter->cmd_pool);
        }

        error = pvscsi_allocate_sg(adapter);
        if (error) {
                printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
                goto out_reset_adapter;
        }

        if (pvscsi_disable_msix)
                irq_flag &= ~PCI_IRQ_MSIX;
        if (pvscsi_disable_msi)
                irq_flag &= ~PCI_IRQ_MSI;

        error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
        if (error < 0)
                goto out_reset_adapter;

        adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
        printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
               adapter->use_req_threshold ? "en" : "dis");

        if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
                printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
                        adapter->dev->msix_enabled ? "-X" : "");
                error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
                                0, "vmw_pvscsi", adapter);
        } else {
                printk(KERN_INFO "vmw_pvscsi: using INTx\n");
                error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
                                IRQF_SHARED, "vmw_pvscsi", adapter);
        }

        if (error) {
                printk(KERN_ERR
                       "vmw_pvscsi: unable to request IRQ: %d\n", error);
                goto out_reset_adapter;
        }

        error = scsi_add_host(host, &pdev->dev);
        if (error) {
                printk(KERN_ERR
                       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
                goto out_reset_adapter;
        }

        dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
                 adapter->rev, host->host_no);

        pvscsi_unmask_intr(adapter);

        scsi_scan_host(host);

        return 0;

out_reset_adapter:
        ll_adapter_reset(adapter);
out_release_resources:
        pvscsi_shutdown_intr(adapter);
        pvscsi_release_resources(adapter);
        scsi_host_put(host);
out_disable_device:
        pci_disable_device(pdev);

        return error;

out_release_resources_and_disable:
        pvscsi_shutdown_intr(adapter);
        pvscsi_release_resources(adapter);
        goto out_disable_device;
}

static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
{
        pvscsi_mask_intr(adapter);

        if (adapter->workqueue)
                flush_workqueue(adapter->workqueue);

        pvscsi_shutdown_intr(adapter);

        pvscsi_process_request_ring(adapter);
        pvscsi_process_completion_ring(adapter);
        ll_adapter_reset(adapter);
}

static void pvscsi_shutdown(struct pci_dev *dev)
{
        struct Scsi_Host *host = pci_get_drvdata(dev);
        struct pvscsi_adapter *adapter = shost_priv(host);

        __pvscsi_shutdown(adapter);
}

static void pvscsi_remove(struct pci_dev *pdev)
{
        struct Scsi_Host *host = pci_get_drvdata(pdev);
        struct pvscsi_adapter *adapter = shost_priv(host);

        scsi_remove_host(host);

        __pvscsi_shutdown(adapter);
        pvscsi_release_resources(adapter);

        scsi_host_put(host);

        pci_disable_device(pdev);
}

static struct pci_driver pvscsi_pci_driver = {
        .name           = "vmw_pvscsi",
        .id_table       = pvscsi_pci_tbl,
        .probe          = pvscsi_probe,
        .remove         = pvscsi_remove,
        .shutdown       = pvscsi_shutdown,
};

static int __init pvscsi_init(void)
{
        pr_info("%s - version %s\n",
                PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
        return pci_register_driver(&pvscsi_pci_driver);
}

static void __exit pvscsi_exit(void)
{
        pci_unregister_driver(&pvscsi_pci_driver);
}

module_init(pvscsi_init);
module_exit(pvscsi_exit);