linux/drivers/block/sunvdc.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/* sunvdc.c: Sun LDOM Virtual Disk Client.
   3 *
   4 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
   5 */
   6
   7#include <linux/module.h>
   8#include <linux/kernel.h>
   9#include <linux/types.h>
  10#include <linux/blk-mq.h>
  11#include <linux/hdreg.h>
  12#include <linux/cdrom.h>
  13#include <linux/slab.h>
  14#include <linux/spinlock.h>
  15#include <linux/completion.h>
  16#include <linux/delay.h>
  17#include <linux/init.h>
  18#include <linux/list.h>
  19#include <linux/scatterlist.h>
  20
  21#include <asm/vio.h>
  22#include <asm/ldc.h>
  23
  24#define DRV_MODULE_NAME         "sunvdc"
  25#define PFX DRV_MODULE_NAME     ": "
  26#define DRV_MODULE_VERSION      "1.2"
  27#define DRV_MODULE_RELDATE      "November 24, 2014"
  28
  29static char version[] =
  30        DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
  31MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
  32MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
  33MODULE_LICENSE("GPL");
  34MODULE_VERSION(DRV_MODULE_VERSION);
  35
  36#define VDC_TX_RING_SIZE        512
  37#define VDC_DEFAULT_BLK_SIZE    512
  38
  39#define MAX_XFER_BLKS           (128 * 1024)
  40#define MAX_XFER_SIZE           (MAX_XFER_BLKS / VDC_DEFAULT_BLK_SIZE)
  41#define MAX_RING_COOKIES        ((MAX_XFER_BLKS / PAGE_SIZE) + 2)
  42
  43#define WAITING_FOR_LINK_UP     0x01
  44#define WAITING_FOR_TX_SPACE    0x02
  45#define WAITING_FOR_GEN_CMD     0x04
  46#define WAITING_FOR_ANY         -1
  47
  48#define VDC_MAX_RETRIES 10
  49
  50static struct workqueue_struct *sunvdc_wq;
  51
  52struct vdc_req_entry {
  53        struct request          *req;
  54};
  55
  56struct vdc_port {
  57        struct vio_driver_state vio;
  58
  59        struct gendisk          *disk;
  60
  61        struct vdc_completion   *cmp;
  62
  63        u64                     req_id;
  64        u64                     seq;
  65        struct vdc_req_entry    rq_arr[VDC_TX_RING_SIZE];
  66
  67        unsigned long           ring_cookies;
  68
  69        u64                     max_xfer_size;
  70        u32                     vdisk_block_size;
  71        u32                     drain;
  72
  73        u64                     ldc_timeout;
  74        struct delayed_work     ldc_reset_timer_work;
  75        struct work_struct      ldc_reset_work;
  76
  77        /* The server fills these in for us in the disk attribute
  78         * ACK packet.
  79         */
  80        u64                     operations;
  81        u32                     vdisk_size;
  82        u8                      vdisk_type;
  83        u8                      vdisk_mtype;
  84        u32                     vdisk_phys_blksz;
  85
  86        struct blk_mq_tag_set   tag_set;
  87
  88        char                    disk_name[32];
  89};
  90
  91static void vdc_ldc_reset(struct vdc_port *port);
  92static void vdc_ldc_reset_work(struct work_struct *work);
  93static void vdc_ldc_reset_timer_work(struct work_struct *work);
  94
  95static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
  96{
  97        return container_of(vio, struct vdc_port, vio);
  98}
  99
 100/* Ordered from largest major to lowest */
 101static struct vio_version vdc_versions[] = {
 102        { .major = 1, .minor = 2 },
 103        { .major = 1, .minor = 1 },
 104        { .major = 1, .minor = 0 },
 105};
 106
 107static inline int vdc_version_supported(struct vdc_port *port,
 108                                        u16 major, u16 minor)
 109{
 110        return port->vio.ver.major == major && port->vio.ver.minor >= minor;
 111}
 112
 113#define VDCBLK_NAME     "vdisk"
 114static int vdc_major;
 115#define PARTITION_SHIFT 3
 116
 117static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
 118{
 119        return vio_dring_avail(dr, VDC_TX_RING_SIZE);
 120}
 121
 122static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 123{
 124        struct gendisk *disk = bdev->bd_disk;
 125        sector_t nsect = get_capacity(disk);
 126        sector_t cylinders = nsect;
 127
 128        geo->heads = 0xff;
 129        geo->sectors = 0x3f;
 130        sector_div(cylinders, geo->heads * geo->sectors);
 131        geo->cylinders = cylinders;
 132        if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
 133                geo->cylinders = 0xffff;
 134
 135        return 0;
 136}
 137
 138/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
 139 * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
 140 * Needed to be able to install inside an ldom from an iso image.
 141 */
 142static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
 143                     unsigned command, unsigned long argument)
 144{
 145        struct vdc_port *port = bdev->bd_disk->private_data;
 146        int i;
 147
 148        switch (command) {
 149        case CDROMMULTISESSION:
 150                pr_debug(PFX "Multisession CDs not supported\n");
 151                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
 152                        if (put_user(0, (char __user *)(argument + i)))
 153                                return -EFAULT;
 154                return 0;
 155
 156        case CDROM_GET_CAPABILITY:
 157                if (!vdc_version_supported(port, 1, 1))
 158                        return -EINVAL;
 159                switch (port->vdisk_mtype) {
 160                case VD_MEDIA_TYPE_CD:
 161                case VD_MEDIA_TYPE_DVD:
 162                        return 0;
 163                default:
 164                        return -EINVAL;
 165                }
 166        default:
 167                pr_debug(PFX "ioctl %08x not supported\n", command);
 168                return -EINVAL;
 169        }
 170}
 171
 172static const struct block_device_operations vdc_fops = {
 173        .owner          = THIS_MODULE,
 174        .getgeo         = vdc_getgeo,
 175        .ioctl          = vdc_ioctl,
 176        .compat_ioctl   = blkdev_compat_ptr_ioctl,
 177};
 178
 179static void vdc_blk_queue_start(struct vdc_port *port)
 180{
 181        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 182
 183        /* restart blk queue when ring is half emptied. also called after
 184         * handshake completes, so check for initial handshake before we've
 185         * allocated a disk.
 186         */
 187        if (port->disk && vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
 188                blk_mq_start_stopped_hw_queues(port->disk->queue, true);
 189}
 190
 191static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
 192{
 193        if (vio->cmp &&
 194            (waiting_for == -1 ||
 195             vio->cmp->waiting_for == waiting_for)) {
 196                vio->cmp->err = err;
 197                complete(&vio->cmp->com);
 198                vio->cmp = NULL;
 199        }
 200}
 201
 202static void vdc_handshake_complete(struct vio_driver_state *vio)
 203{
 204        struct vdc_port *port = to_vdc_port(vio);
 205
 206        cancel_delayed_work(&port->ldc_reset_timer_work);
 207        vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
 208        vdc_blk_queue_start(port);
 209}
 210
 211static int vdc_handle_unknown(struct vdc_port *port, void *arg)
 212{
 213        struct vio_msg_tag *pkt = arg;
 214
 215        printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
 216               pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
 217        printk(KERN_ERR PFX "Resetting connection.\n");
 218
 219        ldc_disconnect(port->vio.lp);
 220
 221        return -ECONNRESET;
 222}
 223
 224static int vdc_send_attr(struct vio_driver_state *vio)
 225{
 226        struct vdc_port *port = to_vdc_port(vio);
 227        struct vio_disk_attr_info pkt;
 228
 229        memset(&pkt, 0, sizeof(pkt));
 230
 231        pkt.tag.type = VIO_TYPE_CTRL;
 232        pkt.tag.stype = VIO_SUBTYPE_INFO;
 233        pkt.tag.stype_env = VIO_ATTR_INFO;
 234        pkt.tag.sid = vio_send_sid(vio);
 235
 236        pkt.xfer_mode = VIO_DRING_MODE;
 237        pkt.vdisk_block_size = port->vdisk_block_size;
 238        pkt.max_xfer_size = port->max_xfer_size;
 239
 240        viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
 241               pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
 242
 243        return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
 244}
 245
 246static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
 247{
 248        struct vdc_port *port = to_vdc_port(vio);
 249        struct vio_disk_attr_info *pkt = arg;
 250
 251        viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
 252               "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
 253               pkt->tag.stype, pkt->operations,
 254               pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
 255               pkt->xfer_mode, pkt->vdisk_block_size,
 256               pkt->max_xfer_size);
 257
 258        if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
 259                switch (pkt->vdisk_type) {
 260                case VD_DISK_TYPE_DISK:
 261                case VD_DISK_TYPE_SLICE:
 262                        break;
 263
 264                default:
 265                        printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
 266                               vio->name, pkt->vdisk_type);
 267                        return -ECONNRESET;
 268                }
 269
 270                if (pkt->vdisk_block_size > port->vdisk_block_size) {
 271                        printk(KERN_ERR PFX "%s: BLOCK size increased "
 272                               "%u --> %u\n",
 273                               vio->name,
 274                               port->vdisk_block_size, pkt->vdisk_block_size);
 275                        return -ECONNRESET;
 276                }
 277
 278                port->operations = pkt->operations;
 279                port->vdisk_type = pkt->vdisk_type;
 280                if (vdc_version_supported(port, 1, 1)) {
 281                        port->vdisk_size = pkt->vdisk_size;
 282                        port->vdisk_mtype = pkt->vdisk_mtype;
 283                }
 284                if (pkt->max_xfer_size < port->max_xfer_size)
 285                        port->max_xfer_size = pkt->max_xfer_size;
 286                port->vdisk_block_size = pkt->vdisk_block_size;
 287
 288                port->vdisk_phys_blksz = VDC_DEFAULT_BLK_SIZE;
 289                if (vdc_version_supported(port, 1, 2))
 290                        port->vdisk_phys_blksz = pkt->phys_block_size;
 291
 292                return 0;
 293        } else {
 294                printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
 295
 296                return -ECONNRESET;
 297        }
 298}
 299
 300static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
 301{
 302        int err = desc->status;
 303
 304        vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
 305}
 306
 307static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 308                        unsigned int index)
 309{
 310        struct vio_disk_desc *desc = vio_dring_entry(dr, index);
 311        struct vdc_req_entry *rqe = &port->rq_arr[index];
 312        struct request *req;
 313
 314        if (unlikely(desc->hdr.state != VIO_DESC_DONE))
 315                return;
 316
 317        ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
 318        desc->hdr.state = VIO_DESC_FREE;
 319        dr->cons = vio_dring_next(dr, index);
 320
 321        req = rqe->req;
 322        if (req == NULL) {
 323                vdc_end_special(port, desc);
 324                return;
 325        }
 326
 327        rqe->req = NULL;
 328
 329        blk_mq_end_request(req, desc->status ? BLK_STS_IOERR : 0);
 330
 331        vdc_blk_queue_start(port);
 332}
 333
 334static int vdc_ack(struct vdc_port *port, void *msgbuf)
 335{
 336        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 337        struct vio_dring_data *pkt = msgbuf;
 338
 339        if (unlikely(pkt->dring_ident != dr->ident ||
 340                     pkt->start_idx != pkt->end_idx ||
 341                     pkt->start_idx >= VDC_TX_RING_SIZE))
 342                return 0;
 343
 344        vdc_end_one(port, dr, pkt->start_idx);
 345
 346        return 0;
 347}
 348
 349static int vdc_nack(struct vdc_port *port, void *msgbuf)
 350{
 351        /* XXX Implement me XXX */
 352        return 0;
 353}
 354
 355static void vdc_event(void *arg, int event)
 356{
 357        struct vdc_port *port = arg;
 358        struct vio_driver_state *vio = &port->vio;
 359        unsigned long flags;
 360        int err;
 361
 362        spin_lock_irqsave(&vio->lock, flags);
 363
 364        if (unlikely(event == LDC_EVENT_RESET)) {
 365                vio_link_state_change(vio, event);
 366                queue_work(sunvdc_wq, &port->ldc_reset_work);
 367                goto out;
 368        }
 369
 370        if (unlikely(event == LDC_EVENT_UP)) {
 371                vio_link_state_change(vio, event);
 372                goto out;
 373        }
 374
 375        if (unlikely(event != LDC_EVENT_DATA_READY)) {
 376                pr_warn(PFX "Unexpected LDC event %d\n", event);
 377                goto out;
 378        }
 379
 380        err = 0;
 381        while (1) {
 382                union {
 383                        struct vio_msg_tag tag;
 384                        u64 raw[8];
 385                } msgbuf;
 386
 387                err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
 388                if (unlikely(err < 0)) {
 389                        if (err == -ECONNRESET)
 390                                vio_conn_reset(vio);
 391                        break;
 392                }
 393                if (err == 0)
 394                        break;
 395                viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
 396                       msgbuf.tag.type,
 397                       msgbuf.tag.stype,
 398                       msgbuf.tag.stype_env,
 399                       msgbuf.tag.sid);
 400                err = vio_validate_sid(vio, &msgbuf.tag);
 401                if (err < 0)
 402                        break;
 403
 404                if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
 405                        if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
 406                                err = vdc_ack(port, &msgbuf);
 407                        else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
 408                                err = vdc_nack(port, &msgbuf);
 409                        else
 410                                err = vdc_handle_unknown(port, &msgbuf);
 411                } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
 412                        err = vio_control_pkt_engine(vio, &msgbuf);
 413                } else {
 414                        err = vdc_handle_unknown(port, &msgbuf);
 415                }
 416                if (err < 0)
 417                        break;
 418        }
 419        if (err < 0)
 420                vdc_finish(&port->vio, err, WAITING_FOR_ANY);
 421out:
 422        spin_unlock_irqrestore(&vio->lock, flags);
 423}
 424
 425static int __vdc_tx_trigger(struct vdc_port *port)
 426{
 427        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 428        struct vio_dring_data hdr = {
 429                .tag = {
 430                        .type           = VIO_TYPE_DATA,
 431                        .stype          = VIO_SUBTYPE_INFO,
 432                        .stype_env      = VIO_DRING_DATA,
 433                        .sid            = vio_send_sid(&port->vio),
 434                },
 435                .dring_ident            = dr->ident,
 436                .start_idx              = dr->prod,
 437                .end_idx                = dr->prod,
 438        };
 439        int err, delay;
 440        int retries = 0;
 441
 442        hdr.seq = dr->snd_nxt;
 443        delay = 1;
 444        do {
 445                err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
 446                if (err > 0) {
 447                        dr->snd_nxt++;
 448                        break;
 449                }
 450                udelay(delay);
 451                if ((delay <<= 1) > 128)
 452                        delay = 128;
 453                if (retries++ > VDC_MAX_RETRIES)
 454                        break;
 455        } while (err == -EAGAIN);
 456
 457        if (err == -ENOTCONN)
 458                vdc_ldc_reset(port);
 459        return err;
 460}
 461
 462static int __send_request(struct request *req)
 463{
 464        struct vdc_port *port = req->q->disk->private_data;
 465        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 466        struct scatterlist sg[MAX_RING_COOKIES];
 467        struct vdc_req_entry *rqe;
 468        struct vio_disk_desc *desc;
 469        unsigned int map_perm;
 470        int nsg, err, i;
 471        u64 len;
 472        u8 op;
 473
 474        if (WARN_ON(port->ring_cookies > MAX_RING_COOKIES))
 475                return -EINVAL;
 476
 477        map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
 478
 479        if (rq_data_dir(req) == READ) {
 480                map_perm |= LDC_MAP_W;
 481                op = VD_OP_BREAD;
 482        } else {
 483                map_perm |= LDC_MAP_R;
 484                op = VD_OP_BWRITE;
 485        }
 486
 487        sg_init_table(sg, port->ring_cookies);
 488        nsg = blk_rq_map_sg(req->q, req, sg);
 489
 490        len = 0;
 491        for (i = 0; i < nsg; i++)
 492                len += sg[i].length;
 493
 494        desc = vio_dring_cur(dr);
 495
 496        err = ldc_map_sg(port->vio.lp, sg, nsg,
 497                         desc->cookies, port->ring_cookies,
 498                         map_perm);
 499        if (err < 0) {
 500                printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
 501                return err;
 502        }
 503
 504        rqe = &port->rq_arr[dr->prod];
 505        rqe->req = req;
 506
 507        desc->hdr.ack = VIO_ACK_ENABLE;
 508        desc->req_id = port->req_id;
 509        desc->operation = op;
 510        if (port->vdisk_type == VD_DISK_TYPE_DISK) {
 511                desc->slice = 0xff;
 512        } else {
 513                desc->slice = 0;
 514        }
 515        desc->status = ~0;
 516        desc->offset = (blk_rq_pos(req) << 9) / port->vdisk_block_size;
 517        desc->size = len;
 518        desc->ncookies = err;
 519
 520        /* This has to be a non-SMP write barrier because we are writing
 521         * to memory which is shared with the peer LDOM.
 522         */
 523        wmb();
 524        desc->hdr.state = VIO_DESC_READY;
 525
 526        err = __vdc_tx_trigger(port);
 527        if (err < 0) {
 528                printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
 529        } else {
 530                port->req_id++;
 531                dr->prod = vio_dring_next(dr, dr->prod);
 532        }
 533
 534        return err;
 535}
 536
 537static blk_status_t vdc_queue_rq(struct blk_mq_hw_ctx *hctx,
 538                                 const struct blk_mq_queue_data *bd)
 539{
 540        struct vdc_port *port = hctx->queue->queuedata;
 541        struct vio_dring_state *dr;
 542        unsigned long flags;
 543
 544        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 545
 546        blk_mq_start_request(bd->rq);
 547
 548        spin_lock_irqsave(&port->vio.lock, flags);
 549
 550        /*
 551         * Doing drain, just end the request in error
 552         */
 553        if (unlikely(port->drain)) {
 554                spin_unlock_irqrestore(&port->vio.lock, flags);
 555                return BLK_STS_IOERR;
 556        }
 557
 558        if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
 559                spin_unlock_irqrestore(&port->vio.lock, flags);
 560                blk_mq_stop_hw_queue(hctx);
 561                return BLK_STS_DEV_RESOURCE;
 562        }
 563
 564        if (__send_request(bd->rq) < 0) {
 565                spin_unlock_irqrestore(&port->vio.lock, flags);
 566                return BLK_STS_IOERR;
 567        }
 568
 569        spin_unlock_irqrestore(&port->vio.lock, flags);
 570        return BLK_STS_OK;
 571}
 572
 573static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
 574{
 575        struct vio_dring_state *dr;
 576        struct vio_completion comp;
 577        struct vio_disk_desc *desc;
 578        unsigned int map_perm;
 579        unsigned long flags;
 580        int op_len, err;
 581        void *req_buf;
 582
 583        if (!(((u64)1 << (u64)op) & port->operations))
 584                return -EOPNOTSUPP;
 585
 586        switch (op) {
 587        case VD_OP_BREAD:
 588        case VD_OP_BWRITE:
 589        default:
 590                return -EINVAL;
 591
 592        case VD_OP_FLUSH:
 593                op_len = 0;
 594                map_perm = 0;
 595                break;
 596
 597        case VD_OP_GET_WCE:
 598                op_len = sizeof(u32);
 599                map_perm = LDC_MAP_W;
 600                break;
 601
 602        case VD_OP_SET_WCE:
 603                op_len = sizeof(u32);
 604                map_perm = LDC_MAP_R;
 605                break;
 606
 607        case VD_OP_GET_VTOC:
 608                op_len = sizeof(struct vio_disk_vtoc);
 609                map_perm = LDC_MAP_W;
 610                break;
 611
 612        case VD_OP_SET_VTOC:
 613                op_len = sizeof(struct vio_disk_vtoc);
 614                map_perm = LDC_MAP_R;
 615                break;
 616
 617        case VD_OP_GET_DISKGEOM:
 618                op_len = sizeof(struct vio_disk_geom);
 619                map_perm = LDC_MAP_W;
 620                break;
 621
 622        case VD_OP_SET_DISKGEOM:
 623                op_len = sizeof(struct vio_disk_geom);
 624                map_perm = LDC_MAP_R;
 625                break;
 626
 627        case VD_OP_SCSICMD:
 628                op_len = 16;
 629                map_perm = LDC_MAP_RW;
 630                break;
 631
 632        case VD_OP_GET_DEVID:
 633                op_len = sizeof(struct vio_disk_devid);
 634                map_perm = LDC_MAP_W;
 635                break;
 636
 637        case VD_OP_GET_EFI:
 638        case VD_OP_SET_EFI:
 639                return -EOPNOTSUPP;
 640        }
 641
 642        map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
 643
 644        op_len = (op_len + 7) & ~7;
 645        req_buf = kzalloc(op_len, GFP_KERNEL);
 646        if (!req_buf)
 647                return -ENOMEM;
 648
 649        if (len > op_len)
 650                len = op_len;
 651
 652        if (map_perm & LDC_MAP_R)
 653                memcpy(req_buf, buf, len);
 654
 655        spin_lock_irqsave(&port->vio.lock, flags);
 656
 657        dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 658
 659        /* XXX If we want to use this code generically we have to
 660         * XXX handle TX ring exhaustion etc.
 661         */
 662        desc = vio_dring_cur(dr);
 663
 664        err = ldc_map_single(port->vio.lp, req_buf, op_len,
 665                             desc->cookies, port->ring_cookies,
 666                             map_perm);
 667        if (err < 0) {
 668                spin_unlock_irqrestore(&port->vio.lock, flags);
 669                kfree(req_buf);
 670                return err;
 671        }
 672
 673        init_completion(&comp.com);
 674        comp.waiting_for = WAITING_FOR_GEN_CMD;
 675        port->vio.cmp = &comp;
 676
 677        desc->hdr.ack = VIO_ACK_ENABLE;
 678        desc->req_id = port->req_id;
 679        desc->operation = op;
 680        desc->slice = 0;
 681        desc->status = ~0;
 682        desc->offset = 0;
 683        desc->size = op_len;
 684        desc->ncookies = err;
 685
 686        /* This has to be a non-SMP write barrier because we are writing
 687         * to memory which is shared with the peer LDOM.
 688         */
 689        wmb();
 690        desc->hdr.state = VIO_DESC_READY;
 691
 692        err = __vdc_tx_trigger(port);
 693        if (err >= 0) {
 694                port->req_id++;
 695                dr->prod = vio_dring_next(dr, dr->prod);
 696                spin_unlock_irqrestore(&port->vio.lock, flags);
 697
 698                wait_for_completion(&comp.com);
 699                err = comp.err;
 700        } else {
 701                port->vio.cmp = NULL;
 702                spin_unlock_irqrestore(&port->vio.lock, flags);
 703        }
 704
 705        if (map_perm & LDC_MAP_W)
 706                memcpy(buf, req_buf, len);
 707
 708        kfree(req_buf);
 709
 710        return err;
 711}
 712
 713static int vdc_alloc_tx_ring(struct vdc_port *port)
 714{
 715        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 716        unsigned long len, entry_size;
 717        int ncookies;
 718        void *dring;
 719
 720        entry_size = sizeof(struct vio_disk_desc) +
 721                (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
 722        len = (VDC_TX_RING_SIZE * entry_size);
 723
 724        ncookies = VIO_MAX_RING_COOKIES;
 725        dring = ldc_alloc_exp_dring(port->vio.lp, len,
 726                                    dr->cookies, &ncookies,
 727                                    (LDC_MAP_SHADOW |
 728                                     LDC_MAP_DIRECT |
 729                                     LDC_MAP_RW));
 730        if (IS_ERR(dring))
 731                return PTR_ERR(dring);
 732
 733        dr->base = dring;
 734        dr->entry_size = entry_size;
 735        dr->num_entries = VDC_TX_RING_SIZE;
 736        dr->prod = dr->cons = 0;
 737        dr->pending = VDC_TX_RING_SIZE;
 738        dr->ncookies = ncookies;
 739
 740        return 0;
 741}
 742
 743static void vdc_free_tx_ring(struct vdc_port *port)
 744{
 745        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
 746
 747        if (dr->base) {
 748                ldc_free_exp_dring(port->vio.lp, dr->base,
 749                                   (dr->entry_size * dr->num_entries),
 750                                   dr->cookies, dr->ncookies);
 751                dr->base = NULL;
 752                dr->entry_size = 0;
 753                dr->num_entries = 0;
 754                dr->pending = 0;
 755                dr->ncookies = 0;
 756        }
 757}
 758
 759static int vdc_port_up(struct vdc_port *port)
 760{
 761        struct vio_completion comp;
 762
 763        init_completion(&comp.com);
 764        comp.err = 0;
 765        comp.waiting_for = WAITING_FOR_LINK_UP;
 766        port->vio.cmp = &comp;
 767
 768        vio_port_up(&port->vio);
 769        wait_for_completion(&comp.com);
 770        return comp.err;
 771}
 772
 773static void vdc_port_down(struct vdc_port *port)
 774{
 775        ldc_disconnect(port->vio.lp);
 776        ldc_unbind(port->vio.lp);
 777        vdc_free_tx_ring(port);
 778        vio_ldc_free(&port->vio);
 779}
 780
 781static const struct blk_mq_ops vdc_mq_ops = {
 782        .queue_rq       = vdc_queue_rq,
 783};
 784
 785static int probe_disk(struct vdc_port *port)
 786{
 787        struct request_queue *q;
 788        struct gendisk *g;
 789        int err;
 790
 791        err = vdc_port_up(port);
 792        if (err)
 793                return err;
 794
 795        /* Using version 1.2 means vdisk_phys_blksz should be set unless the
 796         * disk is reserved by another system.
 797         */
 798        if (vdc_version_supported(port, 1, 2) && !port->vdisk_phys_blksz)
 799                return -ENODEV;
 800
 801        if (vdc_version_supported(port, 1, 1)) {
 802                /* vdisk_size should be set during the handshake, if it wasn't
 803                 * then the underlying disk is reserved by another system
 804                 */
 805                if (port->vdisk_size == -1)
 806                        return -ENODEV;
 807        } else {
 808                struct vio_disk_geom geom;
 809
 810                err = generic_request(port, VD_OP_GET_DISKGEOM,
 811                                      &geom, sizeof(geom));
 812                if (err < 0) {
 813                        printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
 814                               "error %d\n", err);
 815                        return err;
 816                }
 817                port->vdisk_size = ((u64)geom.num_cyl *
 818                                    (u64)geom.num_hd *
 819                                    (u64)geom.num_sec);
 820        }
 821
 822        err = blk_mq_alloc_sq_tag_set(&port->tag_set, &vdc_mq_ops,
 823                        VDC_TX_RING_SIZE, BLK_MQ_F_SHOULD_MERGE);
 824        if (err)
 825                return err;
 826
 827        g = blk_mq_alloc_disk(&port->tag_set, port);
 828        if (IS_ERR(g)) {
 829                printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
 830                       port->vio.name);
 831                err = PTR_ERR(g);
 832                goto out_free_tag;
 833        }
 834
 835        port->disk = g;
 836        q = g->queue;
 837
 838        /* Each segment in a request is up to an aligned page in size. */
 839        blk_queue_segment_boundary(q, PAGE_SIZE - 1);
 840        blk_queue_max_segment_size(q, PAGE_SIZE);
 841
 842        blk_queue_max_segments(q, port->ring_cookies);
 843        blk_queue_max_hw_sectors(q, port->max_xfer_size);
 844        g->major = vdc_major;
 845        g->first_minor = port->vio.vdev->dev_no << PARTITION_SHIFT;
 846        g->minors = 1 << PARTITION_SHIFT;
 847        strcpy(g->disk_name, port->disk_name);
 848
 849        g->fops = &vdc_fops;
 850        g->queue = q;
 851        g->private_data = port;
 852
 853        set_capacity(g, port->vdisk_size);
 854
 855        if (vdc_version_supported(port, 1, 1)) {
 856                switch (port->vdisk_mtype) {
 857                case VD_MEDIA_TYPE_CD:
 858                        pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
 859                        g->flags |= GENHD_FL_REMOVABLE;
 860                        set_disk_ro(g, 1);
 861                        break;
 862
 863                case VD_MEDIA_TYPE_DVD:
 864                        pr_info(PFX "Virtual DVD %s\n", port->disk_name);
 865                        g->flags |= GENHD_FL_REMOVABLE;
 866                        set_disk_ro(g, 1);
 867                        break;
 868
 869                case VD_MEDIA_TYPE_FIXED:
 870                        pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
 871                        break;
 872                }
 873        }
 874
 875        blk_queue_physical_block_size(q, port->vdisk_phys_blksz);
 876
 877        pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
 878               g->disk_name,
 879               port->vdisk_size, (port->vdisk_size >> (20 - 9)),
 880               port->vio.ver.major, port->vio.ver.minor);
 881
 882        err = device_add_disk(&port->vio.vdev->dev, g, NULL);
 883        if (err)
 884                goto out_cleanup_disk;
 885
 886        return 0;
 887
 888out_cleanup_disk:
 889        blk_cleanup_disk(g);
 890out_free_tag:
 891        blk_mq_free_tag_set(&port->tag_set);
 892        return err;
 893}
 894
 895static struct ldc_channel_config vdc_ldc_cfg = {
 896        .event          = vdc_event,
 897        .mtu            = 64,
 898        .mode           = LDC_MODE_UNRELIABLE,
 899};
 900
 901static struct vio_driver_ops vdc_vio_ops = {
 902        .send_attr              = vdc_send_attr,
 903        .handle_attr            = vdc_handle_attr,
 904        .handshake_complete     = vdc_handshake_complete,
 905};
 906
 907static void print_version(void)
 908{
 909        static int version_printed;
 910
 911        if (version_printed++ == 0)
 912                printk(KERN_INFO "%s", version);
 913}
 914
 915struct vdc_check_port_data {
 916        int     dev_no;
 917        char    *type;
 918};
 919
 920static int vdc_device_probed(struct device *dev, void *arg)
 921{
 922        struct vio_dev *vdev = to_vio_dev(dev);
 923        struct vdc_check_port_data *port_data;
 924
 925        port_data = (struct vdc_check_port_data *)arg;
 926
 927        if ((vdev->dev_no == port_data->dev_no) &&
 928            (!(strcmp((char *)&vdev->type, port_data->type))) &&
 929                dev_get_drvdata(dev)) {
 930                /* This device has already been configured
 931                 * by vdc_port_probe()
 932                 */
 933                return 1;
 934        } else {
 935                return 0;
 936        }
 937}
 938
 939/* Determine whether the VIO device is part of an mpgroup
 940 * by locating all the virtual-device-port nodes associated
 941 * with the parent virtual-device node for the VIO device
 942 * and checking whether any of these nodes are vdc-ports
 943 * which have already been configured.
 944 *
 945 * Returns true if this device is part of an mpgroup and has
 946 * already been probed.
 947 */
 948static bool vdc_port_mpgroup_check(struct vio_dev *vdev)
 949{
 950        struct vdc_check_port_data port_data;
 951        struct device *dev;
 952
 953        port_data.dev_no = vdev->dev_no;
 954        port_data.type = (char *)&vdev->type;
 955
 956        dev = device_find_child(vdev->dev.parent, &port_data,
 957                                vdc_device_probed);
 958
 959        if (dev)
 960                return true;
 961
 962        return false;
 963}
 964
 965static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 966{
 967        struct mdesc_handle *hp;
 968        struct vdc_port *port;
 969        int err;
 970        const u64 *ldc_timeout;
 971
 972        print_version();
 973
 974        hp = mdesc_grab();
 975
 976        err = -ENODEV;
 977        if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {
 978                printk(KERN_ERR PFX "Port id [%llu] too large.\n",
 979                       vdev->dev_no);
 980                goto err_out_release_mdesc;
 981        }
 982
 983        /* Check if this device is part of an mpgroup */
 984        if (vdc_port_mpgroup_check(vdev)) {
 985                printk(KERN_WARNING
 986                        "VIO: Ignoring extra vdisk port %s",
 987                        dev_name(&vdev->dev));
 988                goto err_out_release_mdesc;
 989        }
 990
 991        port = kzalloc(sizeof(*port), GFP_KERNEL);
 992        if (!port) {
 993                err = -ENOMEM;
 994                goto err_out_release_mdesc;
 995        }
 996
 997        if (vdev->dev_no >= 26)
 998                snprintf(port->disk_name, sizeof(port->disk_name),
 999                         VDCBLK_NAME "%c%c",
1000                         'a' + ((int)vdev->dev_no / 26) - 1,
1001                         'a' + ((int)vdev->dev_no % 26));
1002        else
1003                snprintf(port->disk_name, sizeof(port->disk_name),
1004                         VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
1005        port->vdisk_size = -1;
1006
1007        /* Actual wall time may be double due to do_generic_file_read() doing
1008         * a readahead I/O first, and once that fails it will try to read a
1009         * single page.
1010         */
1011        ldc_timeout = mdesc_get_property(hp, vdev->mp, "vdc-timeout", NULL);
1012        port->ldc_timeout = ldc_timeout ? *ldc_timeout : 0;
1013        INIT_DELAYED_WORK(&port->ldc_reset_timer_work, vdc_ldc_reset_timer_work);
1014        INIT_WORK(&port->ldc_reset_work, vdc_ldc_reset_work);
1015
1016        err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
1017                              vdc_versions, ARRAY_SIZE(vdc_versions),
1018                              &vdc_vio_ops, port->disk_name);
1019        if (err)
1020                goto err_out_free_port;
1021
1022        port->vdisk_block_size = VDC_DEFAULT_BLK_SIZE;
1023        port->max_xfer_size = MAX_XFER_SIZE;
1024        port->ring_cookies = MAX_RING_COOKIES;
1025
1026        err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1027        if (err)
1028                goto err_out_free_port;
1029
1030        err = vdc_alloc_tx_ring(port);
1031        if (err)
1032                goto err_out_free_ldc;
1033
1034        err = probe_disk(port);
1035        if (err)
1036                goto err_out_free_tx_ring;
1037
1038        /* Note that the device driver_data is used to determine
1039         * whether the port has been probed.
1040         */
1041        dev_set_drvdata(&vdev->dev, port);
1042
1043        mdesc_release(hp);
1044
1045        return 0;
1046
1047err_out_free_tx_ring:
1048        vdc_free_tx_ring(port);
1049
1050err_out_free_ldc:
1051        vio_ldc_free(&port->vio);
1052
1053err_out_free_port:
1054        kfree(port);
1055
1056err_out_release_mdesc:
1057        mdesc_release(hp);
1058        return err;
1059}
1060
1061static void vdc_port_remove(struct vio_dev *vdev)
1062{
1063        struct vdc_port *port = dev_get_drvdata(&vdev->dev);
1064
1065        if (port) {
1066                blk_mq_stop_hw_queues(port->disk->queue);
1067
1068                flush_work(&port->ldc_reset_work);
1069                cancel_delayed_work_sync(&port->ldc_reset_timer_work);
1070                del_timer_sync(&port->vio.timer);
1071
1072                del_gendisk(port->disk);
1073                blk_cleanup_disk(port->disk);
1074                blk_mq_free_tag_set(&port->tag_set);
1075
1076                vdc_free_tx_ring(port);
1077                vio_ldc_free(&port->vio);
1078
1079                dev_set_drvdata(&vdev->dev, NULL);
1080
1081                kfree(port);
1082        }
1083}
1084
1085static void vdc_requeue_inflight(struct vdc_port *port)
1086{
1087        struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1088        u32 idx;
1089
1090        for (idx = dr->cons; idx != dr->prod; idx = vio_dring_next(dr, idx)) {
1091                struct vio_disk_desc *desc = vio_dring_entry(dr, idx);
1092                struct vdc_req_entry *rqe = &port->rq_arr[idx];
1093                struct request *req;
1094
1095                ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
1096                desc->hdr.state = VIO_DESC_FREE;
1097                dr->cons = vio_dring_next(dr, idx);
1098
1099                req = rqe->req;
1100                if (req == NULL) {
1101                        vdc_end_special(port, desc);
1102                        continue;
1103                }
1104
1105                rqe->req = NULL;
1106                blk_mq_requeue_request(req, false);
1107        }
1108}
1109
1110static void vdc_queue_drain(struct vdc_port *port)
1111{
1112        struct request_queue *q = port->disk->queue;
1113
1114        /*
1115         * Mark the queue as draining, then freeze/quiesce to ensure
1116         * that all existing requests are seen in ->queue_rq() and killed
1117         */
1118        port->drain = 1;
1119        spin_unlock_irq(&port->vio.lock);
1120
1121        blk_mq_freeze_queue(q);
1122        blk_mq_quiesce_queue(q);
1123
1124        spin_lock_irq(&port->vio.lock);
1125        port->drain = 0;
1126        blk_mq_unquiesce_queue(q);
1127        blk_mq_unfreeze_queue(q);
1128}
1129
1130static void vdc_ldc_reset_timer_work(struct work_struct *work)
1131{
1132        struct vdc_port *port;
1133        struct vio_driver_state *vio;
1134
1135        port = container_of(work, struct vdc_port, ldc_reset_timer_work.work);
1136        vio = &port->vio;
1137
1138        spin_lock_irq(&vio->lock);
1139        if (!(port->vio.hs_state & VIO_HS_COMPLETE)) {
1140                pr_warn(PFX "%s ldc down %llu seconds, draining queue\n",
1141                        port->disk_name, port->ldc_timeout);
1142                vdc_queue_drain(port);
1143                vdc_blk_queue_start(port);
1144        }
1145        spin_unlock_irq(&vio->lock);
1146}
1147
1148static void vdc_ldc_reset_work(struct work_struct *work)
1149{
1150        struct vdc_port *port;
1151        struct vio_driver_state *vio;
1152        unsigned long flags;
1153
1154        port = container_of(work, struct vdc_port, ldc_reset_work);
1155        vio = &port->vio;
1156
1157        spin_lock_irqsave(&vio->lock, flags);
1158        vdc_ldc_reset(port);
1159        spin_unlock_irqrestore(&vio->lock, flags);
1160}
1161
1162static void vdc_ldc_reset(struct vdc_port *port)
1163{
1164        int err;
1165
1166        assert_spin_locked(&port->vio.lock);
1167
1168        pr_warn(PFX "%s ldc link reset\n", port->disk_name);
1169        blk_mq_stop_hw_queues(port->disk->queue);
1170        vdc_requeue_inflight(port);
1171        vdc_port_down(port);
1172
1173        err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
1174        if (err) {
1175                pr_err(PFX "%s vio_ldc_alloc:%d\n", port->disk_name, err);
1176                return;
1177        }
1178
1179        err = vdc_alloc_tx_ring(port);
1180        if (err) {
1181                pr_err(PFX "%s vio_alloc_tx_ring:%d\n", port->disk_name, err);
1182                goto err_free_ldc;
1183        }
1184
1185        if (port->ldc_timeout)
1186                mod_delayed_work(system_wq, &port->ldc_reset_timer_work,
1187                          round_jiffies(jiffies + HZ * port->ldc_timeout));
1188        mod_timer(&port->vio.timer, round_jiffies(jiffies + HZ));
1189        return;
1190
1191err_free_ldc:
1192        vio_ldc_free(&port->vio);
1193}
1194
1195static const struct vio_device_id vdc_port_match[] = {
1196        {
1197                .type = "vdc-port",
1198        },
1199        {},
1200};
1201MODULE_DEVICE_TABLE(vio, vdc_port_match);
1202
1203static struct vio_driver vdc_port_driver = {
1204        .id_table       = vdc_port_match,
1205        .probe          = vdc_port_probe,
1206        .remove         = vdc_port_remove,
1207        .name           = "vdc_port",
1208};
1209
1210static int __init vdc_init(void)
1211{
1212        int err;
1213
1214        sunvdc_wq = alloc_workqueue("sunvdc", 0, 0);
1215        if (!sunvdc_wq)
1216                return -ENOMEM;
1217
1218        err = register_blkdev(0, VDCBLK_NAME);
1219        if (err < 0)
1220                goto out_free_wq;
1221
1222        vdc_major = err;
1223
1224        err = vio_register_driver(&vdc_port_driver);
1225        if (err)
1226                goto out_unregister_blkdev;
1227
1228        return 0;
1229
1230out_unregister_blkdev:
1231        unregister_blkdev(vdc_major, VDCBLK_NAME);
1232        vdc_major = 0;
1233
1234out_free_wq:
1235        destroy_workqueue(sunvdc_wq);
1236        return err;
1237}
1238
1239static void __exit vdc_exit(void)
1240{
1241        vio_unregister_driver(&vdc_port_driver);
1242        unregister_blkdev(vdc_major, VDCBLK_NAME);
1243        destroy_workqueue(sunvdc_wq);
1244}
1245
1246module_init(vdc_init);
1247module_exit(vdc_exit);
1248