linux/drivers/misc/mic/vop/vop_vringh.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Intel MIC Platform Software Stack (MPSS)
   4 *
   5 * Copyright(c) 2016 Intel Corporation.
   6 *
   7 * Intel Virtio Over PCIe (VOP) driver.
   8 */
   9#include <linux/sched.h>
  10#include <linux/poll.h>
  11#include <linux/dma-mapping.h>
  12
  13#include <linux/mic_common.h>
  14#include "../common/mic_dev.h"
  15
  16#include <linux/mic_ioctl.h>
  17#include "vop_main.h"
  18
  19/* Helper API to obtain the VOP PCIe device */
  20static inline struct device *vop_dev(struct vop_vdev *vdev)
  21{
  22        return vdev->vpdev->dev.parent;
  23}
  24
  25/* Helper API to check if a virtio device is initialized */
  26static inline int vop_vdev_inited(struct vop_vdev *vdev)
  27{
  28        if (!vdev)
  29                return -EINVAL;
  30        /* Device has not been created yet */
  31        if (!vdev->dd || !vdev->dd->type) {
  32                dev_err(vop_dev(vdev), "%s %d err %d\n",
  33                        __func__, __LINE__, -EINVAL);
  34                return -EINVAL;
  35        }
  36        /* Device has been removed/deleted */
  37        if (vdev->dd->type == -1) {
  38                dev_dbg(vop_dev(vdev), "%s %d err %d\n",
  39                        __func__, __LINE__, -ENODEV);
  40                return -ENODEV;
  41        }
  42        return 0;
  43}
  44
  45static void _vop_notify(struct vringh *vrh)
  46{
  47        struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
  48        struct vop_vdev *vdev = vvrh->vdev;
  49        struct vop_device *vpdev = vdev->vpdev;
  50        s8 db = vdev->dc->h2c_vdev_db;
  51
  52        if (db != -1)
  53                vpdev->hw_ops->send_intr(vpdev, db);
  54}
  55
  56static void vop_virtio_init_post(struct vop_vdev *vdev)
  57{
  58        struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
  59        struct vop_device *vpdev = vdev->vpdev;
  60        int i, used_size;
  61
  62        for (i = 0; i < vdev->dd->num_vq; i++) {
  63                used_size = PAGE_ALIGN(sizeof(u16) * 3 +
  64                                sizeof(struct vring_used_elem) *
  65                                le16_to_cpu(vqconfig->num));
  66                if (!le64_to_cpu(vqconfig[i].used_address)) {
  67                        dev_warn(vop_dev(vdev), "used_address zero??\n");
  68                        continue;
  69                }
  70                vdev->vvr[i].vrh.vring.used =
  71                        (void __force *)vpdev->hw_ops->remap(
  72                        vpdev,
  73                        le64_to_cpu(vqconfig[i].used_address),
  74                        used_size);
  75        }
  76
  77        vdev->dc->used_address_updated = 0;
  78
  79        dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
  80                 __func__, vdev->virtio_id);
  81}
  82
  83static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
  84{
  85        int i;
  86
  87        dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
  88                __func__, vdev->dd->status, vdev->virtio_id);
  89
  90        for (i = 0; i < vdev->dd->num_vq; i++)
  91                /*
  92                 * Avoid lockdep false positive. The + 1 is for the vop
  93                 * mutex which is held in the reset devices code path.
  94                 */
  95                mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
  96
  97        /* 0 status means "reset" */
  98        vdev->dd->status = 0;
  99        vdev->dc->vdev_reset = 0;
 100        vdev->dc->host_ack = 1;
 101
 102        for (i = 0; i < vdev->dd->num_vq; i++) {
 103                struct vringh *vrh = &vdev->vvr[i].vrh;
 104
 105                vdev->vvr[i].vring.info->avail_idx = 0;
 106                vrh->completed = 0;
 107                vrh->last_avail_idx = 0;
 108                vrh->last_used_idx = 0;
 109        }
 110
 111        for (i = 0; i < vdev->dd->num_vq; i++)
 112                mutex_unlock(&vdev->vvr[i].vr_mutex);
 113}
 114
 115static void vop_virtio_reset_devices(struct vop_info *vi)
 116{
 117        struct list_head *pos, *tmp;
 118        struct vop_vdev *vdev;
 119
 120        list_for_each_safe(pos, tmp, &vi->vdev_list) {
 121                vdev = list_entry(pos, struct vop_vdev, list);
 122                vop_virtio_device_reset(vdev);
 123                vdev->poll_wake = 1;
 124                wake_up(&vdev->waitq);
 125        }
 126}
 127
 128static void vop_bh_handler(struct work_struct *work)
 129{
 130        struct vop_vdev *vdev = container_of(work, struct vop_vdev,
 131                        virtio_bh_work);
 132
 133        if (vdev->dc->used_address_updated)
 134                vop_virtio_init_post(vdev);
 135
 136        if (vdev->dc->vdev_reset)
 137                vop_virtio_device_reset(vdev);
 138
 139        vdev->poll_wake = 1;
 140        wake_up(&vdev->waitq);
 141}
 142
 143static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
 144{
 145        struct vop_vdev *vdev = data;
 146        struct vop_device *vpdev = vdev->vpdev;
 147
 148        vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
 149        schedule_work(&vdev->virtio_bh_work);
 150        return IRQ_HANDLED;
 151}
 152
 153static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
 154{
 155        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
 156        int ret = 0, retry, i;
 157        struct vop_device *vpdev = vdev->vpdev;
 158        struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
 159        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 160        s8 db = bootparam->h2c_config_db;
 161
 162        mutex_lock(&vi->vop_mutex);
 163        for (i = 0; i < vdev->dd->num_vq; i++)
 164                mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
 165
 166        if (db == -1 || vdev->dd->type == -1) {
 167                ret = -EIO;
 168                goto exit;
 169        }
 170
 171        memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
 172        vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
 173        vpdev->hw_ops->send_intr(vpdev, db);
 174
 175        for (retry = 100; retry--;) {
 176                ret = wait_event_timeout(wake, vdev->dc->guest_ack,
 177                                         msecs_to_jiffies(100));
 178                if (ret)
 179                        break;
 180        }
 181
 182        dev_dbg(vop_dev(vdev),
 183                "%s %d retry: %d\n", __func__, __LINE__, retry);
 184        vdev->dc->config_change = 0;
 185        vdev->dc->guest_ack = 0;
 186exit:
 187        for (i = 0; i < vdev->dd->num_vq; i++)
 188                mutex_unlock(&vdev->vvr[i].vr_mutex);
 189        mutex_unlock(&vi->vop_mutex);
 190        return ret;
 191}
 192
 193static int vop_copy_dp_entry(struct vop_vdev *vdev,
 194                             struct mic_device_desc *argp, __u8 *type,
 195                             struct mic_device_desc **devpage)
 196{
 197        struct vop_device *vpdev = vdev->vpdev;
 198        struct mic_device_desc *devp;
 199        struct mic_vqconfig *vqconfig;
 200        int ret = 0, i;
 201        bool slot_found = false;
 202
 203        vqconfig = mic_vq_config(argp);
 204        for (i = 0; i < argp->num_vq; i++) {
 205                if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
 206                        ret =  -EINVAL;
 207                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 208                                __func__, __LINE__, ret);
 209                        goto exit;
 210                }
 211        }
 212
 213        /* Find the first free device page entry */
 214        for (i = sizeof(struct mic_bootparam);
 215                i < MIC_DP_SIZE - mic_total_desc_size(argp);
 216                i += mic_total_desc_size(devp)) {
 217                devp = vpdev->hw_ops->get_dp(vpdev) + i;
 218                if (devp->type == 0 || devp->type == -1) {
 219                        slot_found = true;
 220                        break;
 221                }
 222        }
 223        if (!slot_found) {
 224                ret =  -EINVAL;
 225                dev_err(vop_dev(vdev), "%s %d err %d\n",
 226                        __func__, __LINE__, ret);
 227                goto exit;
 228        }
 229        /*
 230         * Save off the type before doing the memcpy. Type will be set in the
 231         * end after completing all initialization for the new device.
 232         */
 233        *type = argp->type;
 234        argp->type = 0;
 235        memcpy(devp, argp, mic_desc_size(argp));
 236
 237        *devpage = devp;
 238exit:
 239        return ret;
 240}
 241
 242static void vop_init_device_ctrl(struct vop_vdev *vdev,
 243                                 struct mic_device_desc *devpage)
 244{
 245        struct mic_device_ctrl *dc;
 246
 247        dc = (void *)devpage + mic_aligned_desc_size(devpage);
 248
 249        dc->config_change = 0;
 250        dc->guest_ack = 0;
 251        dc->vdev_reset = 0;
 252        dc->host_ack = 0;
 253        dc->used_address_updated = 0;
 254        dc->c2h_vdev_db = -1;
 255        dc->h2c_vdev_db = -1;
 256        vdev->dc = dc;
 257}
 258
 259static int vop_virtio_add_device(struct vop_vdev *vdev,
 260                                 struct mic_device_desc *argp)
 261{
 262        struct vop_info *vi = vdev->vi;
 263        struct vop_device *vpdev = vi->vpdev;
 264        struct mic_device_desc *dd = NULL;
 265        struct mic_vqconfig *vqconfig;
 266        int vr_size, i, j, ret;
 267        u8 type = 0;
 268        s8 db = -1;
 269        char irqname[16];
 270        struct mic_bootparam *bootparam;
 271        u16 num;
 272        dma_addr_t vr_addr;
 273
 274        bootparam = vpdev->hw_ops->get_dp(vpdev);
 275        init_waitqueue_head(&vdev->waitq);
 276        INIT_LIST_HEAD(&vdev->list);
 277        vdev->vpdev = vpdev;
 278
 279        ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
 280        if (ret) {
 281                dev_err(vop_dev(vdev), "%s %d err %d\n",
 282                        __func__, __LINE__, ret);
 283                return ret;
 284        }
 285
 286        vop_init_device_ctrl(vdev, dd);
 287
 288        vdev->dd = dd;
 289        vdev->virtio_id = type;
 290        vqconfig = mic_vq_config(dd);
 291        INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
 292
 293        for (i = 0; i < dd->num_vq; i++) {
 294                struct vop_vringh *vvr = &vdev->vvr[i];
 295                struct mic_vring *vr = &vdev->vvr[i].vring;
 296
 297                num = le16_to_cpu(vqconfig[i].num);
 298                mutex_init(&vvr->vr_mutex);
 299                vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
 300                        sizeof(struct _mic_vring_info));
 301                vr->va = (void *)
 302                        __get_free_pages(GFP_KERNEL | __GFP_ZERO,
 303                                         get_order(vr_size));
 304                if (!vr->va) {
 305                        ret = -ENOMEM;
 306                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 307                                __func__, __LINE__, ret);
 308                        goto err;
 309                }
 310                vr->len = vr_size;
 311                vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
 312                vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
 313                vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
 314                                         DMA_BIDIRECTIONAL);
 315                if (dma_mapping_error(&vpdev->dev, vr_addr)) {
 316                        free_pages((unsigned long)vr->va, get_order(vr_size));
 317                        ret = -ENOMEM;
 318                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 319                                __func__, __LINE__, ret);
 320                        goto err;
 321                }
 322                vqconfig[i].address = cpu_to_le64(vr_addr);
 323
 324                vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
 325                ret = vringh_init_kern(&vvr->vrh,
 326                                       *(u32 *)mic_vq_features(vdev->dd),
 327                                       num, false, vr->vr.desc, vr->vr.avail,
 328                                       vr->vr.used);
 329                if (ret) {
 330                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 331                                __func__, __LINE__, ret);
 332                        goto err;
 333                }
 334                vringh_kiov_init(&vvr->riov, NULL, 0);
 335                vringh_kiov_init(&vvr->wiov, NULL, 0);
 336                vvr->head = USHRT_MAX;
 337                vvr->vdev = vdev;
 338                vvr->vrh.notify = _vop_notify;
 339                dev_dbg(&vpdev->dev,
 340                        "%s %d index %d va %p info %p vr_size 0x%x\n",
 341                        __func__, __LINE__, i, vr->va, vr->info, vr_size);
 342                vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
 343                                        get_order(VOP_INT_DMA_BUF_SIZE));
 344                vvr->buf_da = dma_map_single(&vpdev->dev,
 345                                          vvr->buf, VOP_INT_DMA_BUF_SIZE,
 346                                          DMA_BIDIRECTIONAL);
 347        }
 348
 349        snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
 350                 vdev->virtio_id);
 351        vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
 352        vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
 353                        _vop_virtio_intr_handler, irqname, vdev,
 354                        vdev->virtio_db);
 355        if (IS_ERR(vdev->virtio_cookie)) {
 356                ret = PTR_ERR(vdev->virtio_cookie);
 357                dev_dbg(&vpdev->dev, "request irq failed\n");
 358                goto err;
 359        }
 360
 361        vdev->dc->c2h_vdev_db = vdev->virtio_db;
 362
 363        /*
 364         * Order the type update with previous stores. This write barrier
 365         * is paired with the corresponding read barrier before the uncached
 366         * system memory read of the type, on the card while scanning the
 367         * device page.
 368         */
 369        smp_wmb();
 370        dd->type = type;
 371        argp->type = type;
 372
 373        if (bootparam) {
 374                db = bootparam->h2c_config_db;
 375                if (db != -1)
 376                        vpdev->hw_ops->send_intr(vpdev, db);
 377        }
 378        dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
 379        return 0;
 380err:
 381        vqconfig = mic_vq_config(dd);
 382        for (j = 0; j < i; j++) {
 383                struct vop_vringh *vvr = &vdev->vvr[j];
 384
 385                dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
 386                                 vvr->vring.len, DMA_BIDIRECTIONAL);
 387                free_pages((unsigned long)vvr->vring.va,
 388                           get_order(vvr->vring.len));
 389        }
 390        return ret;
 391}
 392
 393static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
 394                           struct vop_device *vpdev)
 395{
 396        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 397        s8 db;
 398        int ret, retry;
 399        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
 400
 401        devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
 402        db = bootparam->h2c_config_db;
 403        if (db != -1)
 404                vpdev->hw_ops->send_intr(vpdev, db);
 405        else
 406                goto done;
 407        for (retry = 15; retry--;) {
 408                ret = wait_event_timeout(wake, devp->guest_ack,
 409                                         msecs_to_jiffies(1000));
 410                if (ret)
 411                        break;
 412        }
 413done:
 414        devp->config_change = 0;
 415        devp->guest_ack = 0;
 416}
 417
 418static void vop_virtio_del_device(struct vop_vdev *vdev)
 419{
 420        struct vop_info *vi = vdev->vi;
 421        struct vop_device *vpdev = vdev->vpdev;
 422        int i;
 423        struct mic_vqconfig *vqconfig;
 424        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
 425
 426        if (!bootparam)
 427                goto skip_hot_remove;
 428        vop_dev_remove(vi, vdev->dc, vpdev);
 429skip_hot_remove:
 430        vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
 431        flush_work(&vdev->virtio_bh_work);
 432        vqconfig = mic_vq_config(vdev->dd);
 433        for (i = 0; i < vdev->dd->num_vq; i++) {
 434                struct vop_vringh *vvr = &vdev->vvr[i];
 435
 436                dma_unmap_single(&vpdev->dev,
 437                                 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
 438                                 DMA_BIDIRECTIONAL);
 439                free_pages((unsigned long)vvr->buf,
 440                           get_order(VOP_INT_DMA_BUF_SIZE));
 441                vringh_kiov_cleanup(&vvr->riov);
 442                vringh_kiov_cleanup(&vvr->wiov);
 443                dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
 444                                 vvr->vring.len, DMA_BIDIRECTIONAL);
 445                free_pages((unsigned long)vvr->vring.va,
 446                           get_order(vvr->vring.len));
 447        }
 448        /*
 449         * Order the type update with previous stores. This write barrier
 450         * is paired with the corresponding read barrier before the uncached
 451         * system memory read of the type, on the card while scanning the
 452         * device page.
 453         */
 454        smp_wmb();
 455        vdev->dd->type = -1;
 456}
 457
 458/*
 459 * vop_sync_dma - Wrapper for synchronous DMAs.
 460 *
 461 * @dev - The address of the pointer to the device instance used
 462 * for DMA registration.
 463 * @dst - destination DMA address.
 464 * @src - source DMA address.
 465 * @len - size of the transfer.
 466 *
 467 * Return DMA_SUCCESS on success
 468 */
 469static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
 470                        size_t len)
 471{
 472        int err = 0;
 473        struct dma_device *ddev;
 474        struct dma_async_tx_descriptor *tx;
 475        struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
 476        struct dma_chan *vop_ch = vi->dma_ch;
 477
 478        if (!vop_ch) {
 479                err = -EBUSY;
 480                goto error;
 481        }
 482        ddev = vop_ch->device;
 483        tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
 484                DMA_PREP_FENCE);
 485        if (!tx) {
 486                err = -ENOMEM;
 487                goto error;
 488        } else {
 489                dma_cookie_t cookie;
 490
 491                cookie = tx->tx_submit(tx);
 492                if (dma_submit_error(cookie)) {
 493                        err = -ENOMEM;
 494                        goto error;
 495                }
 496                dma_async_issue_pending(vop_ch);
 497                err = dma_sync_wait(vop_ch, cookie);
 498        }
 499error:
 500        if (err)
 501                dev_err(&vi->vpdev->dev, "%s %d err %d\n",
 502                        __func__, __LINE__, err);
 503        return err;
 504}
 505
 506#define VOP_USE_DMA true
 507
 508/*
 509 * Initiates the copies across the PCIe bus from card memory to a user
 510 * space buffer. When transfers are done using DMA, source/destination
 511 * addresses and transfer length must follow the alignment requirements of
 512 * the MIC DMA engine.
 513 */
 514static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
 515                                   size_t len, u64 daddr, size_t dlen,
 516                                   int vr_idx)
 517{
 518        struct vop_device *vpdev = vdev->vpdev;
 519        void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
 520        struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 521        struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
 522        size_t dma_alignment;
 523        bool x200;
 524        size_t dma_offset, partlen;
 525        int err;
 526
 527        if (!VOP_USE_DMA || !vi->dma_ch) {
 528                if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
 529                        err = -EFAULT;
 530                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 531                                __func__, __LINE__, err);
 532                        goto err;
 533                }
 534                vdev->in_bytes += len;
 535                err = 0;
 536                goto err;
 537        }
 538
 539        dma_alignment = 1 << vi->dma_ch->device->copy_align;
 540        x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
 541
 542        dma_offset = daddr - round_down(daddr, dma_alignment);
 543        daddr -= dma_offset;
 544        len += dma_offset;
 545        /*
 546         * X100 uses DMA addresses as seen by the card so adding
 547         * the aperture base is not required for DMA. However x200
 548         * requires DMA addresses to be an offset into the bar so
 549         * add the aperture base for x200.
 550         */
 551        if (x200)
 552                daddr += vpdev->aper->pa;
 553        while (len) {
 554                partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
 555                err = vop_sync_dma(vdev, vvr->buf_da, daddr,
 556                                   ALIGN(partlen, dma_alignment));
 557                if (err) {
 558                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 559                                __func__, __LINE__, err);
 560                        goto err;
 561                }
 562                if (copy_to_user(ubuf, vvr->buf + dma_offset,
 563                                 partlen - dma_offset)) {
 564                        err = -EFAULT;
 565                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 566                                __func__, __LINE__, err);
 567                        goto err;
 568                }
 569                daddr += partlen;
 570                ubuf += partlen;
 571                dbuf += partlen;
 572                vdev->in_bytes_dma += partlen;
 573                vdev->in_bytes += partlen;
 574                len -= partlen;
 575                dma_offset = 0;
 576        }
 577        err = 0;
 578err:
 579        vpdev->hw_ops->unmap(vpdev, dbuf);
 580        dev_dbg(vop_dev(vdev),
 581                "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
 582                __func__, ubuf, dbuf, len, vr_idx);
 583        return err;
 584}
 585
 586/*
 587 * Initiates copies across the PCIe bus from a user space buffer to card
 588 * memory. When transfers are done using DMA, source/destination addresses
 589 * and transfer length must follow the alignment requirements of the MIC
 590 * DMA engine.
 591 */
 592static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
 593                                     size_t len, u64 daddr, size_t dlen,
 594                                     int vr_idx)
 595{
 596        struct vop_device *vpdev = vdev->vpdev;
 597        void __iomem *dbuf = vpdev->hw_ops->remap(vpdev, daddr, len);
 598        struct vop_vringh *vvr = &vdev->vvr[vr_idx];
 599        struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
 600        size_t dma_alignment;
 601        bool x200;
 602        size_t partlen;
 603        bool dma = VOP_USE_DMA && vi->dma_ch;
 604        int err = 0;
 605
 606        if (dma) {
 607                dma_alignment = 1 << vi->dma_ch->device->copy_align;
 608                x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
 609
 610                if (daddr & (dma_alignment - 1)) {
 611                        vdev->tx_dst_unaligned += len;
 612                        dma = false;
 613                } else if (ALIGN(len, dma_alignment) > dlen) {
 614                        vdev->tx_len_unaligned += len;
 615                        dma = false;
 616                }
 617        }
 618
 619        if (!dma)
 620                goto memcpy;
 621
 622        /*
 623         * X100 uses DMA addresses as seen by the card so adding
 624         * the aperture base is not required for DMA. However x200
 625         * requires DMA addresses to be an offset into the bar so
 626         * add the aperture base for x200.
 627         */
 628        if (x200)
 629                daddr += vpdev->aper->pa;
 630        while (len) {
 631                partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
 632
 633                if (copy_from_user(vvr->buf, ubuf, partlen)) {
 634                        err = -EFAULT;
 635                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 636                                __func__, __LINE__, err);
 637                        goto err;
 638                }
 639                err = vop_sync_dma(vdev, daddr, vvr->buf_da,
 640                                   ALIGN(partlen, dma_alignment));
 641                if (err) {
 642                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 643                                __func__, __LINE__, err);
 644                        goto err;
 645                }
 646                daddr += partlen;
 647                ubuf += partlen;
 648                dbuf += partlen;
 649                vdev->out_bytes_dma += partlen;
 650                vdev->out_bytes += partlen;
 651                len -= partlen;
 652        }
 653memcpy:
 654        /*
 655         * We are copying to IO below and should ideally use something
 656         * like copy_from_user_toio(..) if it existed.
 657         */
 658        if (copy_from_user((void __force *)dbuf, ubuf, len)) {
 659                err = -EFAULT;
 660                dev_err(vop_dev(vdev), "%s %d err %d\n",
 661                        __func__, __LINE__, err);
 662                goto err;
 663        }
 664        vdev->out_bytes += len;
 665        err = 0;
 666err:
 667        vpdev->hw_ops->unmap(vpdev, dbuf);
 668        dev_dbg(vop_dev(vdev),
 669                "%s: ubuf %p dbuf %p len 0x%zx vr_idx 0x%x\n",
 670                __func__, ubuf, dbuf, len, vr_idx);
 671        return err;
 672}
 673
 674#define MIC_VRINGH_READ true
 675
 676/* Determine the total number of bytes consumed in a VRINGH KIOV */
 677static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
 678{
 679        int i;
 680        u32 total = iov->consumed;
 681
 682        for (i = 0; i < iov->i; i++)
 683                total += iov->iov[i].iov_len;
 684        return total;
 685}
 686
 687/*
 688 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 689 * This API is heavily based on the vringh_iov_xfer(..) implementation
 690 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 691 * and vringh_iov_push_kern(..) directly is because there is no
 692 * way to override the VRINGH xfer(..) routines as of v3.10.
 693 */
 694static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
 695                           void __user *ubuf, size_t len, bool read, int vr_idx,
 696                           size_t *out_len)
 697{
 698        int ret = 0;
 699        size_t partlen, tot_len = 0;
 700
 701        while (len && iov->i < iov->used) {
 702                struct kvec *kiov = &iov->iov[iov->i];
 703                unsigned long daddr = (unsigned long)kiov->iov_base;
 704
 705                partlen = min(kiov->iov_len, len);
 706                if (read)
 707                        ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
 708                                                      daddr,
 709                                                      kiov->iov_len,
 710                                                      vr_idx);
 711                else
 712                        ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
 713                                                        daddr,
 714                                                        kiov->iov_len,
 715                                                        vr_idx);
 716                if (ret) {
 717                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 718                                __func__, __LINE__, ret);
 719                        break;
 720                }
 721                len -= partlen;
 722                ubuf += partlen;
 723                tot_len += partlen;
 724                iov->consumed += partlen;
 725                kiov->iov_len -= partlen;
 726                kiov->iov_base += partlen;
 727                if (!kiov->iov_len) {
 728                        /* Fix up old iov element then increment. */
 729                        kiov->iov_len = iov->consumed;
 730                        kiov->iov_base -= iov->consumed;
 731
 732                        iov->consumed = 0;
 733                        iov->i++;
 734                }
 735        }
 736        *out_len = tot_len;
 737        return ret;
 738}
 739
 740/*
 741 * Use the standard VRINGH infrastructure in the kernel to fetch new
 742 * descriptors, initiate the copies and update the used ring.
 743 */
 744static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
 745{
 746        int ret = 0;
 747        u32 iovcnt = copy->iovcnt;
 748        struct iovec iov;
 749        struct iovec __user *u_iov = copy->iov;
 750        void __user *ubuf = NULL;
 751        struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
 752        struct vringh_kiov *riov = &vvr->riov;
 753        struct vringh_kiov *wiov = &vvr->wiov;
 754        struct vringh *vrh = &vvr->vrh;
 755        u16 *head = &vvr->head;
 756        struct mic_vring *vr = &vvr->vring;
 757        size_t len = 0, out_len;
 758
 759        copy->out_len = 0;
 760        /* Fetch a new IOVEC if all previous elements have been processed */
 761        if (riov->i == riov->used && wiov->i == wiov->used) {
 762                ret = vringh_getdesc_kern(vrh, riov, wiov,
 763                                          head, GFP_KERNEL);
 764                /* Check if there are available descriptors */
 765                if (ret <= 0)
 766                        return ret;
 767        }
 768        while (iovcnt) {
 769                if (!len) {
 770                        /* Copy over a new iovec from user space. */
 771                        ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
 772                        if (ret) {
 773                                ret = -EINVAL;
 774                                dev_err(vop_dev(vdev), "%s %d err %d\n",
 775                                        __func__, __LINE__, ret);
 776                                break;
 777                        }
 778                        len = iov.iov_len;
 779                        ubuf = iov.iov_base;
 780                }
 781                /* Issue all the read descriptors first */
 782                ret = vop_vringh_copy(vdev, riov, ubuf, len,
 783                                      MIC_VRINGH_READ, copy->vr_idx, &out_len);
 784                if (ret) {
 785                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 786                                __func__, __LINE__, ret);
 787                        break;
 788                }
 789                len -= out_len;
 790                ubuf += out_len;
 791                copy->out_len += out_len;
 792                /* Issue the write descriptors next */
 793                ret = vop_vringh_copy(vdev, wiov, ubuf, len,
 794                                      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
 795                if (ret) {
 796                        dev_err(vop_dev(vdev), "%s %d err %d\n",
 797                                __func__, __LINE__, ret);
 798                        break;
 799                }
 800                len -= out_len;
 801                ubuf += out_len;
 802                copy->out_len += out_len;
 803                if (!len) {
 804                        /* One user space iovec is now completed */
 805                        iovcnt--;
 806                        u_iov++;
 807                }
 808                /* Exit loop if all elements in KIOVs have been processed. */
 809                if (riov->i == riov->used && wiov->i == wiov->used)
 810                        break;
 811        }
 812        /*
 813         * Update the used ring if a descriptor was available and some data was
 814         * copied in/out and the user asked for a used ring update.
 815         */
 816        if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
 817                u32 total = 0;
 818
 819                /* Determine the total data consumed */
 820                total += vop_vringh_iov_consumed(riov);
 821                total += vop_vringh_iov_consumed(wiov);
 822                vringh_complete_kern(vrh, *head, total);
 823                *head = USHRT_MAX;
 824                if (vringh_need_notify_kern(vrh) > 0)
 825                        vringh_notify(vrh);
 826                vringh_kiov_cleanup(riov);
 827                vringh_kiov_cleanup(wiov);
 828                /* Update avail idx for user space */
 829                vr->info->avail_idx = vrh->last_avail_idx;
 830        }
 831        return ret;
 832}
 833
 834static inline int vop_verify_copy_args(struct vop_vdev *vdev,
 835                                       struct mic_copy_desc *copy)
 836{
 837        if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
 838                return -EINVAL;
 839        return 0;
 840}
 841
 842/* Copy a specified number of virtio descriptors in a chain */
 843static int vop_virtio_copy_desc(struct vop_vdev *vdev,
 844                                struct mic_copy_desc *copy)
 845{
 846        int err;
 847        struct vop_vringh *vvr;
 848
 849        err = vop_verify_copy_args(vdev, copy);
 850        if (err)
 851                return err;
 852
 853        vvr = &vdev->vvr[copy->vr_idx];
 854        mutex_lock(&vvr->vr_mutex);
 855        if (!vop_vdevup(vdev)) {
 856                err = -ENODEV;
 857                dev_err(vop_dev(vdev), "%s %d err %d\n",
 858                        __func__, __LINE__, err);
 859                goto err;
 860        }
 861        err = _vop_virtio_copy(vdev, copy);
 862        if (err) {
 863                dev_err(vop_dev(vdev), "%s %d err %d\n",
 864                        __func__, __LINE__, err);
 865        }
 866err:
 867        mutex_unlock(&vvr->vr_mutex);
 868        return err;
 869}
 870
 871static int vop_open(struct inode *inode, struct file *f)
 872{
 873        struct vop_vdev *vdev;
 874        struct vop_info *vi = container_of(f->private_data,
 875                struct vop_info, miscdev);
 876
 877        vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
 878        if (!vdev)
 879                return -ENOMEM;
 880        vdev->vi = vi;
 881        mutex_init(&vdev->vdev_mutex);
 882        f->private_data = vdev;
 883        init_completion(&vdev->destroy);
 884        complete(&vdev->destroy);
 885        return 0;
 886}
 887
 888static int vop_release(struct inode *inode, struct file *f)
 889{
 890        struct vop_vdev *vdev = f->private_data, *vdev_tmp;
 891        struct vop_info *vi = vdev->vi;
 892        struct list_head *pos, *tmp;
 893        bool found = false;
 894
 895        mutex_lock(&vdev->vdev_mutex);
 896        if (vdev->deleted)
 897                goto unlock;
 898        mutex_lock(&vi->vop_mutex);
 899        list_for_each_safe(pos, tmp, &vi->vdev_list) {
 900                vdev_tmp = list_entry(pos, struct vop_vdev, list);
 901                if (vdev == vdev_tmp) {
 902                        vop_virtio_del_device(vdev);
 903                        list_del(pos);
 904                        found = true;
 905                        break;
 906                }
 907        }
 908        mutex_unlock(&vi->vop_mutex);
 909unlock:
 910        mutex_unlock(&vdev->vdev_mutex);
 911        if (!found)
 912                wait_for_completion(&vdev->destroy);
 913        f->private_data = NULL;
 914        kfree(vdev);
 915        return 0;
 916}
 917
 918static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 919{
 920        struct vop_vdev *vdev = f->private_data;
 921        struct vop_info *vi = vdev->vi;
 922        void __user *argp = (void __user *)arg;
 923        int ret;
 924
 925        switch (cmd) {
 926        case MIC_VIRTIO_ADD_DEVICE:
 927        {
 928                struct mic_device_desc dd, *dd_config;
 929
 930                if (copy_from_user(&dd, argp, sizeof(dd)))
 931                        return -EFAULT;
 932
 933                if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
 934                    dd.num_vq > MIC_MAX_VRINGS)
 935                        return -EINVAL;
 936
 937                dd_config = memdup_user(argp, mic_desc_size(&dd));
 938                if (IS_ERR(dd_config))
 939                        return PTR_ERR(dd_config);
 940
 941                /* Ensure desc has not changed between the two reads */
 942                if (memcmp(&dd, dd_config, sizeof(dd))) {
 943                        ret = -EINVAL;
 944                        goto free_ret;
 945                }
 946                mutex_lock(&vdev->vdev_mutex);
 947                mutex_lock(&vi->vop_mutex);
 948                ret = vop_virtio_add_device(vdev, dd_config);
 949                if (ret)
 950                        goto unlock_ret;
 951                list_add_tail(&vdev->list, &vi->vdev_list);
 952unlock_ret:
 953                mutex_unlock(&vi->vop_mutex);
 954                mutex_unlock(&vdev->vdev_mutex);
 955free_ret:
 956                kfree(dd_config);
 957                return ret;
 958        }
 959        case MIC_VIRTIO_COPY_DESC:
 960        {
 961                struct mic_copy_desc copy;
 962
 963                mutex_lock(&vdev->vdev_mutex);
 964                ret = vop_vdev_inited(vdev);
 965                if (ret)
 966                        goto _unlock_ret;
 967
 968                if (copy_from_user(&copy, argp, sizeof(copy))) {
 969                        ret = -EFAULT;
 970                        goto _unlock_ret;
 971                }
 972
 973                ret = vop_virtio_copy_desc(vdev, &copy);
 974                if (ret < 0)
 975                        goto _unlock_ret;
 976                if (copy_to_user(
 977                        &((struct mic_copy_desc __user *)argp)->out_len,
 978                        &copy.out_len, sizeof(copy.out_len)))
 979                        ret = -EFAULT;
 980_unlock_ret:
 981                mutex_unlock(&vdev->vdev_mutex);
 982                return ret;
 983        }
 984        case MIC_VIRTIO_CONFIG_CHANGE:
 985        {
 986                void *buf;
 987
 988                mutex_lock(&vdev->vdev_mutex);
 989                ret = vop_vdev_inited(vdev);
 990                if (ret)
 991                        goto __unlock_ret;
 992                buf = memdup_user(argp, vdev->dd->config_len);
 993                if (IS_ERR(buf)) {
 994                        ret = PTR_ERR(buf);
 995                        goto __unlock_ret;
 996                }
 997                ret = vop_virtio_config_change(vdev, buf);
 998                kfree(buf);
 999__unlock_ret:
1000                mutex_unlock(&vdev->vdev_mutex);
1001                return ret;
1002        }
1003        default:
1004                return -ENOIOCTLCMD;
1005        };
1006        return 0;
1007}
1008
1009/*
1010 * We return EPOLLIN | EPOLLOUT from poll when new buffers are enqueued, and
1011 * not when previously enqueued buffers may be available. This means that
1012 * in the card->host (TX) path, when userspace is unblocked by poll it
1013 * must drain all available descriptors or it can stall.
1014 */
1015static __poll_t vop_poll(struct file *f, poll_table *wait)
1016{
1017        struct vop_vdev *vdev = f->private_data;
1018        __poll_t mask = 0;
1019
1020        mutex_lock(&vdev->vdev_mutex);
1021        if (vop_vdev_inited(vdev)) {
1022                mask = EPOLLERR;
1023                goto done;
1024        }
1025        poll_wait(f, &vdev->waitq, wait);
1026        if (vop_vdev_inited(vdev)) {
1027                mask = EPOLLERR;
1028        } else if (vdev->poll_wake) {
1029                vdev->poll_wake = 0;
1030                mask = EPOLLIN | EPOLLOUT;
1031        }
1032done:
1033        mutex_unlock(&vdev->vdev_mutex);
1034        return mask;
1035}
1036
1037static inline int
1038vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1039                 unsigned long *size, unsigned long *pa)
1040{
1041        struct vop_device *vpdev = vdev->vpdev;
1042        unsigned long start = MIC_DP_SIZE;
1043        int i;
1044
1045        /*
1046         * MMAP interface is as follows:
1047         * offset                               region
1048         * 0x0                                  virtio device_page
1049         * 0x1000                               first vring
1050         * 0x1000 + size of 1st vring           second vring
1051         * ....
1052         */
1053        if (!offset) {
1054                *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1055                *size = MIC_DP_SIZE;
1056                return 0;
1057        }
1058
1059        for (i = 0; i < vdev->dd->num_vq; i++) {
1060                struct vop_vringh *vvr = &vdev->vvr[i];
1061
1062                if (offset == start) {
1063                        *pa = virt_to_phys(vvr->vring.va);
1064                        *size = vvr->vring.len;
1065                        return 0;
1066                }
1067                start += vvr->vring.len;
1068        }
1069        return -1;
1070}
1071
1072/*
1073 * Maps the device page and virtio rings to user space for readonly access.
1074 */
1075static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1076{
1077        struct vop_vdev *vdev = f->private_data;
1078        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1079        unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1080        int i, err;
1081
1082        err = vop_vdev_inited(vdev);
1083        if (err)
1084                goto ret;
1085        if (vma->vm_flags & VM_WRITE) {
1086                err = -EACCES;
1087                goto ret;
1088        }
1089        while (size_rem) {
1090                i = vop_query_offset(vdev, offset, &size, &pa);
1091                if (i < 0) {
1092                        err = -EINVAL;
1093                        goto ret;
1094                }
1095                err = remap_pfn_range(vma, vma->vm_start + offset,
1096                                      pa >> PAGE_SHIFT, size,
1097                                      vma->vm_page_prot);
1098                if (err)
1099                        goto ret;
1100                size_rem -= size;
1101                offset += size;
1102        }
1103ret:
1104        return err;
1105}
1106
1107static const struct file_operations vop_fops = {
1108        .open = vop_open,
1109        .release = vop_release,
1110        .unlocked_ioctl = vop_ioctl,
1111        .poll = vop_poll,
1112        .mmap = vop_mmap,
1113        .owner = THIS_MODULE,
1114};
1115
1116int vop_host_init(struct vop_info *vi)
1117{
1118        int rc;
1119        struct miscdevice *mdev;
1120        struct vop_device *vpdev = vi->vpdev;
1121
1122        INIT_LIST_HEAD(&vi->vdev_list);
1123        vi->dma_ch = vpdev->dma_ch;
1124        mdev = &vi->miscdev;
1125        mdev->minor = MISC_DYNAMIC_MINOR;
1126        snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1127        mdev->name = vi->name;
1128        mdev->fops = &vop_fops;
1129        mdev->parent = &vpdev->dev;
1130
1131        rc = misc_register(mdev);
1132        if (rc)
1133                dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1134        return rc;
1135}
1136
1137void vop_host_uninit(struct vop_info *vi)
1138{
1139        struct list_head *pos, *tmp;
1140        struct vop_vdev *vdev;
1141
1142        mutex_lock(&vi->vop_mutex);
1143        vop_virtio_reset_devices(vi);
1144        list_for_each_safe(pos, tmp, &vi->vdev_list) {
1145                vdev = list_entry(pos, struct vop_vdev, list);
1146                list_del(pos);
1147                reinit_completion(&vdev->destroy);
1148                mutex_unlock(&vi->vop_mutex);
1149                mutex_lock(&vdev->vdev_mutex);
1150                vop_virtio_del_device(vdev);
1151                vdev->deleted = true;
1152                mutex_unlock(&vdev->vdev_mutex);
1153                complete(&vdev->destroy);
1154                mutex_lock(&vi->vop_mutex);
1155        }
1156        mutex_unlock(&vi->vop_mutex);
1157        misc_deregister(&vi->miscdev);
1158}
1159