linux/drivers/misc/mic/vop/vop_vringh.c
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel Virtio Over PCIe (VOP) driver.
 *
 */
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/dma-mapping.h>

#include <linux/mic_common.h>
#include "../common/mic_dev.h"

#include <linux/mic_ioctl.h>
#include "vop_main.h"

/* Helper API to obtain the VOP PCIe device */
static inline struct device *vop_dev(struct vop_vdev *vdev)
{
        return vdev->vpdev->dev.parent;
}

/* Helper API to check if a virtio device is initialized */
static inline int vop_vdev_inited(struct vop_vdev *vdev)
{
        if (!vdev)
                return -EINVAL;
        /* Device has not been created yet */
        if (!vdev->dd || !vdev->dd->type) {
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, -EINVAL);
                return -EINVAL;
        }
        /* Device has been removed/deleted */
        if (vdev->dd->type == -1) {
                dev_dbg(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, -ENODEV);
                return -ENODEV;
        }
        return 0;
}

static void _vop_notify(struct vringh *vrh)
{
        struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
        struct vop_vdev *vdev = vvrh->vdev;
        struct vop_device *vpdev = vdev->vpdev;
        s8 db = vdev->dc->h2c_vdev_db;

        if (db != -1)
                vpdev->hw_ops->send_intr(vpdev, db);
}

static void vop_virtio_init_post(struct vop_vdev *vdev)
{
        struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
        struct vop_device *vpdev = vdev->vpdev;
        int i, used_size;

        for (i = 0; i < vdev->dd->num_vq; i++) {
                used_size = PAGE_ALIGN(sizeof(u16) * 3 +
                                sizeof(struct vring_used_elem) *
                                le16_to_cpu(vqconfig->num));
                if (!le64_to_cpu(vqconfig[i].used_address)) {
                        dev_warn(vop_dev(vdev), "used_address zero??\n");
                        continue;
                }
                vdev->vvr[i].vrh.vring.used =
                        (void __force *)vpdev->hw_ops->ioremap(
                        vpdev,
                        le64_to_cpu(vqconfig[i].used_address),
                        used_size);
        }

        vdev->dc->used_address_updated = 0;

        dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
                 __func__, vdev->virtio_id);
}

static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
{
        int i;

        dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
                __func__, vdev->dd->status, vdev->virtio_id);

        for (i = 0; i < vdev->dd->num_vq; i++)
                /*
                 * Avoid lockdep false positive. The + 1 is for the vop
                 * mutex which is held in the reset devices code path.
                 */
                mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

        /* 0 status means "reset" */
        vdev->dd->status = 0;
        vdev->dc->vdev_reset = 0;
        vdev->dc->host_ack = 1;

        for (i = 0; i < vdev->dd->num_vq; i++) {
                struct vringh *vrh = &vdev->vvr[i].vrh;

                vdev->vvr[i].vring.info->avail_idx = 0;
                vrh->completed = 0;
                vrh->last_avail_idx = 0;
                vrh->last_used_idx = 0;
        }

        for (i = 0; i < vdev->dd->num_vq; i++)
                mutex_unlock(&vdev->vvr[i].vr_mutex);
}

static void vop_virtio_reset_devices(struct vop_info *vi)
{
        struct list_head *pos, *tmp;
        struct vop_vdev *vdev;

        list_for_each_safe(pos, tmp, &vi->vdev_list) {
                vdev = list_entry(pos, struct vop_vdev, list);
                vop_virtio_device_reset(vdev);
                vdev->poll_wake = 1;
                wake_up(&vdev->waitq);
        }
}

static void vop_bh_handler(struct work_struct *work)
{
        struct vop_vdev *vdev = container_of(work, struct vop_vdev,
                        virtio_bh_work);

        if (vdev->dc->used_address_updated)
                vop_virtio_init_post(vdev);

        if (vdev->dc->vdev_reset)
                vop_virtio_device_reset(vdev);

        vdev->poll_wake = 1;
        wake_up(&vdev->waitq);
}

static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
{
        struct vop_vdev *vdev = data;
        struct vop_device *vpdev = vdev->vpdev;

        vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
        schedule_work(&vdev->virtio_bh_work);
        return IRQ_HANDLED;
}

static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
{
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
        int ret = 0, retry, i;
        struct vop_device *vpdev = vdev->vpdev;
        struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
        s8 db = bootparam->h2c_config_db;

        mutex_lock(&vi->vop_mutex);
        for (i = 0; i < vdev->dd->num_vq; i++)
                mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);

        if (db == -1 || vdev->dd->type == -1) {
                ret = -EIO;
                goto exit;
        }

        memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
        vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
        vpdev->hw_ops->send_intr(vpdev, db);

        for (retry = 100; retry--;) {
                ret = wait_event_timeout(wake, vdev->dc->guest_ack,
                                         msecs_to_jiffies(100));
                if (ret)
                        break;
        }

        dev_dbg(vop_dev(vdev),
                "%s %d retry: %d\n", __func__, __LINE__, retry);
        vdev->dc->config_change = 0;
        vdev->dc->guest_ack = 0;
exit:
        for (i = 0; i < vdev->dd->num_vq; i++)
                mutex_unlock(&vdev->vvr[i].vr_mutex);
        mutex_unlock(&vi->vop_mutex);
        return ret;
}

static int vop_copy_dp_entry(struct vop_vdev *vdev,
                             struct mic_device_desc *argp, __u8 *type,
                             struct mic_device_desc **devpage)
{
        struct vop_device *vpdev = vdev->vpdev;
        struct mic_device_desc *devp;
        struct mic_vqconfig *vqconfig;
        int ret = 0, i;
        bool slot_found = false;

        vqconfig = mic_vq_config(argp);
        for (i = 0; i < argp->num_vq; i++) {
                if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
                        ret = -EINVAL;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        goto exit;
                }
        }

        /* Find the first free device page entry */
        for (i = sizeof(struct mic_bootparam);
                i < MIC_DP_SIZE - mic_total_desc_size(argp);
                i += mic_total_desc_size(devp)) {
                devp = vpdev->hw_ops->get_dp(vpdev) + i;
                if (devp->type == 0 || devp->type == -1) {
                        slot_found = true;
                        break;
                }
        }
        if (!slot_found) {
                ret = -EINVAL;
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, ret);
                goto exit;
        }
        /*
         * Save off the type before doing the memcpy. Type will be set in the
         * end after completing all initialization for the new device.
         */
        *type = argp->type;
        argp->type = 0;
        memcpy(devp, argp, mic_desc_size(argp));

        *devpage = devp;
exit:
        return ret;
}

static void vop_init_device_ctrl(struct vop_vdev *vdev,
                                 struct mic_device_desc *devpage)
{
        struct mic_device_ctrl *dc;

        dc = (void *)devpage + mic_aligned_desc_size(devpage);

        dc->config_change = 0;
        dc->guest_ack = 0;
        dc->vdev_reset = 0;
        dc->host_ack = 0;
        dc->used_address_updated = 0;
        dc->c2h_vdev_db = -1;
        dc->h2c_vdev_db = -1;
        vdev->dc = dc;
}
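
/*
 * For clarity, an illustrative sketch of the device page layout that
 * vop_copy_dp_entry() scans and vop_init_device_ctrl() fills in, as
 * implied by the code above (each slot advances by mic_total_desc_size()):
 *
 *      +------------------------------+  offset 0
 *      | struct mic_bootparam         |
 *      +------------------------------+  sizeof(struct mic_bootparam)
 *      | mic_device_desc #0           |  descriptor + vring configs +
 *      |   (mic_aligned_desc_size)    |  feature bits + config space
 *      | struct mic_device_ctrl #0    |
 *      +------------------------------+  += mic_total_desc_size(desc #0)
 *      | mic_device_desc #1 ...       |
 *      +------------------------------+
 */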

static int vop_virtio_add_device(struct vop_vdev *vdev,
                                 struct mic_device_desc *argp)
{
        struct vop_info *vi = vdev->vi;
        struct vop_device *vpdev = vi->vpdev;
        struct mic_device_desc *dd = NULL;
        struct mic_vqconfig *vqconfig;
        int vr_size, i, j, ret;
        u8 type = 0;
        s8 db = -1;
        char irqname[16];
        struct mic_bootparam *bootparam;
        u16 num;
        dma_addr_t vr_addr;

        bootparam = vpdev->hw_ops->get_dp(vpdev);
        init_waitqueue_head(&vdev->waitq);
        INIT_LIST_HEAD(&vdev->list);
        vdev->vpdev = vpdev;

        ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
        if (ret) {
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, ret);
                return ret;
        }

        vop_init_device_ctrl(vdev, dd);

        vdev->dd = dd;
        vdev->virtio_id = type;
        vqconfig = mic_vq_config(dd);
        INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);

        for (i = 0; i < dd->num_vq; i++) {
                struct vop_vringh *vvr = &vdev->vvr[i];
                struct mic_vring *vr = &vdev->vvr[i].vring;

                num = le16_to_cpu(vqconfig[i].num);
                mutex_init(&vvr->vr_mutex);
                vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
                        sizeof(struct _mic_vring_info));
                vr->va = (void *)
                        __get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                         get_order(vr_size));
                if (!vr->va) {
                        ret = -ENOMEM;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        goto err;
                }
                vr->len = vr_size;
                vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
                vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
                vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
                                         DMA_BIDIRECTIONAL);
                if (dma_mapping_error(&vpdev->dev, vr_addr)) {
                        free_pages((unsigned long)vr->va, get_order(vr_size));
                        ret = -ENOMEM;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        goto err;
                }
                vqconfig[i].address = cpu_to_le64(vr_addr);

                vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
                ret = vringh_init_kern(&vvr->vrh,
                                       *(u32 *)mic_vq_features(vdev->dd),
                                       num, false, vr->vr.desc, vr->vr.avail,
                                       vr->vr.used);
                if (ret) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        goto err;
                }
                vringh_kiov_init(&vvr->riov, NULL, 0);
                vringh_kiov_init(&vvr->wiov, NULL, 0);
                vvr->head = USHRT_MAX;
                vvr->vdev = vdev;
                vvr->vrh.notify = _vop_notify;
                dev_dbg(&vpdev->dev,
                        "%s %d index %d va %p info %p vr_size 0x%x\n",
                        __func__, __LINE__, i, vr->va, vr->info, vr_size);
                vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
                                        get_order(VOP_INT_DMA_BUF_SIZE));
                vvr->buf_da = dma_map_single(&vpdev->dev,
                                          vvr->buf, VOP_INT_DMA_BUF_SIZE,
                                          DMA_BIDIRECTIONAL);
        }

        snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
                 vdev->virtio_id);
        vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
        vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
                        _vop_virtio_intr_handler, irqname, vdev,
                        vdev->virtio_db);
        if (IS_ERR(vdev->virtio_cookie)) {
                ret = PTR_ERR(vdev->virtio_cookie);
                dev_dbg(&vpdev->dev, "request irq failed\n");
                goto err;
        }

        vdev->dc->c2h_vdev_db = vdev->virtio_db;

        /*
         * Order the type update with previous stores. This write barrier
         * is paired with the corresponding read barrier before the uncached
         * system memory read of the type, on the card while scanning the
         * device page.
         */
        smp_wmb();
        dd->type = type;
        argp->type = type;

        if (bootparam) {
                db = bootparam->h2c_config_db;
                if (db != -1)
                        vpdev->hw_ops->send_intr(vpdev, db);
        }
        dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
        return 0;
err:
        vqconfig = mic_vq_config(dd);
        for (j = 0; j < i; j++) {
                struct vop_vringh *vvr = &vdev->vvr[j];

                dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
                                 vvr->vring.len, DMA_BIDIRECTIONAL);
                free_pages((unsigned long)vvr->vring.va,
                           get_order(vvr->vring.len));
        }
        return ret;
}

static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
                           struct vop_device *vpdev)
{
        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
        s8 db;
        int ret, retry;
        DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);

        devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
        db = bootparam->h2c_config_db;
        if (db != -1)
                vpdev->hw_ops->send_intr(vpdev, db);
        else
                goto done;
        for (retry = 15; retry--;) {
                ret = wait_event_timeout(wake, devp->guest_ack,
                                         msecs_to_jiffies(1000));
                if (ret)
                        break;
        }
done:
        devp->config_change = 0;
        devp->guest_ack = 0;
}

static void vop_virtio_del_device(struct vop_vdev *vdev)
{
        struct vop_info *vi = vdev->vi;
        struct vop_device *vpdev = vdev->vpdev;
        int i;
        struct mic_vqconfig *vqconfig;
        struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);

        if (!bootparam)
                goto skip_hot_remove;
        vop_dev_remove(vi, vdev->dc, vpdev);
skip_hot_remove:
        vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
        flush_work(&vdev->virtio_bh_work);
        vqconfig = mic_vq_config(vdev->dd);
        for (i = 0; i < vdev->dd->num_vq; i++) {
                struct vop_vringh *vvr = &vdev->vvr[i];

                dma_unmap_single(&vpdev->dev,
                                 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
                                 DMA_BIDIRECTIONAL);
                free_pages((unsigned long)vvr->buf,
                           get_order(VOP_INT_DMA_BUF_SIZE));
                vringh_kiov_cleanup(&vvr->riov);
                vringh_kiov_cleanup(&vvr->wiov);
                dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
                                 vvr->vring.len, DMA_BIDIRECTIONAL);
                free_pages((unsigned long)vvr->vring.va,
                           get_order(vvr->vring.len));
        }
        /*
         * Order the type update with previous stores. This write barrier
         * is paired with the corresponding read barrier before the uncached
         * system memory read of the type, on the card while scanning the
         * device page.
         */
        smp_wmb();
        vdev->dd->type = -1;
}

/*
 * vop_sync_dma - Wrapper for synchronous DMAs.
 *
 * @vdev - The VOP virtio device on whose behalf the DMA is performed.
 * @dst - destination DMA address.
 * @src - source DMA address.
 * @len - size of the transfer.
 *
 * Return 0 on success, a negative error code otherwise.
 */
static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
                        size_t len)
{
        int err = 0;
        struct dma_device *ddev;
        struct dma_async_tx_descriptor *tx;
        struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
        struct dma_chan *vop_ch = vi->dma_ch;

        if (!vop_ch) {
                err = -EBUSY;
                goto error;
        }
        ddev = vop_ch->device;
        tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
                DMA_PREP_FENCE);
        if (!tx) {
                err = -ENOMEM;
                goto error;
        } else {
                dma_cookie_t cookie;

                cookie = tx->tx_submit(tx);
                if (dma_submit_error(cookie)) {
                        err = -ENOMEM;
                        goto error;
                }
                dma_async_issue_pending(vop_ch);
                err = dma_sync_wait(vop_ch, cookie);
        }
error:
        if (err)
                dev_err(&vi->vpdev->dev, "%s %d err %d\n",
                        __func__, __LINE__, err);
        return err;
}

#define VOP_USE_DMA true

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
                                   size_t len, u64 daddr, size_t dlen,
                                   int vr_idx)
{
        struct vop_device *vpdev = vdev->vpdev;
        void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
        struct vop_vringh *vvr = &vdev->vvr[vr_idx];
        struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
        size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
        bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
        size_t dma_offset, partlen;
        int err;

        if (!VOP_USE_DMA) {
                if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
                        err = -EFAULT;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, err);
                        goto err;
                }
                vdev->in_bytes += len;
                err = 0;
                goto err;
        }

        dma_offset = daddr - round_down(daddr, dma_alignment);
        daddr -= dma_offset;
        len += dma_offset;
        /*
         * X100 uses DMA addresses as seen by the card so adding
         * the aperture base is not required for DMA. However x200
         * requires DMA addresses to be an offset into the bar so
         * add the aperture base for x200.
         */
        if (x200)
                daddr += vpdev->aper->pa;
        while (len) {
                partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
                err = vop_sync_dma(vdev, vvr->buf_da, daddr,
                                   ALIGN(partlen, dma_alignment));
                if (err) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, err);
                        goto err;
                }
                if (copy_to_user(ubuf, vvr->buf + dma_offset,
                                 partlen - dma_offset)) {
                        err = -EFAULT;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, err);
                        goto err;
                }
                daddr += partlen;
                ubuf += partlen;
                dbuf += partlen;
                vdev->in_bytes_dma += partlen;
                vdev->in_bytes += partlen;
                len -= partlen;
                dma_offset = 0;
        }
        err = 0;
err:
        vpdev->hw_ops->iounmap(vpdev, dbuf);
        dev_dbg(vop_dev(vdev),
                "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
                __func__, ubuf, dbuf, len, vr_idx);
        return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
                                     size_t len, u64 daddr, size_t dlen,
                                     int vr_idx)
{
        struct vop_device *vpdev = vdev->vpdev;
        void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
        struct vop_vringh *vvr = &vdev->vvr[vr_idx];
        struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
        size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
        bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
        size_t partlen;
        bool dma = VOP_USE_DMA;
        int err = 0;

        if (daddr & (dma_alignment - 1)) {
                vdev->tx_dst_unaligned += len;
                dma = false;
        } else if (ALIGN(len, dma_alignment) > dlen) {
                vdev->tx_len_unaligned += len;
                dma = false;
        }

        if (!dma)
                goto memcpy;

        /*
         * X100 uses DMA addresses as seen by the card so adding
         * the aperture base is not required for DMA. However x200
         * requires DMA addresses to be an offset into the bar so
         * add the aperture base for x200.
         */
        if (x200)
                daddr += vpdev->aper->pa;
        while (len) {
                partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);

                if (copy_from_user(vvr->buf, ubuf, partlen)) {
                        err = -EFAULT;
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, err);
                        goto err;
                }
                err = vop_sync_dma(vdev, daddr, vvr->buf_da,
                                   ALIGN(partlen, dma_alignment));
                if (err) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, err);
                        goto err;
                }
                daddr += partlen;
                ubuf += partlen;
                dbuf += partlen;
                vdev->out_bytes_dma += partlen;
                vdev->out_bytes += partlen;
                len -= partlen;
        }
memcpy:
        /*
         * We are copying to IO below and should ideally use something
         * like copy_from_user_toio(..) if it existed.
         */
        if (copy_from_user((void __force *)dbuf, ubuf, len)) {
                err = -EFAULT;
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, err);
                goto err;
        }
        vdev->out_bytes += len;
        err = 0;
err:
        vpdev->hw_ops->iounmap(vpdev, dbuf);
        dev_dbg(vop_dev(vdev),
                "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
                __func__, ubuf, dbuf, len, vr_idx);
        return err;
}

#define MIC_VRINGH_READ true

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
{
        int i;
        u32 total = iov->consumed;

        for (i = 0; i < iov->i; i++)
                total += iov->iov[i].iov_len;
        return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
                           void __user *ubuf, size_t len, bool read, int vr_idx,
                           size_t *out_len)
{
        int ret = 0;
        size_t partlen, tot_len = 0;

        while (len && iov->i < iov->used) {
                struct kvec *kiov = &iov->iov[iov->i];

                partlen = min(kiov->iov_len, len);
                if (read)
                        ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
                                                      (u64)kiov->iov_base,
                                                      kiov->iov_len,
                                                      vr_idx);
                else
                        ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
                                                        (u64)kiov->iov_base,
                                                        kiov->iov_len,
                                                        vr_idx);
                if (ret) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        break;
                }
                len -= partlen;
                ubuf += partlen;
                tot_len += partlen;
                iov->consumed += partlen;
                kiov->iov_len -= partlen;
                kiov->iov_base += partlen;
                if (!kiov->iov_len) {
                        /* Fix up old iov element then increment. */
                        kiov->iov_len = iov->consumed;
                        kiov->iov_base -= iov->consumed;

                        iov->consumed = 0;
                        iov->i++;
                }
        }
        *out_len = tot_len;
        return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
{
        int ret = 0;
        u32 iovcnt = copy->iovcnt;
        struct iovec iov;
        struct iovec __user *u_iov = copy->iov;
        void __user *ubuf = NULL;
        struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
        struct vringh_kiov *riov = &vvr->riov;
        struct vringh_kiov *wiov = &vvr->wiov;
        struct vringh *vrh = &vvr->vrh;
        u16 *head = &vvr->head;
        struct mic_vring *vr = &vvr->vring;
        size_t len = 0, out_len;

        copy->out_len = 0;
        /* Fetch a new IOVEC if all previous elements have been processed */
        if (riov->i == riov->used && wiov->i == wiov->used) {
                ret = vringh_getdesc_kern(vrh, riov, wiov,
                                          head, GFP_KERNEL);
                /* Check if there are available descriptors */
                if (ret <= 0)
                        return ret;
        }
        while (iovcnt) {
                if (!len) {
                        /* Copy over a new iovec from user space. */
                        ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
                        if (ret) {
                                ret = -EINVAL;
                                dev_err(vop_dev(vdev), "%s %d err %d\n",
                                        __func__, __LINE__, ret);
                                break;
                        }
                        len = iov.iov_len;
                        ubuf = iov.iov_base;
                }
                /* Issue all the read descriptors first */
                ret = vop_vringh_copy(vdev, riov, ubuf, len,
                                      MIC_VRINGH_READ, copy->vr_idx, &out_len);
                if (ret) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        break;
                }
                len -= out_len;
                ubuf += out_len;
                copy->out_len += out_len;
                /* Issue the write descriptors next */
                ret = vop_vringh_copy(vdev, wiov, ubuf, len,
                                      !MIC_VRINGH_READ, copy->vr_idx, &out_len);
                if (ret) {
                        dev_err(vop_dev(vdev), "%s %d err %d\n",
                                __func__, __LINE__, ret);
                        break;
                }
                len -= out_len;
                ubuf += out_len;
                copy->out_len += out_len;
                if (!len) {
                        /* One user space iovec is now completed */
                        iovcnt--;
                        u_iov++;
                }
                /* Exit loop if all elements in KIOVs have been processed. */
                if (riov->i == riov->used && wiov->i == wiov->used)
                        break;
        }
        /*
         * Update the used ring if a descriptor was available and some data was
         * copied in/out and the user asked for a used ring update.
         */
        if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
                u32 total = 0;

                /* Determine the total data consumed */
                total += vop_vringh_iov_consumed(riov);
                total += vop_vringh_iov_consumed(wiov);
                vringh_complete_kern(vrh, *head, total);
                *head = USHRT_MAX;
                if (vringh_need_notify_kern(vrh) > 0)
                        vringh_notify(vrh);
                vringh_kiov_cleanup(riov);
                vringh_kiov_cleanup(wiov);
                /* Update avail idx for user space */
                vr->info->avail_idx = vrh->last_avail_idx;
        }
        return ret;
}

static inline int vop_verify_copy_args(struct vop_vdev *vdev,
                                       struct mic_copy_desc *copy)
{
        if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
                return -EINVAL;
        return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
static int vop_virtio_copy_desc(struct vop_vdev *vdev,
                                struct mic_copy_desc *copy)
{
        int err;
        struct vop_vringh *vvr;

        err = vop_verify_copy_args(vdev, copy);
        if (err)
                return err;

        vvr = &vdev->vvr[copy->vr_idx];
        mutex_lock(&vvr->vr_mutex);
        if (!vop_vdevup(vdev)) {
                err = -ENODEV;
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, err);
                goto err;
        }
        err = _vop_virtio_copy(vdev, copy);
        if (err) {
                dev_err(vop_dev(vdev), "%s %d err %d\n",
                        __func__, __LINE__, err);
        }
err:
        mutex_unlock(&vvr->vr_mutex);
        return err;
}

static int vop_open(struct inode *inode, struct file *f)
{
        struct vop_vdev *vdev;
        struct vop_info *vi = container_of(f->private_data,
                struct vop_info, miscdev);

        vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
        if (!vdev)
                return -ENOMEM;
        vdev->vi = vi;
        mutex_init(&vdev->vdev_mutex);
        f->private_data = vdev;
        init_completion(&vdev->destroy);
        complete(&vdev->destroy);
        return 0;
}

static int vop_release(struct inode *inode, struct file *f)
{
        struct vop_vdev *vdev = f->private_data, *vdev_tmp;
        struct vop_info *vi = vdev->vi;
        struct list_head *pos, *tmp;
        bool found = false;

        mutex_lock(&vdev->vdev_mutex);
        if (vdev->deleted)
                goto unlock;
        mutex_lock(&vi->vop_mutex);
        list_for_each_safe(pos, tmp, &vi->vdev_list) {
                vdev_tmp = list_entry(pos, struct vop_vdev, list);
                if (vdev == vdev_tmp) {
                        vop_virtio_del_device(vdev);
                        list_del(pos);
                        found = true;
                        break;
                }
        }
        mutex_unlock(&vi->vop_mutex);
unlock:
        mutex_unlock(&vdev->vdev_mutex);
        if (!found)
                wait_for_completion(&vdev->destroy);
        f->private_data = NULL;
        kfree(vdev);
        return 0;
}

static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
{
        struct vop_vdev *vdev = f->private_data;
        struct vop_info *vi = vdev->vi;
        void __user *argp = (void __user *)arg;
        int ret;

        switch (cmd) {
        case MIC_VIRTIO_ADD_DEVICE:
        {
                struct mic_device_desc dd, *dd_config;

                if (copy_from_user(&dd, argp, sizeof(dd)))
                        return -EFAULT;

                if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
                    dd.num_vq > MIC_MAX_VRINGS)
                        return -EINVAL;

                dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
                if (!dd_config)
                        return -ENOMEM;
                if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
                        ret = -EFAULT;
                        goto free_ret;
                }
                /* Ensure desc has not changed between the two reads */
                if (memcmp(&dd, dd_config, sizeof(dd))) {
                        ret = -EINVAL;
                        goto free_ret;
                }
                mutex_lock(&vdev->vdev_mutex);
                mutex_lock(&vi->vop_mutex);
                ret = vop_virtio_add_device(vdev, dd_config);
                if (ret)
                        goto unlock_ret;
                list_add_tail(&vdev->list, &vi->vdev_list);
unlock_ret:
                mutex_unlock(&vi->vop_mutex);
                mutex_unlock(&vdev->vdev_mutex);
free_ret:
                kfree(dd_config);
                return ret;
        }
        case MIC_VIRTIO_COPY_DESC:
        {
                struct mic_copy_desc copy;

                mutex_lock(&vdev->vdev_mutex);
                ret = vop_vdev_inited(vdev);
                if (ret)
                        goto _unlock_ret;

                if (copy_from_user(&copy, argp, sizeof(copy))) {
                        ret = -EFAULT;
                        goto _unlock_ret;
                }

                ret = vop_virtio_copy_desc(vdev, &copy);
                if (ret < 0)
                        goto _unlock_ret;
                if (copy_to_user(
                        &((struct mic_copy_desc __user *)argp)->out_len,
                        &copy.out_len, sizeof(copy.out_len)))
                        ret = -EFAULT;
_unlock_ret:
                mutex_unlock(&vdev->vdev_mutex);
                return ret;
        }
        case MIC_VIRTIO_CONFIG_CHANGE:
        {
                void *buf;

                mutex_lock(&vdev->vdev_mutex);
                ret = vop_vdev_inited(vdev);
                if (ret)
                        goto __unlock_ret;
                buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
                if (!buf) {
                        ret = -ENOMEM;
                        goto __unlock_ret;
                }
                if (copy_from_user(buf, argp, vdev->dd->config_len)) {
                        ret = -EFAULT;
                        goto done;
                }
                ret = vop_virtio_config_change(vdev, buf);
done:
                kfree(buf);
__unlock_ret:
                mutex_unlock(&vdev->vdev_mutex);
                return ret;
        }
        default:
                return -ENOIOCTLCMD;
        };
        return 0;
}
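
/*
 * Illustrative only: a minimal sketch of how a host user space daemon
 * (e.g. something like mpssd) might drive the ioctls above, assuming the
 * misc device node created by vop_host_init() below and the uapi
 * definitions from <linux/mic_ioctl.h>/<linux/mic_common.h>. Names such
 * as "desc", "iov" and "config_buf" are hypothetical and error handling
 * is omitted:
 *
 *      int fd = open("/dev/vop_virtio0", O_RDWR);
 *
 *      // Register a virtio device; desc points to a mic_device_desc
 *      // followed by its vring configs, feature bits and config space.
 *      ioctl(fd, MIC_VIRTIO_ADD_DEVICE, desc);
 *
 *      // Move one descriptor chain worth of data in or out of vring 0
 *      // and request a used ring update.
 *      struct mic_copy_desc copy = {
 *              .iov = iov, .iovcnt = 1, .vr_idx = 0, .update_used = 1,
 *      };
 *      ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
 *
 *      // Push an updated device config space to the card.
 *      ioctl(fd, MIC_VIRTIO_CONFIG_CHANGE, config_buf);
 */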

/*
 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
 * not when previously enqueued buffers may be available. This means that
 * in the card->host (TX) path, when userspace is unblocked by poll it
 * must drain all available descriptors or it can stall.
 */
static unsigned int vop_poll(struct file *f, poll_table *wait)
{
        struct vop_vdev *vdev = f->private_data;
        int mask = 0;

        mutex_lock(&vdev->vdev_mutex);
        if (vop_vdev_inited(vdev)) {
                mask = POLLERR;
                goto done;
        }
        poll_wait(f, &vdev->waitq, wait);
        if (vop_vdev_inited(vdev)) {
                mask = POLLERR;
        } else if (vdev->poll_wake) {
                vdev->poll_wake = 0;
                mask = POLLIN | POLLOUT;
        }
done:
        mutex_unlock(&vdev->vdev_mutex);
        return mask;
}
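
/*
 * Illustrative only: given the poll semantics documented above, a user
 * space consumer should drain every available descriptor after each
 * wakeup before sleeping again, roughly as follows (the drain helper is
 * hypothetical and would loop on MIC_VIRTIO_COPY_DESC):
 *
 *      struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *      for (;;) {
 *              poll(&pfd, 1, -1);
 *              if (pfd.revents & (POLLIN | POLLOUT))
 *                      drain_all_descriptors(fd); // or risk stalling
 *      }
 */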

static inline int
vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
                 unsigned long *size, unsigned long *pa)
{
        struct vop_device *vpdev = vdev->vpdev;
        unsigned long start = MIC_DP_SIZE;
        int i;

        /*
         * MMAP interface is as follows:
         * offset                               region
         * 0x0                                  virtio device_page
         * 0x1000                               first vring
         * 0x1000 + size of 1st vring           second vring
         * ....
         */
        if (!offset) {
                *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
                *size = MIC_DP_SIZE;
                return 0;
        }

        for (i = 0; i < vdev->dd->num_vq; i++) {
                struct vop_vringh *vvr = &vdev->vvr[i];

                if (offset == start) {
                        *pa = virt_to_phys(vvr->vring.va);
                        *size = vvr->vring.len;
                        return 0;
                }
                start += vvr->vring.len;
        }
        return -1;
}

/*
 * Maps the device page and virtio rings to user space for readonly access.
 */
static int vop_mmap(struct file *f, struct vm_area_struct *vma)
{
        struct vop_vdev *vdev = f->private_data;
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
        int i, err;

        err = vop_vdev_inited(vdev);
        if (err)
                goto ret;
        if (vma->vm_flags & VM_WRITE) {
                err = -EACCES;
                goto ret;
        }
        while (size_rem) {
                i = vop_query_offset(vdev, offset, &size, &pa);
                if (i < 0) {
                        err = -EINVAL;
                        goto ret;
                }
                err = remap_pfn_range(vma, vma->vm_start + offset,
                                      pa >> PAGE_SHIFT, size,
                                      vma->vm_page_prot);
                if (err)
                        goto ret;
                size_rem -= size;
                offset += size;
        }
ret:
        return err;
}
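
/*
 * Illustrative only: following the offset layout documented in
 * vop_query_offset(), user space can make one read-only mapping that
 * starts at offset 0 and covers the device page followed by each vring.
 * The ring lengths (vring0_len, vring1_len) are hypothetical names and
 * error handling is omitted:
 *
 *      size_t len = 0x1000 + vring0_len + vring1_len; // 0x1000 == MIC_DP_SIZE
 *      void *dp = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
 *      // The first vring then starts at dp + 0x1000.
 */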

static const struct file_operations vop_fops = {
        .open = vop_open,
        .release = vop_release,
        .unlocked_ioctl = vop_ioctl,
        .poll = vop_poll,
        .mmap = vop_mmap,
        .owner = THIS_MODULE,
};

int vop_host_init(struct vop_info *vi)
{
        int rc;
        struct miscdevice *mdev;
        struct vop_device *vpdev = vi->vpdev;

        INIT_LIST_HEAD(&vi->vdev_list);
        vi->dma_ch = vpdev->dma_ch;
        mdev = &vi->miscdev;
        mdev->minor = MISC_DYNAMIC_MINOR;
        snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
        mdev->name = vi->name;
        mdev->fops = &vop_fops;
        mdev->parent = &vpdev->dev;

        rc = misc_register(mdev);
        if (rc)
                dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
        return rc;
}

void vop_host_uninit(struct vop_info *vi)
{
        struct list_head *pos, *tmp;
        struct vop_vdev *vdev;

        mutex_lock(&vi->vop_mutex);
        vop_virtio_reset_devices(vi);
        list_for_each_safe(pos, tmp, &vi->vdev_list) {
                vdev = list_entry(pos, struct vop_vdev, list);
                list_del(pos);
                reinit_completion(&vdev->destroy);
                mutex_unlock(&vi->vop_mutex);
                mutex_lock(&vdev->vdev_mutex);
                vop_virtio_del_device(vdev);
                vdev->deleted = true;
                mutex_unlock(&vdev->vdev_mutex);
                complete(&vdev->destroy);
                mutex_lock(&vi->vop_mutex);
        }
        mutex_unlock(&vi->vop_mutex);
        misc_deregister(&vi->miscdev);
}