linux/drivers/vhost/vdpa.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2018-2020 Intel Corporation.
   4 * Copyright (C) 2020 Red Hat, Inc.
   5 *
   6 * Author: Tiwei Bie <tiwei.bie@intel.com>
   7 *         Jason Wang <jasowang@redhat.com>
   8 *
   9 * Thanks to Michael S. Tsirkin for the valuable comments and
  10 * suggestions, and thanks to Cunming Liang and Zhihong Wang for all
  11 * their support.
  12 */
  13
  14#include <linux/kernel.h>
  15#include <linux/module.h>
  16#include <linux/cdev.h>
  17#include <linux/device.h>
  18#include <linux/mm.h>
  19#include <linux/iommu.h>
  20#include <linux/uuid.h>
  21#include <linux/vdpa.h>
  22#include <linux/nospec.h>
  23#include <linux/vhost.h>
  24#include <linux/virtio_net.h>
  26
  27#include "vhost.h"
  28
  29enum {
  30        VHOST_VDPA_BACKEND_FEATURES =
  31        (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
  32        (1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
  33};
  34
  35#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
  36
  37struct vhost_vdpa {
  38        struct vhost_dev vdev;
  39        struct iommu_domain *domain;
  40        struct vhost_virtqueue *vqs;
  41        struct completion completion;
  42        struct vdpa_device *vdpa;
  43        struct device dev;
  44        struct cdev cdev;
  45        atomic_t opened;
  46        int nvqs;
  47        int virtio_id;
  48        int minor;
  49        struct eventfd_ctx *config_ctx;
  50        int in_batch;
  51};
  52
  53static DEFINE_IDA(vhost_vdpa_ida);
  54
  55static dev_t vhost_vdpa_major;
  56
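/* vhost work function: relay a virtqueue kick from userspace to the vDPA device. */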
  57static void handle_vq_kick(struct vhost_work *work)
  58{
  59        struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
  60                                                  poll.work);
  61        struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
  62        const struct vdpa_config_ops *ops = v->vdpa->config;
  63
  64        ops->kick_vq(v->vdpa, vq - v->vqs);
  65}
  66
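/*
 * Virtqueue interrupt callback installed via ops->set_vq_cb(): signal the
 * call eventfd that userspace registered with VHOST_SET_VRING_CALL.
 */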
  67static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
  68{
  69        struct vhost_virtqueue *vq = private;
  70        struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;
  71
  72        if (call_ctx)
  73                eventfd_signal(call_ctx, 1);
  74
  75        return IRQ_HANDLED;
  76}
  77
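/*
 * Config-change callback installed via ops->set_config_cb(): signal the
 * eventfd that userspace registered with VHOST_VDPA_SET_CONFIG_CALL.
 */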
  78static irqreturn_t vhost_vdpa_config_cb(void *private)
  79{
  80        struct vhost_vdpa *v = private;
  81        struct eventfd_ctx *config_ctx = v->config_ctx;
  82
  83        if (config_ctx)
  84                eventfd_signal(config_ctx, 1);
  85
  86        return IRQ_HANDLED;
  87}
  88
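/*
 * If the parent device exposes a dedicated IRQ for this virtqueue, register
 * an irq bypass producer keyed on the call eventfd so the interrupt can be
 * matched with a consumer (e.g. a KVM irqfd) and delivered directly.
 */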
  89static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
  90{
  91        struct vhost_virtqueue *vq = &v->vqs[qid];
  92        const struct vdpa_config_ops *ops = v->vdpa->config;
  93        struct vdpa_device *vdpa = v->vdpa;
  94        int ret, irq;
  95
  96        if (!ops->get_vq_irq)
  97                return;
  98
  99        irq = ops->get_vq_irq(vdpa, qid);
 100        spin_lock(&vq->call_ctx.ctx_lock);
 101        irq_bypass_unregister_producer(&vq->call_ctx.producer);
 102        if (!vq->call_ctx.ctx || irq < 0) {
 103                spin_unlock(&vq->call_ctx.ctx_lock);
 104                return;
 105        }
 106
 107        vq->call_ctx.producer.token = vq->call_ctx.ctx;
 108        vq->call_ctx.producer.irq = irq;
 109        ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 110        spin_unlock(&vq->call_ctx.ctx_lock);
 111}
 112
 113static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
 114{
 115        struct vhost_virtqueue *vq = &v->vqs[qid];
 116
 117        spin_lock(&vq->call_ctx.ctx_lock);
 118        irq_bypass_unregister_producer(&vq->call_ctx.producer);
 119        spin_unlock(&vq->call_ctx.ctx_lock);
 120}
 121
 122static void vhost_vdpa_reset(struct vhost_vdpa *v)
 123{
 124        struct vdpa_device *vdpa = v->vdpa;
 125
 126        vdpa_reset(vdpa);
 127        v->in_batch = 0;
 128}
 129
 130static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
 131{
 132        struct vdpa_device *vdpa = v->vdpa;
 133        const struct vdpa_config_ops *ops = vdpa->config;
 134        u32 device_id;
 135
 136        device_id = ops->get_device_id(vdpa);
 137
 138        if (copy_to_user(argp, &device_id, sizeof(device_id)))
 139                return -EFAULT;
 140
 141        return 0;
 142}
 143
 144static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
 145{
 146        struct vdpa_device *vdpa = v->vdpa;
 147        const struct vdpa_config_ops *ops = vdpa->config;
 148        u8 status;
 149
 150        status = ops->get_status(vdpa);
 151
 152        if (copy_to_user(statusp, &status, sizeof(status)))
 153                return -EFAULT;
 154
 155        return 0;
 156}
 157
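/*
 * Forward a status write from userspace to the device, and register or
 * unregister the per-virtqueue IRQ bypass producers when DRIVER_OK is set
 * or cleared.
 */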
 158static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
 159{
 160        struct vdpa_device *vdpa = v->vdpa;
 161        const struct vdpa_config_ops *ops = vdpa->config;
 162        u8 status, status_old;
 163        int nvqs = v->nvqs;
 164        u16 i;
 165
 166        if (copy_from_user(&status, statusp, sizeof(status)))
 167                return -EFAULT;
 168
 169        status_old = ops->get_status(vdpa);
 170
  171        /*
  172         * Userspace shouldn't remove status bits unless it resets the
  173         * status to 0.
  174         */
  175        if (status != 0 && (status_old & ~status) != 0)
  176                return -EINVAL;
 177
 178        ops->set_status(vdpa, status);
 179
 180        if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
 181                for (i = 0; i < nvqs; i++)
 182                        vhost_vdpa_setup_vq_irq(v, i);
 183
 184        if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
 185                for (i = 0; i < nvqs; i++)
 186                        vhost_vdpa_unsetup_vq_irq(v, i);
 187
 188        return 0;
 189}
 190
 191static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
 192                                      struct vhost_vdpa_config *c)
 193{
 194        long size = 0;
 195
 196        switch (v->virtio_id) {
 197        case VIRTIO_ID_NET:
 198                size = sizeof(struct virtio_net_config);
 199                break;
 200        }
 201
  202        if (c->len == 0 || c->off > size)
  203                return -EINVAL;
 204
 205        if (c->len > size - c->off)
 206                return -E2BIG;
 207
 208        return 0;
 209}
 210
 211static long vhost_vdpa_get_config(struct vhost_vdpa *v,
 212                                  struct vhost_vdpa_config __user *c)
 213{
 214        struct vdpa_device *vdpa = v->vdpa;
 215        struct vhost_vdpa_config config;
 216        unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 217        u8 *buf;
 218
 219        if (copy_from_user(&config, c, size))
 220                return -EFAULT;
 221        if (vhost_vdpa_config_validate(v, &config))
 222                return -EINVAL;
 223        buf = kvzalloc(config.len, GFP_KERNEL);
 224        if (!buf)
 225                return -ENOMEM;
 226
 227        vdpa_get_config(vdpa, config.off, buf, config.len);
 228
 229        if (copy_to_user(c->buf, buf, config.len)) {
 230                kvfree(buf);
 231                return -EFAULT;
 232        }
 233
 234        kvfree(buf);
 235        return 0;
 236}
 237
 238static long vhost_vdpa_set_config(struct vhost_vdpa *v,
 239                                  struct vhost_vdpa_config __user *c)
 240{
 241        struct vdpa_device *vdpa = v->vdpa;
 242        const struct vdpa_config_ops *ops = vdpa->config;
 243        struct vhost_vdpa_config config;
 244        unsigned long size = offsetof(struct vhost_vdpa_config, buf);
 245        u8 *buf;
 246
 247        if (copy_from_user(&config, c, size))
 248                return -EFAULT;
 249        if (vhost_vdpa_config_validate(v, &config))
 250                return -EINVAL;
 251        buf = kvzalloc(config.len, GFP_KERNEL);
 252        if (!buf)
 253                return -ENOMEM;
 254
 255        if (copy_from_user(buf, c->buf, config.len)) {
 256                kvfree(buf);
 257                return -EFAULT;
 258        }
 259
 260        ops->set_config(vdpa, config.off, buf, config.len);
 261
 262        kvfree(buf);
 263        return 0;
 264}
 265
 266static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
 267{
 268        struct vdpa_device *vdpa = v->vdpa;
 269        const struct vdpa_config_ops *ops = vdpa->config;
 270        u64 features;
 271
 272        features = ops->get_features(vdpa);
 273
 274        if (copy_to_user(featurep, &features, sizeof(features)))
 275                return -EFAULT;
 276
 277        return 0;
 278}
 279
 280static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 281{
 282        struct vdpa_device *vdpa = v->vdpa;
 283        const struct vdpa_config_ops *ops = vdpa->config;
 284        u64 features;
 285
 286        /*
 287         * It's not allowed to change the features after they have
 288         * been negotiated.
 289         */
 290        if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
 291                return -EBUSY;
 292
 293        if (copy_from_user(&features, featurep, sizeof(features)))
 294                return -EFAULT;
 295
 296        if (vdpa_set_features(vdpa, features))
 297                return -EINVAL;
 298
 299        return 0;
 300}
 301
 302static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 303{
 304        struct vdpa_device *vdpa = v->vdpa;
 305        const struct vdpa_config_ops *ops = vdpa->config;
 306        u16 num;
 307
 308        num = ops->get_vq_num_max(vdpa);
 309
 310        if (copy_to_user(argp, &num, sizeof(num)))
 311                return -EFAULT;
 312
 313        return 0;
 314}
 315
 316static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 317{
  318        if (v->config_ctx) {
  319                eventfd_ctx_put(v->config_ctx);
                     v->config_ctx = NULL;
             }
  320}
 321
 322static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
 323{
 324        struct vdpa_callback cb;
 325        int fd;
 326        struct eventfd_ctx *ctx;
 327
 328        cb.callback = vhost_vdpa_config_cb;
 329        cb.private = v->vdpa;
 330        if (copy_from_user(&fd, argp, sizeof(fd)))
 331                return  -EFAULT;
 332
 333        ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
 334        swap(ctx, v->config_ctx);
 335
 336        if (!IS_ERR_OR_NULL(ctx))
 337                eventfd_ctx_put(ctx);
 338
  339        if (IS_ERR(v->config_ctx)) {
  340                long ret = PTR_ERR(v->config_ctx);

                     v->config_ctx = NULL;
                     return ret;
             }
 341
 342        v->vdpa->config->set_config_cb(v->vdpa, &cb);
 343
 344        return 0;
 345}
 346
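/*
 * Per-virtqueue ioctls: vDPA-specific requests are handled here directly,
 * generic vring state is handled by vhost_vring_ioctl() and then propagated
 * to the parent device through the vdpa_config_ops.
 */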
 347static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 348                                   void __user *argp)
 349{
 350        struct vdpa_device *vdpa = v->vdpa;
 351        const struct vdpa_config_ops *ops = vdpa->config;
 352        struct vdpa_vq_state vq_state;
 353        struct vdpa_callback cb;
 354        struct vhost_virtqueue *vq;
 355        struct vhost_vring_state s;
 356        u32 idx;
 357        long r;
 358
 359        r = get_user(idx, (u32 __user *)argp);
 360        if (r < 0)
 361                return r;
 362
 363        if (idx >= v->nvqs)
 364                return -ENOBUFS;
 365
 366        idx = array_index_nospec(idx, v->nvqs);
 367        vq = &v->vqs[idx];
 368
 369        switch (cmd) {
 370        case VHOST_VDPA_SET_VRING_ENABLE:
 371                if (copy_from_user(&s, argp, sizeof(s)))
 372                        return -EFAULT;
 373                ops->set_vq_ready(vdpa, idx, s.num);
 374                return 0;
 375        case VHOST_GET_VRING_BASE:
 376                r = ops->get_vq_state(v->vdpa, idx, &vq_state);
 377                if (r)
 378                        return r;
 379
 380                vq->last_avail_idx = vq_state.avail_index;
 381                break;
 382        }
 383
 384        r = vhost_vring_ioctl(&v->vdev, cmd, argp);
 385        if (r)
 386                return r;
 387
 388        switch (cmd) {
 389        case VHOST_SET_VRING_ADDR:
 390                if (ops->set_vq_address(vdpa, idx,
 391                                        (u64)(uintptr_t)vq->desc,
 392                                        (u64)(uintptr_t)vq->avail,
 393                                        (u64)(uintptr_t)vq->used))
 394                        r = -EINVAL;
 395                break;
 396
 397        case VHOST_SET_VRING_BASE:
 398                vq_state.avail_index = vq->last_avail_idx;
 399                if (ops->set_vq_state(vdpa, idx, &vq_state))
 400                        r = -EINVAL;
 401                break;
 402
 403        case VHOST_SET_VRING_CALL:
 404                if (vq->call_ctx.ctx) {
 405                        cb.callback = vhost_vdpa_virtqueue_cb;
 406                        cb.private = vq;
 407                } else {
 408                        cb.callback = NULL;
 409                        cb.private = NULL;
 410                }
 411                ops->set_vq_cb(vdpa, idx, &cb);
 412                vhost_vdpa_setup_vq_irq(v, idx);
 413                break;
 414
 415        case VHOST_SET_VRING_NUM:
 416                ops->set_vq_num(vdpa, idx, vq->num);
 417                break;
 418        }
 419
 420        return r;
 421}
 422
 423static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 424                                      unsigned int cmd, unsigned long arg)
 425{
 426        struct vhost_vdpa *v = filep->private_data;
 427        struct vhost_dev *d = &v->vdev;
 428        void __user *argp = (void __user *)arg;
 429        u64 __user *featurep = argp;
 430        u64 features;
 431        long r;
 432
 433        if (cmd == VHOST_SET_BACKEND_FEATURES) {
 434                r = copy_from_user(&features, featurep, sizeof(features));
 435                if (r)
  436                        return -EFAULT;
 437                if (features & ~VHOST_VDPA_BACKEND_FEATURES)
 438                        return -EOPNOTSUPP;
 439                vhost_set_backend_features(&v->vdev, features);
 440                return 0;
 441        }
 442
 443        mutex_lock(&d->mutex);
 444
 445        switch (cmd) {
 446        case VHOST_VDPA_GET_DEVICE_ID:
 447                r = vhost_vdpa_get_device_id(v, argp);
 448                break;
 449        case VHOST_VDPA_GET_STATUS:
 450                r = vhost_vdpa_get_status(v, argp);
 451                break;
 452        case VHOST_VDPA_SET_STATUS:
 453                r = vhost_vdpa_set_status(v, argp);
 454                break;
 455        case VHOST_VDPA_GET_CONFIG:
 456                r = vhost_vdpa_get_config(v, argp);
 457                break;
 458        case VHOST_VDPA_SET_CONFIG:
 459                r = vhost_vdpa_set_config(v, argp);
 460                break;
 461        case VHOST_GET_FEATURES:
 462                r = vhost_vdpa_get_features(v, argp);
 463                break;
 464        case VHOST_SET_FEATURES:
 465                r = vhost_vdpa_set_features(v, argp);
 466                break;
 467        case VHOST_VDPA_GET_VRING_NUM:
 468                r = vhost_vdpa_get_vring_num(v, argp);
 469                break;
 470        case VHOST_SET_LOG_BASE:
 471        case VHOST_SET_LOG_FD:
 472                r = -ENOIOCTLCMD;
 473                break;
 474        case VHOST_VDPA_SET_CONFIG_CALL:
 475                r = vhost_vdpa_set_config_call(v, argp);
 476                break;
 477        case VHOST_GET_BACKEND_FEATURES:
 478                features = VHOST_VDPA_BACKEND_FEATURES;
  479                r = copy_to_user(featurep, &features, sizeof(features)) ? -EFAULT : 0;
 480                break;
 481        default:
 482                r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 483                if (r == -ENOIOCTLCMD)
 484                        r = vhost_vdpa_vring_ioctl(v, cmd, argp);
 485                break;
 486        }
 487
 488        mutex_unlock(&d->mutex);
 489        return r;
 490}
 491
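/*
 * Drop all IOTLB entries intersecting [start, last]: unpin the backing
 * pages (marking them dirty if they were writable) and update the pinned
 * page accounting of the owning mm.
 */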
 492static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
 493{
 494        struct vhost_dev *dev = &v->vdev;
 495        struct vhost_iotlb *iotlb = dev->iotlb;
 496        struct vhost_iotlb_map *map;
 497        struct page *page;
 498        unsigned long pfn, pinned;
 499
 500        while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
 501                pinned = map->size >> PAGE_SHIFT;
 502                for (pfn = map->addr >> PAGE_SHIFT;
 503                     pinned > 0; pfn++, pinned--) {
 504                        page = pfn_to_page(pfn);
 505                        if (map->perm & VHOST_ACCESS_WO)
 506                                set_page_dirty_lock(page);
 507                        unpin_user_page(page);
 508                }
 509                atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
 510                vhost_iotlb_map_free(iotlb, map);
 511        }
 512}
 513
 514static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
 515{
 516        struct vhost_dev *dev = &v->vdev;
 517
 518        vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
 519        kfree(dev->iotlb);
 520        dev->iotlb = NULL;
 521}
 522
 523static int perm_to_iommu_flags(u32 perm)
 524{
 525        int flags = 0;
 526
 527        switch (perm) {
 528        case VHOST_ACCESS_WO:
 529                flags |= IOMMU_WRITE;
 530                break;
 531        case VHOST_ACCESS_RO:
 532                flags |= IOMMU_READ;
 533                break;
 534        case VHOST_ACCESS_RW:
 535                flags |= (IOMMU_WRITE | IOMMU_READ);
 536                break;
 537        default:
  538                WARN(1, "invalid vhost IOTLB permission\n");
 539                break;
 540        }
 541
 542        return flags | IOMMU_CACHE;
 543}
 544
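/*
 * Record a mapping in the vhost IOTLB and install it in the device, using
 * whichever translation path the parent supports: per-range dma_map(),
 * whole-table set_map() (deferred while a batch is in flight), or the
 * platform IOMMU domain as a fallback.
 */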
 545static int vhost_vdpa_map(struct vhost_vdpa *v,
 546                          u64 iova, u64 size, u64 pa, u32 perm)
 547{
 548        struct vhost_dev *dev = &v->vdev;
 549        struct vdpa_device *vdpa = v->vdpa;
 550        const struct vdpa_config_ops *ops = vdpa->config;
 551        int r = 0;
 552
 553        r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
 554                                  pa, perm);
 555        if (r)
 556                return r;
 557
 558        if (ops->dma_map) {
 559                r = ops->dma_map(vdpa, iova, size, pa, perm);
 560        } else if (ops->set_map) {
 561                if (!v->in_batch)
 562                        r = ops->set_map(vdpa, dev->iotlb);
 563        } else {
 564                r = iommu_map(v->domain, iova, pa, size,
 565                              perm_to_iommu_flags(perm));
 566        }
 567
 568        if (r)
 569                vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
 570
 571        return r;
 572}
 573
 574static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
 575{
 576        struct vhost_dev *dev = &v->vdev;
 577        struct vdpa_device *vdpa = v->vdpa;
 578        const struct vdpa_config_ops *ops = vdpa->config;
 579
 580        vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);
 581
 582        if (ops->dma_map) {
 583                ops->dma_unmap(vdpa, iova, size);
 584        } else if (ops->set_map) {
 585                if (!v->in_batch)
 586                        ops->set_map(vdpa, dev->iotlb);
 587        } else {
 588                iommu_unmap(v->domain, iova, size);
 589        }
 590}
 591
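/*
 * Handle a VHOST_IOTLB_UPDATE message: pin the userspace pages backing
 * [uaddr, uaddr + size), then map them into the device in physically
 * contiguous chunks, charging the pinned pages against RLIMIT_MEMLOCK.
 */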
 592static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
 593                                           struct vhost_iotlb_msg *msg)
 594{
 595        struct vhost_dev *dev = &v->vdev;
 596        struct vhost_iotlb *iotlb = dev->iotlb;
 597        struct page **page_list;
 598        struct vm_area_struct **vmas;
 599        unsigned int gup_flags = FOLL_LONGTERM;
 600        unsigned long map_pfn, last_pfn = 0;
 601        unsigned long npages, lock_limit;
 602        unsigned long i, nmap = 0;
 603        u64 iova = msg->iova;
 604        long pinned;
 605        int ret = 0;
 606
 607        if (vhost_iotlb_itree_first(iotlb, msg->iova,
 608                                    msg->iova + msg->size - 1))
 609                return -EEXIST;
 610
 611        if (msg->perm & VHOST_ACCESS_WO)
 612                gup_flags |= FOLL_WRITE;
 613
 614        npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
 615        if (!npages)
 616                return -EINVAL;
 617
 618        page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
 619        vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
 620                              GFP_KERNEL);
 621        if (!page_list || !vmas) {
 622                ret = -ENOMEM;
 623                goto free;
 624        }
 625
 626        mmap_read_lock(dev->mm);
 627
 628        lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 629        if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
 630                ret = -ENOMEM;
 631                goto unlock;
 632        }
 633
 634        pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags,
 635                                page_list, vmas);
 636        if (npages != pinned) {
 637                if (pinned < 0) {
 638                        ret = pinned;
 639                } else {
 640                        unpin_user_pages(page_list, pinned);
 641                        ret = -ENOMEM;
 642                }
 643                goto unlock;
 644        }
 645
 646        iova &= PAGE_MASK;
 647        map_pfn = page_to_pfn(page_list[0]);
 648
  649        /* Iterate one extra time so the final chunk is mapped inside the loop. */
 650        for (i = 0; i <= npages; i++) {
 651                unsigned long this_pfn;
 652                u64 csize;
 653
 654                /* The last chunk may have no valid PFN next to it */
 655                this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL;
 656
 657                if (last_pfn && (this_pfn == -1UL ||
 658                                 this_pfn != last_pfn + 1)) {
  659                        /* Map the contiguous chunk that just ended */
 660                        csize = last_pfn - map_pfn + 1;
 661                        ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
 662                                             map_pfn << PAGE_SHIFT,
 663                                             msg->perm);
 664                        if (ret) {
  665                                /*
  666                                 * Unpin the remaining pages, i.e. those
  667                                 * with no corresponding vhost_vdpa_map()
  668                                 * call yet; vhost_vdpa_unmap() in the
  669                                 * failure path below takes care of
  670                                 * unpinning and unaccounting the pages
  671                                 * that were already mapped.
  672                                 * The accounting is asymmetrical because
  673                                 * all pages are pinned up front for
  674                                 * efficiency, and this loop is the only
  675                                 * caller of vhost_vdpa_map().
  676                                 */
 677                                unpin_user_pages(&page_list[nmap],
 678                                                 npages - nmap);
 679                                goto out;
 680                        }
 681                        atomic64_add(csize, &dev->mm->pinned_vm);
 682                        nmap += csize;
 683                        iova += csize << PAGE_SHIFT;
 684                        map_pfn = this_pfn;
 685                }
 686                last_pfn = this_pfn;
 687        }
 688
 689        WARN_ON(nmap != npages);
 690out:
 691        if (ret)
 692                vhost_vdpa_unmap(v, msg->iova, msg->size);
 693unlock:
 694        mmap_read_unlock(dev->mm);
 695free:
 696        kvfree(vmas);
 697        kvfree(page_list);
 698        return ret;
 699}
 700
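/*
 * Backend of the vhost IOTLB message interface (VHOST_IOTLB_MSG_V2 written
 * to the chardev): dispatch update, invalidate and batch begin/end requests.
 */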
 701static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
 702                                        struct vhost_iotlb_msg *msg)
 703{
 704        struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
 705        struct vdpa_device *vdpa = v->vdpa;
 706        const struct vdpa_config_ops *ops = vdpa->config;
 707        int r = 0;
 708
 709        r = vhost_dev_check_owner(dev);
 710        if (r)
 711                return r;
 712
 713        switch (msg->type) {
 714        case VHOST_IOTLB_UPDATE:
 715                r = vhost_vdpa_process_iotlb_update(v, msg);
 716                break;
 717        case VHOST_IOTLB_INVALIDATE:
 718                vhost_vdpa_unmap(v, msg->iova, msg->size);
 719                break;
 720        case VHOST_IOTLB_BATCH_BEGIN:
 721                v->in_batch = true;
 722                break;
 723        case VHOST_IOTLB_BATCH_END:
 724                if (v->in_batch && ops->set_map)
 725                        ops->set_map(vdpa, dev->iotlb);
 726                v->in_batch = false;
 727                break;
 728        default:
 729                r = -EINVAL;
 730                break;
 731        }
 732
 733        return r;
 734}
 735
 736static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
 737                                         struct iov_iter *from)
 738{
 739        struct file *file = iocb->ki_filp;
 740        struct vhost_vdpa *v = file->private_data;
 741        struct vhost_dev *dev = &v->vdev;
 742
 743        return vhost_chr_write_iter(dev, from);
 744}
 745
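/*
 * If the parent device relies on the platform IOMMU for DMA translation
 * (neither dma_map() nor set_map() is provided), allocate an IOMMU domain
 * and attach the DMA device to it.
 */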
 746static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 747{
 748        struct vdpa_device *vdpa = v->vdpa;
 749        const struct vdpa_config_ops *ops = vdpa->config;
 750        struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 751        struct bus_type *bus;
 752        int ret;
 753
  754        /* The device handles DMA translation by itself */
 755        if (ops->set_map || ops->dma_map)
 756                return 0;
 757
 758        bus = dma_dev->bus;
 759        if (!bus)
 760                return -EFAULT;
 761
 762        if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
 763                return -ENOTSUPP;
 764
 765        v->domain = iommu_domain_alloc(bus);
 766        if (!v->domain)
 767                return -EIO;
 768
 769        ret = iommu_attach_device(v->domain, dma_dev);
 770        if (ret)
 771                goto err_attach;
 772
 773        return 0;
 774
 775err_attach:
 776        iommu_domain_free(v->domain);
 777        return ret;
 778}
 779
 780static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
 781{
 782        struct vdpa_device *vdpa = v->vdpa;
 783        struct device *dma_dev = vdpa_get_dma_dev(vdpa);
 784
 785        if (v->domain) {
 786                iommu_detach_device(v->domain, dma_dev);
 787                iommu_domain_free(v->domain);
 788        }
 789
 790        v->domain = NULL;
 791}
 792
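/*
 * Exclusive open of the character device: reset the device, then set up the
 * vhost_dev, its IOTLB and, if needed, an IOMMU domain.
 */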
 793static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 794{
 795        struct vhost_vdpa *v;
 796        struct vhost_dev *dev;
 797        struct vhost_virtqueue **vqs;
 798        int nvqs, i, r, opened;
 799
 800        v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
 801
 802        opened = atomic_cmpxchg(&v->opened, 0, 1);
 803        if (opened)
 804                return -EBUSY;
 805
 806        nvqs = v->nvqs;
 807        vhost_vdpa_reset(v);
 808
 809        vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
 810        if (!vqs) {
 811                r = -ENOMEM;
 812                goto err;
 813        }
 814
 815        dev = &v->vdev;
 816        for (i = 0; i < nvqs; i++) {
 817                vqs[i] = &v->vqs[i];
 818                vqs[i]->handle_kick = handle_vq_kick;
 819        }
 820        vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 821                       vhost_vdpa_process_iotlb_msg);
 822
 823        dev->iotlb = vhost_iotlb_alloc(0, 0);
 824        if (!dev->iotlb) {
 825                r = -ENOMEM;
 826                goto err_init_iotlb;
 827        }
 828
 829        r = vhost_vdpa_alloc_domain(v);
 830        if (r)
 831                goto err_init_iotlb;
 832
 833        filep->private_data = v;
 834
 835        return 0;
 836
 837err_init_iotlb:
 838        vhost_dev_cleanup(&v->vdev);
 839        kfree(vqs);
 840err:
 841        atomic_dec(&v->opened);
 842        return r;
 843}
 844
 845static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
 846{
 847        struct vhost_virtqueue *vq;
 848        int i;
 849
 850        for (i = 0; i < v->nvqs; i++) {
 851                vq = &v->vqs[i];
 852                if (vq->call_ctx.producer.irq)
 853                        irq_bypass_unregister_producer(&vq->call_ctx.producer);
 854        }
 855}
 856
 857static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 858{
 859        struct vhost_vdpa *v = filep->private_data;
 860        struct vhost_dev *d = &v->vdev;
 861
 862        mutex_lock(&d->mutex);
 863        filep->private_data = NULL;
 864        vhost_vdpa_reset(v);
 865        vhost_dev_stop(&v->vdev);
 866        vhost_vdpa_iotlb_free(v);
 867        vhost_vdpa_free_domain(v);
 868        vhost_vdpa_config_put(v);
 869        vhost_vdpa_clean_irq(v);
 870        vhost_dev_cleanup(&v->vdev);
 871        kfree(v->vdev.vqs);
 872        mutex_unlock(&d->mutex);
 873
 874        atomic_dec(&v->opened);
 875        complete(&v->completion);
 876
 877        return 0;
 878}
 879
 880#ifdef CONFIG_MMU
 881static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
 882{
 883        struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
 884        struct vdpa_device *vdpa = v->vdpa;
 885        const struct vdpa_config_ops *ops = vdpa->config;
 886        struct vdpa_notification_area notify;
 887        struct vm_area_struct *vma = vmf->vma;
 888        u16 index = vma->vm_pgoff;
 889
 890        notify = ops->get_vq_notification(vdpa, index);
 891
 892        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 893        if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
 894                            notify.addr >> PAGE_SHIFT, PAGE_SIZE,
 895                            vma->vm_page_prot))
 896                return VM_FAULT_SIGBUS;
 897
 898        return VM_FAULT_NOPAGE;
 899}
 900
 901static const struct vm_operations_struct vhost_vdpa_vm_ops = {
 902        .fault = vhost_vdpa_fault,
 903};
 904
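/*
 * Let userspace mmap a virtqueue doorbell: vm_pgoff selects the virtqueue,
 * and the mapping must be a single shared, non-readable page backed by a
 * notification area that is page-aligned and page-sized.
 */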
 905static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
 906{
 907        struct vhost_vdpa *v = vma->vm_file->private_data;
 908        struct vdpa_device *vdpa = v->vdpa;
 909        const struct vdpa_config_ops *ops = vdpa->config;
 910        struct vdpa_notification_area notify;
 911        unsigned long index = vma->vm_pgoff;
 912
 913        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
 914                return -EINVAL;
 915        if ((vma->vm_flags & VM_SHARED) == 0)
 916                return -EINVAL;
 917        if (vma->vm_flags & VM_READ)
 918                return -EINVAL;
 919        if (index > 65535)
 920                return -EINVAL;
 921        if (!ops->get_vq_notification)
 922                return -ENOTSUPP;
 923
  924        /* To be safe and easily modelled by userspace, we only
  925         * support doorbells that sit on a page boundary and do not
  926         * share the page with other registers.
  927         */
 928        notify = ops->get_vq_notification(vdpa, index);
 929        if (notify.addr & (PAGE_SIZE - 1))
 930                return -EINVAL;
 931        if (vma->vm_end - vma->vm_start != notify.size)
 932                return -ENOTSUPP;
 933
 934        vma->vm_ops = &vhost_vdpa_vm_ops;
 935        return 0;
 936}
 937#endif /* CONFIG_MMU */
 938
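/*
 * File operations backing /dev/vhost-vdpa-N. As a rough illustration (not a
 * complete or normative sequence), a userspace driver typically does:
 *
 *	fd = open("/dev/vhost-vdpa-0", O_RDWR);
 *	ioctl(fd, VHOST_SET_OWNER);
 *	ioctl(fd, VHOST_GET_FEATURES, &features);
 *	ioctl(fd, VHOST_SET_FEATURES, &features);
 *	... set up vrings, call/kick eventfds and IOTLB mappings ...
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 */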
 939static const struct file_operations vhost_vdpa_fops = {
 940        .owner          = THIS_MODULE,
 941        .open           = vhost_vdpa_open,
 942        .release        = vhost_vdpa_release,
 943        .write_iter     = vhost_vdpa_chr_write_iter,
 944        .unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
 945#ifdef CONFIG_MMU
 946        .mmap           = vhost_vdpa_mmap,
 947#endif /* CONFIG_MMU */
 948        .compat_ioctl   = compat_ptr_ioctl,
 949};
 950
 951static void vhost_vdpa_release_dev(struct device *device)
 952{
 953        struct vhost_vdpa *v =
 954               container_of(device, struct vhost_vdpa, dev);
 955
 956        ida_simple_remove(&vhost_vdpa_ida, v->minor);
 957        kfree(v->vqs);
 958        kfree(v);
 959}
 960
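/*
 * Bus probe: create a character device (/dev/vhost-vdpa-<minor>) for the
 * newly registered vDPA device so userspace can take it over.
 */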
 961static int vhost_vdpa_probe(struct vdpa_device *vdpa)
 962{
 963        const struct vdpa_config_ops *ops = vdpa->config;
 964        struct vhost_vdpa *v;
 965        int minor;
 966        int r;
 967
  968        /* Currently, we only accept network devices. */
 969        if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
 970                return -ENOTSUPP;
 971
 972        v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 973        if (!v)
 974                return -ENOMEM;
 975
 976        minor = ida_simple_get(&vhost_vdpa_ida, 0,
 977                               VHOST_VDPA_DEV_MAX, GFP_KERNEL);
 978        if (minor < 0) {
 979                kfree(v);
 980                return minor;
 981        }
 982
 983        atomic_set(&v->opened, 0);
 984        v->minor = minor;
 985        v->vdpa = vdpa;
 986        v->nvqs = vdpa->nvqs;
 987        v->virtio_id = ops->get_device_id(vdpa);
 988
 989        device_initialize(&v->dev);
 990        v->dev.release = vhost_vdpa_release_dev;
 991        v->dev.parent = &vdpa->dev;
 992        v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
 993        v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
 994                               GFP_KERNEL);
 995        if (!v->vqs) {
 996                r = -ENOMEM;
 997                goto err;
 998        }
 999
1000        r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
1001        if (r)
1002                goto err;
1003
1004        cdev_init(&v->cdev, &vhost_vdpa_fops);
1005        v->cdev.owner = THIS_MODULE;
1006
1007        r = cdev_device_add(&v->cdev, &v->dev);
1008        if (r)
1009                goto err;
1010
1011        init_completion(&v->completion);
1012        vdpa_set_drvdata(vdpa, v);
1013
1014        return 0;
1015
1016err:
1017        put_device(&v->dev);
1018        return r;
1019}
1020
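/*
 * Bus remove: delete the character device and wait until the last opener
 * has released it before dropping the final reference.
 */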
1021static void vhost_vdpa_remove(struct vdpa_device *vdpa)
1022{
1023        struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
1024        int opened;
1025
1026        cdev_device_del(&v->cdev, &v->dev);
1027
1028        do {
1029                opened = atomic_cmpxchg(&v->opened, 0, 1);
1030                if (!opened)
1031                        break;
1032                wait_for_completion(&v->completion);
1033        } while (1);
1034
1035        put_device(&v->dev);
1036}
1037
1038static struct vdpa_driver vhost_vdpa_driver = {
1039        .driver = {
1040                .name   = "vhost_vdpa",
1041        },
1042        .probe  = vhost_vdpa_probe,
1043        .remove = vhost_vdpa_remove,
1044};
1045
1046static int __init vhost_vdpa_init(void)
1047{
1048        int r;
1049
1050        r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
1051                                "vhost-vdpa");
1052        if (r)
1053                goto err_alloc_chrdev;
1054
1055        r = vdpa_register_driver(&vhost_vdpa_driver);
1056        if (r)
1057                goto err_vdpa_register_driver;
1058
1059        return 0;
1060
1061err_vdpa_register_driver:
1062        unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1063err_alloc_chrdev:
1064        return r;
1065}
1066module_init(vhost_vdpa_init);
1067
1068static void __exit vhost_vdpa_exit(void)
1069{
1070        vdpa_unregister_driver(&vhost_vdpa_driver);
1071        unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
1072}
1073module_exit(vhost_vdpa_exit);
1074
1075MODULE_VERSION("0.0.1");
1076MODULE_LICENSE("GPL v2");
1077MODULE_AUTHOR("Intel Corporation");
1078MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");
1079