// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
        u16 index;
        u16 num_max;
        u32 num;
        u64 desc_addr;
        u64 driver_addr;
        u64 device_addr;
        struct vdpa_vq_state state;
        bool ready;
        bool kicked;
        spinlock_t kick_lock;
        spinlock_t irq_lock;
        struct eventfd_ctx *kickfd;
        struct vdpa_callback cb;
        struct work_struct inject;
        struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
        struct vdpa_device vdpa;
        struct vduse_dev *dev;
};

struct vduse_dev {
        struct vduse_vdpa *vdev;
        struct device *dev;
        struct vduse_virtqueue *vqs;
        struct vduse_iova_domain *domain;
        char *name;
        struct mutex lock;
        spinlock_t msg_lock;
        u64 msg_unique;
        u32 msg_timeout;
        wait_queue_head_t waitq;
        struct list_head send_list;
        struct list_head recv_list;
        struct vdpa_callback config_cb;
        struct work_struct inject;
        spinlock_t irq_lock;
        struct rw_semaphore rwsem;
        int minor;
        bool broken;
        bool connected;
        u64 api_version;
        u64 device_features;
        u64 driver_features;
        u32 device_id;
        u32 vendor_id;
        u32 generation;
        u32 config_size;
        void *config;
        u8 status;
        u32 vq_num;
        u32 vq_align;
};

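/*
 * Control messages between the kernel and the userspace daemon:
 * requests are queued on dev->send_list and consumed by userspace
 * via read() on the /dev/vduse/$NAME descriptor; replies are
 * matched back to the waiting request by request_id via write().
 */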
struct vduse_dev_msg {
        struct vduse_dev_request req;
        struct vduse_dev_response resp;
        struct list_head list;
        wait_queue_head_t waitq;
        bool completed;
};

struct vduse_control {
        u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
        VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
        struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

        return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
        struct vdpa_device *vdpa = dev_to_vdpa(dev);

        return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
                                            uint32_t request_id)
{
        struct vduse_dev_msg *msg;

        list_for_each_entry(msg, head, list) {
                if (msg->req.request_id == request_id) {
                        list_del(&msg->list);
                        return msg;
                }
        }

        return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
        struct vduse_dev_msg *msg = NULL;

        if (!list_empty(head)) {
                msg = list_first_entry(head, struct vduse_dev_msg, list);
                list_del(&msg->list);
        }

        return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
                              struct vduse_dev_msg *msg)
{
        list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
        struct vduse_dev_msg *msg, *tmp;

        if (unlikely(dev->broken))
                return;

        list_splice_init(&dev->recv_list, &dev->send_list);
        list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
                list_del(&msg->list);
                msg->completed = 1;
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                wake_up(&msg->waitq);
        }
        dev->broken = true;
        wake_up(&dev->waitq);
}

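/*
 * Post a request on the send list, wake the userspace daemon and
 * wait (killable, optionally with a timeout) for the matching reply.
 * A timeout marks the whole device broken, which fails this and
 * every other inflight request.
 */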
static int vduse_dev_msg_sync(struct vduse_dev *dev,
                              struct vduse_dev_msg *msg)
{
        int ret;

        if (unlikely(dev->broken))
                return -EIO;

        init_waitqueue_head(&msg->waitq);
        spin_lock(&dev->msg_lock);
        if (unlikely(dev->broken)) {
                spin_unlock(&dev->msg_lock);
                return -EIO;
        }
        msg->req.request_id = dev->msg_unique++;
        vduse_enqueue_msg(&dev->send_list, msg);
        wake_up(&dev->waitq);
        spin_unlock(&dev->msg_lock);
        if (dev->msg_timeout)
                ret = wait_event_killable_timeout(msg->waitq, msg->completed,
                                                  (long)dev->msg_timeout * HZ);
        else
                ret = wait_event_killable(msg->waitq, msg->completed);

        spin_lock(&dev->msg_lock);
        if (!msg->completed) {
                list_del(&msg->list);
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                /* Mark the device as malfunctioning when there is a timeout */
                if (!ret)
                        vduse_dev_broken(dev);
        }
        ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
        spin_unlock(&dev->msg_lock);

        return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
                                         struct vduse_virtqueue *vq,
                                         struct vdpa_vq_state_packed *packed)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        packed->last_avail_counter =
                        msg.resp.vq_state.packed.last_avail_counter & 0x0001;
        packed->last_avail_idx =
                        msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
        packed->last_used_counter =
                        msg.resp.vq_state.packed.last_used_counter & 0x0001;
        packed->last_used_idx =
                        msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

        return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
                                        struct vduse_virtqueue *vq,
                                        struct vdpa_vq_state_split *split)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        split->avail_index = msg.resp.vq_state.split.avail_index;

        return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
        struct vduse_dev_msg msg = { 0 };

        msg.req.type = VDUSE_SET_STATUS;
        msg.req.s.status = status;

        return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
                                  u64 start, u64 last)
{
        struct vduse_dev_msg msg = { 0 };

        if (last < start)
                return -EINVAL;

        msg.req.type = VDUSE_UPDATE_IOTLB;
        msg.req.iova.start = start;
        msg.req.iova.last = last;

        return vduse_dev_msg_sync(dev, &msg);
}

static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_msg *msg;
        int size = sizeof(struct vduse_dev_request);
        ssize_t ret;

        if (iov_iter_count(to) < size)
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        while (1) {
                msg = vduse_dequeue_msg(&dev->send_list);
                if (msg)
                        break;

                ret = -EAGAIN;
                if (file->f_flags & O_NONBLOCK)
                        goto unlock;

                spin_unlock(&dev->msg_lock);
                ret = wait_event_interruptible_exclusive(dev->waitq,
                                        !list_empty(&dev->send_list));
                if (ret)
                        return ret;

                spin_lock(&dev->msg_lock);
        }
        spin_unlock(&dev->msg_lock);
        ret = copy_to_iter(&msg->req, size, to);
        spin_lock(&dev->msg_lock);
        if (ret != size) {
                ret = -EFAULT;
                vduse_enqueue_msg(&dev->send_list, msg);
                goto unlock;
        }
        vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
        int i;

        for (i = 0; i < size; i++) {
                if (ptr[i])
                        return false;
        }
        return true;
}

static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_response resp;
        struct vduse_dev_msg *msg;
        size_t ret;

        ret = copy_from_iter(&resp, sizeof(resp), from);
        if (ret != sizeof(resp))
                return -EINVAL;

        if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        msg = vduse_find_msg(&dev->recv_list, resp.request_id);
        if (!msg) {
                ret = -ENOENT;
                goto unlock;
        }

        memcpy(&msg->resp, &resp, sizeof(resp));
        msg->completed = 1;
        wake_up(&msg->waitq);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

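/*
 * A minimal sketch of the userspace side of this message protocol
 * (illustrative only, error handling omitted; "fd" is assumed to be
 * an open /dev/vduse/$NAME descriptor):
 *
 *      struct vduse_dev_request req;
 *      struct vduse_dev_response resp;
 *
 *      while (read(fd, &req, sizeof(req)) == sizeof(req)) {
 *              memset(&resp, 0, sizeof(resp));
 *              resp.request_id = req.request_id;
 *              resp.result = VDUSE_REQ_RESULT_OK;
 *              ... handle req.type (VDUSE_GET_VQ_STATE, VDUSE_SET_STATUS,
 *                  VDUSE_UPDATE_IOTLB) ...
 *              write(fd, &resp, sizeof(resp));
 *      }
 */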
static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
        struct vduse_dev *dev = file->private_data;
        __poll_t mask = 0;

        poll_wait(file, &dev->waitq, wait);

        spin_lock(&dev->msg_lock);

        if (unlikely(dev->broken))
                mask |= EPOLLERR;
        if (!list_empty(&dev->send_list))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (!list_empty(&dev->recv_list))
                mask |= EPOLLOUT | EPOLLWRNORM;

        spin_unlock(&dev->msg_lock);

        return mask;
}

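/*
 * Return the device to its initial state: clear status, features and
 * callbacks, quiesce every virtqueue and drop its kick eventfd.
 * Called on virtio reset and right before a device is destroyed.
 */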
static void vduse_dev_reset(struct vduse_dev *dev)
{
        int i;
        struct vduse_iova_domain *domain = dev->domain;

        /* The coherent mappings are handled in vduse_dev_free_coherent() */
        if (domain->bounce_map)
                vduse_domain_reset_bounce_map(domain);

        down_write(&dev->rwsem);

        dev->status = 0;
        dev->driver_features = 0;
        dev->generation++;
        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = NULL;
        dev->config_cb.private = NULL;
        spin_unlock(&dev->irq_lock);
        flush_work(&dev->inject);

        for (i = 0; i < dev->vq_num; i++) {
                struct vduse_virtqueue *vq = &dev->vqs[i];

                vq->ready = false;
                vq->desc_addr = 0;
                vq->driver_addr = 0;
                vq->device_addr = 0;
                vq->num = 0;
                memset(&vq->state, 0, sizeof(vq->state));

                spin_lock(&vq->kick_lock);
                vq->kicked = false;
                if (vq->kickfd)
                        eventfd_ctx_put(vq->kickfd);
                vq->kickfd = NULL;
                spin_unlock(&vq->kick_lock);

                spin_lock(&vq->irq_lock);
                vq->cb.callback = NULL;
                vq->cb.private = NULL;
                spin_unlock(&vq->irq_lock);
                flush_work(&vq->inject);
                flush_work(&vq->kick);
        }

        up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
                                u64 desc_area, u64 driver_area,
                                u64 device_area)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->desc_addr = desc_area;
        vq->driver_addr = driver_area;
        vq->device_addr = device_area;

        return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
        spin_lock(&vq->kick_lock);
        if (!vq->ready)
                goto unlock;

        if (vq->kickfd)
                eventfd_signal(vq->kickfd, 1);
        else
                vq->kicked = true;
unlock:
        spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, kick);

        vduse_vq_kick(vq);
}

static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (!eventfd_signal_allowed()) {
                schedule_work(&vq->kick);
                return;
        }
        vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
                              struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        spin_lock(&vq->irq_lock);
        vq->cb.callback = cb->callback;
        vq->cb.private = cb->private;
        spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
                                        u16 idx, bool ready)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
                                const struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                vq->state.packed.last_avail_counter =
                                state->packed.last_avail_counter;
                vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
                vq->state.packed.last_used_counter =
                                state->packed.last_used_counter;
                vq->state.packed.last_used_idx = state->packed.last_used_idx;
        } else {
                vq->state.split.avail_index = state->split.avail_index;
        }

        return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
                                struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
                return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

        return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vq_align;
}

static u64 vduse_vdpa_get_device_features(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_features;
}

static int vduse_vdpa_set_driver_features(struct vdpa_device *vdpa, u64 features)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->driver_features = features;
        return 0;
}

static u64 vduse_vdpa_get_driver_features(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->driver_features;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
                                  struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = cb->callback;
        dev->config_cb.private = cb->private;
        spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        u16 num_max = 0;
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (num_max < dev->vqs[i].num_max)
                        num_max = dev->vqs[i].num_max;

        return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        if (vduse_dev_set_status(dev, status))
                return;

        dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
                                  void *buf, unsigned int len)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        if (offset > dev->config_size ||
            len > dev->config_size - offset)
                return;

        memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
                        const void *buf, unsigned int len)
{
        /* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret = vduse_dev_set_status(dev, 0);

        vduse_dev_reset(dev);

        return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
                                unsigned int asid,
                                struct vhost_iotlb *iotlb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret;

        ret = vduse_domain_set_map(dev->domain, iotlb);
        if (ret)
                return ret;

        ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
        if (ret) {
                vduse_domain_clear_map(dev->domain, iotlb);
                return ret;
        }

        return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
        .set_vq_address         = vduse_vdpa_set_vq_address,
        .kick_vq                = vduse_vdpa_kick_vq,
        .set_vq_cb              = vduse_vdpa_set_vq_cb,
        .set_vq_num             = vduse_vdpa_set_vq_num,
        .set_vq_ready           = vduse_vdpa_set_vq_ready,
        .get_vq_ready           = vduse_vdpa_get_vq_ready,
        .set_vq_state           = vduse_vdpa_set_vq_state,
        .get_vq_state           = vduse_vdpa_get_vq_state,
        .get_vq_align           = vduse_vdpa_get_vq_align,
        .get_device_features    = vduse_vdpa_get_device_features,
        .set_driver_features    = vduse_vdpa_set_driver_features,
        .get_driver_features    = vduse_vdpa_get_driver_features,
        .set_config_cb          = vduse_vdpa_set_config_cb,
        .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
        .get_device_id          = vduse_vdpa_get_device_id,
        .get_vendor_id          = vduse_vdpa_get_vendor_id,
        .get_status             = vduse_vdpa_get_status,
        .set_status             = vduse_vdpa_set_status,
        .get_config_size        = vduse_vdpa_get_config_size,
        .get_config             = vduse_vdpa_get_config,
        .set_config             = vduse_vdpa_set_config,
        .get_generation         = vduse_vdpa_get_generation,
        .reset                  = vduse_vdpa_reset,
        .set_map                = vduse_vdpa_set_map,
        .free                   = vduse_vdpa_free,
};

static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
                                size_t size, enum dma_data_direction dir,
                                unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
                                        dma_addr_t *dma_addr, gfp_t flag,
                                        unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;
        unsigned long iova;
        void *addr;

        *dma_addr = DMA_MAPPING_ERROR;
        addr = vduse_domain_alloc_coherent(domain, size,
                                (dma_addr_t *)&iova, flag, attrs);
        if (!addr)
                return NULL;

        *dma_addr = (dma_addr_t)iova;

        return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
                                        void *vaddr, dma_addr_t dma_addr,
                                        unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return domain->bounce_size;
}

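/*
 * All virtio DMA for a VDUSE device is routed through its IOVA
 * domain, so every buffer lands in memory the userspace daemon can
 * reach: either the bounce pages or the mappings exported to it via
 * VDUSE_IOTLB_GET_FD.
 */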
static const struct dma_map_ops vduse_dev_dma_ops = {
        .map_page = vduse_dev_map_page,
        .unmap_page = vduse_dev_unmap_page,
        .alloc = vduse_dev_alloc_coherent,
        .free = vduse_dev_free_coherent,
        .max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
        unsigned int flags = 0;

        switch (perm) {
        case VDUSE_ACCESS_WO:
                flags |= O_WRONLY;
                break;
        case VDUSE_ACCESS_RO:
                flags |= O_RDONLY;
                break;
        case VDUSE_ACCESS_RW:
                flags |= O_RDWR;
                break;
        default:
                WARN(1, "invalid vhost IOTLB permission\n");
                break;
        }

        return flags;
}

static int vduse_kickfd_setup(struct vduse_dev *dev,
                        struct vduse_vq_eventfd *eventfd)
{
        struct eventfd_ctx *ctx = NULL;
        struct vduse_virtqueue *vq;
        u32 index;

        if (eventfd->index >= dev->vq_num)
                return -EINVAL;

        index = array_index_nospec(eventfd->index, dev->vq_num);
        vq = &dev->vqs[index];
        if (eventfd->fd >= 0) {
                ctx = eventfd_ctx_fdget(eventfd->fd);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);
        } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
                return 0;

        spin_lock(&vq->kick_lock);
        if (vq->kickfd)
                eventfd_ctx_put(vq->kickfd);
        vq->kickfd = ctx;
        if (vq->ready && vq->kicked && vq->kickfd) {
                eventfd_signal(vq->kickfd, 1);
                vq->kicked = false;
        }
        spin_unlock(&vq->kick_lock);

        return 0;
}

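/*
 * A minimal sketch of assigning a kick eventfd from userspace
 * (illustrative only, error handling omitted; "dev_fd" is assumed
 * to be an open /dev/vduse/$NAME descriptor):
 *
 *      struct vduse_vq_eventfd efd = {
 *              .index = 0,
 *              .fd = eventfd(0, EFD_CLOEXEC),
 *      };
 *
 *      ioctl(dev_fd, VDUSE_VQ_SETUP_KICKFD, &efd);
 *      ... poll efd.fd and process the virtqueue on each kick ...
 */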
static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (!dev->vqs[i].num_max)
                        return false;

        return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

        spin_lock_irq(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
        spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);

        spin_lock_irq(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
        spin_unlock_irq(&vq->irq_lock);
}

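/*
 * Interrupt injection is deferred to vduse_irq_wq and is only allowed
 * once the driver has set DRIVER_OK, so a misbehaving daemon cannot
 * fire callbacks into a half-initialized virtio device.
 */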
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
                                    struct work_struct *irq_work)
{
        int ret = -EINVAL;

        down_read(&dev->rwsem);
        if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto unlock;

        ret = 0;
        queue_work(vduse_irq_wq, irq_work);
unlock:
        up_read(&dev->rwsem);

        return ret;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
{
        struct vduse_dev *dev = file->private_data;
        void __user *argp = (void __user *)arg;
        int ret;

        if (unlikely(dev->broken))
                return -EPERM;

        switch (cmd) {
        case VDUSE_IOTLB_GET_FD: {
                struct vduse_iotlb_entry entry;
                struct vhost_iotlb_map *map;
                struct vdpa_map_file *map_file;
                struct vduse_iova_domain *domain = dev->domain;
                struct file *f = NULL;

                ret = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof(entry)))
                        break;

                ret = -EINVAL;
                if (entry.start > entry.last)
                        break;

                spin_lock(&domain->iotlb_lock);
                map = vhost_iotlb_itree_first(domain->iotlb,
                                              entry.start, entry.last);
                if (map) {
                        map_file = (struct vdpa_map_file *)map->opaque;
                        f = get_file(map_file->file);
                        entry.offset = map_file->offset;
                        entry.start = map->start;
                        entry.last = map->last;
                        entry.perm = map->perm;
                }
                spin_unlock(&domain->iotlb_lock);
                ret = -EINVAL;
                if (!f)
                        break;

                ret = -EFAULT;
                if (copy_to_user(argp, &entry, sizeof(entry))) {
                        fput(f);
                        break;
                }
                ret = receive_fd(f, perm_to_file_flags(entry.perm));
                fput(f);
                break;
        }
        case VDUSE_DEV_GET_FEATURES:
                /*
                 * Just mirror what the driver wrote here.
                 * The driver is expected to check FEATURES_OK later.
                 */
                ret = put_user(dev->driver_features, (u64 __user *)argp);
                break;
        case VDUSE_DEV_SET_CONFIG: {
                struct vduse_config_data config;
                unsigned long size = offsetof(struct vduse_config_data,
                                              buffer);

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (config.offset > dev->config_size ||
                    config.length == 0 ||
                    config.length > dev->config_size - config.offset)
                        break;

                ret = -EFAULT;
                if (copy_from_user(dev->config + config.offset, argp + size,
                                   config.length))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_DEV_INJECT_CONFIG_IRQ:
                ret = vduse_dev_queue_irq_work(dev, &dev->inject);
                break;
        case VDUSE_VQ_SETUP: {
                struct vduse_vq_config config;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, sizeof(config)))
                        break;

                ret = -EINVAL;
                if (config.index >= dev->vq_num)
                        break;

                if (!is_mem_zero((const char *)config.reserved,
                                 sizeof(config.reserved)))
                        break;

                index = array_index_nospec(config.index, dev->vq_num);
                dev->vqs[index].num_max = config.max_size;
                ret = 0;
                break;
        }
        case VDUSE_VQ_GET_INFO: {
                struct vduse_vq_info vq_info;
                struct vduse_virtqueue *vq;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
                        break;

                ret = -EINVAL;
                if (vq_info.index >= dev->vq_num)
                        break;

                index = array_index_nospec(vq_info.index, dev->vq_num);
                vq = &dev->vqs[index];
                vq_info.desc_addr = vq->desc_addr;
                vq_info.driver_addr = vq->driver_addr;
                vq_info.device_addr = vq->device_addr;
                vq_info.num = vq->num;

                if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                        vq_info.packed.last_avail_counter =
                                vq->state.packed.last_avail_counter;
                        vq_info.packed.last_avail_idx =
                                vq->state.packed.last_avail_idx;
                        vq_info.packed.last_used_counter =
                                vq->state.packed.last_used_counter;
                        vq_info.packed.last_used_idx =
                                vq->state.packed.last_used_idx;
                } else {
                        vq_info.split.avail_index =
                                vq->state.split.avail_index;
                }

                vq_info.ready = vq->ready;

                ret = -EFAULT;
                if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_VQ_SETUP_KICKFD: {
                struct vduse_vq_eventfd eventfd;

                ret = -EFAULT;
                if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
                        break;

                ret = vduse_kickfd_setup(dev, &eventfd);
                break;
        }
        case VDUSE_VQ_INJECT_IRQ: {
                u32 index;

                ret = -EFAULT;
                if (get_user(index, (u32 __user *)argp))
                        break;

                ret = -EINVAL;
                if (index >= dev->vq_num)
                        break;

                index = array_index_nospec(index, dev->vq_num);
                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
        default:
                ret = -ENOIOCTLCMD;
                break;
        }

        return ret;
}

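/*
 * A minimal sketch of per-virtqueue setup from userspace
 * (illustrative only, error handling omitted; "dev_fd" is assumed
 * to be an open /dev/vduse/$NAME descriptor):
 *
 *      struct vduse_vq_config vq_cfg = {
 *              .index = 0,
 *              .max_size = 256,
 *      };
 *      __u32 index = 0;
 *
 *      ioctl(dev_fd, VDUSE_VQ_SETUP, &vq_cfg);
 *      ... later, after adding used descriptors ...
 *      ioctl(dev_fd, VDUSE_VQ_INJECT_IRQ, &index);
 */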
static int vduse_dev_release(struct inode *inode, struct file *file)
{
        struct vduse_dev *dev = file->private_data;

        spin_lock(&dev->msg_lock);
        /* Make sure the inflight messages can be processed after reconnection */
        list_splice_init(&dev->recv_list, &dev->send_list);
        spin_unlock(&dev->msg_lock);
        dev->connected = false;

        return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
        struct vduse_dev *dev;

        mutex_lock(&vduse_lock);
        dev = idr_find(&vduse_idr, minor);
        mutex_unlock(&vduse_lock);

        return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
        int ret;
        struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

        if (!dev)
                return -ENODEV;

        ret = -EBUSY;
        mutex_lock(&dev->lock);
        if (dev->connected)
                goto unlock;

        ret = 0;
        dev->connected = true;
        file->private_data = dev;
unlock:
        mutex_unlock(&dev->lock);

        return ret;
}

static const struct file_operations vduse_dev_fops = {
        .owner          = THIS_MODULE,
        .open           = vduse_dev_open,
        .release        = vduse_dev_release,
        .read_iter      = vduse_dev_read_iter,
        .write_iter     = vduse_dev_write_iter,
        .poll           = vduse_dev_poll,
        .unlocked_ioctl = vduse_dev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
        .llseek         = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
        struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (!dev)
                return NULL;

        mutex_init(&dev->lock);
        spin_lock_init(&dev->msg_lock);
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
        spin_lock_init(&dev->irq_lock);
        init_rwsem(&dev->rwsem);

        INIT_WORK(&dev->inject, vduse_dev_irq_inject);
        init_waitqueue_head(&dev->waitq);

        return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
        kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
        struct vduse_dev *dev;
        int id;

        idr_for_each_entry(&vduse_idr, dev, id)
                if (!strcmp(dev->name, name))
                        return dev;

        return NULL;
}

static int vduse_destroy_dev(char *name)
{
        struct vduse_dev *dev = vduse_find_dev(name);

        if (!dev)
                return -EINVAL;

        mutex_lock(&dev->lock);
        if (dev->vdev || dev->connected) {
                mutex_unlock(&dev->lock);
                return -EBUSY;
        }
        dev->connected = true;
        mutex_unlock(&dev->lock);

        vduse_dev_reset(dev);
        device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
        idr_remove(&vduse_idr, dev->minor);
        kvfree(dev->config);
        kfree(dev->vqs);
        vduse_domain_destroy(dev->domain);
        kfree(dev->name);
        vduse_dev_destroy(dev);
        module_put(THIS_MODULE);

        return 0;
}

static bool device_is_allowed(u32 device_id)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
                if (allowed_device_id[i] == device_id)
                        return true;

        return false;
}

static bool features_is_valid(u64 features)
{
        if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
                return false;

        /* Now we only support read-only configuration space */
        if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
                return false;

        return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
        if (!is_mem_zero((const char *)config->reserved,
                         sizeof(config->reserved)))
                return false;

        if (config->vq_align > PAGE_SIZE)
                return false;

        if (config->config_size > PAGE_SIZE)
                return false;

        if (!device_is_allowed(config->device_id))
                return false;

        if (!features_is_valid(config->features))
                return false;

        return true;
}

static ssize_t msg_timeout_show(struct device *device,
                                struct device_attribute *attr, char *buf)
{
        struct vduse_dev *dev = dev_get_drvdata(device);

        return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
{
        struct vduse_dev *dev = dev_get_drvdata(device);
        int ret;

        ret = kstrtouint(buf, 10, &dev->msg_timeout);
        if (ret < 0)
                return ret;

        return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
        &dev_attr_msg_timeout.attr,
        NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

static int vduse_create_dev(struct vduse_dev_config *config,
                            void *config_buf, u64 api_version)
{
        int i, ret;
        struct vduse_dev *dev;

        ret = -EEXIST;
        if (vduse_find_dev(config->name))
                goto err;

        ret = -ENOMEM;
        dev = vduse_dev_create();
        if (!dev)
                goto err;

        dev->api_version = api_version;
        dev->device_features = config->features;
        dev->device_id = config->device_id;
        dev->vendor_id = config->vendor_id;
        dev->name = kstrdup(config->name, GFP_KERNEL);
        if (!dev->name)
                goto err_str;

        dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
                                          VDUSE_BOUNCE_SIZE);
        if (!dev->domain)
                goto err_domain;

        dev->config = config_buf;
        dev->config_size = config->config_size;
        dev->vq_align = config->vq_align;
        dev->vq_num = config->vq_num;
        dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
        if (!dev->vqs)
                goto err_vqs;

        for (i = 0; i < dev->vq_num; i++) {
                dev->vqs[i].index = i;
                INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
                INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
                spin_lock_init(&dev->vqs[i].kick_lock);
                spin_lock_init(&dev->vqs[i].irq_lock);
        }

        ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
        if (ret < 0)
                goto err_idr;

        dev->minor = ret;
        dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
        dev->dev = device_create_with_groups(vduse_class, NULL,
                                MKDEV(MAJOR(vduse_major), dev->minor),
                                dev, vduse_dev_groups, "%s", config->name);
        if (IS_ERR(dev->dev)) {
                ret = PTR_ERR(dev->dev);
                goto err_dev;
        }
        __module_get(THIS_MODULE);

        return 0;
err_dev:
        idr_remove(&vduse_idr, dev->minor);
err_idr:
        kfree(dev->vqs);
err_vqs:
        vduse_domain_destroy(dev->domain);
err_domain:
        kfree(dev->name);
err_str:
        vduse_dev_destroy(dev);
err:
        return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
                        unsigned long arg)
{
        int ret;
        void __user *argp = (void __user *)arg;
        struct vduse_control *control = file->private_data;

        mutex_lock(&vduse_lock);
        switch (cmd) {
        case VDUSE_GET_API_VERSION:
                ret = put_user(control->api_version, (u64 __user *)argp);
                break;
        case VDUSE_SET_API_VERSION: {
                u64 api_version;

                ret = -EFAULT;
                if (get_user(api_version, (u64 __user *)argp))
                        break;

                ret = -EINVAL;
                if (api_version > VDUSE_API_VERSION)
                        break;

                ret = 0;
                control->api_version = api_version;
                break;
        }
        case VDUSE_CREATE_DEV: {
                struct vduse_dev_config config;
                unsigned long size = offsetof(struct vduse_dev_config, config);
                void *buf;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (!vduse_validate_config(&config))
                        break;

                buf = vmemdup_user(argp + size, config.config_size);
                if (IS_ERR(buf)) {
                        ret = PTR_ERR(buf);
                        break;
                }
                config.name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_create_dev(&config, buf, control->api_version);
                if (ret)
                        kvfree(buf);
                break;
        }
        case VDUSE_DESTROY_DEV: {
                char name[VDUSE_NAME_MAX];

                ret = -EFAULT;
                if (copy_from_user(name, argp, VDUSE_NAME_MAX))
                        break;

                name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_destroy_dev(name);
                break;
        }
        default:
                ret = -EINVAL;
                break;
        }
        mutex_unlock(&vduse_lock);

        return ret;
}

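/*
 * A minimal sketch of creating a device through the control node
 * (illustrative only, error handling omitted; "cfg" is assumed to be
 * a caller-filled struct vduse_dev_config naming the device "foo",
 * with its config space appended):
 *
 *      int ctrl_fd = open("/dev/vduse/control", O_RDWR);
 *      __u64 version = 0;
 *
 *      ioctl(ctrl_fd, VDUSE_SET_API_VERSION, &version);
 *      ioctl(ctrl_fd, VDUSE_CREATE_DEV, cfg);
 *      dev_fd = open("/dev/vduse/foo", O_RDWR);
 */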
static int vduse_release(struct inode *inode, struct file *file)
{
        struct vduse_control *control = file->private_data;

        kfree(control);
        return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
        struct vduse_control *control;

        control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
        if (!control)
                return -ENOMEM;

        control->api_version = VDUSE_API_VERSION;
        file->private_data = control;

        return 0;
}

static const struct file_operations vduse_ctrl_fops = {
        .owner          = THIS_MODULE,
        .open           = vduse_open,
        .release        = vduse_release,
        .unlocked_ioctl = vduse_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
        .llseek         = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
        return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

struct vduse_mgmt_dev {
        struct vdpa_mgmt_dev mgmt_dev;
        struct device dev;
};

static struct vduse_mgmt_dev *vduse_mgmt;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
        struct vduse_vdpa *vdev;
        int ret;

        if (dev->vdev)
                return -EEXIST;

        vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
                                 &vduse_vdpa_config_ops, 1, 1, name, true);
        if (IS_ERR(vdev))
                return PTR_ERR(vdev);

        dev->vdev = vdev;
        vdev->dev = dev;
        vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
        ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
        if (ret) {
                put_device(&vdev->vdpa.dev);
                return ret;
        }
        set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
        vdev->vdpa.dma_dev = &vdev->vdpa.dev;
        vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;

        return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
                        const struct vdpa_dev_set_config *config)
{
        struct vduse_dev *dev;
        int ret;

        mutex_lock(&vduse_lock);
        dev = vduse_find_dev(name);
        if (!dev || !vduse_dev_is_ready(dev)) {
                mutex_unlock(&vduse_lock);
                return -EINVAL;
        }
        ret = vduse_dev_init_vdpa(dev, name);
        mutex_unlock(&vduse_lock);
        if (ret)
                return ret;

        ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
        if (ret) {
                put_device(&dev->vdev->vdpa.dev);
                return ret;
        }

        return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
        _vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
        .dev_add = vdpa_dev_add,
        .dev_del = vdpa_dev_del,
};

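/*
 * Once the userspace daemon has set up its virtqueues, the device is
 * attached to a virtio bus through the vdpa management API, e.g. with
 * the iproute2 vdpa tool (assuming a VDUSE device named "foo"):
 *
 *      vdpa dev add name foo mgmtdev vduse
 */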
static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static void vduse_mgmtdev_release(struct device *dev)
{
        struct vduse_mgmt_dev *mgmt_dev;

        mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
        kfree(mgmt_dev);
}

static int vduse_mgmtdev_init(void)
{
        int ret;

        vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
        if (!vduse_mgmt)
                return -ENOMEM;

        ret = dev_set_name(&vduse_mgmt->dev, "vduse");
        if (ret) {
                kfree(vduse_mgmt);
                return ret;
        }

        vduse_mgmt->dev.release = vduse_mgmtdev_release;

        ret = device_register(&vduse_mgmt->dev);
        if (ret)
                goto dev_reg_err;

        vduse_mgmt->mgmt_dev.id_table = id_table;
        vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
        vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
        ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
        if (ret)
                device_unregister(&vduse_mgmt->dev);

        return ret;

dev_reg_err:
        put_device(&vduse_mgmt->dev);
        return ret;
}

static void vduse_mgmtdev_exit(void)
{
        vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
        device_unregister(&vduse_mgmt->dev);
}

static int vduse_init(void)
{
        int ret;
        struct device *dev;

        vduse_class = class_create(THIS_MODULE, "vduse");
        if (IS_ERR(vduse_class))
                return PTR_ERR(vduse_class);

        vduse_class->devnode = vduse_devnode;

        ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
        if (ret)
                goto err_chardev_region;

        /* /dev/vduse/control */
        cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
        vduse_ctrl_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
        if (ret)
                goto err_ctrl_cdev;

        dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
        if (IS_ERR(dev)) {
                ret = PTR_ERR(dev);
                goto err_device;
        }

        /* /dev/vduse/$DEVICE */
        cdev_init(&vduse_cdev, &vduse_dev_fops);
        vduse_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
                       VDUSE_DEV_MAX - 1);
        if (ret)
                goto err_cdev;

        vduse_irq_wq = alloc_workqueue("vduse-irq",
                                WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
        if (!vduse_irq_wq) {
                ret = -ENOMEM;
                goto err_wq;
        }

        ret = vduse_domain_init();
        if (ret)
                goto err_domain;

        ret = vduse_mgmtdev_init();
        if (ret)
                goto err_mgmtdev;

        return 0;
err_mgmtdev:
        vduse_domain_exit();
err_domain:
        destroy_workqueue(vduse_irq_wq);
err_wq:
        cdev_del(&vduse_cdev);
err_cdev:
        device_destroy(vduse_class, vduse_major);
err_device:
        cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
        class_destroy(vduse_class);
        return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
        vduse_mgmtdev_exit();
        vduse_domain_exit();
        destroy_workqueue(vduse_irq_wq);
        cdev_del(&vduse_cdev);
        device_destroy(vduse_class, vduse_major);
        cdev_del(&vduse_ctrl_cdev);
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
        class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);