linux/drivers/vdpa/vdpa_user/vduse_dev.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

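/*
 * Per-virtqueue state. The kick path (kick_lock, kickfd, kicked) and the
 * interrupt path (irq_lock, cb, inject) are protected separately, so kicks
 * coming from the vDPA bus and interrupt injection coming from userspace
 * can proceed concurrently.
 */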
struct vduse_virtqueue {
        u16 index;
        u16 num_max;
        u32 num;
        u64 desc_addr;
        u64 driver_addr;
        u64 device_addr;
        struct vdpa_vq_state state;
        bool ready;
        bool kicked;
        spinlock_t kick_lock;
        spinlock_t irq_lock;
        struct eventfd_ctx *kickfd;
        struct vdpa_callback cb;
        struct work_struct inject;
        struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
        struct vdpa_device vdpa;
        struct vduse_dev *dev;
};

struct vduse_dev {
        struct vduse_vdpa *vdev;
        struct device *dev;
        struct vduse_virtqueue *vqs;
        struct vduse_iova_domain *domain;
        char *name;
        struct mutex lock;
        spinlock_t msg_lock;
        u64 msg_unique;
        u32 msg_timeout;
        wait_queue_head_t waitq;
        struct list_head send_list;
        struct list_head recv_list;
        struct vdpa_callback config_cb;
        struct work_struct inject;
        spinlock_t irq_lock;
        struct rw_semaphore rwsem;
        int minor;
        bool broken;
        bool connected;
        u64 api_version;
        u64 device_features;
        u64 driver_features;
        u32 device_id;
        u32 vendor_id;
        u32 generation;
        u32 config_size;
        void *config;
        u8 status;
        u32 vq_num;
        u32 vq_align;
};

struct vduse_dev_msg {
        struct vduse_dev_request req;
        struct vduse_dev_response resp;
        struct list_head list;
        wait_queue_head_t waitq;
        bool completed;
};

struct vduse_control {
        u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
        VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
        struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

        return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
        struct vdpa_device *vdpa = dev_to_vdpa(dev);

        return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
                                            uint32_t request_id)
{
        struct vduse_dev_msg *msg;

        list_for_each_entry(msg, head, list) {
                if (msg->req.request_id == request_id) {
                        list_del(&msg->list);
                        return msg;
                }
        }

        return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
        struct vduse_dev_msg *msg = NULL;

        if (!list_empty(head)) {
                msg = list_first_entry(head, struct vduse_dev_msg, list);
                list_del(&msg->list);
        }

        return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
                              struct vduse_dev_msg *msg)
{
        list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
        struct vduse_dev_msg *msg, *tmp;

        if (unlikely(dev->broken))
                return;

        list_splice_init(&dev->recv_list, &dev->send_list);
        list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
                list_del(&msg->list);
                msg->completed = 1;
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                wake_up(&msg->waitq);
        }
        dev->broken = true;
        wake_up(&dev->waitq);
}

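/*
 * Send a request to the userspace daemon and wait for its response.
 * The wait is killable and, unless msg_timeout is zero, bounded by
 * msg_timeout seconds; on timeout the whole device is marked broken
 * and every in-flight message is failed.
 */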
static int vduse_dev_msg_sync(struct vduse_dev *dev,
                              struct vduse_dev_msg *msg)
{
        int ret;

        if (unlikely(dev->broken))
                return -EIO;

        init_waitqueue_head(&msg->waitq);
        spin_lock(&dev->msg_lock);
        if (unlikely(dev->broken)) {
                spin_unlock(&dev->msg_lock);
                return -EIO;
        }
        msg->req.request_id = dev->msg_unique++;
        vduse_enqueue_msg(&dev->send_list, msg);
        wake_up(&dev->waitq);
        spin_unlock(&dev->msg_lock);
        if (dev->msg_timeout)
                ret = wait_event_killable_timeout(msg->waitq, msg->completed,
                                                  (long)dev->msg_timeout * HZ);
        else
                ret = wait_event_killable(msg->waitq, msg->completed);

        spin_lock(&dev->msg_lock);
        if (!msg->completed) {
                list_del(&msg->list);
                msg->resp.result = VDUSE_REQ_RESULT_FAILED;
                /* Mark the device as malfunctioning when there is a timeout */
                if (!ret)
                        vduse_dev_broken(dev);
        }
        ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
        spin_unlock(&dev->msg_lock);

        return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
                                         struct vduse_virtqueue *vq,
                                         struct vdpa_vq_state_packed *packed)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        packed->last_avail_counter =
                        msg.resp.vq_state.packed.last_avail_counter & 0x0001;
        packed->last_avail_idx =
                        msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
        packed->last_used_counter =
                        msg.resp.vq_state.packed.last_used_counter & 0x0001;
        packed->last_used_idx =
                        msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

        return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
                                        struct vduse_virtqueue *vq,
                                        struct vdpa_vq_state_split *split)
{
        struct vduse_dev_msg msg = { 0 };
        int ret;

        msg.req.type = VDUSE_GET_VQ_STATE;
        msg.req.vq_state.index = vq->index;

        ret = vduse_dev_msg_sync(dev, &msg);
        if (ret)
                return ret;

        split->avail_index = msg.resp.vq_state.split.avail_index;

        return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
        struct vduse_dev_msg msg = { 0 };

        msg.req.type = VDUSE_SET_STATUS;
        msg.req.s.status = status;

        return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
                                  u64 start, u64 last)
{
        struct vduse_dev_msg msg = { 0 };

        if (last < start)
                return -EINVAL;

        msg.req.type = VDUSE_UPDATE_IOTLB;
        msg.req.iova.start = start;
        msg.req.iova.last = last;

        return vduse_dev_msg_sync(dev, &msg);
}

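/*
 * read() side of the userspace message channel: hand the oldest pending
 * request to the daemon and park it on recv_list until the matching
 * response arrives via vduse_dev_write_iter().
 */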
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_msg *msg;
        int size = sizeof(struct vduse_dev_request);
        ssize_t ret;

        if (iov_iter_count(to) < size)
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        while (1) {
                msg = vduse_dequeue_msg(&dev->send_list);
                if (msg)
                        break;

                ret = -EAGAIN;
                if (file->f_flags & O_NONBLOCK)
                        goto unlock;

                spin_unlock(&dev->msg_lock);
                ret = wait_event_interruptible_exclusive(dev->waitq,
                                        !list_empty(&dev->send_list));
                if (ret)
                        return ret;

                spin_lock(&dev->msg_lock);
        }
        spin_unlock(&dev->msg_lock);
        ret = copy_to_iter(&msg->req, size, to);
        spin_lock(&dev->msg_lock);
        if (ret != size) {
                ret = -EFAULT;
                vduse_enqueue_msg(&dev->send_list, msg);
                goto unlock;
        }
        vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
        int i;

        for (i = 0; i < size; i++) {
                if (ptr[i])
                        return false;
        }
        return true;
}

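/*
 * write() side of the userspace message channel: match the response to
 * its in-flight request by request_id and wake up the waiter in
 * vduse_dev_msg_sync().
 */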
static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
        struct file *file = iocb->ki_filp;
        struct vduse_dev *dev = file->private_data;
        struct vduse_dev_response resp;
        struct vduse_dev_msg *msg;
        size_t ret;

        ret = copy_from_iter(&resp, sizeof(resp), from);
        if (ret != sizeof(resp))
                return -EINVAL;

        if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
                return -EINVAL;

        spin_lock(&dev->msg_lock);
        msg = vduse_find_msg(&dev->recv_list, resp.request_id);
        if (!msg) {
                ret = -ENOENT;
                goto unlock;
        }

        memcpy(&msg->resp, &resp, sizeof(resp));
        msg->completed = 1;
        wake_up(&msg->waitq);
unlock:
        spin_unlock(&dev->msg_lock);

        return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
        struct vduse_dev *dev = file->private_data;
        __poll_t mask = 0;

        poll_wait(file, &dev->waitq, wait);

        spin_lock(&dev->msg_lock);

        if (unlikely(dev->broken))
                mask |= EPOLLERR;
        if (!list_empty(&dev->send_list))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (!list_empty(&dev->recv_list))
                mask |= EPOLLOUT | EPOLLWRNORM;

        spin_unlock(&dev->msg_lock);

        return mask;
}

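/*
 * Reset all device state. Taking dev->rwsem for writing excludes
 * vduse_dev_queue_irq_work(), so no new interrupt work can be queued
 * while the callbacks are being cleared and flushed.
 */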
static void vduse_dev_reset(struct vduse_dev *dev)
{
        int i;
        struct vduse_iova_domain *domain = dev->domain;

        /* The coherent mappings are handled in vduse_dev_free_coherent() */
        if (domain->bounce_map)
                vduse_domain_reset_bounce_map(domain);

        down_write(&dev->rwsem);

        dev->status = 0;
        dev->driver_features = 0;
        dev->generation++;
        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = NULL;
        dev->config_cb.private = NULL;
        spin_unlock(&dev->irq_lock);
        flush_work(&dev->inject);

        for (i = 0; i < dev->vq_num; i++) {
                struct vduse_virtqueue *vq = &dev->vqs[i];

                vq->ready = false;
                vq->desc_addr = 0;
                vq->driver_addr = 0;
                vq->device_addr = 0;
                vq->num = 0;
                memset(&vq->state, 0, sizeof(vq->state));

                spin_lock(&vq->kick_lock);
                vq->kicked = false;
                if (vq->kickfd)
                        eventfd_ctx_put(vq->kickfd);
                vq->kickfd = NULL;
                spin_unlock(&vq->kick_lock);

                spin_lock(&vq->irq_lock);
                vq->cb.callback = NULL;
                vq->cb.private = NULL;
                spin_unlock(&vq->irq_lock);
                flush_work(&vq->inject);
                flush_work(&vq->kick);
        }

        up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
                                u64 desc_area, u64 driver_area,
                                u64 device_area)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->desc_addr = desc_area;
        vq->driver_addr = driver_area;
        vq->device_addr = device_area;

        return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
        spin_lock(&vq->kick_lock);
        if (!vq->ready)
                goto unlock;

        if (vq->kickfd)
                eventfd_signal(vq->kickfd, 1);
        else
                vq->kicked = true;
unlock:
        spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, kick);

        vduse_vq_kick(vq);
}

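/*
 * Kicks can arrive in contexts where it is not safe to signal an eventfd
 * directly (see eventfd_signal_allowed()); defer those to a workqueue.
 */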
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (!eventfd_signal_allowed()) {
                schedule_work(&vq->kick);
                return;
        }
        vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
                              struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        spin_lock(&vq->irq_lock);
        vq->cb.callback = cb->callback;
        vq->cb.private = cb->private;
        spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
                                        u16 idx, bool ready)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
                                const struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                vq->state.packed.last_avail_counter =
                                state->packed.last_avail_counter;
                vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
                vq->state.packed.last_used_counter =
                                state->packed.last_used_counter;
                vq->state.packed.last_used_idx = state->packed.last_used_idx;
        } else
                vq->state.split.avail_index = state->split.avail_index;

        return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
                                struct vdpa_vq_state *state)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = &dev->vqs[idx];

        if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
                return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

        return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vq_align;
}

static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_features;
}

static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->driver_features = features;
        return 0;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
                                  struct vdpa_callback *cb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        spin_lock(&dev->irq_lock);
        dev->config_cb.callback = cb->callback;
        dev->config_cb.private = cb->private;
        spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        u16 num_max = 0;
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (num_max < dev->vqs[i].num_max)
                        num_max = dev->vqs[i].num_max;

        return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        if (vduse_dev_set_status(dev, status))
                return;

        dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
                                  void *buf, unsigned int len)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        if (offset > dev->config_size ||
            len > dev->config_size - offset)
                return;

        memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
                        const void *buf, unsigned int len)
{
        /* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret = vduse_dev_set_status(dev, 0);

        vduse_dev_reset(dev);

        return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
                                struct vhost_iotlb *iotlb)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        int ret;

        ret = vduse_domain_set_map(dev->domain, iotlb);
        if (ret)
                return ret;

        ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
        if (ret) {
                vduse_domain_clear_map(dev->domain, iotlb);
                return ret;
        }

        return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);

        dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
        .set_vq_address         = vduse_vdpa_set_vq_address,
        .kick_vq                = vduse_vdpa_kick_vq,
        .set_vq_cb              = vduse_vdpa_set_vq_cb,
        .set_vq_num             = vduse_vdpa_set_vq_num,
        .set_vq_ready           = vduse_vdpa_set_vq_ready,
        .get_vq_ready           = vduse_vdpa_get_vq_ready,
        .set_vq_state           = vduse_vdpa_set_vq_state,
        .get_vq_state           = vduse_vdpa_get_vq_state,
        .get_vq_align           = vduse_vdpa_get_vq_align,
        .get_features           = vduse_vdpa_get_features,
        .set_features           = vduse_vdpa_set_features,
        .set_config_cb          = vduse_vdpa_set_config_cb,
        .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
        .get_device_id          = vduse_vdpa_get_device_id,
        .get_vendor_id          = vduse_vdpa_get_vendor_id,
        .get_status             = vduse_vdpa_get_status,
        .set_status             = vduse_vdpa_set_status,
        .get_config_size        = vduse_vdpa_get_config_size,
        .get_config             = vduse_vdpa_get_config,
        .set_config             = vduse_vdpa_set_config,
        .get_generation         = vduse_vdpa_get_generation,
        .reset                  = vduse_vdpa_reset,
        .set_map                = vduse_vdpa_set_map,
        .free                   = vduse_vdpa_free,
};

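/*
 * DMA operations for the vDPA device. Every mapping is routed through the
 * per-device IOVA domain, which bounce-buffers guest memory so that the
 * userspace daemon can access it safely.
 */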
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
                                size_t size, enum dma_data_direction dir,
                                unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
                                        dma_addr_t *dma_addr, gfp_t flag,
                                        unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;
        unsigned long iova;
        void *addr;

        *dma_addr = DMA_MAPPING_ERROR;
        addr = vduse_domain_alloc_coherent(domain, size,
                                (dma_addr_t *)&iova, flag, attrs);
        if (!addr)
                return NULL;

        *dma_addr = (dma_addr_t)iova;

        return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
                                        void *vaddr, dma_addr_t dma_addr,
                                        unsigned long attrs)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
        struct vduse_dev *vdev = dev_to_vduse(dev);
        struct vduse_iova_domain *domain = vdev->domain;

        return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
        .map_page = vduse_dev_map_page,
        .unmap_page = vduse_dev_unmap_page,
        .alloc = vduse_dev_alloc_coherent,
        .free = vduse_dev_free_coherent,
        .max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
        unsigned int flags = 0;

        switch (perm) {
        case VDUSE_ACCESS_WO:
                flags |= O_WRONLY;
                break;
        case VDUSE_ACCESS_RO:
                flags |= O_RDONLY;
                break;
        case VDUSE_ACCESS_RW:
                flags |= O_RDWR;
                break;
        default:
                WARN(1, "invalid vhost IOTLB permission\n");
                break;
        }

        return flags;
}

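/*
 * Assign or deassign the kick eventfd for a virtqueue. If the queue was
 * kicked while no eventfd was assigned, deliver the pending kick now.
 */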
static int vduse_kickfd_setup(struct vduse_dev *dev,
                        struct vduse_vq_eventfd *eventfd)
{
        struct eventfd_ctx *ctx = NULL;
        struct vduse_virtqueue *vq;
        u32 index;

        if (eventfd->index >= dev->vq_num)
                return -EINVAL;

        index = array_index_nospec(eventfd->index, dev->vq_num);
        vq = &dev->vqs[index];
        if (eventfd->fd >= 0) {
                ctx = eventfd_ctx_fdget(eventfd->fd);
                if (IS_ERR(ctx))
                        return PTR_ERR(ctx);
        } else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
                return 0;

        spin_lock(&vq->kick_lock);
        if (vq->kickfd)
                eventfd_ctx_put(vq->kickfd);
        vq->kickfd = ctx;
        if (vq->ready && vq->kicked && vq->kickfd) {
                eventfd_signal(vq->kickfd, 1);
                vq->kicked = false;
        }
        spin_unlock(&vq->kick_lock);

        return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
        int i;

        for (i = 0; i < dev->vq_num; i++)
                if (!dev->vqs[i].num_max)
                        return false;

        return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

        spin_lock_irq(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
        spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);

        spin_lock_irq(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
        spin_unlock_irq(&vq->irq_lock);
}

static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
                                    struct work_struct *irq_work)
{
        int ret = -EINVAL;

        down_read(&dev->rwsem);
        if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto unlock;

        ret = 0;
        queue_work(vduse_irq_wq, irq_work);
unlock:
        up_read(&dev->rwsem);

        return ret;
}

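/* ioctls on /dev/vduse/$DEVICE, issued by the userspace daemon */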
static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
                            unsigned long arg)
{
        struct vduse_dev *dev = file->private_data;
        void __user *argp = (void __user *)arg;
        int ret;

        if (unlikely(dev->broken))
                return -EPERM;

        switch (cmd) {
        case VDUSE_IOTLB_GET_FD: {
                struct vduse_iotlb_entry entry;
                struct vhost_iotlb_map *map;
                struct vdpa_map_file *map_file;
                struct vduse_iova_domain *domain = dev->domain;
                struct file *f = NULL;

                ret = -EFAULT;
                if (copy_from_user(&entry, argp, sizeof(entry)))
                        break;

                ret = -EINVAL;
                if (entry.start > entry.last)
                        break;

                spin_lock(&domain->iotlb_lock);
                map = vhost_iotlb_itree_first(domain->iotlb,
                                              entry.start, entry.last);
                if (map) {
                        map_file = (struct vdpa_map_file *)map->opaque;
                        f = get_file(map_file->file);
                        entry.offset = map_file->offset;
                        entry.start = map->start;
                        entry.last = map->last;
                        entry.perm = map->perm;
                }
                spin_unlock(&domain->iotlb_lock);
                ret = -EINVAL;
                if (!f)
                        break;

                ret = -EFAULT;
                if (copy_to_user(argp, &entry, sizeof(entry))) {
                        fput(f);
                        break;
                }
                ret = receive_fd(f, perm_to_file_flags(entry.perm));
                fput(f);
                break;
        }
        case VDUSE_DEV_GET_FEATURES:
                /*
                 * Just mirror what the driver wrote here.
                 * The driver is expected to check FEATURES_OK later.
                 */
                ret = put_user(dev->driver_features, (u64 __user *)argp);
                break;
        case VDUSE_DEV_SET_CONFIG: {
                struct vduse_config_data config;
                unsigned long size = offsetof(struct vduse_config_data,
                                              buffer);

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (config.offset > dev->config_size ||
                    config.length == 0 ||
                    config.length > dev->config_size - config.offset)
                        break;

                ret = -EFAULT;
                if (copy_from_user(dev->config + config.offset, argp + size,
                                   config.length))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_DEV_INJECT_CONFIG_IRQ:
                ret = vduse_dev_queue_irq_work(dev, &dev->inject);
                break;
        case VDUSE_VQ_SETUP: {
                struct vduse_vq_config config;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, sizeof(config)))
                        break;

                ret = -EINVAL;
                if (config.index >= dev->vq_num)
                        break;

                if (!is_mem_zero((const char *)config.reserved,
                                 sizeof(config.reserved)))
                        break;

                index = array_index_nospec(config.index, dev->vq_num);
                dev->vqs[index].num_max = config.max_size;
                ret = 0;
                break;
        }
        case VDUSE_VQ_GET_INFO: {
                struct vduse_vq_info vq_info;
                struct vduse_virtqueue *vq;
                u32 index;

                ret = -EFAULT;
                if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
                        break;

                ret = -EINVAL;
                if (vq_info.index >= dev->vq_num)
                        break;

                index = array_index_nospec(vq_info.index, dev->vq_num);
                vq = &dev->vqs[index];
                vq_info.desc_addr = vq->desc_addr;
                vq_info.driver_addr = vq->driver_addr;
                vq_info.device_addr = vq->device_addr;
                vq_info.num = vq->num;

                if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
                        vq_info.packed.last_avail_counter =
                                vq->state.packed.last_avail_counter;
                        vq_info.packed.last_avail_idx =
                                vq->state.packed.last_avail_idx;
                        vq_info.packed.last_used_counter =
                                vq->state.packed.last_used_counter;
                        vq_info.packed.last_used_idx =
                                vq->state.packed.last_used_idx;
                } else
                        vq_info.split.avail_index =
                                vq->state.split.avail_index;

                vq_info.ready = vq->ready;

                ret = -EFAULT;
                if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
                        break;

                ret = 0;
                break;
        }
        case VDUSE_VQ_SETUP_KICKFD: {
                struct vduse_vq_eventfd eventfd;

                ret = -EFAULT;
                if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
                        break;

                ret = vduse_kickfd_setup(dev, &eventfd);
                break;
        }
        case VDUSE_VQ_INJECT_IRQ: {
                u32 index;

                ret = -EFAULT;
                if (get_user(index, (u32 __user *)argp))
                        break;

                ret = -EINVAL;
                if (index >= dev->vq_num)
                        break;

                index = array_index_nospec(index, dev->vq_num);
                ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
                break;
        }
        default:
                ret = -ENOIOCTLCMD;
                break;
        }

        return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
        struct vduse_dev *dev = file->private_data;

        spin_lock(&dev->msg_lock);
        /* Make sure the inflight messages can be processed after reconnection */
        list_splice_init(&dev->recv_list, &dev->send_list);
        spin_unlock(&dev->msg_lock);
        dev->connected = false;

        return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
        struct vduse_dev *dev;

        mutex_lock(&vduse_lock);
        dev = idr_find(&vduse_idr, minor);
        mutex_unlock(&vduse_lock);

        return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
        int ret;
        struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

        if (!dev)
                return -ENODEV;

        ret = -EBUSY;
        mutex_lock(&dev->lock);
        if (dev->connected)
                goto unlock;

        ret = 0;
        dev->connected = true;
        file->private_data = dev;
unlock:
        mutex_unlock(&dev->lock);

        return ret;
}

static const struct file_operations vduse_dev_fops = {
        .owner          = THIS_MODULE,
        .open           = vduse_dev_open,
        .release        = vduse_dev_release,
        .read_iter      = vduse_dev_read_iter,
        .write_iter     = vduse_dev_write_iter,
        .poll           = vduse_dev_poll,
        .unlocked_ioctl = vduse_dev_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
        .llseek         = noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
        struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (!dev)
                return NULL;

        mutex_init(&dev->lock);
        spin_lock_init(&dev->msg_lock);
        INIT_LIST_HEAD(&dev->send_list);
        INIT_LIST_HEAD(&dev->recv_list);
        spin_lock_init(&dev->irq_lock);
        init_rwsem(&dev->rwsem);

        INIT_WORK(&dev->inject, vduse_dev_irq_inject);
        init_waitqueue_head(&dev->waitq);

        return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
        kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
        struct vduse_dev *dev;
        int id;

        idr_for_each_entry(&vduse_idr, dev, id)
                if (!strcmp(dev->name, name))
                        return dev;

        return NULL;
}

static int vduse_destroy_dev(char *name)
{
        struct vduse_dev *dev = vduse_find_dev(name);

        if (!dev)
                return -EINVAL;

        mutex_lock(&dev->lock);
        if (dev->vdev || dev->connected) {
                mutex_unlock(&dev->lock);
                return -EBUSY;
        }
        dev->connected = true;
        mutex_unlock(&dev->lock);

        vduse_dev_reset(dev);
        device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
        idr_remove(&vduse_idr, dev->minor);
        kvfree(dev->config);
        kfree(dev->vqs);
        vduse_domain_destroy(dev->domain);
        kfree(dev->name);
        vduse_dev_destroy(dev);
        module_put(THIS_MODULE);

        return 0;
}

static bool device_is_allowed(u32 device_id)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
                if (allowed_device_id[i] == device_id)
                        return true;

        return false;
}

static bool features_is_valid(u64 features)
{
        if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
                return false;

        /* Now we only support read-only configuration space */
        if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
                return false;

        return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
        if (!is_mem_zero((const char *)config->reserved,
                         sizeof(config->reserved)))
                return false;

        if (config->vq_align > PAGE_SIZE)
                return false;

        if (config->config_size > PAGE_SIZE)
                return false;

        if (!device_is_allowed(config->device_id))
                return false;

        if (!features_is_valid(config->features))
                return false;

        return true;
}

static ssize_t msg_timeout_show(struct device *device,
                                struct device_attribute *attr, char *buf)
{
        struct vduse_dev *dev = dev_get_drvdata(device);

        return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
{
        struct vduse_dev *dev = dev_get_drvdata(device);
        int ret;

        ret = kstrtouint(buf, 10, &dev->msg_timeout);
        if (ret < 0)
                return ret;

        return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
        &dev_attr_msg_timeout.attr,
        NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

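/*
 * Create a VDUSE device instance: allocate the IOVA domain and the
 * virtqueues, reserve a minor in the IDR (minor 0 is reserved for the
 * control device) and create the /dev/vduse/$NAME char device node.
 */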
static int vduse_create_dev(struct vduse_dev_config *config,
                            void *config_buf, u64 api_version)
{
        int i, ret;
        struct vduse_dev *dev;

        ret = -EEXIST;
        if (vduse_find_dev(config->name))
                goto err;

        ret = -ENOMEM;
        dev = vduse_dev_create();
        if (!dev)
                goto err;

        dev->api_version = api_version;
        dev->device_features = config->features;
        dev->device_id = config->device_id;
        dev->vendor_id = config->vendor_id;
        dev->name = kstrdup(config->name, GFP_KERNEL);
        if (!dev->name)
                goto err_str;

        dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
                                          VDUSE_BOUNCE_SIZE);
        if (!dev->domain)
                goto err_domain;

        dev->config = config_buf;
        dev->config_size = config->config_size;
        dev->vq_align = config->vq_align;
        dev->vq_num = config->vq_num;
        dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
        if (!dev->vqs)
                goto err_vqs;

        for (i = 0; i < dev->vq_num; i++) {
                dev->vqs[i].index = i;
                INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
                INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
                spin_lock_init(&dev->vqs[i].kick_lock);
                spin_lock_init(&dev->vqs[i].irq_lock);
        }

        ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
        if (ret < 0)
                goto err_idr;

        dev->minor = ret;
        dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
        dev->dev = device_create(vduse_class, NULL,
                                 MKDEV(MAJOR(vduse_major), dev->minor),
                                 dev, "%s", config->name);
        if (IS_ERR(dev->dev)) {
                ret = PTR_ERR(dev->dev);
                goto err_dev;
        }
        __module_get(THIS_MODULE);

        return 0;
err_dev:
        idr_remove(&vduse_idr, dev->minor);
err_idr:
        kfree(dev->vqs);
err_vqs:
        vduse_domain_destroy(dev->domain);
err_domain:
        kfree(dev->name);
err_str:
        vduse_dev_destroy(dev);
err:
        kvfree(config_buf);
        return ret;
}

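/* ioctls on /dev/vduse/control: API version negotiation, device creation/destruction */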
static long vduse_ioctl(struct file *file, unsigned int cmd,
                        unsigned long arg)
{
        int ret;
        void __user *argp = (void __user *)arg;
        struct vduse_control *control = file->private_data;

        mutex_lock(&vduse_lock);
        switch (cmd) {
        case VDUSE_GET_API_VERSION:
                ret = put_user(control->api_version, (u64 __user *)argp);
                break;
        case VDUSE_SET_API_VERSION: {
                u64 api_version;

                ret = -EFAULT;
                if (get_user(api_version, (u64 __user *)argp))
                        break;

                ret = -EINVAL;
                if (api_version > VDUSE_API_VERSION)
                        break;

                ret = 0;
                control->api_version = api_version;
                break;
        }
        case VDUSE_CREATE_DEV: {
                struct vduse_dev_config config;
                unsigned long size = offsetof(struct vduse_dev_config, config);
                void *buf;

                ret = -EFAULT;
                if (copy_from_user(&config, argp, size))
                        break;

                ret = -EINVAL;
                if (!vduse_validate_config(&config))
                        break;

                buf = vmemdup_user(argp + size, config.config_size);
                if (IS_ERR(buf)) {
                        ret = PTR_ERR(buf);
                        break;
                }
                config.name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_create_dev(&config, buf, control->api_version);
                break;
        }
        case VDUSE_DESTROY_DEV: {
                char name[VDUSE_NAME_MAX];

                ret = -EFAULT;
                if (copy_from_user(name, argp, VDUSE_NAME_MAX))
                        break;

                name[VDUSE_NAME_MAX - 1] = '\0';
                ret = vduse_destroy_dev(name);
                break;
        }
        default:
                ret = -EINVAL;
                break;
        }
        mutex_unlock(&vduse_lock);

        return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
        struct vduse_control *control = file->private_data;

        kfree(control);
        return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
        struct vduse_control *control;

        control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
        if (!control)
                return -ENOMEM;

        control->api_version = VDUSE_API_VERSION;
        file->private_data = control;

        return 0;
}

static const struct file_operations vduse_ctrl_fops = {
        .owner          = THIS_MODULE,
        .open           = vduse_open,
        .release        = vduse_release,
        .unlocked_ioctl = vduse_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
        .llseek         = noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
        return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static void vduse_mgmtdev_release(struct device *dev)
{
}

static struct device vduse_mgmtdev = {
        .init_name = "vduse",
        .release = vduse_mgmtdev_release,
};

static struct vdpa_mgmt_dev mgmt_dev;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
        struct vduse_vdpa *vdev;
        int ret;

        if (dev->vdev)
                return -EEXIST;

        vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
                                 &vduse_vdpa_config_ops, name, true);
        if (IS_ERR(vdev))
                return PTR_ERR(vdev);

        dev->vdev = vdev;
        vdev->dev = dev;
        vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
        ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
        if (ret) {
                put_device(&vdev->vdpa.dev);
                return ret;
        }
        set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
        vdev->vdpa.dma_dev = &vdev->vdpa.dev;
        vdev->vdpa.mdev = &mgmt_dev;

        return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
                        const struct vdpa_dev_set_config *config)
{
        struct vduse_dev *dev;
        int ret;

        mutex_lock(&vduse_lock);
        dev = vduse_find_dev(name);
        if (!dev || !vduse_dev_is_ready(dev)) {
                mutex_unlock(&vduse_lock);
                return -EINVAL;
        }
        ret = vduse_dev_init_vdpa(dev, name);
        mutex_unlock(&vduse_lock);
        if (ret)
                return ret;

        ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
        if (ret) {
                put_device(&dev->vdev->vdpa.dev);
                return ret;
        }

        return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
        _vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
        .dev_add = vdpa_dev_add,
        .dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
        { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
        { 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
        .device = &vduse_mgmtdev,
        .id_table = id_table,
        .ops = &vdpa_dev_mgmtdev_ops,
};

static int vduse_mgmtdev_init(void)
{
        int ret;

        ret = device_register(&vduse_mgmtdev);
        if (ret)
                return ret;

        ret = vdpa_mgmtdev_register(&mgmt_dev);
        if (ret)
                goto err;

        return 0;
err:
        device_unregister(&vduse_mgmtdev);
        return ret;
}

static void vduse_mgmtdev_exit(void)
{
        vdpa_mgmtdev_unregister(&mgmt_dev);
        device_unregister(&vduse_mgmtdev);
}

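/*
 * Module init: minor 0 of the "vduse" char region backs /dev/vduse/control,
 * minors 1..VDUSE_DEV_MAX-1 back the per-device nodes.
 */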
static int vduse_init(void)
{
        int ret;
        struct device *dev;

        vduse_class = class_create(THIS_MODULE, "vduse");
        if (IS_ERR(vduse_class))
                return PTR_ERR(vduse_class);

        vduse_class->devnode = vduse_devnode;
        vduse_class->dev_groups = vduse_dev_groups;

        ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
        if (ret)
                goto err_chardev_region;

        /* /dev/vduse/control */
        cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
        vduse_ctrl_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
        if (ret)
                goto err_ctrl_cdev;

        dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
        if (IS_ERR(dev)) {
                ret = PTR_ERR(dev);
                goto err_device;
        }

        /* /dev/vduse/$DEVICE */
        cdev_init(&vduse_cdev, &vduse_dev_fops);
        vduse_cdev.owner = THIS_MODULE;
        ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
                       VDUSE_DEV_MAX - 1);
        if (ret)
                goto err_cdev;

        vduse_irq_wq = alloc_workqueue("vduse-irq",
                                WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
        if (!vduse_irq_wq) {
                ret = -ENOMEM;
                goto err_wq;
        }

        ret = vduse_domain_init();
        if (ret)
                goto err_domain;

        ret = vduse_mgmtdev_init();
        if (ret)
                goto err_mgmtdev;

        return 0;
err_mgmtdev:
        vduse_domain_exit();
err_domain:
        destroy_workqueue(vduse_irq_wq);
err_wq:
        cdev_del(&vduse_cdev);
err_cdev:
        device_destroy(vduse_class, vduse_major);
err_device:
        cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
        class_destroy(vduse_class);
        return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
        vduse_mgmtdev_exit();
        vduse_domain_exit();
        destroy_workqueue(vduse_irq_wq);
        cdev_del(&vduse_cdev);
        device_destroy(vduse_class, vduse_major);
        cdev_del(&vduse_ctrl_cdev);
        unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
        class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);