// SPDX-License-Identifier: GPL-2.0-only
/*
 * VDUSE: vDPA Device in Userspace
 *
 * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
 *
 * Author: Xie Yongji <xieyongji@bytedance.com>
 *
 */
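/*
 * Overview: a userspace process creates a VDUSE device through
 * /dev/vduse/control (VDUSE_CREATE_DEV), then opens /dev/vduse/$NAME to
 * service it. Control-plane requests from the kernel (struct
 * vduse_dev_request) are read() from the per-device node and answered
 * with a write() of a matching struct vduse_dev_response, while
 * virtqueue kicks are delivered over eventfds registered with
 * VDUSE_VQ_SETUP_KICKFD.
 *
 * A minimal userspace sketch (illustrative only; the device name "foo"
 * is hypothetical and all error handling is omitted):
 *
 *	int ctrl = open("/dev/vduse/control", O_RDWR);
 *	uint64_t api_version = VDUSE_API_VERSION;
 *	ioctl(ctrl, VDUSE_SET_API_VERSION, &api_version);
 *	ioctl(ctrl, VDUSE_CREATE_DEV, &dev_config);
 *	int fd = open("/dev/vduse/foo", O_RDWR);
 *	for (;;) {
 *		struct vduse_dev_request req;
 *		struct vduse_dev_response resp = { 0 };
 *		read(fd, &req, sizeof(req));
 *		resp.request_id = req.request_id;
 *		resp.result = VDUSE_REQ_RESULT_OK;
 *		write(fd, &resp, sizeof(resp));
 *	}
 */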

#include <linux/init.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/wait.h>
#include <linux/dma-map-ops.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <uapi/linux/vduse.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/virtio_blk.h>
#include <linux/mod_devicetable.h>

#include "iova_domain.h"

#define DRV_AUTHOR   "Yongji Xie <xieyongji@bytedance.com>"
#define DRV_DESC     "vDPA Device in Userspace"
#define DRV_LICENSE  "GPL v2"

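/*
 * Limits: one char-device minor per VDUSE device, a 64 MB bounce buffer
 * for non-coherent DMA, a 128 MB IOVA space per device, and a 30-second
 * default timeout for control-plane messages (0 means wait forever).
 */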
#define VDUSE_DEV_MAX (1U << MINORBITS)
#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
#define VDUSE_MSG_DEFAULT_TIMEOUT 30

struct vduse_virtqueue {
	u16 index;
	u16 num_max;
	u32 num;
	u64 desc_addr;
	u64 driver_addr;
	u64 device_addr;
	struct vdpa_vq_state state;
	bool ready;
	bool kicked;
	spinlock_t kick_lock;
	spinlock_t irq_lock;
	struct eventfd_ctx *kickfd;
	struct vdpa_callback cb;
	struct work_struct inject;
	struct work_struct kick;
};

struct vduse_dev;

struct vduse_vdpa {
	struct vdpa_device vdpa;
	struct vduse_dev *dev;
};

struct vduse_dev {
	struct vduse_vdpa *vdev;
	struct device *dev;
	struct vduse_virtqueue *vqs;
	struct vduse_iova_domain *domain;
	char *name;
	struct mutex lock;
	spinlock_t msg_lock;
	u64 msg_unique;
	u32 msg_timeout;
	wait_queue_head_t waitq;
	struct list_head send_list;
	struct list_head recv_list;
	struct vdpa_callback config_cb;
	struct work_struct inject;
	spinlock_t irq_lock;
	struct rw_semaphore rwsem;
	int minor;
	bool broken;
	bool connected;
	u64 api_version;
	u64 device_features;
	u64 driver_features;
	u32 device_id;
	u32 vendor_id;
	u32 generation;
	u32 config_size;
	void *config;
	u8 status;
	u32 vq_num;
	u32 vq_align;
};

struct vduse_dev_msg {
	struct vduse_dev_request req;
	struct vduse_dev_response resp;
	struct list_head list;
	wait_queue_head_t waitq;
	bool completed;
};

struct vduse_control {
	u64 api_version;
};

static DEFINE_MUTEX(vduse_lock);
static DEFINE_IDR(vduse_idr);

static dev_t vduse_major;
static struct class *vduse_class;
static struct cdev vduse_ctrl_cdev;
static struct cdev vduse_cdev;
static struct workqueue_struct *vduse_irq_wq;

static u32 allowed_device_id[] = {
	VIRTIO_ID_BLOCK,
};

static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
{
	struct vduse_vdpa *vdev = container_of(vdpa, struct vduse_vdpa, vdpa);

	return vdev->dev;
}

static inline struct vduse_dev *dev_to_vduse(struct device *dev)
{
	struct vdpa_device *vdpa = dev_to_vdpa(dev);

	return vdpa_to_vduse(vdpa);
}

static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
					    uint32_t request_id)
{
	struct vduse_dev_msg *msg;

	list_for_each_entry(msg, head, list) {
		if (msg->req.request_id == request_id) {
			list_del(&msg->list);
			return msg;
		}
	}

	return NULL;
}

static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
{
	struct vduse_dev_msg *msg = NULL;

	if (!list_empty(head)) {
		msg = list_first_entry(head, struct vduse_dev_msg, list);
		list_del(&msg->list);
	}

	return msg;
}

static void vduse_enqueue_msg(struct list_head *head,
			      struct vduse_dev_msg *msg)
{
	list_add_tail(&msg->list, head);
}

static void vduse_dev_broken(struct vduse_dev *dev)
{
	struct vduse_dev_msg *msg, *tmp;

	if (unlikely(dev->broken))
		return;

	list_splice_init(&dev->recv_list, &dev->send_list);
	list_for_each_entry_safe(msg, tmp, &dev->send_list, list) {
		list_del(&msg->list);
		msg->completed = 1;
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		wake_up(&msg->waitq);
	}
	dev->broken = true;
	wake_up(&dev->waitq);
}

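/*
 * Synchronous control-plane request: assign a unique ID, queue the
 * message on send_list for userspace to read, then sleep (killable)
 * until the reply arrives via vduse_dev_write_iter(). A timeout marks
 * the whole device broken, failing this and all pending messages.
 */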
static int vduse_dev_msg_sync(struct vduse_dev *dev,
			      struct vduse_dev_msg *msg)
{
	int ret;

	if (unlikely(dev->broken))
		return -EIO;

	init_waitqueue_head(&msg->waitq);
	spin_lock(&dev->msg_lock);
	if (unlikely(dev->broken)) {
		spin_unlock(&dev->msg_lock);
		return -EIO;
	}
	msg->req.request_id = dev->msg_unique++;
	vduse_enqueue_msg(&dev->send_list, msg);
	wake_up(&dev->waitq);
	spin_unlock(&dev->msg_lock);
	if (dev->msg_timeout)
		ret = wait_event_killable_timeout(msg->waitq, msg->completed,
						  (long)dev->msg_timeout * HZ);
	else
		ret = wait_event_killable(msg->waitq, msg->completed);

	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
		/* Mark the device as malfunctioning when there is a timeout */
		if (!ret)
			vduse_dev_broken(dev);
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);

	return ret;
}

static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
					 struct vduse_virtqueue *vq,
					 struct vdpa_vq_state_packed *packed)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	packed->last_avail_counter =
			msg.resp.vq_state.packed.last_avail_counter & 0x0001;
	packed->last_avail_idx =
			msg.resp.vq_state.packed.last_avail_idx & 0x7FFF;
	packed->last_used_counter =
			msg.resp.vq_state.packed.last_used_counter & 0x0001;
	packed->last_used_idx =
			msg.resp.vq_state.packed.last_used_idx & 0x7FFF;

	return 0;
}

static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
					struct vduse_virtqueue *vq,
					struct vdpa_vq_state_split *split)
{
	struct vduse_dev_msg msg = { 0 };
	int ret;

	msg.req.type = VDUSE_GET_VQ_STATE;
	msg.req.vq_state.index = vq->index;

	ret = vduse_dev_msg_sync(dev, &msg);
	if (ret)
		return ret;

	split->avail_index = msg.resp.vq_state.split.avail_index;

	return 0;
}

static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
{
	struct vduse_dev_msg msg = { 0 };

	msg.req.type = VDUSE_SET_STATUS;
	msg.req.s.status = status;

	return vduse_dev_msg_sync(dev, &msg);
}

static int vduse_dev_update_iotlb(struct vduse_dev *dev,
				  u64 start, u64 last)
{
	struct vduse_dev_msg msg = { 0 };

	if (last < start)
		return -EINVAL;

	msg.req.type = VDUSE_UPDATE_IOTLB;
	msg.req.iova.start = start;
	msg.req.iova.last = last;

	return vduse_dev_msg_sync(dev, &msg);
}

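/*
 * read() side of the control plane: hand exactly one pending request to
 * userspace. The message is taken off send_list while being copied; on
 * success it moves to recv_list to await the reply, and on a failed
 * copy it is requeued so it is not lost.
 */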
static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_msg *msg;
	int size = sizeof(struct vduse_dev_request);
	ssize_t ret;

	if (iov_iter_count(to) < size)
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	while (1) {
		msg = vduse_dequeue_msg(&dev->send_list);
		if (msg)
			break;

		ret = -EAGAIN;
		if (file->f_flags & O_NONBLOCK)
			goto unlock;

		spin_unlock(&dev->msg_lock);
		ret = wait_event_interruptible_exclusive(dev->waitq,
					!list_empty(&dev->send_list));
		if (ret)
			return ret;

		spin_lock(&dev->msg_lock);
	}
	spin_unlock(&dev->msg_lock);
	ret = copy_to_iter(&msg->req, size, to);
	spin_lock(&dev->msg_lock);
	if (ret != size) {
		ret = -EFAULT;
		vduse_enqueue_msg(&dev->send_list, msg);
		goto unlock;
	}
	vduse_enqueue_msg(&dev->recv_list, msg);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static bool is_mem_zero(const char *ptr, int size)
{
	int i;

	for (i = 0; i < size; i++) {
		if (ptr[i])
			return false;
	}
	return true;
}

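/*
 * write() side of the control plane: match the reply to its request by
 * request_id on recv_list, copy the response in, and wake the thread
 * sleeping in vduse_dev_msg_sync().
 */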
static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vduse_dev *dev = file->private_data;
	struct vduse_dev_response resp;
	struct vduse_dev_msg *msg;
	size_t ret;

	ret = copy_from_iter(&resp, sizeof(resp), from);
	if (ret != sizeof(resp))
		return -EINVAL;

	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
		return -EINVAL;

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
	if (!msg) {
		ret = -ENOENT;
		goto unlock;
	}

	memcpy(&msg->resp, &resp, sizeof(resp));
	msg->completed = 1;
	wake_up(&msg->waitq);
unlock:
	spin_unlock(&dev->msg_lock);

	return ret;
}

static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
{
	struct vduse_dev *dev = file->private_data;
	__poll_t mask = 0;

	poll_wait(file, &dev->waitq, wait);

	spin_lock(&dev->msg_lock);

	if (unlikely(dev->broken))
		mask |= EPOLLERR;
	if (!list_empty(&dev->send_list))
		mask |= EPOLLIN | EPOLLRDNORM;
	if (!list_empty(&dev->recv_list))
		mask |= EPOLLOUT | EPOLLWRNORM;

	spin_unlock(&dev->msg_lock);

	return mask;
}

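/*
 * Reset device state on virtio reset or teardown. Holding dev->rwsem
 * for write excludes vduse_dev_queue_irq_work(), so once the callbacks
 * are cleared the flush_work() calls below guarantee no injection work
 * is still running with stale callback pointers.
 */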
static void vduse_dev_reset(struct vduse_dev *dev)
{
	int i;
	struct vduse_iova_domain *domain = dev->domain;

	/* The coherent mappings are handled in vduse_dev_free_coherent() */
	if (domain->bounce_map)
		vduse_domain_reset_bounce_map(domain);

	down_write(&dev->rwsem);

	dev->status = 0;
	dev->driver_features = 0;
	dev->generation++;
	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = NULL;
	dev->config_cb.private = NULL;
	spin_unlock(&dev->irq_lock);
	flush_work(&dev->inject);

	for (i = 0; i < dev->vq_num; i++) {
		struct vduse_virtqueue *vq = &dev->vqs[i];

		vq->ready = false;
		vq->desc_addr = 0;
		vq->driver_addr = 0;
		vq->device_addr = 0;
		vq->num = 0;
		memset(&vq->state, 0, sizeof(vq->state));

		spin_lock(&vq->kick_lock);
		vq->kicked = false;
		if (vq->kickfd)
			eventfd_ctx_put(vq->kickfd);
		vq->kickfd = NULL;
		spin_unlock(&vq->kick_lock);

		spin_lock(&vq->irq_lock);
		vq->cb.callback = NULL;
		vq->cb.private = NULL;
		spin_unlock(&vq->irq_lock);
		flush_work(&vq->inject);
		flush_work(&vq->kick);
	}

	up_write(&dev->rwsem);
}

static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
				u64 desc_area, u64 driver_area,
				u64 device_area)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->desc_addr = desc_area;
	vq->driver_addr = driver_area;
	vq->device_addr = device_area;

	return 0;
}

static void vduse_vq_kick(struct vduse_virtqueue *vq)
{
	spin_lock(&vq->kick_lock);
	if (!vq->ready)
		goto unlock;

	if (vq->kickfd)
		eventfd_signal(vq->kickfd, 1);
	else
		vq->kicked = true;
unlock:
	spin_unlock(&vq->kick_lock);
}

static void vduse_vq_kick_work(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, kick);

	vduse_vq_kick(vq);
}

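/*
 * Kicking may happen in a context where signalling an eventfd directly
 * is unsafe (eventfd_signal_allowed() returns false, e.g. when nested
 * inside another eventfd wakeup), so defer such kicks to a work item.
 */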
static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (!eventfd_signal_allowed()) {
		schedule_work(&vq->kick);
		return;
	}
	vduse_vq_kick(vq);
}

static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
			      struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	spin_lock(&vq->irq_lock);
	vq->cb.callback = cb->callback;
	vq->cb.private = cb->private;
	spin_unlock(&vq->irq_lock);
}

static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->num = num;
}

static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
					u16 idx, bool ready)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	vq->ready = ready;
}

static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	return vq->ready;
}

static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
				const struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
		vq->state.packed.last_avail_counter =
				state->packed.last_avail_counter;
		vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
		vq->state.packed.last_used_counter =
				state->packed.last_used_counter;
		vq->state.packed.last_used_idx = state->packed.last_used_idx;
	} else
		vq->state.split.avail_index = state->split.avail_index;

	return 0;
}

static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
				struct vdpa_vq_state *state)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	struct vduse_virtqueue *vq = &dev->vqs[idx];

	if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))
		return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);

	return vduse_dev_get_vq_state_split(dev, vq, &state->split);
}

static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vq_align;
}

static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_features;
}

static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->driver_features = features;
	return 0;
}

static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
				  struct vdpa_callback *cb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	spin_lock(&dev->irq_lock);
	dev->config_cb.callback = cb->callback;
	dev->config_cb.private = cb->private;
	spin_unlock(&dev->irq_lock);
}

static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	u16 num_max = 0;
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (num_max < dev->vqs[i].num_max)
			num_max = dev->vqs[i].num_max;

	return num_max;
}

static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->device_id;
}

static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->vendor_id;
}

static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->status;
}

static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (vduse_dev_set_status(dev, status))
		return;

	dev->status = status;
}

static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->config_size;
}

static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
				  void *buf, unsigned int len)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	if (len > dev->config_size - offset)
		return;

	memcpy(buf, dev->config + offset, len);
}

static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
			const void *buf, unsigned int len)
{
	/* Now we only support read-only configuration space */
}

static int vduse_vdpa_reset(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret = vduse_dev_set_status(dev, 0);

	vduse_dev_reset(dev);

	return ret;
}

static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	return dev->generation;
}

static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
				struct vhost_iotlb *iotlb)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
	int ret;

	ret = vduse_domain_set_map(dev->domain, iotlb);
	if (ret)
		return ret;

	ret = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
	if (ret) {
		vduse_domain_clear_map(dev->domain, iotlb);
		return ret;
	}

	return 0;
}

static void vduse_vdpa_free(struct vdpa_device *vdpa)
{
	struct vduse_dev *dev = vdpa_to_vduse(vdpa);

	dev->vdev = NULL;
}

static const struct vdpa_config_ops vduse_vdpa_config_ops = {
	.set_vq_address		= vduse_vdpa_set_vq_address,
	.kick_vq		= vduse_vdpa_kick_vq,
	.set_vq_cb		= vduse_vdpa_set_vq_cb,
	.set_vq_num		= vduse_vdpa_set_vq_num,
	.set_vq_ready		= vduse_vdpa_set_vq_ready,
	.get_vq_ready		= vduse_vdpa_get_vq_ready,
	.set_vq_state		= vduse_vdpa_set_vq_state,
	.get_vq_state		= vduse_vdpa_get_vq_state,
	.get_vq_align		= vduse_vdpa_get_vq_align,
	.get_features		= vduse_vdpa_get_features,
	.set_features		= vduse_vdpa_set_features,
	.set_config_cb		= vduse_vdpa_set_config_cb,
	.get_vq_num_max		= vduse_vdpa_get_vq_num_max,
	.get_device_id		= vduse_vdpa_get_device_id,
	.get_vendor_id		= vduse_vdpa_get_vendor_id,
	.get_status		= vduse_vdpa_get_status,
	.set_status		= vduse_vdpa_set_status,
	.get_config_size	= vduse_vdpa_get_config_size,
	.get_config		= vduse_vdpa_get_config,
	.set_config		= vduse_vdpa_set_config,
	.get_generation		= vduse_vdpa_get_generation,
	.reset			= vduse_vdpa_reset,
	.set_map		= vduse_vdpa_set_map,
	.free			= vduse_vdpa_free,
};

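/*
 * DMA ops installed on the vDPA device: all mappings are routed through
 * the per-device IOVA domain, so buffers end up in memory userspace can
 * reach via VDUSE_IOTLB_GET_FD (bouncing through the bounce buffer
 * where needed).
 */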
static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
}

static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
				size_t size, enum dma_data_direction dir,
				unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
}

static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
					dma_addr_t *dma_addr, gfp_t flag,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;
	unsigned long iova;
	void *addr;

	*dma_addr = DMA_MAPPING_ERROR;
	addr = vduse_domain_alloc_coherent(domain, size,
				(dma_addr_t *)&iova, flag, attrs);
	if (!addr)
		return NULL;

	*dma_addr = (dma_addr_t)iova;

	return addr;
}

static void vduse_dev_free_coherent(struct device *dev, size_t size,
					void *vaddr, dma_addr_t dma_addr,
					unsigned long attrs)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
}

static size_t vduse_dev_max_mapping_size(struct device *dev)
{
	struct vduse_dev *vdev = dev_to_vduse(dev);
	struct vduse_iova_domain *domain = vdev->domain;

	return domain->bounce_size;
}

static const struct dma_map_ops vduse_dev_dma_ops = {
	.map_page = vduse_dev_map_page,
	.unmap_page = vduse_dev_unmap_page,
	.alloc = vduse_dev_alloc_coherent,
	.free = vduse_dev_free_coherent,
	.max_mapping_size = vduse_dev_max_mapping_size,
};

static unsigned int perm_to_file_flags(u8 perm)
{
	unsigned int flags = 0;

	switch (perm) {
	case VDUSE_ACCESS_WO:
		flags |= O_WRONLY;
		break;
	case VDUSE_ACCESS_RO:
		flags |= O_RDONLY;
		break;
	case VDUSE_ACCESS_RW:
		flags |= O_RDWR;
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags;
}

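/*
 * Attach (or detach, with VDUSE_EVENTFD_DEASSIGN) the eventfd userspace
 * polls for virtqueue kicks. A kick that arrived while no eventfd was
 * registered is remembered in vq->kicked and replayed here.
 */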
static int vduse_kickfd_setup(struct vduse_dev *dev,
			struct vduse_vq_eventfd *eventfd)
{
	struct eventfd_ctx *ctx = NULL;
	struct vduse_virtqueue *vq;
	u32 index;

	if (eventfd->index >= dev->vq_num)
		return -EINVAL;

	index = array_index_nospec(eventfd->index, dev->vq_num);
	vq = &dev->vqs[index];
	if (eventfd->fd >= 0) {
		ctx = eventfd_ctx_fdget(eventfd->fd);
		if (IS_ERR(ctx))
			return PTR_ERR(ctx);
	} else if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
		return 0;

	spin_lock(&vq->kick_lock);
	if (vq->kickfd)
		eventfd_ctx_put(vq->kickfd);
	vq->kickfd = ctx;
	if (vq->ready && vq->kicked && vq->kickfd) {
		eventfd_signal(vq->kickfd, 1);
		vq->kicked = false;
	}
	spin_unlock(&vq->kick_lock);

	return 0;
}

static bool vduse_dev_is_ready(struct vduse_dev *dev)
{
	int i;

	for (i = 0; i < dev->vq_num; i++)
		if (!dev->vqs[i].num_max)
			return false;

	return true;
}

static void vduse_dev_irq_inject(struct work_struct *work)
{
	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);

	spin_lock_irq(&dev->irq_lock);
	if (dev->config_cb.callback)
		dev->config_cb.callback(dev->config_cb.private);
	spin_unlock_irq(&dev->irq_lock);
}

static void vduse_vq_irq_inject(struct work_struct *work)
{
	struct vduse_virtqueue *vq = container_of(work,
					struct vduse_virtqueue, inject);

	spin_lock_irq(&vq->irq_lock);
	if (vq->ready && vq->cb.callback)
		vq->cb.callback(vq->cb.private);
	spin_unlock_irq(&vq->irq_lock);
}

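/*
 * Queue interrupt-injection work on behalf of userspace. The read lock
 * on dev->rwsem pairs with the write lock in vduse_dev_reset(): no new
 * injection can be queued once a reset has started, and injection is
 * refused entirely until the driver sets DRIVER_OK.
 */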
static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
				    struct work_struct *irq_work)
{
	int ret = -EINVAL;

	down_read(&dev->rwsem);
	if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
		goto unlock;

	ret = 0;
	queue_work(vduse_irq_wq, irq_work);
unlock:
	up_read(&dev->rwsem);

	return ret;
}

static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct vduse_dev *dev = file->private_data;
	void __user *argp = (void __user *)arg;
	int ret;

	if (unlikely(dev->broken))
		return -EPERM;

	switch (cmd) {
	case VDUSE_IOTLB_GET_FD: {
		struct vduse_iotlb_entry entry;
		struct vhost_iotlb_map *map;
		struct vdpa_map_file *map_file;
		struct vduse_iova_domain *domain = dev->domain;
		struct file *f = NULL;

		ret = -EFAULT;
		if (copy_from_user(&entry, argp, sizeof(entry)))
			break;

		ret = -EINVAL;
		if (entry.start > entry.last)
			break;

		spin_lock(&domain->iotlb_lock);
		map = vhost_iotlb_itree_first(domain->iotlb,
					      entry.start, entry.last);
		if (map) {
			map_file = (struct vdpa_map_file *)map->opaque;
			f = get_file(map_file->file);
			entry.offset = map_file->offset;
			entry.start = map->start;
			entry.last = map->last;
			entry.perm = map->perm;
		}
		spin_unlock(&domain->iotlb_lock);
		ret = -EINVAL;
		if (!f)
			break;

		ret = -EFAULT;
		if (copy_to_user(argp, &entry, sizeof(entry))) {
			fput(f);
			break;
		}
		ret = receive_fd(f, perm_to_file_flags(entry.perm));
		fput(f);
		break;
	}
	case VDUSE_DEV_GET_FEATURES:
		/*
		 * Just mirror what the driver wrote here.
		 * The driver is expected to check FEATURES_OK later.
		 */
		ret = put_user(dev->driver_features, (u64 __user *)argp);
		break;
	case VDUSE_DEV_SET_CONFIG: {
		struct vduse_config_data config;
		unsigned long size = offsetof(struct vduse_config_data,
					      buffer);

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (config.length == 0 ||
		    config.length > dev->config_size - config.offset)
			break;

		ret = -EFAULT;
		if (copy_from_user(dev->config + config.offset, argp + size,
				   config.length))
			break;

		ret = 0;
		break;
	}
	case VDUSE_DEV_INJECT_CONFIG_IRQ:
		ret = vduse_dev_queue_irq_work(dev, &dev->inject);
		break;
	case VDUSE_VQ_SETUP: {
		struct vduse_vq_config config;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, sizeof(config)))
			break;

		ret = -EINVAL;
		if (config.index >= dev->vq_num)
			break;

		if (!is_mem_zero((const char *)config.reserved,
				 sizeof(config.reserved)))
			break;

		index = array_index_nospec(config.index, dev->vq_num);
		dev->vqs[index].num_max = config.max_size;
		ret = 0;
		break;
	}
	case VDUSE_VQ_GET_INFO: {
		struct vduse_vq_info vq_info;
		struct vduse_virtqueue *vq;
		u32 index;

		ret = -EFAULT;
		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
			break;

		ret = -EINVAL;
		if (vq_info.index >= dev->vq_num)
			break;

		index = array_index_nospec(vq_info.index, dev->vq_num);
		vq = &dev->vqs[index];
		vq_info.desc_addr = vq->desc_addr;
		vq_info.driver_addr = vq->driver_addr;
		vq_info.device_addr = vq->device_addr;
		vq_info.num = vq->num;

		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
			vq_info.packed.last_avail_counter =
				vq->state.packed.last_avail_counter;
			vq_info.packed.last_avail_idx =
				vq->state.packed.last_avail_idx;
			vq_info.packed.last_used_counter =
				vq->state.packed.last_used_counter;
			vq_info.packed.last_used_idx =
				vq->state.packed.last_used_idx;
		} else
			vq_info.split.avail_index =
				vq->state.split.avail_index;

		vq_info.ready = vq->ready;

		ret = -EFAULT;
		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
			break;

		ret = 0;
		break;
	}
	case VDUSE_VQ_SETUP_KICKFD: {
		struct vduse_vq_eventfd eventfd;

		ret = -EFAULT;
		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
			break;

		ret = vduse_kickfd_setup(dev, &eventfd);
		break;
	}
	case VDUSE_VQ_INJECT_IRQ: {
		u32 index;

		ret = -EFAULT;
		if (get_user(index, (u32 __user *)argp))
			break;

		ret = -EINVAL;
		if (index >= dev->vq_num)
			break;

		index = array_index_nospec(index, dev->vq_num);
		ret = vduse_dev_queue_irq_work(dev, &dev->vqs[index].inject);
		break;
	}
	default:
		ret = -ENOIOCTLCMD;
		break;
	}

	return ret;
}

static int vduse_dev_release(struct inode *inode, struct file *file)
{
	struct vduse_dev *dev = file->private_data;

	spin_lock(&dev->msg_lock);
	/* Make sure the inflight messages can be processed after reconnection */
	list_splice_init(&dev->recv_list, &dev->send_list);
	spin_unlock(&dev->msg_lock);
	dev->connected = false;

	return 0;
}

static struct vduse_dev *vduse_dev_get_from_minor(int minor)
{
	struct vduse_dev *dev;

	mutex_lock(&vduse_lock);
	dev = idr_find(&vduse_idr, minor);
	mutex_unlock(&vduse_lock);

	return dev;
}

static int vduse_dev_open(struct inode *inode, struct file *file)
{
	int ret;
	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));

	if (!dev)
		return -ENODEV;

	ret = -EBUSY;
	mutex_lock(&dev->lock);
	if (dev->connected)
		goto unlock;

	ret = 0;
	dev->connected = true;
	file->private_data = dev;
unlock:
	mutex_unlock(&dev->lock);

	return ret;
}

static const struct file_operations vduse_dev_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_dev_open,
	.release	= vduse_dev_release,
	.read_iter	= vduse_dev_read_iter,
	.write_iter	= vduse_dev_write_iter,
	.poll		= vduse_dev_poll,
	.unlocked_ioctl	= vduse_dev_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

static struct vduse_dev *vduse_dev_create(void)
{
	struct vduse_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

	if (!dev)
		return NULL;

	mutex_init(&dev->lock);
	spin_lock_init(&dev->msg_lock);
	INIT_LIST_HEAD(&dev->send_list);
	INIT_LIST_HEAD(&dev->recv_list);
	spin_lock_init(&dev->irq_lock);
	init_rwsem(&dev->rwsem);

	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
	init_waitqueue_head(&dev->waitq);

	return dev;
}

static void vduse_dev_destroy(struct vduse_dev *dev)
{
	kfree(dev);
}

static struct vduse_dev *vduse_find_dev(const char *name)
{
	struct vduse_dev *dev;
	int id;

	idr_for_each_entry(&vduse_idr, dev, id)
		if (!strcmp(dev->name, name))
			return dev;

	return NULL;
}

static int vduse_destroy_dev(char *name)
{
	struct vduse_dev *dev = vduse_find_dev(name);

	if (!dev)
		return -EINVAL;

	mutex_lock(&dev->lock);
	if (dev->vdev || dev->connected) {
		mutex_unlock(&dev->lock);
		return -EBUSY;
	}
	dev->connected = true;
	mutex_unlock(&dev->lock);

	vduse_dev_reset(dev);
	device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
	idr_remove(&vduse_idr, dev->minor);
	kvfree(dev->config);
	kfree(dev->vqs);
	vduse_domain_destroy(dev->domain);
	kfree(dev->name);
	vduse_dev_destroy(dev);
	module_put(THIS_MODULE);

	return 0;
}

static bool device_is_allowed(u32 device_id)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(allowed_device_id); i++)
		if (allowed_device_id[i] == device_id)
			return true;

	return false;
}

static bool features_is_valid(u64 features)
{
	if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM)))
		return false;

	/* Now we only support read-only configuration space */
	if (features & (1ULL << VIRTIO_BLK_F_CONFIG_WCE))
		return false;

	return true;
}

static bool vduse_validate_config(struct vduse_dev_config *config)
{
	if (!is_mem_zero((const char *)config->reserved,
			 sizeof(config->reserved)))
		return false;

	if (config->vq_align > PAGE_SIZE)
		return false;

	if (config->config_size > PAGE_SIZE)
		return false;

	if (!device_is_allowed(config->device_id))
		return false;

	if (!features_is_valid(config->features))
		return false;

	return true;
}

static ssize_t msg_timeout_show(struct device *device,
				struct device_attribute *attr, char *buf)
{
	struct vduse_dev *dev = dev_get_drvdata(device);

	return sysfs_emit(buf, "%u\n", dev->msg_timeout);
}

static ssize_t msg_timeout_store(struct device *device,
				 struct device_attribute *attr,
				 const char *buf, size_t count)
{
	struct vduse_dev *dev = dev_get_drvdata(device);
	int ret;

	ret = kstrtouint(buf, 10, &dev->msg_timeout);
	if (ret < 0)
		return ret;

	return count;
}

static DEVICE_ATTR_RW(msg_timeout);

static struct attribute *vduse_dev_attrs[] = {
	&dev_attr_msg_timeout.attr,
	NULL
};

ATTRIBUTE_GROUPS(vduse_dev);

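/*
 * Create a VDUSE device from a validated config. On success the device
 * takes ownership of config_buf (freed with kvfree() in
 * vduse_destroy_dev()); on any failure it is kvfree()d here via the
 * common error path.
 */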
static int vduse_create_dev(struct vduse_dev_config *config,
			    void *config_buf, u64 api_version)
{
	int i, ret;
	struct vduse_dev *dev;

	ret = -EEXIST;
	if (vduse_find_dev(config->name))
		goto err;

	ret = -ENOMEM;
	dev = vduse_dev_create();
	if (!dev)
		goto err;

	dev->api_version = api_version;
	dev->device_features = config->features;
	dev->device_id = config->device_id;
	dev->vendor_id = config->vendor_id;
	dev->name = kstrdup(config->name, GFP_KERNEL);
	if (!dev->name)
		goto err_str;

	dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
					  VDUSE_BOUNCE_SIZE);
	if (!dev->domain)
		goto err_domain;

	dev->config = config_buf;
	dev->config_size = config->config_size;
	dev->vq_align = config->vq_align;
	dev->vq_num = config->vq_num;
	dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
	if (!dev->vqs)
		goto err_vqs;

	for (i = 0; i < dev->vq_num; i++) {
		dev->vqs[i].index = i;
		INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
		INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
		spin_lock_init(&dev->vqs[i].kick_lock);
		spin_lock_init(&dev->vqs[i].irq_lock);
	}

	ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
	if (ret < 0)
		goto err_idr;

	dev->minor = ret;
	dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
	dev->dev = device_create(vduse_class, NULL,
				 MKDEV(MAJOR(vduse_major), dev->minor),
				 dev, "%s", config->name);
	if (IS_ERR(dev->dev)) {
		ret = PTR_ERR(dev->dev);
		goto err_dev;
	}
	__module_get(THIS_MODULE);

	return 0;
err_dev:
	idr_remove(&vduse_idr, dev->minor);
err_idr:
	kfree(dev->vqs);
err_vqs:
	vduse_domain_destroy(dev->domain);
err_domain:
	kfree(dev->name);
err_str:
	vduse_dev_destroy(dev);
err:
	kvfree(config_buf);
	return ret;
}

static long vduse_ioctl(struct file *file, unsigned int cmd,
			unsigned long arg)
{
	int ret;
	void __user *argp = (void __user *)arg;
	struct vduse_control *control = file->private_data;

	mutex_lock(&vduse_lock);
	switch (cmd) {
	case VDUSE_GET_API_VERSION:
		ret = put_user(control->api_version, (u64 __user *)argp);
		break;
	case VDUSE_SET_API_VERSION: {
		u64 api_version;

		ret = -EFAULT;
		if (get_user(api_version, (u64 __user *)argp))
			break;

		ret = -EINVAL;
		if (api_version > VDUSE_API_VERSION)
			break;

		ret = 0;
		control->api_version = api_version;
		break;
	}
	case VDUSE_CREATE_DEV: {
		struct vduse_dev_config config;
		unsigned long size = offsetof(struct vduse_dev_config, config);
		void *buf;

		ret = -EFAULT;
		if (copy_from_user(&config, argp, size))
			break;

		ret = -EINVAL;
		if (!vduse_validate_config(&config))
			break;

		buf = vmemdup_user(argp + size, config.config_size);
		if (IS_ERR(buf)) {
			ret = PTR_ERR(buf);
			break;
		}
		config.name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_create_dev(&config, buf, control->api_version);
		break;
	}
	case VDUSE_DESTROY_DEV: {
		char name[VDUSE_NAME_MAX];

		ret = -EFAULT;
		if (copy_from_user(name, argp, VDUSE_NAME_MAX))
			break;

		name[VDUSE_NAME_MAX - 1] = '\0';
		ret = vduse_destroy_dev(name);
		break;
	}
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&vduse_lock);

	return ret;
}

static int vduse_release(struct inode *inode, struct file *file)
{
	struct vduse_control *control = file->private_data;

	kfree(control);
	return 0;
}

static int vduse_open(struct inode *inode, struct file *file)
{
	struct vduse_control *control;

	control = kmalloc(sizeof(struct vduse_control), GFP_KERNEL);
	if (!control)
		return -ENOMEM;

	control->api_version = VDUSE_API_VERSION;
	file->private_data = control;

	return 0;
}

static const struct file_operations vduse_ctrl_fops = {
	.owner		= THIS_MODULE,
	.open		= vduse_open,
	.release	= vduse_release,
	.unlocked_ioctl	= vduse_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.llseek		= noop_llseek,
};

static char *vduse_devnode(struct device *dev, umode_t *mode)
{
	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}

static void vduse_mgmtdev_release(struct device *dev)
{
}

static struct device vduse_mgmtdev = {
	.init_name = "vduse",
	.release = vduse_mgmtdev_release,
};

static struct vdpa_mgmt_dev mgmt_dev;

static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
	struct vduse_vdpa *vdev;
	int ret;

	if (dev->vdev)
		return -EEXIST;

	vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
				 &vduse_vdpa_config_ops, name, true);
	if (IS_ERR(vdev))
		return PTR_ERR(vdev);

	dev->vdev = vdev;
	vdev->dev = dev;
	vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
	ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
	if (ret) {
		put_device(&vdev->vdpa.dev);
		return ret;
	}
	set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
	vdev->vdpa.dma_dev = &vdev->vdpa.dev;
	vdev->vdpa.mdev = &mgmt_dev;

	return 0;
}

static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
	struct vduse_dev *dev;
	int ret;

	mutex_lock(&vduse_lock);
	dev = vduse_find_dev(name);
	if (!dev || !vduse_dev_is_ready(dev)) {
		mutex_unlock(&vduse_lock);
		return -EINVAL;
	}
	ret = vduse_dev_init_vdpa(dev, name);
	mutex_unlock(&vduse_lock);
	if (ret)
		return ret;

	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
	if (ret) {
		put_device(&dev->vdev->vdpa.dev);
		return ret;
	}

	return 0;
}

static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
	.dev_add = vdpa_dev_add,
	.dev_del = vdpa_dev_del,
};

static struct virtio_device_id id_table[] = {
	{ VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
	{ 0 },
};

static struct vdpa_mgmt_dev mgmt_dev = {
	.device = &vduse_mgmtdev,
	.id_table = id_table,
	.ops = &vdpa_dev_mgmtdev_ops,
};

static int vduse_mgmtdev_init(void)
{
	int ret;

	ret = device_register(&vduse_mgmtdev);
	if (ret)
		return ret;

	ret = vdpa_mgmtdev_register(&mgmt_dev);
	if (ret)
		goto err;

	return 0;
err:
	device_unregister(&vduse_mgmtdev);
	return ret;
}

static void vduse_mgmtdev_exit(void)
{
	vdpa_mgmtdev_unregister(&mgmt_dev);
	device_unregister(&vduse_mgmtdev);
}

static int vduse_init(void)
{
	int ret;
	struct device *dev;

	vduse_class = class_create(THIS_MODULE, "vduse");
	if (IS_ERR(vduse_class))
		return PTR_ERR(vduse_class);

	vduse_class->devnode = vduse_devnode;
	vduse_class->dev_groups = vduse_dev_groups;

	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
	if (ret)
		goto err_chardev_region;

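	/*
	 * Minor 0 backs /dev/vduse/control; minors 1..VDUSE_DEV_MAX-1 back
	 * the per-device nodes (matching the idr range in vduse_create_dev()).
	 */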
	/* /dev/vduse/control */
	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
	vduse_ctrl_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
	if (ret)
		goto err_ctrl_cdev;

	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto err_device;
	}

	/* /dev/vduse/$DEVICE */
	cdev_init(&vduse_cdev, &vduse_dev_fops);
	vduse_cdev.owner = THIS_MODULE;
	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
		       VDUSE_DEV_MAX - 1);
	if (ret)
		goto err_cdev;

	vduse_irq_wq = alloc_workqueue("vduse-irq",
				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
	if (!vduse_irq_wq) {
		ret = -ENOMEM;
		goto err_wq;
	}

	ret = vduse_domain_init();
	if (ret)
		goto err_domain;

	ret = vduse_mgmtdev_init();
	if (ret)
		goto err_mgmtdev;

	return 0;
err_mgmtdev:
	vduse_domain_exit();
err_domain:
	destroy_workqueue(vduse_irq_wq);
err_wq:
	cdev_del(&vduse_cdev);
err_cdev:
	device_destroy(vduse_class, vduse_major);
err_device:
	cdev_del(&vduse_ctrl_cdev);
err_ctrl_cdev:
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
err_chardev_region:
	class_destroy(vduse_class);
	return ret;
}
module_init(vduse_init);

static void vduse_exit(void)
{
	vduse_mgmtdev_exit();
	vduse_domain_exit();
	destroy_workqueue(vduse_irq_wq);
	cdev_del(&vduse_cdev);
	device_destroy(vduse_class, vduse_major);
	cdev_del(&vduse_ctrl_cdev);
	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
	class_destroy(vduse_class);
}
module_exit(vduse_exit);

MODULE_LICENSE(DRV_LICENSE);
MODULE_AUTHOR(DRV_AUTHOR);
MODULE_DESCRIPTION(DRV_DESC);