// SPDX-License-Identifier: GPL-2.0
/*
 * drivers/staging/android/vsoc.c
 *
 * Android Virtual System on a Chip (VSoC) driver
 *
 * Copyright (C) 2017 Google, Inc.
 *
 * Author: ghartman@google.com
 *
 * Based on drivers/char/kvm_ivshmem.c - driver for KVM Inter-VM shared memory
 *         Copyright 2009 Cam Macdonell <cam@cs.ualberta.ca>
 *
 * Based on cirrusfb.c and 8139cp.c:
 *   Copyright 1999-2001 Jeff Garzik
 *   Copyright 2001-2004 Jeff Garzik
 */

#include <linux/dma-mapping.h>
#include <linux/freezer.h>
#include <linux/futex.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/cdev.h>
#include <linux/file.h>
#include "uapi/vsoc_shm.h"

#define VSOC_DEV_NAME "vsoc"

/*
 * Description of the ivshmem-doorbell PCI device used by QEMU. These
 * constants follow docs/specs/ivshmem-spec.txt, which can be found in
 * the QEMU repository. This was last reconciled with the version that
 * came out with 2.8.
 */

/*
 * These constants are the KVM Inter-VM shared memory device
 * register offsets.
 */
enum {
	INTR_MASK = 0x00,	/* Interrupt Mask */
	INTR_STATUS = 0x04,	/* Interrupt Status */
	IV_POSITION = 0x08,	/* VM ID */
	DOORBELL = 0x0c,	/* Doorbell */
};
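
/*
 * To signal a peer, the guest writes a region's interrupt number to the
 * DOORBELL register; see the VSOC_SEND_INTERRUPT_TO_HOST handling in
 * vsoc_ioctl() below. Signals from the host arrive as MSI-X interrupts,
 * one vector per region.
 */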

static const int REGISTER_BAR;  /* Equal to 0 */
static const int MAX_REGISTER_BAR_LEN = 0x100;
/*
 * The MSI-X BAR is not used directly.
 *
 * static const int MSI_X_BAR = 1;
 */
static const int SHARED_MEMORY_BAR = 2;

struct vsoc_region_data {
	char name[VSOC_DEVICE_NAME_SZ + 1];
	wait_queue_head_t interrupt_wait_queue;
	/* TODO(b/73664181): Use multiple futex wait queues */
	wait_queue_head_t futex_wait_queue;
	/* Flag indicating that an interrupt has been signalled by the host. */
	atomic_t *incoming_signalled;
	/* Flag indicating the guest has signalled the host. */
	atomic_t *outgoing_signalled;
	bool irq_requested;
	bool device_created;
};

struct vsoc_device {
	/* Kernel virtual address of REGISTER_BAR. */
	void __iomem *regs;
	/* Physical address of SHARED_MEMORY_BAR. */
	phys_addr_t shm_phys_start;
	/* Kernel virtual address of SHARED_MEMORY_BAR. */
	void __iomem *kernel_mapped_shm;
	/* Size of the entire shared memory window in bytes. */
	size_t shm_size;
	/*
	 * Pointer to the virtual address of the shared memory layout structure.
	 * This is probably identical to kernel_mapped_shm, but saving this
	 * here saves a lot of annoying casts.
	 */
	struct vsoc_shm_layout_descriptor *layout;
	/*
	 * Points to a table of region descriptors in the kernel's virtual
	 * address space. Calculated from
	 * vsoc_shm_layout_descriptor.vsoc_region_desc_offset.
	 */
	struct vsoc_device_region *regions;
	/* Head of a list of permissions that have been granted. */
	struct list_head permissions;
	struct pci_dev *dev;
	/* Per-region (and therefore per-interrupt) information. */
	struct vsoc_region_data *regions_data;
	/*
	 * Table of MSI-X entries. This has to be separated from struct
	 * vsoc_region_data because the kernel deals with them as an array.
	 */
	struct msix_entry *msix_entries;
	/* Mutex that protects the permission list. */
	struct mutex mtx;
	/* Major number assigned by the kernel. */
	int major;
	/* Character device assigned by the kernel. */
	struct cdev cdev;
	/* Device class assigned by the kernel. */
	struct class *class;
	/*
	 * Flags that indicate what we've initialized. These are used to do an
	 * orderly cleanup of the device.
	 */
	bool enabled_device;
	bool requested_regions;
	bool cdev_added;
	bool class_added;
	bool msix_enabled;
};

static struct vsoc_device vsoc_dev;

/*
 * TODO(ghartman): Add a /sys filesystem entry that summarizes the permissions.
 */

struct fd_scoped_permission_node {
	struct fd_scoped_permission permission;
	struct list_head list;
};

struct vsoc_private_data {
	struct fd_scoped_permission_node *fd_scoped_permission_node;
};

static long vsoc_ioctl(struct file *, unsigned int, unsigned long);
static int vsoc_mmap(struct file *, struct vm_area_struct *);
static int vsoc_open(struct inode *, struct file *);
static int vsoc_release(struct inode *, struct file *);
static ssize_t vsoc_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t vsoc_write(struct file *, const char __user *, size_t, loff_t *);
static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin);
static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg);
static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm);
static long do_vsoc_describe_region(struct file *,
				    struct vsoc_device_region __user *);
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset);

/*
 * Validate arguments on entry points to the driver.
 */
static int vsoc_validate_inode(struct inode *inode)
{
	if (iminor(inode) >= vsoc_dev.layout->region_count) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid region %d\n", iminor(inode));
		return -ENODEV;
	}
	return 0;
}

static int vsoc_validate_filep(struct file *filp)
{
	int ret = vsoc_validate_inode(file_inode(filp));

	if (ret)
		return ret;
	if (!filp->private_data) {
		dev_err(&vsoc_dev.dev->dev,
			"No private data on fd, region %d\n",
			iminor(file_inode(filp)));
		return -EBADFD;
	}
	return 0;
}

/* Converts from shared memory offset to virtual address */
static inline void *shm_off_to_virtual_addr(__u32 offset)
{
	return (void __force *)vsoc_dev.kernel_mapped_shm + offset;
}

/* Converts from shared memory offset to physical address */
static inline phys_addr_t shm_off_to_phys_addr(__u32 offset)
{
	return vsoc_dev.shm_phys_start + offset;
}

/*
 * Convenience functions to obtain the region from the inode or file.
 * Dangerous to call before validating the inode/file.
 */
static
inline struct vsoc_device_region *vsoc_region_from_inode(struct inode *inode)
{
	return &vsoc_dev.regions[iminor(inode)];
}

static
inline struct vsoc_device_region *vsoc_region_from_filep(struct file *filp)
{
	return vsoc_region_from_inode(file_inode(filp));
}

static inline uint32_t vsoc_device_region_size(struct vsoc_device_region *r)
{
	return r->region_end_offset - r->region_begin_offset;
}

static const struct file_operations vsoc_ops = {
	.owner = THIS_MODULE,
	.open = vsoc_open,
	.mmap = vsoc_mmap,
	.read = vsoc_read,
	.unlocked_ioctl = vsoc_ioctl,
	.compat_ioctl = vsoc_ioctl,
	.write = vsoc_write,
	.llseek = vsoc_lseek,
	.release = vsoc_release,
};

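/* 0x1af4 is the Red Hat, Inc. vendor ID; device 0x1110 is QEMU's ivshmem. */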
static const struct pci_device_id vsoc_id_table[] = {
	{0x1af4, 0x1110, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
	{0},
};

MODULE_DEVICE_TABLE(pci, vsoc_id_table);

static void vsoc_remove_device(struct pci_dev *pdev);
static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent);

static struct pci_driver vsoc_pci_driver = {
	.name = "vsoc",
	.id_table = vsoc_id_table,
	.probe = vsoc_probe_device,
	.remove = vsoc_remove_device,
};

static int
do_create_fd_scoped_permission(struct vsoc_device_region *region_p,
			       struct fd_scoped_permission_node *np,
			       struct fd_scoped_permission_arg __user *arg)
{
	struct file *managed_filp;
	s32 managed_fd;
	atomic_t *owner_ptr = NULL;
	struct vsoc_device_region *managed_region_p;
	struct fd managed_f;
	int retval = 0;

	if (copy_from_user(&np->permission,
			   &arg->perm, sizeof(np->permission)) ||
	    copy_from_user(&managed_fd,
			   &arg->managed_region_fd, sizeof(managed_fd))) {
		return -EFAULT;
	}
	managed_f = fdget(managed_fd);
	managed_filp = managed_f.file;
	/* Check that it's a valid fd. */
	if (!managed_filp || vsoc_validate_filep(managed_filp)) {
		retval = -EPERM;
		goto out;
	}
	/* EEXIST if the given fd already has a permission. */
	if (((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node) {
		retval = -EEXIST;
		goto out;
	}
	managed_region_p = vsoc_region_from_filep(managed_filp);
	/* Check that the provided region is managed by this one. */
	if (&vsoc_dev.regions[managed_region_p->managed_by] != region_p) {
		retval = -EPERM;
		goto out;
	}
	/* The area must be well formed and have non-zero size. */
	if (np->permission.begin_offset >= np->permission.end_offset) {
		retval = -EINVAL;
		goto out;
	}
	/* The area must fit in the memory window. */
	if (np->permission.end_offset >
	    vsoc_device_region_size(managed_region_p)) {
		retval = -ERANGE;
		goto out;
	}
	/* The area must be in the region data section. */
	if (np->permission.begin_offset <
	    managed_region_p->offset_of_region_data) {
		retval = -ERANGE;
		goto out;
	}
	/* The area must be page aligned. */
	if (!PAGE_ALIGNED(np->permission.begin_offset) ||
	    !PAGE_ALIGNED(np->permission.end_offset)) {
		retval = -EINVAL;
		goto out;
	}
	/* The owner offset must be naturally aligned in the window. */
	if (np->permission.owner_offset &
	    (sizeof(np->permission.owner_offset) - 1)) {
		retval = -EINVAL;
		goto out;
	}
	/* The owner flag must reside in the owner memory. */
	if (np->permission.owner_offset + sizeof(np->permission.owner_offset) >
	    vsoc_device_region_size(region_p)) {
		retval = -ERANGE;
		goto out;
	}
	/* The owner flag must reside in the data section. */
	if (np->permission.owner_offset < region_p->offset_of_region_data) {
		retval = -EINVAL;
		goto out;
	}
	/* The owner value must change to claim the memory. */
	if (np->permission.owned_value == VSOC_REGION_FREE) {
		retval = -EINVAL;
		goto out;
	}
	owner_ptr =
	    (atomic_t *)shm_off_to_virtual_addr(region_p->region_begin_offset +
						np->permission.owner_offset);
	/* We've already verified that this is in the shared memory window, so
	 * it should be safe to write to this address.
	 */
	if (atomic_cmpxchg(owner_ptr,
			   VSOC_REGION_FREE,
			   np->permission.owned_value) != VSOC_REGION_FREE) {
		retval = -EBUSY;
		goto out;
	}
	((struct vsoc_private_data *)managed_filp->private_data)->
	    fd_scoped_permission_node = np;
	/* The file offset needs to be adjusted if the calling
	 * process did any read/write operations on the fd
	 * before creating the permission.
	 */
	if (managed_filp->f_pos) {
		if (managed_filp->f_pos > np->permission.end_offset) {
			/* If the offset is beyond the permission end, set it
			 * to the end.
			 */
			managed_filp->f_pos = np->permission.end_offset;
		} else if (managed_filp->f_pos < np->permission.begin_offset) {
			/* If the offset is before the permission interval,
			 * reset it to zero.
			 */
			managed_filp->f_pos = 0;
		} else {
			/* The offset is inside the interval; make it
			 * relative to the start of the permission.
			 */
			managed_filp->f_pos -= np->permission.begin_offset;
		}
	}
out:
	/* Balance the fdget() above; this is a no-op if the fd was bad. */
	fdput(managed_f);
	return retval;
}
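
/*
 * Illustrative use from userspace (a sketch, not part of the driver; the
 * device paths and offsets below are assumptions for the example):
 *
 *	int mgr = open("/dev/<manager_region>", O_RDWR);
 *	int fd = open("/dev/<managed_region>", O_RDWR);
 *	struct fd_scoped_permission_arg arg = {
 *		.perm = {
 *			.begin_offset = 0x1000,
 *			.end_offset = 0x2000,
 *			.owner_offset = 0x40,
 *			.owned_value = 42,
 *		},
 *		.managed_region_fd = fd,
 *	};
 *
 *	if (ioctl(mgr, VSOC_CREATE_FD_SCOPED_PERMISSION, &arg) == -1)
 *		err(1, "VSOC_CREATE_FD_SCOPED_PERMISSION");
 *
 * On success, reads, writes and mmaps on fd are confined to
 * [begin_offset, end_offset) of the managed region until fd is closed.
 */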

static void
do_destroy_fd_scoped_permission_node(struct vsoc_device_region *owner_region_p,
				     struct fd_scoped_permission_node *node)
{
	if (node) {
		do_destroy_fd_scoped_permission(owner_region_p,
						&node->permission);
		mutex_lock(&vsoc_dev.mtx);
		list_del(&node->list);
		mutex_unlock(&vsoc_dev.mtx);
		kfree(node);
	}
}

static void
do_destroy_fd_scoped_permission(struct vsoc_device_region *owner_region_p,
				struct fd_scoped_permission *perm)
{
	atomic_t *owner_ptr = NULL;
	int prev = 0;

	if (!perm)
		return;
	owner_ptr = (atomic_t *)shm_off_to_virtual_addr
		(owner_region_p->region_begin_offset + perm->owner_offset);
	prev = atomic_xchg(owner_ptr, VSOC_REGION_FREE);
	if (prev != perm->owned_value)
		dev_err(&vsoc_dev.dev->dev,
			"%x-%x: owner (%s) %x: expected to be %x was %x\n",
			perm->begin_offset, perm->end_offset,
			owner_region_p->device_name, perm->owner_offset,
			perm->owned_value, prev);
}

static long do_vsoc_describe_region(struct file *filp,
				    struct vsoc_device_region __user *dest)
{
	struct vsoc_device_region *region_p;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	if (copy_to_user(dest, region_p, sizeof(*region_p)))
		return -EFAULT;
	return 0;
}

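/*
 * Illustrative use from userspace (a sketch, not part of the driver):
 *
 *	struct vsoc_device_region desc;
 *
 *	if (ioctl(fd, VSOC_DESCRIBE_REGION, &desc) == 0)
 *		printf("%s spans [0x%x, 0x%x)\n", desc.device_name,
 *		       desc.region_begin_offset, desc.region_end_offset);
 */
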
/*
 * Implements the inner logic of cond_wait. Copies to and from userspace are
 * done in the helper function below.
 */
static int handle_vsoc_cond_wait(struct file *filp, struct vsoc_cond_wait *arg)
{
	DEFINE_WAIT(wait);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;
	struct hrtimer_sleeper timeout, *to = NULL;
	int ret = 0;
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	atomic_t *address = NULL;
	ktime_t wake_time;

	/* Ensure that the offset is aligned. */
	if (arg->offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory. */
	if (((uint64_t)arg->offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	address = shm_off_to_virtual_addr(region_p->region_begin_offset +
					  arg->offset);

	/* Ensure that the type of wait is valid. */
	switch (arg->wait_type) {
	case VSOC_WAIT_IF_EQUAL:
		break;
	case VSOC_WAIT_IF_EQUAL_TIMEOUT:
		to = &timeout;
		break;
	default:
		return -EINVAL;
	}

	if (to) {
		/* Copy the user-supplied timespec into the kernel structure.
		 * We do things this way to flatten differences between 32 bit
		 * and 64 bit timespecs.
		 */
		if (arg->wake_time_nsec >= NSEC_PER_SEC)
			return -EINVAL;
		wake_time = ktime_set(arg->wake_time_sec, arg->wake_time_nsec);

		hrtimer_init_sleeper_on_stack(to, CLOCK_MONOTONIC,
					      HRTIMER_MODE_ABS);
		hrtimer_set_expires_range_ns(&to->timer, wake_time,
					     current->timer_slack_ns);
	}

	while (1) {
		prepare_to_wait(&data->futex_wait_queue, &wait,
				TASK_INTERRUPTIBLE);
		/*
		 * Check the sentinel value after prepare_to_wait. If the value
		 * changes after this check the writer will call signal,
		 * changing the task state from INTERRUPTIBLE to RUNNING. That
		 * will ensure that schedule() will eventually schedule this
		 * task.
		 */
		if (atomic_read(address) != arg->value) {
			ret = 0;
			break;
		}
		if (to) {
			hrtimer_sleeper_start_expires(to, HRTIMER_MODE_ABS);
			if (likely(to->task))
				freezable_schedule();
			hrtimer_cancel(&to->timer);
			if (!to->task) {
				ret = -ETIMEDOUT;
				break;
			}
		} else {
			freezable_schedule();
		}
		/* Count the number of times that we woke up. This is useful
		 * for unit testing.
		 */
		++arg->wakes;
		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}
	}
	finish_wait(&data->futex_wait_queue, &wait);
	if (to)
		destroy_hrtimer_on_stack(&to->timer);
	return ret;
}

/*
 * Handles the details of copying from/to userspace to ensure that the copies
 * happen on all of the return paths of cond_wait.
 */
static int do_vsoc_cond_wait(struct file *filp,
			     struct vsoc_cond_wait __user *untrusted_in)
{
	struct vsoc_cond_wait arg;
	int rval = 0;

	if (copy_from_user(&arg, untrusted_in, sizeof(arg)))
		return -EFAULT;
	/* wakes is an out parameter. Initialize it to something sensible. */
	arg.wakes = 0;
	rval = handle_vsoc_cond_wait(filp, &arg);
	if (copy_to_user(untrusted_in, &arg, sizeof(arg)))
		return -EFAULT;
	return rval;
}

static int do_vsoc_cond_wake(struct file *filp, uint32_t offset)
{
	struct vsoc_device_region *region_p = vsoc_region_from_filep(filp);
	u32 region_number = iminor(file_inode(filp));
	struct vsoc_region_data *data = vsoc_dev.regions_data + region_number;

	/* Ensure that the offset is aligned. */
	if (offset & (sizeof(uint32_t) - 1))
		return -EADDRNOTAVAIL;
	/* Ensure that the offset is within shared memory. */
	if (((uint64_t)offset) + region_p->region_begin_offset +
	    sizeof(uint32_t) > region_p->region_end_offset)
		return -E2BIG;
	/*
	 * TODO(b/73664181): Use multiple futex wait queues.
	 * We need to wake every sleeper when the condition changes. Typically
	 * only a single thread will be waiting on the condition, but there
	 * are exceptions. The worst case is about 10 threads.
	 */
	wake_up_interruptible_all(&data->futex_wait_queue);
	return 0;
}
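
/*
 * Illustrative pairing of the two calls from userspace (a sketch; offset
 * must be the 4-byte-aligned offset of a signal word inside the region's
 * data section):
 *
 *	struct vsoc_cond_wait w = {
 *		.offset = offset,
 *		.value = last_seen_value,
 *		.wait_type = VSOC_WAIT_IF_EQUAL,
 *	};
 *
 *	ioctl(fd, VSOC_COND_WAIT, &w);	// blocks while the word == value
 *
 * and, in the thread that changes the word:
 *
 *	ioctl(fd, VSOC_COND_WAKE, offset);	// wakes all waiters
 */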

static long vsoc_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int rv = 0;
	struct vsoc_device_region *region_p;
	u32 reg_num;
	struct vsoc_region_data *reg_data;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	region_p = vsoc_region_from_filep(filp);
	reg_num = iminor(file_inode(filp));
	reg_data = vsoc_dev.regions_data + reg_num;
	switch (cmd) {
	case VSOC_CREATE_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node = NULL;

			node = kzalloc(sizeof(*node), GFP_KERNEL);
			/* We can't allocate memory for the permission. */
			if (!node)
				return -ENOMEM;
			INIT_LIST_HEAD(&node->list);
			rv = do_create_fd_scoped_permission
				(region_p,
				 node,
				 (struct fd_scoped_permission_arg __user *)arg);
			if (!rv) {
				mutex_lock(&vsoc_dev.mtx);
				list_add(&node->list, &vsoc_dev.permissions);
				mutex_unlock(&vsoc_dev.mtx);
			} else {
				kfree(node);
				return rv;
			}
		}
		break;

	case VSOC_GET_FD_SCOPED_PERMISSION:
		{
			struct fd_scoped_permission_node *node =
			    ((struct vsoc_private_data *)filp->private_data)->
			    fd_scoped_permission_node;

			if (!node)
				return -ENOENT;
			if (copy_to_user
			    ((struct fd_scoped_permission __user *)arg,
			     &node->permission, sizeof(node->permission)))
				return -EFAULT;
		}
		break;

	case VSOC_MAYBE_SEND_INTERRUPT_TO_HOST:
		if (atomic_xchg(reg_data->outgoing_signalled, 1))
			return -EBUSY;
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_SEND_INTERRUPT_TO_HOST:
		writel(reg_num, vsoc_dev.regs + DOORBELL);
		return 0;

	case VSOC_WAIT_FOR_INCOMING_INTERRUPT:
		wait_event_interruptible
			(reg_data->interrupt_wait_queue,
			 (atomic_read(reg_data->incoming_signalled) != 0));
		break;

	case VSOC_DESCRIBE_REGION:
		return do_vsoc_describe_region
			(filp,
			 (struct vsoc_device_region __user *)arg);

	case VSOC_SELF_INTERRUPT:
		atomic_set(reg_data->incoming_signalled, 1);
		wake_up_interruptible(&reg_data->interrupt_wait_queue);
		break;

	case VSOC_COND_WAIT:
		return do_vsoc_cond_wait(filp,
					 (struct vsoc_cond_wait __user *)arg);

	case VSOC_COND_WAKE:
		return do_vsoc_cond_wake(filp, arg);

	default:
		return -EINVAL;
	}
	return 0;
}
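
/*
 * Illustrative interrupt round trip from userspace (a sketch):
 *
 *	// Guest -> host: ring the doorbell unless a signal is pending.
 *	if (ioctl(fd, VSOC_MAYBE_SEND_INTERRUPT_TO_HOST) == -1 &&
 *	    errno == EBUSY)
 *		;	// the host has not consumed the previous signal
 *
 *	// Host -> guest: block until the host signals this region.
 *	ioctl(fd, VSOC_WAIT_FOR_INCOMING_INTERRUPT);
 */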

static ssize_t vsoc_read(struct file *filp, char __user *buffer, size_t len,
			 loff_t *poffset)
{
	__u32 area_off;
	const void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_to_user(buffer, area_p, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

static loff_t vsoc_lseek(struct file *filp, loff_t offset, int origin)
{
	ssize_t area_len = 0;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, NULL);
	switch (origin) {
	case SEEK_SET:
		break;

	case SEEK_CUR:
		if (offset > 0 && offset + filp->f_pos < 0)
			return -EOVERFLOW;
		offset += filp->f_pos;
		break;

	case SEEK_END:
		if (offset > 0 && offset + area_len < 0)
			return -EOVERFLOW;
		offset += area_len;
		break;

	case SEEK_DATA:
		if (offset >= area_len)
			return -EINVAL;
		if (offset < 0)
			offset = 0;
		break;

	case SEEK_HOLE:
		/* Next hole is always the end of the region, unless offset is
		 * beyond that.
		 */
		if (offset < area_len)
			offset = area_len;
		break;

	default:
		return -EINVAL;
	}

	if (offset < 0 || offset > area_len)
		return -EINVAL;
	filp->f_pos = offset;

	return offset;
}

static ssize_t vsoc_write(struct file *filp, const char __user *buffer,
			  size_t len, loff_t *poffset)
{
	__u32 area_off;
	void *area_p;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	area_p = shm_off_to_virtual_addr(area_off);
	area_p += *poffset;
	area_len -= *poffset;
	if (area_len <= 0)
		return 0;
	if (area_len < len)
		len = area_len;
	if (copy_from_user(area_p, buffer, len))
		return -EFAULT;
	*poffset += len;
	return len;
}

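/*
 * MSI-X interrupt handler. vsoc_probe_device() requests one vector per
 * region and passes that region's vsoc_region_data as dev_id, so
 * region_data identifies the region the host signalled.
 */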
static irqreturn_t vsoc_interrupt(int irq, void *region_data_v)
{
	struct vsoc_region_data *region_data =
	    (struct vsoc_region_data *)region_data_v;
	int reg_num;

	if (unlikely(!region_data))
		return IRQ_NONE;

	reg_num = region_data - vsoc_dev.regions_data;
	if (unlikely(reg_num < 0 ||
		     reg_num >= vsoc_dev.layout->region_count)) {
		dev_err(&vsoc_dev.dev->dev,
			"invalid irq @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	if (unlikely(vsoc_dev.regions_data + reg_num != region_data)) {
		dev_err(&vsoc_dev.dev->dev,
			"irq not aligned @%p reg_num=0x%04x\n",
			region_data, reg_num);
		return IRQ_NONE;
	}
	wake_up_interruptible(&region_data->interrupt_wait_queue);
	return IRQ_HANDLED;
}

static int vsoc_probe_device(struct pci_dev *pdev,
			     const struct pci_device_id *ent)
{
	int result;
	int i;
	resource_size_t reg_size;
	dev_t devt;

	vsoc_dev.dev = pdev;
	result = pci_enable_device(pdev);
	if (result) {
		dev_err(&pdev->dev,
			"pci_enable_device failed %s: error %d\n",
			pci_name(pdev), result);
		return result;
	}
	vsoc_dev.enabled_device = true;
	result = pci_request_regions(pdev, "vsoc");
	if (result < 0) {
		dev_err(&pdev->dev, "pci_request_regions failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.requested_regions = true;
	/* Set up the control registers in BAR 0 */
	reg_size = pci_resource_len(pdev, REGISTER_BAR);
	if (reg_size > MAX_REGISTER_BAR_LEN)
		vsoc_dev.regs =
		    pci_iomap(pdev, REGISTER_BAR, MAX_REGISTER_BAR_LEN);
	else
		vsoc_dev.regs = pci_iomap(pdev, REGISTER_BAR, reg_size);

	if (!vsoc_dev.regs) {
		dev_err(&pdev->dev,
			"cannot map registers of size %zu\n",
			(size_t)reg_size);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	/* Map the shared memory in BAR 2 */
	vsoc_dev.shm_phys_start = pci_resource_start(pdev, SHARED_MEMORY_BAR);
	vsoc_dev.shm_size = pci_resource_len(pdev, SHARED_MEMORY_BAR);

	dev_info(&pdev->dev, "shared memory @ DMA %pa size=0x%zx\n",
		 &vsoc_dev.shm_phys_start, vsoc_dev.shm_size);
	vsoc_dev.kernel_mapped_shm = pci_iomap_wc(pdev, SHARED_MEMORY_BAR, 0);
	if (!vsoc_dev.kernel_mapped_shm) {
		dev_err(&vsoc_dev.dev->dev, "cannot iomap region\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}

	vsoc_dev.layout = (struct vsoc_shm_layout_descriptor __force *)
				vsoc_dev.kernel_mapped_shm;
	dev_info(&pdev->dev, "major_version: %d\n",
		 vsoc_dev.layout->major_version);
	dev_info(&pdev->dev, "minor_version: %d\n",
		 vsoc_dev.layout->minor_version);
	dev_info(&pdev->dev, "size: 0x%x\n", vsoc_dev.layout->size);
	dev_info(&pdev->dev, "regions: %d\n", vsoc_dev.layout->region_count);
	if (vsoc_dev.layout->major_version !=
	    CURRENT_VSOC_LAYOUT_MAJOR_VERSION) {
		dev_err(&vsoc_dev.dev->dev,
			"driver supports only major_version %d\n",
			CURRENT_VSOC_LAYOUT_MAJOR_VERSION);
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	result = alloc_chrdev_region(&devt, 0, vsoc_dev.layout->region_count,
				     VSOC_DEV_NAME);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "alloc_chrdev_region failed\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.major = MAJOR(devt);
	cdev_init(&vsoc_dev.cdev, &vsoc_ops);
	vsoc_dev.cdev.owner = THIS_MODULE;
	result = cdev_add(&vsoc_dev.cdev, devt, vsoc_dev.layout->region_count);
	if (result) {
		dev_err(&vsoc_dev.dev->dev, "cdev_add error\n");
		vsoc_remove_device(pdev);
		return -EBUSY;
	}
	vsoc_dev.cdev_added = true;
	vsoc_dev.class = class_create(THIS_MODULE, VSOC_DEV_NAME);
	if (IS_ERR(vsoc_dev.class)) {
		dev_err(&vsoc_dev.dev->dev, "class_create failed\n");
		vsoc_remove_device(pdev);
		return PTR_ERR(vsoc_dev.class);
	}
	vsoc_dev.class_added = true;
	vsoc_dev.regions = (struct vsoc_device_region __force *)
		((void *)vsoc_dev.layout +
		 vsoc_dev.layout->vsoc_region_desc_offset);
	vsoc_dev.msix_entries =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.msix_entries[0]), GFP_KERNEL);
	if (!vsoc_dev.msix_entries) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate msix_entries\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	vsoc_dev.regions_data =
		kcalloc(vsoc_dev.layout->region_count,
			sizeof(vsoc_dev.regions_data[0]), GFP_KERNEL);
	if (!vsoc_dev.regions_data) {
		dev_err(&vsoc_dev.dev->dev,
			"unable to allocate regions' data\n");
		vsoc_remove_device(pdev);
		return -ENOMEM;
	}
	for (i = 0; i < vsoc_dev.layout->region_count; ++i)
		vsoc_dev.msix_entries[i].entry = i;

	result = pci_enable_msix_exact(vsoc_dev.dev, vsoc_dev.msix_entries,
				       vsoc_dev.layout->region_count);
	if (result) {
		dev_info(&pdev->dev, "pci_enable_msix_exact failed: %d\n",
			 result);
		vsoc_remove_device(pdev);
		return -ENOSPC;
	}
	/* Check that all regions are well formed */
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;

		if (!PAGE_ALIGNED(region->region_begin_offset) ||
		    !PAGE_ALIGNED(region->region_end_offset)) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d not aligned (%x:%x)\n", i,
				region->region_begin_offset,
				region->region_end_offset);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->region_begin_offset >= region->region_end_offset ||
		    region->region_end_offset > vsoc_dev.shm_size) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d offsets are wrong: %x %x %zx\n",
				i, region->region_begin_offset,
				region->region_end_offset, vsoc_dev.shm_size);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
		if (region->managed_by >= vsoc_dev.layout->region_count) {
			dev_err(&vsoc_dev.dev->dev,
				"region %d has invalid owner: %u\n",
				i, region->managed_by);
			vsoc_remove_device(pdev);
			return -EFAULT;
		}
	}
	vsoc_dev.msix_enabled = true;
	for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
		const struct vsoc_device_region *region = vsoc_dev.regions + i;
		size_t name_sz = sizeof(vsoc_dev.regions_data[i].name) - 1;
		const struct vsoc_signal_table_layout *h_to_g_signal_table =
			&region->host_to_guest_signal_table;
		const struct vsoc_signal_table_layout *g_to_h_signal_table =
			&region->guest_to_host_signal_table;

		vsoc_dev.regions_data[i].name[name_sz] = '\0';
		memcpy(vsoc_dev.regions_data[i].name, region->device_name,
		       name_sz);
		dev_info(&pdev->dev, "region %d name=%s\n",
			 i, vsoc_dev.regions_data[i].name);
		init_waitqueue_head
			(&vsoc_dev.regions_data[i].interrupt_wait_queue);
		init_waitqueue_head(&vsoc_dev.regions_data[i].futex_wait_queue);
		vsoc_dev.regions_data[i].incoming_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			h_to_g_signal_table->interrupt_signalled_offset;
		vsoc_dev.regions_data[i].outgoing_signalled =
			shm_off_to_virtual_addr(region->region_begin_offset) +
			g_to_h_signal_table->interrupt_signalled_offset;
		result = request_irq(vsoc_dev.msix_entries[i].vector,
				     vsoc_interrupt, 0,
				     vsoc_dev.regions_data[i].name,
				     vsoc_dev.regions_data + i);
		if (result) {
			dev_info(&pdev->dev,
				 "request_irq failed region=%d vector=%d\n",
				 i, vsoc_dev.msix_entries[i].vector);
			vsoc_remove_device(pdev);
			return -ENOSPC;
		}
		vsoc_dev.regions_data[i].irq_requested = true;
		if (IS_ERR(device_create(vsoc_dev.class, NULL,
					 MKDEV(vsoc_dev.major, i),
					 NULL, "%s",
					 vsoc_dev.regions_data[i].name))) {
			dev_err(&vsoc_dev.dev->dev, "device_create failed\n");
			vsoc_remove_device(pdev);
			return -EBUSY;
		}
		vsoc_dev.regions_data[i].device_created = true;
	}
	return 0;
}

/*
 * This should undo all of the allocations in the probe function in reverse
 * order.
 *
 * Notes:
 *
 *   The device may have been partially initialized, so double check
 *   that the allocations happened.
 *
 *   This function may be called multiple times, so mark resources as freed
 *   as they are deallocated.
 */
static void vsoc_remove_device(struct pci_dev *pdev)
{
	int i;
	/*
	 * pdev is the first thing to be set on probe and the last thing
	 * to be cleared here. If it's NULL then there is no cleanup.
	 */
	if (!pdev || !vsoc_dev.dev)
		return;
	dev_info(&pdev->dev, "remove_device\n");
	if (vsoc_dev.regions_data) {
		for (i = 0; i < vsoc_dev.layout->region_count; ++i) {
			if (vsoc_dev.regions_data[i].device_created) {
				device_destroy(vsoc_dev.class,
					       MKDEV(vsoc_dev.major, i));
				vsoc_dev.regions_data[i].device_created = false;
			}
			if (vsoc_dev.regions_data[i].irq_requested)
				free_irq(vsoc_dev.msix_entries[i].vector,
					 vsoc_dev.regions_data + i);
			vsoc_dev.regions_data[i].irq_requested = false;
		}
		kfree(vsoc_dev.regions_data);
		vsoc_dev.regions_data = NULL;
	}
	if (vsoc_dev.msix_enabled) {
		pci_disable_msix(pdev);
		vsoc_dev.msix_enabled = false;
	}
	kfree(vsoc_dev.msix_entries);
	vsoc_dev.msix_entries = NULL;
	vsoc_dev.regions = NULL;
	if (vsoc_dev.class_added) {
		class_destroy(vsoc_dev.class);
		vsoc_dev.class_added = false;
	}
	if (vsoc_dev.cdev_added) {
		cdev_del(&vsoc_dev.cdev);
		vsoc_dev.cdev_added = false;
	}
	if (vsoc_dev.major && vsoc_dev.layout) {
		unregister_chrdev_region(MKDEV(vsoc_dev.major, 0),
					 vsoc_dev.layout->region_count);
		vsoc_dev.major = 0;
	}
	vsoc_dev.layout = NULL;
	if (vsoc_dev.kernel_mapped_shm) {
		pci_iounmap(pdev, vsoc_dev.kernel_mapped_shm);
		vsoc_dev.kernel_mapped_shm = NULL;
	}
	if (vsoc_dev.regs) {
		pci_iounmap(pdev, vsoc_dev.regs);
		vsoc_dev.regs = NULL;
	}
	if (vsoc_dev.requested_regions) {
		pci_release_regions(pdev);
		vsoc_dev.requested_regions = false;
	}
	if (vsoc_dev.enabled_device) {
		pci_disable_device(pdev);
		vsoc_dev.enabled_device = false;
	}
	/* Do this last: it indicates that the device is not initialized. */
	vsoc_dev.dev = NULL;
}

static void __exit vsoc_cleanup_module(void)
{
	vsoc_remove_device(vsoc_dev.dev);
	pci_unregister_driver(&vsoc_pci_driver);
}

static int __init vsoc_init_module(void)
{
	INIT_LIST_HEAD(&vsoc_dev.permissions);
	mutex_init(&vsoc_dev.mtx);

	return pci_register_driver(&vsoc_pci_driver);
}

static int vsoc_open(struct inode *inode, struct file *filp)
{
	/* Can't use vsoc_validate_filep because filp is still incomplete */
	int ret = vsoc_validate_inode(inode);

	if (ret)
		return ret;
	filp->private_data =
		kzalloc(sizeof(struct vsoc_private_data), GFP_KERNEL);
	if (!filp->private_data)
		return -ENOMEM;
	return 0;
}

static int vsoc_release(struct inode *inode, struct file *filp)
{
	struct vsoc_private_data *private_data = NULL;
	struct fd_scoped_permission_node *node = NULL;
	struct vsoc_device_region *owner_region_p = NULL;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	private_data = (struct vsoc_private_data *)filp->private_data;
	if (!private_data)
		return 0;

	node = private_data->fd_scoped_permission_node;
	if (node) {
		owner_region_p = vsoc_region_from_inode(inode);
		if (owner_region_p->managed_by != VSOC_REGION_WHOLE) {
			owner_region_p =
			    &vsoc_dev.regions[owner_region_p->managed_by];
		}
		do_destroy_fd_scoped_permission_node(owner_region_p, node);
		private_data->fd_scoped_permission_node = NULL;
	}
	kfree(private_data);
	filp->private_data = NULL;

	return 0;
}

/*
 * Returns the device-relative offset and length of the area specified by the
 * fd-scoped permission. If there is no fd-scoped permission set, a default
 * permission covering the entire region is assumed, unless the region is
 * managed by another one, in which case the default is a permission with
 * zero size.
 */
static ssize_t vsoc_get_area(struct file *filp, __u32 *area_offset)
{
	__u32 off = 0;
	ssize_t length = 0;
	struct vsoc_device_region *region_p;
	struct fd_scoped_permission_node *node;

	region_p = vsoc_region_from_filep(filp);
	off = region_p->region_begin_offset;
	node = ((struct vsoc_private_data *)filp->private_data)->
		fd_scoped_permission_node;
	if (node) {
		struct fd_scoped_permission *perm = &node->permission;

		off += perm->begin_offset;
		length = perm->end_offset - perm->begin_offset;
	} else if (region_p->managed_by == VSOC_REGION_WHOLE) {
		/* No permission set and the region is not owned by another,
		 * default to full region access.
		 */
		length = vsoc_device_region_size(region_p);
	} else {
		/* Return zero length: access is denied. */
		length = 0;
	}
	if (area_offset)
		*area_offset = off;
	return length;
}

static int vsoc_mmap(struct file *filp, struct vm_area_struct *vma)
{
	unsigned long len = vma->vm_end - vma->vm_start;
	__u32 area_off;
	phys_addr_t mem_off;
	ssize_t area_len;
	int retval = vsoc_validate_filep(filp);

	if (retval)
		return retval;
	area_len = vsoc_get_area(filp, &area_off);
	/* Add the requested offset */
	area_off += (vma->vm_pgoff << PAGE_SHIFT);
	area_len -= (vma->vm_pgoff << PAGE_SHIFT);
	if (area_len < len)
		return -EINVAL;
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	mem_off = shm_off_to_phys_addr(area_off);
	if (io_remap_pfn_range(vma, vma->vm_start, mem_off >> PAGE_SHIFT,
			       len, vma->vm_page_prot))
		return -EAGAIN;
	return 0;
}
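
/*
 * Illustrative mapping from userspace (a sketch): the accessible window
 * reported by lseek(SEEK_END) can be mapped and used as shared memory.
 *
 *	off_t len = lseek(fd, 0, SEEK_END);
 *	void *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *			  MAP_SHARED, fd, 0);
 *
 *	if (base == MAP_FAILED)
 *		err(1, "mmap");
 */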

module_init(vsoc_init_module);
module_exit(vsoc_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Greg Hartman <ghartman@google.com>");
MODULE_DESCRIPTION("VSoC interpretation of QEMU's ivshmem device");
MODULE_VERSION("1.0");