linux/drivers/vfio/vfio.c
/*
 * VFIO core
 *
 * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
 *     Author: Alex Williamson <alex.williamson@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Derived from original vfio:
 * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
 * Author: Tom Lyon, pugs@cisco.com
 */

#include <linux/cdev.h>
#include <linux/compat.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/anon_inodes.h>
#include <linux/fs.h>
#include <linux/idr.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/wait.h>

#define DRIVER_VERSION  "0.3"
#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC     "VFIO - User Level meta-driver"

static struct vfio {
        struct class                    *class;
        struct list_head                iommu_drivers_list;
        struct mutex                    iommu_drivers_lock;
        struct list_head                group_list;
        struct idr                      group_idr;
        struct mutex                    group_lock;
        struct cdev                     group_cdev;
        dev_t                           group_devt;
        wait_queue_head_t               release_q;
} vfio;

struct vfio_iommu_driver {
        const struct vfio_iommu_driver_ops      *ops;
        struct list_head                        vfio_next;
};

struct vfio_container {
        struct kref                     kref;
        struct list_head                group_list;
        struct rw_semaphore             group_lock;
        struct vfio_iommu_driver        *iommu_driver;
        void                            *iommu_data;
};

struct vfio_unbound_dev {
        struct device                   *dev;
        struct list_head                unbound_next;
};

struct vfio_group {
        struct kref                     kref;
        int                             minor;
        atomic_t                        container_users;
        struct iommu_group              *iommu_group;
        struct vfio_container           *container;
        struct list_head                device_list;
        struct mutex                    device_lock;
        struct device                   *dev;
        struct notifier_block           nb;
        struct list_head                vfio_next;
        struct list_head                container_next;
        struct list_head                unbound_list;
        struct mutex                    unbound_lock;
        atomic_t                        opened;
};

struct vfio_device {
        struct kref                     kref;
        struct device                   *dev;
        const struct vfio_device_ops    *ops;
        struct vfio_group               *group;
        struct list_head                group_next;
        void                            *device_data;
};

/**
 * IOMMU driver registration
 */
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
        struct vfio_iommu_driver *driver, *tmp;

        driver = kzalloc(sizeof(*driver), GFP_KERNEL);
        if (!driver)
                return -ENOMEM;

        driver->ops = ops;

        mutex_lock(&vfio.iommu_drivers_lock);

        /* Check for duplicates */
        list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
                if (tmp->ops == ops) {
                        mutex_unlock(&vfio.iommu_drivers_lock);
                        kfree(driver);
                        return -EINVAL;
                }
        }

        list_add(&driver->vfio_next, &vfio.iommu_drivers_list);

        mutex_unlock(&vfio.iommu_drivers_lock);

        return 0;
}
EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);

void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
        struct vfio_iommu_driver *driver;

        mutex_lock(&vfio.iommu_drivers_lock);
        list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
                if (driver->ops == ops) {
                        list_del(&driver->vfio_next);
                        mutex_unlock(&vfio.iommu_drivers_lock);
                        kfree(driver);
                        return;
                }
        }
        mutex_unlock(&vfio.iommu_drivers_lock);
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
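
/*
 * Illustrative sketch (not from this file): the registration dance an
 * IOMMU backend such as vfio_iommu_type1 performs against the API
 * above.  The "example_iommu" names and stub callbacks are hypothetical;
 * a real backend also fills in .ioctl, .attach_group and .detach_group
 * from the vfio_iommu_driver_ops in <linux/vfio.h>.
 */
#ifdef VFIO_EXAMPLE_IOMMU_BACKEND
static void *example_iommu_open(unsigned long arg)
{
        /* Per-container state, returned as iommu_data to the ops below */
        return kzalloc(sizeof(unsigned long), GFP_KERNEL);
}

static void example_iommu_release(void *iommu_data)
{
        kfree(iommu_data);
}

static const struct vfio_iommu_driver_ops example_iommu_ops = {
        .name           = "example",
        .owner          = THIS_MODULE,
        .open           = example_iommu_open,
        .release        = example_iommu_release,
        /* .ioctl, .attach_group, .detach_group, ... */
};

static int __init example_iommu_init(void)
{
        /* Fails with -EINVAL if these ops are already registered */
        return vfio_register_iommu_driver(&example_iommu_ops);
}

static void __exit example_iommu_exit(void)
{
        vfio_unregister_iommu_driver(&example_iommu_ops);
}
#endif /* VFIO_EXAMPLE_IOMMU_BACKEND */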

/**
 * Group minor allocation/free - both called with vfio.group_lock held
 */
static int vfio_alloc_group_minor(struct vfio_group *group)
{
        return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
}

static void vfio_free_group_minor(int minor)
{
        idr_remove(&vfio.group_idr, minor);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);

/**
 * Container objects - containers are created when /dev/vfio/vfio is
 * opened, but their lifecycle extends until the last user is done, so
 * it's freed via kref.  Must support container/group/device being
 * closed in any order.
 */
static void vfio_container_get(struct vfio_container *container)
{
        kref_get(&container->kref);
}

static void vfio_container_release(struct kref *kref)
{
        struct vfio_container *container;
        container = container_of(kref, struct vfio_container, kref);

        kfree(container);
}

static void vfio_container_put(struct vfio_container *container)
{
        kref_put(&container->kref, vfio_container_release);
}

static void vfio_group_unlock_and_free(struct vfio_group *group)
{
        mutex_unlock(&vfio.group_lock);
        /*
         * Unregister outside of lock.  A spurious callback is harmless now
         * that the group is no longer in vfio.group_list.
         */
        iommu_group_unregister_notifier(group->iommu_group, &group->nb);
        kfree(group);
}

/**
 * Group objects - create, release, get, put, search
 */
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
{
        struct vfio_group *group, *tmp;
        struct device *dev;
        int ret, minor;

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group)
                return ERR_PTR(-ENOMEM);

        kref_init(&group->kref);
        INIT_LIST_HEAD(&group->device_list);
        mutex_init(&group->device_lock);
        INIT_LIST_HEAD(&group->unbound_list);
        mutex_init(&group->unbound_lock);
        atomic_set(&group->container_users, 0);
        atomic_set(&group->opened, 0);
        group->iommu_group = iommu_group;

        group->nb.notifier_call = vfio_iommu_group_notifier;

        /*
         * blocking notifiers acquire a rwsem around registering and hold
         * it around callback.  Therefore, need to register outside of
         * vfio.group_lock to avoid A-B/B-A contention.  Our callback won't
         * do anything unless it can find the group in vfio.group_list, so
         * no harm in registering early.
         */
        ret = iommu_group_register_notifier(iommu_group, &group->nb);
        if (ret) {
                kfree(group);
                return ERR_PTR(ret);
        }

        mutex_lock(&vfio.group_lock);

        /* Did we race creating this group? */
        list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
                if (tmp->iommu_group == iommu_group) {
                        vfio_group_get(tmp);
                        vfio_group_unlock_and_free(group);
                        return tmp;
                }
        }

        minor = vfio_alloc_group_minor(group);
        if (minor < 0) {
                vfio_group_unlock_and_free(group);
                return ERR_PTR(minor);
        }

        dev = device_create(vfio.class, NULL,
                            MKDEV(MAJOR(vfio.group_devt), minor),
                            group, "%d", iommu_group_id(iommu_group));
        if (IS_ERR(dev)) {
                vfio_free_group_minor(minor);
                vfio_group_unlock_and_free(group);
                return (struct vfio_group *)dev; /* ERR_PTR */
        }

        group->minor = minor;
        group->dev = dev;

        list_add(&group->vfio_next, &vfio.group_list);

        mutex_unlock(&vfio.group_lock);

        return group;
}

/* called with vfio.group_lock held */
static void vfio_group_release(struct kref *kref)
{
        struct vfio_group *group = container_of(kref, struct vfio_group, kref);
        struct vfio_unbound_dev *unbound, *tmp;
        struct iommu_group *iommu_group = group->iommu_group;

        WARN_ON(!list_empty(&group->device_list));

        list_for_each_entry_safe(unbound, tmp,
                                 &group->unbound_list, unbound_next) {
                list_del(&unbound->unbound_next);
                kfree(unbound);
        }

        device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
        list_del(&group->vfio_next);
        vfio_free_group_minor(group->minor);
        vfio_group_unlock_and_free(group);
        iommu_group_put(iommu_group);
}

static void vfio_group_put(struct vfio_group *group)
{
        kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
}

/* Assume group_lock or group reference is held */
static void vfio_group_get(struct vfio_group *group)
{
        kref_get(&group->kref);
}

/*
 * Not really a try as we will sleep for mutex, but we need to make
 * sure the group pointer is valid under lock and get a reference.
 */
static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
{
        struct vfio_group *target = group;

        mutex_lock(&vfio.group_lock);
        list_for_each_entry(group, &vfio.group_list, vfio_next) {
                if (group == target) {
                        vfio_group_get(group);
                        mutex_unlock(&vfio.group_lock);
                        return group;
                }
        }
        mutex_unlock(&vfio.group_lock);

        return NULL;
}

static
struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
        struct vfio_group *group;

        mutex_lock(&vfio.group_lock);
        list_for_each_entry(group, &vfio.group_list, vfio_next) {
                if (group->iommu_group == iommu_group) {
                        vfio_group_get(group);
                        mutex_unlock(&vfio.group_lock);
                        return group;
                }
        }
        mutex_unlock(&vfio.group_lock);

        return NULL;
}

static struct vfio_group *vfio_group_get_from_minor(int minor)
{
        struct vfio_group *group;

        mutex_lock(&vfio.group_lock);
        group = idr_find(&vfio.group_idr, minor);
        if (!group) {
                mutex_unlock(&vfio.group_lock);
                return NULL;
        }
        vfio_group_get(group);
        mutex_unlock(&vfio.group_lock);

        return group;
}

/**
 * Device objects - create, release, get, put, search
 */
static
struct vfio_device *vfio_group_create_device(struct vfio_group *group,
                                             struct device *dev,
                                             const struct vfio_device_ops *ops,
                                             void *device_data)
{
        struct vfio_device *device;

        device = kzalloc(sizeof(*device), GFP_KERNEL);
        if (!device)
                return ERR_PTR(-ENOMEM);

        kref_init(&device->kref);
        device->dev = dev;
        device->group = group;
        device->ops = ops;
        device->device_data = device_data;
        dev_set_drvdata(dev, device);

        /* No need to get group_lock, caller has group reference */
        vfio_group_get(group);

        mutex_lock(&group->device_lock);
        list_add(&device->group_next, &group->device_list);
        mutex_unlock(&group->device_lock);

        return device;
}

static void vfio_device_release(struct kref *kref)
{
        struct vfio_device *device = container_of(kref,
                                                  struct vfio_device, kref);
        struct vfio_group *group = device->group;

        list_del(&device->group_next);
        mutex_unlock(&group->device_lock);

        dev_set_drvdata(device->dev, NULL);

        kfree(device);

        /* vfio_del_group_dev may be waiting for this device */
        wake_up(&vfio.release_q);
}

/* Device reference always implies a group reference */
void vfio_device_put(struct vfio_device *device)
{
        struct vfio_group *group = device->group;
        kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
        vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_device_put);

static void vfio_device_get(struct vfio_device *device)
{
        vfio_group_get(device->group);
        kref_get(&device->kref);
}

static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
                                                 struct device *dev)
{
        struct vfio_device *device;

        mutex_lock(&group->device_lock);
        list_for_each_entry(device, &group->device_list, group_next) {
                if (device->dev == dev) {
                        vfio_device_get(device);
                        mutex_unlock(&group->device_lock);
                        return device;
                }
        }
        mutex_unlock(&group->device_lock);
        return NULL;
}

/*
 * Whitelist some drivers that we know are safe (no dma) or just sit on
 * a device.  It's not always practical to leave a device within a group
 * driverless as it could get re-bound to something unsafe.
 */
static const char * const vfio_driver_whitelist[] = { "pci-stub", "pcieport" };

static bool vfio_whitelisted_driver(struct device_driver *drv)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) {
                if (!strcmp(drv->name, vfio_driver_whitelist[i]))
                        return true;
        }

        return false;
}

/*
 * A vfio group is viable for use by userspace if all devices are in
 * one of the following states:
 *  - driver-less
 *  - bound to a vfio driver
 *  - bound to a whitelisted driver
 *
 * We use two methods to determine whether a device is bound to a vfio
 * driver.  The first is to test whether the device exists in the vfio
 * group.  The second is to test if the device exists on the group
 * unbound_list, indicating it's in the middle of transitioning from
 * a vfio driver to driver-less.
 */
static int vfio_dev_viable(struct device *dev, void *data)
{
        struct vfio_group *group = data;
        struct vfio_device *device;
        struct device_driver *drv = ACCESS_ONCE(dev->driver);
        struct vfio_unbound_dev *unbound;
        int ret = -EINVAL;

        mutex_lock(&group->unbound_lock);
        list_for_each_entry(unbound, &group->unbound_list, unbound_next) {
                if (dev == unbound->dev) {
                        ret = 0;
                        break;
                }
        }
        mutex_unlock(&group->unbound_lock);

        if (!ret || !drv || vfio_whitelisted_driver(drv))
                return 0;

        device = vfio_group_get_device(group, dev);
        if (device) {
                vfio_device_put(device);
                return 0;
        }

        return ret;
}

/**
 * Async device support
 */
static int vfio_group_nb_add_dev(struct vfio_group *group, struct device *dev)
{
        struct vfio_device *device;

        /* Do we already know about it?  We shouldn't */
        device = vfio_group_get_device(group, dev);
        if (WARN_ON_ONCE(device)) {
                vfio_device_put(device);
                return 0;
        }

        /* Nothing to do for idle groups */
        if (!atomic_read(&group->container_users))
                return 0;

        /* TODO Prevent device auto probing */
        WARN(1, "Device %s added to live group %d!\n", dev_name(dev),
             iommu_group_id(group->iommu_group));

        return 0;
}

static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
{
        /* We don't care what happens when the group isn't in use */
        if (!atomic_read(&group->container_users))
                return 0;

        return vfio_dev_viable(dev, group);
}

static int vfio_iommu_group_notifier(struct notifier_block *nb,
                                     unsigned long action, void *data)
{
        struct vfio_group *group = container_of(nb, struct vfio_group, nb);
        struct device *dev = data;
        struct vfio_unbound_dev *unbound;

        /*
         * Need to go through a group_lock lookup to get a reference or we
         * risk racing a group being removed.  Ignore spurious notifies.
         */
        group = vfio_group_try_get(group);
        if (!group)
                return NOTIFY_OK;

        switch (action) {
        case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
                vfio_group_nb_add_dev(group, dev);
                break;
        case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
                /*
                 * Nothing to do here.  If the device is in use, then the
                 * vfio sub-driver should block the remove callback until
                 * it is unused.  If the device is unused or attached to a
                 * stub driver, then it should be released and we don't
                 * care that it will be going away.
                 */
                break;
        case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
                pr_debug("%s: Device %s, group %d binding to driver\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group));
                break;
        case IOMMU_GROUP_NOTIFY_BOUND_DRIVER:
                pr_debug("%s: Device %s, group %d bound to driver %s\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group), dev->driver->name);
                BUG_ON(vfio_group_nb_verify(group, dev));
                break;
        case IOMMU_GROUP_NOTIFY_UNBIND_DRIVER:
                pr_debug("%s: Device %s, group %d unbinding from driver %s\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group), dev->driver->name);
                break;
        case IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER:
                pr_debug("%s: Device %s, group %d unbound from driver\n",
                         __func__, dev_name(dev),
                         iommu_group_id(group->iommu_group));
                /*
                 * XXX An unbound device in a live group is ok, but we'd
                 * really like to avoid the above BUG_ON by preventing other
                 * drivers from binding to it.  Once that occurs, we have to
                 * stop the system to maintain isolation.  At a minimum, we'd
                 * want a toggle to disable driver auto probe for this device.
                 */

                mutex_lock(&group->unbound_lock);
                list_for_each_entry(unbound,
                                    &group->unbound_list, unbound_next) {
                        if (dev == unbound->dev) {
                                list_del(&unbound->unbound_next);
                                kfree(unbound);
                                break;
                        }
                }
                mutex_unlock(&group->unbound_lock);
                break;
        }

        vfio_group_put(group);
        return NOTIFY_OK;
}

/**
 * VFIO driver API
 */
int vfio_add_group_dev(struct device *dev,
                       const struct vfio_device_ops *ops, void *device_data)
{
        struct iommu_group *iommu_group;
        struct vfio_group *group;
        struct vfio_device *device;

        iommu_group = iommu_group_get(dev);
        if (!iommu_group)
                return -EINVAL;

        group = vfio_group_get_from_iommu(iommu_group);
        if (!group) {
                group = vfio_create_group(iommu_group);
                if (IS_ERR(group)) {
                        iommu_group_put(iommu_group);
                        return PTR_ERR(group);
                }
        } else {
                /*
                 * A found vfio_group already holds a reference to the
                 * iommu_group.  A created vfio_group keeps the reference.
                 */
                iommu_group_put(iommu_group);
        }

        device = vfio_group_get_device(group, dev);
        if (device) {
                WARN(1, "Device %s already exists on group %d\n",
                     dev_name(dev), iommu_group_id(iommu_group));
                vfio_device_put(device);
                vfio_group_put(group);
                return -EBUSY;
        }

        device = vfio_group_create_device(group, dev, ops, device_data);
        if (IS_ERR(device)) {
                vfio_group_put(group);
                return PTR_ERR(device);
        }

        /*
         * Drop all but the vfio_device reference.  The vfio_device holds
         * a reference to the vfio_group, which holds a reference to the
         * iommu_group.
         */
        vfio_group_put(group);

        return 0;
}
EXPORT_SYMBOL_GPL(vfio_add_group_dev);

/**
 * Get a reference to the vfio_device for a device that is known to
 * be bound to a vfio driver.  The driver implicitly holds a
 * vfio_device reference between vfio_add_group_dev and
 * vfio_del_group_dev.  We can therefore use drvdata to increment
 * that reference from the struct device.  This additional
 * reference must be released by calling vfio_device_put.
 */
struct vfio_device *vfio_device_get_from_dev(struct device *dev)
{
        struct vfio_device *device = dev_get_drvdata(dev);

        vfio_device_get(device);

        return device;
}
EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);

/*
 * Caller must hold a reference to the vfio_device
 */
void *vfio_device_data(struct vfio_device *device)
{
        return device->device_data;
}
EXPORT_SYMBOL_GPL(vfio_device_data);
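
/*
 * Illustrative sketch (not from this file): a vfio bus driver that
 * registered "dev" via vfio_add_group_dev() can recover its private
 * device_data from the struct device alone.  The helper name is
 * hypothetical.
 */
#ifdef VFIO_EXAMPLE_USAGE
static void *example_dev_to_private(struct device *dev)
{
        /* Cannot fail while dev remains bound to its vfio driver */
        struct vfio_device *device = vfio_device_get_from_dev(dev);
        void *private = vfio_device_data(device);

        /* Drop the reference taken by vfio_device_get_from_dev() */
        vfio_device_put(device);
        return private;
}
#endif /* VFIO_EXAMPLE_USAGE */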

/* Given a referenced group, check if it contains the device */
static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
{
        struct vfio_device *device;

        device = vfio_group_get_device(group, dev);
        if (!device)
                return false;

        vfio_device_put(device);
        return true;
}

/*
 * Decrement the device reference count and wait for the device to be
 * removed.  Open file descriptors for the device... */
void *vfio_del_group_dev(struct device *dev)
{
        struct vfio_device *device = dev_get_drvdata(dev);
        struct vfio_group *group = device->group;
        void *device_data = device->device_data;
        struct vfio_unbound_dev *unbound;
        unsigned int i = 0;
        long ret;
        bool interrupted = false;

        /*
         * The group exists so long as we have a device reference.  Get
         * a group reference and use it to scan for the device going away.
         */
        vfio_group_get(group);

        /*
         * When the device is removed from the group, the group suddenly
         * becomes non-viable; the device has a driver (until the unbind
         * completes), but it's not present in the group.  This is bad news
         * for any external users that need to re-acquire a group reference
         * in order to match and release their existing reference.  To
         * solve this, we track such devices on the unbound_list to bridge
         * the gap until they're fully unbound.
         */
        unbound = kzalloc(sizeof(*unbound), GFP_KERNEL);
        if (unbound) {
                unbound->dev = dev;
                mutex_lock(&group->unbound_lock);
                list_add(&unbound->unbound_next, &group->unbound_list);
                mutex_unlock(&group->unbound_lock);
        }
        WARN_ON(!unbound);

        vfio_device_put(device);

        /*
         * If the device is still present in the group after the above
         * 'put', then it is in use and we need to request it from the
         * bus driver.  The driver may in turn need to request the
         * device from the user.  We send the request on an arbitrary
         * interval with counter to allow the driver to take escalating
         * measures to release the device if it has the ability to do so.
         */
        do {
                device = vfio_group_get_device(group, dev);
                if (!device)
                        break;

                if (device->ops->request)
                        device->ops->request(device_data, i++);

                vfio_device_put(device);

                if (interrupted) {
                        ret = wait_event_timeout(vfio.release_q,
                                        !vfio_dev_present(group, dev), HZ * 10);
                } else {
                        ret = wait_event_interruptible_timeout(vfio.release_q,
                                        !vfio_dev_present(group, dev), HZ * 10);
                        if (ret == -ERESTARTSYS) {
                                interrupted = true;
                                dev_warn(dev,
                                         "Device is currently in use, task"
                                         " \"%s\" (%d) "
                                         "blocked until device is released",
                                         current->comm, task_pid_nr(current));
                        }
                }
        } while (ret <= 0);

        vfio_group_put(group);

        return device_data;
}
EXPORT_SYMBOL_GPL(vfio_del_group_dev);
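
/*
 * Illustrative sketch (not from this file): the probe/remove pairing a
 * vfio bus driver such as vfio-pci builds on the two calls above.  The
 * example_* names, ops and device struct are hypothetical stand-ins.
 */
#ifdef VFIO_EXAMPLE_BUS_DRIVER
static int example_probe(struct device *dev)
{
        struct example_device *edev;    /* assumed defined elsewhere */
        int ret;

        edev = kzalloc(sizeof(*edev), GFP_KERNEL);
        if (!edev)
                return -ENOMEM;

        /* example_ops supplies open/release/ioctl/read/write/mmap */
        ret = vfio_add_group_dev(dev, &example_ops, edev);
        if (ret)
                kfree(edev);
        return ret;
}

static int example_remove(struct device *dev)
{
        /* Blocks until all users are gone, then returns device_data */
        struct example_device *edev = vfio_del_group_dev(dev);

        kfree(edev);
        return 0;
}
#endif /* VFIO_EXAMPLE_BUS_DRIVER */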

/**
 * VFIO base fd, /dev/vfio/vfio
 */
static long vfio_ioctl_check_extension(struct vfio_container *container,
                                       unsigned long arg)
{
        struct vfio_iommu_driver *driver;
        long ret = 0;

        down_read(&container->group_lock);

        driver = container->iommu_driver;

        switch (arg) {
                /* No base extensions yet */
        default:
                /*
                 * If no driver is set, poll all registered drivers for
                 * extensions and return the first positive result.  If
                 * a driver is already set, further queries will be passed
                 * only to that driver.
                 */
                if (!driver) {
                        mutex_lock(&vfio.iommu_drivers_lock);
                        list_for_each_entry(driver, &vfio.iommu_drivers_list,
                                            vfio_next) {
                                if (!try_module_get(driver->ops->owner))
                                        continue;

                                ret = driver->ops->ioctl(NULL,
                                                         VFIO_CHECK_EXTENSION,
                                                         arg);
                                module_put(driver->ops->owner);
                                if (ret > 0)
                                        break;
                        }
                        mutex_unlock(&vfio.iommu_drivers_lock);
                } else
                        ret = driver->ops->ioctl(container->iommu_data,
                                                 VFIO_CHECK_EXTENSION, arg);
        }

        up_read(&container->group_lock);

        return ret;
}

/* hold write lock on container->group_lock */
static int __vfio_container_attach_groups(struct vfio_container *container,
                                          struct vfio_iommu_driver *driver,
                                          void *data)
{
        struct vfio_group *group;
        int ret = -ENODEV;

        list_for_each_entry(group, &container->group_list, container_next) {
                ret = driver->ops->attach_group(data, group->iommu_group);
                if (ret)
                        goto unwind;
        }

        return ret;

unwind:
        list_for_each_entry_continue_reverse(group, &container->group_list,
                                             container_next) {
                driver->ops->detach_group(data, group->iommu_group);
        }

        return ret;
}

static long vfio_ioctl_set_iommu(struct vfio_container *container,
                                 unsigned long arg)
{
        struct vfio_iommu_driver *driver;
        long ret = -ENODEV;

        down_write(&container->group_lock);

        /*
         * The container is designed to be an unprivileged interface while
         * the group can be assigned to specific users.  Therefore, only by
         * adding a group to a container does the user get the privilege of
         * enabling the iommu, which may allocate finite resources.  There
         * is no unset_iommu, but by removing all the groups from a container,
         * the container is deprivileged and returns to an unset state.
         */
        if (list_empty(&container->group_list) || container->iommu_driver) {
                up_write(&container->group_lock);
                return -EINVAL;
        }

        mutex_lock(&vfio.iommu_drivers_lock);
        list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
                void *data;

                if (!try_module_get(driver->ops->owner))
                        continue;

                /*
                 * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
                 * so test which iommu driver reported support for this
                 * extension and call open on them.  We also pass them the
                 * magic, allowing a single driver to support multiple
                 * interfaces if they'd like.
                 */
                if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
                        module_put(driver->ops->owner);
                        continue;
                }

                /* module reference holds the driver we're working on */
                mutex_unlock(&vfio.iommu_drivers_lock);

                data = driver->ops->open(arg);
                if (IS_ERR(data)) {
                        ret = PTR_ERR(data);
                        module_put(driver->ops->owner);
                        goto skip_drivers_unlock;
                }

                ret = __vfio_container_attach_groups(container, driver, data);
                if (!ret) {
                        container->iommu_driver = driver;
                        container->iommu_data = data;
                } else {
                        driver->ops->release(data);
                        module_put(driver->ops->owner);
                }

                goto skip_drivers_unlock;
        }

        mutex_unlock(&vfio.iommu_drivers_lock);
skip_drivers_unlock:
        up_write(&container->group_lock);

        return ret;
}

static long vfio_fops_unl_ioctl(struct file *filep,
                                unsigned int cmd, unsigned long arg)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        void *data;
        long ret = -EINVAL;

        if (!container)
                return ret;

        switch (cmd) {
        case VFIO_GET_API_VERSION:
                ret = VFIO_API_VERSION;
                break;
        case VFIO_CHECK_EXTENSION:
                ret = vfio_ioctl_check_extension(container, arg);
                break;
        case VFIO_SET_IOMMU:
                ret = vfio_ioctl_set_iommu(container, arg);
                break;
        default:
                down_read(&container->group_lock);

                driver = container->iommu_driver;
                data = container->iommu_data;

                if (driver) /* passthrough all unrecognized ioctls */
                        ret = driver->ops->ioctl(data, cmd, arg);

                up_read(&container->group_lock);
        }

        return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_fops_compat_ioctl(struct file *filep,
                                   unsigned int cmd, unsigned long arg)
{
        arg = (unsigned long)compat_ptr(arg);
        return vfio_fops_unl_ioctl(filep, cmd, arg);
}
#endif  /* CONFIG_COMPAT */

static int vfio_fops_open(struct inode *inode, struct file *filep)
{
        struct vfio_container *container;

        container = kzalloc(sizeof(*container), GFP_KERNEL);
        if (!container)
                return -ENOMEM;

        INIT_LIST_HEAD(&container->group_list);
        init_rwsem(&container->group_lock);
        kref_init(&container->kref);

        filep->private_data = container;

        return 0;
}

static int vfio_fops_release(struct inode *inode, struct file *filep)
{
        struct vfio_container *container = filep->private_data;

        filep->private_data = NULL;

        vfio_container_put(container);

        return 0;
}

/*
 * Once an iommu driver is set, we optionally pass read/write/mmap
 * on to the driver, allowing management interfaces beyond ioctl.
 */
static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
                              size_t count, loff_t *ppos)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        ssize_t ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->read))
                ret = driver->ops->read(container->iommu_data,
                                        buf, count, ppos);

        up_read(&container->group_lock);

        return ret;
}

static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
                               size_t count, loff_t *ppos)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        ssize_t ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->write))
                ret = driver->ops->write(container->iommu_data,
                                         buf, count, ppos);

        up_read(&container->group_lock);

        return ret;
}

static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
        struct vfio_container *container = filep->private_data;
        struct vfio_iommu_driver *driver;
        int ret = -EINVAL;

        down_read(&container->group_lock);

        driver = container->iommu_driver;
        if (likely(driver && driver->ops->mmap))
                ret = driver->ops->mmap(container->iommu_data, vma);

        up_read(&container->group_lock);

        return ret;
}

static const struct file_operations vfio_fops = {
        .owner          = THIS_MODULE,
        .open           = vfio_fops_open,
        .release        = vfio_fops_release,
        .read           = vfio_fops_read,
        .write          = vfio_fops_write,
        .unlocked_ioctl = vfio_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = vfio_fops_compat_ioctl,
#endif
        .mmap           = vfio_fops_mmap,
};

/**
 * VFIO Group fd, /dev/vfio/$GROUP
 */
static void __vfio_group_unset_container(struct vfio_group *group)
{
        struct vfio_container *container = group->container;
        struct vfio_iommu_driver *driver;

        down_write(&container->group_lock);

        driver = container->iommu_driver;
        if (driver)
                driver->ops->detach_group(container->iommu_data,
                                          group->iommu_group);

        group->container = NULL;
        list_del(&group->container_next);

        /* Detaching the last group deprivileges a container, remove iommu */
        if (driver && list_empty(&container->group_list)) {
                driver->ops->release(container->iommu_data);
                module_put(driver->ops->owner);
                container->iommu_driver = NULL;
                container->iommu_data = NULL;
        }

        up_write(&container->group_lock);

        vfio_container_put(container);
}

/*
 * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
 * if there was no container to unset.  Since the ioctl is called on
 * the group, we know that still exists, therefore the only valid
 * transition here is 1->0.
 */
static int vfio_group_unset_container(struct vfio_group *group)
{
        int users = atomic_cmpxchg(&group->container_users, 1, 0);

        if (!users)
                return -EINVAL;
        if (users != 1)
                return -EBUSY;

        __vfio_group_unset_container(group);

        return 0;
}

/*
 * When removing container users, anything that removes the last user
 * implicitly removes the group from the container.  That is, if the
 * group file descriptor is closed, as well as any device file descriptors,
 * the group is free.
 */
static void vfio_group_try_dissolve_container(struct vfio_group *group)
{
        if (0 == atomic_dec_if_positive(&group->container_users))
                __vfio_group_unset_container(group);
}

static int vfio_group_set_container(struct vfio_group *group, int container_fd)
{
        struct fd f;
        struct vfio_container *container;
        struct vfio_iommu_driver *driver;
        int ret = 0;

        if (atomic_read(&group->container_users))
                return -EINVAL;

        f = fdget(container_fd);
        if (!f.file)
                return -EBADF;

        /* Sanity check, is this really our fd? */
        if (f.file->f_op != &vfio_fops) {
                fdput(f);
                return -EINVAL;
        }

        container = f.file->private_data;
        WARN_ON(!container); /* fget ensures we don't race vfio_release */

        down_write(&container->group_lock);

        driver = container->iommu_driver;
        if (driver) {
                ret = driver->ops->attach_group(container->iommu_data,
                                                group->iommu_group);
                if (ret)
                        goto unlock_out;
        }

        group->container = container;
        list_add(&group->container_next, &container->group_list);

        /* Get a reference on the container and mark a user within the group */
        vfio_container_get(container);
        atomic_inc(&group->container_users);

unlock_out:
        up_write(&container->group_lock);
        fdput(f);
        return ret;
}

static bool vfio_group_viable(struct vfio_group *group)
{
        return (iommu_group_for_each_dev(group->iommu_group,
                                         group, vfio_dev_viable) == 0);
}

static const struct file_operations vfio_device_fops;

static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
{
        struct vfio_device *device;
        struct file *filep;
        int ret = -ENODEV;

        if (0 == atomic_read(&group->container_users) ||
            !group->container->iommu_driver || !vfio_group_viable(group))
                return -EINVAL;

        mutex_lock(&group->device_lock);
        list_for_each_entry(device, &group->device_list, group_next) {
                if (strcmp(dev_name(device->dev), buf))
                        continue;

                ret = device->ops->open(device->device_data);
                if (ret)
                        break;
                /*
                 * We can't use anon_inode_getfd() because we need to modify
                 * the f_mode flags directly to allow more than just ioctls
                 */
                ret = get_unused_fd_flags(O_CLOEXEC);
                if (ret < 0) {
                        device->ops->release(device->device_data);
                        break;
                }

                filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
                                           device, O_RDWR);
                if (IS_ERR(filep)) {
                        put_unused_fd(ret);
                        ret = PTR_ERR(filep);
                        device->ops->release(device->device_data);
                        break;
                }

                /*
                 * TODO: add an anon_inode interface to do this.
                 * Appears to be missing by lack of need rather than
                 * explicitly prevented.  Now there's need.
                 */
                filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);

                vfio_device_get(device);
                atomic_inc(&group->container_users);

                fd_install(ret, filep);
                break;
        }
        mutex_unlock(&group->device_lock);

        return ret;
}

static long vfio_group_fops_unl_ioctl(struct file *filep,
                                      unsigned int cmd, unsigned long arg)
{
        struct vfio_group *group = filep->private_data;
        long ret = -ENOTTY;

        switch (cmd) {
        case VFIO_GROUP_GET_STATUS:
        {
                struct vfio_group_status status;
                unsigned long minsz;

                minsz = offsetofend(struct vfio_group_status, flags);

                if (copy_from_user(&status, (void __user *)arg, minsz))
                        return -EFAULT;

                if (status.argsz < minsz)
                        return -EINVAL;

                status.flags = 0;

                if (vfio_group_viable(group))
                        status.flags |= VFIO_GROUP_FLAGS_VIABLE;

                if (group->container)
                        status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET;

                if (copy_to_user((void __user *)arg, &status, minsz))
                        return -EFAULT;

                ret = 0;
                break;
        }
        case VFIO_GROUP_SET_CONTAINER:
        {
                int fd;

                if (get_user(fd, (int __user *)arg))
                        return -EFAULT;

                if (fd < 0)
                        return -EINVAL;

                ret = vfio_group_set_container(group, fd);
                break;
        }
        case VFIO_GROUP_UNSET_CONTAINER:
                ret = vfio_group_unset_container(group);
                break;
        case VFIO_GROUP_GET_DEVICE_FD:
        {
                char *buf;

                buf = strndup_user((const char __user *)arg, PAGE_SIZE);
                if (IS_ERR(buf))
                        return PTR_ERR(buf);

                ret = vfio_group_get_device_fd(group, buf);
                kfree(buf);
                break;
        }
        }

        return ret;
}

#ifdef CONFIG_COMPAT
static long vfio_group_fops_compat_ioctl(struct file *filep,
                                         unsigned int cmd, unsigned long arg)
{
        arg = (unsigned long)compat_ptr(arg);
        return vfio_group_fops_unl_ioctl(filep, cmd, arg);
}
#endif  /* CONFIG_COMPAT */

static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
        struct vfio_group *group;
        int opened;

        group = vfio_group_get_from_minor(iminor(inode));
        if (!group)
                return -ENODEV;

        /* Do we need multiple instances of the group open?  Seems not. */
        opened = atomic_cmpxchg(&group->opened, 0, 1);
        if (opened) {
                vfio_group_put(group);
                return -EBUSY;
        }

        /* Is something still in use from a previous open? */
        if (group->container) {
                atomic_dec(&group->opened);
                vfio_group_put(group);
                return -EBUSY;
        }

        filep->private_data = group;

        return 0;
}

static int vfio_group_fops_release(struct inode *inode, struct file *filep)
{
        struct vfio_group *group = filep->private_data;

        filep->private_data = NULL;

        vfio_group_try_dissolve_container(group);

        atomic_dec(&group->opened);

        vfio_group_put(group);

        return 0;
}

static const struct file_operations vfio_group_fops = {
        .owner          = THIS_MODULE,
        .unlocked_ioctl = vfio_group_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = vfio_group_fops_compat_ioctl,
#endif
        .open           = vfio_group_fops_open,
        .release        = vfio_group_fops_release,
};

/**
 * VFIO Device fd
 */
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
        struct vfio_device *device = filep->private_data;

        device->ops->release(device->device_data);

        vfio_group_try_dissolve_container(device->group);

        vfio_device_put(device);

        return 0;
}

static long vfio_device_fops_unl_ioctl(struct file *filep,
                                       unsigned int cmd, unsigned long arg)
{
        struct vfio_device *device = filep->private_data;

        if (unlikely(!device->ops->ioctl))
                return -EINVAL;

        return device->ops->ioctl(device->device_data, cmd, arg);
}

static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
                                     size_t count, loff_t *ppos)
{
        struct vfio_device *device = filep->private_data;

        if (unlikely(!device->ops->read))
                return -EINVAL;

        return device->ops->read(device->device_data, buf, count, ppos);
}

static ssize_t vfio_device_fops_write(struct file *filep,
                                      const char __user *buf,
                                      size_t count, loff_t *ppos)
{
        struct vfio_device *device = filep->private_data;

        if (unlikely(!device->ops->write))
                return -EINVAL;

        return device->ops->write(device->device_data, buf, count, ppos);
}

static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
        struct vfio_device *device = filep->private_data;

        if (unlikely(!device->ops->mmap))
                return -EINVAL;

        return device->ops->mmap(device->device_data, vma);
}

#ifdef CONFIG_COMPAT
static long vfio_device_fops_compat_ioctl(struct file *filep,
                                          unsigned int cmd, unsigned long arg)
{
        arg = (unsigned long)compat_ptr(arg);
        return vfio_device_fops_unl_ioctl(filep, cmd, arg);
}
#endif  /* CONFIG_COMPAT */

static const struct file_operations vfio_device_fops = {
        .owner          = THIS_MODULE,
        .release        = vfio_device_fops_release,
        .read           = vfio_device_fops_read,
        .write          = vfio_device_fops_write,
        .unlocked_ioctl = vfio_device_fops_unl_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = vfio_device_fops_compat_ioctl,
#endif
        .mmap           = vfio_device_fops_mmap,
};
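
/*
 * Illustrative sketch (not from this file): the userspace side of the
 * three fds above, loosely following Documentation/vfio.txt.  This
 * compiles against <linux/vfio.h> in userspace, not in the kernel; the
 * group number and device name are placeholders.
 */
#if 0
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int example_get_device_fd(void)
{
        int container, group, device;
        struct vfio_group_status status = { .argsz = sizeof(status) };

        container = open("/dev/vfio/vfio", O_RDWR);
        if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
                return -1;

        group = open("/dev/vfio/26", O_RDWR);   /* /dev/vfio/$GROUP */
        ioctl(group, VFIO_GROUP_GET_STATUS, &status);
        if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE))
                return -1;      /* a group device is bound elsewhere */

        /* Order matters: add the group first, then enable an IOMMU model */
        ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
        ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);

        /* Finally fetch a device fd by device name */
        device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
        return device;
}
#endif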

/**
 * External user API, exported by symbols to be linked dynamically.
 *
 * The protocol includes:
 *  1. do normal VFIO init operation:
 *      - opening a new container;
 *      - attaching group(s) to it;
 *      - setting an IOMMU driver for a container.
 * When IOMMU is set for a container, all groups in it are
 * considered ready to use by an external user.
 *
 * 2. User space passes a group fd to an external user.
 * The external user calls vfio_group_get_external_user()
 * to verify that:
 *      - the group is initialized;
 *      - IOMMU is set for it.
 * If both checks passed, vfio_group_get_external_user()
 * increments the container user counter to prevent
 * the VFIO group from disposal before KVM exits.
 *
 * 3. The external user calls vfio_external_user_iommu_id()
 * to know an IOMMU ID.
 *
 * 4. When the external KVM finishes, it calls
 * vfio_group_put_external_user() to release the VFIO group.
 * This call decrements the container user counter.
 */
struct vfio_group *vfio_group_get_external_user(struct file *filep)
{
        struct vfio_group *group = filep->private_data;

        if (filep->f_op != &vfio_group_fops)
                return ERR_PTR(-EINVAL);

        if (!atomic_inc_not_zero(&group->container_users))
                return ERR_PTR(-EINVAL);

        if (!group->container->iommu_driver ||
                        !vfio_group_viable(group)) {
                atomic_dec(&group->container_users);
                return ERR_PTR(-EINVAL);
        }

        vfio_group_get(group);

        return group;
}
EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
void vfio_group_put_external_user(struct vfio_group *group)
{
        /* Dissolve the container user first; the put may free the group */
        vfio_group_try_dissolve_container(group);
        vfio_group_put(group);
}
EXPORT_SYMBOL_GPL(vfio_group_put_external_user);

int vfio_external_user_iommu_id(struct vfio_group *group)
{
        return iommu_group_id(group->iommu_group);
}
EXPORT_SYMBOL_GPL(vfio_external_user_iommu_id);

long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
{
        return vfio_ioctl_check_extension(group->container, arg);
}
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
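
/*
 * Illustrative sketch (not from this file): how an external user such
 * as KVM consumes the protocol above, given a group file received from
 * userspace.  The function name is hypothetical.
 */
#ifdef VFIO_EXAMPLE_EXTERNAL_USER
static int example_external_user(struct file *group_file)
{
        struct vfio_group *group;
        int iommu_id;

        group = vfio_group_get_external_user(group_file);
        if (IS_ERR(group))
                return PTR_ERR(group);

        iommu_id = vfio_external_user_iommu_id(group);

        /* ... hold the group for as long as the device is in use ... */

        vfio_group_put_external_user(group);
        return iommu_id;
}
#endif /* VFIO_EXAMPLE_EXTERNAL_USER */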

/**
 * Module/class support
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
        return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}

static struct miscdevice vfio_dev = {
        .minor = VFIO_MINOR,
        .name = "vfio",
        .fops = &vfio_fops,
        .nodename = "vfio/vfio",
        .mode = S_IRUGO | S_IWUGO,
};

static int __init vfio_init(void)
{
        int ret;

        idr_init(&vfio.group_idr);
        mutex_init(&vfio.group_lock);
        mutex_init(&vfio.iommu_drivers_lock);
        INIT_LIST_HEAD(&vfio.group_list);
        INIT_LIST_HEAD(&vfio.iommu_drivers_list);
        init_waitqueue_head(&vfio.release_q);

        ret = misc_register(&vfio_dev);
        if (ret) {
                pr_err("vfio: misc device register failed\n");
                return ret;
        }

        /* /dev/vfio/$GROUP */
        vfio.class = class_create(THIS_MODULE, "vfio");
        if (IS_ERR(vfio.class)) {
                ret = PTR_ERR(vfio.class);
                goto err_class;
        }

        vfio.class->devnode = vfio_devnode;

        /* vfio_alloc_group_minor() hands out minors 0..MINORMASK inclusive */
        ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
        if (ret)
                goto err_alloc_chrdev;

        cdev_init(&vfio.group_cdev, &vfio_group_fops);
        ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
        if (ret)
                goto err_cdev_add;

        pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");

        /*
         * Attempt to load known iommu-drivers.  This gives us a working
         * environment without the user needing to explicitly load iommu
         * drivers.
         */
        request_module_nowait("vfio_iommu_type1");
        request_module_nowait("vfio_iommu_spapr_tce");

        return 0;

err_cdev_add:
        unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
        class_destroy(vfio.class);
        vfio.class = NULL;
err_class:
        misc_deregister(&vfio_dev);
        return ret;
}

static void __exit vfio_cleanup(void)
{
        WARN_ON(!list_empty(&vfio.group_list));

        idr_destroy(&vfio.group_idr);
        cdev_del(&vfio.group_cdev);
        unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
        class_destroy(vfio.class);
        vfio.class = NULL;
        misc_deregister(&vfio_dev);
}

module_init(vfio_init);
module_exit(vfio_cleanup);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
MODULE_ALIAS("devname:vfio/vfio");