linux/drivers/iommu/iommu.c
   1/*
   2 * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
   3 * Author: Joerg Roedel <joerg.roedel@amd.com>
   4 *
   5 * This program is free software; you can redistribute it and/or modify it
   6 * under the terms of the GNU General Public License version 2 as published
   7 * by the Free Software Foundation.
   8 *
   9 * This program is distributed in the hope that it will be useful,
  10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 * GNU General Public License for more details.
  13 *
  14 * You should have received a copy of the GNU General Public License
  15 * along with this program; if not, write to the Free Software
  16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  17 */
  18
  19#define pr_fmt(fmt)    "%s: " fmt, __func__
  20
  21#include <linux/device.h>
  22#include <linux/kernel.h>
  23#include <linux/bug.h>
  24#include <linux/types.h>
  25#include <linux/module.h>
  26#include <linux/slab.h>
  27#include <linux/errno.h>
  28#include <linux/iommu.h>
  29#include <linux/idr.h>
  30#include <linux/notifier.h>
  31#include <linux/err.h>
  32#include <linux/pci.h>
  33#include <linux/bitops.h>
  34#include <trace/events/iommu.h>
  35
  36static struct kset *iommu_group_kset;
  37static struct ida iommu_group_ida;
  38static struct mutex iommu_group_mutex;
  39
  40struct iommu_callback_data {
  41        const struct iommu_ops *ops;
  42};
  43
  44struct iommu_group {
  45        struct kobject kobj;
  46        struct kobject *devices_kobj;
  47        struct list_head devices;
  48        struct mutex mutex;
  49        struct blocking_notifier_head notifier;
  50        void *iommu_data;
  51        void (*iommu_data_release)(void *iommu_data);
  52        char *name;
  53        int id;
  54};
  55
  56struct iommu_device {
  57        struct list_head list;
  58        struct device *dev;
  59        char *name;
  60};
  61
  62struct iommu_group_attribute {
  63        struct attribute attr;
  64        ssize_t (*show)(struct iommu_group *group, char *buf);
  65        ssize_t (*store)(struct iommu_group *group,
  66                         const char *buf, size_t count);
  67};
  68
  69#define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)           \
  70struct iommu_group_attribute iommu_group_attr_##_name =         \
  71        __ATTR(_name, _mode, _show, _store)
  72
  73#define to_iommu_group_attr(_attr)      \
  74        container_of(_attr, struct iommu_group_attribute, attr)
  75#define to_iommu_group(_kobj)           \
  76        container_of(_kobj, struct iommu_group, kobj)
  77
  78static ssize_t iommu_group_attr_show(struct kobject *kobj,
  79                                     struct attribute *__attr, char *buf)
  80{
  81        struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
  82        struct iommu_group *group = to_iommu_group(kobj);
  83        ssize_t ret = -EIO;
  84
  85        if (attr->show)
  86                ret = attr->show(group, buf);
  87        return ret;
  88}
  89
  90static ssize_t iommu_group_attr_store(struct kobject *kobj,
  91                                      struct attribute *__attr,
  92                                      const char *buf, size_t count)
  93{
  94        struct iommu_group_attribute *attr = to_iommu_group_attr(__attr);
  95        struct iommu_group *group = to_iommu_group(kobj);
  96        ssize_t ret = -EIO;
  97
  98        if (attr->store)
  99                ret = attr->store(group, buf, count);
 100        return ret;
 101}
 102
 103static const struct sysfs_ops iommu_group_sysfs_ops = {
 104        .show = iommu_group_attr_show,
 105        .store = iommu_group_attr_store,
 106};
 107
 108static int iommu_group_create_file(struct iommu_group *group,
 109                                   struct iommu_group_attribute *attr)
 110{
 111        return sysfs_create_file(&group->kobj, &attr->attr);
 112}
 113
 114static void iommu_group_remove_file(struct iommu_group *group,
 115                                    struct iommu_group_attribute *attr)
 116{
 117        sysfs_remove_file(&group->kobj, &attr->attr);
 118}
 119
 120static ssize_t iommu_group_show_name(struct iommu_group *group, char *buf)
 121{
 122        return sprintf(buf, "%s\n", group->name);
 123}
 124
 125static IOMMU_GROUP_ATTR(name, S_IRUGO, iommu_group_show_name, NULL);
 126
 127static void iommu_group_release(struct kobject *kobj)
 128{
 129        struct iommu_group *group = to_iommu_group(kobj);
 130
 131        if (group->iommu_data_release)
 132                group->iommu_data_release(group->iommu_data);
 133
 134        mutex_lock(&iommu_group_mutex);
 135        ida_remove(&iommu_group_ida, group->id);
 136        mutex_unlock(&iommu_group_mutex);
 137
 138        kfree(group->name);
 139        kfree(group);
 140}
 141
 142static struct kobj_type iommu_group_ktype = {
 143        .sysfs_ops = &iommu_group_sysfs_ops,
 144        .release = iommu_group_release,
 145};
 146
 147/**
 148 * iommu_group_alloc - Allocate a new group
  149 * Groups are anonymous until a name is set with iommu_group_set_name().
 150 *
 151 * This function is called by an iommu driver to allocate a new iommu
 152 * group.  The iommu group represents the minimum granularity of the iommu.
  153 * Upon successful return, the caller holds a reference to the returned
 154 * group in order to hold the group until devices are added.  Use
 155 * iommu_group_put() to release this extra reference count, allowing the
 156 * group to be automatically reclaimed once it has no devices or external
 157 * references.
 158 */
 159struct iommu_group *iommu_group_alloc(void)
 160{
 161        struct iommu_group *group;
 162        int ret;
 163
 164        group = kzalloc(sizeof(*group), GFP_KERNEL);
 165        if (!group)
 166                return ERR_PTR(-ENOMEM);
 167
 168        group->kobj.kset = iommu_group_kset;
 169        mutex_init(&group->mutex);
 170        INIT_LIST_HEAD(&group->devices);
 171        BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
 172
 173        mutex_lock(&iommu_group_mutex);
 174
 175again:
 176        if (unlikely(0 == ida_pre_get(&iommu_group_ida, GFP_KERNEL))) {
 177                kfree(group);
 178                mutex_unlock(&iommu_group_mutex);
 179                return ERR_PTR(-ENOMEM);
 180        }
 181
 182        if (-EAGAIN == ida_get_new(&iommu_group_ida, &group->id))
 183                goto again;
 184
 185        mutex_unlock(&iommu_group_mutex);
 186
 187        ret = kobject_init_and_add(&group->kobj, &iommu_group_ktype,
 188                                   NULL, "%d", group->id);
 189        if (ret) {
 190                mutex_lock(&iommu_group_mutex);
 191                ida_remove(&iommu_group_ida, group->id);
 192                mutex_unlock(&iommu_group_mutex);
 193                kfree(group);
 194                return ERR_PTR(ret);
 195        }
 196
 197        group->devices_kobj = kobject_create_and_add("devices", &group->kobj);
 198        if (!group->devices_kobj) {
 199                kobject_put(&group->kobj); /* triggers .release & free */
 200                return ERR_PTR(-ENOMEM);
 201        }
 202
 203        /*
 204         * The devices_kobj holds a reference on the group kobject, so
 205         * as long as that exists so will the group.  We can therefore
 206         * use the devices_kobj for reference counting.
 207         */
 208        kobject_put(&group->kobj);
 209
 210        return group;
 211}
 212EXPORT_SYMBOL_GPL(iommu_group_alloc);
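
/*
 * A minimal usage sketch of the life cycle described above, as a bus or
 * IOMMU driver might write it.  The device pointer 'dev' and the group
 * name are hypothetical; error handling is abbreviated.  The final
 * iommu_group_put() drops the allocation reference once the device
 * holds its own.
 *
 *	struct iommu_group *group;
 *	int ret;
 *
 *	group = iommu_group_alloc();
 *	if (IS_ERR(group))
 *		return PTR_ERR(group);
 *
 *	ret = iommu_group_set_name(group, "example");
 *	if (!ret)
 *		ret = iommu_group_add_device(group, dev);
 *
 *	iommu_group_put(group);
 *	return ret;
 */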
 213
 214struct iommu_group *iommu_group_get_by_id(int id)
 215{
 216        struct kobject *group_kobj;
 217        struct iommu_group *group;
 218        const char *name;
 219
 220        if (!iommu_group_kset)
 221                return NULL;
 222
 223        name = kasprintf(GFP_KERNEL, "%d", id);
 224        if (!name)
 225                return NULL;
 226
 227        group_kobj = kset_find_obj(iommu_group_kset, name);
 228        kfree(name);
 229
 230        if (!group_kobj)
 231                return NULL;
 232
 233        group = container_of(group_kobj, struct iommu_group, kobj);
 234        BUG_ON(group->id != id);
 235
 236        kobject_get(group->devices_kobj);
 237        kobject_put(&group->kobj);
 238
 239        return group;
 240}
 241EXPORT_SYMBOL_GPL(iommu_group_get_by_id);
 242
 243/**
 244 * iommu_group_get_iommudata - retrieve iommu_data registered for a group
 245 * @group: the group
 246 *
 247 * iommu drivers can store data in the group for use when doing iommu
 248 * operations.  This function provides a way to retrieve it.  Caller
 249 * should hold a group reference.
 250 */
 251void *iommu_group_get_iommudata(struct iommu_group *group)
 252{
 253        return group->iommu_data;
 254}
 255EXPORT_SYMBOL_GPL(iommu_group_get_iommudata);
 256
 257/**
 258 * iommu_group_set_iommudata - set iommu_data for a group
 259 * @group: the group
 260 * @iommu_data: new data
 261 * @release: release function for iommu_data
 262 *
 263 * iommu drivers can store data in the group for use when doing iommu
 264 * operations.  This function provides a way to set the data after
 265 * the group has been allocated.  Caller should hold a group reference.
 266 */
 267void iommu_group_set_iommudata(struct iommu_group *group, void *iommu_data,
 268                               void (*release)(void *iommu_data))
 269{
 270        group->iommu_data = iommu_data;
 271        group->iommu_data_release = release;
 272}
 273EXPORT_SYMBOL_GPL(iommu_group_set_iommudata);
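
/*
 * Sketch of attaching driver-private data to a group.  The 'gdata'
 * allocation and the example_data_release() helper are hypothetical;
 * the release callback is invoked from iommu_group_release() when the
 * group is finally freed.
 *
 *	static void example_data_release(void *data)
 *	{
 *		kfree(data);
 *	}
 *
 *	...
 *	gdata = kzalloc(sizeof(*gdata), GFP_KERNEL);
 *	iommu_group_set_iommudata(group, gdata, example_data_release);
 */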
 274
 275/**
 276 * iommu_group_set_name - set name for a group
 277 * @group: the group
 278 * @name: name
 279 *
 280 * Allow iommu driver to set a name for a group.  When set it will
 281 * appear in a name attribute file under the group in sysfs.
 282 */
 283int iommu_group_set_name(struct iommu_group *group, const char *name)
 284{
 285        int ret;
 286
 287        if (group->name) {
 288                iommu_group_remove_file(group, &iommu_group_attr_name);
 289                kfree(group->name);
 290                group->name = NULL;
 291                if (!name)
 292                        return 0;
 293        }
 294
 295        group->name = kstrdup(name, GFP_KERNEL);
 296        if (!group->name)
 297                return -ENOMEM;
 298
 299        ret = iommu_group_create_file(group, &iommu_group_attr_name);
 300        if (ret) {
 301                kfree(group->name);
 302                group->name = NULL;
 303                return ret;
 304        }
 305
 306        return 0;
 307}
 308EXPORT_SYMBOL_GPL(iommu_group_set_name);
 309
 310/**
 311 * iommu_group_add_device - add a device to an iommu group
 312 * @group: the group into which to add the device (reference should be held)
 313 * @dev: the device
 314 *
 315 * This function is called by an iommu driver to add a device into a
 316 * group.  Adding a device increments the group reference count.
 317 */
 318int iommu_group_add_device(struct iommu_group *group, struct device *dev)
 319{
 320        int ret, i = 0;
 321        struct iommu_device *device;
 322
 323        device = kzalloc(sizeof(*device), GFP_KERNEL);
 324        if (!device)
 325                return -ENOMEM;
 326
 327        device->dev = dev;
 328
 329        ret = sysfs_create_link(&dev->kobj, &group->kobj, "iommu_group");
 330        if (ret) {
 331                kfree(device);
 332                return ret;
 333        }
 334
 335        device->name = kasprintf(GFP_KERNEL, "%s", kobject_name(&dev->kobj));
 336rename:
 337        if (!device->name) {
 338                sysfs_remove_link(&dev->kobj, "iommu_group");
 339                kfree(device);
 340                return -ENOMEM;
 341        }
 342
 343        ret = sysfs_create_link_nowarn(group->devices_kobj,
 344                                       &dev->kobj, device->name);
 345        if (ret) {
 346                kfree(device->name);
 347                if (ret == -EEXIST && i >= 0) {
 348                        /*
 349                         * Account for the slim chance of collision
 350                         * and append an instance to the name.
 351                         */
 352                        device->name = kasprintf(GFP_KERNEL, "%s.%d",
 353                                                 kobject_name(&dev->kobj), i++);
 354                        goto rename;
 355                }
 356
 357                sysfs_remove_link(&dev->kobj, "iommu_group");
 358                kfree(device);
 359                return ret;
 360        }
 361
 362        kobject_get(group->devices_kobj);
 363
 364        dev->iommu_group = group;
 365
 366        mutex_lock(&group->mutex);
 367        list_add_tail(&device->list, &group->devices);
 368        mutex_unlock(&group->mutex);
 369
 370        /* Notify any listeners about change to group. */
 371        blocking_notifier_call_chain(&group->notifier,
 372                                     IOMMU_GROUP_NOTIFY_ADD_DEVICE, dev);
 373
 374        trace_add_device_to_group(group->id, dev);
 375        return 0;
 376}
 377EXPORT_SYMBOL_GPL(iommu_group_add_device);
 378
 379/**
  380 * iommu_group_remove_device - remove a device from its current group
 381 * @dev: device to be removed
 382 *
 383 * This function is called by an iommu driver to remove the device from
  384 * its current group.  This decrements the iommu group reference count.
 385 */
 386void iommu_group_remove_device(struct device *dev)
 387{
 388        struct iommu_group *group = dev->iommu_group;
 389        struct iommu_device *tmp_device, *device = NULL;
 390
 391        /* Pre-notify listeners that a device is being removed. */
 392        blocking_notifier_call_chain(&group->notifier,
 393                                     IOMMU_GROUP_NOTIFY_DEL_DEVICE, dev);
 394
 395        mutex_lock(&group->mutex);
 396        list_for_each_entry(tmp_device, &group->devices, list) {
 397                if (tmp_device->dev == dev) {
 398                        device = tmp_device;
 399                        list_del(&device->list);
 400                        break;
 401                }
 402        }
 403        mutex_unlock(&group->mutex);
 404
 405        if (!device)
 406                return;
 407
 408        sysfs_remove_link(group->devices_kobj, device->name);
 409        sysfs_remove_link(&dev->kobj, "iommu_group");
 410
 411        trace_remove_device_from_group(group->id, dev);
 412
 413        kfree(device->name);
 414        kfree(device);
 415        dev->iommu_group = NULL;
 416        kobject_put(group->devices_kobj);
 417}
 418EXPORT_SYMBOL_GPL(iommu_group_remove_device);
 419
 420/**
 421 * iommu_group_for_each_dev - iterate over each device in the group
 422 * @group: the group
 423 * @data: caller opaque data to be passed to callback function
 424 * @fn: caller supplied callback function
 425 *
 426 * This function is called by group users to iterate over group devices.
  427 * Callers should hold a reference to the group while the callback runs.
 428 * The group->mutex is held across callbacks, which will block calls to
 429 * iommu_group_add/remove_device.
 430 */
 431int iommu_group_for_each_dev(struct iommu_group *group, void *data,
 432                             int (*fn)(struct device *, void *))
 433{
 434        struct iommu_device *device;
 435        int ret = 0;
 436
 437        mutex_lock(&group->mutex);
 438        list_for_each_entry(device, &group->devices, list) {
 439                ret = fn(device->dev, data);
 440                if (ret)
 441                        break;
 442        }
 443        mutex_unlock(&group->mutex);
 444        return ret;
 445}
 446EXPORT_SYMBOL_GPL(iommu_group_for_each_dev);
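
/*
 * Sketch of a caller-supplied callback for iommu_group_for_each_dev().
 * The example_count_dev() name is hypothetical.  A nonzero return stops
 * the iteration, and because group->mutex is held, the callback must not
 * add or remove group devices itself.
 *
 *	static int example_count_dev(struct device *dev, void *data)
 *	{
 *		int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	...
 *	int count = 0;
 *
 *	iommu_group_for_each_dev(group, &count, example_count_dev);
 */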
 447
 448/**
 449 * iommu_group_get - Return the group for a device and increment reference
 450 * @dev: get the group that this device belongs to
 451 *
 452 * This function is called by iommu drivers and users to get the group
  453 * for the specified device.  If found, the group is returned and the
  454 * group reference is incremented; otherwise NULL is returned.
 455 */
 456struct iommu_group *iommu_group_get(struct device *dev)
 457{
 458        struct iommu_group *group = dev->iommu_group;
 459
 460        if (group)
 461                kobject_get(group->devices_kobj);
 462
 463        return group;
 464}
 465EXPORT_SYMBOL_GPL(iommu_group_get);
 466
 467/**
 468 * iommu_group_put - Decrement group reference
 469 * @group: the group to use
 470 *
 471 * This function is called by iommu drivers and users to release the
 472 * iommu group.  Once the reference count is zero, the group is released.
 473 */
 474void iommu_group_put(struct iommu_group *group)
 475{
 476        if (group)
 477                kobject_put(group->devices_kobj);
 478}
 479EXPORT_SYMBOL_GPL(iommu_group_put);
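
/*
 * Typical consumer pattern built on the two calls above, assuming a
 * hypothetical caller that only needs the group ID: take a reference,
 * use the group, then drop the reference.  The reference actually lives
 * on group->devices_kobj, which in turn pins the group kobject.
 *
 *	struct iommu_group *group = iommu_group_get(dev);
 *
 *	if (group) {
 *		pr_info("device is in iommu group %d\n", iommu_group_id(group));
 *		iommu_group_put(group);
 *	}
 */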
 480
 481/**
 482 * iommu_group_register_notifier - Register a notifier for group changes
 483 * @group: the group to watch
 484 * @nb: notifier block to signal
 485 *
 486 * This function allows iommu group users to track changes in a group.
 487 * See include/linux/iommu.h for actions sent via this notifier.  Caller
 488 * should hold a reference to the group throughout notifier registration.
 489 */
 490int iommu_group_register_notifier(struct iommu_group *group,
 491                                  struct notifier_block *nb)
 492{
 493        return blocking_notifier_chain_register(&group->notifier, nb);
 494}
 495EXPORT_SYMBOL_GPL(iommu_group_register_notifier);
 496
 497/**
 498 * iommu_group_unregister_notifier - Unregister a notifier
 499 * @group: the group to watch
 500 * @nb: notifier block to signal
 501 *
 502 * Unregister a previously registered group notifier block.
 503 */
 504int iommu_group_unregister_notifier(struct iommu_group *group,
 505                                    struct notifier_block *nb)
 506{
 507        return blocking_notifier_chain_unregister(&group->notifier, nb);
 508}
 509EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
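
/*
 * Sketch of a group notifier, assuming a hypothetical listener that only
 * cares about device additions.  The IOMMU_GROUP_NOTIFY_* actions are
 * defined in include/linux/iommu.h.
 *
 *	static int example_group_notify(struct notifier_block *nb,
 *					unsigned long action, void *data)
 *	{
 *		struct device *dev = data;
 *
 *		if (action == IOMMU_GROUP_NOTIFY_ADD_DEVICE)
 *			dev_info(dev, "added to an iommu group\n");
 *
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block example_nb = {
 *		.notifier_call = example_group_notify,
 *	};
 *
 *	...
 *	iommu_group_register_notifier(group, &example_nb);
 */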
 510
 511/**
 512 * iommu_group_id - Return ID for a group
 513 * @group: the group to ID
 514 *
 515 * Return the unique ID for the group matching the sysfs group number.
 516 */
 517int iommu_group_id(struct iommu_group *group)
 518{
 519        return group->id;
 520}
 521EXPORT_SYMBOL_GPL(iommu_group_id);
 522
 523static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
 524                                               unsigned long *devfns);
 525
 526/*
 527 * To consider a PCI device isolated, we require ACS to support Source
 528 * Validation, Request Redirection, Completer Redirection, and Upstream
 529 * Forwarding.  This effectively means that devices cannot spoof their
 530 * requester ID, requests and completions cannot be redirected, and all
  531 * transactions are forwarded upstream, even when they pass through a
 532 * bridge where the target device is downstream.
 533 */
 534#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
 535
 536/*
 537 * For multifunction devices which are not isolated from each other, find
 538 * all the other non-isolated functions and look for existing groups.  For
 539 * each function, we also need to look for aliases to or from other devices
 540 * that may already have a group.
 541 */
 542static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev,
 543                                                        unsigned long *devfns)
 544{
 545        struct pci_dev *tmp = NULL;
 546        struct iommu_group *group;
 547
 548        if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS))
 549                return NULL;
 550
 551        for_each_pci_dev(tmp) {
 552                if (tmp == pdev || tmp->bus != pdev->bus ||
 553                    PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) ||
 554                    pci_acs_enabled(tmp, REQ_ACS_FLAGS))
 555                        continue;
 556
 557                group = get_pci_alias_group(tmp, devfns);
 558                if (group) {
 559                        pci_dev_put(tmp);
 560                        return group;
 561                }
 562        }
 563
 564        return NULL;
 565}
 566
 567/*
  568 * Look for aliases to or from the given device for existing groups.  The
 569 * dma_alias_devfn only supports aliases on the same bus, therefore the search
  570 * space is quite small (especially since we're really only looking at PCIe
  571 * devices, and therefore only expect multiple slots on the root complex or
 572 * downstream switch ports).  It's conceivable though that a pair of
 573 * multifunction devices could have aliases between them that would cause a
 574 * loop.  To prevent this, we use a bitmap to track where we've been.
 575 */
 576static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev,
 577                                               unsigned long *devfns)
 578{
 579        struct pci_dev *tmp = NULL;
 580        struct iommu_group *group;
 581
 582        if (test_and_set_bit(pdev->devfn & 0xff, devfns))
 583                return NULL;
 584
 585        group = iommu_group_get(&pdev->dev);
 586        if (group)
 587                return group;
 588
 589        for_each_pci_dev(tmp) {
 590                if (tmp == pdev || tmp->bus != pdev->bus)
 591                        continue;
 592
 593                /* We alias them or they alias us */
 594                if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
 595                     pdev->dma_alias_devfn == tmp->devfn) ||
 596                    ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) &&
 597                     tmp->dma_alias_devfn == pdev->devfn)) {
 598
 599                        group = get_pci_alias_group(tmp, devfns);
 600                        if (group) {
 601                                pci_dev_put(tmp);
 602                                return group;
 603                        }
 604
 605                        group = get_pci_function_alias_group(tmp, devfns);
 606                        if (group) {
 607                                pci_dev_put(tmp);
 608                                return group;
 609                        }
 610                }
 611        }
 612
 613        return NULL;
 614}
 615
 616struct group_for_pci_data {
 617        struct pci_dev *pdev;
 618        struct iommu_group *group;
 619};
 620
 621/*
 622 * DMA alias iterator callback, return the last seen device.  Stop and return
 623 * the IOMMU group if we find one along the way.
 624 */
 625static int get_pci_alias_or_group(struct pci_dev *pdev, u16 alias, void *opaque)
 626{
 627        struct group_for_pci_data *data = opaque;
 628
 629        data->pdev = pdev;
 630        data->group = iommu_group_get(&pdev->dev);
 631
 632        return data->group != NULL;
 633}
 634
 635/*
 636 * Use standard PCI bus topology, isolation features, and DMA alias quirks
 637 * to find or create an IOMMU group for a device.
 638 */
 639static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev)
 640{
 641        struct group_for_pci_data data;
 642        struct pci_bus *bus;
 643        struct iommu_group *group = NULL;
 644        u64 devfns[4] = { 0 };
 645
 646        /*
 647         * Find the upstream DMA alias for the device.  A device must not
 648         * be aliased due to topology in order to have its own IOMMU group.
 649         * If we find an alias along the way that already belongs to a
 650         * group, use it.
 651         */
 652        if (pci_for_each_dma_alias(pdev, get_pci_alias_or_group, &data))
 653                return data.group;
 654
 655        pdev = data.pdev;
 656
 657        /*
 658         * Continue upstream from the point of minimum IOMMU granularity
 659         * due to aliases to the point where devices are protected from
 660         * peer-to-peer DMA by PCI ACS.  Again, if we find an existing
 661         * group, use it.
 662         */
 663        for (bus = pdev->bus; !pci_is_root_bus(bus); bus = bus->parent) {
 664                if (!bus->self)
 665                        continue;
 666
 667                if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
 668                        break;
 669
 670                pdev = bus->self;
 671
 672                group = iommu_group_get(&pdev->dev);
 673                if (group)
 674                        return group;
 675        }
 676
 677        /*
 678         * Look for existing groups on device aliases.  If we alias another
 679         * device or another device aliases us, use the same group.
 680         */
 681        group = get_pci_alias_group(pdev, (unsigned long *)devfns);
 682        if (group)
 683                return group;
 684
 685        /*
 686         * Look for existing groups on non-isolated functions on the same
  687 * slot and aliases of those functions, if any.  No need to clear
 688         * the search bitmap, the tested devfns are still valid.
 689         */
 690        group = get_pci_function_alias_group(pdev, (unsigned long *)devfns);
 691        if (group)
 692                return group;
 693
 694        /* No shared group found, allocate new */
 695        return iommu_group_alloc();
 696}
 697
 698/**
 699 * iommu_group_get_for_dev - Find or create the IOMMU group for a device
 700 * @dev: target device
 701 *
 702 * This function is intended to be called by IOMMU drivers and extended to
 703 * support common, bus-defined algorithms when determining or creating the
 704 * IOMMU group for a device.  On success, the caller will hold a reference
 705 * to the returned IOMMU group, which will already include the provided
 706 * device.  The reference should be released with iommu_group_put().
 707 */
 708struct iommu_group *iommu_group_get_for_dev(struct device *dev)
 709{
 710        struct iommu_group *group;
 711        int ret;
 712
 713        group = iommu_group_get(dev);
 714        if (group)
 715                return group;
 716
 717        if (!dev_is_pci(dev))
 718                return ERR_PTR(-EINVAL);
 719
 720        group = iommu_group_get_for_pci_dev(to_pci_dev(dev));
 721
 722        if (IS_ERR(group))
 723                return group;
 724
 725        ret = iommu_group_add_device(group, dev);
 726        if (ret) {
 727                iommu_group_put(group);
 728                return ERR_PTR(ret);
 729        }
 730
 731        return group;
 732}
 733
 734static int add_iommu_group(struct device *dev, void *data)
 735{
 736        struct iommu_callback_data *cb = data;
 737        const struct iommu_ops *ops = cb->ops;
 738
 739        if (!ops->add_device)
 740                return 0;
 741
 742        WARN_ON(dev->iommu_group);
 743
 744        ops->add_device(dev);
 745
 746        return 0;
 747}
 748
 749static int iommu_bus_notifier(struct notifier_block *nb,
 750                              unsigned long action, void *data)
 751{
 752        struct device *dev = data;
 753        const struct iommu_ops *ops = dev->bus->iommu_ops;
 754        struct iommu_group *group;
 755        unsigned long group_action = 0;
 756
 757        /*
 758         * ADD/DEL call into iommu driver ops if provided, which may
 759         * result in ADD/DEL notifiers to group->notifier
 760         */
 761        if (action == BUS_NOTIFY_ADD_DEVICE) {
 762                if (ops->add_device)
 763                        return ops->add_device(dev);
 764        } else if (action == BUS_NOTIFY_DEL_DEVICE) {
 765                if (ops->remove_device && dev->iommu_group) {
 766                        ops->remove_device(dev);
 767                        return 0;
 768                }
 769        }
 770
 771        /*
 772         * Remaining BUS_NOTIFYs get filtered and republished to the
 773         * group, if anyone is listening
 774         */
 775        group = iommu_group_get(dev);
 776        if (!group)
 777                return 0;
 778
 779        switch (action) {
 780        case BUS_NOTIFY_BIND_DRIVER:
 781                group_action = IOMMU_GROUP_NOTIFY_BIND_DRIVER;
 782                break;
 783        case BUS_NOTIFY_BOUND_DRIVER:
 784                group_action = IOMMU_GROUP_NOTIFY_BOUND_DRIVER;
 785                break;
 786        case BUS_NOTIFY_UNBIND_DRIVER:
 787                group_action = IOMMU_GROUP_NOTIFY_UNBIND_DRIVER;
 788                break;
 789        case BUS_NOTIFY_UNBOUND_DRIVER:
 790                group_action = IOMMU_GROUP_NOTIFY_UNBOUND_DRIVER;
 791                break;
 792        }
 793
 794        if (group_action)
 795                blocking_notifier_call_chain(&group->notifier,
 796                                             group_action, dev);
 797
 798        iommu_group_put(group);
 799        return 0;
 800}
 801
 802static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops)
 803{
 804        int err;
 805        struct notifier_block *nb;
 806        struct iommu_callback_data cb = {
 807                .ops = ops,
 808        };
 809
 810        nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
 811        if (!nb)
 812                return -ENOMEM;
 813
 814        nb->notifier_call = iommu_bus_notifier;
 815
 816        err = bus_register_notifier(bus, nb);
 817        if (err) {
 818                kfree(nb);
 819                return err;
 820        }
 821
 822        err = bus_for_each_dev(bus, NULL, &cb, add_iommu_group);
 823        if (err) {
 824                bus_unregister_notifier(bus, nb);
 825                kfree(nb);
 826                return err;
 827        }
 828
 829        return 0;
 830}
 831
 832/**
 833 * bus_set_iommu - set iommu-callbacks for the bus
 834 * @bus: bus.
 835 * @ops: the callbacks provided by the iommu-driver
 836 *
 837 * This function is called by an iommu driver to set the iommu methods
 838 * used for a particular bus. Drivers for devices on that bus can use
 839 * the iommu-api after these ops are registered.
 840 * This special function is needed because IOMMUs are usually devices on
 841 * the bus itself, so the iommu drivers are not initialized when the bus
 842 * is set up. With this function the iommu-driver can set the iommu-ops
 843 * afterwards.
 844 */
 845int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops)
 846{
 847        int err;
 848
 849        if (bus->iommu_ops != NULL)
 850                return -EBUSY;
 851
 852        bus->iommu_ops = ops;
 853
 854        /* Do IOMMU specific setup for this bus-type */
 855        err = iommu_bus_init(bus, ops);
 856        if (err)
 857                bus->iommu_ops = NULL;
 858
 859        return err;
 860}
 861EXPORT_SYMBOL_GPL(bus_set_iommu);
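
/*
 * Sketch of how an IOMMU driver would register itself for a bus, using a
 * hypothetical example_iommu_ops whose callbacks are all assumed to be
 * implemented by the driver.  Only fields referenced elsewhere in this
 * file are shown; pgsize_bitmap advertises the supported page sizes.
 *
 *	static const struct iommu_ops example_iommu_ops = {
 *		.capable	= example_capable,
 *		.domain_init	= example_domain_init,
 *		.domain_destroy	= example_domain_destroy,
 *		.attach_dev	= example_attach_dev,
 *		.detach_dev	= example_detach_dev,
 *		.map		= example_map,
 *		.unmap		= example_unmap,
 *		.iova_to_phys	= example_iova_to_phys,
 *		.add_device	= example_add_device,
 *		.remove_device	= example_remove_device,
 *		.pgsize_bitmap	= SZ_4K | SZ_2M,
 *	};
 *
 *	...
 *	ret = bus_set_iommu(&pci_bus_type, &example_iommu_ops);
 */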
 862
 863bool iommu_present(struct bus_type *bus)
 864{
 865        return bus->iommu_ops != NULL;
 866}
 867EXPORT_SYMBOL_GPL(iommu_present);
 868
 869bool iommu_capable(struct bus_type *bus, enum iommu_cap cap)
 870{
 871        if (!bus->iommu_ops || !bus->iommu_ops->capable)
 872                return false;
 873
 874        return bus->iommu_ops->capable(cap);
 875}
 876EXPORT_SYMBOL_GPL(iommu_capable);
 877
 878/**
 879 * iommu_set_fault_handler() - set a fault handler for an iommu domain
 880 * @domain: iommu domain
 881 * @handler: fault handler
 882 * @token: user data, will be passed back to the fault handler
 883 *
  884 * This function should be used by IOMMU users that want to be notified
 885 * whenever an IOMMU fault happens.
 886 *
 887 * The fault handler itself should return 0 on success, and an appropriate
 888 * error code otherwise.
 889 */
 890void iommu_set_fault_handler(struct iommu_domain *domain,
 891                                        iommu_fault_handler_t handler,
 892                                        void *token)
 893{
 894        BUG_ON(!domain);
 895
 896        domain->handler = handler;
 897        domain->handler_token = token;
 898}
 899EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
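
/*
 * Sketch of a fault handler matching iommu_fault_handler_t, with a
 * hypothetical example_fault() that only logs the faulting address.
 * Per the comment above, a nonzero return reports that the fault was
 * not handled.
 *
 *	static int example_fault(struct iommu_domain *domain,
 *				 struct device *dev, unsigned long iova,
 *				 int flags, void *token)
 *	{
 *		dev_err(dev, "iommu fault at iova 0x%lx\n", iova);
 *		return -ENOSYS;
 *	}
 *
 *	...
 *	iommu_set_fault_handler(domain, example_fault, NULL);
 */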
 900
 901struct iommu_domain *iommu_domain_alloc(struct bus_type *bus)
 902{
 903        struct iommu_domain *domain;
 904        int ret;
 905
 906        if (bus == NULL || bus->iommu_ops == NULL)
 907                return NULL;
 908
 909        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
 910        if (!domain)
 911                return NULL;
 912
 913        domain->ops = bus->iommu_ops;
 914
 915        ret = domain->ops->domain_init(domain);
 916        if (ret)
 917                goto out_free;
 918
 919        return domain;
 920
 921out_free:
 922        kfree(domain);
 923
 924        return NULL;
 925}
 926EXPORT_SYMBOL_GPL(iommu_domain_alloc);
 927
 928void iommu_domain_free(struct iommu_domain *domain)
 929{
 930        if (likely(domain->ops->domain_destroy != NULL))
 931                domain->ops->domain_destroy(domain);
 932
 933        kfree(domain);
 934}
 935EXPORT_SYMBOL_GPL(iommu_domain_free);
 936
 937int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 938{
 939        int ret;
 940        if (unlikely(domain->ops->attach_dev == NULL))
 941                return -ENODEV;
 942
 943        ret = domain->ops->attach_dev(domain, dev);
 944        if (!ret)
 945                trace_attach_device_to_domain(dev);
 946        return ret;
 947}
 948EXPORT_SYMBOL_GPL(iommu_attach_device);
 949
 950void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 951{
 952        if (unlikely(domain->ops->detach_dev == NULL))
 953                return;
 954
 955        domain->ops->detach_dev(domain, dev);
 956        trace_detach_device_from_domain(dev);
 957}
 958EXPORT_SYMBOL_GPL(iommu_detach_device);
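
/*
 * Sketch of the basic domain life cycle built from the calls above.  The
 * device, the IOVA and the physical address are hypothetical, and the
 * mapped size must be allowed by the domain's pgsize_bitmap.
 *
 *	struct iommu_domain *domain;
 *	int ret;
 *
 *	domain = iommu_domain_alloc(dev->bus);
 *	if (!domain)
 *		return -ENODEV;
 *
 *	ret = iommu_attach_device(domain, dev);
 *	if (ret) {
 *		iommu_domain_free(domain);
 *		return ret;
 *	}
 *
 *	ret = iommu_map(domain, iova, paddr, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(domain, iova, SZ_4K);
 *
 *	iommu_detach_device(domain, dev);
 *	iommu_domain_free(domain);
 */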
 959
 960/*
  961 * IOMMU groups are really the natural working unit of the IOMMU, but
 962 * the IOMMU API works on domains and devices.  Bridge that gap by
 963 * iterating over the devices in a group.  Ideally we'd have a single
 964 * device which represents the requestor ID of the group, but we also
 965 * allow IOMMU drivers to create policy defined minimum sets, where
  966 * the physical hardware may be able to distinguish members, but we
 967 * wish to group them at a higher level (ex. untrusted multi-function
 968 * PCI devices).  Thus we attach each device.
 969 */
 970static int iommu_group_do_attach_device(struct device *dev, void *data)
 971{
 972        struct iommu_domain *domain = data;
 973
 974        return iommu_attach_device(domain, dev);
 975}
 976
 977int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
 978{
 979        return iommu_group_for_each_dev(group, domain,
 980                                        iommu_group_do_attach_device);
 981}
 982EXPORT_SYMBOL_GPL(iommu_attach_group);
 983
 984static int iommu_group_do_detach_device(struct device *dev, void *data)
 985{
 986        struct iommu_domain *domain = data;
 987
 988        iommu_detach_device(domain, dev);
 989
 990        return 0;
 991}
 992
 993void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group)
 994{
 995        iommu_group_for_each_dev(group, domain, iommu_group_do_detach_device);
 996}
 997EXPORT_SYMBOL_GPL(iommu_detach_group);
 998
 999phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
1000{
1001        if (unlikely(domain->ops->iova_to_phys == NULL))
1002                return 0;
1003
1004        return domain->ops->iova_to_phys(domain, iova);
1005}
1006EXPORT_SYMBOL_GPL(iommu_iova_to_phys);
1007
1008static size_t iommu_pgsize(struct iommu_domain *domain,
1009                           unsigned long addr_merge, size_t size)
1010{
1011        unsigned int pgsize_idx;
1012        size_t pgsize;
1013
1014        /* Max page size that still fits into 'size' */
1015        pgsize_idx = __fls(size);
1016
1017        /* need to consider alignment requirements ? */
1018        if (likely(addr_merge)) {
1019                /* Max page size allowed by address */
1020                unsigned int align_pgsize_idx = __ffs(addr_merge);
1021                pgsize_idx = min(pgsize_idx, align_pgsize_idx);
1022        }
1023
1024        /* build a mask of acceptable page sizes */
1025        pgsize = (1UL << (pgsize_idx + 1)) - 1;
1026
1027        /* throw away page sizes not supported by the hardware */
1028        pgsize &= domain->ops->pgsize_bitmap;
1029
1030        /* make sure we're still sane */
1031        BUG_ON(!pgsize);
1032
1033        /* pick the biggest page */
1034        pgsize_idx = __fls(pgsize);
1035        pgsize = 1UL << pgsize_idx;
1036
1037        return pgsize;
1038}
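
/*
 * Worked example of the computation above, assuming a domain whose
 * pgsize_bitmap allows only 4K and 2M pages:
 *
 *	addr_merge = 0x203000, size = 0x10000
 *	__fls(size) = 16, __ffs(addr_merge) = 12  ->  pgsize_idx = 12
 *	pgsize mask = (1UL << 13) - 1 = 0x1fff
 *	0x1fff & (SZ_4K | SZ_2M) = 0x1000  ->  a 4K page is used
 *
 * A 2M page is only chosen once both the IOVA and the physical address
 * are 2M aligned and at least 2M of the request remains.
 */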
1039
1040int iommu_map(struct iommu_domain *domain, unsigned long iova,
1041              phys_addr_t paddr, size_t size, int prot)
1042{
1043        unsigned long orig_iova = iova;
1044        unsigned int min_pagesz;
1045        size_t orig_size = size;
1046        int ret = 0;
1047
1048        if (unlikely(domain->ops->map == NULL ||
1049                     domain->ops->pgsize_bitmap == 0UL))
1050                return -ENODEV;
1051
1052        /* find out the minimum page size supported */
1053        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1054
1055        /*
1056         * both the virtual address and the physical one, as well as
1057         * the size of the mapping, must be aligned (at least) to the
1058         * size of the smallest page supported by the hardware
1059         */
1060        if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
1061                pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
1062                       iova, &paddr, size, min_pagesz);
1063                return -EINVAL;
1064        }
1065
1066        pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
1067
1068        while (size) {
1069                size_t pgsize = iommu_pgsize(domain, iova | paddr, size);
1070
1071                pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n",
1072                         iova, &paddr, pgsize);
1073
1074                ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
1075                if (ret)
1076                        break;
1077
1078                iova += pgsize;
1079                paddr += pgsize;
1080                size -= pgsize;
1081        }
1082
1083        /* unroll mapping in case something went wrong */
1084        if (ret)
1085                iommu_unmap(domain, orig_iova, orig_size - size);
1086        else
1087                trace_map(iova, paddr, size);
1088
1089        return ret;
1090}
1091EXPORT_SYMBOL_GPL(iommu_map);
1092
1093size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
1094{
1095        size_t unmapped_page, unmapped = 0;
1096        unsigned int min_pagesz;
1097
1098        if (unlikely(domain->ops->unmap == NULL ||
1099                     domain->ops->pgsize_bitmap == 0UL))
1100                return -ENODEV;
1101
1102        /* find out the minimum page size supported */
1103        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1104
1105        /*
1106         * The virtual address, as well as the size of the mapping, must be
1107         * aligned (at least) to the size of the smallest page supported
1108         * by the hardware
1109         */
1110        if (!IS_ALIGNED(iova | size, min_pagesz)) {
1111                pr_err("unaligned: iova 0x%lx size 0x%zx min_pagesz 0x%x\n",
1112                       iova, size, min_pagesz);
1113                return -EINVAL;
1114        }
1115
1116        pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
1117
1118        /*
1119         * Keep iterating until we either unmap 'size' bytes (or more)
1120         * or we hit an area that isn't mapped.
1121         */
1122        while (unmapped < size) {
1123                size_t pgsize = iommu_pgsize(domain, iova, size - unmapped);
1124
1125                unmapped_page = domain->ops->unmap(domain, iova, pgsize);
1126                if (!unmapped_page)
1127                        break;
1128
1129                pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
1130                         iova, unmapped_page);
1131
1132                iova += unmapped_page;
1133                unmapped += unmapped_page;
1134        }
1135
1136        trace_unmap(iova, 0, size);
1137        return unmapped;
1138}
1139EXPORT_SYMBOL_GPL(iommu_unmap);
1140
1141size_t default_iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
1142                         struct scatterlist *sg, unsigned int nents, int prot)
1143{
1144        struct scatterlist *s;
1145        size_t mapped = 0;
1146        unsigned int i, min_pagesz;
1147        int ret;
1148
1149        if (unlikely(domain->ops->pgsize_bitmap == 0UL))
1150                return 0;
1151
1152        min_pagesz = 1 << __ffs(domain->ops->pgsize_bitmap);
1153
1154        for_each_sg(sg, s, nents, i) {
1155                phys_addr_t phys = page_to_phys(sg_page(s)) + s->offset;
1156
1157                /*
1158                 * We are mapping on IOMMU page boundaries, so offset within
1159                 * the page must be 0. However, the IOMMU may support pages
1160                 * smaller than PAGE_SIZE, so s->offset may still represent
1161                 * an offset of that boundary within the CPU page.
1162                 */
1163                if (!IS_ALIGNED(s->offset, min_pagesz))
1164                        goto out_err;
1165
1166                ret = iommu_map(domain, iova + mapped, phys, s->length, prot);
1167                if (ret)
1168                        goto out_err;
1169
1170                mapped += s->length;
1171        }
1172
1173        return mapped;
1174
1175out_err:
1176        /* undo mappings already done */
1177        iommu_unmap(domain, iova, mapped);
1178
1179        return 0;
1180
1181}
1182EXPORT_SYMBOL_GPL(default_iommu_map_sg);
1183
1184int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr,
1185                               phys_addr_t paddr, u64 size, int prot)
1186{
1187        if (unlikely(domain->ops->domain_window_enable == NULL))
1188                return -ENODEV;
1189
1190        return domain->ops->domain_window_enable(domain, wnd_nr, paddr, size,
1191                                                 prot);
1192}
1193EXPORT_SYMBOL_GPL(iommu_domain_window_enable);
1194
1195void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr)
1196{
1197        if (unlikely(domain->ops->domain_window_disable == NULL))
1198                return;
1199
1200        return domain->ops->domain_window_disable(domain, wnd_nr);
1201}
1202EXPORT_SYMBOL_GPL(iommu_domain_window_disable);
1203
1204static int __init iommu_init(void)
1205{
1206        iommu_group_kset = kset_create_and_add("iommu_groups",
1207                                               NULL, kernel_kobj);
1208        ida_init(&iommu_group_ida);
1209        mutex_init(&iommu_group_mutex);
1210
1211        BUG_ON(!iommu_group_kset);
1212
1213        return 0;
1214}
1215arch_initcall(iommu_init);
1216
1217int iommu_domain_get_attr(struct iommu_domain *domain,
1218                          enum iommu_attr attr, void *data)
1219{
1220        struct iommu_domain_geometry *geometry;
1221        bool *paging;
1222        int ret = 0;
1223        u32 *count;
1224
1225        switch (attr) {
1226        case DOMAIN_ATTR_GEOMETRY:
1227                geometry  = data;
1228                *geometry = domain->geometry;
1229
1230                break;
1231        case DOMAIN_ATTR_PAGING:
1232                paging  = data;
1233                *paging = (domain->ops->pgsize_bitmap != 0UL);
1234                break;
1235        case DOMAIN_ATTR_WINDOWS:
1236                count = data;
1237
1238                if (domain->ops->domain_get_windows != NULL)
1239                        *count = domain->ops->domain_get_windows(domain);
1240                else
1241                        ret = -ENODEV;
1242
1243                break;
1244        default:
1245                if (!domain->ops->domain_get_attr)
1246                        return -EINVAL;
1247
1248                ret = domain->ops->domain_get_attr(domain, attr, data);
1249        }
1250
1251        return ret;
1252}
1253EXPORT_SYMBOL_GPL(iommu_domain_get_attr);
1254
1255int iommu_domain_set_attr(struct iommu_domain *domain,
1256                          enum iommu_attr attr, void *data)
1257{
1258        int ret = 0;
1259        u32 *count;
1260
1261        switch (attr) {
1262        case DOMAIN_ATTR_WINDOWS:
1263                count = data;
1264
1265                if (domain->ops->domain_set_windows != NULL)
1266                        ret = domain->ops->domain_set_windows(domain, *count);
1267                else
1268                        ret = -ENODEV;
1269
1270                break;
1271        default:
1272                if (domain->ops->domain_set_attr == NULL)
1273                        return -EINVAL;
1274
1275                ret = domain->ops->domain_set_attr(domain, attr, data);
1276        }
1277
1278        return ret;
1279}
1280EXPORT_SYMBOL_GPL(iommu_domain_set_attr);
1281