linux/drivers/iommu/intel/dmar.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2006, Intel Corporation.
   4 *
   5 * Copyright (C) 2006-2008 Intel Corporation
   6 * Author: Ashok Raj <ashok.raj@intel.com>
   7 * Author: Shaohua Li <shaohua.li@intel.com>
   8 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   9 *
  10 * This file implements early detection/parsing of Remapping Devices
  11 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  12 * tables.
  13 *
  14 * These routines are used by both DMA-remapping and Interrupt-remapping
  15 */
  16
  17#define pr_fmt(fmt)     "DMAR: " fmt
  18
  19#include <linux/pci.h>
  20#include <linux/dmar.h>
  21#include <linux/iova.h>
  22#include <linux/intel-iommu.h>
  23#include <linux/timer.h>
  24#include <linux/irq.h>
  25#include <linux/interrupt.h>
  26#include <linux/tboot.h>
  27#include <linux/dmi.h>
  28#include <linux/slab.h>
  29#include <linux/iommu.h>
  30#include <linux/numa.h>
  31#include <linux/limits.h>
  32#include <asm/irq_remapping.h>
  33#include <asm/iommu_table.h>
  34#include <trace/events/intel_iommu.h>
  35
  36#include "../irq_remapping.h"
  37#include "perf.h"
  38
  39typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
  40struct dmar_res_callback {
  41        dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
  42        void                    *arg[ACPI_DMAR_TYPE_RESERVED];
  43        bool                    ignore_unhandled;
  44        bool                    print_entry;
  45};
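
     /*
      * The callback table is indexed by ACPI DMAR structure type; see
      * parse_dmar_table() below, which wires up one handler per type, e.g.:
      *
      *	.cb[ACPI_DMAR_TYPE_HARDWARE_UNIT]  = &dmar_parse_one_drhd,
      *	.cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
      *
      * dmar_walk_remapping_entries() dispatches each table entry to
      * cb[entry->type] and skips unknown types for forward compatibility.
      */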
  46
  47/*
  48 * Assumptions:
   49 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
   50 *    before the I/O devices managed by that unit.
   51 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
   52 *    after the I/O devices managed by that unit.
  53 * 3) Hotplug events are rare.
  54 *
  55 * Locking rules for DMA and interrupt remapping related global data structures:
  56 * 1) Use dmar_global_lock in process context
  57 * 2) Use RCU in interrupt context
  58 */
  59DECLARE_RWSEM(dmar_global_lock);
  60LIST_HEAD(dmar_drhd_units);
  61
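     /*
      * Illustrative use of the locking rules above:
      *
      *	process context (e.g. the PCI bus notifier):
      *		down_write(&dmar_global_lock);
      *		... add/remove entries on dmar_drhd_units ...
      *		up_write(&dmar_global_lock);
      *
      *	atomic context (e.g. dmar_find_matched_drhd_unit()):
      *		rcu_read_lock();
      *		for_each_drhd_unit(dmaru)
      *			... read-only walk ...
      *		rcu_read_unlock();
      */
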
  62struct acpi_table_header * __initdata dmar_tbl;
  63static int dmar_dev_scope_status = 1;
  64static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
  65
  66static int alloc_iommu(struct dmar_drhd_unit *drhd);
  67static void free_iommu(struct intel_iommu *iommu);
  68
  69extern const struct iommu_ops intel_iommu_ops;
  70
  71static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
  72{
  73        /*
   74         * Add the INCLUDE_ALL unit at the tail, so that a scan of the
   75         * list will find it at the very end.
  76         */
  77        if (drhd->include_all)
  78                list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
  79        else
  80                list_add_rcu(&drhd->list, &dmar_drhd_units);
  81}
  82
  83void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
  84{
  85        struct acpi_dmar_device_scope *scope;
  86
  87        *cnt = 0;
  88        while (start < end) {
  89                scope = start;
  90                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
  91                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
  92                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
  93                        (*cnt)++;
  94                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
  95                        scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
  96                        pr_warn("Unsupported device scope\n");
  97                }
  98                start += scope->length;
  99        }
 100        if (*cnt == 0)
 101                return NULL;
 102
 103        return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
 104}
 105
 106void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
 107{
 108        int i;
 109        struct device *tmp_dev;
 110
 111        if (*devices && *cnt) {
 112                for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
 113                        put_device(tmp_dev);
 114                kfree(*devices);
 115        }
 116
 117        *devices = NULL;
 118        *cnt = 0;
 119}
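
     /*
      * dmar_alloc_dev_scope() and dmar_free_dev_scope() are used as a pair;
      * see dmar_parse_one_drhd() for the typical pattern:
      *
      *	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
      *				((void *)drhd) + drhd->header.length,
      *				&dmaru->devices_cnt);
      *	...
      *	dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
      *
      * The allocation only sizes the array; the individual slots are filled
      * later by dmar_insert_dev_scope() as matching devices are discovered.
      */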
 120
 121/* Optimize out kzalloc()/kfree() for normal cases */
 122static char dmar_pci_notify_info_buf[64];
 123
 124static struct dmar_pci_notify_info *
 125dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
 126{
 127        int level = 0;
 128        size_t size;
 129        struct pci_dev *tmp;
 130        struct dmar_pci_notify_info *info;
 131
 132        BUG_ON(dev->is_virtfn);
 133
 134        /*
 135         * Ignore devices that have a domain number higher than what can
 136         * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
 137         */
 138        if (pci_domain_nr(dev->bus) > U16_MAX)
 139                return NULL;
 140
 141        /* Only generate path[] for device addition event */
 142        if (event == BUS_NOTIFY_ADD_DEVICE)
 143                for (tmp = dev; tmp; tmp = tmp->bus->self)
 144                        level++;
 145
 146        size = struct_size(info, path, level);
 147        if (size <= sizeof(dmar_pci_notify_info_buf)) {
 148                info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
 149        } else {
 150                info = kzalloc(size, GFP_KERNEL);
 151                if (!info) {
 152                        pr_warn("Out of memory when allocating notify_info "
 153                                "for %s.\n", pci_name(dev));
 154                        if (dmar_dev_scope_status == 0)
 155                                dmar_dev_scope_status = -ENOMEM;
 156                        return NULL;
 157                }
 158        }
 159
 160        info->event = event;
 161        info->dev = dev;
 162        info->seg = pci_domain_nr(dev->bus);
 163        info->level = level;
 164        if (event == BUS_NOTIFY_ADD_DEVICE) {
 165                for (tmp = dev; tmp; tmp = tmp->bus->self) {
 166                        level--;
 167                        info->path[level].bus = tmp->bus->number;
 168                        info->path[level].device = PCI_SLOT(tmp->devfn);
 169                        info->path[level].function = PCI_FUNC(tmp->devfn);
 170                        if (pci_is_root_bus(tmp->bus))
 171                                info->bus = tmp->bus->number;
 172                }
 173        }
 174
 175        return info;
 176}
 177
 178static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
 179{
 180        if ((void *)info != dmar_pci_notify_info_buf)
 181                kfree(info);
 182}
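
     /*
      * For a BUS_NOTIFY_ADD_DEVICE event, path[] describes the device's
      * position in the PCI tree ordered from the root bus downwards.  As a
      * made-up example, an endpoint at 0000:02:00.0 behind a root port at
      * 0000:00:1c.0 would be recorded as:
      *
      *	info->bus     = 0x00
      *	info->path[0] = { .device = 0x1c, .function = 0 }	root port
      *	info->path[1] = { .device = 0x00, .function = 0 }	endpoint
      *
      * which matches the bus/path form used by ACPI DMAR device scope
      * entries, so dmar_match_pci_path() can compare them directly.
      */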
 183
 184static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
 185                                struct acpi_dmar_pci_path *path, int count)
 186{
 187        int i;
 188
 189        if (info->bus != bus)
 190                goto fallback;
 191        if (info->level != count)
 192                goto fallback;
 193
 194        for (i = 0; i < count; i++) {
 195                if (path[i].device != info->path[i].device ||
 196                    path[i].function != info->path[i].function)
 197                        goto fallback;
 198        }
 199
 200        return true;
 201
 202fallback:
 203
 204        if (count != 1)
 205                return false;
 206
 207        i = info->level - 1;
 208        if (bus              == info->path[i].bus &&
 209            path[0].device   == info->path[i].device &&
 210            path[0].function == info->path[i].function) {
 211                pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
 212                        bus, path[0].device, path[0].function);
 213                return true;
 214        }
 215
 216        return false;
 217}
 218
 219/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
 220int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
 221                          void *start, void*end, u16 segment,
 222                          struct dmar_dev_scope *devices,
 223                          int devices_cnt)
 224{
 225        int i, level;
 226        struct device *tmp, *dev = &info->dev->dev;
 227        struct acpi_dmar_device_scope *scope;
 228        struct acpi_dmar_pci_path *path;
 229
 230        if (segment != info->seg)
 231                return 0;
 232
 233        for (; start < end; start += scope->length) {
 234                scope = start;
 235                if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
 236                    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 237                        continue;
 238
 239                path = (struct acpi_dmar_pci_path *)(scope + 1);
 240                level = (scope->length - sizeof(*scope)) / sizeof(*path);
 241                if (!dmar_match_pci_path(info, scope->bus, path, level))
 242                        continue;
 243
 244                /*
 245                 * We expect devices with endpoint scope to have normal PCI
 246                 * headers, and devices with bridge scope to have bridge PCI
 247                 * headers.  However PCI NTB devices may be listed in the
 248                 * DMAR table with bridge scope, even though they have a
 249                 * normal PCI header.  NTB devices are identified by class
  250                 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
 251                 * for this special case.
 252                 */
 253                if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
 254                     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
 255                    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
 256                     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
 257                      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
 258                        pr_warn("Device scope type does not match for %s\n",
 259                                pci_name(info->dev));
 260                        return -EINVAL;
 261                }
 262
 263                for_each_dev_scope(devices, devices_cnt, i, tmp)
 264                        if (tmp == NULL) {
 265                                devices[i].bus = info->dev->bus->number;
 266                                devices[i].devfn = info->dev->devfn;
 267                                rcu_assign_pointer(devices[i].dev,
 268                                                   get_device(dev));
 269                                return 1;
 270                        }
 271                BUG_ON(i >= devices_cnt);
 272        }
 273
 274        return 0;
 275}
 276
 277int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
 278                          struct dmar_dev_scope *devices, int count)
 279{
 280        int index;
 281        struct device *tmp;
 282
 283        if (info->seg != segment)
 284                return 0;
 285
 286        for_each_active_dev_scope(devices, count, index, tmp)
 287                if (tmp == &info->dev->dev) {
 288                        RCU_INIT_POINTER(devices[index].dev, NULL);
 289                        synchronize_rcu();
 290                        put_device(tmp);
 291                        return 1;
 292                }
 293
 294        return 0;
 295}
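
     /*
      * Note the removal order in dmar_remove_dev_scope(): the slot is
      * cleared with RCU_INIT_POINTER(), then synchronize_rcu() waits for
      * concurrent for_each_active_dev_scope() walkers (which run under
      * rcu_read_lock()) to finish before put_device() drops the reference.
      */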
 296
 297static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
 298{
 299        int ret = 0;
 300        struct dmar_drhd_unit *dmaru;
 301        struct acpi_dmar_hardware_unit *drhd;
 302
 303        for_each_drhd_unit(dmaru) {
 304                if (dmaru->include_all)
 305                        continue;
 306
 307                drhd = container_of(dmaru->hdr,
 308                                    struct acpi_dmar_hardware_unit, header);
 309                ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
 310                                ((void *)drhd) + drhd->header.length,
 311                                dmaru->segment,
 312                                dmaru->devices, dmaru->devices_cnt);
 313                if (ret)
 314                        break;
 315        }
 316        if (ret >= 0)
 317                ret = dmar_iommu_notify_scope_dev(info);
 318        if (ret < 0 && dmar_dev_scope_status == 0)
 319                dmar_dev_scope_status = ret;
 320
 321        if (ret >= 0)
 322                intel_irq_remap_add_device(info);
 323
 324        return ret;
 325}
 326
 327static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
 328{
 329        struct dmar_drhd_unit *dmaru;
 330
 331        for_each_drhd_unit(dmaru)
 332                if (dmar_remove_dev_scope(info, dmaru->segment,
 333                        dmaru->devices, dmaru->devices_cnt))
 334                        break;
 335        dmar_iommu_notify_scope_dev(info);
 336}
 337
 338static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
 339{
 340        struct pci_dev *physfn = pci_physfn(pdev);
 341
 342        dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
 343}
 344
 345static int dmar_pci_bus_notifier(struct notifier_block *nb,
 346                                 unsigned long action, void *data)
 347{
 348        struct pci_dev *pdev = to_pci_dev(data);
 349        struct dmar_pci_notify_info *info;
 350
 351        /* Only care about add/remove events for physical functions.
 352         * For VFs we actually do the lookup based on the corresponding
 353         * PF in device_to_iommu() anyway. */
 354        if (pdev->is_virtfn) {
 355                /*
 356                 * Ensure that the VF device inherits the irq domain of the
 357                 * PF device. Ideally the device would inherit the domain
 358                 * from the bus, but DMAR can have multiple units per bus
 359                 * which makes this impossible. The VF 'bus' could inherit
 360                 * from the PF device, but that's yet another x86'sism to
 361                 * inflict on everybody else.
 362                 */
 363                if (action == BUS_NOTIFY_ADD_DEVICE)
 364                        vf_inherit_msi_domain(pdev);
 365                return NOTIFY_DONE;
 366        }
 367
 368        if (action != BUS_NOTIFY_ADD_DEVICE &&
 369            action != BUS_NOTIFY_REMOVED_DEVICE)
 370                return NOTIFY_DONE;
 371
 372        info = dmar_alloc_pci_notify_info(pdev, action);
 373        if (!info)
 374                return NOTIFY_DONE;
 375
 376        down_write(&dmar_global_lock);
 377        if (action == BUS_NOTIFY_ADD_DEVICE)
 378                dmar_pci_bus_add_dev(info);
 379        else if (action == BUS_NOTIFY_REMOVED_DEVICE)
 380                dmar_pci_bus_del_dev(info);
 381        up_write(&dmar_global_lock);
 382
 383        dmar_free_pci_notify_info(info);
 384
 385        return NOTIFY_OK;
 386}
 387
 388static struct notifier_block dmar_pci_bus_nb = {
 389        .notifier_call = dmar_pci_bus_notifier,
 390        .priority = INT_MIN,
 391};
 392
 393static struct dmar_drhd_unit *
 394dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
 395{
 396        struct dmar_drhd_unit *dmaru;
 397
 398        list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
 399                                dmar_rcu_check())
 400                if (dmaru->segment == drhd->segment &&
 401                    dmaru->reg_base_addr == drhd->address)
 402                        return dmaru;
 403
 404        return NULL;
 405}
 406
 407/*
 408 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
  409 * structure which uniquely represents one DMA remapping hardware unit
 410 * present in the platform
 411 */
 412static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
 413{
 414        struct acpi_dmar_hardware_unit *drhd;
 415        struct dmar_drhd_unit *dmaru;
 416        int ret;
 417
 418        drhd = (struct acpi_dmar_hardware_unit *)header;
 419        dmaru = dmar_find_dmaru(drhd);
 420        if (dmaru)
 421                goto out;
 422
 423        dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
 424        if (!dmaru)
 425                return -ENOMEM;
 426
 427        /*
 428         * If header is allocated from slab by ACPI _DSM method, we need to
 429         * copy the content because the memory buffer will be freed on return.
 430         */
 431        dmaru->hdr = (void *)(dmaru + 1);
 432        memcpy(dmaru->hdr, header, header->length);
 433        dmaru->reg_base_addr = drhd->address;
 434        dmaru->segment = drhd->segment;
 435        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 436        dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
 437                                              ((void *)drhd) + drhd->header.length,
 438                                              &dmaru->devices_cnt);
 439        if (dmaru->devices_cnt && dmaru->devices == NULL) {
 440                kfree(dmaru);
 441                return -ENOMEM;
 442        }
 443
 444        ret = alloc_iommu(dmaru);
 445        if (ret) {
 446                dmar_free_dev_scope(&dmaru->devices,
 447                                    &dmaru->devices_cnt);
 448                kfree(dmaru);
 449                return ret;
 450        }
 451        dmar_register_drhd_unit(dmaru);
 452
 453out:
 454        if (arg)
 455                (*(int *)arg)++;
 456
 457        return 0;
 458}
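
     /*
      * dmar_parse_one_drhd() keeps everything in a single allocation:
      *
      *	+-------------------------+  <- dmaru
      *	| struct dmar_drhd_unit   |
      *	+-------------------------+  <- dmaru->hdr == (void *)(dmaru + 1)
      *	| copy of the ACPI DRHD   |
      *	| header + scope entries  |
      *	+-------------------------+
      *
      * Copying the header keeps the unit valid even when the source buffer
      * (e.g. one returned by an ACPI _DSM method) is freed after parsing.
      */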
 459
 460static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
 461{
 462        if (dmaru->devices && dmaru->devices_cnt)
 463                dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
 464        if (dmaru->iommu)
 465                free_iommu(dmaru->iommu);
 466        kfree(dmaru);
 467}
 468
 469static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
 470                                      void *arg)
 471{
 472        struct acpi_dmar_andd *andd = (void *)header;
 473
 474        /* Check for NUL termination within the designated length */
 475        if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
 476                pr_warn(FW_BUG
 477                           "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
 478                           "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 479                           dmi_get_system_info(DMI_BIOS_VENDOR),
 480                           dmi_get_system_info(DMI_BIOS_VERSION),
 481                           dmi_get_system_info(DMI_PRODUCT_VERSION));
 482                add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 483                return -EINVAL;
 484        }
 485        pr_info("ANDD device: %x name: %s\n", andd->device_number,
 486                andd->device_name);
 487
 488        return 0;
 489}
 490
 491#ifdef CONFIG_ACPI_NUMA
 492static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 493{
 494        struct acpi_dmar_rhsa *rhsa;
 495        struct dmar_drhd_unit *drhd;
 496
 497        rhsa = (struct acpi_dmar_rhsa *)header;
 498        for_each_drhd_unit(drhd) {
 499                if (drhd->reg_base_addr == rhsa->base_address) {
 500                        int node = pxm_to_node(rhsa->proximity_domain);
 501
 502                        if (!node_online(node))
 503                                node = NUMA_NO_NODE;
 504                        drhd->iommu->node = node;
 505                        return 0;
 506                }
 507        }
 508        pr_warn(FW_BUG
 509                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
 510                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 511                rhsa->base_address,
 512                dmi_get_system_info(DMI_BIOS_VENDOR),
 513                dmi_get_system_info(DMI_BIOS_VERSION),
 514                dmi_get_system_info(DMI_PRODUCT_VERSION));
 515        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 516
 517        return 0;
 518}
 519#else
 520#define dmar_parse_one_rhsa             dmar_res_noop
 521#endif
 522
 523static void
 524dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 525{
 526        struct acpi_dmar_hardware_unit *drhd;
 527        struct acpi_dmar_reserved_memory *rmrr;
 528        struct acpi_dmar_atsr *atsr;
 529        struct acpi_dmar_rhsa *rhsa;
 530        struct acpi_dmar_satc *satc;
 531
 532        switch (header->type) {
 533        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 534                drhd = container_of(header, struct acpi_dmar_hardware_unit,
 535                                    header);
 536                pr_info("DRHD base: %#016Lx flags: %#x\n",
 537                        (unsigned long long)drhd->address, drhd->flags);
 538                break;
 539        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 540                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
 541                                    header);
 542                pr_info("RMRR base: %#016Lx end: %#016Lx\n",
 543                        (unsigned long long)rmrr->base_address,
 544                        (unsigned long long)rmrr->end_address);
 545                break;
 546        case ACPI_DMAR_TYPE_ROOT_ATS:
 547                atsr = container_of(header, struct acpi_dmar_atsr, header);
 548                pr_info("ATSR flags: %#x\n", atsr->flags);
 549                break;
 550        case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
 551                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
 552                pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
 553                       (unsigned long long)rhsa->base_address,
 554                       rhsa->proximity_domain);
 555                break;
 556        case ACPI_DMAR_TYPE_NAMESPACE:
 557                /* We don't print this here because we need to sanity-check
 558                   it first. So print it in dmar_parse_one_andd() instead. */
 559                break;
 560        case ACPI_DMAR_TYPE_SATC:
 561                satc = container_of(header, struct acpi_dmar_satc, header);
 562                pr_info("SATC flags: 0x%x\n", satc->flags);
 563                break;
 564        }
 565}
 566
 567/**
 568 * dmar_table_detect - checks to see if the platform supports DMAR devices
 569 */
 570static int __init dmar_table_detect(void)
 571{
 572        acpi_status status = AE_OK;
 573
  574        /* If we can find the DMAR table, then there are DMAR devices. */
 575        status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
 576
 577        if (ACPI_SUCCESS(status) && !dmar_tbl) {
 578                pr_warn("Unable to map DMAR\n");
 579                status = AE_NOT_FOUND;
 580        }
 581
 582        return ACPI_SUCCESS(status) ? 0 : -ENOENT;
 583}
 584
 585static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
 586                                       size_t len, struct dmar_res_callback *cb)
 587{
 588        struct acpi_dmar_header *iter, *next;
 589        struct acpi_dmar_header *end = ((void *)start) + len;
 590
 591        for (iter = start; iter < end; iter = next) {
 592                next = (void *)iter + iter->length;
 593                if (iter->length == 0) {
 594                        /* Avoid looping forever on bad ACPI tables */
 595                        pr_debug(FW_BUG "Invalid 0-length structure\n");
 596                        break;
 597                } else if (next > end) {
 598                        /* Avoid passing table end */
 599                        pr_warn(FW_BUG "Record passes table end\n");
 600                        return -EINVAL;
 601                }
 602
 603                if (cb->print_entry)
 604                        dmar_table_print_dmar_entry(iter);
 605
 606                if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
 607                        /* continue for forward compatibility */
 608                        pr_debug("Unknown DMAR structure type %d\n",
 609                                 iter->type);
 610                } else if (cb->cb[iter->type]) {
 611                        int ret;
 612
 613                        ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
 614                        if (ret)
 615                                return ret;
 616                } else if (!cb->ignore_unhandled) {
 617                        pr_warn("No handler for DMAR structure type %d\n",
 618                                iter->type);
 619                        return -EINVAL;
 620                }
 621        }
 622
 623        return 0;
 624}
 625
 626static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
 627                                       struct dmar_res_callback *cb)
 628{
 629        return dmar_walk_remapping_entries((void *)(dmar + 1),
 630                        dmar->header.length - sizeof(*dmar), cb);
 631}
 632
 633/**
 634 * parse_dmar_table - parses the DMA reporting table
 635 */
 636static int __init
 637parse_dmar_table(void)
 638{
 639        struct acpi_table_dmar *dmar;
 640        int drhd_count = 0;
 641        int ret;
 642        struct dmar_res_callback cb = {
 643                .print_entry = true,
 644                .ignore_unhandled = true,
 645                .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
 646                .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
 647                .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
 648                .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
 649                .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
 650                .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
 651                .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
 652        };
 653
 654        /*
  655         * Do it again; the earlier dmar_tbl mapping could have been done
  656         * with a fixed map.
 657         */
 658        dmar_table_detect();
 659
 660        /*
  661         * ACPI tables may not be DMA protected by tboot, so use the DMAR
  662         * copy that SINIT saved in SinitMleData in the TXT heap (DMA protected)
 663         */
 664        dmar_tbl = tboot_get_dmar_table(dmar_tbl);
 665
 666        dmar = (struct acpi_table_dmar *)dmar_tbl;
 667        if (!dmar)
 668                return -ENODEV;
 669
 670        if (dmar->width < PAGE_SHIFT - 1) {
 671                pr_warn("Invalid DMAR haw\n");
 672                return -EINVAL;
 673        }
 674
 675        pr_info("Host address width %d\n", dmar->width + 1);
 676        ret = dmar_walk_dmar_table(dmar, &cb);
 677        if (ret == 0 && drhd_count == 0)
 678                pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
 679
 680        return ret;
 681}
 682
 683static int dmar_pci_device_match(struct dmar_dev_scope devices[],
 684                                 int cnt, struct pci_dev *dev)
 685{
 686        int index;
 687        struct device *tmp;
 688
 689        while (dev) {
 690                for_each_active_dev_scope(devices, cnt, index, tmp)
 691                        if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
 692                                return 1;
 693
 694                /* Check our parent */
 695                dev = dev->bus->self;
 696        }
 697
 698        return 0;
 699}
 700
 701struct dmar_drhd_unit *
 702dmar_find_matched_drhd_unit(struct pci_dev *dev)
 703{
 704        struct dmar_drhd_unit *dmaru;
 705        struct acpi_dmar_hardware_unit *drhd;
 706
 707        dev = pci_physfn(dev);
 708
 709        rcu_read_lock();
 710        for_each_drhd_unit(dmaru) {
 711                drhd = container_of(dmaru->hdr,
 712                                    struct acpi_dmar_hardware_unit,
 713                                    header);
 714
 715                if (dmaru->include_all &&
 716                    drhd->segment == pci_domain_nr(dev->bus))
 717                        goto out;
 718
 719                if (dmar_pci_device_match(dmaru->devices,
 720                                          dmaru->devices_cnt, dev))
 721                        goto out;
 722        }
 723        dmaru = NULL;
 724out:
 725        rcu_read_unlock();
 726
 727        return dmaru;
 728}
 729
 730static void __init dmar_acpi_insert_dev_scope(u8 device_number,
 731                                              struct acpi_device *adev)
 732{
 733        struct dmar_drhd_unit *dmaru;
 734        struct acpi_dmar_hardware_unit *drhd;
 735        struct acpi_dmar_device_scope *scope;
 736        struct device *tmp;
 737        int i;
 738        struct acpi_dmar_pci_path *path;
 739
 740        for_each_drhd_unit(dmaru) {
 741                drhd = container_of(dmaru->hdr,
 742                                    struct acpi_dmar_hardware_unit,
 743                                    header);
 744
 745                for (scope = (void *)(drhd + 1);
 746                     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
 747                     scope = ((void *)scope) + scope->length) {
 748                        if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
 749                                continue;
 750                        if (scope->enumeration_id != device_number)
 751                                continue;
 752
 753                        path = (void *)(scope + 1);
 754                        pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
 755                                dev_name(&adev->dev), dmaru->reg_base_addr,
 756                                scope->bus, path->device, path->function);
 757                        for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
 758                                if (tmp == NULL) {
 759                                        dmaru->devices[i].bus = scope->bus;
 760                                        dmaru->devices[i].devfn = PCI_DEVFN(path->device,
 761                                                                            path->function);
 762                                        rcu_assign_pointer(dmaru->devices[i].dev,
 763                                                           get_device(&adev->dev));
 764                                        return;
 765                                }
 766                        BUG_ON(i >= dmaru->devices_cnt);
 767                }
 768        }
 769        pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
 770                device_number, dev_name(&adev->dev));
 771}
 772
 773static int __init dmar_acpi_dev_scope_init(void)
 774{
 775        struct acpi_dmar_andd *andd;
 776
 777        if (dmar_tbl == NULL)
 778                return -ENODEV;
 779
 780        for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
 781             ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
 782             andd = ((void *)andd) + andd->header.length) {
 783                if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
 784                        acpi_handle h;
 785                        struct acpi_device *adev;
 786
 787                        if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
 788                                                          andd->device_name,
 789                                                          &h))) {
 790                                pr_err("Failed to find handle for ACPI object %s\n",
 791                                       andd->device_name);
 792                                continue;
 793                        }
 794                        if (acpi_bus_get_device(h, &adev)) {
 795                                pr_err("Failed to get device for ACPI object %s\n",
 796                                       andd->device_name);
 797                                continue;
 798                        }
 799                        dmar_acpi_insert_dev_scope(andd->device_number, adev);
 800                }
 801        }
 802        return 0;
 803}
 804
 805int __init dmar_dev_scope_init(void)
 806{
 807        struct pci_dev *dev = NULL;
 808        struct dmar_pci_notify_info *info;
 809
 810        if (dmar_dev_scope_status != 1)
 811                return dmar_dev_scope_status;
 812
 813        if (list_empty(&dmar_drhd_units)) {
 814                dmar_dev_scope_status = -ENODEV;
 815        } else {
 816                dmar_dev_scope_status = 0;
 817
 818                dmar_acpi_dev_scope_init();
 819
 820                for_each_pci_dev(dev) {
 821                        if (dev->is_virtfn)
 822                                continue;
 823
 824                        info = dmar_alloc_pci_notify_info(dev,
 825                                        BUS_NOTIFY_ADD_DEVICE);
 826                        if (!info) {
 827                                return dmar_dev_scope_status;
 828                        } else {
 829                                dmar_pci_bus_add_dev(info);
 830                                dmar_free_pci_notify_info(info);
 831                        }
 832                }
 833        }
 834
 835        return dmar_dev_scope_status;
 836}
 837
 838void __init dmar_register_bus_notifier(void)
 839{
 840        bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 841}
 842
 843
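     /*
      * dmar_table_init() may be called more than once: the static
      * dmar_table_initialized latches either 1 (table parsed successfully)
      * or the negative errno from the first attempt, and later calls simply
      * replay that result (0 on success, the cached error otherwise).
      */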
 844int __init dmar_table_init(void)
 845{
 846        static int dmar_table_initialized;
 847        int ret;
 848
 849        if (dmar_table_initialized == 0) {
 850                ret = parse_dmar_table();
 851                if (ret < 0) {
 852                        if (ret != -ENODEV)
 853                                pr_info("Parse DMAR table failure.\n");
 854                } else  if (list_empty(&dmar_drhd_units)) {
 855                        pr_info("No DMAR devices found\n");
 856                        ret = -ENODEV;
 857                }
 858
 859                if (ret < 0)
 860                        dmar_table_initialized = ret;
 861                else
 862                        dmar_table_initialized = 1;
 863        }
 864
 865        return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
 866}
 867
 868static void warn_invalid_dmar(u64 addr, const char *message)
 869{
 870        pr_warn_once(FW_BUG
 871                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
 872                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 873                addr, message,
 874                dmi_get_system_info(DMI_BIOS_VENDOR),
 875                dmi_get_system_info(DMI_BIOS_VERSION),
 876                dmi_get_system_info(DMI_PRODUCT_VERSION));
 877        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 878}
 879
 880static int __ref
 881dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
 882{
 883        struct acpi_dmar_hardware_unit *drhd;
 884        void __iomem *addr;
 885        u64 cap, ecap;
 886
 887        drhd = (void *)entry;
 888        if (!drhd->address) {
 889                warn_invalid_dmar(0, "");
 890                return -EINVAL;
 891        }
 892
 893        if (arg)
 894                addr = ioremap(drhd->address, VTD_PAGE_SIZE);
 895        else
 896                addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
 897        if (!addr) {
 898                pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
 899                return -EINVAL;
 900        }
 901
 902        cap = dmar_readq(addr + DMAR_CAP_REG);
 903        ecap = dmar_readq(addr + DMAR_ECAP_REG);
 904
 905        if (arg)
 906                iounmap(addr);
 907        else
 908                early_iounmap(addr, VTD_PAGE_SIZE);
 909
 910        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
 911                warn_invalid_dmar(drhd->address, " returns all ones");
 912                return -EINVAL;
 913        }
 914
 915        return 0;
 916}
 917
 918int __init detect_intel_iommu(void)
 919{
 920        int ret;
 921        struct dmar_res_callback validate_drhd_cb = {
 922                .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
 923                .ignore_unhandled = true,
 924        };
 925
 926        down_write(&dmar_global_lock);
 927        ret = dmar_table_detect();
 928        if (!ret)
 929                ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
 930                                           &validate_drhd_cb);
 931        if (!ret && !no_iommu && !iommu_detected &&
 932            (!dmar_disabled || dmar_platform_optin())) {
 933                iommu_detected = 1;
 934                /* Make sure ACS will be enabled */
 935                pci_request_acs();
 936        }
 937
 938#ifdef CONFIG_X86
 939        if (!ret) {
 940                x86_init.iommu.iommu_init = intel_iommu_init;
 941                x86_platform.iommu_shutdown = intel_iommu_shutdown;
 942        }
 943
 944#endif
 945
 946        if (dmar_tbl) {
 947                acpi_put_table(dmar_tbl);
 948                dmar_tbl = NULL;
 949        }
 950        up_write(&dmar_global_lock);
 951
 952        return ret ? ret : 1;
 953}
 954
 955static void unmap_iommu(struct intel_iommu *iommu)
 956{
 957        iounmap(iommu->reg);
 958        release_mem_region(iommu->reg_phys, iommu->reg_size);
 959}
 960
 961/**
 962 * map_iommu: map the iommu's registers
 963 * @iommu: the iommu to map
  964 * @phys_addr: the physical address of the base register
  965 *
  966 * Memory map the iommu's registers.  Start with a single page, and
  967 * possibly expand if that turns out to be insufficient.
 968 */
 969static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
 970{
 971        int map_size, err=0;
 972
 973        iommu->reg_phys = phys_addr;
 974        iommu->reg_size = VTD_PAGE_SIZE;
 975
 976        if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
 977                pr_err("Can't reserve memory\n");
 978                err = -EBUSY;
 979                goto out;
 980        }
 981
 982        iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
 983        if (!iommu->reg) {
 984                pr_err("Can't map the region\n");
 985                err = -ENOMEM;
 986                goto release;
 987        }
 988
 989        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 990        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 991
 992        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
 993                err = -EINVAL;
 994                warn_invalid_dmar(phys_addr, " returns all ones");
 995                goto unmap;
 996        }
 997        if (ecap_vcs(iommu->ecap))
 998                iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
 999
1000        /* the registers might be more than one page */
1001        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1002                         cap_max_fault_reg_offset(iommu->cap));
1003        map_size = VTD_PAGE_ALIGN(map_size);
1004        if (map_size > iommu->reg_size) {
1005                iounmap(iommu->reg);
1006                release_mem_region(iommu->reg_phys, iommu->reg_size);
1007                iommu->reg_size = map_size;
1008                if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1009                                        iommu->name)) {
1010                        pr_err("Can't reserve memory\n");
1011                        err = -EBUSY;
1012                        goto out;
1013                }
1014                iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1015                if (!iommu->reg) {
1016                        pr_err("Can't map the region\n");
1017                        err = -ENOMEM;
1018                        goto release;
1019                }
1020        }
1021        err = 0;
1022        goto out;
1023
1024unmap:
1025        iounmap(iommu->reg);
1026release:
1027        release_mem_region(iommu->reg_phys, iommu->reg_size);
1028out:
1029        return err;
1030}
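
     /*
      * map_iommu() maps a single VTD_PAGE_SIZE page first because the
      * capability registers live at the start of the register space; only
      * after reading them can the full size be computed:
      *
      *	map_size = max(ecap_max_iotlb_offset(ecap),
      *		       cap_max_fault_reg_offset(cap)), page-aligned
      *
      * If that exceeds one page, the region is released and re-mapped at
      * the larger size.
      */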
1031
1032static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1033{
1034        iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1035                                            DMAR_UNITS_SUPPORTED);
1036        if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1037                iommu->seq_id = -1;
1038        } else {
1039                set_bit(iommu->seq_id, dmar_seq_ids);
1040                sprintf(iommu->name, "dmar%d", iommu->seq_id);
1041        }
1042
1043        return iommu->seq_id;
1044}
1045
1046static void dmar_free_seq_id(struct intel_iommu *iommu)
1047{
1048        if (iommu->seq_id >= 0) {
1049                clear_bit(iommu->seq_id, dmar_seq_ids);
1050                iommu->seq_id = -1;
1051        }
1052}
1053
1054static int alloc_iommu(struct dmar_drhd_unit *drhd)
1055{
1056        struct intel_iommu *iommu;
1057        u32 ver, sts;
1058        int agaw = -1;
1059        int msagaw = -1;
1060        int err;
1061
1062        if (!drhd->reg_base_addr) {
1063                warn_invalid_dmar(0, "");
1064                return -EINVAL;
1065        }
1066
1067        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1068        if (!iommu)
1069                return -ENOMEM;
1070
1071        if (dmar_alloc_seq_id(iommu) < 0) {
1072                pr_err("Failed to allocate seq_id\n");
1073                err = -ENOSPC;
1074                goto error;
1075        }
1076
1077        err = map_iommu(iommu, drhd->reg_base_addr);
1078        if (err) {
1079                pr_err("Failed to map %s\n", iommu->name);
1080                goto error_free_seq_id;
1081        }
1082
1083        err = -EINVAL;
1084        if (cap_sagaw(iommu->cap) == 0) {
1085                pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1086                        iommu->name);
1087                drhd->ignored = 1;
1088        }
1089
1090        if (!drhd->ignored) {
1091                agaw = iommu_calculate_agaw(iommu);
1092                if (agaw < 0) {
1093                        pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1094                               iommu->seq_id);
1095                        drhd->ignored = 1;
1096                }
1097        }
1098        if (!drhd->ignored) {
1099                msagaw = iommu_calculate_max_sagaw(iommu);
1100                if (msagaw < 0) {
1101                        pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1102                               iommu->seq_id);
1103                        drhd->ignored = 1;
1104                        agaw = -1;
1105                }
1106        }
1107        iommu->agaw = agaw;
1108        iommu->msagaw = msagaw;
1109        iommu->segment = drhd->segment;
1110
1111        iommu->node = NUMA_NO_NODE;
1112
1113        ver = readl(iommu->reg + DMAR_VER_REG);
1114        pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1115                iommu->name,
1116                (unsigned long long)drhd->reg_base_addr,
1117                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1118                (unsigned long long)iommu->cap,
1119                (unsigned long long)iommu->ecap);
1120
1121        /* Reflect status in gcmd */
1122        sts = readl(iommu->reg + DMAR_GSTS_REG);
1123        if (sts & DMA_GSTS_IRES)
1124                iommu->gcmd |= DMA_GCMD_IRE;
1125        if (sts & DMA_GSTS_TES)
1126                iommu->gcmd |= DMA_GCMD_TE;
1127        if (sts & DMA_GSTS_QIES)
1128                iommu->gcmd |= DMA_GCMD_QIE;
1129
1130        raw_spin_lock_init(&iommu->register_lock);
1131
1132        /*
1133         * This is only for hotplug; at boot time intel_iommu_enabled won't
1134         * be set yet. When intel_iommu_init() runs, it registers the units
1135         * present at boot time, then sets intel_iommu_enabled.
1136         */
1137        if (intel_iommu_enabled && !drhd->ignored) {
1138                err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1139                                             intel_iommu_groups,
1140                                             "%s", iommu->name);
1141                if (err)
1142                        goto err_unmap;
1143
1144                err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1145                if (err)
1146                        goto err_sysfs;
1147        }
1148
1149        drhd->iommu = iommu;
1150        iommu->drhd = drhd;
1151
1152        return 0;
1153
1154err_sysfs:
1155        iommu_device_sysfs_remove(&iommu->iommu);
1156err_unmap:
1157        unmap_iommu(iommu);
1158error_free_seq_id:
1159        dmar_free_seq_id(iommu);
1160error:
1161        kfree(iommu);
1162        return err;
1163}
1164
1165static void free_iommu(struct intel_iommu *iommu)
1166{
1167        if (intel_iommu_enabled && !iommu->drhd->ignored) {
1168                iommu_device_unregister(&iommu->iommu);
1169                iommu_device_sysfs_remove(&iommu->iommu);
1170        }
1171
1172        if (iommu->irq) {
1173                if (iommu->pr_irq) {
1174                        free_irq(iommu->pr_irq, iommu);
1175                        dmar_free_hwirq(iommu->pr_irq);
1176                        iommu->pr_irq = 0;
1177                }
1178                free_irq(iommu->irq, iommu);
1179                dmar_free_hwirq(iommu->irq);
1180                iommu->irq = 0;
1181        }
1182
1183        if (iommu->qi) {
1184                free_page((unsigned long)iommu->qi->desc);
1185                kfree(iommu->qi->desc_status);
1186                kfree(iommu->qi);
1187        }
1188
1189        if (iommu->reg)
1190                unmap_iommu(iommu);
1191
1192        dmar_free_seq_id(iommu);
1193        kfree(iommu);
1194}
1195
1196/*
 1197 * Reclaim all the submitted descriptors which have completed their work.
1198 */
1199static inline void reclaim_free_desc(struct q_inval *qi)
1200{
1201        while (qi->desc_status[qi->free_tail] == QI_DONE ||
1202               qi->desc_status[qi->free_tail] == QI_ABORT) {
1203                qi->desc_status[qi->free_tail] = QI_FREE;
1204                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1205                qi->free_cnt++;
1206        }
1207}
1208
1209static const char *qi_type_string(u8 type)
1210{
1211        switch (type) {
1212        case QI_CC_TYPE:
1213                return "Context-cache Invalidation";
1214        case QI_IOTLB_TYPE:
1215                return "IOTLB Invalidation";
1216        case QI_DIOTLB_TYPE:
1217                return "Device-TLB Invalidation";
1218        case QI_IEC_TYPE:
1219                return "Interrupt Entry Cache Invalidation";
1220        case QI_IWD_TYPE:
1221                return "Invalidation Wait";
1222        case QI_EIOTLB_TYPE:
1223                return "PASID-based IOTLB Invalidation";
1224        case QI_PC_TYPE:
1225                return "PASID-cache Invalidation";
1226        case QI_DEIOTLB_TYPE:
1227                return "PASID-based Device-TLB Invalidation";
1228        case QI_PGRP_RESP_TYPE:
1229                return "Page Group Response";
1230        default:
1231                return "UNKNOWN";
1232        }
1233}
1234
1235static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1236{
1237        unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1238        u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1239        struct qi_desc *desc = iommu->qi->desc + head;
1240
1241        if (fault & DMA_FSTS_IQE)
1242                pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1243                       DMAR_IQER_REG_IQEI(iqe_err));
1244        if (fault & DMA_FSTS_ITE)
1245                pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1246                       DMAR_IQER_REG_ITESID(iqe_err));
1247        if (fault & DMA_FSTS_ICE)
1248                pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1249                       DMAR_IQER_REG_ICESID(iqe_err));
1250
1251        pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1252               qi_type_string(desc->qw0 & 0xf),
1253               (unsigned long long)desc->qw0,
1254               (unsigned long long)desc->qw1);
1255
1256        head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1257        head <<= qi_shift(iommu);
1258        desc = iommu->qi->desc + head;
1259
1260        pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1261               qi_type_string(desc->qw0 & 0xf),
1262               (unsigned long long)desc->qw0,
1263               (unsigned long long)desc->qw1);
1264}
1265
1266static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1267{
1268        u32 fault;
1269        int head, tail;
1270        struct q_inval *qi = iommu->qi;
1271        int shift = qi_shift(iommu);
1272
1273        if (qi->desc_status[wait_index] == QI_ABORT)
1274                return -EAGAIN;
1275
1276        fault = readl(iommu->reg + DMAR_FSTS_REG);
1277        if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1278                qi_dump_fault(iommu, fault);
1279
1280        /*
1281         * If IQE happens, the head points to the descriptor associated
1282         * with the error. No new descriptors are fetched until the IQE
1283         * is cleared.
1284         */
1285        if (fault & DMA_FSTS_IQE) {
1286                head = readl(iommu->reg + DMAR_IQH_REG);
1287                if ((head >> shift) == index) {
1288                        struct qi_desc *desc = qi->desc + head;
1289
1290                        /*
1291                         * desc->qw2 and desc->qw3 are either reserved or
1292                         * used by software as private data. We won't print
1293                         * out these two qw's for security consideration.
1294                         */
1295                        memcpy(desc, qi->desc + (wait_index << shift),
1296                               1 << shift);
1297                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1298                        pr_info("Invalidation Queue Error (IQE) cleared\n");
1299                        return -EINVAL;
1300                }
1301        }
1302
1303        /*
1304         * If ITE happens, all pending wait_desc commands are aborted.
1305         * No new descriptors are fetched until the ITE is cleared.
1306         */
1307        if (fault & DMA_FSTS_ITE) {
1308                head = readl(iommu->reg + DMAR_IQH_REG);
1309                head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1310                head |= 1;
1311                tail = readl(iommu->reg + DMAR_IQT_REG);
1312                tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1313
1314                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1315                pr_info("Invalidation Time-out Error (ITE) cleared\n");
1316
1317                do {
1318                        if (qi->desc_status[head] == QI_IN_USE)
1319                                qi->desc_status[head] = QI_ABORT;
1320                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1321                } while (head != tail);
1322
1323                if (qi->desc_status[wait_index] == QI_ABORT)
1324                        return -EAGAIN;
1325        }
1326
1327        if (fault & DMA_FSTS_ICE) {
1328                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1329                pr_info("Invalidation Completion Error (ICE) cleared\n");
1330        }
1331
1332        return 0;
1333}
1334
1335/*
 1336 * Function to submit invalidation descriptors of all types to the queued
 1337 * invalidation interface (QI). Multiple descriptors can be submitted at a
 1338 * time; a wait descriptor is appended to each submission to ensure that the
 1339 * hardware has completed the invalidation before returning. Wait descriptors
 1340 * can be part of the submission, but they will not be polled for completion.
1341 */
1342int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1343                   unsigned int count, unsigned long options)
1344{
1345        struct q_inval *qi = iommu->qi;
1346        s64 devtlb_start_ktime = 0;
1347        s64 iotlb_start_ktime = 0;
1348        s64 iec_start_ktime = 0;
1349        struct qi_desc wait_desc;
1350        int wait_index, index;
1351        unsigned long flags;
1352        int offset, shift;
1353        int rc, i;
1354        u64 type;
1355
1356        if (!qi)
1357                return 0;
1358
1359        type = desc->qw0 & GENMASK_ULL(3, 0);
1360
1361        if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1362            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1363                iotlb_start_ktime = ktime_to_ns(ktime_get());
1364
1365        if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1366            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1367                devtlb_start_ktime = ktime_to_ns(ktime_get());
1368
1369        if (type == QI_IEC_TYPE &&
1370            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1371                iec_start_ktime = ktime_to_ns(ktime_get());
1372
1373restart:
1374        rc = 0;
1375
1376        raw_spin_lock_irqsave(&qi->q_lock, flags);
1377        /*
 1378         * Check if we have enough empty slots in the queue to submit;
 1379         * the calculation is based on:
 1380         * # of descriptors + 1 wait descriptor + 1 free slot between head and tail
1381         */
1382        while (qi->free_cnt < count + 2) {
1383                raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1384                cpu_relax();
1385                raw_spin_lock_irqsave(&qi->q_lock, flags);
1386        }
1387
1388        index = qi->free_head;
1389        wait_index = (index + count) % QI_LENGTH;
1390        shift = qi_shift(iommu);
1391
1392        for (i = 0; i < count; i++) {
1393                offset = ((index + i) % QI_LENGTH) << shift;
1394                memcpy(qi->desc + offset, &desc[i], 1 << shift);
1395                qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1396                trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1397                                desc[i].qw2, desc[i].qw3);
1398        }
1399        qi->desc_status[wait_index] = QI_IN_USE;
1400
1401        wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1402                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1403        if (options & QI_OPT_WAIT_DRAIN)
1404                wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1405        wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1406        wait_desc.qw2 = 0;
1407        wait_desc.qw3 = 0;
1408
1409        offset = wait_index << shift;
1410        memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1411
1412        qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1413        qi->free_cnt -= count + 1;
1414
1415        /*
1416         * update the HW tail register indicating the presence of
1417         * new descriptors.
1418         */
1419        writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1420
1421        while (qi->desc_status[wait_index] != QI_DONE) {
1422                /*
 1423                 * We will leave the interrupts disabled, to prevent interrupt
 1424                 * context from queueing another cmd while a cmd is already
 1425                 * submitted and waiting for completion on this cpu. This is to
 1426                 * avoid a deadlock where the interrupt context could wait
 1427                 * indefinitely for free slots in the queue.
1428                 */
1429                rc = qi_check_fault(iommu, index, wait_index);
1430                if (rc)
1431                        break;
1432
1433                raw_spin_unlock(&qi->q_lock);
1434                cpu_relax();
1435                raw_spin_lock(&qi->q_lock);
1436        }
1437
1438        for (i = 0; i < count; i++)
1439                qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1440
1441        reclaim_free_desc(qi);
1442        raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1443
1444        if (rc == -EAGAIN)
1445                goto restart;
1446
1447        if (iotlb_start_ktime)
1448                dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1449                                ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1450
1451        if (devtlb_start_ktime)
1452                dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1453                                ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1454
1455        if (iec_start_ktime)
1456                dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1457                                ktime_to_ns(ktime_get()) - iec_start_ktime);
1458
1459        return rc;
1460}
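
/*
 * Illustrative sketch (editor's addition, not part of the upstream driver):
 * qi_submit_sync() takes a batch of descriptors and appends one wait
 * descriptor for the whole batch, which is why the loop above requires
 * "count + 2" free slots (count descriptors, one wait descriptor, and one
 * empty slot between head and tail). A caller could, for example, combine
 * an IOTLB flush with a device-TLB flush in a single submission; the helper
 * name and parameters below are hypothetical:
 */
#if 0
static void qi_flush_iotlb_and_dev_iotlb_sketch(struct intel_iommu *iommu,
                                                u16 did, u16 sid, u16 pfsid,
                                                u16 qdep, u64 addr, u64 type)
{
        struct qi_desc desc[2] = {};

        /* Descriptor 0: IOTLB invalidation, as built by qi_flush_iotlb() */
        desc[0].qw0 = QI_IOTLB_DID(did) | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;

        /* Descriptor 1: device-TLB invalidation, as built by qi_flush_dev_iotlb() */
        desc[1].qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
                      QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
        desc[1].qw1 = QI_DEV_IOTLB_ADDR(addr);

        /* Returns only after both descriptors and the wait descriptor complete */
        qi_submit_sync(iommu, desc, 2, 0);
}
#endif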
1461
1462/*
1463 * Flush the global interrupt entry cache.
1464 */
1465void qi_global_iec(struct intel_iommu *iommu)
1466{
1467        struct qi_desc desc;
1468
1469        desc.qw0 = QI_IEC_TYPE;
1470        desc.qw1 = 0;
1471        desc.qw2 = 0;
1472        desc.qw3 = 0;
1473
1474        /* should never fail */
1475        qi_submit_sync(iommu, &desc, 1, 0);
1476}
1477
1478void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1479                      u64 type)
1480{
1481        struct qi_desc desc;
1482
1483        desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1484                        | QI_CC_GRAN(type) | QI_CC_TYPE;
1485        desc.qw1 = 0;
1486        desc.qw2 = 0;
1487        desc.qw3 = 0;
1488
1489        qi_submit_sync(iommu, &desc, 1, 0);
1490}
1491
1492void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1493                    unsigned int size_order, u64 type)
1494{
1495        u8 dw = 0, dr = 0;
1496
1497        struct qi_desc desc;
1498        int ih = 0;
1499
1500        if (cap_write_drain(iommu->cap))
1501                dw = 1;
1502
1503        if (cap_read_drain(iommu->cap))
1504                dr = 1;
1505
1506        desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1507                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1508        desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1509                | QI_IOTLB_AM(size_order);
1510        desc.qw2 = 0;
1511        desc.qw3 = 0;
1512
1513        qi_submit_sync(iommu, &desc, 1, 0);
1514}
1515
1516void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1517                        u16 qdep, u64 addr, unsigned mask)
1518{
1519        struct qi_desc desc;
1520
1521        if (mask) {
1522                addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1523                desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1524        } else
1525                desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1526
1527        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1528                qdep = 0;
1529
1530        desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1531                   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1532        desc.qw2 = 0;
1533        desc.qw3 = 0;
1534
1535        qi_submit_sync(iommu, &desc, 1, 0);
1536}
1537
1538/* PASID-based IOTLB invalidation */
1539void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1540                     unsigned long npages, bool ih)
1541{
1542        struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1543
1544        /*
1545         * npages == -1 requests a PASID-selective invalidation; any
1546         * positive value requests a Page-selective-within-PASID
1547         * invalidation. 0 is not a valid input.
1548         */
1549        if (WARN_ON(!npages)) {
1550                pr_err("Invalid input npages = %ld\n", npages);
1551                return;
1552        }
1553
1554        if (npages == -1) {
1555                desc.qw0 = QI_EIOTLB_PASID(pasid) |
1556                                QI_EIOTLB_DID(did) |
1557                                QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1558                                QI_EIOTLB_TYPE;
1559                desc.qw1 = 0;
1560        } else {
1561                int mask = ilog2(__roundup_pow_of_two(npages));
1562                unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1563
1564                if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1565                        addr = ALIGN_DOWN(addr, align);
1566
1567                desc.qw0 = QI_EIOTLB_PASID(pasid) |
1568                                QI_EIOTLB_DID(did) |
1569                                QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1570                                QI_EIOTLB_TYPE;
1571                desc.qw1 = QI_EIOTLB_ADDR(addr) |
1572                                QI_EIOTLB_IH(ih) |
1573                                QI_EIOTLB_AM(mask);
1574        }
1575
1576        qi_submit_sync(iommu, &desc, 1, 0);
1577}
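
/*
 * Worked example (editor's addition, not compiled): for a page-selective
 * invalidation, the address-mask (AM) field covers 2^mask pages, so npages
 * is rounded up to a power of two and the address must be aligned to the
 * resulting range. With npages == 3:
 */
#if 0
static u64 qi_piotlb_am_sketch(void)
{
        unsigned long npages = 3;
        int mask = ilog2(__roundup_pow_of_two(npages));         /* 2         */
        unsigned long align = 1UL << (VTD_PAGE_SHIFT + mask);   /* 16KB      */
        u64 addr = 0x5000;                                      /* unaligned */

        /* qi_flush_piotlb() warns and aligns down: 0x5000 -> 0x4000 */
        if (!IS_ALIGNED(addr, align))
                addr = ALIGN_DOWN(addr, align);

        return addr;
}
#endif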
1578
1579/* PASID-based device IOTLB invalidation */
1580void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1581                              u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1582{
1583        unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1584        struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1585
1586        desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1587                QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1588                QI_DEV_IOTLB_PFSID(pfsid);
1589
1590        /*
1591         * If the S bit is 0, only a single page is flushed. If the S bit
1592         * is set, the least significant zero bit of the address indicates
1593         * the invalidation range (VT-d spec 6.5.2.6), e.g. a zero at
1594         * address bit 12 indicates 8KB, a zero at bit 13 indicates 16KB.
1595         * size_order = 0 means a single PAGE_SIZE (4KB) page.
1596         * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1597         * ECAP.
1598         */
1599        if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1600                pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1601                                    addr, size_order);
1602
1603        /* Take page address */
1604        desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1605
1606        if (size_order) {
1607                /*
1608                 * Any 0 bits in the address below size_order would be taken
1609                 * as the least significant zero bit, so set them all to 1
1610                 * to avoid encoding a smaller range than desired.
1611                 */
1612                desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1613                                        VTD_PAGE_SHIFT);
1614                /* Clear bit (VTD_PAGE_SHIFT + size_order - 1) to encode the range */
1615                desc.qw1 &= ~mask;
1616                /* Set the S bit to indicate flushing more than 1 page */
1617                desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1618        }
1619
1620        qi_submit_sync(iommu, &desc, 1, 0);
1621}
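
/*
 * Worked example (editor's addition, not compiled): with size_order == 2
 * (four pages, 16KB), the code above sets address bits 13:12, clears bit 13
 * (VTD_PAGE_SHIFT + size_order - 1) and sets the S bit, so bit 13 becomes
 * the least significant zero bit that encodes the 16KB range:
 */
#if 0
static u64 qi_dev_eiotlb_size_sketch(u64 addr)
{
        unsigned int size_order = 2;    /* 2^2 pages = 16KB */
        unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
        u64 qw1 = QI_DEV_EIOTLB_ADDR(addr);

        qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1, VTD_PAGE_SHIFT);
        qw1 &= ~mask;                   /* bit 13 = 0, bit 12 = 1 */
        qw1 |= QI_DEV_EIOTLB_SIZE;      /* S bit: more than one page */

        return qw1;
}
#endif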
1622
1623void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1624                          u64 granu, u32 pasid)
1625{
1626        struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1627
1628        desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1629                        QI_PC_GRAN(granu) | QI_PC_TYPE;
1630        qi_submit_sync(iommu, &desc, 1, 0);
1631}
1632
1633/*
1634 * Disable Queued Invalidation interface.
1635 */
1636void dmar_disable_qi(struct intel_iommu *iommu)
1637{
1638        unsigned long flags;
1639        u32 sts;
1640        cycles_t start_time = get_cycles();
1641
1642        if (!ecap_qis(iommu->ecap))
1643                return;
1644
1645        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1646
1647        sts = readl(iommu->reg + DMAR_GSTS_REG);
1648        if (!(sts & DMA_GSTS_QIES))
1649                goto end;
1650
1651        /*
1652         * Give the HW a chance to complete any pending invalidation requests.
1653         */
1654        while ((readl(iommu->reg + DMAR_IQT_REG) !=
1655                readl(iommu->reg + DMAR_IQH_REG)) &&
1656                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1657                cpu_relax();
1658
1659        iommu->gcmd &= ~DMA_GCMD_QIE;
1660        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1661
1662        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1663                      !(sts & DMA_GSTS_QIES), sts);
1664end:
1665        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1666}
1667
1668/*
1669 * Enable queued invalidation.
1670 */
1671static void __dmar_enable_qi(struct intel_iommu *iommu)
1672{
1673        u32 sts;
1674        unsigned long flags;
1675        struct q_inval *qi = iommu->qi;
1676        u64 val = virt_to_phys(qi->desc);
1677
1678        qi->free_head = qi->free_tail = 0;
1679        qi->free_cnt = QI_LENGTH;
1680
1681        /*
1682         * Set DW=1 (256-bit descriptors) and QS=1 (an 8KB, two-page queue)
1683         * in IQA_REG when the Scalable Mode capability is present.
1684         */
1685        if (ecap_smts(iommu->ecap))
1686                val |= (1 << 11) | 1;
1687
1688        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1689
1690        /* write zero to the tail reg */
1691        writel(0, iommu->reg + DMAR_IQT_REG);
1692
1693        dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1694
1695        iommu->gcmd |= DMA_GCMD_QIE;
1696        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1697
1698        /* Make sure hardware completes it */
1699        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1700
1701        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1702}
1703
1704/*
1705 * Enable Queued Invalidation interface. This is a must to support
1706 * interrupt-remapping. Also used by DMA-remapping, which replaces
1707 * register based IOTLB invalidation.
1708 */
1709int dmar_enable_qi(struct intel_iommu *iommu)
1710{
1711        struct q_inval *qi;
1712        struct page *desc_page;
1713
1714        if (!ecap_qis(iommu->ecap))
1715                return -ENOENT;
1716
1717        /*
1718         * Queued invalidation is already set up and enabled.
1719         */
1720        if (iommu->qi)
1721                return 0;
1722
1723        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1724        if (!iommu->qi)
1725                return -ENOMEM;
1726
1727        qi = iommu->qi;
1728
1729        /*
1730         * Need two pages to accommodate 256 descriptors of 256 bits each
1731         * if the remapping hardware supports scalable mode translation.
1732         */
1733        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1734                                     !!ecap_smts(iommu->ecap));
1735        if (!desc_page) {
1736                kfree(qi);
1737                iommu->qi = NULL;
1738                return -ENOMEM;
1739        }
1740
1741        qi->desc = page_address(desc_page);
1742
1743        qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1744        if (!qi->desc_status) {
1745                free_page((unsigned long) qi->desc);
1746                kfree(qi);
1747                iommu->qi = NULL;
1748                return -ENOMEM;
1749        }
1750
1751        raw_spin_lock_init(&qi->q_lock);
1752
1753        __dmar_enable_qi(iommu);
1754
1755        return 0;
1756}
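
/*
 * Sizing sketch (editor's addition, not compiled): the queue always holds
 * QI_LENGTH (256) descriptors. Legacy descriptors are 128 bits (16 bytes,
 * one 4KB page in total) while scalable-mode descriptors are 256 bits
 * (32 bytes, two pages), which is why the allocation order above is
 * !!ecap_smts(iommu->ecap):
 */
#if 0
static unsigned int qi_queue_bytes_sketch(struct intel_iommu *iommu)
{
        /* qi_shift() is 4 for 16-byte and 5 for 32-byte descriptors */
        return QI_LENGTH << qi_shift(iommu);    /* 4096 or 8192 */
}
#endif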
1757
1758/* iommu interrupt handling. Most of it is MSI-like. */
1759
1760enum faulttype {
1761        DMA_REMAP,
1762        INTR_REMAP,
1763        UNKNOWN,
1764};
1765
1766static const char *dma_remap_fault_reasons[] =
1767{
1768        "Software",
1769        "Present bit in root entry is clear",
1770        "Present bit in context entry is clear",
1771        "Invalid context entry",
1772        "Access beyond MGAW",
1773        "PTE Write access is not set",
1774        "PTE Read access is not set",
1775        "Next page table ptr is invalid",
1776        "Root table address invalid",
1777        "Context table ptr is invalid",
1778        "non-zero reserved fields in RTP",
1779        "non-zero reserved fields in CTP",
1780        "non-zero reserved fields in PTE",
1781        "PCE for translation request specifies blocking",
1782};
1783
1784static const char * const dma_remap_sm_fault_reasons[] = {
1785        "SM: Invalid Root Table Address",
1786        "SM: TTM 0 for request with PASID",
1787        "SM: TTM 0 for page group request",
1788        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1789        "SM: Error attempting to access Root Entry",
1790        "SM: Present bit in Root Entry is clear",
1791        "SM: Non-zero reserved field set in Root Entry",
1792        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1793        "SM: Error attempting to access Context Entry",
1794        "SM: Present bit in Context Entry is clear",
1795        "SM: Non-zero reserved field set in the Context Entry",
1796        "SM: Invalid Context Entry",
1797        "SM: DTE field in Context Entry is clear",
1798        "SM: PASID Enable field in Context Entry is clear",
1799        "SM: PASID is larger than the max in Context Entry",
1800        "SM: PRE field in Context-Entry is clear",
1801        "SM: RID_PASID field error in Context-Entry",
1802        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1803        "SM: Error attempting to access the PASID Directory Entry",
1804        "SM: Present bit in Directory Entry is clear",
1805        "SM: Non-zero reserved field set in PASID Directory Entry",
1806        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1807        "SM: Error attempting to access PASID Table Entry",
1808        "SM: Present bit in PASID Table Entry is clear",
1809        "SM: Non-zero reserved field set in PASID Table Entry",
1810        "SM: Invalid Scalable-Mode PASID Table Entry",
1811        "SM: ERE field is clear in PASID Table Entry",
1812        "SM: SRE field is clear in PASID Table Entry",
1813        "Unknown", "Unknown",/* 0x5E-0x5F */
1814        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1815        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1816        "SM: Error attempting to access first-level paging entry",
1817        "SM: Present bit in first-level paging entry is clear",
1818        "SM: Non-zero reserved field set in first-level paging entry",
1819        "SM: Error attempting to access FL-PML4 entry",
1820        "SM: First-level entry address beyond MGAW in Nested translation",
1821        "SM: Read permission error in FL-PML4 entry in Nested translation",
1822        "SM: Read permission error in first-level paging entry in Nested translation",
1823        "SM: Write permission error in first-level paging entry in Nested translation",
1824        "SM: Error attempting to access second-level paging entry",
1825        "SM: Read/Write permission error in second-level paging entry",
1826        "SM: Non-zero reserved field set in second-level paging entry",
1827        "SM: Invalid second-level page table pointer",
1828        "SM: A/D bit update needed in second-level entry when set up in no snoop",
1829        "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1830        "SM: Address in first-level translation is not canonical",
1831        "SM: U/S set 0 for first-level translation with user privilege",
1832        "SM: No execute permission for request with PASID and ER=1",
1833        "SM: Address beyond the DMA hardware max",
1834        "SM: Second-level entry address beyond the max",
1835        "SM: No write permission for Write/AtomicOp request",
1836        "SM: No read permission for Read/AtomicOp request",
1837        "SM: Invalid address-interrupt address",
1838        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1839        "SM: A/D bit update needed in first-level entry when set up in no snoop",
1840};
1841
1842static const char *irq_remap_fault_reasons[] =
1843{
1844        "Detected reserved fields in the decoded interrupt-remapped request",
1845        "Interrupt index exceeded the interrupt-remapping table size",
1846        "Present field in the IRTE entry is clear",
1847        "Error accessing interrupt-remapping table pointed by IRTA_REG",
1848        "Detected reserved fields in the IRTE entry",
1849        "Blocked a compatibility format interrupt request",
1850        "Blocked an interrupt request due to source-id verification failure",
1851};
1852
1853static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1854{
1855        if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1856                                        ARRAY_SIZE(irq_remap_fault_reasons))) {
1857                *fault_type = INTR_REMAP;
1858                return irq_remap_fault_reasons[fault_reason - 0x20];
1859        } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1860                        ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1861                *fault_type = DMA_REMAP;
1862                return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1863        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1864                *fault_type = DMA_REMAP;
1865                return dma_remap_fault_reasons[fault_reason];
1866        } else {
1867                *fault_type = UNKNOWN;
1868                return "Unknown";
1869        }
1870}
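
/*
 * Decoding example (editor's addition, not compiled): fault reasons 0x20
 * and up index irq_remap_fault_reasons[], 0x30 and up index
 * dma_remap_sm_fault_reasons[], and small values index
 * dma_remap_fault_reasons[] directly:
 */
#if 0
static void fault_reason_sketch(void)
{
        int type;
        const char *reason;

        reason = dmar_get_fault_reason(0x25, &type);
        /* type == INTR_REMAP, reason == "Blocked a compatibility format interrupt request" */

        reason = dmar_get_fault_reason(0x05, &type);
        /* type == DMA_REMAP, reason == "PTE Write access is not set" */
}
#endif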
1871
1872
1873static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1874{
1875        if (iommu->irq == irq)
1876                return DMAR_FECTL_REG;
1877        else if (iommu->pr_irq == irq)
1878                return DMAR_PECTL_REG;
1879        else
1880                BUG();
1881}
1882
1883void dmar_msi_unmask(struct irq_data *data)
1884{
1885        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1886        int reg = dmar_msi_reg(iommu, data->irq);
1887        unsigned long flag;
1888
1889        /* unmask it */
1890        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1891        writel(0, iommu->reg + reg);
1892        /* Read back the register to flush the posted write */
1893        readl(iommu->reg + reg);
1894        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1895}
1896
1897void dmar_msi_mask(struct irq_data *data)
1898{
1899        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1900        int reg = dmar_msi_reg(iommu, data->irq);
1901        unsigned long flag;
1902
1903        /* mask it */
1904        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1905        writel(DMA_FECTL_IM, iommu->reg + reg);
1906        /* Read back the register to flush the posted write */
1907        readl(iommu->reg + reg);
1908        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1909}
1910
1911void dmar_msi_write(int irq, struct msi_msg *msg)
1912{
1913        struct intel_iommu *iommu = irq_get_handler_data(irq);
1914        int reg = dmar_msi_reg(iommu, irq);
1915        unsigned long flag;
1916
1917        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1918        writel(msg->data, iommu->reg + reg + 4);
1919        writel(msg->address_lo, iommu->reg + reg + 8);
1920        writel(msg->address_hi, iommu->reg + reg + 12);
1921        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1922}
1923
1924void dmar_msi_read(int irq, struct msi_msg *msg)
1925{
1926        struct intel_iommu *iommu = irq_get_handler_data(irq);
1927        int reg = dmar_msi_reg(iommu, irq);
1928        unsigned long flag;
1929
1930        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1931        msg->data = readl(iommu->reg + reg + 4);
1932        msg->address_lo = readl(iommu->reg + reg + 8);
1933        msg->address_hi = readl(iommu->reg + reg + 12);
1934        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1935}
1936
1937static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1938                u8 fault_reason, u32 pasid, u16 source_id,
1939                unsigned long long addr)
1940{
1941        const char *reason;
1942        int fault_type;
1943
1944        reason = dmar_get_fault_reason(fault_reason, &fault_type);
1945
1946        if (fault_type == INTR_REMAP)
1947                pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1948                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1949                       PCI_FUNC(source_id & 0xFF), addr >> 48,
1950                       fault_reason, reason);
1951        else if (pasid == INVALID_IOASID)
1952                pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1953                       type ? "DMA Read" : "DMA Write",
1954                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1955                       PCI_FUNC(source_id & 0xFF), addr,
1956                       fault_reason, reason);
1957        else
1958                pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1959                       type ? "DMA Read" : "DMA Write", pasid,
1960                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1961                       PCI_FUNC(source_id & 0xFF), addr,
1962                       fault_reason, reason);
1963
1964        return 0;
1965}
1966
1967#define PRIMARY_FAULT_REG_LEN (16)
1968irqreturn_t dmar_fault(int irq, void *dev_id)
1969{
1970        struct intel_iommu *iommu = dev_id;
1971        int reg, fault_index;
1972        u32 fault_status;
1973        unsigned long flag;
1974        static DEFINE_RATELIMIT_STATE(rs,
1975                                      DEFAULT_RATELIMIT_INTERVAL,
1976                                      DEFAULT_RATELIMIT_BURST);
1977
1978        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1979        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1980        if (fault_status && __ratelimit(&rs))
1981                pr_err("DRHD: handling fault status reg %x\n", fault_status);
1982
1983        /* TBD: ignore advanced fault log currently */
1984        if (!(fault_status & DMA_FSTS_PPF))
1985                goto unlock_exit;
1986
1987        fault_index = dma_fsts_fault_record_index(fault_status);
1988        reg = cap_fault_reg_offset(iommu->cap);
1989        while (1) {
1990                /* When ratelimited, skip printing and simply clear the fault */
1991                bool ratelimited = !__ratelimit(&rs);
1992                u8 fault_reason;
1993                u16 source_id;
1994                u64 guest_addr;
1995                u32 pasid;
1996                int type;
1997                u32 data;
1998                bool pasid_present;
1999
2000                /* highest 32 bits */
2001                data = readl(iommu->reg + reg +
2002                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
2003                if (!(data & DMA_FRCD_F))
2004                        break;
2005
2006                if (!ratelimited) {
2007                        fault_reason = dma_frcd_fault_reason(data);
2008                        type = dma_frcd_type(data);
2009
2010                        pasid = dma_frcd_pasid_value(data);
2011                        data = readl(iommu->reg + reg +
2012                                     fault_index * PRIMARY_FAULT_REG_LEN + 8);
2013                        source_id = dma_frcd_source_id(data);
2014
2015                        pasid_present = dma_frcd_pasid_present(data);
2016                        guest_addr = dmar_readq(iommu->reg + reg +
2017                                        fault_index * PRIMARY_FAULT_REG_LEN);
2018                        guest_addr = dma_frcd_page_addr(guest_addr);
2019                }
2020
2021                /* clear the fault */
2022                writel(DMA_FRCD_F, iommu->reg + reg +
2023                        fault_index * PRIMARY_FAULT_REG_LEN + 12);
2024
2025                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2026
2027                if (!ratelimited)
2028                        /* Use INVALID_IOASID (-1) if no PASID is present */
2029                        dmar_fault_do_one(iommu, type, fault_reason,
2030                                          pasid_present ? pasid : INVALID_IOASID,
2031                                          source_id, guest_addr);
2032
2033                fault_index++;
2034                if (fault_index >= cap_num_fault_regs(iommu->cap))
2035                        fault_index = 0;
2036                raw_spin_lock_irqsave(&iommu->register_lock, flag);
2037        }
2038
2039        writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2040               iommu->reg + DMAR_FSTS_REG);
2041
2042unlock_exit:
2043        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2044        return IRQ_HANDLED;
2045}
2046
2047int dmar_set_interrupt(struct intel_iommu *iommu)
2048{
2049        int irq, ret;
2050
2051        /*
2052         * Check if the fault interrupt is already initialized.
2053         */
2054        if (iommu->irq)
2055                return 0;
2056
2057        irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2058        if (irq > 0) {
2059                iommu->irq = irq;
2060        } else {
2061                pr_err("No free IRQ vectors\n");
2062                return -EINVAL;
2063        }
2064
2065        ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2066        if (ret)
2067                pr_err("Can't request irq\n");
2068        return ret;
2069}
2070
2071int __init enable_drhd_fault_handling(void)
2072{
2073        struct dmar_drhd_unit *drhd;
2074        struct intel_iommu *iommu;
2075
2076        /*
2077         * Enable fault control interrupt.
2078         */
2079        for_each_iommu(iommu, drhd) {
2080                u32 fault_status;
2081                int ret = dmar_set_interrupt(iommu);
2082
2083                if (ret) {
2084                        pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2085                               (unsigned long long)drhd->reg_base_addr, ret);
2086                        return -1;
2087                }
2088
2089                /*
2090                 * Clear any previous faults.
2091                 */
2092                dmar_fault(iommu->irq, iommu);
2093                fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2094                writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2095        }
2096
2097        return 0;
2098}
2099
2100/*
2101 * Re-enable Queued Invalidation interface.
2102 */
2103int dmar_reenable_qi(struct intel_iommu *iommu)
2104{
2105        if (!ecap_qis(iommu->ecap))
2106                return -ENOENT;
2107
2108        if (!iommu->qi)
2109                return -ENOENT;
2110
2111        /*
2112         * First disable queued invalidation.
2113         */
2114        dmar_disable_qi(iommu);
2115        /*
2116         * Then enable queued invalidation again. Since there are no pending
2117         * invalidation requests now, it's safe to re-enable queued
2118         * invalidation.
2119         */
2120        __dmar_enable_qi(iommu);
2121
2122        return 0;
2123}
2124
2125/*
2126 * Check interrupt remapping support in DMAR table description.
2127 */
2128int __init dmar_ir_support(void)
2129{
2130        struct acpi_table_dmar *dmar;
2131        dmar = (struct acpi_table_dmar *)dmar_tbl;
2132        if (!dmar)
2133                return 0;
2134        return dmar->flags & 0x1;
2135}
2136
2137/* Check whether DMAR units are in use */
2138static inline bool dmar_in_use(void)
2139{
2140        return irq_remapping_enabled || intel_iommu_enabled;
2141}
2142
2143static int __init dmar_free_unused_resources(void)
2144{
2145        struct dmar_drhd_unit *dmaru, *dmaru_n;
2146
2147        if (dmar_in_use())
2148                return 0;
2149
2150        if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2151                bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2152
2153        down_write(&dmar_global_lock);
2154        list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2155                list_del(&dmaru->list);
2156                dmar_free_drhd(dmaru);
2157        }
2158        up_write(&dmar_global_lock);
2159
2160        return 0;
2161}
2162
2163late_initcall(dmar_free_unused_resources);
2164IOMMU_INIT_POST(detect_intel_iommu);
2165
2166/*
2167 * DMAR Hotplug Support
2168 * For more details, please refer to Intel(R) Virtualization Technology
2169 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2170 * "Remapping Hardware Unit Hot Plug".
2171 */
2172static guid_t dmar_hp_guid =
2173        GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2174                  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2175
2176/*
2177 * Currently there's only one revision and BIOS will not check the revision id,
2178 * so use 0 for safety.
2179 */
2180#define DMAR_DSM_REV_ID                 0
2181#define DMAR_DSM_FUNC_DRHD              1
2182#define DMAR_DSM_FUNC_ATSR              2
2183#define DMAR_DSM_FUNC_RHSA              3
2184#define DMAR_DSM_FUNC_SATC              4
2185
2186static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2187{
2188        return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2189}
2190
2191static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2192                                  dmar_res_handler_t handler, void *arg)
2193{
2194        int ret = -ENODEV;
2195        union acpi_object *obj;
2196        struct acpi_dmar_header *start;
2197        struct dmar_res_callback callback;
2198        static int res_type[] = {
2199                [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2200                [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2201                [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2202                [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2203        };
2204
2205        if (!dmar_detect_dsm(handle, func))
2206                return 0;
2207
2208        obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2209                                      func, NULL, ACPI_TYPE_BUFFER);
2210        if (!obj)
2211                return -ENODEV;
2212
2213        memset(&callback, 0, sizeof(callback));
2214        callback.cb[res_type[func]] = handler;
2215        callback.arg[res_type[func]] = arg;
2216        start = (struct acpi_dmar_header *)obj->buffer.pointer;
2217        ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2218
2219        ACPI_FREE(obj);
2220
2221        return ret;
2222}
2223
2224static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2225{
2226        int ret;
2227        struct dmar_drhd_unit *dmaru;
2228
2229        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2230        if (!dmaru)
2231                return -ENODEV;
2232
2233        ret = dmar_ir_hotplug(dmaru, true);
2234        if (ret == 0)
2235                ret = dmar_iommu_hotplug(dmaru, true);
2236
2237        return ret;
2238}
2239
2240static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2241{
2242        int i, ret;
2243        struct device *dev;
2244        struct dmar_drhd_unit *dmaru;
2245
2246        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2247        if (!dmaru)
2248                return 0;
2249
2250        /*
2251         * All PCI devices managed by this unit should have been destroyed.
2252         */
2253        if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2254                for_each_active_dev_scope(dmaru->devices,
2255                                          dmaru->devices_cnt, i, dev)
2256                        return -EBUSY;
2257        }
2258
2259        ret = dmar_ir_hotplug(dmaru, false);
2260        if (ret == 0)
2261                ret = dmar_iommu_hotplug(dmaru, false);
2262
2263        return ret;
2264}
2265
2266static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2267{
2268        struct dmar_drhd_unit *dmaru;
2269
2270        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2271        if (dmaru) {
2272                list_del_rcu(&dmaru->list);
2273                synchronize_rcu();
2274                dmar_free_drhd(dmaru);
2275        }
2276
2277        return 0;
2278}
2279
2280static int dmar_hotplug_insert(acpi_handle handle)
2281{
2282        int ret;
2283        int drhd_count = 0;
2284
2285        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2286                                     &dmar_validate_one_drhd, (void *)1);
2287        if (ret)
2288                goto out;
2289
2290        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2291                                     &dmar_parse_one_drhd, (void *)&drhd_count);
2292        if (ret == 0 && drhd_count == 0) {
2293                pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2294                goto out;
2295        } else if (ret) {
2296                goto release_drhd;
2297        }
2298
2299        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2300                                     &dmar_parse_one_rhsa, NULL);
2301        if (ret)
2302                goto release_drhd;
2303
2304        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2305                                     &dmar_parse_one_atsr, NULL);
2306        if (ret)
2307                goto release_atsr;
2308
2309        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2310                                     &dmar_hp_add_drhd, NULL);
2311        if (!ret)
2312                return 0;
2313
2314        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2315                               &dmar_hp_remove_drhd, NULL);
2316release_atsr:
2317        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2318                               &dmar_release_one_atsr, NULL);
2319release_drhd:
2320        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2321                               &dmar_hp_release_drhd, NULL);
2322out:
2323        return ret;
2324}
2325
2326static int dmar_hotplug_remove(acpi_handle handle)
2327{
2328        int ret;
2329
2330        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2331                                     &dmar_check_one_atsr, NULL);
2332        if (ret)
2333                return ret;
2334
2335        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2336                                     &dmar_hp_remove_drhd, NULL);
2337        if (ret == 0) {
2338                WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2339                                               &dmar_release_one_atsr, NULL));
2340                WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2341                                               &dmar_hp_release_drhd, NULL));
2342        } else {
2343                dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2344                                       &dmar_hp_add_drhd, NULL);
2345        }
2346
2347        return ret;
2348}
2349
2350static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2351                                       void *context, void **retval)
2352{
2353        acpi_handle *phdl = retval;
2354
2355        if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2356                *phdl = handle;
2357                return AE_CTRL_TERMINATE;
2358        }
2359
2360        return AE_OK;
2361}
2362
2363static int dmar_device_hotplug(acpi_handle handle, bool insert)
2364{
2365        int ret;
2366        acpi_handle tmp = NULL;
2367        acpi_status status;
2368
2369        if (!dmar_in_use())
2370                return 0;
2371
2372        if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2373                tmp = handle;
2374        } else {
2375                status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2376                                             ACPI_UINT32_MAX,
2377                                             dmar_get_dsm_handle,
2378                                             NULL, NULL, &tmp);
2379                if (ACPI_FAILURE(status)) {
2380                        pr_warn("Failed to locate _DSM method.\n");
2381                        return -ENXIO;
2382                }
2383        }
2384        if (tmp == NULL)
2385                return 0;
2386
2387        down_write(&dmar_global_lock);
2388        if (insert)
2389                ret = dmar_hotplug_insert(tmp);
2390        else
2391                ret = dmar_hotplug_remove(tmp);
2392        up_write(&dmar_global_lock);
2393
2394        return ret;
2395}
2396
2397int dmar_device_add(acpi_handle handle)
2398{
2399        return dmar_device_hotplug(handle, true);
2400}
2401
2402int dmar_device_remove(acpi_handle handle)
2403{
2404        return dmar_device_hotplug(handle, false);
2405}
2406
2407/*
2408 * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2409 *
2410 * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2411 * the ACPI DMAR table. This means that the platform boot firmware has made
2412 * sure no device can issue DMA outside of RMRR regions.
2413 */
2414bool dmar_platform_optin(void)
2415{
2416        struct acpi_table_dmar *dmar;
2417        acpi_status status;
2418        bool ret;
2419
2420        status = acpi_get_table(ACPI_SIG_DMAR, 0,
2421                                (struct acpi_table_header **)&dmar);
2422        if (ACPI_FAILURE(status))
2423                return false;
2424
2425        ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2426        acpi_put_table((struct acpi_table_header *)dmar);
2427
2428        return ret;
2429}
2430EXPORT_SYMBOL_GPL(dmar_platform_optin);
2431