linux/drivers/iommu/intel/dmar.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (c) 2006, Intel Corporation.
   4 *
   5 * Copyright (C) 2006-2008 Intel Corporation
   6 * Author: Ashok Raj <ashok.raj@intel.com>
   7 * Author: Shaohua Li <shaohua.li@intel.com>
   8 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
   9 *
  10 * This file implements early detection/parsing of Remapping Devices
  11 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
  12 * tables.
  13 *
  14 * These routines are used by both DMA-remapping and Interrupt-remapping
  15 */
  16
  17#define pr_fmt(fmt)     "DMAR: " fmt
  18
  19#include <linux/pci.h>
  20#include <linux/dmar.h>
  21#include <linux/iova.h>
  22#include <linux/intel-iommu.h>
  23#include <linux/timer.h>
  24#include <linux/irq.h>
  25#include <linux/interrupt.h>
  26#include <linux/tboot.h>
  27#include <linux/dmi.h>
  28#include <linux/slab.h>
  29#include <linux/iommu.h>
  30#include <linux/numa.h>
  31#include <linux/limits.h>
  32#include <asm/irq_remapping.h>
  33#include <asm/iommu_table.h>
  34#include <trace/events/intel_iommu.h>
  35
  36#include "../irq_remapping.h"
  37#include "perf.h"
  38
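    /*
     * Callback table used when walking the DMAR table: cb[] holds one handler
     * per ACPI DMAR structure type and arg[] the argument passed to it.
     * ignore_unhandled suppresses the error for types without a handler, and
     * print_entry logs each structure as it is visited.  See parse_dmar_table()
     * below for a typical user.
     */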
  39typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
  40struct dmar_res_callback {
  41        dmar_res_handler_t      cb[ACPI_DMAR_TYPE_RESERVED];
  42        void                    *arg[ACPI_DMAR_TYPE_RESERVED];
  43        bool                    ignore_unhandled;
  44        bool                    print_entry;
  45};
  46
  47/*
  48 * Assumptions:
  49 * 1) The hotplug framework guarantees that a DMAR unit will be hot-added
  50 *    before the I/O devices managed by that unit.
  51 * 2) The hotplug framework guarantees that a DMAR unit will be hot-removed
  52 *    after the I/O devices managed by that unit.
  53 * 3) Hotplug events are rare.
  54 *
  55 * Locking rules for DMA and interrupt remapping related global data structures:
  56 * 1) Use dmar_global_lock in process context
  57 * 2) Use RCU in interrupt context
  58 */
  59DECLARE_RWSEM(dmar_global_lock);
  60LIST_HEAD(dmar_drhd_units);
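    /*
     * A minimal sketch of the pattern described above (illustrative only, not
     * code taken from this file): writers modify dmar_drhd_units while holding
     * dmar_global_lock, while readers in atomic context walk it under RCU:
     *
     *      down_write(&dmar_global_lock);
     *      list_add_rcu(&new_drhd->list, &dmar_drhd_units);
     *      up_write(&dmar_global_lock);
     *
     *      rcu_read_lock();
     *      for_each_drhd_unit(dmaru)
     *              ...;
     *      rcu_read_unlock();
     */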
  61
  62struct acpi_table_header * __initdata dmar_tbl;
  63static int dmar_dev_scope_status = 1;
  64static unsigned long dmar_seq_ids[BITS_TO_LONGS(DMAR_UNITS_SUPPORTED)];
  65
  66static int alloc_iommu(struct dmar_drhd_unit *drhd);
  67static void free_iommu(struct intel_iommu *iommu);
  68
  69extern const struct iommu_ops intel_iommu_ops;
  70
  71static void dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
  72{
  73        /*
  74         * Add INCLUDE_ALL units at the tail, so a scan of the list will
  75         * find them at the very end.
  76         */
  77        if (drhd->include_all)
  78                list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
  79        else
  80                list_add_rcu(&drhd->list, &dmar_drhd_units);
  81}
  82
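    /*
     * Count the PCI endpoint, bridge and ACPI namespace entries in the device
     * scope list [start, end) and allocate a matching array of
     * struct dmar_dev_scope.  IOAPIC and HPET entries are skipped silently;
     * returns NULL if there is nothing to track.
     */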
  83void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
  84{
  85        struct acpi_dmar_device_scope *scope;
  86
  87        *cnt = 0;
  88        while (start < end) {
  89                scope = start;
  90                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_NAMESPACE ||
  91                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
  92                    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
  93                        (*cnt)++;
  94                else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
  95                        scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
  96                        pr_warn("Unsupported device scope\n");
  97                }
  98                start += scope->length;
  99        }
 100        if (*cnt == 0)
 101                return NULL;
 102
 103        return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
 104}
 105
 106void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
 107{
 108        int i;
 109        struct device *tmp_dev;
 110
 111        if (*devices && *cnt) {
 112                for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
 113                        put_device(tmp_dev);
 114                kfree(*devices);
 115        }
 116
 117        *devices = NULL;
 118        *cnt = 0;
 119}
 120
 121/* Optimize out kzalloc()/kfree() for normal cases */
 122static char dmar_pci_notify_info_buf[64];
 123
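    /*
     * Build a notification record for @dev.  For BUS_NOTIFY_ADD_DEVICE the
     * path[] array is filled with the (bus, device, function) of the device
     * and each parent bridge, ordered from the root bus downwards, so it can
     * later be matched against ACPI device scope paths.  The static buffer
     * above is used whenever it is large enough, avoiding an allocation in
     * the common case.
     */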
 124static struct dmar_pci_notify_info *
 125dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
 126{
 127        int level = 0;
 128        size_t size;
 129        struct pci_dev *tmp;
 130        struct dmar_pci_notify_info *info;
 131
 132        BUG_ON(dev->is_virtfn);
 133
 134        /*
 135         * Ignore devices that have a domain number higher than what can
 136         * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000
 137         */
 138        if (pci_domain_nr(dev->bus) > U16_MAX)
 139                return NULL;
 140
 141        /* Only generate path[] for device addition events */
 142        if (event == BUS_NOTIFY_ADD_DEVICE)
 143                for (tmp = dev; tmp; tmp = tmp->bus->self)
 144                        level++;
 145
 146        size = struct_size(info, path, level);
 147        if (size <= sizeof(dmar_pci_notify_info_buf)) {
 148                info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
 149        } else {
 150                info = kzalloc(size, GFP_KERNEL);
 151                if (!info) {
 152                        if (dmar_dev_scope_status == 0)
 153                                dmar_dev_scope_status = -ENOMEM;
 154                        return NULL;
 155                }
 156        }
 157
 158        info->event = event;
 159        info->dev = dev;
 160        info->seg = pci_domain_nr(dev->bus);
 161        info->level = level;
 162        if (event == BUS_NOTIFY_ADD_DEVICE) {
 163                for (tmp = dev; tmp; tmp = tmp->bus->self) {
 164                        level--;
 165                        info->path[level].bus = tmp->bus->number;
 166                        info->path[level].device = PCI_SLOT(tmp->devfn);
 167                        info->path[level].function = PCI_FUNC(tmp->devfn);
 168                        if (pci_is_root_bus(tmp->bus))
 169                                info->bus = tmp->bus->number;
 170                }
 171        }
 172
 173        return info;
 174}
 175
 176static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
 177{
 178        if ((void *)info != dmar_pci_notify_info_buf)
 179                kfree(info);
 180}
 181
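    /*
     * Check whether the path recorded in @info matches the ACPI scope path
     * starting at @bus.  The fallback handles firmware that describes a
     * device by its final bus number and a single devfn step instead of the
     * full path from the root bus (see the FW_BUG workaround below).
     */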
 182static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
 183                                struct acpi_dmar_pci_path *path, int count)
 184{
 185        int i;
 186
 187        if (info->bus != bus)
 188                goto fallback;
 189        if (info->level != count)
 190                goto fallback;
 191
 192        for (i = 0; i < count; i++) {
 193                if (path[i].device != info->path[i].device ||
 194                    path[i].function != info->path[i].function)
 195                        goto fallback;
 196        }
 197
 198        return true;
 199
 200fallback:
 201
 202        if (count != 1)
 203                return false;
 204
 205        i = info->level - 1;
 206        if (bus              == info->path[i].bus &&
 207            path[0].device   == info->path[i].device &&
 208            path[0].function == info->path[i].function) {
 209                pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n",
 210                        bus, path[0].device, path[0].function);
 211                return true;
 212        }
 213
 214        return false;
 215}
 216
 217/* Return: > 0 if match found, 0 if no match found, < 0 on error */
 218int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
 219                          void *start, void *end, u16 segment,
 220                          struct dmar_dev_scope *devices,
 221                          int devices_cnt)
 222{
 223        int i, level;
 224        struct device *tmp, *dev = &info->dev->dev;
 225        struct acpi_dmar_device_scope *scope;
 226        struct acpi_dmar_pci_path *path;
 227
 228        if (segment != info->seg)
 229                return 0;
 230
 231        for (; start < end; start += scope->length) {
 232                scope = start;
 233                if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
 234                    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
 235                        continue;
 236
 237                path = (struct acpi_dmar_pci_path *)(scope + 1);
 238                level = (scope->length - sizeof(*scope)) / sizeof(*path);
 239                if (!dmar_match_pci_path(info, scope->bus, path, level))
 240                        continue;
 241
 242                /*
 243                 * We expect devices with endpoint scope to have normal PCI
 244                 * headers, and devices with bridge scope to have bridge PCI
 245                 * headers.  However PCI NTB devices may be listed in the
 246                 * DMAR table with bridge scope, even though they have a
 247                 * normal PCI header.  NTB devices are identified by class
 248                 * "BRIDGE_OTHER" (0680h) - we don't declare a scope mismatch
 249                 * for this special case.
 250                 */
 251                if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
 252                     info->dev->hdr_type != PCI_HEADER_TYPE_NORMAL) ||
 253                    (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE &&
 254                     (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL &&
 255                      info->dev->class >> 16 != PCI_BASE_CLASS_BRIDGE))) {
 256                        pr_warn("Device scope type does not match for %s\n",
 257                                pci_name(info->dev));
 258                        return -EINVAL;
 259                }
 260
 261                for_each_dev_scope(devices, devices_cnt, i, tmp)
 262                        if (tmp == NULL) {
 263                                devices[i].bus = info->dev->bus->number;
 264                                devices[i].devfn = info->dev->devfn;
 265                                rcu_assign_pointer(devices[i].dev,
 266                                                   get_device(dev));
 267                                return 1;
 268                        }
 269                BUG_ON(i >= devices_cnt);
 270        }
 271
 272        return 0;
 273}
 274
 275int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
 276                          struct dmar_dev_scope *devices, int count)
 277{
 278        int index;
 279        struct device *tmp;
 280
 281        if (info->seg != segment)
 282                return 0;
 283
 284        for_each_active_dev_scope(devices, count, index, tmp)
 285                if (tmp == &info->dev->dev) {
 286                        RCU_INIT_POINTER(devices[index].dev, NULL);
 287                        synchronize_rcu();
 288                        put_device(tmp);
 289                        return 1;
 290                }
 291
 292        return 0;
 293}
 294
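    /*
     * A PCI device was added: try to attach it to the device scope of each
     * DRHD unit that lists it explicitly (INCLUDE_ALL units need no scope
     * update), then notify the DMA remapping code (RMRR/ATSR/SATC scopes)
     * and the interrupt remapping code about the new device.
     */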
 295static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
 296{
 297        int ret = 0;
 298        struct dmar_drhd_unit *dmaru;
 299        struct acpi_dmar_hardware_unit *drhd;
 300
 301        for_each_drhd_unit(dmaru) {
 302                if (dmaru->include_all)
 303                        continue;
 304
 305                drhd = container_of(dmaru->hdr,
 306                                    struct acpi_dmar_hardware_unit, header);
 307                ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
 308                                ((void *)drhd) + drhd->header.length,
 309                                dmaru->segment,
 310                                dmaru->devices, dmaru->devices_cnt);
 311                if (ret)
 312                        break;
 313        }
 314        if (ret >= 0)
 315                ret = dmar_iommu_notify_scope_dev(info);
 316        if (ret < 0 && dmar_dev_scope_status == 0)
 317                dmar_dev_scope_status = ret;
 318
 319        if (ret >= 0)
 320                intel_irq_remap_add_device(info);
 321
 322        return ret;
 323}
 324
 325static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
 326{
 327        struct dmar_drhd_unit *dmaru;
 328
 329        for_each_drhd_unit(dmaru)
 330                if (dmar_remove_dev_scope(info, dmaru->segment,
 331                        dmaru->devices, dmaru->devices_cnt))
 332                        break;
 333        dmar_iommu_notify_scope_dev(info);
 334}
 335
 336static inline void vf_inherit_msi_domain(struct pci_dev *pdev)
 337{
 338        struct pci_dev *physfn = pci_physfn(pdev);
 339
 340        dev_set_msi_domain(&pdev->dev, dev_get_msi_domain(&physfn->dev));
 341}
 342
 343static int dmar_pci_bus_notifier(struct notifier_block *nb,
 344                                 unsigned long action, void *data)
 345{
 346        struct pci_dev *pdev = to_pci_dev(data);
 347        struct dmar_pci_notify_info *info;
 348
 349        /* Only care about add/remove events for physical functions.
 350         * For VFs we actually do the lookup based on the corresponding
 351         * PF in device_to_iommu() anyway. */
 352        if (pdev->is_virtfn) {
 353                /*
 354                 * Ensure that the VF device inherits the irq domain of the
 355                 * PF device. Ideally the device would inherit the domain
 356                 * from the bus, but DMAR can have multiple units per bus
 357                 * which makes this impossible. The VF 'bus' could inherit
 358                 * from the PF device, but that's yet another x86'sism to
 359                 * inflict on everybody else.
 360                 */
 361                if (action == BUS_NOTIFY_ADD_DEVICE)
 362                        vf_inherit_msi_domain(pdev);
 363                return NOTIFY_DONE;
 364        }
 365
 366        if (action != BUS_NOTIFY_ADD_DEVICE &&
 367            action != BUS_NOTIFY_REMOVED_DEVICE)
 368                return NOTIFY_DONE;
 369
 370        info = dmar_alloc_pci_notify_info(pdev, action);
 371        if (!info)
 372                return NOTIFY_DONE;
 373
 374        down_write(&dmar_global_lock);
 375        if (action == BUS_NOTIFY_ADD_DEVICE)
 376                dmar_pci_bus_add_dev(info);
 377        else if (action == BUS_NOTIFY_REMOVED_DEVICE)
 378                dmar_pci_bus_del_dev(info);
 379        up_write(&dmar_global_lock);
 380
 381        dmar_free_pci_notify_info(info);
 382
 383        return NOTIFY_OK;
 384}
 385
 386static struct notifier_block dmar_pci_bus_nb = {
 387        .notifier_call = dmar_pci_bus_notifier,
 388        .priority = INT_MIN,
 389};
 390
 391static struct dmar_drhd_unit *
 392dmar_find_dmaru(struct acpi_dmar_hardware_unit *drhd)
 393{
 394        struct dmar_drhd_unit *dmaru;
 395
 396        list_for_each_entry_rcu(dmaru, &dmar_drhd_units, list,
 397                                dmar_rcu_check())
 398                if (dmaru->segment == drhd->segment &&
 399                    dmaru->reg_base_addr == drhd->address)
 400                        return dmaru;
 401
 402        return NULL;
 403}
 404
 405/*
 406 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 407 * structure which uniquely represents one DMA remapping hardware unit
 408 * present in the platform
 409 */
 410static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg)
 411{
 412        struct acpi_dmar_hardware_unit *drhd;
 413        struct dmar_drhd_unit *dmaru;
 414        int ret;
 415
 416        drhd = (struct acpi_dmar_hardware_unit *)header;
 417        dmaru = dmar_find_dmaru(drhd);
 418        if (dmaru)
 419                goto out;
 420
 421        dmaru = kzalloc(sizeof(*dmaru) + header->length, GFP_KERNEL);
 422        if (!dmaru)
 423                return -ENOMEM;
 424
 425        /*
 426         * If the header is allocated from slab by the ACPI _DSM method, we need to
 427         * copy the content because the memory buffer will be freed on return.
 428         */
 429        dmaru->hdr = (void *)(dmaru + 1);
 430        memcpy(dmaru->hdr, header, header->length);
 431        dmaru->reg_base_addr = drhd->address;
 432        dmaru->segment = drhd->segment;
 433        dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
 434        dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
 435                                              ((void *)drhd) + drhd->header.length,
 436                                              &dmaru->devices_cnt);
 437        if (dmaru->devices_cnt && dmaru->devices == NULL) {
 438                kfree(dmaru);
 439                return -ENOMEM;
 440        }
 441
 442        ret = alloc_iommu(dmaru);
 443        if (ret) {
 444                dmar_free_dev_scope(&dmaru->devices,
 445                                    &dmaru->devices_cnt);
 446                kfree(dmaru);
 447                return ret;
 448        }
 449        dmar_register_drhd_unit(dmaru);
 450
 451out:
 452        if (arg)
 453                (*(int *)arg)++;
 454
 455        return 0;
 456}
 457
 458static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
 459{
 460        if (dmaru->devices && dmaru->devices_cnt)
 461                dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
 462        if (dmaru->iommu)
 463                free_iommu(dmaru->iommu);
 464        kfree(dmaru);
 465}
 466
 467static int __init dmar_parse_one_andd(struct acpi_dmar_header *header,
 468                                      void *arg)
 469{
 470        struct acpi_dmar_andd *andd = (void *)header;
 471
 472        /* Check for NUL termination within the designated length */
 473        if (strnlen(andd->device_name, header->length - 8) == header->length - 8) {
 474                pr_warn(FW_BUG
 475                           "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
 476                           "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 477                           dmi_get_system_info(DMI_BIOS_VENDOR),
 478                           dmi_get_system_info(DMI_BIOS_VERSION),
 479                           dmi_get_system_info(DMI_PRODUCT_VERSION));
 480                add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 481                return -EINVAL;
 482        }
 483        pr_info("ANDD device: %x name: %s\n", andd->device_number,
 484                andd->device_name);
 485
 486        return 0;
 487}
 488
 489#ifdef CONFIG_ACPI_NUMA
 490static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
 491{
 492        struct acpi_dmar_rhsa *rhsa;
 493        struct dmar_drhd_unit *drhd;
 494
 495        rhsa = (struct acpi_dmar_rhsa *)header;
 496        for_each_drhd_unit(drhd) {
 497                if (drhd->reg_base_addr == rhsa->base_address) {
 498                        int node = pxm_to_node(rhsa->proximity_domain);
 499
 500                        if (!node_online(node))
 501                                node = NUMA_NO_NODE;
 502                        drhd->iommu->node = node;
 503                        return 0;
 504                }
 505        }
 506        pr_warn(FW_BUG
 507                "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
 508                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 509                rhsa->base_address,
 510                dmi_get_system_info(DMI_BIOS_VENDOR),
 511                dmi_get_system_info(DMI_BIOS_VERSION),
 512                dmi_get_system_info(DMI_PRODUCT_VERSION));
 513        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 514
 515        return 0;
 516}
 517#else
 518#define dmar_parse_one_rhsa             dmar_res_noop
 519#endif
 520
 521static void
 522dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 523{
 524        struct acpi_dmar_hardware_unit *drhd;
 525        struct acpi_dmar_reserved_memory *rmrr;
 526        struct acpi_dmar_atsr *atsr;
 527        struct acpi_dmar_rhsa *rhsa;
 528        struct acpi_dmar_satc *satc;
 529
 530        switch (header->type) {
 531        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
 532                drhd = container_of(header, struct acpi_dmar_hardware_unit,
 533                                    header);
 534                pr_info("DRHD base: %#016Lx flags: %#x\n",
 535                        (unsigned long long)drhd->address, drhd->flags);
 536                break;
 537        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
 538                rmrr = container_of(header, struct acpi_dmar_reserved_memory,
 539                                    header);
 540                pr_info("RMRR base: %#016Lx end: %#016Lx\n",
 541                        (unsigned long long)rmrr->base_address,
 542                        (unsigned long long)rmrr->end_address);
 543                break;
 544        case ACPI_DMAR_TYPE_ROOT_ATS:
 545                atsr = container_of(header, struct acpi_dmar_atsr, header);
 546                pr_info("ATSR flags: %#x\n", atsr->flags);
 547                break;
 548        case ACPI_DMAR_TYPE_HARDWARE_AFFINITY:
 549                rhsa = container_of(header, struct acpi_dmar_rhsa, header);
 550                pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
 551                       (unsigned long long)rhsa->base_address,
 552                       rhsa->proximity_domain);
 553                break;
 554        case ACPI_DMAR_TYPE_NAMESPACE:
 555                /* We don't print this here because we need to sanity-check
 556                   it first. So print it in dmar_parse_one_andd() instead. */
 557                break;
 558        case ACPI_DMAR_TYPE_SATC:
 559                satc = container_of(header, struct acpi_dmar_satc, header);
 560                pr_info("SATC flags: 0x%x\n", satc->flags);
 561                break;
 562        }
 563}
 564
 565/**
 566 * dmar_table_detect - checks to see if the platform supports DMAR devices
 567 */
 568static int __init dmar_table_detect(void)
 569{
 570        acpi_status status = AE_OK;
 571
 572        /* If we can find the DMAR table, then there are DMAR devices */
 573        status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
 574
 575        if (ACPI_SUCCESS(status) && !dmar_tbl) {
 576                pr_warn("Unable to map DMAR\n");
 577                status = AE_NOT_FOUND;
 578        }
 579
 580        return ACPI_SUCCESS(status) ? 0 : -ENOENT;
 581}
 582
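    /*
     * Walk the variable-length remapping structures in [start, start + len),
     * dispatching each one to the handler registered for its type in @cb.
     * Reserved/unknown types are skipped for forward compatibility, a
     * zero-length structure terminates the walk, and a structure running
     * past the end of the table is treated as an error.
     */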
 583static int dmar_walk_remapping_entries(struct acpi_dmar_header *start,
 584                                       size_t len, struct dmar_res_callback *cb)
 585{
 586        struct acpi_dmar_header *iter, *next;
 587        struct acpi_dmar_header *end = ((void *)start) + len;
 588
 589        for (iter = start; iter < end; iter = next) {
 590                next = (void *)iter + iter->length;
 591                if (iter->length == 0) {
 592                        /* Avoid looping forever on bad ACPI tables */
 593                        pr_debug(FW_BUG "Invalid 0-length structure\n");
 594                        break;
 595                } else if (next > end) {
 596                        /* Avoid running past the table end */
 597                        pr_warn(FW_BUG "Record passes table end\n");
 598                        return -EINVAL;
 599                }
 600
 601                if (cb->print_entry)
 602                        dmar_table_print_dmar_entry(iter);
 603
 604                if (iter->type >= ACPI_DMAR_TYPE_RESERVED) {
 605                        /* continue for forward compatibility */
 606                        pr_debug("Unknown DMAR structure type %d\n",
 607                                 iter->type);
 608                } else if (cb->cb[iter->type]) {
 609                        int ret;
 610
 611                        ret = cb->cb[iter->type](iter, cb->arg[iter->type]);
 612                        if (ret)
 613                                return ret;
 614                } else if (!cb->ignore_unhandled) {
 615                        pr_warn("No handler for DMAR structure type %d\n",
 616                                iter->type);
 617                        return -EINVAL;
 618                }
 619        }
 620
 621        return 0;
 622}
 623
 624static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar,
 625                                       struct dmar_res_callback *cb)
 626{
 627        return dmar_walk_remapping_entries((void *)(dmar + 1),
 628                        dmar->header.length - sizeof(*dmar), cb);
 629}
 630
 631/**
 632 * parse_dmar_table - parses the DMA remapping reporting (DMAR) table
 633 */
 634static int __init
 635parse_dmar_table(void)
 636{
 637        struct acpi_table_dmar *dmar;
 638        int drhd_count = 0;
 639        int ret;
 640        struct dmar_res_callback cb = {
 641                .print_entry = true,
 642                .ignore_unhandled = true,
 643                .arg[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &drhd_count,
 644                .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_parse_one_drhd,
 645                .cb[ACPI_DMAR_TYPE_RESERVED_MEMORY] = &dmar_parse_one_rmrr,
 646                .cb[ACPI_DMAR_TYPE_ROOT_ATS] = &dmar_parse_one_atsr,
 647                .cb[ACPI_DMAR_TYPE_HARDWARE_AFFINITY] = &dmar_parse_one_rhsa,
 648                .cb[ACPI_DMAR_TYPE_NAMESPACE] = &dmar_parse_one_andd,
 649                .cb[ACPI_DMAR_TYPE_SATC] = &dmar_parse_one_satc,
 650        };
 651
 652        /*
 653         * Do it again; the earlier dmar_tbl mapping could have been made
 654         * with the fixed map.
 655         */
 656        dmar_table_detect();
 657
 658        /*
 659         * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
 660         * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
 661         */
 662        dmar_tbl = tboot_get_dmar_table(dmar_tbl);
 663
 664        dmar = (struct acpi_table_dmar *)dmar_tbl;
 665        if (!dmar)
 666                return -ENODEV;
 667
 668        if (dmar->width < PAGE_SHIFT - 1) {
 669                pr_warn("Invalid DMAR haw\n");
 670                return -EINVAL;
 671        }
 672
 673        pr_info("Host address width %d\n", dmar->width + 1);
 674        ret = dmar_walk_dmar_table(dmar, &cb);
 675        if (ret == 0 && drhd_count == 0)
 676                pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
 677
 678        return ret;
 679}
 680
 681static int dmar_pci_device_match(struct dmar_dev_scope devices[],
 682                                 int cnt, struct pci_dev *dev)
 683{
 684        int index;
 685        struct device *tmp;
 686
 687        while (dev) {
 688                for_each_active_dev_scope(devices, cnt, index, tmp)
 689                        if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
 690                                return 1;
 691
 692                /* Check our parent */
 693                dev = dev->bus->self;
 694        }
 695
 696        return 0;
 697}
 698
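    /*
     * Find the DRHD unit responsible for @dev (VFs are resolved via their PF).
     * Units with an explicit device scope are considered first because
     * INCLUDE_ALL units are kept at the tail of dmar_drhd_units; the
     * INCLUDE_ALL unit of the device's segment serves as the fallback.
     */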
 699struct dmar_drhd_unit *
 700dmar_find_matched_drhd_unit(struct pci_dev *dev)
 701{
 702        struct dmar_drhd_unit *dmaru;
 703        struct acpi_dmar_hardware_unit *drhd;
 704
 705        dev = pci_physfn(dev);
 706
 707        rcu_read_lock();
 708        for_each_drhd_unit(dmaru) {
 709                drhd = container_of(dmaru->hdr,
 710                                    struct acpi_dmar_hardware_unit,
 711                                    header);
 712
 713                if (dmaru->include_all &&
 714                    drhd->segment == pci_domain_nr(dev->bus))
 715                        goto out;
 716
 717                if (dmar_pci_device_match(dmaru->devices,
 718                                          dmaru->devices_cnt, dev))
 719                        goto out;
 720        }
 721        dmaru = NULL;
 722out:
 723        rcu_read_unlock();
 724
 725        return dmaru;
 726}
 727
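    /*
     * Bind an ACPI namespace device reported by an ANDD structure to the DRHD
     * unit whose scope lists the matching enumeration ID, recording the
     * bus/devfn from the scope's PCI path in a free slot of that unit's
     * device array.
     */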
 728static void __init dmar_acpi_insert_dev_scope(u8 device_number,
 729                                              struct acpi_device *adev)
 730{
 731        struct dmar_drhd_unit *dmaru;
 732        struct acpi_dmar_hardware_unit *drhd;
 733        struct acpi_dmar_device_scope *scope;
 734        struct device *tmp;
 735        int i;
 736        struct acpi_dmar_pci_path *path;
 737
 738        for_each_drhd_unit(dmaru) {
 739                drhd = container_of(dmaru->hdr,
 740                                    struct acpi_dmar_hardware_unit,
 741                                    header);
 742
 743                for (scope = (void *)(drhd + 1);
 744                     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
 745                     scope = ((void *)scope) + scope->length) {
 746                        if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_NAMESPACE)
 747                                continue;
 748                        if (scope->enumeration_id != device_number)
 749                                continue;
 750
 751                        path = (void *)(scope + 1);
 752                        pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
 753                                dev_name(&adev->dev), dmaru->reg_base_addr,
 754                                scope->bus, path->device, path->function);
 755                        for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
 756                                if (tmp == NULL) {
 757                                        dmaru->devices[i].bus = scope->bus;
 758                                        dmaru->devices[i].devfn = PCI_DEVFN(path->device,
 759                                                                            path->function);
 760                                        rcu_assign_pointer(dmaru->devices[i].dev,
 761                                                           get_device(&adev->dev));
 762                                        return;
 763                                }
 764                        BUG_ON(i >= dmaru->devices_cnt);
 765                }
 766        }
 767        pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
 768                device_number, dev_name(&adev->dev));
 769}
 770
 771static int __init dmar_acpi_dev_scope_init(void)
 772{
 773        struct acpi_dmar_andd *andd;
 774
 775        if (dmar_tbl == NULL)
 776                return -ENODEV;
 777
 778        for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
 779             ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
 780             andd = ((void *)andd) + andd->header.length) {
 781                if (andd->header.type == ACPI_DMAR_TYPE_NAMESPACE) {
 782                        acpi_handle h;
 783                        struct acpi_device *adev;
 784
 785                        if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
 786                                                          andd->device_name,
 787                                                          &h))) {
 788                                pr_err("Failed to find handle for ACPI object %s\n",
 789                                       andd->device_name);
 790                                continue;
 791                        }
 792                        if (acpi_bus_get_device(h, &adev)) {
 793                                pr_err("Failed to get device for ACPI object %s\n",
 794                                       andd->device_name);
 795                                continue;
 796                        }
 797                        dmar_acpi_insert_dev_scope(andd->device_number, adev);
 798                }
 799        }
 800        return 0;
 801}
 802
 803int __init dmar_dev_scope_init(void)
 804{
 805        struct pci_dev *dev = NULL;
 806        struct dmar_pci_notify_info *info;
 807
 808        if (dmar_dev_scope_status != 1)
 809                return dmar_dev_scope_status;
 810
 811        if (list_empty(&dmar_drhd_units)) {
 812                dmar_dev_scope_status = -ENODEV;
 813        } else {
 814                dmar_dev_scope_status = 0;
 815
 816                dmar_acpi_dev_scope_init();
 817
 818                for_each_pci_dev(dev) {
 819                        if (dev->is_virtfn)
 820                                continue;
 821
 822                        info = dmar_alloc_pci_notify_info(dev,
 823                                        BUS_NOTIFY_ADD_DEVICE);
 824                        if (!info) {
 825                                return dmar_dev_scope_status;
 826                        } else {
 827                                dmar_pci_bus_add_dev(info);
 828                                dmar_free_pci_notify_info(info);
 829                        }
 830                }
 831        }
 832
 833        return dmar_dev_scope_status;
 834}
 835
 836void __init dmar_register_bus_notifier(void)
 837{
 838        bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
 839}
 840
 841
 842int __init dmar_table_init(void)
 843{
 844        static int dmar_table_initialized;
 845        int ret;
 846
 847        if (dmar_table_initialized == 0) {
 848                ret = parse_dmar_table();
 849                if (ret < 0) {
 850                        if (ret != -ENODEV)
 851                                pr_info("Parse DMAR table failure.\n");
 852                } else  if (list_empty(&dmar_drhd_units)) {
 853                        pr_info("No DMAR devices found\n");
 854                        ret = -ENODEV;
 855                }
 856
 857                if (ret < 0)
 858                        dmar_table_initialized = ret;
 859                else
 860                        dmar_table_initialized = 1;
 861        }
 862
 863        return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
 864}
 865
 866static void warn_invalid_dmar(u64 addr, const char *message)
 867{
 868        pr_warn_once(FW_BUG
 869                "Your BIOS is broken; DMAR reported at address %llx%s!\n"
 870                "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
 871                addr, message,
 872                dmi_get_system_info(DMI_BIOS_VENDOR),
 873                dmi_get_system_info(DMI_BIOS_VERSION),
 874                dmi_get_system_info(DMI_PRODUCT_VERSION));
 875        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 876}
 877
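    /*
     * Sanity-check one DRHD: map its register page and verify that the CAP
     * and ECAP registers do not read back as all ones.  A non-NULL @arg
     * selects ioremap(); during early detection no argument is registered,
     * so the early_ioremap() path is used instead.
     */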
 878static int __ref
 879dmar_validate_one_drhd(struct acpi_dmar_header *entry, void *arg)
 880{
 881        struct acpi_dmar_hardware_unit *drhd;
 882        void __iomem *addr;
 883        u64 cap, ecap;
 884
 885        drhd = (void *)entry;
 886        if (!drhd->address) {
 887                warn_invalid_dmar(0, "");
 888                return -EINVAL;
 889        }
 890
 891        if (arg)
 892                addr = ioremap(drhd->address, VTD_PAGE_SIZE);
 893        else
 894                addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
 895        if (!addr) {
 896                pr_warn("Can't validate DRHD address: %llx\n", drhd->address);
 897                return -EINVAL;
 898        }
 899
 900        cap = dmar_readq(addr + DMAR_CAP_REG);
 901        ecap = dmar_readq(addr + DMAR_ECAP_REG);
 902
 903        if (arg)
 904                iounmap(addr);
 905        else
 906                early_iounmap(addr, VTD_PAGE_SIZE);
 907
 908        if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
 909                warn_invalid_dmar(drhd->address, " returns all ones");
 910                return -EINVAL;
 911        }
 912
 913        return 0;
 914}
 915
 916int __init detect_intel_iommu(void)
 917{
 918        int ret;
 919        struct dmar_res_callback validate_drhd_cb = {
 920                .cb[ACPI_DMAR_TYPE_HARDWARE_UNIT] = &dmar_validate_one_drhd,
 921                .ignore_unhandled = true,
 922        };
 923
 924        down_write(&dmar_global_lock);
 925        ret = dmar_table_detect();
 926        if (!ret)
 927                ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl,
 928                                           &validate_drhd_cb);
 929        if (!ret && !no_iommu && !iommu_detected &&
 930            (!dmar_disabled || dmar_platform_optin())) {
 931                iommu_detected = 1;
 932                /* Make sure ACS will be enabled */
 933                pci_request_acs();
 934        }
 935
 936#ifdef CONFIG_X86
 937        if (!ret) {
 938                x86_init.iommu.iommu_init = intel_iommu_init;
 939                x86_platform.iommu_shutdown = intel_iommu_shutdown;
 940        }
 941
 942#endif
 943
 944        if (dmar_tbl) {
 945                acpi_put_table(dmar_tbl);
 946                dmar_tbl = NULL;
 947        }
 948        up_write(&dmar_global_lock);
 949
 950        return ret ? ret : 1;
 951}
 952
 953static void unmap_iommu(struct intel_iommu *iommu)
 954{
 955        iounmap(iommu->reg);
 956        release_mem_region(iommu->reg_phys, iommu->reg_size);
 957}
 958
 959/**
 960 * map_iommu: map the iommu's registers
 961 * @iommu: the iommu to map
 962 * @phys_addr: the physical address of the base register
 963 *
 964 * Memory map the iommu's registers.  Start with a single page, and
 965 * possibly expand if that turns out to be insufficient.
 966 */
 967static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
 968{
 969        int map_size, err = 0;
 970
 971        iommu->reg_phys = phys_addr;
 972        iommu->reg_size = VTD_PAGE_SIZE;
 973
 974        if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
 975                pr_err("Can't reserve memory\n");
 976                err = -EBUSY;
 977                goto out;
 978        }
 979
 980        iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
 981        if (!iommu->reg) {
 982                pr_err("Can't map the region\n");
 983                err = -ENOMEM;
 984                goto release;
 985        }
 986
 987        iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
 988        iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 989
 990        if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
 991                err = -EINVAL;
 992                warn_invalid_dmar(phys_addr, " returns all ones");
 993                goto unmap;
 994        }
 995        if (ecap_vcs(iommu->ecap))
 996                iommu->vccap = dmar_readq(iommu->reg + DMAR_VCCAP_REG);
 997
 998        /* The register set might span more than one page. */
 999        map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1000                         cap_max_fault_reg_offset(iommu->cap));
1001        map_size = VTD_PAGE_ALIGN(map_size);
1002        if (map_size > iommu->reg_size) {
1003                iounmap(iommu->reg);
1004                release_mem_region(iommu->reg_phys, iommu->reg_size);
1005                iommu->reg_size = map_size;
1006                if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
1007                                        iommu->name)) {
1008                        pr_err("Can't reserve memory\n");
1009                        err = -EBUSY;
1010                        goto out;
1011                }
1012                iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
1013                if (!iommu->reg) {
1014                        pr_err("Can't map the region\n");
1015                        err = -ENOMEM;
1016                        goto release;
1017                }
1018        }
1019        err = 0;
1020        goto out;
1021
1022unmap:
1023        iounmap(iommu->reg);
1024release:
1025        release_mem_region(iommu->reg_phys, iommu->reg_size);
1026out:
1027        return err;
1028}
1029
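    /*
     * Allocate a unique sequence number for this IOMMU from the dmar_seq_ids
     * bitmap and derive its "dmar%d" name from it.  Returns -1 when more than
     * DMAR_UNITS_SUPPORTED units are present.
     */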
1030static int dmar_alloc_seq_id(struct intel_iommu *iommu)
1031{
1032        iommu->seq_id = find_first_zero_bit(dmar_seq_ids,
1033                                            DMAR_UNITS_SUPPORTED);
1034        if (iommu->seq_id >= DMAR_UNITS_SUPPORTED) {
1035                iommu->seq_id = -1;
1036        } else {
1037                set_bit(iommu->seq_id, dmar_seq_ids);
1038                sprintf(iommu->name, "dmar%d", iommu->seq_id);
1039        }
1040
1041        return iommu->seq_id;
1042}
1043
1044static void dmar_free_seq_id(struct intel_iommu *iommu)
1045{
1046        if (iommu->seq_id >= 0) {
1047                clear_bit(iommu->seq_id, dmar_seq_ids);
1048                iommu->seq_id = -1;
1049        }
1050}
1051
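    /*
     * Allocate and initialize the intel_iommu structure for a DRHD: assign a
     * sequence id, map the register set, read CAP/ECAP, compute the (maximum)
     * AGAW, mirror the hardware enable bits into gcmd and, on hotplug (i.e.
     * when intel_iommu_enabled is already set), register the unit with sysfs
     * and the IOMMU core.  A unit without a usable address width is kept but
     * marked drhd->ignored.
     */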
1052static int alloc_iommu(struct dmar_drhd_unit *drhd)
1053{
1054        struct intel_iommu *iommu;
1055        u32 ver, sts;
1056        int agaw = -1;
1057        int msagaw = -1;
1058        int err;
1059
1060        if (!drhd->reg_base_addr) {
1061                warn_invalid_dmar(0, "");
1062                return -EINVAL;
1063        }
1064
1065        iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
1066        if (!iommu)
1067                return -ENOMEM;
1068
1069        if (dmar_alloc_seq_id(iommu) < 0) {
1070                pr_err("Failed to allocate seq_id\n");
1071                err = -ENOSPC;
1072                goto error;
1073        }
1074
1075        err = map_iommu(iommu, drhd->reg_base_addr);
1076        if (err) {
1077                pr_err("Failed to map %s\n", iommu->name);
1078                goto error_free_seq_id;
1079        }
1080
1081        err = -EINVAL;
1082        if (cap_sagaw(iommu->cap) == 0) {
1083                pr_info("%s: No supported address widths. Not attempting DMA translation.\n",
1084                        iommu->name);
1085                drhd->ignored = 1;
1086        }
1087
1088        if (!drhd->ignored) {
1089                agaw = iommu_calculate_agaw(iommu);
1090                if (agaw < 0) {
1091                        pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
1092                               iommu->seq_id);
1093                        drhd->ignored = 1;
1094                }
1095        }
1096        if (!drhd->ignored) {
1097                msagaw = iommu_calculate_max_sagaw(iommu);
1098                if (msagaw < 0) {
1099                        pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
1100                               iommu->seq_id);
1101                        drhd->ignored = 1;
1102                        agaw = -1;
1103                }
1104        }
1105        iommu->agaw = agaw;
1106        iommu->msagaw = msagaw;
1107        iommu->segment = drhd->segment;
1108
1109        iommu->node = NUMA_NO_NODE;
1110
1111        ver = readl(iommu->reg + DMAR_VER_REG);
1112        pr_info("%s: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
1113                iommu->name,
1114                (unsigned long long)drhd->reg_base_addr,
1115                DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1116                (unsigned long long)iommu->cap,
1117                (unsigned long long)iommu->ecap);
1118
1119        /* Reflect status in gcmd */
1120        sts = readl(iommu->reg + DMAR_GSTS_REG);
1121        if (sts & DMA_GSTS_IRES)
1122                iommu->gcmd |= DMA_GCMD_IRE;
1123        if (sts & DMA_GSTS_TES)
1124                iommu->gcmd |= DMA_GCMD_TE;
1125        if (sts & DMA_GSTS_QIES)
1126                iommu->gcmd |= DMA_GCMD_QIE;
1127
1128        raw_spin_lock_init(&iommu->register_lock);
1129
1130        /*
1131         * This is only for hotplug; at boot time intel_iommu_enabled won't
1132         * be set yet. When intel_iommu_init() runs, it registers the units
1133         * present at boot time, then sets intel_iommu_enabled.
1134         */
1135        if (intel_iommu_enabled && !drhd->ignored) {
1136                err = iommu_device_sysfs_add(&iommu->iommu, NULL,
1137                                             intel_iommu_groups,
1138                                             "%s", iommu->name);
1139                if (err)
1140                        goto err_unmap;
1141
1142                err = iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL);
1143                if (err)
1144                        goto err_sysfs;
1145        }
1146
1147        drhd->iommu = iommu;
1148        iommu->drhd = drhd;
1149
1150        return 0;
1151
1152err_sysfs:
1153        iommu_device_sysfs_remove(&iommu->iommu);
1154err_unmap:
1155        unmap_iommu(iommu);
1156error_free_seq_id:
1157        dmar_free_seq_id(iommu);
1158error:
1159        kfree(iommu);
1160        return err;
1161}
1162
1163static void free_iommu(struct intel_iommu *iommu)
1164{
1165        if (intel_iommu_enabled && !iommu->drhd->ignored) {
1166                iommu_device_unregister(&iommu->iommu);
1167                iommu_device_sysfs_remove(&iommu->iommu);
1168        }
1169
1170        if (iommu->irq) {
1171                if (iommu->pr_irq) {
1172                        free_irq(iommu->pr_irq, iommu);
1173                        dmar_free_hwirq(iommu->pr_irq);
1174                        iommu->pr_irq = 0;
1175                }
1176                free_irq(iommu->irq, iommu);
1177                dmar_free_hwirq(iommu->irq);
1178                iommu->irq = 0;
1179        }
1180
1181        if (iommu->qi) {
1182                free_page((unsigned long)iommu->qi->desc);
1183                kfree(iommu->qi->desc_status);
1184                kfree(iommu->qi);
1185        }
1186
1187        if (iommu->reg)
1188                unmap_iommu(iommu);
1189
1190        dmar_free_seq_id(iommu);
1191        kfree(iommu);
1192}
1193
1194/*
1195 * Reclaim all the submitted descriptors which have completed their work.
1196 */
1197static inline void reclaim_free_desc(struct q_inval *qi)
1198{
1199        while (qi->desc_status[qi->free_tail] == QI_DONE ||
1200               qi->desc_status[qi->free_tail] == QI_ABORT) {
1201                qi->desc_status[qi->free_tail] = QI_FREE;
1202                qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
1203                qi->free_cnt++;
1204        }
1205}
1206
1207static const char *qi_type_string(u8 type)
1208{
1209        switch (type) {
1210        case QI_CC_TYPE:
1211                return "Context-cache Invalidation";
1212        case QI_IOTLB_TYPE:
1213                return "IOTLB Invalidation";
1214        case QI_DIOTLB_TYPE:
1215                return "Device-TLB Invalidation";
1216        case QI_IEC_TYPE:
1217                return "Interrupt Entry Cache Invalidation";
1218        case QI_IWD_TYPE:
1219                return "Invalidation Wait";
1220        case QI_EIOTLB_TYPE:
1221                return "PASID-based IOTLB Invalidation";
1222        case QI_PC_TYPE:
1223                return "PASID-cache Invalidation";
1224        case QI_DEIOTLB_TYPE:
1225                return "PASID-based Device-TLB Invalidation";
1226        case QI_PGRP_RESP_TYPE:
1227                return "Page Group Response";
1228        default:
1229                return "UNKNOWN";
1230        }
1231}
1232
1233static void qi_dump_fault(struct intel_iommu *iommu, u32 fault)
1234{
1235        unsigned int head = dmar_readl(iommu->reg + DMAR_IQH_REG);
1236        u64 iqe_err = dmar_readq(iommu->reg + DMAR_IQER_REG);
1237        struct qi_desc *desc = iommu->qi->desc + head;
1238
1239        if (fault & DMA_FSTS_IQE)
1240                pr_err("VT-d detected Invalidation Queue Error: Reason %llx",
1241                       DMAR_IQER_REG_IQEI(iqe_err));
1242        if (fault & DMA_FSTS_ITE)
1243                pr_err("VT-d detected Invalidation Time-out Error: SID %llx",
1244                       DMAR_IQER_REG_ITESID(iqe_err));
1245        if (fault & DMA_FSTS_ICE)
1246                pr_err("VT-d detected Invalidation Completion Error: SID %llx",
1247                       DMAR_IQER_REG_ICESID(iqe_err));
1248
1249        pr_err("QI HEAD: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1250               qi_type_string(desc->qw0 & 0xf),
1251               (unsigned long long)desc->qw0,
1252               (unsigned long long)desc->qw1);
1253
1254        head = ((head >> qi_shift(iommu)) + QI_LENGTH - 1) % QI_LENGTH;
1255        head <<= qi_shift(iommu);
1256        desc = iommu->qi->desc + head;
1257
1258        pr_err("QI PRIOR: %s qw0 = 0x%llx, qw1 = 0x%llx\n",
1259               qi_type_string(desc->qw0 & 0xf),
1260               (unsigned long long)desc->qw0,
1261               (unsigned long long)desc->qw1);
1262}
1263
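    /*
     * Check the fault status register while waiting for a wait descriptor to
     * complete.  An IQE pointing at our descriptor is recovered by overwriting
     * the offending entry with the wait descriptor; an ITE aborts all pending
     * wait descriptors so the caller resubmits (-EAGAIN); an ICE is simply
     * logged and cleared.
     */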
1264static int qi_check_fault(struct intel_iommu *iommu, int index, int wait_index)
1265{
1266        u32 fault;
1267        int head, tail;
1268        struct q_inval *qi = iommu->qi;
1269        int shift = qi_shift(iommu);
1270
1271        if (qi->desc_status[wait_index] == QI_ABORT)
1272                return -EAGAIN;
1273
1274        fault = readl(iommu->reg + DMAR_FSTS_REG);
1275        if (fault & (DMA_FSTS_IQE | DMA_FSTS_ITE | DMA_FSTS_ICE))
1276                qi_dump_fault(iommu, fault);
1277
1278        /*
1279         * If IQE happens, the head points to the descriptor associated
1280         * with the error. No new descriptors are fetched until the IQE
1281         * is cleared.
1282         */
1283        if (fault & DMA_FSTS_IQE) {
1284                head = readl(iommu->reg + DMAR_IQH_REG);
1285                if ((head >> shift) == index) {
1286                        struct qi_desc *desc = qi->desc + head;
1287
1288                        /*
1289                         * desc->qw2 and desc->qw3 are either reserved or
1290                         * used by software as private data. We won't print
1291                          * out these two qw's for security reasons.
1292                         */
1293                        memcpy(desc, qi->desc + (wait_index << shift),
1294                               1 << shift);
1295                        writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
1296                        pr_info("Invalidation Queue Error (IQE) cleared\n");
1297                        return -EINVAL;
1298                }
1299        }
1300
1301        /*
1302         * If ITE happens, all pending wait_desc commands are aborted.
1303         * No new descriptors are fetched until the ITE is cleared.
1304         */
1305        if (fault & DMA_FSTS_ITE) {
1306                head = readl(iommu->reg + DMAR_IQH_REG);
1307                head = ((head >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1308                head |= 1;
1309                tail = readl(iommu->reg + DMAR_IQT_REG);
1310                tail = ((tail >> shift) - 1 + QI_LENGTH) % QI_LENGTH;
1311
1312                writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
1313                pr_info("Invalidation Time-out Error (ITE) cleared\n");
1314
1315                do {
1316                        if (qi->desc_status[head] == QI_IN_USE)
1317                                qi->desc_status[head] = QI_ABORT;
1318                        head = (head - 2 + QI_LENGTH) % QI_LENGTH;
1319                } while (head != tail);
1320
1321                if (qi->desc_status[wait_index] == QI_ABORT)
1322                        return -EAGAIN;
1323        }
1324
1325        if (fault & DMA_FSTS_ICE) {
1326                writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
1327                pr_info("Invalidation Completion Error (ICE) cleared\n");
1328        }
1329
1330        return 0;
1331}
1332
1333/*
1334 * Function to submit invalidation descriptors of all types to the queued
1335 * invalidation interface (QI). Multiple descriptors can be submitted at a
1336 * time; a wait descriptor is appended to each submission to ensure that the
1337 * hardware has completed the invalidation before returning. Wait descriptors
1338 * can be part of the submission, but they will not be polled for completion.
1339 */
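    /*
     * Typical usage (as in qi_global_iec() and the qi_flush_*() helpers
     * below): build one or more struct qi_desc and submit them synchronously,
     * e.g.
     *
     *      struct qi_desc desc = { .qw0 = QI_IEC_TYPE };
     *
     *      qi_submit_sync(iommu, &desc, 1, 0);
     */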
1340int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
1341                   unsigned int count, unsigned long options)
1342{
1343        struct q_inval *qi = iommu->qi;
1344        s64 devtlb_start_ktime = 0;
1345        s64 iotlb_start_ktime = 0;
1346        s64 iec_start_ktime = 0;
1347        struct qi_desc wait_desc;
1348        int wait_index, index;
1349        unsigned long flags;
1350        int offset, shift;
1351        int rc, i;
1352        u64 type;
1353
1354        if (!qi)
1355                return 0;
1356
1357        type = desc->qw0 & GENMASK_ULL(3, 0);
1358
1359        if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
1360            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
1361                iotlb_start_ktime = ktime_to_ns(ktime_get());
1362
1363        if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
1364            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
1365                devtlb_start_ktime = ktime_to_ns(ktime_get());
1366
1367        if (type == QI_IEC_TYPE &&
1368            dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
1369                iec_start_ktime = ktime_to_ns(ktime_get());
1370
1371restart:
1372        rc = 0;
1373
1374        raw_spin_lock_irqsave(&qi->q_lock, flags);
1375        /*
1376         * Check if we have enough empty slots in the queue to submit;
1377         * the calculation is based on:
1378         * # of desc + 1 wait desc + 1 space between head and tail
1379         */
1380        while (qi->free_cnt < count + 2) {
1381                raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1382                cpu_relax();
1383                raw_spin_lock_irqsave(&qi->q_lock, flags);
1384        }
1385
1386        index = qi->free_head;
1387        wait_index = (index + count) % QI_LENGTH;
1388        shift = qi_shift(iommu);
1389
1390        for (i = 0; i < count; i++) {
1391                offset = ((index + i) % QI_LENGTH) << shift;
1392                memcpy(qi->desc + offset, &desc[i], 1 << shift);
1393                qi->desc_status[(index + i) % QI_LENGTH] = QI_IN_USE;
1394                trace_qi_submit(iommu, desc[i].qw0, desc[i].qw1,
1395                                desc[i].qw2, desc[i].qw3);
1396        }
1397        qi->desc_status[wait_index] = QI_IN_USE;
1398
1399        wait_desc.qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
1400                        QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
1401        if (options & QI_OPT_WAIT_DRAIN)
1402                wait_desc.qw0 |= QI_IWD_PRQ_DRAIN;
1403        wait_desc.qw1 = virt_to_phys(&qi->desc_status[wait_index]);
1404        wait_desc.qw2 = 0;
1405        wait_desc.qw3 = 0;
1406
1407        offset = wait_index << shift;
1408        memcpy(qi->desc + offset, &wait_desc, 1 << shift);
1409
1410        qi->free_head = (qi->free_head + count + 1) % QI_LENGTH;
1411        qi->free_cnt -= count + 1;
1412
1413        /*
1414         * Update the HW tail register to indicate the presence of
1415         * new descriptors.
1416         */
1417        writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG);
1418
1419        while (qi->desc_status[wait_index] != QI_DONE) {
1420                /*
1421                 * We will leave the interrupts disabled, to prevent interrupt
1422                 * context from queueing another cmd while a cmd is already
1423                 * submitted and waiting for completion on this CPU. This avoids
1424                 * a deadlock where the interrupt context can wait indefinitely
1425                 * for free slots in the queue.
1426                 */
1427                rc = qi_check_fault(iommu, index, wait_index);
1428                if (rc)
1429                        break;
1430
1431                raw_spin_unlock(&qi->q_lock);
1432                cpu_relax();
1433                raw_spin_lock(&qi->q_lock);
1434        }
1435
1436        for (i = 0; i < count; i++)
1437                qi->desc_status[(index + i) % QI_LENGTH] = QI_DONE;
1438
1439        reclaim_free_desc(qi);
1440        raw_spin_unlock_irqrestore(&qi->q_lock, flags);
1441
1442        if (rc == -EAGAIN)
1443                goto restart;
1444
1445        if (iotlb_start_ktime)
1446                dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
1447                                ktime_to_ns(ktime_get()) - iotlb_start_ktime);
1448
1449        if (devtlb_start_ktime)
1450                dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
1451                                ktime_to_ns(ktime_get()) - devtlb_start_ktime);
1452
1453        if (iec_start_ktime)
1454                dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
1455                                ktime_to_ns(ktime_get()) - iec_start_ktime);
1456
1457        return rc;
1458}
1459
1460/*
1461 * Flush the global interrupt entry cache.
1462 */
1463void qi_global_iec(struct intel_iommu *iommu)
1464{
1465        struct qi_desc desc;
1466
1467        desc.qw0 = QI_IEC_TYPE;
1468        desc.qw1 = 0;
1469        desc.qw2 = 0;
1470        desc.qw3 = 0;
1471
1472        /* should never fail */
1473        qi_submit_sync(iommu, &desc, 1, 0);
1474}
1475
1476void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
1477                      u64 type)
1478{
1479        struct qi_desc desc;
1480
1481        desc.qw0 = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
1482                        | QI_CC_GRAN(type) | QI_CC_TYPE;
1483        desc.qw1 = 0;
1484        desc.qw2 = 0;
1485        desc.qw3 = 0;
1486
1487        qi_submit_sync(iommu, &desc, 1, 0);
1488}
1489
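    /*
     * size_order is log2 of the number of 4KB pages covered by the
     * invalidation and is programmed into the address-mask (AM) field
     * of the IOTLB invalidation descriptor.
     */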
1490void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
1491                    unsigned int size_order, u64 type)
1492{
1493        u8 dw = 0, dr = 0;
1494
1495        struct qi_desc desc;
1496        int ih = 0;
1497
1498        if (cap_write_drain(iommu->cap))
1499                dw = 1;
1500
1501        if (cap_read_drain(iommu->cap))
1502                dr = 1;
1503
1504        desc.qw0 = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
1505                | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
1506        desc.qw1 = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
1507                | QI_IOTLB_AM(size_order);
1508        desc.qw2 = 0;
1509        desc.qw3 = 0;
1510
1511        qi_submit_sync(iommu, &desc, 1, 0);
1512}
1513
1514void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1515                        u16 qdep, u64 addr, unsigned mask)
1516{
1517        struct qi_desc desc;
1518
1519        if (mask) {
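                    /*
                     * For an address aligned to a 2^mask page range, setting
                     * every bit below bit (VTD_PAGE_SHIFT + mask - 1) leaves
                     * that bit as the least significant zero bit, which
                     * together with the S bit encodes the invalidation range.
                     */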
1520                addr |= (1ULL << (VTD_PAGE_SHIFT + mask - 1)) - 1;
1521                desc.qw1 = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
1522        } else
1523                desc.qw1 = QI_DEV_IOTLB_ADDR(addr);
1524
1525        if (qdep >= QI_DEV_IOTLB_MAX_INVS)
1526                qdep = 0;
1527
1528        desc.qw0 = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
1529                   QI_DIOTLB_TYPE | QI_DEV_IOTLB_PFSID(pfsid);
1530        desc.qw2 = 0;
1531        desc.qw3 = 0;
1532
1533        qi_submit_sync(iommu, &desc, 1, 0);
1534}
1535
1536/* PASID-based IOTLB invalidation */
1537void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr,
1538                     unsigned long npages, bool ih)
1539{
1540        struct qi_desc desc = {.qw2 = 0, .qw3 = 0};
1541
1542        /*
1543         * npages == -1 requests a PASID-selective invalidation; a
1544         * positive value requests a Page-selective-within-PASID
1545         * invalidation. 0 is not a valid input.
1546         */
1547        if (WARN_ON(!npages)) {
1548                pr_err("Invalid input npages = %ld\n", npages);
1549                return;
1550        }
1551
1552        if (npages == -1) {
1553                desc.qw0 = QI_EIOTLB_PASID(pasid) |
1554                                QI_EIOTLB_DID(did) |
1555                                QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
1556                                QI_EIOTLB_TYPE;
1557                desc.qw1 = 0;
1558        } else {
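                    /*
                     * Round npages up to a power of two: the address-mask
                     * field can only express 2^mask pages, and the address
                     * must be aligned to that (possibly enlarged) range.
                     */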
1559                int mask = ilog2(__roundup_pow_of_two(npages));
1560                unsigned long align = (1ULL << (VTD_PAGE_SHIFT + mask));
1561
1562                if (WARN_ON_ONCE(!IS_ALIGNED(addr, align)))
1563                        addr = ALIGN_DOWN(addr, align);
1564
1565                desc.qw0 = QI_EIOTLB_PASID(pasid) |
1566                                QI_EIOTLB_DID(did) |
1567                                QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) |
1568                                QI_EIOTLB_TYPE;
1569                desc.qw1 = QI_EIOTLB_ADDR(addr) |
1570                                QI_EIOTLB_IH(ih) |
1571                                QI_EIOTLB_AM(mask);
1572        }
1573
1574        qi_submit_sync(iommu, &desc, 1, 0);
1575}
1576
1577/* PASID-based device IOTLB invalidation */
1578void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid,
1579                              u32 pasid,  u16 qdep, u64 addr, unsigned int size_order)
1580{
1581        unsigned long mask = 1UL << (VTD_PAGE_SHIFT + size_order - 1);
1582        struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1583
1584        desc.qw0 = QI_DEV_EIOTLB_PASID(pasid) | QI_DEV_EIOTLB_SID(sid) |
1585                QI_DEV_EIOTLB_QDEP(qdep) | QI_DEIOTLB_TYPE |
1586                QI_DEV_IOTLB_PFSID(pfsid);
1587
1588        /*
1589         * If the S bit is 0, only a single page is flushed. If the S bit
1590         * is set, the least significant zero bit of the address encodes
1591         * the invalidation range; see VT-d spec 6.5.2.6.
1592         * e.g. a zero in address bit 12 indicates 8KB, in bit 13 16KB.
1593         * size_order == 0 corresponds to PAGE_SIZE (4KB).
1594         * Max Invs Pending (MIP) is set to 0 for now until we have DIT in
1595         * ECAP.
1596         */
1597        if (!IS_ALIGNED(addr, VTD_PAGE_SIZE << size_order))
1598                pr_warn_ratelimited("Invalidate non-aligned address %llx, order %d\n",
1599                                    addr, size_order);
1600
1601        /* Take page address */
1602        desc.qw1 = QI_DEV_EIOTLB_ADDR(addr);
1603
1604        if (size_order) {
1605                /*
1606                 * Any 0 bits in the address below size_order would be taken
1607                 * as the least significant zero bit, so set them all to 1
1608                 * to avoid encoding a smaller range than desired.
1609                 */
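                    /*
                     * Worked example: size_order == 2 (16KB) sets address
                     * bits 13:12, then clears bit 13 below so that it becomes
                     * the least significant zero bit, and finally sets the
                     * S bit.
                     */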
1610                desc.qw1 |= GENMASK_ULL(size_order + VTD_PAGE_SHIFT - 1,
1611                                        VTD_PAGE_SHIFT);
1612                /* Clear the size_order bit so it indicates the range size */
1613                desc.qw1 &= ~mask;
1614                /* Set the S bit to indicate flushing more than 1 page */
1615                desc.qw1 |= QI_DEV_EIOTLB_SIZE;
1616        }
1617
1618        qi_submit_sync(iommu, &desc, 1, 0);
1619}
1620
1621void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did,
1622                          u64 granu, u32 pasid)
1623{
1624        struct qi_desc desc = {.qw1 = 0, .qw2 = 0, .qw3 = 0};
1625
1626        desc.qw0 = QI_PC_PASID(pasid) | QI_PC_DID(did) |
1627                        QI_PC_GRAN(granu) | QI_PC_TYPE;
1628        qi_submit_sync(iommu, &desc, 1, 0);
1629}
1630
1631/*
1632 * Disable Queued Invalidation interface.
1633 */
1634void dmar_disable_qi(struct intel_iommu *iommu)
1635{
1636        unsigned long flags;
1637        u32 sts;
1638        cycles_t start_time = get_cycles();
1639
1640        if (!ecap_qis(iommu->ecap))
1641                return;
1642
1643        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1644
1645        sts =  readl(iommu->reg + DMAR_GSTS_REG);
1646        if (!(sts & DMA_GSTS_QIES))
1647                goto end;
1648
1649        /*
1650         * Give the hardware a chance to complete any pending invalidation requests.
1651         */
1652        while ((readl(iommu->reg + DMAR_IQT_REG) !=
1653                readl(iommu->reg + DMAR_IQH_REG)) &&
1654                (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
1655                cpu_relax();
1656
1657        iommu->gcmd &= ~DMA_GCMD_QIE;
1658        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1659
1660        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
1661                      !(sts & DMA_GSTS_QIES), sts);
1662end:
1663        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1664}
1665
1666/*
1667 * Enable queued invalidation.
1668 */
1669static void __dmar_enable_qi(struct intel_iommu *iommu)
1670{
1671        u32 sts;
1672        unsigned long flags;
1673        struct q_inval *qi = iommu->qi;
1674        u64 val = virt_to_phys(qi->desc);
1675
1676        qi->free_head = qi->free_tail = 0;
1677        qi->free_cnt = QI_LENGTH;
1678
1679        /*
1680         * Set DW=1 and QS=1 in IQA_REG when Scalable Mode capability
1681         * is present.
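             * DW selects 256-bit descriptors and QS = 1 sizes the queue at
             * two 4KB pages, matching the allocation in dmar_enable_qi().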
1682         */
1683        if (ecap_smts(iommu->ecap))
1684                val |= (1 << 11) | 1;
1685
1686        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1687
1688        /* write zero to the tail reg */
1689        writel(0, iommu->reg + DMAR_IQT_REG);
1690
1691        dmar_writeq(iommu->reg + DMAR_IQA_REG, val);
1692
1693        iommu->gcmd |= DMA_GCMD_QIE;
1694        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1695
1696        /* Make sure hardware completes it */
1697        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1698
1699        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1700}
1701
1702/*
1703 * Enable Queued Invalidation interface. This is a must to support
1704 * interrupt-remapping. Also used by DMA-remapping, which replaces
1705 * register based IOTLB invalidation.
1706 */
1707int dmar_enable_qi(struct intel_iommu *iommu)
1708{
1709        struct q_inval *qi;
1710        struct page *desc_page;
1711
1712        if (!ecap_qis(iommu->ecap))
1713                return -ENOENT;
1714
1715        /*
1716         * Queued invalidation is already set up and enabled.
1717         */
1718        if (iommu->qi)
1719                return 0;
1720
1721        iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1722        if (!iommu->qi)
1723                return -ENOMEM;
1724
1725        qi = iommu->qi;
1726
1727        /*
1728         * Need two pages to accommodate 256 descriptors of 256 bits each
1729         * if the remapping hardware supports scalable mode translation.
1730         */
1731        desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
1732                                     !!ecap_smts(iommu->ecap));
1733        if (!desc_page) {
1734                kfree(qi);
1735                iommu->qi = NULL;
1736                return -ENOMEM;
1737        }
1738
1739        qi->desc = page_address(desc_page);
1740
1741        qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC);
1742        if (!qi->desc_status) {
1743                free_page((unsigned long) qi->desc);
1744                kfree(qi);
1745                iommu->qi = NULL;
1746                return -ENOMEM;
1747        }
1748
1749        raw_spin_lock_init(&qi->q_lock);
1750
1751        __dmar_enable_qi(iommu);
1752
1753        return 0;
1754}
1755
1756/* IOMMU interrupt handling. Most of it is MSI-like. */
1757
1758enum faulttype {
1759        DMA_REMAP,
1760        INTR_REMAP,
1761        UNKNOWN,
1762};
1763
1764static const char *dma_remap_fault_reasons[] =
1765{
1766        "Software",
1767        "Present bit in root entry is clear",
1768        "Present bit in context entry is clear",
1769        "Invalid context entry",
1770        "Access beyond MGAW",
1771        "PTE Write access is not set",
1772        "PTE Read access is not set",
1773        "Next page table ptr is invalid",
1774        "Root table address invalid",
1775        "Context table ptr is invalid",
1776        "non-zero reserved fields in RTP",
1777        "non-zero reserved fields in CTP",
1778        "non-zero reserved fields in PTE",
1779        "PCE for translation request specifies blocking",
1780};
1781
1782static const char * const dma_remap_sm_fault_reasons[] = {
1783        "SM: Invalid Root Table Address",
1784        "SM: TTM 0 for request with PASID",
1785        "SM: TTM 0 for page group request",
1786        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x33-0x37 */
1787        "SM: Error attempting to access Root Entry",
1788        "SM: Present bit in Root Entry is clear",
1789        "SM: Non-zero reserved field set in Root Entry",
1790        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x3B-0x3F */
1791        "SM: Error attempting to access Context Entry",
1792        "SM: Present bit in Context Entry is clear",
1793        "SM: Non-zero reserved field set in the Context Entry",
1794        "SM: Invalid Context Entry",
1795        "SM: DTE field in Context Entry is clear",
1796        "SM: PASID Enable field in Context Entry is clear",
1797        "SM: PASID is larger than the max in Context Entry",
1798        "SM: PRE field in Context-Entry is clear",
1799        "SM: RID_PASID field error in Context-Entry",
1800        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x49-0x4F */
1801        "SM: Error attempting to access the PASID Directory Entry",
1802        "SM: Present bit in Directory Entry is clear",
1803        "SM: Non-zero reserved field set in PASID Directory Entry",
1804        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x53-0x57 */
1805        "SM: Error attempting to access PASID Table Entry",
1806        "SM: Present bit in PASID Table Entry is clear",
1807        "SM: Non-zero reserved field set in PASID Table Entry",
1808        "SM: Invalid Scalable-Mode PASID Table Entry",
1809        "SM: ERE field is clear in PASID Table Entry",
1810        "SM: SRE field is clear in PASID Table Entry",
1811        "Unknown", "Unknown",/* 0x5E-0x5F */
1812        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x60-0x67 */
1813        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x68-0x6F */
1814        "SM: Error attempting to access first-level paging entry",
1815        "SM: Present bit in first-level paging entry is clear",
1816        "SM: Non-zero reserved field set in first-level paging entry",
1817        "SM: Error attempting to access FL-PML4 entry",
1818        "SM: First-level entry address beyond MGAW in Nested translation",
1819        "SM: Read permission error in FL-PML4 entry in Nested translation",
1820        "SM: Read permission error in first-level paging entry in Nested translation",
1821        "SM: Write permission error in first-level paging entry in Nested translation",
1822        "SM: Error attempting to access second-level paging entry",
1823        "SM: Read/Write permission error in second-level paging entry",
1824        "SM: Non-zero reserved field set in second-level paging entry",
1825        "SM: Invalid second-level page table pointer",
1826        "SM: A/D bit update needed in second-level entry when set up in no snoop",
1827        "Unknown", "Unknown", "Unknown", /* 0x7D-0x7F */
1828        "SM: Address in first-level translation is not canonical",
1829        "SM: U/S set 0 for first-level translation with user privilege",
1830        "SM: No execute permission for request with PASID and ER=1",
1831        "SM: Address beyond the DMA hardware max",
1832        "SM: Second-level entry address beyond the max",
1833        "SM: No write permission for Write/AtomicOp request",
1834        "SM: No read permission for Read/AtomicOp request",
1835        "SM: Invalid address-interrupt address",
1836        "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", /* 0x88-0x8F */
1837        "SM: A/D bit update needed in first-level entry when set up in no snoop",
1838};
1839
1840static const char *irq_remap_fault_reasons[] =
1841{
1842        "Detected reserved fields in the decoded interrupt-remapped request",
1843        "Interrupt index exceeded the interrupt-remapping table size",
1844        "Present field in the IRTE entry is clear",
1845        "Error accessing interrupt-remapping table pointed by IRTA_REG",
1846        "Detected reserved fields in the IRTE entry",
1847        "Blocked a compatibility format interrupt request",
1848        "Blocked an interrupt request due to source-id verification failure",
1849};
1850
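    /*
     * Fault reason encoding: 0x20 and up (within irq_remap_fault_reasons)
     * are interrupt-remapping faults, 0x30 and up are scalable-mode
     * DMA-remapping faults, and the low values index the legacy
     * dma_remap_fault_reasons table.
     */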
1851static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1852{
1853        if (fault_reason >= 0x20 && (fault_reason - 0x20 <
1854                                        ARRAY_SIZE(irq_remap_fault_reasons))) {
1855                *fault_type = INTR_REMAP;
1856                return irq_remap_fault_reasons[fault_reason - 0x20];
1857        } else if (fault_reason >= 0x30 && (fault_reason - 0x30 <
1858                        ARRAY_SIZE(dma_remap_sm_fault_reasons))) {
1859                *fault_type = DMA_REMAP;
1860                return dma_remap_sm_fault_reasons[fault_reason - 0x30];
1861        } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1862                *fault_type = DMA_REMAP;
1863                return dma_remap_fault_reasons[fault_reason];
1864        } else {
1865                *fault_type = UNKNOWN;
1866                return "Unknown";
1867        }
1868}
1869
1870
1871static inline int dmar_msi_reg(struct intel_iommu *iommu, int irq)
1872{
1873        if (iommu->irq == irq)
1874                return DMAR_FECTL_REG;
1875        else if (iommu->pr_irq == irq)
1876                return DMAR_PECTL_REG;
1877        else
1878                BUG();
1879}
1880
1881void dmar_msi_unmask(struct irq_data *data)
1882{
1883        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1884        int reg = dmar_msi_reg(iommu, data->irq);
1885        unsigned long flag;
1886
1887        /* unmask it */
1888        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1889        writel(0, iommu->reg + reg);
1890        /* Read back the register to flush the posted write */
1891        readl(iommu->reg + reg);
1892        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1893}
1894
1895void dmar_msi_mask(struct irq_data *data)
1896{
1897        struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
1898        int reg = dmar_msi_reg(iommu, data->irq);
1899        unsigned long flag;
1900
1901        /* mask it */
1902        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1903        writel(DMA_FECTL_IM, iommu->reg + reg);
1904        /* Read back the register to flush the posted write */
1905        readl(iommu->reg + reg);
1906        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1907}
1908
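    /*
     * The MSI data, low address and high address registers sit at fixed
     * offsets (+4, +8 and +12) from the control register returned by
     * dmar_msi_reg(), which is what the accesses below rely on.
     */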
1909void dmar_msi_write(int irq, struct msi_msg *msg)
1910{
1911        struct intel_iommu *iommu = irq_get_handler_data(irq);
1912        int reg = dmar_msi_reg(iommu, irq);
1913        unsigned long flag;
1914
1915        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1916        writel(msg->data, iommu->reg + reg + 4);
1917        writel(msg->address_lo, iommu->reg + reg + 8);
1918        writel(msg->address_hi, iommu->reg + reg + 12);
1919        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1920}
1921
1922void dmar_msi_read(int irq, struct msi_msg *msg)
1923{
1924        struct intel_iommu *iommu = irq_get_handler_data(irq);
1925        int reg = dmar_msi_reg(iommu, irq);
1926        unsigned long flag;
1927
1928        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1929        msg->data = readl(iommu->reg + reg + 4);
1930        msg->address_lo = readl(iommu->reg + reg + 8);
1931        msg->address_hi = readl(iommu->reg + reg + 12);
1932        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1933}
1934
1935static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1936                u8 fault_reason, u32 pasid, u16 source_id,
1937                unsigned long long addr)
1938{
1939        const char *reason;
1940        int fault_type;
1941
1942        reason = dmar_get_fault_reason(fault_reason, &fault_type);
1943
1944        if (fault_type == INTR_REMAP)
1945                pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
1946                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1947                       PCI_FUNC(source_id & 0xFF), addr >> 48,
1948                       fault_reason, reason);
1949        else if (pasid == INVALID_IOASID)
1950                pr_err("[%s NO_PASID] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1951                       type ? "DMA Read" : "DMA Write",
1952                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1953                       PCI_FUNC(source_id & 0xFF), addr,
1954                       fault_reason, reason);
1955        else
1956                pr_err("[%s PASID 0x%x] Request device [%02x:%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
1957                       type ? "DMA Read" : "DMA Write", pasid,
1958                       source_id >> 8, PCI_SLOT(source_id & 0xFF),
1959                       PCI_FUNC(source_id & 0xFF), addr,
1960                       fault_reason, reason);
1961
1962        return 0;
1963}
1964
1965#define PRIMARY_FAULT_REG_LEN (16)
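    /*
     * Each fault recording register is 16 bytes: the low 64 bits hold the
     * faulting page address, bits 95:64 the source-id and PASID-present
     * flag, and bits 127:96 the PASID value, fault reason, type and the
     * F bit that is polled and cleared below.
     */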
1966irqreturn_t dmar_fault(int irq, void *dev_id)
1967{
1968        struct intel_iommu *iommu = dev_id;
1969        int reg, fault_index;
1970        u32 fault_status;
1971        unsigned long flag;
1972        static DEFINE_RATELIMIT_STATE(rs,
1973                                      DEFAULT_RATELIMIT_INTERVAL,
1974                                      DEFAULT_RATELIMIT_BURST);
1975
1976        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1977        fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1978        if (fault_status && __ratelimit(&rs))
1979                pr_err("DRHD: handling fault status reg %x\n", fault_status);
1980
1981        /* TBD: ignore advanced fault log currently */
1982        if (!(fault_status & DMA_FSTS_PPF))
1983                goto unlock_exit;
1984
1985        fault_index = dma_fsts_fault_record_index(fault_status);
1986        reg = cap_fault_reg_offset(iommu->cap);
1987        while (1) {
1988                /* When ratelimited, skip printing and simply clear the fault */
1989                bool ratelimited = !__ratelimit(&rs);
1990                u8 fault_reason;
1991                u16 source_id;
1992                u64 guest_addr;
1993                u32 pasid;
1994                int type;
1995                u32 data;
1996                bool pasid_present;
1997
1998                /* Highest 32 bits of the fault record */
1999                data = readl(iommu->reg + reg +
2000                                fault_index * PRIMARY_FAULT_REG_LEN + 12);
2001                if (!(data & DMA_FRCD_F))
2002                        break;
2003
2004                if (!ratelimited) {
2005                        fault_reason = dma_frcd_fault_reason(data);
2006                        type = dma_frcd_type(data);
2007
2008                        pasid = dma_frcd_pasid_value(data);
2009                        data = readl(iommu->reg + reg +
2010                                     fault_index * PRIMARY_FAULT_REG_LEN + 8);
2011                        source_id = dma_frcd_source_id(data);
2012
2013                        pasid_present = dma_frcd_pasid_present(data);
2014                        guest_addr = dmar_readq(iommu->reg + reg +
2015                                        fault_index * PRIMARY_FAULT_REG_LEN);
2016                        guest_addr = dma_frcd_page_addr(guest_addr);
2017                }
2018
2019                /* clear the fault */
2020                writel(DMA_FRCD_F, iommu->reg + reg +
2021                        fault_index * PRIMARY_FAULT_REG_LEN + 12);
2022
2023                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2024
2025                if (!ratelimited)
2026                        /* Report INVALID_IOASID (-1) if no PASID is present */
2027                        dmar_fault_do_one(iommu, type, fault_reason,
2028                                          pasid_present ? pasid : INVALID_IOASID,
2029                                          source_id, guest_addr);
2030
2031                fault_index++;
2032                if (fault_index >= cap_num_fault_regs(iommu->cap))
2033                        fault_index = 0;
2034                raw_spin_lock_irqsave(&iommu->register_lock, flag);
2035        }
2036
2037        writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
2038               iommu->reg + DMAR_FSTS_REG);
2039
2040unlock_exit:
2041        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
2042        return IRQ_HANDLED;
2043}
2044
2045int dmar_set_interrupt(struct intel_iommu *iommu)
2046{
2047        int irq, ret;
2048
2049        /*
2050         * Check if the fault interrupt is already initialized.
2051         */
2052        if (iommu->irq)
2053                return 0;
2054
2055        irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
2056        if (irq > 0) {
2057                iommu->irq = irq;
2058        } else {
2059                pr_err("No free IRQ vectors\n");
2060                return -EINVAL;
2061        }
2062
2063        ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
2064        if (ret)
2065                pr_err("Can't request irq\n");
2066        return ret;
2067}
2068
2069int __init enable_drhd_fault_handling(void)
2070{
2071        struct dmar_drhd_unit *drhd;
2072        struct intel_iommu *iommu;
2073
2074        /*
2075         * Enable fault control interrupt.
2076         */
2077        for_each_iommu(iommu, drhd) {
2078                u32 fault_status;
2079                int ret = dmar_set_interrupt(iommu);
2080
2081                if (ret) {
2082                        pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
2083                               (unsigned long long)drhd->reg_base_addr, ret);
2084                        return -1;
2085                }
2086
2087                /*
2088                 * Clear any previous faults.
2089                 */
2090                dmar_fault(iommu->irq, iommu);
2091                fault_status = readl(iommu->reg + DMAR_FSTS_REG);
2092                writel(fault_status, iommu->reg + DMAR_FSTS_REG);
2093        }
2094
2095        return 0;
2096}
2097
2098/*
2099 * Re-enable Queued Invalidation interface.
2100 */
2101int dmar_reenable_qi(struct intel_iommu *iommu)
2102{
2103        if (!ecap_qis(iommu->ecap))
2104                return -ENOENT;
2105
2106        if (!iommu->qi)
2107                return -ENOENT;
2108
2109        /*
2110         * First disable queued invalidation.
2111         */
2112        dmar_disable_qi(iommu);
2113        /*
2114         * Then enable queued invalidation again. Since there is no pending
2115         * invalidation requests now, it's safe to re-enable queued
2116         * invalidation.
2117         */
2118        __dmar_enable_qi(iommu);
2119
2120        return 0;
2121}
2122
2123/*
2124 * Check interrupt remapping support in DMAR table description.
2125 */
2126int __init dmar_ir_support(void)
2127{
2128        struct acpi_table_dmar *dmar;
2129        dmar = (struct acpi_table_dmar *)dmar_tbl;
2130        if (!dmar)
2131                return 0;
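            /* Bit 0 of the DMAR table flags reports interrupt remapping support */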
2132        return dmar->flags & 0x1;
2133}
2134
2135/* Check whether DMAR units are in use */
2136static inline bool dmar_in_use(void)
2137{
2138        return irq_remapping_enabled || intel_iommu_enabled;
2139}
2140
2141static int __init dmar_free_unused_resources(void)
2142{
2143        struct dmar_drhd_unit *dmaru, *dmaru_n;
2144
2145        if (dmar_in_use())
2146                return 0;
2147
2148        if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
2149                bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);
2150
2151        down_write(&dmar_global_lock);
2152        list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
2153                list_del(&dmaru->list);
2154                dmar_free_drhd(dmaru);
2155        }
2156        up_write(&dmar_global_lock);
2157
2158        return 0;
2159}
2160
2161late_initcall(dmar_free_unused_resources);
2162IOMMU_INIT_POST(detect_intel_iommu);
2163
2164/*
2165 * DMAR Hotplug Support
2166 * For more details, please refer to Intel(R) Virtualization Technology
2167 * for Directed-IO Architecture Specification, Rev 2.2, Section 8.8
2168 * "Remapping Hardware Unit Hot Plug".
2169 */
2170static guid_t dmar_hp_guid =
2171        GUID_INIT(0xD8C1A3A6, 0xBE9B, 0x4C9B,
2172                  0x91, 0xBF, 0xC3, 0xCB, 0x81, 0xFC, 0x5D, 0xAF);
2173
2174/*
2175 * Currently there is only one revision and the BIOS does not check the
2176 * revision id, so use 0 for safety.
2177 */
2178#define DMAR_DSM_REV_ID                 0
2179#define DMAR_DSM_FUNC_DRHD              1
2180#define DMAR_DSM_FUNC_ATSR              2
2181#define DMAR_DSM_FUNC_RHSA              3
2182#define DMAR_DSM_FUNC_SATC              4
2183
2184static inline bool dmar_detect_dsm(acpi_handle handle, int func)
2185{
2186        return acpi_check_dsm(handle, &dmar_hp_guid, DMAR_DSM_REV_ID, 1 << func);
2187}
2188
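    /*
     * Evaluate the DMAR _DSM function @func, which returns a buffer of
     * ACPI DMAR remapping structures, and dispatch each entry of the
     * matching type to @handler via dmar_walk_remapping_entries().
     */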
2189static int dmar_walk_dsm_resource(acpi_handle handle, int func,
2190                                  dmar_res_handler_t handler, void *arg)
2191{
2192        int ret = -ENODEV;
2193        union acpi_object *obj;
2194        struct acpi_dmar_header *start;
2195        struct dmar_res_callback callback;
2196        static int res_type[] = {
2197                [DMAR_DSM_FUNC_DRHD] = ACPI_DMAR_TYPE_HARDWARE_UNIT,
2198                [DMAR_DSM_FUNC_ATSR] = ACPI_DMAR_TYPE_ROOT_ATS,
2199                [DMAR_DSM_FUNC_RHSA] = ACPI_DMAR_TYPE_HARDWARE_AFFINITY,
2200                [DMAR_DSM_FUNC_SATC] = ACPI_DMAR_TYPE_SATC,
2201        };
2202
2203        if (!dmar_detect_dsm(handle, func))
2204                return 0;
2205
2206        obj = acpi_evaluate_dsm_typed(handle, &dmar_hp_guid, DMAR_DSM_REV_ID,
2207                                      func, NULL, ACPI_TYPE_BUFFER);
2208        if (!obj)
2209                return -ENODEV;
2210
2211        memset(&callback, 0, sizeof(callback));
2212        callback.cb[res_type[func]] = handler;
2213        callback.arg[res_type[func]] = arg;
2214        start = (struct acpi_dmar_header *)obj->buffer.pointer;
2215        ret = dmar_walk_remapping_entries(start, obj->buffer.length, &callback);
2216
2217        ACPI_FREE(obj);
2218
2219        return ret;
2220}
2221
2222static int dmar_hp_add_drhd(struct acpi_dmar_header *header, void *arg)
2223{
2224        int ret;
2225        struct dmar_drhd_unit *dmaru;
2226
2227        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2228        if (!dmaru)
2229                return -ENODEV;
2230
2231        ret = dmar_ir_hotplug(dmaru, true);
2232        if (ret == 0)
2233                ret = dmar_iommu_hotplug(dmaru, true);
2234
2235        return ret;
2236}
2237
2238static int dmar_hp_remove_drhd(struct acpi_dmar_header *header, void *arg)
2239{
2240        int i, ret;
2241        struct device *dev;
2242        struct dmar_drhd_unit *dmaru;
2243
2244        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2245        if (!dmaru)
2246                return 0;
2247
2248        /*
2249         * All PCI devices managed by this unit should have been destroyed.
2250         */
2251        if (!dmaru->include_all && dmaru->devices && dmaru->devices_cnt) {
2252                for_each_active_dev_scope(dmaru->devices,
2253                                          dmaru->devices_cnt, i, dev)
2254                        return -EBUSY;
2255        }
2256
2257        ret = dmar_ir_hotplug(dmaru, false);
2258        if (ret == 0)
2259                ret = dmar_iommu_hotplug(dmaru, false);
2260
2261        return ret;
2262}
2263
2264static int dmar_hp_release_drhd(struct acpi_dmar_header *header, void *arg)
2265{
2266        struct dmar_drhd_unit *dmaru;
2267
2268        dmaru = dmar_find_dmaru((struct acpi_dmar_hardware_unit *)header);
2269        if (dmaru) {
2270                list_del_rcu(&dmaru->list);
2271                synchronize_rcu();
2272                dmar_free_drhd(dmaru);
2273        }
2274
2275        return 0;
2276}
2277
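    /*
     * Hot-add sequence: validate and parse the DRHD structures reported
     * by _DSM, then parse the RHSA and ATSR structures, and finally bring
     * the new units online. On failure, everything parsed so far is
     * released again in reverse order.
     */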
2278static int dmar_hotplug_insert(acpi_handle handle)
2279{
2280        int ret;
2281        int drhd_count = 0;
2282
2283        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2284                                     &dmar_validate_one_drhd, (void *)1);
2285        if (ret)
2286                goto out;
2287
2288        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2289                                     &dmar_parse_one_drhd, (void *)&drhd_count);
2290        if (ret == 0 && drhd_count == 0) {
2291                pr_warn(FW_BUG "No DRHD structures in buffer returned by _DSM method\n");
2292                goto out;
2293        } else if (ret) {
2294                goto release_drhd;
2295        }
2296
2297        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_RHSA,
2298                                     &dmar_parse_one_rhsa, NULL);
2299        if (ret)
2300                goto release_drhd;
2301
2302        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2303                                     &dmar_parse_one_atsr, NULL);
2304        if (ret)
2305                goto release_atsr;
2306
2307        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2308                                     &dmar_hp_add_drhd, NULL);
2309        if (!ret)
2310                return 0;
2311
2312        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2313                               &dmar_hp_remove_drhd, NULL);
2314release_atsr:
2315        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2316                               &dmar_release_one_atsr, NULL);
2317release_drhd:
2318        dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2319                               &dmar_hp_release_drhd, NULL);
2320out:
2321        return ret;
2322}
2323
2324static int dmar_hotplug_remove(acpi_handle handle)
2325{
2326        int ret;
2327
2328        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2329                                     &dmar_check_one_atsr, NULL);
2330        if (ret)
2331                return ret;
2332
2333        ret = dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2334                                     &dmar_hp_remove_drhd, NULL);
2335        if (ret == 0) {
2336                WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_ATSR,
2337                                               &dmar_release_one_atsr, NULL));
2338                WARN_ON(dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2339                                               &dmar_hp_release_drhd, NULL));
2340        } else {
2341                dmar_walk_dsm_resource(handle, DMAR_DSM_FUNC_DRHD,
2342                                       &dmar_hp_add_drhd, NULL);
2343        }
2344
2345        return ret;
2346}
2347
2348static acpi_status dmar_get_dsm_handle(acpi_handle handle, u32 lvl,
2349                                       void *context, void **retval)
2350{
2351        acpi_handle *phdl = retval;
2352
2353        if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2354                *phdl = handle;
2355                return AE_CTRL_TERMINATE;
2356        }
2357
2358        return AE_OK;
2359}
2360
2361static int dmar_device_hotplug(acpi_handle handle, bool insert)
2362{
2363        int ret;
2364        acpi_handle tmp = NULL;
2365        acpi_status status;
2366
2367        if (!dmar_in_use())
2368                return 0;
2369
2370        if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) {
2371                tmp = handle;
2372        } else {
2373                status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle,
2374                                             ACPI_UINT32_MAX,
2375                                             dmar_get_dsm_handle,
2376                                             NULL, NULL, &tmp);
2377                if (ACPI_FAILURE(status)) {
2378                        pr_warn("Failed to locate _DSM method.\n");
2379                        return -ENXIO;
2380                }
2381        }
2382        if (tmp == NULL)
2383                return 0;
2384
2385        down_write(&dmar_global_lock);
2386        if (insert)
2387                ret = dmar_hotplug_insert(tmp);
2388        else
2389                ret = dmar_hotplug_remove(tmp);
2390        up_write(&dmar_global_lock);
2391
2392        return ret;
2393}
2394
2395int dmar_device_add(acpi_handle handle)
2396{
2397        return dmar_device_hotplug(handle, true);
2398}
2399
2400int dmar_device_remove(acpi_handle handle)
2401{
2402        return dmar_device_hotplug(handle, false);
2403}
2404
2405/*
2406 * dmar_platform_optin - Is %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in DMAR table
2407 *
2408 * Returns true if the platform has %DMA_CTRL_PLATFORM_OPT_IN_FLAG set in
2409 * the ACPI DMAR table. This means that the platform boot firmware has made
2410 * sure no device can issue DMA outside of RMRR regions.
2411 */
2412bool dmar_platform_optin(void)
2413{
2414        struct acpi_table_dmar *dmar;
2415        acpi_status status;
2416        bool ret;
2417
2418        status = acpi_get_table(ACPI_SIG_DMAR, 0,
2419                                (struct acpi_table_header **)&dmar);
2420        if (ACPI_FAILURE(status))
2421                return false;
2422
2423        ret = !!(dmar->flags & DMAR_PLATFORM_OPT_IN);
2424        acpi_put_table((struct acpi_table_header *)dmar);
2425
2426        return ret;
2427}
2428EXPORT_SYMBOL_GPL(dmar_platform_optin);
2429