linux/drivers/iommu/amd_iommu_init.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
  11#include <linux/pci.h>
  12#include <linux/acpi.h>
  13#include <linux/list.h>
  14#include <linux/bitmap.h>
  15#include <linux/slab.h>
  16#include <linux/syscore_ops.h>
  17#include <linux/interrupt.h>
  18#include <linux/msi.h>
  19#include <linux/amd-iommu.h>
  20#include <linux/export.h>
  21#include <linux/iommu.h>
  22#include <linux/kmemleak.h>
  23#include <linux/mem_encrypt.h>
  24#include <asm/pci-direct.h>
  25#include <asm/iommu.h>
  26#include <asm/apic.h>
  27#include <asm/msidef.h>
  28#include <asm/gart.h>
  29#include <asm/x86_init.h>
  30#include <asm/iommu_table.h>
  31#include <asm/io_apic.h>
  32#include <asm/irq_remapping.h>
  33
  34#include <linux/crash_dump.h>
  35#include "amd_iommu.h"
  36#include "amd_iommu_proto.h"
  37#include "amd_iommu_types.h"
  38#include "irq_remapping.h"
  39
  40/*
  41 * definitions for the ACPI scanning code
  42 */
  43#define IVRS_HEADER_LENGTH 48
  44
  45#define ACPI_IVHD_TYPE_MAX_SUPPORTED    0x40
  46#define ACPI_IVMD_TYPE_ALL              0x20
  47#define ACPI_IVMD_TYPE                  0x21
  48#define ACPI_IVMD_TYPE_RANGE            0x22
  49
  50#define IVHD_DEV_ALL                    0x01
  51#define IVHD_DEV_SELECT                 0x02
  52#define IVHD_DEV_SELECT_RANGE_START     0x03
  53#define IVHD_DEV_RANGE_END              0x04
  54#define IVHD_DEV_ALIAS                  0x42
  55#define IVHD_DEV_ALIAS_RANGE            0x43
  56#define IVHD_DEV_EXT_SELECT             0x46
  57#define IVHD_DEV_EXT_SELECT_RANGE       0x47
  58#define IVHD_DEV_SPECIAL                0x48
  59#define IVHD_DEV_ACPI_HID               0xf0
  60
  61#define UID_NOT_PRESENT                 0
  62#define UID_IS_INTEGER                  1
  63#define UID_IS_CHARACTER                2
  64
  65#define IVHD_SPECIAL_IOAPIC             1
  66#define IVHD_SPECIAL_HPET               2
  67
  68#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
  69#define IVHD_FLAG_PASSPW_EN_MASK        0x02
  70#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
  71#define IVHD_FLAG_ISOC_EN_MASK          0x08
  72
  73#define IVMD_FLAG_EXCL_RANGE            0x08
  74#define IVMD_FLAG_UNITY_MAP             0x01
  75
  76#define ACPI_DEVFLAG_INITPASS           0x01
  77#define ACPI_DEVFLAG_EXTINT             0x02
  78#define ACPI_DEVFLAG_NMI                0x04
  79#define ACPI_DEVFLAG_SYSMGT1            0x10
  80#define ACPI_DEVFLAG_SYSMGT2            0x20
  81#define ACPI_DEVFLAG_LINT0              0x40
  82#define ACPI_DEVFLAG_LINT1              0x80
  83#define ACPI_DEVFLAG_ATSDIS             0x10000000
  84
  85#define LOOP_TIMEOUT    100000
  86/*
  87 * ACPI table definitions
  88 *
  89 * These data structures are laid over the table to parse the important values
  90 * out of it.
  91 */
  92
  93extern const struct iommu_ops amd_iommu_ops;
  94
  95/*
  96 * structure describing one IOMMU in the ACPI table. Typically followed by one
   97 * or more ivhd_entry structures.
  98 */
  99struct ivhd_header {
 100        u8 type;
 101        u8 flags;
 102        u16 length;
 103        u16 devid;
 104        u16 cap_ptr;
 105        u64 mmio_phys;
 106        u16 pci_seg;
 107        u16 info;
 108        u32 efr_attr;
 109
 110        /* Following only valid on IVHD type 11h and 40h */
 111        u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 112        u64 res;
 113} __attribute__((packed));
 114
 115/*
 116 * A device entry describing which devices a specific IOMMU translates and
 117 * which requestor ids they use.
 118 */
 119struct ivhd_entry {
 120        u8 type;
 121        u16 devid;
 122        u8 flags;
 123        u32 ext;
 124        u32 hidh;
 125        u64 cid;
 126        u8 uidf;
 127        u8 uidl;
 128        u8 uid;
 129} __attribute__((packed));
 130
 131/*
 132 * An AMD IOMMU memory definition structure. It defines things like exclusion
 133 * ranges for devices and regions that should be unity mapped.
 134 */
 135struct ivmd_header {
 136        u8 type;
 137        u8 flags;
 138        u16 length;
 139        u16 devid;
 140        u16 aux;
 141        u64 resv;
 142        u64 range_start;
 143        u64 range_length;
 144} __attribute__((packed));
 145
 146bool amd_iommu_dump;
 147bool amd_iommu_irq_remap __read_mostly;
 148
 149int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 150static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
 151
 152static bool amd_iommu_detected;
 153static bool __initdata amd_iommu_disabled;
 154static int amd_iommu_target_ivhd_type;
 155
 156u16 amd_iommu_last_bdf;                 /* largest PCI device id we have
 157                                           to handle */
 158LIST_HEAD(amd_iommu_unity_map);         /* a list of required unity mappings
 159                                           we find in ACPI */
 160bool amd_iommu_unmap_flush;             /* if true, flush on every unmap */
 161
 162LIST_HEAD(amd_iommu_list);              /* list of all AMD IOMMUs in the
 163                                           system */
 164
  165/* Array to assign indices to IOMMUs */
 166struct amd_iommu *amd_iommus[MAX_IOMMUS];
 167
 168/* Number of IOMMUs present in the system */
 169static int amd_iommus_present;
 170
 171/* IOMMUs have a non-present cache? */
 172bool amd_iommu_np_cache __read_mostly;
 173bool amd_iommu_iotlb_sup __read_mostly = true;
 174
 175u32 amd_iommu_max_pasid __read_mostly = ~0;
 176
 177bool amd_iommu_v2_present __read_mostly;
 178static bool amd_iommu_pc_present __read_mostly;
 179
 180bool amd_iommu_force_isolation __read_mostly;
 181
 182/*
  183 * Pointer to the device table which is shared by all AMD IOMMUs.
  184 * It is indexed by the PCI device id or the HT unit id and contains
 185 * information about the domain the device belongs to as well as the
 186 * page table root pointer.
 187 */
 188struct dev_table_entry *amd_iommu_dev_table;
 189/*
  190 * Pointer to a device table into which the content of the old device
  191 * table will be copied. It is only used in the kdump kernel.
 192 */
 193static struct dev_table_entry *old_dev_tbl_cpy;
 194
 195/*
 196 * The alias table is a driver specific data structure which contains the
 197 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
 198 * More than one device can share the same requestor id.
 199 */
 200u16 *amd_iommu_alias_table;
 201
 202/*
 203 * The rlookup table is used to find the IOMMU which is responsible
 204 * for a specific device. It is also indexed by the PCI device id.
 205 */
 206struct amd_iommu **amd_iommu_rlookup_table;
 207EXPORT_SYMBOL(amd_iommu_rlookup_table);
 208
 209/*
 210 * This table is used to find the irq remapping table for a given device id
 211 * quickly.
 212 */
 213struct irq_remap_table **irq_lookup_table;
 214
 215/*
 216 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 217 * to know which ones are already in use.
 218 */
 219unsigned long *amd_iommu_pd_alloc_bitmap;
 220
 221static u32 dev_table_size;      /* size of the device table */
 222static u32 alias_table_size;    /* size of the alias table */
  223static u32 rlookup_table_size;  /* size of the rlookup table */
 224
 225enum iommu_init_state {
 226        IOMMU_START_STATE,
 227        IOMMU_IVRS_DETECTED,
 228        IOMMU_ACPI_FINISHED,
 229        IOMMU_ENABLED,
 230        IOMMU_PCI_INIT,
 231        IOMMU_INTERRUPTS_EN,
 232        IOMMU_DMA_OPS,
 233        IOMMU_INITIALIZED,
 234        IOMMU_NOT_FOUND,
 235        IOMMU_INIT_ERROR,
 236        IOMMU_CMDLINE_DISABLED,
 237};
 238
 239/* Early ioapic and hpet maps from kernel command line */
 240#define EARLY_MAP_SIZE          4
 241static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 242static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 243static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 244
 245static int __initdata early_ioapic_map_size;
 246static int __initdata early_hpet_map_size;
 247static int __initdata early_acpihid_map_size;
 248
 249static bool __initdata cmdline_maps;
 250
 251static enum iommu_init_state init_state = IOMMU_START_STATE;
 252
 253static int amd_iommu_enable_interrupts(void);
 254static int __init iommu_go_to_state(enum iommu_init_state state);
 255static void init_device_table_dma(void);
 256
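/*
 * Start out assuming translation was pre-enabled by a previous kernel;
 * init_iommu_one() clears this as soon as one IOMMU is found with
 * translation disabled. Only if it stays true is the old device table
 * copied over in copy_device_table().
 */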
 257static bool amd_iommu_pre_enabled = true;
 258
 259bool translation_pre_enabled(struct amd_iommu *iommu)
 260{
 261        return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 262}
 263EXPORT_SYMBOL(translation_pre_enabled);
 264
 265static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 266{
 267        iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 268}
 269
 270static void init_translation_status(struct amd_iommu *iommu)
 271{
 272        u64 ctrl;
 273
 274        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 275        if (ctrl & (1<<CONTROL_IOMMU_EN))
 276                iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 277}
 278
 279static inline void update_last_devid(u16 devid)
 280{
 281        if (devid > amd_iommu_last_bdf)
 282                amd_iommu_last_bdf = devid;
 283}
 284
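/*
 * Returns the size of a per-device-id table: the number of entries follows
 * from the highest device id seen (amd_iommu_last_bdf), and the result is
 * rounded up to a whole power-of-two number of pages.
 */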
 285static inline unsigned long tbl_size(int entry_size)
 286{
 287        unsigned shift = PAGE_SHIFT +
 288                         get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 289
 290        return 1UL << shift;
 291}
 292
 293int amd_iommu_get_num_iommus(void)
 294{
 295        return amd_iommus_present;
 296}
 297
 298/* Access to l1 and l2 indexed register spaces */
 299
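/*
 * Both spaces are reached through an address/data register pair in the
 * IOMMU's PCI config space: 0xf8/0xfc for L1 and 0xf0/0xf4 for L2. For
 * writes, the address is programmed together with a write-enable bit
 * (bit 31 for L1, bit 8 for L2) before the data register is written.
 */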
 300static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 301{
 302        u32 val;
 303
 304        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 305        pci_read_config_dword(iommu->dev, 0xfc, &val);
 306        return val;
 307}
 308
 309static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 310{
 311        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 312        pci_write_config_dword(iommu->dev, 0xfc, val);
 313        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 314}
 315
 316static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 317{
 318        u32 val;
 319
 320        pci_write_config_dword(iommu->dev, 0xf0, address);
 321        pci_read_config_dword(iommu->dev, 0xf4, &val);
 322        return val;
 323}
 324
 325static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 326{
 327        pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 328        pci_write_config_dword(iommu->dev, 0xf4, val);
 329}
 330
 331/****************************************************************************
 332 *
 333 * AMD IOMMU MMIO register space handling functions
 334 *
 335 * These functions are used to program the IOMMU device registers in
  336 * MMIO space that are required by this driver.
 337 *
 338 ****************************************************************************/
 339
 340/*
  341 * This function sets the exclusion range in the IOMMU. DMA accesses to the
  342 * exclusion range are passed through untranslated.
 343 */
 344static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 345{
 346        u64 start = iommu->exclusion_start & PAGE_MASK;
 347        u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
 348        u64 entry;
 349
 350        if (!iommu->exclusion_start)
 351                return;
 352
 353        entry = start | MMIO_EXCL_ENABLE_MASK;
 354        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 355                        &entry, sizeof(entry));
 356
 357        entry = limit;
 358        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 359                        &entry, sizeof(entry));
 360}
 361
 362/* Programs the physical address of the device table into the IOMMU hardware */
 363static void iommu_set_device_table(struct amd_iommu *iommu)
 364{
 365        u64 entry;
 366
 367        BUG_ON(iommu->mmio_base == NULL);
 368
 369        entry = iommu_virt_to_phys(amd_iommu_dev_table);
 370        entry |= (dev_table_size >> 12) - 1;
 371        memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 372                        &entry, sizeof(entry));
 373}
 374
 375/* Generic functions to enable/disable certain features of the IOMMU. */
 376static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 377{
 378        u64 ctrl;
 379
 380        ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 381        ctrl |= (1ULL << bit);
 382        writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 383}
 384
 385static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 386{
 387        u64 ctrl;
 388
 389        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 390        ctrl &= ~(1ULL << bit);
 391        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 392}
 393
 394static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 395{
 396        u64 ctrl;
 397
 398        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 399        ctrl &= ~CTRL_INV_TO_MASK;
 400        ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 401        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 402}
 403
 404/* Function to enable the hardware */
 405static void iommu_enable(struct amd_iommu *iommu)
 406{
 407        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 408}
 409
 410static void iommu_disable(struct amd_iommu *iommu)
 411{
 412        if (!iommu->mmio_base)
 413                return;
 414
 415        /* Disable command buffer */
 416        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 417
 418        /* Disable event logging and event interrupts */
 419        iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 420        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 421
 422        /* Disable IOMMU GA_LOG */
 423        iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 424        iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 425
 426        /* Disable IOMMU hardware itself */
 427        iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 428}
 429
 430/*
 431 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 432 * the system has one.
 433 */
 434static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 435{
 436        if (!request_mem_region(address, end, "amd_iommu")) {
 437                pr_err("Can not reserve memory region %llx-%llx for mmio\n",
 438                        address, end);
 439                pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
 440                return NULL;
 441        }
 442
 443        return (u8 __iomem *)ioremap_nocache(address, end);
 444}
 445
 446static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 447{
 448        if (iommu->mmio_base)
 449                iounmap(iommu->mmio_base);
 450        release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 451}
 452
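/*
 * Returns the size of the fixed IVHD header for the given type: legacy
 * type 10h headers are 24 bytes, the extended 11h and 40h headers are
 * 40 bytes. A return value of zero means the IVHD type is not supported.
 */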
 453static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 454{
 455        u32 size = 0;
 456
 457        switch (h->type) {
 458        case 0x10:
 459                size = 24;
 460                break;
 461        case 0x11:
 462        case 0x40:
 463                size = 40;
 464                break;
 465        }
 466        return size;
 467}
 468
 469/****************************************************************************
 470 *
 471 * The functions below belong to the first pass of AMD IOMMU ACPI table
 472 * parsing. In this pass we try to find out the highest device id this
  473 * code has to handle. Based on this information the size of the shared data
 474 * structures is determined later.
 475 *
 476 ****************************************************************************/
 477
 478/*
 479 * This function calculates the length of a given IVHD entry
 480 */
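/*
 * Entries with a type below 0x80 encode their length in the two topmost
 * bits of the type byte (4, 8, 16 or 32 bytes); the variable-sized
 * ACPI_HID entry carries the length of its UID at offset 21.
 */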
 481static inline int ivhd_entry_length(u8 *ivhd)
 482{
 483        u32 type = ((struct ivhd_entry *)ivhd)->type;
 484
 485        if (type < 0x80) {
 486                return 0x04 << (*ivhd >> 6);
 487        } else if (type == IVHD_DEV_ACPI_HID) {
 488                /* For ACPI_HID, offset 21 is uid len */
 489                return *((u8 *)ivhd + 21) + 22;
 490        }
 491        return 0;
 492}
 493
 494/*
 495 * After reading the highest device id from the IOMMU PCI capability header
  496 * this function checks whether a higher device id is defined in the ACPI table.
 497 */
 498static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 499{
 500        u8 *p = (void *)h, *end = (void *)h;
 501        struct ivhd_entry *dev;
 502
 503        u32 ivhd_size = get_ivhd_header_size(h);
 504
 505        if (!ivhd_size) {
 506                pr_err("Unsupported IVHD type %#x\n", h->type);
 507                return -EINVAL;
 508        }
 509
 510        p += ivhd_size;
 511        end += h->length;
 512
 513        while (p < end) {
 514                dev = (struct ivhd_entry *)p;
 515                switch (dev->type) {
 516                case IVHD_DEV_ALL:
 517                        /* Use maximum BDF value for DEV_ALL */
 518                        update_last_devid(0xffff);
 519                        break;
 520                case IVHD_DEV_SELECT:
 521                case IVHD_DEV_RANGE_END:
 522                case IVHD_DEV_ALIAS:
 523                case IVHD_DEV_EXT_SELECT:
 524                        /* all the above subfield types refer to device ids */
 525                        update_last_devid(dev->devid);
 526                        break;
 527                default:
 528                        break;
 529                }
 530                p += ivhd_entry_length(p);
 531        }
 532
 533        WARN_ON(p != end);
 534
 535        return 0;
 536}
 537
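/*
 * Verifies the IVRS table checksum: as for any ACPI table, all bytes of
 * the table must sum to zero modulo 256.
 */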
 538static int __init check_ivrs_checksum(struct acpi_table_header *table)
 539{
 540        int i;
 541        u8 checksum = 0, *p = (u8 *)table;
 542
 543        for (i = 0; i < table->length; ++i)
 544                checksum += p[i];
 545        if (checksum != 0) {
 546                /* ACPI table corrupt */
 547                pr_err(FW_BUG "IVRS invalid checksum\n");
 548                return -ENODEV;
 549        }
 550
 551        return 0;
 552}
 553
 554/*
 555 * Iterate over all IVHD entries in the ACPI table and find the highest device
 556 * id which we need to handle. This is the first of three functions which parse
 557 * the ACPI table. So we check the checksum here.
 558 */
 559static int __init find_last_devid_acpi(struct acpi_table_header *table)
 560{
 561        u8 *p = (u8 *)table, *end = (u8 *)table;
 562        struct ivhd_header *h;
 563
 564        p += IVRS_HEADER_LENGTH;
 565
 566        end += table->length;
 567        while (p < end) {
 568                h = (struct ivhd_header *)p;
 569                if (h->type == amd_iommu_target_ivhd_type) {
 570                        int ret = find_last_devid_from_ivhd(h);
 571
 572                        if (ret)
 573                                return ret;
 574                }
 575                p += h->length;
 576        }
 577        WARN_ON(p != end);
 578
 579        return 0;
 580}
 581
 582/****************************************************************************
 583 *
 584 * The following functions belong to the code path which parses the ACPI table
 585 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
 586 * data structures, initialize the device/alias/rlookup table and also
 587 * basically initialize the hardware.
 588 *
 589 ****************************************************************************/
 590
 591/*
 592 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 593 * write commands to that buffer later and the IOMMU will execute them
 594 * asynchronously
 595 */
 596static int __init alloc_command_buffer(struct amd_iommu *iommu)
 597{
 598        iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 599                                                  get_order(CMD_BUFFER_SIZE));
 600
 601        return iommu->cmd_buf ? 0 : -ENOMEM;
 602}
 603
 604/*
 605 * This function resets the command buffer if the IOMMU stopped fetching
 606 * commands from it.
 607 */
 608void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 609{
 610        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 611
 612        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 613        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 614        iommu->cmd_buf_head = 0;
 615        iommu->cmd_buf_tail = 0;
 616
 617        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 618}
 619
 620/*
 621 * This function writes the command buffer address to the hardware and
 622 * enables it.
 623 */
 624static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 625{
 626        u64 entry;
 627
 628        BUG_ON(iommu->cmd_buf == NULL);
 629
 630        entry = iommu_virt_to_phys(iommu->cmd_buf);
 631        entry |= MMIO_CMD_SIZE_512;
 632
 633        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 634                    &entry, sizeof(entry));
 635
 636        amd_iommu_reset_cmd_buffer(iommu);
 637}
 638
 639/*
 640 * This function disables the command buffer
 641 */
 642static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 643{
 644        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 645}
 646
 647static void __init free_command_buffer(struct amd_iommu *iommu)
 648{
 649        free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 650}
 651
 652/* allocates the memory where the IOMMU will log its events to */
 653static int __init alloc_event_buffer(struct amd_iommu *iommu)
 654{
 655        iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 656                                                  get_order(EVT_BUFFER_SIZE));
 657
 658        return iommu->evt_buf ? 0 : -ENOMEM;
 659}
 660
 661static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 662{
 663        u64 entry;
 664
 665        BUG_ON(iommu->evt_buf == NULL);
 666
 667        entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 668
 669        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 670                    &entry, sizeof(entry));
 671
 672        /* set head and tail to zero manually */
 673        writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 674        writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 675
 676        iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 677}
 678
 679/*
 680 * This function disables the event log buffer
 681 */
 682static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 683{
 684        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 685}
 686
 687static void __init free_event_buffer(struct amd_iommu *iommu)
 688{
 689        free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 690}
 691
  692/* allocates the memory where the IOMMU will log peripheral page requests to */
 693static int __init alloc_ppr_log(struct amd_iommu *iommu)
 694{
 695        iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 696                                                  get_order(PPR_LOG_SIZE));
 697
 698        return iommu->ppr_log ? 0 : -ENOMEM;
 699}
 700
 701static void iommu_enable_ppr_log(struct amd_iommu *iommu)
 702{
 703        u64 entry;
 704
 705        if (iommu->ppr_log == NULL)
 706                return;
 707
 708        entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
 709
 710        memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
 711                    &entry, sizeof(entry));
 712
 713        /* set head and tail to zero manually */
 714        writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 715        writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 716
 717        iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
 718        iommu_feature_enable(iommu, CONTROL_PPR_EN);
 719}
 720
 721static void __init free_ppr_log(struct amd_iommu *iommu)
 722{
 723        if (iommu->ppr_log == NULL)
 724                return;
 725
 726        free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
 727}
 728
 729static void free_ga_log(struct amd_iommu *iommu)
 730{
 731#ifdef CONFIG_IRQ_REMAP
 732        if (iommu->ga_log)
 733                free_pages((unsigned long)iommu->ga_log,
 734                            get_order(GA_LOG_SIZE));
 735        if (iommu->ga_log_tail)
 736                free_pages((unsigned long)iommu->ga_log_tail,
 737                            get_order(8));
 738#endif
 739}
 740
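/*
 * Enables the guest vAPIC (GA) log and its interrupt, then polls the
 * status register up to LOOP_TIMEOUT times until the hardware reports
 * the log as running.
 */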
 741static int iommu_ga_log_enable(struct amd_iommu *iommu)
 742{
 743#ifdef CONFIG_IRQ_REMAP
 744        u32 status, i;
 745
 746        if (!iommu->ga_log)
 747                return -EINVAL;
 748
 749        status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 750
 751        /* Check if already running */
 752        if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 753                return 0;
 754
 755        iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 756        iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 757
 758        for (i = 0; i < LOOP_TIMEOUT; ++i) {
 759                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 760                if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 761                        break;
 762        }
 763
 764        if (i >= LOOP_TIMEOUT)
 765                return -EINVAL;
 766#endif /* CONFIG_IRQ_REMAP */
 767        return 0;
 768}
 769
 770#ifdef CONFIG_IRQ_REMAP
 771static int iommu_init_ga_log(struct amd_iommu *iommu)
 772{
 773        u64 entry;
 774
 775        if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 776                return 0;
 777
 778        iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 779                                        get_order(GA_LOG_SIZE));
 780        if (!iommu->ga_log)
 781                goto err_out;
 782
 783        iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 784                                        get_order(8));
 785        if (!iommu->ga_log_tail)
 786                goto err_out;
 787
 788        entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 789        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 790                    &entry, sizeof(entry));
 791        entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
 792                 (BIT_ULL(52)-1)) & ~7ULL;
 793        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 794                    &entry, sizeof(entry));
 795        writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 796        writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 797
 798        return 0;
 799err_out:
 800        free_ga_log(iommu);
 801        return -EINVAL;
 802}
 803#endif /* CONFIG_IRQ_REMAP */
 804
 805static int iommu_init_ga(struct amd_iommu *iommu)
 806{
 807        int ret = 0;
 808
 809#ifdef CONFIG_IRQ_REMAP
 810        /* Note: We have already checked GASup from IVRS table.
 811         *       Now, we need to make sure that GAMSup is set.
 812         */
 813        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
 814            !iommu_feature(iommu, FEATURE_GAM_VAPIC))
 815                amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
 816
 817        ret = iommu_init_ga_log(iommu);
 818#endif /* CONFIG_IRQ_REMAP */
 819
 820        return ret;
 821}
 822
 823static void iommu_enable_xt(struct amd_iommu *iommu)
 824{
 825#ifdef CONFIG_IRQ_REMAP
 826        /*
 827         * XT mode (32-bit APIC destination ID) requires
 828         * GA mode (128-bit IRTE support) as a prerequisite.
 829         */
 830        if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 831            amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 832                iommu_feature_enable(iommu, CONTROL_XT_EN);
 833#endif /* CONFIG_IRQ_REMAP */
 834}
 835
 836static void iommu_enable_gt(struct amd_iommu *iommu)
 837{
 838        if (!iommu_feature(iommu, FEATURE_GT))
 839                return;
 840
 841        iommu_feature_enable(iommu, CONTROL_GT_EN);
 842}
 843
 844/* sets a specific bit in the device table entry. */
 845static void set_dev_entry_bit(u16 devid, u8 bit)
 846{
 847        int i = (bit >> 6) & 0x03;
 848        int _bit = bit & 0x3f;
 849
 850        amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 851}
 852
 853static int get_dev_entry_bit(u16 devid, u8 bit)
 854{
 855        int i = (bit >> 6) & 0x03;
 856        int _bit = bit & 0x3f;
 857
 858        return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 859}
 860
 861
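/*
 * Copies the device table left programmed by the previous (crashed) kernel
 * into old_dev_tbl_cpy so the kdump kernel can keep using its translation
 * and interrupt-remapping settings, and marks the domain ids found in it
 * as allocated. Returns false if the old table cannot be trusted.
 */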
 862static bool copy_device_table(void)
 863{
 864        u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
 865        struct dev_table_entry *old_devtb = NULL;
 866        u32 lo, hi, devid, old_devtb_size;
 867        phys_addr_t old_devtb_phys;
 868        struct amd_iommu *iommu;
 869        u16 dom_id, dte_v, irq_v;
 870        gfp_t gfp_flag;
 871        u64 tmp;
 872
 873        if (!amd_iommu_pre_enabled)
 874                return false;
 875
 876        pr_warn("Translation is already enabled - trying to copy translation structures\n");
 877        for_each_iommu(iommu) {
 878                /* All IOMMUs should use the same device table with the same size */
 879                lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
 880                hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
 881                entry = (((u64) hi) << 32) + lo;
 882                if (last_entry && last_entry != entry) {
 883                        pr_err("IOMMU:%d should use the same dev table as others!\n",
 884                                iommu->index);
 885                        return false;
 886                }
 887                last_entry = entry;
 888
 889                old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
 890                if (old_devtb_size != dev_table_size) {
 891                        pr_err("The device table size of IOMMU:%d is not expected!\n",
 892                                iommu->index);
 893                        return false;
 894                }
 895        }
 896
 897        /*
 898         * When SME is enabled in the first kernel, the entry includes the
  899         * memory encryption mask (sme_me_mask), so we must remove the memory
  900         * encryption mask to obtain the true physical address in the kdump kernel.
 901         */
 902        old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
 903
 904        if (old_devtb_phys >= 0x100000000ULL) {
 905                pr_err("The address of old device table is above 4G, not trustworthy!\n");
 906                return false;
 907        }
 908        old_devtb = (sme_active() && is_kdump_kernel())
 909                    ? (__force void *)ioremap_encrypted(old_devtb_phys,
 910                                                        dev_table_size)
 911                    : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
 912
 913        if (!old_devtb)
 914                return false;
 915
 916        gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
 917        old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
 918                                get_order(dev_table_size));
 919        if (old_dev_tbl_cpy == NULL) {
 920                pr_err("Failed to allocate memory for copying old device table!\n");
 921                return false;
 922        }
 923
 924        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 925                old_dev_tbl_cpy[devid] = old_devtb[devid];
 926                dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
 927                dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
 928
 929                if (dte_v && dom_id) {
 930                        old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
 931                        old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
 932                        __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 933                        /* If gcr3 table existed, mask it out */
 934                        if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
 935                                tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
 936                                tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
 937                                old_dev_tbl_cpy[devid].data[1] &= ~tmp;
 938                                tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
 939                                tmp |= DTE_FLAG_GV;
 940                                old_dev_tbl_cpy[devid].data[0] &= ~tmp;
 941                        }
 942                }
 943
 944                irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
 945                int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
 946                int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
 947                if (irq_v && (int_ctl || int_tab_len)) {
 948                        if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
 949                            (int_tab_len != DTE_IRQ_TABLE_LEN)) {
 950                                pr_err("Wrong old irq remapping flag: %#x\n", devid);
 951                                return false;
 952                        }
 953
 954                        old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
 955                }
 956        }
 957        memunmap(old_devtb);
 958
 959        return true;
 960}
 961
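/*
 * Erratum 63 workaround: if a device's SYSMGT setting is 01b, the IW
 * (write permission) bit must also be set in its device table entry.
 */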
 962void amd_iommu_apply_erratum_63(u16 devid)
 963{
 964        int sysmgt;
 965
 966        sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
 967                 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
 968
 969        if (sysmgt == 0x01)
 970                set_dev_entry_bit(devid, DEV_ENTRY_IW);
 971}
 972
 973/* Writes the specific IOMMU for a device into the rlookup table */
 974static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
 975{
 976        amd_iommu_rlookup_table[devid] = iommu;
 977}
 978
 979/*
 980 * This function takes the device specific flags read from the ACPI
 981 * table and sets up the device table entry with that information
 982 */
 983static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
 984                                           u16 devid, u32 flags, u32 ext_flags)
 985{
 986        if (flags & ACPI_DEVFLAG_INITPASS)
 987                set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
 988        if (flags & ACPI_DEVFLAG_EXTINT)
 989                set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
 990        if (flags & ACPI_DEVFLAG_NMI)
 991                set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
 992        if (flags & ACPI_DEVFLAG_SYSMGT1)
 993                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
 994        if (flags & ACPI_DEVFLAG_SYSMGT2)
 995                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
 996        if (flags & ACPI_DEVFLAG_LINT0)
 997                set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
 998        if (flags & ACPI_DEVFLAG_LINT1)
 999                set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
1000
1001        amd_iommu_apply_erratum_63(devid);
1002
1003        set_iommu_for_device(iommu, devid);
1004}
1005
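/*
 * Records the IOAPIC/HPET id to device id mapping. If a command-line
 * override for the same id is already present, the devid from the command
 * line wins and is passed back through *devid instead.
 */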
1006int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1007{
1008        struct devid_map *entry;
1009        struct list_head *list;
1010
1011        if (type == IVHD_SPECIAL_IOAPIC)
1012                list = &ioapic_map;
1013        else if (type == IVHD_SPECIAL_HPET)
1014                list = &hpet_map;
1015        else
1016                return -EINVAL;
1017
1018        list_for_each_entry(entry, list, list) {
1019                if (!(entry->id == id && entry->cmd_line))
1020                        continue;
1021
1022                pr_info("Command-line override present for %s id %d - ignoring\n",
1023                        type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1024
1025                *devid = entry->devid;
1026
1027                return 0;
1028        }
1029
1030        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1031        if (!entry)
1032                return -ENOMEM;
1033
1034        entry->id       = id;
1035        entry->devid    = *devid;
1036        entry->cmd_line = cmd_line;
1037
1038        list_add_tail(&entry->list, list);
1039
1040        return 0;
1041}
1042
1043static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1044                                      bool cmd_line)
1045{
1046        struct acpihid_map_entry *entry;
1047        struct list_head *list = &acpihid_map;
1048
1049        list_for_each_entry(entry, list, list) {
1050                if (strcmp(entry->hid, hid) ||
1051                    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1052                    !entry->cmd_line)
1053                        continue;
1054
1055                pr_info("Command-line override for hid:%s uid:%s\n",
1056                        hid, uid);
1057                *devid = entry->devid;
1058                return 0;
1059        }
1060
1061        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1062        if (!entry)
1063                return -ENOMEM;
1064
1065        memcpy(entry->uid, uid, strlen(uid));
1066        memcpy(entry->hid, hid, strlen(hid));
1067        entry->devid = *devid;
1068        entry->cmd_line = cmd_line;
1069        entry->root_devid = (entry->devid & (~0x7));
1070
1071        pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1072                entry->cmd_line ? "cmd" : "ivrs",
1073                entry->hid, entry->uid, entry->root_devid);
1074
1075        list_add_tail(&entry->list, list);
1076        return 0;
1077}
1078
1079static int __init add_early_maps(void)
1080{
1081        int i, ret;
1082
1083        for (i = 0; i < early_ioapic_map_size; ++i) {
1084                ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1085                                         early_ioapic_map[i].id,
1086                                         &early_ioapic_map[i].devid,
1087                                         early_ioapic_map[i].cmd_line);
1088                if (ret)
1089                        return ret;
1090        }
1091
1092        for (i = 0; i < early_hpet_map_size; ++i) {
1093                ret = add_special_device(IVHD_SPECIAL_HPET,
1094                                         early_hpet_map[i].id,
1095                                         &early_hpet_map[i].devid,
1096                                         early_hpet_map[i].cmd_line);
1097                if (ret)
1098                        return ret;
1099        }
1100
1101        for (i = 0; i < early_acpihid_map_size; ++i) {
1102                ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1103                                          early_acpihid_map[i].uid,
1104                                          &early_acpihid_map[i].devid,
1105                                          early_acpihid_map[i].cmd_line);
1106                if (ret)
1107                        return ret;
1108        }
1109
1110        return 0;
1111}
1112
1113/*
1114 * Reads the device exclusion range from ACPI and initializes the IOMMU with
1115 * it
1116 */
1117static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
1118{
1119        struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1120
1121        if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
1122                return;
1123
1124        if (iommu) {
1125                /*
 1126                 * We can only configure exclusion ranges per IOMMU, not
 1127                 * per device. But we can enable the exclusion range per
 1128                 * device, which is done here.
1129                 */
1130                set_dev_entry_bit(devid, DEV_ENTRY_EX);
1131                iommu->exclusion_start = m->range_start;
1132                iommu->exclusion_length = m->range_length;
1133        }
1134}
1135
1136/*
1137 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1138 * initializes the hardware and our data structures with it.
1139 */
1140static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1141                                        struct ivhd_header *h)
1142{
1143        u8 *p = (u8 *)h;
1144        u8 *end = p, flags = 0;
1145        u16 devid = 0, devid_start = 0, devid_to = 0;
1146        u32 dev_i, ext_flags = 0;
1147        bool alias = false;
1148        struct ivhd_entry *e;
1149        u32 ivhd_size;
1150        int ret;
1151
1152
1153        ret = add_early_maps();
1154        if (ret)
1155                return ret;
1156
1157        amd_iommu_apply_ivrs_quirks();
1158
1159        /*
1160         * First save the recommended feature enable bits from ACPI
1161         */
1162        iommu->acpi_flags = h->flags;
1163
1164        /*
1165         * Done. Now parse the device entries
1166         */
1167        ivhd_size = get_ivhd_header_size(h);
1168        if (!ivhd_size) {
1169                pr_err("Unsupported IVHD type %#x\n", h->type);
1170                return -EINVAL;
1171        }
1172
1173        p += ivhd_size;
1174
1175        end += h->length;
1176
1177
1178        while (p < end) {
1179                e = (struct ivhd_entry *)p;
1180                switch (e->type) {
1181                case IVHD_DEV_ALL:
1182
1183                        DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1184
1185                        for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
1186                                set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1187                        break;
1188                case IVHD_DEV_SELECT:
1189
1190                        DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
1191                                    "flags: %02x\n",
1192                                    PCI_BUS_NUM(e->devid),
1193                                    PCI_SLOT(e->devid),
1194                                    PCI_FUNC(e->devid),
1195                                    e->flags);
1196
1197                        devid = e->devid;
1198                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1199                        break;
1200                case IVHD_DEV_SELECT_RANGE_START:
1201
1202                        DUMP_printk("  DEV_SELECT_RANGE_START\t "
1203                                    "devid: %02x:%02x.%x flags: %02x\n",
1204                                    PCI_BUS_NUM(e->devid),
1205                                    PCI_SLOT(e->devid),
1206                                    PCI_FUNC(e->devid),
1207                                    e->flags);
1208
1209                        devid_start = e->devid;
1210                        flags = e->flags;
1211                        ext_flags = 0;
1212                        alias = false;
1213                        break;
1214                case IVHD_DEV_ALIAS:
1215
1216                        DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
1217                                    "flags: %02x devid_to: %02x:%02x.%x\n",
1218                                    PCI_BUS_NUM(e->devid),
1219                                    PCI_SLOT(e->devid),
1220                                    PCI_FUNC(e->devid),
1221                                    e->flags,
1222                                    PCI_BUS_NUM(e->ext >> 8),
1223                                    PCI_SLOT(e->ext >> 8),
1224                                    PCI_FUNC(e->ext >> 8));
1225
1226                        devid = e->devid;
1227                        devid_to = e->ext >> 8;
1228                        set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1229                        set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1230                        amd_iommu_alias_table[devid] = devid_to;
1231                        break;
1232                case IVHD_DEV_ALIAS_RANGE:
1233
1234                        DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1235                                    "devid: %02x:%02x.%x flags: %02x "
1236                                    "devid_to: %02x:%02x.%x\n",
1237                                    PCI_BUS_NUM(e->devid),
1238                                    PCI_SLOT(e->devid),
1239                                    PCI_FUNC(e->devid),
1240                                    e->flags,
1241                                    PCI_BUS_NUM(e->ext >> 8),
1242                                    PCI_SLOT(e->ext >> 8),
1243                                    PCI_FUNC(e->ext >> 8));
1244
1245                        devid_start = e->devid;
1246                        flags = e->flags;
1247                        devid_to = e->ext >> 8;
1248                        ext_flags = 0;
1249                        alias = true;
1250                        break;
1251                case IVHD_DEV_EXT_SELECT:
1252
1253                        DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
1254                                    "flags: %02x ext: %08x\n",
1255                                    PCI_BUS_NUM(e->devid),
1256                                    PCI_SLOT(e->devid),
1257                                    PCI_FUNC(e->devid),
1258                                    e->flags, e->ext);
1259
1260                        devid = e->devid;
1261                        set_dev_entry_from_acpi(iommu, devid, e->flags,
1262                                                e->ext);
1263                        break;
1264                case IVHD_DEV_EXT_SELECT_RANGE:
1265
1266                        DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1267                                    "%02x:%02x.%x flags: %02x ext: %08x\n",
1268                                    PCI_BUS_NUM(e->devid),
1269                                    PCI_SLOT(e->devid),
1270                                    PCI_FUNC(e->devid),
1271                                    e->flags, e->ext);
1272
1273                        devid_start = e->devid;
1274                        flags = e->flags;
1275                        ext_flags = e->ext;
1276                        alias = false;
1277                        break;
1278                case IVHD_DEV_RANGE_END:
1279
1280                        DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
1281                                    PCI_BUS_NUM(e->devid),
1282                                    PCI_SLOT(e->devid),
1283                                    PCI_FUNC(e->devid));
1284
1285                        devid = e->devid;
1286                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1287                                if (alias) {
1288                                        amd_iommu_alias_table[dev_i] = devid_to;
1289                                        set_dev_entry_from_acpi(iommu,
1290                                                devid_to, flags, ext_flags);
1291                                }
1292                                set_dev_entry_from_acpi(iommu, dev_i,
1293                                                        flags, ext_flags);
1294                        }
1295                        break;
1296                case IVHD_DEV_SPECIAL: {
1297                        u8 handle, type;
1298                        const char *var;
1299                        u16 devid;
1300                        int ret;
1301
1302                        handle = e->ext & 0xff;
1303                        devid  = (e->ext >>  8) & 0xffff;
1304                        type   = (e->ext >> 24) & 0xff;
1305
1306                        if (type == IVHD_SPECIAL_IOAPIC)
1307                                var = "IOAPIC";
1308                        else if (type == IVHD_SPECIAL_HPET)
1309                                var = "HPET";
1310                        else
1311                                var = "UNKNOWN";
1312
1313                        DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
1314                                    var, (int)handle,
1315                                    PCI_BUS_NUM(devid),
1316                                    PCI_SLOT(devid),
1317                                    PCI_FUNC(devid));
1318
1319                        ret = add_special_device(type, handle, &devid, false);
1320                        if (ret)
1321                                return ret;
1322
1323                        /*
1324                         * add_special_device might update the devid in case a
1325                         * command-line override is present. So call
1326                         * set_dev_entry_from_acpi after add_special_device.
1327                         */
1328                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1329
1330                        break;
1331                }
1332                case IVHD_DEV_ACPI_HID: {
1333                        u16 devid;
1334                        u8 hid[ACPIHID_HID_LEN] = {0};
1335                        u8 uid[ACPIHID_UID_LEN] = {0};
1336                        int ret;
1337
1338                        if (h->type != 0x40) {
1339                                pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1340                                       e->type);
1341                                break;
1342                        }
1343
1344                        memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
1345                        hid[ACPIHID_HID_LEN - 1] = '\0';
1346
1347                        if (!(*hid)) {
1348                                pr_err(FW_BUG "Invalid HID.\n");
1349                                break;
1350                        }
1351
1352                        switch (e->uidf) {
1353                        case UID_NOT_PRESENT:
1354
1355                                if (e->uidl != 0)
1356                                        pr_warn(FW_BUG "Invalid UID length.\n");
1357
1358                                break;
1359                        case UID_IS_INTEGER:
1360
1361                                sprintf(uid, "%d", e->uid);
1362
1363                                break;
1364                        case UID_IS_CHARACTER:
1365
1366                                memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1);
1367                                uid[ACPIHID_UID_LEN - 1] = '\0';
1368
1369                                break;
1370                        default:
1371                                break;
1372                        }
1373
1374                        devid = e->devid;
1375                        DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
1376                                    hid, uid,
1377                                    PCI_BUS_NUM(devid),
1378                                    PCI_SLOT(devid),
1379                                    PCI_FUNC(devid));
1380
1381                        flags = e->flags;
1382
1383                        ret = add_acpi_hid_device(hid, uid, &devid, false);
1384                        if (ret)
1385                                return ret;
1386
1387                        /*
 1388                         * add_acpi_hid_device might update the devid in case a
 1389                         * command-line override is present. So call
 1390                         * set_dev_entry_from_acpi after add_acpi_hid_device.
1391                         */
1392                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1393
1394                        break;
1395                }
1396                default:
1397                        break;
1398                }
1399
1400                p += ivhd_entry_length(p);
1401        }
1402
1403        return 0;
1404}
1405
1406static void __init free_iommu_one(struct amd_iommu *iommu)
1407{
1408        free_command_buffer(iommu);
1409        free_event_buffer(iommu);
1410        free_ppr_log(iommu);
1411        free_ga_log(iommu);
1412        iommu_unmap_mmio_space(iommu);
1413}
1414
1415static void __init free_iommu_all(void)
1416{
1417        struct amd_iommu *iommu, *next;
1418
1419        for_each_iommu_safe(iommu, next) {
1420                list_del(&iommu->list);
1421                free_iommu_one(iommu);
1422                kfree(iommu);
1423        }
1424}
1425
1426/*
1427 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1428 * Workaround:
 1429 *     BIOS should disable L2B miscellaneous clock gating by setting
1430 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1431 */
1432static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1433{
1434        u32 value;
1435
1436        if ((boot_cpu_data.x86 != 0x15) ||
1437            (boot_cpu_data.x86_model < 0x10) ||
1438            (boot_cpu_data.x86_model > 0x1f))
1439                return;
1440
1441        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1442        pci_read_config_dword(iommu->dev, 0xf4, &value);
1443
1444        if (value & BIT(2))
1445                return;
1446
1447        /* Select NB indirect register 0x90 and enable writing */
1448        pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1449
1450        pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1451        pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1452
1453        /* Clear the enable writing bit */
1454        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1455}
1456
1457/*
1458 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1459 * Workaround:
1460 *     BIOS should enable ATS write permission check by setting
1461 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1462 */
1463static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1464{
1465        u32 value;
1466
1467        if ((boot_cpu_data.x86 != 0x15) ||
1468            (boot_cpu_data.x86_model < 0x30) ||
1469            (boot_cpu_data.x86_model > 0x3f))
1470                return;
1471
1472        /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1473        value = iommu_read_l2(iommu, 0x47);
1474
1475        if (value & BIT(0))
1476                return;
1477
1478        /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1479        iommu_write_l2(iommu, 0x47, value | BIT(0));
1480
1481        pci_info(iommu->dev, "Applying ATS write check workaround\n");
1482}
1483
1484/*
 1485 * This function glues the initialization function for one IOMMU
1486 * together and also allocates the command buffer and programs the
1487 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1488 */
1489static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1490{
1491        int ret;
1492
1493        raw_spin_lock_init(&iommu->lock);
1494
1495        /* Add IOMMU to internal data structures */
1496        list_add_tail(&iommu->list, &amd_iommu_list);
1497        iommu->index = amd_iommus_present++;
1498
1499        if (unlikely(iommu->index >= MAX_IOMMUS)) {
1500                WARN(1, "System has more IOMMUs than supported by this driver\n");
1501                return -ENOSYS;
1502        }
1503
1504        /* Index is fine - add IOMMU to the array */
1505        amd_iommus[iommu->index] = iommu;
1506
1507        /*
1508         * Copy data from ACPI table entry to the iommu struct
1509         */
1510        iommu->devid   = h->devid;
1511        iommu->cap_ptr = h->cap_ptr;
1512        iommu->pci_seg = h->pci_seg;
1513        iommu->mmio_phys = h->mmio_phys;
1514
1515        switch (h->type) {
1516        case 0x10:
1517                /* Check if IVHD EFR contains proper max banks/counters */
1518                if ((h->efr_attr != 0) &&
1519                    ((h->efr_attr & (0xF << 13)) != 0) &&
1520                    ((h->efr_attr & (0x3F << 17)) != 0))
1521                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1522                else
1523                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1524                if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1525                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1526                if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0))
1527                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1528                break;
1529        case 0x11:
1530        case 0x40:
1531                if (h->efr_reg & (1 << 9))
1532                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1533                else
1534                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1535                if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0))
1536                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1537                if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0))
1538                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1539                break;
1540        default:
1541                return -EINVAL;
1542        }
1543
1544        iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1545                                                iommu->mmio_phys_end);
1546        if (!iommu->mmio_base)
1547                return -ENOMEM;
1548
1549        if (alloc_command_buffer(iommu))
1550                return -ENOMEM;
1551
1552        if (alloc_event_buffer(iommu))
1553                return -ENOMEM;
1554
1555        iommu->int_enabled = false;
1556
1557        init_translation_status(iommu);
1558        if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1559                iommu_disable(iommu);
1560                clear_translation_pre_enabled(iommu);
1561                pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1562                        iommu->index);
1563        }
1564        if (amd_iommu_pre_enabled)
1565                amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1566
1567        ret = init_iommu_from_acpi(iommu, h);
1568        if (ret)
1569                return ret;
1570
1571        ret = amd_iommu_create_irq_domain(iommu);
1572        if (ret)
1573                return ret;
1574
1575        /*
1576         * Make sure IOMMU is not considered to translate itself. The IVRS
1577         * table tells us so, but this is a lie!
1578         */
1579        amd_iommu_rlookup_table[iommu->devid] = NULL;
1580
1581        return 0;
1582}
1583
1584/**
1585 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1586 * @ivrs: Pointer to the IVRS header
1587 *
1588 * This function searches all IVHDs and returns the highest supported IVHD type
1589 */
1590static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1591{
1592        u8 *base = (u8 *)ivrs;
1593        struct ivhd_header *ivhd = (struct ivhd_header *)
1594                                        (base + IVRS_HEADER_LENGTH);
1595        u8 last_type = ivhd->type;
1596        u16 devid = ivhd->devid;
1597
1598        while (((u8 *)ivhd - base < ivrs->length) &&
1599               (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1600                u8 *p = (u8 *) ivhd;
1601
1602                if (ivhd->devid == devid)
1603                        last_type = ivhd->type;
1604                ivhd = (struct ivhd_header *)(p + ivhd->length);
1605        }
1606
1607        return last_type;
1608}
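/*
 * Illustrative walk-through (hypothetical IVRS layout): if the table
 * carries IVHD blocks of type 0x10, 0x11 and 0x40 for the same IOMMU
 * devid, the loop above updates last_type for every matching block whose
 * type is <= ACPI_IVHD_TYPE_MAX_SUPPORTED, so 0x40 is returned and later
 * becomes amd_iommu_target_ivhd_type.
 */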
1609
1610/*
1611 * Iterates over all IOMMU entries in the ACPI table, allocates the
1612 * IOMMU structure and initializes it with init_iommu_one()
1613 */
1614static int __init init_iommu_all(struct acpi_table_header *table)
1615{
1616        u8 *p = (u8 *)table, *end = (u8 *)table;
1617        struct ivhd_header *h;
1618        struct amd_iommu *iommu;
1619        int ret;
1620
1621        end += table->length;
1622        p += IVRS_HEADER_LENGTH;
1623
1624        while (p < end) {
1625                h = (struct ivhd_header *)p;
1626                if (*p == amd_iommu_target_ivhd_type) {
1627
1628                        DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1629                                    "seg: %d flags: %01x info %04x\n",
1630                                    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1631                                    PCI_FUNC(h->devid), h->cap_ptr,
1632                                    h->pci_seg, h->flags, h->info);
1633                        DUMP_printk("       mmio-addr: %016llx\n",
1634                                    h->mmio_phys);
1635
1636                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1637                        if (iommu == NULL)
1638                                return -ENOMEM;
1639
1640                        ret = init_iommu_one(iommu, h);
1641                        if (ret)
1642                                return ret;
1643                }
1644                p += h->length;
1645
1646        }
1647        WARN_ON(p != end);
1648
1649        return 0;
1650}
1651
1652static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
1653                                u8 fxn, u64 *value, bool is_write);
1654
1655static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1656{
1657        struct pci_dev *pdev = iommu->dev;
1658        u64 val = 0xabcd, val2 = 0;
1659
1660        if (!iommu_feature(iommu, FEATURE_PC))
1661                return;
1662
1663        amd_iommu_pc_present = true;
1664
1665        /* Check if the performance counters can be written to */
1666        if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
1667            (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
1668            (val != val2)) {
1669                pci_err(pdev, "Unable to write to IOMMU perf counter.\n");
1670                amd_iommu_pc_present = false;
1671                return;
1672        }
1673
1674        pci_info(pdev, "IOMMU performance counters supported\n");
1675
1676        val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1677        iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1678        iommu->max_counters = (u8) ((val >> 7) & 0xf);
1679}
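/*
 * Decoding sketch for the counter configuration register read above,
 * using a purely hypothetical raw value: with val = 0x2200,
 * (val >> 12) & 0x3f yields max_banks = 2 and (val >> 7) & 0xf yields
 * max_counters = 4.
 */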
1680
1681static ssize_t amd_iommu_show_cap(struct device *dev,
1682                                  struct device_attribute *attr,
1683                                  char *buf)
1684{
1685        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1686        return sprintf(buf, "%x\n", iommu->cap);
1687}
1688static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1689
1690static ssize_t amd_iommu_show_features(struct device *dev,
1691                                       struct device_attribute *attr,
1692                                       char *buf)
1693{
1694        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1695        return sprintf(buf, "%llx\n", iommu->features);
1696}
1697static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1698
1699static struct attribute *amd_iommu_attrs[] = {
1700        &dev_attr_cap.attr,
1701        &dev_attr_features.attr,
1702        NULL,
1703};
1704
1705static struct attribute_group amd_iommu_group = {
1706        .name = "amd-iommu",
1707        .attrs = amd_iommu_attrs,
1708};
1709
1710static const struct attribute_group *amd_iommu_groups[] = {
1711        &amd_iommu_group,
1712        NULL,
1713};
1714
1715static int __init iommu_init_pci(struct amd_iommu *iommu)
1716{
1717        int cap_ptr = iommu->cap_ptr;
1718        u32 range, misc, low, high;
1719        int ret;
1720
1721        iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
1722                                                 iommu->devid & 0xff);
1723        if (!iommu->dev)
1724                return -ENODEV;
1725
1726        /* Prevent binding other PCI device drivers to IOMMU devices */
1727        iommu->dev->match_driver = false;
1728
1729        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1730                              &iommu->cap);
1731        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
1732                              &range);
1733        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
1734                              &misc);
1735
1736        if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1737                amd_iommu_iotlb_sup = false;
1738
1739        /* read extended feature bits */
1740        low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
1741        high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
1742
1743        iommu->features = ((u64)high << 32) | low;
1744
1745        if (iommu_feature(iommu, FEATURE_GT)) {
1746                int glxval;
1747                u32 max_pasid;
1748                u64 pasmax;
1749
1750                pasmax = iommu->features & FEATURE_PASID_MASK;
1751                pasmax >>= FEATURE_PASID_SHIFT;
1752                max_pasid  = (1 << (pasmax + 1)) - 1;
1753
1754                amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1755
1756                BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1757
1758                glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1759                glxval >>= FEATURE_GLXVAL_SHIFT;
1760
1761                if (amd_iommu_max_glx_val == -1)
1762                        amd_iommu_max_glx_val = glxval;
1763                else
1764                        amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1765        }
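        /*
         * Worked example with an assumed (not hardware-derived) field value:
         * if the PASID field extracted above is pasmax = 0xf, then
         * max_pasid = (1 << (0xf + 1)) - 1 = 0xffff, i.e. 16-bit PASIDs,
         * which is then used to cap amd_iommu_max_pasid via min().
         */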
1766
1767        if (iommu_feature(iommu, FEATURE_GT) &&
1768            iommu_feature(iommu, FEATURE_PPR)) {
1769                iommu->is_iommu_v2   = true;
1770                amd_iommu_v2_present = true;
1771        }
1772
1773        if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
1774                return -ENOMEM;
1775
1776        ret = iommu_init_ga(iommu);
1777        if (ret)
1778                return ret;
1779
1780        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1781                amd_iommu_np_cache = true;
1782
1783        init_iommu_perf_ctr(iommu);
1784
1785        if (is_rd890_iommu(iommu->dev)) {
1786                int i, j;
1787
1788                iommu->root_pdev =
1789                        pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
1790                                                    PCI_DEVFN(0, 0));
1791
1792                /*
1793                 * Some rd890 systems may not be fully reconfigured by the
1794                 * BIOS, so it's necessary for us to store this information so
1795                 * it can be reprogrammed on resume
1796                 */
1797                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1798                                &iommu->stored_addr_lo);
1799                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1800                                &iommu->stored_addr_hi);
1801
1802                /* Low bit locks writes to configuration space */
1803                iommu->stored_addr_lo &= ~1;
1804
1805                for (i = 0; i < 6; i++)
1806                        for (j = 0; j < 0x12; j++)
1807                                iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1808
1809                for (i = 0; i < 0x83; i++)
1810                        iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1811        }
1812
1813        amd_iommu_erratum_746_workaround(iommu);
1814        amd_iommu_ats_write_check_workaround(iommu);
1815
1816        iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
1817                               amd_iommu_groups, "ivhd%d", iommu->index);
1818        iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
1819        iommu_device_register(&iommu->iommu);
1820
1821        return pci_enable_device(iommu->dev);
1822}
1823
1824static void print_iommu_info(void)
1825{
1826        static const char * const feat_str[] = {
1827                "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1828                "IA", "GA", "HE", "PC"
1829        };
1830        struct amd_iommu *iommu;
1831
1832        for_each_iommu(iommu) {
1833                struct pci_dev *pdev = iommu->dev;
1834                int i;
1835
1836                pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
1837
1838                if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1839                        pci_info(pdev, "Extended features (%#llx):\n",
1840                                 iommu->features);
1841                        for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1842                                if (iommu_feature(iommu, (1ULL << i)))
1843                                        pr_cont(" %s", feat_str[i]);
1844                        }
1845
1846                        if (iommu->features & FEATURE_GAM_VAPIC)
1847                                pr_cont(" GA_vAPIC");
1848
1849                        pr_cont("\n");
1850                }
1851        }
1852        if (irq_remapping_enabled) {
1853                pr_info("Interrupt remapping enabled\n");
1854                if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1855                        pr_info("Virtual APIC enabled\n");
1856                if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1857                        pr_info("X2APIC enabled\n");
1858        }
1859}
1860
1861static int __init amd_iommu_init_pci(void)
1862{
1863        struct amd_iommu *iommu;
1864        int ret = 0;
1865
1866        for_each_iommu(iommu) {
1867                ret = iommu_init_pci(iommu);
1868                if (ret)
1869                        break;
1870        }
1871
1872        /*
1873         * Order is important here to make sure any unity map requirements are
1874         * fulfilled. The unity mappings are created and written to the device
1875         * table during the amd_iommu_init_api() call.
1876         *
1877         * After that we call init_device_table_dma() to make sure any
1878         * uninitialized DTE will block DMA, and in the end we flush the caches
1879         * of all IOMMUs to make sure the changes to the device table are
1880         * active.
1881         */
1882        ret = amd_iommu_init_api();
1883
1884        init_device_table_dma();
1885
1886        for_each_iommu(iommu)
1887                iommu_flush_all_caches(iommu);
1888
1889        if (!ret)
1890                print_iommu_info();
1891
1892        return ret;
1893}
1894
1895/****************************************************************************
1896 *
1897 * The following functions initialize the MSI interrupts for all IOMMUs
1898 * in the system. It's a bit challenging because there could be multiple
1899 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1900 * pci_dev.
1901 *
1902 ****************************************************************************/
1903
1904static int iommu_setup_msi(struct amd_iommu *iommu)
1905{
1906        int r;
1907
1908        r = pci_enable_msi(iommu->dev);
1909        if (r)
1910                return r;
1911
1912        r = request_threaded_irq(iommu->dev->irq,
1913                                 amd_iommu_int_handler,
1914                                 amd_iommu_int_thread,
1915                                 0, "AMD-Vi",
1916                                 iommu);
1917
1918        if (r) {
1919                pci_disable_msi(iommu->dev);
1920                return r;
1921        }
1922
1923        iommu->int_enabled = true;
1924
1925        return 0;
1926}
1927
1928#define XT_INT_DEST_MODE(x)     (((x) & 0x1ULL) << 2)
1929#define XT_INT_DEST_LO(x)       (((x) & 0xFFFFFFULL) << 8)
1930#define XT_INT_VEC(x)           (((x) & 0xFFULL) << 32)
1931#define XT_INT_DEST_HI(x)       ((((x) >> 24) & 0xFFULL) << 56)
1932
1933/**
1934 * Set up the IntCapXT registers with interrupt routing information
1935 * based on the PCI MSI capability block registers, accessed via
1936 * MMIO MSI address low/hi and MSI data registers.
1937 */
1938static void iommu_update_intcapxt(struct amd_iommu *iommu)
1939{
1940        u64 val;
1941        u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
1942        u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
1943        u32 data    = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
1944        bool dm     = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
1945        u32 dest    = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
1946
1947        if (x2apic_enabled())
1948                dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
1949
1950        val = XT_INT_VEC(data & 0xFF) |
1951              XT_INT_DEST_MODE(dm) |
1952              XT_INT_DEST_LO(dest) |
1953              XT_INT_DEST_HI(dest);
1954
1955        /**
1956         * The current IOMMU implementation uses the same IRQ for all
1957         * 3 IOMMU interrupts.
1958         */
1959        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
1960        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
1961        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
1962}
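/*
 * Packing sketch with hypothetical MSI values: for vector 0x30 routed to
 * physical destination APIC ID 0x25, the helpers above give
 * XT_INT_VEC(0x30) = 0x30ULL << 32, XT_INT_DEST_MODE(0) = 0,
 * XT_INT_DEST_LO(0x25) = 0x2500 and XT_INT_DEST_HI(0x25) = 0, so
 * val = 0x0000003000002500 is written to all three IntCapXT registers.
 */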
1963
1964static void _irq_notifier_notify(struct irq_affinity_notify *notify,
1965                                 const cpumask_t *mask)
1966{
1967        struct amd_iommu *iommu;
1968
1969        for_each_iommu(iommu) {
1970                if (iommu->dev->irq == notify->irq) {
1971                        iommu_update_intcapxt(iommu);
1972                        break;
1973                }
1974        }
1975}
1976
1977static void _irq_notifier_release(struct kref *ref)
1978{
1979}
1980
1981static int iommu_init_intcapxt(struct amd_iommu *iommu)
1982{
1983        int ret;
1984        struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
1985
1986        /**
1987         * IntCapXT requires XTSup=1, which can be inferred
1988         * from amd_iommu_xt_mode.
1989         */
1990        if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
1991                return 0;
1992
1993        /**
1994         * Also, we need to set up a notifier to update the IntCapXT registers
1995         * whenever the irq affinity is changed from user-space.
1996         */
1997        notify->irq = iommu->dev->irq;
1998        notify->notify = _irq_notifier_notify;
1999        notify->release = _irq_notifier_release;
2000        ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
2001        if (ret) {
2002                pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
2003                       iommu->devid, iommu->dev->irq);
2004                return ret;
2005        }
2006
2007        iommu_update_intcapxt(iommu);
2008        iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2009        return ret;
2010}
2011
2012static int iommu_init_msi(struct amd_iommu *iommu)
2013{
2014        int ret;
2015
2016        if (iommu->int_enabled)
2017                goto enable_faults;
2018
2019        if (iommu->dev->msi_cap)
2020                ret = iommu_setup_msi(iommu);
2021        else
2022                ret = -ENODEV;
2023
2024        if (ret)
2025                return ret;
2026
2027enable_faults:
2028        ret = iommu_init_intcapxt(iommu);
2029        if (ret)
2030                return ret;
2031
2032        iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2033
2034        if (iommu->ppr_log != NULL)
2035                iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
2036
2037        iommu_ga_log_enable(iommu);
2038
2039        return 0;
2040}
2041
2042/****************************************************************************
2043 *
2044 * The next functions belong to the last pass of parsing the ACPI
2045 * table. In this pass the memory mapping requirements are
2046 * gathered (like exclusion and unity mapping ranges).
2047 *
2048 ****************************************************************************/
2049
2050static void __init free_unity_maps(void)
2051{
2052        struct unity_map_entry *entry, *next;
2053
2054        list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
2055                list_del(&entry->list);
2056                kfree(entry);
2057        }
2058}
2059
2060/* called when we find an exclusion range definition in ACPI */
2061static int __init init_exclusion_range(struct ivmd_header *m)
2062{
2063        int i;
2064
2065        switch (m->type) {
2066        case ACPI_IVMD_TYPE:
2067                set_device_exclusion_range(m->devid, m);
2068                break;
2069        case ACPI_IVMD_TYPE_ALL:
2070                for (i = 0; i <= amd_iommu_last_bdf; ++i)
2071                        set_device_exclusion_range(i, m);
2072                break;
2073        case ACPI_IVMD_TYPE_RANGE:
2074                for (i = m->devid; i <= m->aux; ++i)
2075                        set_device_exclusion_range(i, m);
2076                break;
2077        default:
2078                break;
2079        }
2080
2081        return 0;
2082}
2083
2084/* called for unity map ACPI definition */
2085static int __init init_unity_map_range(struct ivmd_header *m)
2086{
2087        struct unity_map_entry *e = NULL;
2088        char *s;
2089
2090        e = kzalloc(sizeof(*e), GFP_KERNEL);
2091        if (e == NULL)
2092                return -ENOMEM;
2093
2094        if (m->flags & IVMD_FLAG_EXCL_RANGE)
2095                init_exclusion_range(m);
2096
2097        switch (m->type) {
2098        default:
2099                kfree(e);
2100                return 0;
2101        case ACPI_IVMD_TYPE:
2102                s = "IVMD_TYPE\t\t\t";
2103                e->devid_start = e->devid_end = m->devid;
2104                break;
2105        case ACPI_IVMD_TYPE_ALL:
2106                s = "IVMD_TYPE_ALL\t\t";
2107                e->devid_start = 0;
2108                e->devid_end = amd_iommu_last_bdf;
2109                break;
2110        case ACPI_IVMD_TYPE_RANGE:
2111                s = "IVMD_TYPE_RANGE\t\t";
2112                e->devid_start = m->devid;
2113                e->devid_end = m->aux;
2114                break;
2115        }
2116        e->address_start = PAGE_ALIGN(m->range_start);
2117        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2118        e->prot = m->flags >> 1;
2119
2120        DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
2121                    " range_start: %016llx range_end: %016llx flags: %x\n", s,
2122                    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2123                    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
2124                    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2125                    e->address_start, e->address_end, m->flags);
2126
2127        list_add_tail(&e->list, &amd_iommu_unity_map);
2128
2129        return 0;
2130}
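/*
 * Address example with made-up IVMD values: a unity range starting at
 * 0xfed00000 with length 0x10000 ends up as address_start = 0xfed00000
 * and address_end = 0xfed10000, since both start and length are rounded
 * up with PAGE_ALIGN() before the entry is added to the list.
 */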
2131
2132/* iterates over all memory definitions we find in the ACPI table */
2133static int __init init_memory_definitions(struct acpi_table_header *table)
2134{
2135        u8 *p = (u8 *)table, *end = (u8 *)table;
2136        struct ivmd_header *m;
2137
2138        end += table->length;
2139        p += IVRS_HEADER_LENGTH;
2140
2141        while (p < end) {
2142                m = (struct ivmd_header *)p;
2143                if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2144                        init_unity_map_range(m);
2145
2146                p += m->length;
2147        }
2148
2149        return 0;
2150}
2151
2152/*
2153 * Init the device table to not allow DMA access for devices
2154 */
2155static void init_device_table_dma(void)
2156{
2157        u32 devid;
2158
2159        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2160                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
2161                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
2162        }
2163}
2164
2165static void __init uninit_device_table_dma(void)
2166{
2167        u32 devid;
2168
2169        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2170                amd_iommu_dev_table[devid].data[0] = 0ULL;
2171                amd_iommu_dev_table[devid].data[1] = 0ULL;
2172        }
2173}
2174
2175static void init_device_table(void)
2176{
2177        u32 devid;
2178
2179        if (!amd_iommu_irq_remap)
2180                return;
2181
2182        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2183                set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
2184}
2185
2186static void iommu_init_flags(struct amd_iommu *iommu)
2187{
2188        iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2189                iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2190                iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2191
2192        iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2193                iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2194                iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2195
2196        iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2197                iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2198                iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2199
2200        iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2201                iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2202                iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2203
2204        /*
2205         * make IOMMU memory accesses cache coherent
2206         */
2207        iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2208
2209        /* Set IOTLB invalidation timeout to 1s */
2210        iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2211}
2212
2213static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2214{
2215        int i, j;
2216        u32 ioc_feature_control;
2217        struct pci_dev *pdev = iommu->root_pdev;
2218
2219        /* RD890 BIOSes may not have completely reconfigured the iommu */
2220        if (!is_rd890_iommu(iommu->dev) || !pdev)
2221                return;
2222
2223        /*
2224         * First, we need to ensure that the iommu is enabled. This is
2225         * controlled by a register in the northbridge
2226         */
2227
2228        /* Select Northbridge indirect register 0x75 and enable writing */
2229        pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2230        pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2231
2232        /* Enable the iommu */
2233        if (!(ioc_feature_control & 0x1))
2234                pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2235
2236        /* Restore the iommu BAR */
2237        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2238                               iommu->stored_addr_lo);
2239        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2240                               iommu->stored_addr_hi);
2241
2242        /* Restore the l1 indirect regs for each of the 6 l1s */
2243        for (i = 0; i < 6; i++)
2244                for (j = 0; j < 0x12; j++)
2245                        iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2246
2247        /* Restore the l2 indirect regs */
2248        for (i = 0; i < 0x83; i++)
2249                iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2250
2251        /* Lock PCI setup registers */
2252        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2253                               iommu->stored_addr_lo | 1);
2254}
2255
2256static void iommu_enable_ga(struct amd_iommu *iommu)
2257{
2258#ifdef CONFIG_IRQ_REMAP
2259        switch (amd_iommu_guest_ir) {
2260        case AMD_IOMMU_GUEST_IR_VAPIC:
2261                iommu_feature_enable(iommu, CONTROL_GAM_EN);
2262                /* Fall through */
2263        case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2264                iommu_feature_enable(iommu, CONTROL_GA_EN);
2265                iommu->irte_ops = &irte_128_ops;
2266                break;
2267        default:
2268                iommu->irte_ops = &irte_32_ops;
2269                break;
2270        }
2271#endif
2272}
2273
2274static void early_enable_iommu(struct amd_iommu *iommu)
2275{
2276        iommu_disable(iommu);
2277        iommu_init_flags(iommu);
2278        iommu_set_device_table(iommu);
2279        iommu_enable_command_buffer(iommu);
2280        iommu_enable_event_buffer(iommu);
2281        iommu_set_exclusion_range(iommu);
2282        iommu_enable_ga(iommu);
2283        iommu_enable_xt(iommu);
2284        iommu_enable(iommu);
2285        iommu_flush_all_caches(iommu);
2286}
2287
2288/*
2289 * This function finally enables all IOMMUs found in the system after
2290 * they have been initialized.
2291 *
2292 * Or, if this is a kdump kernel and all IOMMUs are pre-enabled, try to
2293 * copy the old content of the device table entries. If that is not the
2294 * case or the copy failed, just continue as a normal kernel would.
2295 */
2296static void early_enable_iommus(void)
2297{
2298        struct amd_iommu *iommu;
2299
2300
2301        if (!copy_device_table()) {
2302                /*
2303                 * If we get here because copying the device table from the
2304                 * old kernel (with all IOMMUs enabled) failed, print an error
2305                 * message and try to free the allocated old_dev_tbl_cpy.
2306                 */
2307                if (amd_iommu_pre_enabled)
2308                        pr_err("Failed to copy DEV table from previous kernel.\n");
2309                if (old_dev_tbl_cpy != NULL)
2310                        free_pages((unsigned long)old_dev_tbl_cpy,
2311                                        get_order(dev_table_size));
2312
2313                for_each_iommu(iommu) {
2314                        clear_translation_pre_enabled(iommu);
2315                        early_enable_iommu(iommu);
2316                }
2317        } else {
2318                pr_info("Copied DEV table from previous kernel.\n");
2319                free_pages((unsigned long)amd_iommu_dev_table,
2320                                get_order(dev_table_size));
2321                amd_iommu_dev_table = old_dev_tbl_cpy;
2322                for_each_iommu(iommu) {
2323                        iommu_disable_command_buffer(iommu);
2324                        iommu_disable_event_buffer(iommu);
2325                        iommu_enable_command_buffer(iommu);
2326                        iommu_enable_event_buffer(iommu);
2327                        iommu_enable_ga(iommu);
2328                        iommu_enable_xt(iommu);
2329                        iommu_set_device_table(iommu);
2330                        iommu_flush_all_caches(iommu);
2331                }
2332        }
2333
2334#ifdef CONFIG_IRQ_REMAP
2335        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2336                amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2337#endif
2338}
2339
2340static void enable_iommus_v2(void)
2341{
2342        struct amd_iommu *iommu;
2343
2344        for_each_iommu(iommu) {
2345                iommu_enable_ppr_log(iommu);
2346                iommu_enable_gt(iommu);
2347        }
2348}
2349
2350static void enable_iommus(void)
2351{
2352        early_enable_iommus();
2353
2354        enable_iommus_v2();
2355}
2356
2357static void disable_iommus(void)
2358{
2359        struct amd_iommu *iommu;
2360
2361        for_each_iommu(iommu)
2362                iommu_disable(iommu);
2363
2364#ifdef CONFIG_IRQ_REMAP
2365        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2366                amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2367#endif
2368}
2369
2370/*
2371 * Suspend/Resume support
2372 * disable suspend until real resume implemented
2373 */
2374
2375static void amd_iommu_resume(void)
2376{
2377        struct amd_iommu *iommu;
2378
2379        for_each_iommu(iommu)
2380                iommu_apply_resume_quirks(iommu);
2381
2382        /* re-load the hardware */
2383        enable_iommus();
2384
2385        amd_iommu_enable_interrupts();
2386}
2387
2388static int amd_iommu_suspend(void)
2389{
2390        /* disable IOMMUs to go out of the way for BIOS */
2391        disable_iommus();
2392
2393        return 0;
2394}
2395
2396static struct syscore_ops amd_iommu_syscore_ops = {
2397        .suspend = amd_iommu_suspend,
2398        .resume = amd_iommu_resume,
2399};
2400
2401static void __init free_iommu_resources(void)
2402{
2403        kmemleak_free(irq_lookup_table);
2404        free_pages((unsigned long)irq_lookup_table,
2405                   get_order(rlookup_table_size));
2406        irq_lookup_table = NULL;
2407
2408        kmem_cache_destroy(amd_iommu_irq_cache);
2409        amd_iommu_irq_cache = NULL;
2410
2411        free_pages((unsigned long)amd_iommu_rlookup_table,
2412                   get_order(rlookup_table_size));
2413        amd_iommu_rlookup_table = NULL;
2414
2415        free_pages((unsigned long)amd_iommu_alias_table,
2416                   get_order(alias_table_size));
2417        amd_iommu_alias_table = NULL;
2418
2419        free_pages((unsigned long)amd_iommu_dev_table,
2420                   get_order(dev_table_size));
2421        amd_iommu_dev_table = NULL;
2422
2423        free_iommu_all();
2424}
2425
2426/* SB IOAPIC is always on this device in AMD systems */
2427#define IOAPIC_SB_DEVID         ((0x00 << 8) | PCI_DEVFN(0x14, 0))
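/*
 * For reference, PCI_DEVFN(0x14, 0) evaluates to (0x14 << 3) | 0 = 0xa0,
 * so IOAPIC_SB_DEVID is devid 0x00a0: bus 0, device 0x14, function 0.
 */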
2428
2429static bool __init check_ioapic_information(void)
2430{
2431        const char *fw_bug = FW_BUG;
2432        bool ret, has_sb_ioapic;
2433        int idx;
2434
2435        has_sb_ioapic = false;
2436        ret           = false;
2437
2438        /*
2439         * If we have map overrides on the kernel command line the
2440         * messages in this function might not describe firmware bugs
2441         * anymore - so be careful
2442         */
2443        if (cmdline_maps)
2444                fw_bug = "";
2445
2446        for (idx = 0; idx < nr_ioapics; idx++) {
2447                int devid, id = mpc_ioapic_id(idx);
2448
2449                devid = get_ioapic_devid(id);
2450                if (devid < 0) {
2451                        pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2452                                fw_bug, id);
2453                        ret = false;
2454                } else if (devid == IOAPIC_SB_DEVID) {
2455                        has_sb_ioapic = true;
2456                        ret           = true;
2457                }
2458        }
2459
2460        if (!has_sb_ioapic) {
2461                /*
2462                 * We expect the SB IOAPIC to be listed in the IVRS
2463                 * table. The system timer is connected to the SB IOAPIC
2464                 * and if we don't have it in the list the system will
2465                 * panic at boot time.  This situation usually happens
2466                 * when the BIOS is buggy and provides us the wrong
2467                 * device id for the IOAPIC in the system.
2468                 */
2469                pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2470        }
2471
2472        if (!ret)
2473                pr_err("Disabling interrupt remapping\n");
2474
2475        return ret;
2476}
2477
2478static void __init free_dma_resources(void)
2479{
2480        free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2481                   get_order(MAX_DOMAIN_ID/8));
2482        amd_iommu_pd_alloc_bitmap = NULL;
2483
2484        free_unity_maps();
2485}
2486
2487/*
2488 * This is the hardware init function for AMD IOMMU in the system.
2489 * This function is called either from amd_iommu_init or from the interrupt
2490 * remapping setup code.
2491 *
2492 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2493 * four times:
2494 *
2495 *      1 pass) Discover the most comprehensive IVHD type to use.
2496 *
2497 *      2 pass) Find the highest PCI device id the driver has to handle.
2498 *              Based on this information the size of the data structures
2499 *              that need to be allocated is determined.
2500 *
2501 *      3 pass) Initialize the data structures just allocated with the
2502 *              information in the ACPI table about available AMD IOMMUs
2503 *              in the system. It also maps the PCI devices in the
2504 *              system to specific IOMMUs
2505 *
2506 *      4 pass) After the basic data structures are allocated and
2507 *              initialized we update them with information about memory
2508 *              remapping requirements parsed out of the ACPI table in
2509 *              this last pass.
2510 *
2511 * After everything is set up the IOMMUs are enabled and the necessary
2512 * hotplug and suspend notifiers are registered.
2513 */
2514static int __init early_amd_iommu_init(void)
2515{
2516        struct acpi_table_header *ivrs_base;
2517        acpi_status status;
2518        int i, remap_cache_sz, ret = 0;
2519
2520        if (!amd_iommu_detected)
2521                return -ENODEV;
2522
2523        status = acpi_get_table("IVRS", 0, &ivrs_base);
2524        if (status == AE_NOT_FOUND)
2525                return -ENODEV;
2526        else if (ACPI_FAILURE(status)) {
2527                const char *err = acpi_format_exception(status);
2528                pr_err("IVRS table error: %s\n", err);
2529                return -EINVAL;
2530        }
2531
2532        /*
2533         * Validate checksum here so we don't need to do it when
2534         * we actually parse the table
2535         */
2536        ret = check_ivrs_checksum(ivrs_base);
2537        if (ret)
2538                goto out;
2539
2540        amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2541        DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
2542
2543        /*
2544         * First parse ACPI tables to find the largest Bus/Dev/Func
2545         * we need to handle. Based on this information the shared data
2546         * structures for the IOMMUs in the system will be allocated.
2547         */
2548        ret = find_last_devid_acpi(ivrs_base);
2549        if (ret)
2550                goto out;
2551
2552        dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
2553        alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
2554        rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
2555
2556        /* Device table - directly used by all IOMMUs */
2557        ret = -ENOMEM;
2558        amd_iommu_dev_table = (void *)__get_free_pages(
2559                                      GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
2560                                      get_order(dev_table_size));
2561        if (amd_iommu_dev_table == NULL)
2562                goto out;
2563
2564        /*
2565         * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
2566         * IOMMU sees for that device
2567         */
2568        amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
2569                        get_order(alias_table_size));
2570        if (amd_iommu_alias_table == NULL)
2571                goto out;
2572
2573        /* IOMMU rlookup table - find the IOMMU for a specific device */
2574        amd_iommu_rlookup_table = (void *)__get_free_pages(
2575                        GFP_KERNEL | __GFP_ZERO,
2576                        get_order(rlookup_table_size));
2577        if (amd_iommu_rlookup_table == NULL)
2578                goto out;
2579
2580        amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
2581                                            GFP_KERNEL | __GFP_ZERO,
2582                                            get_order(MAX_DOMAIN_ID/8));
2583        if (amd_iommu_pd_alloc_bitmap == NULL)
2584                goto out;
2585
2586        /*
2587         * let all alias entries point to themselves
2588         */
2589        for (i = 0; i <= amd_iommu_last_bdf; ++i)
2590                amd_iommu_alias_table[i] = i;
2591
2592        /*
2593         * never allocate domain 0 because it's used as the non-allocated and
2594         * error value placeholder
2595         */
2596        __set_bit(0, amd_iommu_pd_alloc_bitmap);
2597
2598        /*
2599         * now that the data structures are allocated and basically
2600         * initialized, start the real ACPI table scan
2601         */
2602        ret = init_iommu_all(ivrs_base);
2603        if (ret)
2604                goto out;
2605
2606        /* Disable any previously enabled IOMMUs */
2607        if (!is_kdump_kernel() || amd_iommu_disabled)
2608                disable_iommus();
2609
2610        if (amd_iommu_irq_remap)
2611                amd_iommu_irq_remap = check_ioapic_information();
2612
2613        if (amd_iommu_irq_remap) {
2614                /*
2615                 * Interrupt remapping enabled, create kmem_cache for the
2616                 * remapping tables.
2617                 */
2618                ret = -ENOMEM;
2619                if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2620                        remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
2621                else
2622                        remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
2623                amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
2624                                                        remap_cache_sz,
2625                                                        IRQ_TABLE_ALIGNMENT,
2626                                                        0, NULL);
2627                if (!amd_iommu_irq_cache)
2628                        goto out;
2629
2630                irq_lookup_table = (void *)__get_free_pages(
2631                                GFP_KERNEL | __GFP_ZERO,
2632                                get_order(rlookup_table_size));
2633                kmemleak_alloc(irq_lookup_table, rlookup_table_size,
2634                               1, GFP_KERNEL);
2635                if (!irq_lookup_table)
2636                        goto out;
2637        }
2638
2639        ret = init_memory_definitions(ivrs_base);
2640        if (ret)
2641                goto out;
2642
2643        /* init the device table */
2644        init_device_table();
2645
2646out:
2647        /* Don't leak any ACPI memory */
2648        acpi_put_table(ivrs_base);
2649        ivrs_base = NULL;
2650
2651        return ret;
2652}
2653
2654static int amd_iommu_enable_interrupts(void)
2655{
2656        struct amd_iommu *iommu;
2657        int ret = 0;
2658
2659        for_each_iommu(iommu) {
2660                ret = iommu_init_msi(iommu);
2661                if (ret)
2662                        goto out;
2663        }
2664
2665out:
2666        return ret;
2667}
2668
2669static bool detect_ivrs(void)
2670{
2671        struct acpi_table_header *ivrs_base;
2672        acpi_status status;
2673
2674        status = acpi_get_table("IVRS", 0, &ivrs_base);
2675        if (status == AE_NOT_FOUND)
2676                return false;
2677        else if (ACPI_FAILURE(status)) {
2678                const char *err = acpi_format_exception(status);
2679                pr_err("IVRS table error: %s\n", err);
2680                return false;
2681        }
2682
2683        acpi_put_table(ivrs_base);
2684
2685        /* Make sure ACS will be enabled during PCI probe */
2686        pci_request_acs();
2687
2688        return true;
2689}
2690
2691/****************************************************************************
2692 *
2693 * AMD IOMMU Initialization State Machine
2694 *
2695 ****************************************************************************/
2696
2697static int __init state_next(void)
2698{
2699        int ret = 0;
2700
2701        switch (init_state) {
2702        case IOMMU_START_STATE:
2703                if (!detect_ivrs()) {
2704                        init_state      = IOMMU_NOT_FOUND;
2705                        ret             = -ENODEV;
2706                } else {
2707                        init_state      = IOMMU_IVRS_DETECTED;
2708                }
2709                break;
2710        case IOMMU_IVRS_DETECTED:
2711                ret = early_amd_iommu_init();
2712                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2713                if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2714                        pr_info("AMD IOMMU disabled on kernel command-line\n");
2715                        init_state = IOMMU_CMDLINE_DISABLED;
2716                        ret = -EINVAL;
2717                }
2718                break;
2719        case IOMMU_ACPI_FINISHED:
2720                early_enable_iommus();
2721                x86_platform.iommu_shutdown = disable_iommus;
2722                init_state = IOMMU_ENABLED;
2723                break;
2724        case IOMMU_ENABLED:
2725                register_syscore_ops(&amd_iommu_syscore_ops);
2726                ret = amd_iommu_init_pci();
2727                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2728                enable_iommus_v2();
2729                break;
2730        case IOMMU_PCI_INIT:
2731                ret = amd_iommu_enable_interrupts();
2732                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2733                break;
2734        case IOMMU_INTERRUPTS_EN:
2735                ret = amd_iommu_init_dma_ops();
2736                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2737                break;
2738        case IOMMU_DMA_OPS:
2739                init_state = IOMMU_INITIALIZED;
2740                break;
2741        case IOMMU_INITIALIZED:
2742                /* Nothing to do */
2743                break;
2744        case IOMMU_NOT_FOUND:
2745        case IOMMU_INIT_ERROR:
2746        case IOMMU_CMDLINE_DISABLED:
2747                /* Error states => do nothing */
2748                ret = -EINVAL;
2749                break;
2750        default:
2751                /* Unknown state */
2752                BUG();
2753        }
2754
2755        if (ret) {
2756                free_dma_resources();
2757                if (!irq_remapping_enabled) {
2758                        disable_iommus();
2759                        free_iommu_resources();
2760                } else {
2761                        struct amd_iommu *iommu;
2762
2763                        uninit_device_table_dma();
2764                        for_each_iommu(iommu)
2765                                iommu_flush_all_caches(iommu);
2766                }
2767        }
2768        return ret;
2769}
2770
2771static int __init iommu_go_to_state(enum iommu_init_state state)
2772{
2773        int ret = -EINVAL;
2774
2775        while (init_state != state) {
2776                if (init_state == IOMMU_NOT_FOUND         ||
2777                    init_state == IOMMU_INIT_ERROR        ||
2778                    init_state == IOMMU_CMDLINE_DISABLED)
2779                        break;
2780                ret = state_next();
2781        }
2782
2783        return ret;
2784}
2785
2786#ifdef CONFIG_IRQ_REMAP
2787int __init amd_iommu_prepare(void)
2788{
2789        int ret;
2790
2791        amd_iommu_irq_remap = true;
2792
2793        ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
2794        if (ret)
2795                return ret;
2796        return amd_iommu_irq_remap ? 0 : -ENODEV;
2797}
2798
2799int __init amd_iommu_enable(void)
2800{
2801        int ret;
2802
2803        ret = iommu_go_to_state(IOMMU_ENABLED);
2804        if (ret)
2805                return ret;
2806
2807        irq_remapping_enabled = 1;
2808        return amd_iommu_xt_mode;
2809}
2810
2811void amd_iommu_disable(void)
2812{
2813        amd_iommu_suspend();
2814}
2815
2816int amd_iommu_reenable(int mode)
2817{
2818        amd_iommu_resume();
2819
2820        return 0;
2821}
2822
2823int __init amd_iommu_enable_faulting(void)
2824{
2825        /* We enable MSI later when PCI is initialized */
2826        return 0;
2827}
2828#endif
2829
2830/*
2831 * This is the core init function for AMD IOMMU hardware in the system.
2832 * This function is called from the generic x86 DMA layer initialization
2833 * code.
2834 */
2835static int __init amd_iommu_init(void)
2836{
2837        struct amd_iommu *iommu;
2838        int ret;
2839
2840        ret = iommu_go_to_state(IOMMU_INITIALIZED);
2841#ifdef CONFIG_GART_IOMMU
2842        if (ret && list_empty(&amd_iommu_list)) {
2843                /*
2844                 * We failed to initialize the AMD IOMMU - try fallback
2845                 * to GART if possible.
2846                 */
2847                gart_iommu_init();
2848        }
2849#endif
2850
2851        for_each_iommu(iommu)
2852                amd_iommu_debugfs_setup(iommu);
2853
2854        return ret;
2855}
2856
2857static bool amd_iommu_sme_check(void)
2858{
2859        if (!sme_active() || (boot_cpu_data.x86 != 0x17))
2860                return true;
2861
2862        /* For Fam17h, a specific level of support is required */
2863        if (boot_cpu_data.microcode >= 0x08001205)
2864                return true;
2865
2866        if ((boot_cpu_data.microcode >= 0x08001126) &&
2867            (boot_cpu_data.microcode <= 0x080011ff))
2868                return true;
2869
2870        pr_notice("IOMMU not currently supported when SME is active\n");
2871
2872        return false;
2873}
2874
2875/****************************************************************************
2876 *
2877 * Early detect code. This code runs at IOMMU detection time in the DMA
2878 * layer. It just looks if there is an IVRS ACPI table to detect AMD
2879 * IOMMUs
2880 *
2881 ****************************************************************************/
2882int __init amd_iommu_detect(void)
2883{
2884        int ret;
2885
2886        if (no_iommu || (iommu_detected && !gart_iommu_aperture))
2887                return -ENODEV;
2888
2889        if (!amd_iommu_sme_check())
2890                return -ENODEV;
2891
2892        ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
2893        if (ret)
2894                return ret;
2895
2896        amd_iommu_detected = true;
2897        iommu_detected = 1;
2898        x86_init.iommu.iommu_init = amd_iommu_init;
2899
2900        return 1;
2901}
2902
2903/****************************************************************************
2904 *
2905 * Parsing functions for the AMD IOMMU specific kernel command line
2906 * options.
2907 *
2908 ****************************************************************************/
2909
2910static int __init parse_amd_iommu_dump(char *str)
2911{
2912        amd_iommu_dump = true;
2913
2914        return 1;
2915}
2916
2917static int __init parse_amd_iommu_intr(char *str)
2918{
2919        for (; *str; ++str) {
2920                if (strncmp(str, "legacy", 6) == 0) {
2921                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
2922                        break;
2923                }
2924                if (strncmp(str, "vapic", 5) == 0) {
2925                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
2926                        break;
2927                }
2928        }
2929        return 1;
2930}
2931
2932static int __init parse_amd_iommu_options(char *str)
2933{
2934        for (; *str; ++str) {
2935                if (strncmp(str, "fullflush", 9) == 0)
2936                        amd_iommu_unmap_flush = true;
2937                if (strncmp(str, "off", 3) == 0)
2938                        amd_iommu_disabled = true;
2939                if (strncmp(str, "force_isolation", 15) == 0)
2940                        amd_iommu_force_isolation = true;
2941        }
2942
2943        return 1;
2944}
2945
2946static int __init parse_ivrs_ioapic(char *str)
2947{
2948        unsigned int bus, dev, fn;
2949        int ret, id, i;
2950        u16 devid;
2951
2952        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2953
2954        if (ret != 4) {
2955                pr_err("Invalid command line: ivrs_ioapic%s\n", str);
2956                return 1;
2957        }
2958
2959        if (early_ioapic_map_size == EARLY_MAP_SIZE) {
2960                pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
2961                        str);
2962                return 1;
2963        }
2964
2965        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2966
2967        cmdline_maps                    = true;
2968        i                               = early_ioapic_map_size++;
2969        early_ioapic_map[i].id          = id;
2970        early_ioapic_map[i].devid       = devid;
2971        early_ioapic_map[i].cmd_line    = true;
2972
2973        return 1;
2974}
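/*
 * Example invocation (hypothetical IOAPIC id and BDF): an option such as
 * ivrs_ioapic[4]=00:14.0 parses into id = 4 and
 * devid = (0x00 << 8) | (0x14 << 3) | 0x0 = 0x00a0, which is recorded in
 * early_ioapic_map[] as a command-line override.
 */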
2975
2976static int __init parse_ivrs_hpet(char *str)
2977{
2978        unsigned int bus, dev, fn;
2979        int ret, id, i;
2980        u16 devid;
2981
2982        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2983
2984        if (ret != 4) {
2985                pr_err("Invalid command line: ivrs_hpet%s\n", str);
2986                return 1;
2987        }
2988
2989        if (early_hpet_map_size == EARLY_MAP_SIZE) {
2990                pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
2991                        str);
2992                return 1;
2993        }
2994
2995        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2996
2997        cmdline_maps                    = true;
2998        i                               = early_hpet_map_size++;
2999        early_hpet_map[i].id            = id;
3000        early_hpet_map[i].devid         = devid;
3001        early_hpet_map[i].cmd_line      = true;
3002
3003        return 1;
3004}
3005
3006static int __init parse_ivrs_acpihid(char *str)
3007{
3008        u32 bus, dev, fn;
3009        char *hid, *uid, *p;
3010        char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3011        int ret, i;
3012
3013        ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
3014        if (ret != 4) {
3015                pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
3016                return 1;
3017        }
3018
3019        p = acpiid;
3020        hid = strsep(&p, ":");
3021        uid = p;
3022
3023        if (!hid || !(*hid) || !uid) {
3024                pr_err("Invalid command line: hid or uid\n");
3025                return 1;
3026        }
3027
3028        i = early_acpihid_map_size++;
3029        memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3030        memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3031        early_acpihid_map[i].devid =
3032                ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3033        early_acpihid_map[i].cmd_line   = true;
3034
3035        return 1;
3036}
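/*
 * Example invocation (hypothetical HID/UID): ivrs_acpihid[00:14.5]=AMD0020:0
 * splits into hid "AMD0020" and uid "0" at the ':' and maps them to
 * devid (0x00 << 8) | (0x14 << 3) | 0x5 = 0x00a5 in early_acpihid_map[].
 */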
3037
3038__setup("amd_iommu_dump",       parse_amd_iommu_dump);
3039__setup("amd_iommu=",           parse_amd_iommu_options);
3040__setup("amd_iommu_intr=",      parse_amd_iommu_intr);
3041__setup("ivrs_ioapic",          parse_ivrs_ioapic);
3042__setup("ivrs_hpet",            parse_ivrs_hpet);
3043__setup("ivrs_acpihid",         parse_ivrs_acpihid);
3044
3045IOMMU_INIT_FINISH(amd_iommu_detect,
3046                  gart_iommu_hole_init,
3047                  NULL,
3048                  NULL);
3049
3050bool amd_iommu_v2_supported(void)
3051{
3052        return amd_iommu_v2_present;
3053}
3054EXPORT_SYMBOL(amd_iommu_v2_supported);
3055
3056struct amd_iommu *get_amd_iommu(unsigned int idx)
3057{
3058        unsigned int i = 0;
3059        struct amd_iommu *iommu;
3060
3061        for_each_iommu(iommu)
3062                if (i++ == idx)
3063                        return iommu;
3064        return NULL;
3065}
3066EXPORT_SYMBOL(get_amd_iommu);
3067
3068/****************************************************************************
3069 *
3070 * IOMMU EFR Performance Counter support functionality. This code
3071 * provides access to the IOMMU performance counters.
3072 *
3073 ****************************************************************************/
3074
3075u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3076{
3077        struct amd_iommu *iommu = get_amd_iommu(idx);
3078
3079        if (iommu)
3080                return iommu->max_banks;
3081
3082        return 0;
3083}
3084EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3085
3086bool amd_iommu_pc_supported(void)
3087{
3088        return amd_iommu_pc_present;
3089}
3090EXPORT_SYMBOL(amd_iommu_pc_supported);
3091
3092u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3093{
3094        struct amd_iommu *iommu = get_amd_iommu(idx);
3095
3096        if (iommu)
3097                return iommu->max_counters;
3098
3099        return 0;
3100}
3101EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3102
3103static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3104                                u8 fxn, u64 *value, bool is_write)
3105{
3106        u32 offset;
3107        u32 max_offset_lim;
3108
3109        /* Make sure the IOMMU PC resource is available */
3110        if (!amd_iommu_pc_present)
3111                return -ENODEV;
3112
3113        /* Check for valid iommu and pc register indexing */
3114        if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3115                return -ENODEV;
3116
3117        offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3118
3119        /* Limit the offset to the hw defined mmio region aperture */
3120        max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3121                                (iommu->max_counters << 8) | 0x28);
3122        if ((offset < MMIO_CNTR_REG_OFFSET) ||
3123            (offset > max_offset_lim))
3124                return -EINVAL;
3125
3126        if (is_write) {
3127                u64 val = *value & GENMASK_ULL(47, 0);
3128
3129                writel((u32)val, iommu->mmio_base + offset);
3130                writel((val >> 32), iommu->mmio_base + offset + 4);
3131        } else {
3132                *value = readl(iommu->mmio_base + offset + 4);
3133                *value <<= 32;
3134                *value |= readl(iommu->mmio_base + offset);
3135                *value &= GENMASK_ULL(47, 0);
3136        }
3137
3138        return 0;
3139}
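/*
 * Offset sketch with assumed indices: bank 0, counter 0, fxn 0 gives
 * offset = (0x40 << 12) = 0x40000, which must fall inside the
 * [MMIO_CNTR_REG_OFFSET, max_offset_lim] window checked above; the 64-bit
 * value is then transferred as two 32-bit MMIO accesses and masked to the
 * 48 counter bits via GENMASK_ULL(47, 0).
 */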
3140
3141int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3142{
3143        if (!iommu)
3144                return -EINVAL;
3145
3146        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3147}
3148EXPORT_SYMBOL(amd_iommu_pc_get_reg);
3149
3150int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3151{
3152        if (!iommu)
3153                return -EINVAL;
3154
3155        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3156}
3157EXPORT_SYMBOL(amd_iommu_pc_set_reg);
3158