linux/drivers/iommu/amd_iommu_init.c
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
  11#include <linux/pci.h>
  12#include <linux/acpi.h>
  13#include <linux/list.h>
  14#include <linux/bitmap.h>
  15#include <linux/slab.h>
  16#include <linux/syscore_ops.h>
  17#include <linux/interrupt.h>
  18#include <linux/msi.h>
  19#include <linux/amd-iommu.h>
  20#include <linux/export.h>
  21#include <linux/iommu.h>
  22#include <linux/kmemleak.h>
  23#include <linux/mem_encrypt.h>
  24#include <asm/pci-direct.h>
  25#include <asm/iommu.h>
  26#include <asm/apic.h>
  27#include <asm/msidef.h>
  28#include <asm/gart.h>
  29#include <asm/x86_init.h>
  30#include <asm/iommu_table.h>
  31#include <asm/io_apic.h>
  32#include <asm/irq_remapping.h>
  33
  34#include <linux/crash_dump.h>
  35#include "amd_iommu.h"
  36#include "amd_iommu_proto.h"
  37#include "amd_iommu_types.h"
  38#include "irq_remapping.h"
  39
  40/*
  41 * definitions for the ACPI scanning code
  42 */
  43#define IVRS_HEADER_LENGTH 48
  44
  45#define ACPI_IVHD_TYPE_MAX_SUPPORTED    0x40
  46#define ACPI_IVMD_TYPE_ALL              0x20
  47#define ACPI_IVMD_TYPE                  0x21
  48#define ACPI_IVMD_TYPE_RANGE            0x22
  49
  50#define IVHD_DEV_ALL                    0x01
  51#define IVHD_DEV_SELECT                 0x02
  52#define IVHD_DEV_SELECT_RANGE_START     0x03
  53#define IVHD_DEV_RANGE_END              0x04
  54#define IVHD_DEV_ALIAS                  0x42
  55#define IVHD_DEV_ALIAS_RANGE            0x43
  56#define IVHD_DEV_EXT_SELECT             0x46
  57#define IVHD_DEV_EXT_SELECT_RANGE       0x47
  58#define IVHD_DEV_SPECIAL                0x48
  59#define IVHD_DEV_ACPI_HID               0xf0
  60
  61#define UID_NOT_PRESENT                 0
  62#define UID_IS_INTEGER                  1
  63#define UID_IS_CHARACTER                2
  64
  65#define IVHD_SPECIAL_IOAPIC             1
  66#define IVHD_SPECIAL_HPET               2
  67
  68#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
  69#define IVHD_FLAG_PASSPW_EN_MASK        0x02
  70#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
  71#define IVHD_FLAG_ISOC_EN_MASK          0x08
  72
  73#define IVMD_FLAG_EXCL_RANGE            0x08
  74#define IVMD_FLAG_UNITY_MAP             0x01
  75
  76#define ACPI_DEVFLAG_INITPASS           0x01
  77#define ACPI_DEVFLAG_EXTINT             0x02
  78#define ACPI_DEVFLAG_NMI                0x04
  79#define ACPI_DEVFLAG_SYSMGT1            0x10
  80#define ACPI_DEVFLAG_SYSMGT2            0x20
  81#define ACPI_DEVFLAG_LINT0              0x40
  82#define ACPI_DEVFLAG_LINT1              0x80
  83#define ACPI_DEVFLAG_ATSDIS             0x10000000
  84
  85#define LOOP_TIMEOUT    100000
  86/*
  87 * ACPI table definitions
  88 *
  89 * These data structures are laid over the table to parse the important values
  90 * out of it.
  91 */
  92
  93extern const struct iommu_ops amd_iommu_ops;
  94
  95/*
  96 * structure describing one IOMMU in the ACPI table. Typically followed by one
   97 * or more ivhd_entry structures.
  98 */
  99struct ivhd_header {
 100        u8 type;
 101        u8 flags;
 102        u16 length;
 103        u16 devid;
 104        u16 cap_ptr;
 105        u64 mmio_phys;
 106        u16 pci_seg;
 107        u16 info;
 108        u32 efr_attr;
 109
 110        /* Following only valid on IVHD type 11h and 40h */
 111        u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 112        u64 res;
 113} __attribute__((packed));
 114
 115/*
 116 * A device entry describing which devices a specific IOMMU translates and
 117 * which requestor ids they use.
 118 */
 119struct ivhd_entry {
 120        u8 type;
 121        u16 devid;
 122        u8 flags;
 123        u32 ext;
 124        u32 hidh;
 125        u64 cid;
 126        u8 uidf;
 127        u8 uidl;
 128        u8 uid;
 129} __attribute__((packed));
 130
 131/*
 132 * An AMD IOMMU memory definition structure. It defines things like exclusion
 133 * ranges for devices and regions that should be unity mapped.
 134 */
 135struct ivmd_header {
 136        u8 type;
 137        u8 flags;
 138        u16 length;
 139        u16 devid;
 140        u16 aux;
 141        u64 resv;
 142        u64 range_start;
 143        u64 range_length;
 144} __attribute__((packed));
 145
 146bool amd_iommu_dump;
 147bool amd_iommu_irq_remap __read_mostly;
 148
 149int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 150static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
 151
 152static bool amd_iommu_detected;
 153static bool __initdata amd_iommu_disabled;
 154static int amd_iommu_target_ivhd_type;
 155
 156u16 amd_iommu_last_bdf;                 /* largest PCI device id we have
 157                                           to handle */
 158LIST_HEAD(amd_iommu_unity_map);         /* a list of required unity mappings
 159                                           we find in ACPI */
 160bool amd_iommu_unmap_flush;             /* if true, flush on every unmap */
 161
 162LIST_HEAD(amd_iommu_list);              /* list of all AMD IOMMUs in the
 163                                           system */
 164
  165/* Array to assign indices to IOMMUs */
 166struct amd_iommu *amd_iommus[MAX_IOMMUS];
 167
 168/* Number of IOMMUs present in the system */
 169static int amd_iommus_present;
 170
 171/* IOMMUs have a non-present cache? */
 172bool amd_iommu_np_cache __read_mostly;
 173bool amd_iommu_iotlb_sup __read_mostly = true;
 174
 175u32 amd_iommu_max_pasid __read_mostly = ~0;
 176
 177bool amd_iommu_v2_present __read_mostly;
 178static bool amd_iommu_pc_present __read_mostly;
 179
 180bool amd_iommu_force_isolation __read_mostly;
 181
 182/*
  183 * Pointer to the device table which is shared by all AMD IOMMUs.
  184 * It is indexed by the PCI device id or the HT unit id and contains
 185 * information about the domain the device belongs to as well as the
 186 * page table root pointer.
 187 */
 188struct dev_table_entry *amd_iommu_dev_table;
 189/*
  190 * Pointer to a device table into which the contents of the old device
  191 * table will be copied. It is only used in the kdump kernel.
 192 */
 193static struct dev_table_entry *old_dev_tbl_cpy;
 194
 195/*
 196 * The alias table is a driver specific data structure which contains the
 197 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
 198 * More than one device can share the same requestor id.
 199 */
 200u16 *amd_iommu_alias_table;
 201
 202/*
 203 * The rlookup table is used to find the IOMMU which is responsible
 204 * for a specific device. It is also indexed by the PCI device id.
 205 */
 206struct amd_iommu **amd_iommu_rlookup_table;
 207EXPORT_SYMBOL(amd_iommu_rlookup_table);
 208
 209/*
 210 * This table is used to find the irq remapping table for a given device id
 211 * quickly.
 212 */
 213struct irq_remap_table **irq_lookup_table;
 214
 215/*
 216 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 217 * to know which ones are already in use.
 218 */
 219unsigned long *amd_iommu_pd_alloc_bitmap;
 220
 221static u32 dev_table_size;      /* size of the device table */
 222static u32 alias_table_size;    /* size of the alias table */
  223static u32 rlookup_table_size;  /* size of the rlookup table */
 224
 225enum iommu_init_state {
 226        IOMMU_START_STATE,
 227        IOMMU_IVRS_DETECTED,
 228        IOMMU_ACPI_FINISHED,
 229        IOMMU_ENABLED,
 230        IOMMU_PCI_INIT,
 231        IOMMU_INTERRUPTS_EN,
 232        IOMMU_DMA_OPS,
 233        IOMMU_INITIALIZED,
 234        IOMMU_NOT_FOUND,
 235        IOMMU_INIT_ERROR,
 236        IOMMU_CMDLINE_DISABLED,
 237};
 238
 239/* Early ioapic and hpet maps from kernel command line */
 240#define EARLY_MAP_SIZE          4
 241static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 242static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 243static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 244
 245static int __initdata early_ioapic_map_size;
 246static int __initdata early_hpet_map_size;
 247static int __initdata early_acpihid_map_size;
 248
 249static bool __initdata cmdline_maps;
 250
 251static enum iommu_init_state init_state = IOMMU_START_STATE;
 252
 253static int amd_iommu_enable_interrupts(void);
 254static int __init iommu_go_to_state(enum iommu_init_state state);
 255static void init_device_table_dma(void);
 256
 257static bool amd_iommu_pre_enabled = true;
 258
 259bool translation_pre_enabled(struct amd_iommu *iommu)
 260{
 261        return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 262}
 263EXPORT_SYMBOL(translation_pre_enabled);
 264
 265static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 266{
 267        iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 268}
 269
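/*
 * Check whether translation was already enabled on this IOMMU when the
 * driver took over (e.g. by the firmware, or by a previous kernel in a
 * kdump scenario) and remember the result in iommu->flags.
 */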
 270static void init_translation_status(struct amd_iommu *iommu)
 271{
 272        u64 ctrl;
 273
 274        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 275        if (ctrl & (1<<CONTROL_IOMMU_EN))
 276                iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 277}
 278
 279static inline void update_last_devid(u16 devid)
 280{
 281        if (devid > amd_iommu_last_bdf)
 282                amd_iommu_last_bdf = devid;
 283}
 284
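/*
 * Size of a table indexed by device id: one entry for every id up to
 * amd_iommu_last_bdf, rounded up to a whole power-of-two number of pages.
 */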
 285static inline unsigned long tbl_size(int entry_size)
 286{
 287        unsigned shift = PAGE_SHIFT +
 288                         get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 289
 290        return 1UL << shift;
 291}
 292
 293int amd_iommu_get_num_iommus(void)
 294{
 295        return amd_iommus_present;
 296}
 297
 298/* Access to l1 and l2 indexed register spaces */
 299
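/*
 * Both banks are reached through an index/data register pair in the
 * IOMMU's PCI config space: 0xf8/0xfc for L1 (the L1 bank number goes
 * into the upper half of the index) and 0xf0/0xf4 for L2. The index
 * register also carries a write-enable bit (bit 31 for L1, bit 8 for
 * L2) which must be set before writing the data register.
 */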
 300static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 301{
 302        u32 val;
 303
 304        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 305        pci_read_config_dword(iommu->dev, 0xfc, &val);
 306        return val;
 307}
 308
 309static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 310{
 311        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 312        pci_write_config_dword(iommu->dev, 0xfc, val);
 313        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 314}
 315
 316static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 317{
 318        u32 val;
 319
 320        pci_write_config_dword(iommu->dev, 0xf0, address);
 321        pci_read_config_dword(iommu->dev, 0xf4, &val);
 322        return val;
 323}
 324
 325static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 326{
 327        pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 328        pci_write_config_dword(iommu->dev, 0xf4, val);
 329}
 330
 331/****************************************************************************
 332 *
 333 * AMD IOMMU MMIO register space handling functions
 334 *
  335 * These functions are used to program the IOMMU device registers in
  336 * MMIO space that this driver requires.
 337 *
 338 ****************************************************************************/
 339
 340/*
  341 * This function sets the exclusion range in the IOMMU. DMA accesses to the
  342 * exclusion range are passed through untranslated.
 343 */
 344static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 345{
 346        u64 start = iommu->exclusion_start & PAGE_MASK;
 347        u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
 348        u64 entry;
 349
 350        if (!iommu->exclusion_start)
 351                return;
 352
 353        entry = start | MMIO_EXCL_ENABLE_MASK;
 354        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 355                        &entry, sizeof(entry));
 356
 357        entry = limit;
 358        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 359                        &entry, sizeof(entry));
 360}
 361
 362/* Programs the physical address of the device table into the IOMMU hardware */
 363static void iommu_set_device_table(struct amd_iommu *iommu)
 364{
 365        u64 entry;
 366
 367        BUG_ON(iommu->mmio_base == NULL);
 368
 369        entry = iommu_virt_to_phys(amd_iommu_dev_table);
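        /* The low bits of the base register encode the table size in 4k pages, minus one */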
 370        entry |= (dev_table_size >> 12) - 1;
 371        memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 372                        &entry, sizeof(entry));
 373}
 374
 375/* Generic functions to enable/disable certain features of the IOMMU. */
 376static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 377{
 378        u64 ctrl;
 379
 380        ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 381        ctrl |= (1ULL << bit);
 382        writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 383}
 384
 385static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 386{
 387        u64 ctrl;
 388
 389        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 390        ctrl &= ~(1ULL << bit);
 391        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 392}
 393
 394static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 395{
 396        u64 ctrl;
 397
 398        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 399        ctrl &= ~CTRL_INV_TO_MASK;
 400        ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 401        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 402}
 403
 404/* Function to enable the hardware */
 405static void iommu_enable(struct amd_iommu *iommu)
 406{
 407        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 408}
 409
 410static void iommu_disable(struct amd_iommu *iommu)
 411{
 412        if (!iommu->mmio_base)
 413                return;
 414
 415        /* Disable command buffer */
 416        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 417
 418        /* Disable event logging and event interrupts */
 419        iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 420        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 421
 422        /* Disable IOMMU GA_LOG */
 423        iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 424        iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 425
 426        /* Disable IOMMU hardware itself */
 427        iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 428}
 429
 430/*
 431 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 432 * the system has one.
 433 */
 434static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 435{
 436        if (!request_mem_region(address, end, "amd_iommu")) {
 437                pr_err("Can not reserve memory region %llx-%llx for mmio\n",
 438                        address, end);
 439                pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
 440                return NULL;
 441        }
 442
 443        return (u8 __iomem *)ioremap_nocache(address, end);
 444}
 445
 446static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 447{
 448        if (iommu->mmio_base)
 449                iounmap(iommu->mmio_base);
 450        release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 451}
 452
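/*
 * Size of the fixed part of an IVHD header: legacy type 0x10 headers are
 * 24 bytes long, type 0x11 and 0x40 headers additionally carry a copy of
 * the EFR register and are 40 bytes long. Device entries follow directly
 * after this fixed part.
 */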
 453static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 454{
 455        u32 size = 0;
 456
 457        switch (h->type) {
 458        case 0x10:
 459                size = 24;
 460                break;
 461        case 0x11:
 462        case 0x40:
 463                size = 40;
 464                break;
 465        }
 466        return size;
 467}
 468
 469/****************************************************************************
 470 *
 471 * The functions below belong to the first pass of AMD IOMMU ACPI table
 472 * parsing. In this pass we try to find out the highest device id this
  473 * code has to handle. Based on this information the size of the shared data
  474 * structures is determined later.
 475 *
 476 ****************************************************************************/
 477
 478/*
 479 * This function calculates the length of a given IVHD entry
 480 */
 481static inline int ivhd_entry_length(u8 *ivhd)
 482{
 483        u32 type = ((struct ivhd_entry *)ivhd)->type;
 484
 485        if (type < 0x80) {
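                /*
                 * 4-, 8-, 16- and 32-byte entries encode their length in
                 * the two most significant bits of the type byte.
                 */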
 486                return 0x04 << (*ivhd >> 6);
 487        } else if (type == IVHD_DEV_ACPI_HID) {
 488                /* For ACPI_HID, offset 21 is uid len */
 489                return *((u8 *)ivhd + 21) + 22;
 490        }
 491        return 0;
 492}
 493
 494/*
 495 * After reading the highest device id from the IOMMU PCI capability header
  496 * this function checks whether a higher device id is defined in the ACPI table.
 497 */
 498static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 499{
 500        u8 *p = (void *)h, *end = (void *)h;
 501        struct ivhd_entry *dev;
 502
 503        u32 ivhd_size = get_ivhd_header_size(h);
 504
 505        if (!ivhd_size) {
 506                pr_err("Unsupported IVHD type %#x\n", h->type);
 507                return -EINVAL;
 508        }
 509
 510        p += ivhd_size;
 511        end += h->length;
 512
 513        while (p < end) {
 514                dev = (struct ivhd_entry *)p;
 515                switch (dev->type) {
 516                case IVHD_DEV_ALL:
 517                        /* Use maximum BDF value for DEV_ALL */
 518                        update_last_devid(0xffff);
 519                        break;
 520                case IVHD_DEV_SELECT:
 521                case IVHD_DEV_RANGE_END:
 522                case IVHD_DEV_ALIAS:
 523                case IVHD_DEV_EXT_SELECT:
 524                        /* all the above subfield types refer to device ids */
 525                        update_last_devid(dev->devid);
 526                        break;
 527                default:
 528                        break;
 529                }
 530                p += ivhd_entry_length(p);
 531        }
 532
 533        WARN_ON(p != end);
 534
 535        return 0;
 536}
 537
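/*
 * Per the ACPI specification the byte-wise sum over the whole table,
 * including the checksum field itself, must be zero.
 */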
 538static int __init check_ivrs_checksum(struct acpi_table_header *table)
 539{
 540        int i;
 541        u8 checksum = 0, *p = (u8 *)table;
 542
 543        for (i = 0; i < table->length; ++i)
 544                checksum += p[i];
 545        if (checksum != 0) {
 546                /* ACPI table corrupt */
 547                pr_err(FW_BUG "IVRS invalid checksum\n");
 548                return -ENODEV;
 549        }
 550
 551        return 0;
 552}
 553
 554/*
 555 * Iterate over all IVHD entries in the ACPI table and find the highest device
 556 * id which we need to handle. This is the first of three functions which parse
 557 * the ACPI table. So we check the checksum here.
 558 */
 559static int __init find_last_devid_acpi(struct acpi_table_header *table)
 560{
 561        u8 *p = (u8 *)table, *end = (u8 *)table;
 562        struct ivhd_header *h;
 563
 564        p += IVRS_HEADER_LENGTH;
 565
 566        end += table->length;
 567        while (p < end) {
 568                h = (struct ivhd_header *)p;
 569                if (h->type == amd_iommu_target_ivhd_type) {
 570                        int ret = find_last_devid_from_ivhd(h);
 571
 572                        if (ret)
 573                                return ret;
 574                }
 575                p += h->length;
 576        }
 577        WARN_ON(p != end);
 578
 579        return 0;
 580}
 581
 582/****************************************************************************
 583 *
 584 * The following functions belong to the code path which parses the ACPI table
 585 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
  586 * data structures, initialize the device/alias/rlookup tables and also
  587 * initialize the hardware itself.
 588 *
 589 ****************************************************************************/
 590
 591/*
 592 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 593 * write commands to that buffer later and the IOMMU will execute them
 594 * asynchronously
 595 */
 596static int __init alloc_command_buffer(struct amd_iommu *iommu)
 597{
 598        iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 599                                                  get_order(CMD_BUFFER_SIZE));
 600
 601        return iommu->cmd_buf ? 0 : -ENOMEM;
 602}
 603
 604/*
 605 * This function resets the command buffer if the IOMMU stopped fetching
 606 * commands from it.
 607 */
 608void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 609{
 610        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 611
 612        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 613        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 614        iommu->cmd_buf_head = 0;
 615        iommu->cmd_buf_tail = 0;
 616
 617        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 618}
 619
 620/*
 621 * This function writes the command buffer address to the hardware and
 622 * enables it.
 623 */
 624static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 625{
 626        u64 entry;
 627
 628        BUG_ON(iommu->cmd_buf == NULL);
 629
 630        entry = iommu_virt_to_phys(iommu->cmd_buf);
 631        entry |= MMIO_CMD_SIZE_512;
 632
 633        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 634                    &entry, sizeof(entry));
 635
 636        amd_iommu_reset_cmd_buffer(iommu);
 637}
 638
 639/*
 640 * This function disables the command buffer
 641 */
 642static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 643{
 644        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 645}
 646
 647static void __init free_command_buffer(struct amd_iommu *iommu)
 648{
 649        free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 650}
 651
 652/* allocates the memory where the IOMMU will log its events to */
 653static int __init alloc_event_buffer(struct amd_iommu *iommu)
 654{
 655        iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 656                                                  get_order(EVT_BUFFER_SIZE));
 657
 658        return iommu->evt_buf ? 0 : -ENOMEM;
 659}
 660
 661static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 662{
 663        u64 entry;
 664
 665        BUG_ON(iommu->evt_buf == NULL);
 666
 667        entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 668
 669        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 670                    &entry, sizeof(entry));
 671
 672        /* set head and tail to zero manually */
 673        writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 674        writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 675
 676        iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 677}
 678
 679/*
 680 * This function disables the event log buffer
 681 */
 682static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 683{
 684        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 685}
 686
 687static void __init free_event_buffer(struct amd_iommu *iommu)
 688{
 689        free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 690}
 691
  692/* allocates the memory where the IOMMU will log peripheral page requests to */
 693static int __init alloc_ppr_log(struct amd_iommu *iommu)
 694{
 695        iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 696                                                  get_order(PPR_LOG_SIZE));
 697
 698        return iommu->ppr_log ? 0 : -ENOMEM;
 699}
 700
 701static void iommu_enable_ppr_log(struct amd_iommu *iommu)
 702{
 703        u64 entry;
 704
 705        if (iommu->ppr_log == NULL)
 706                return;
 707
 708        entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
 709
 710        memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
 711                    &entry, sizeof(entry));
 712
 713        /* set head and tail to zero manually */
 714        writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 715        writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 716
 717        iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
 718        iommu_feature_enable(iommu, CONTROL_PPR_EN);
 719}
 720
 721static void __init free_ppr_log(struct amd_iommu *iommu)
 722{
 723        if (iommu->ppr_log == NULL)
 724                return;
 725
 726        free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
 727}
 728
 729static void free_ga_log(struct amd_iommu *iommu)
 730{
 731#ifdef CONFIG_IRQ_REMAP
 732        if (iommu->ga_log)
 733                free_pages((unsigned long)iommu->ga_log,
 734                            get_order(GA_LOG_SIZE));
 735        if (iommu->ga_log_tail)
 736                free_pages((unsigned long)iommu->ga_log_tail,
 737                            get_order(8));
 738#endif
 739}
 740
 741static int iommu_ga_log_enable(struct amd_iommu *iommu)
 742{
 743#ifdef CONFIG_IRQ_REMAP
 744        u32 status, i;
 745
 746        if (!iommu->ga_log)
 747                return -EINVAL;
 748
 749        status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 750
 751        /* Check if already running */
 752        if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 753                return 0;
 754
 755        iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 756        iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 757
 758        for (i = 0; i < LOOP_TIMEOUT; ++i) {
 759                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 760                if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 761                        break;
 762        }
 763
 764        if (i >= LOOP_TIMEOUT)
 765                return -EINVAL;
 766#endif /* CONFIG_IRQ_REMAP */
 767        return 0;
 768}
 769
 770#ifdef CONFIG_IRQ_REMAP
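/*
 * Allocate and program the GA (guest virtual APIC) log. The IOMMU uses
 * this log to hand guest interrupt events to software, e.g. interrupts
 * targeting a vCPU that is not currently running.
 */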
 771static int iommu_init_ga_log(struct amd_iommu *iommu)
 772{
 773        u64 entry;
 774
 775        if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 776                return 0;
 777
 778        iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 779                                        get_order(GA_LOG_SIZE));
 780        if (!iommu->ga_log)
 781                goto err_out;
 782
 783        iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 784                                        get_order(8));
 785        if (!iommu->ga_log_tail)
 786                goto err_out;
 787
 788        entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 789        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 790                    &entry, sizeof(entry));
 791        entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
 792                 (BIT_ULL(52)-1)) & ~7ULL;
 793        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 794                    &entry, sizeof(entry));
 795        writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 796        writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 797
 798        return 0;
 799err_out:
 800        free_ga_log(iommu);
 801        return -EINVAL;
 802}
 803#endif /* CONFIG_IRQ_REMAP */
 804
 805static int iommu_init_ga(struct amd_iommu *iommu)
 806{
 807        int ret = 0;
 808
 809#ifdef CONFIG_IRQ_REMAP
 810        /* Note: We have already checked GASup from IVRS table.
 811         *       Now, we need to make sure that GAMSup is set.
 812         */
 813        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
 814            !iommu_feature(iommu, FEATURE_GAM_VAPIC))
 815                amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
 816
 817        ret = iommu_init_ga_log(iommu);
 818#endif /* CONFIG_IRQ_REMAP */
 819
 820        return ret;
 821}
 822
 823static void iommu_enable_xt(struct amd_iommu *iommu)
 824{
 825#ifdef CONFIG_IRQ_REMAP
 826        /*
 827         * XT mode (32-bit APIC destination ID) requires
 828         * GA mode (128-bit IRTE support) as a prerequisite.
 829         */
 830        if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 831            amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 832                iommu_feature_enable(iommu, CONTROL_XT_EN);
 833#endif /* CONFIG_IRQ_REMAP */
 834}
 835
 836static void iommu_enable_gt(struct amd_iommu *iommu)
 837{
 838        if (!iommu_feature(iommu, FEATURE_GT))
 839                return;
 840
 841        iommu_feature_enable(iommu, CONTROL_GT_EN);
 842}
 843
 844/* sets a specific bit in the device table entry. */
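/*
 * A device table entry is 256 bits wide and stored as four 64-bit words;
 * the bit number selects the word (bit / 64) and the position within it.
 */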
 845static void set_dev_entry_bit(u16 devid, u8 bit)
 846{
 847        int i = (bit >> 6) & 0x03;
 848        int _bit = bit & 0x3f;
 849
 850        amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 851}
 852
 853static int get_dev_entry_bit(u16 devid, u8 bit)
 854{
 855        int i = (bit >> 6) & 0x03;
 856        int _bit = bit & 0x3f;
 857
 858        return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 859}
 860
 861
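/*
 * In a kdump kernel the IOMMU may already be enabled and still using the
 * old kernel's device table. Copy that table into a freshly allocated one,
 * preserving valid DTEs, their domain ids and their interrupt remapping
 * setup, so ongoing DMA and interrupts keep working until the IOMMU is
 * re-initialized.
 */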
 862static bool copy_device_table(void)
 863{
 864        u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
 865        struct dev_table_entry *old_devtb = NULL;
 866        u32 lo, hi, devid, old_devtb_size;
 867        phys_addr_t old_devtb_phys;
 868        struct amd_iommu *iommu;
 869        u16 dom_id, dte_v, irq_v;
 870        gfp_t gfp_flag;
 871        u64 tmp;
 872
 873        if (!amd_iommu_pre_enabled)
 874                return false;
 875
 876        pr_warn("Translation is already enabled - trying to copy translation structures\n");
 877        for_each_iommu(iommu) {
 878                /* All IOMMUs should use the same device table with the same size */
 879                lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
 880                hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
 881                entry = (((u64) hi) << 32) + lo;
 882                if (last_entry && last_entry != entry) {
 883                        pr_err("IOMMU:%d should use the same dev table as others!\n",
 884                                iommu->index);
 885                        return false;
 886                }
 887                last_entry = entry;
 888
 889                old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
 890                if (old_devtb_size != dev_table_size) {
 891                        pr_err("The device table size of IOMMU:%d is not expected!\n",
 892                                iommu->index);
 893                        return false;
 894                }
 895        }
 896
 897        /*
  898         * When SME is enabled in the first kernel, the entry includes the
  899         * memory encryption mask (sme_me_mask), so we must remove it to
  900         * obtain the true physical address in the kdump kernel.
 901         */
 902        old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
 903
 904        if (old_devtb_phys >= 0x100000000ULL) {
 905                pr_err("The address of old device table is above 4G, not trustworthy!\n");
 906                return false;
 907        }
 908        old_devtb = (sme_active() && is_kdump_kernel())
 909                    ? (__force void *)ioremap_encrypted(old_devtb_phys,
 910                                                        dev_table_size)
 911                    : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
 912
 913        if (!old_devtb)
 914                return false;
 915
 916        gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
 917        old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
 918                                get_order(dev_table_size));
 919        if (old_dev_tbl_cpy == NULL) {
 920                pr_err("Failed to allocate memory for copying old device table!\n");
 921                return false;
 922        }
 923
 924        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 925                old_dev_tbl_cpy[devid] = old_devtb[devid];
 926                dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
 927                dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
 928
 929                if (dte_v && dom_id) {
 930                        old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
 931                        old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
 932                        __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 933                        /* If gcr3 table existed, mask it out */
 934                        if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
 935                                tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
 936                                tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
 937                                old_dev_tbl_cpy[devid].data[1] &= ~tmp;
 938                                tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
 939                                tmp |= DTE_FLAG_GV;
 940                                old_dev_tbl_cpy[devid].data[0] &= ~tmp;
 941                        }
 942                }
 943
 944                irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
 945                int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
 946                int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
 947                if (irq_v && (int_ctl || int_tab_len)) {
 948                        if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
 949                            (int_tab_len != DTE_IRQ_TABLE_LEN)) {
 950                                pr_err("Wrong old irq remapping flag: %#x\n", devid);
 951                                return false;
 952                        }
 953
 954                        old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
 955                }
 956        }
 957        memunmap(old_devtb);
 958
 959        return true;
 960}
 961
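/*
 * Erratum 63 workaround: when the SysMgt field of a DTE is set to 01b
 * the IW (write permission) bit must be set as well.
 */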
 962void amd_iommu_apply_erratum_63(u16 devid)
 963{
 964        int sysmgt;
 965
 966        sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
 967                 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
 968
 969        if (sysmgt == 0x01)
 970                set_dev_entry_bit(devid, DEV_ENTRY_IW);
 971}
 972
 973/* Writes the specific IOMMU for a device into the rlookup table */
 974static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
 975{
 976        amd_iommu_rlookup_table[devid] = iommu;
 977}
 978
 979/*
 980 * This function takes the device specific flags read from the ACPI
 981 * table and sets up the device table entry with that information
 982 */
 983static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
 984                                           u16 devid, u32 flags, u32 ext_flags)
 985{
 986        if (flags & ACPI_DEVFLAG_INITPASS)
 987                set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
 988        if (flags & ACPI_DEVFLAG_EXTINT)
 989                set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
 990        if (flags & ACPI_DEVFLAG_NMI)
 991                set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
 992        if (flags & ACPI_DEVFLAG_SYSMGT1)
 993                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
 994        if (flags & ACPI_DEVFLAG_SYSMGT2)
 995                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
 996        if (flags & ACPI_DEVFLAG_LINT0)
 997                set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
 998        if (flags & ACPI_DEVFLAG_LINT1)
 999                set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
1000
1001        amd_iommu_apply_erratum_63(devid);
1002
1003        set_iommu_for_device(iommu, devid);
1004}
1005
1006int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1007{
1008        struct devid_map *entry;
1009        struct list_head *list;
1010
1011        if (type == IVHD_SPECIAL_IOAPIC)
1012                list = &ioapic_map;
1013        else if (type == IVHD_SPECIAL_HPET)
1014                list = &hpet_map;
1015        else
1016                return -EINVAL;
1017
1018        list_for_each_entry(entry, list, list) {
1019                if (!(entry->id == id && entry->cmd_line))
1020                        continue;
1021
1022                pr_info("Command-line override present for %s id %d - ignoring\n",
1023                        type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1024
1025                *devid = entry->devid;
1026
1027                return 0;
1028        }
1029
1030        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1031        if (!entry)
1032                return -ENOMEM;
1033
1034        entry->id       = id;
1035        entry->devid    = *devid;
1036        entry->cmd_line = cmd_line;
1037
1038        list_add_tail(&entry->list, list);
1039
1040        return 0;
1041}
1042
1043static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1044                                      bool cmd_line)
1045{
1046        struct acpihid_map_entry *entry;
1047        struct list_head *list = &acpihid_map;
1048
1049        list_for_each_entry(entry, list, list) {
1050                if (strcmp(entry->hid, hid) ||
1051                    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1052                    !entry->cmd_line)
1053                        continue;
1054
1055                pr_info("Command-line override for hid:%s uid:%s\n",
1056                        hid, uid);
1057                *devid = entry->devid;
1058                return 0;
1059        }
1060
1061        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1062        if (!entry)
1063                return -ENOMEM;
1064
1065        memcpy(entry->uid, uid, strlen(uid));
1066        memcpy(entry->hid, hid, strlen(hid));
1067        entry->devid = *devid;
1068        entry->cmd_line = cmd_line;
1069        entry->root_devid = (entry->devid & (~0x7));
1070
1071        pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1072                entry->cmd_line ? "cmd" : "ivrs",
1073                entry->hid, entry->uid, entry->root_devid);
1074
1075        list_add_tail(&entry->list, list);
1076        return 0;
1077}
1078
1079static int __init add_early_maps(void)
1080{
1081        int i, ret;
1082
1083        for (i = 0; i < early_ioapic_map_size; ++i) {
1084                ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1085                                         early_ioapic_map[i].id,
1086                                         &early_ioapic_map[i].devid,
1087                                         early_ioapic_map[i].cmd_line);
1088                if (ret)
1089                        return ret;
1090        }
1091
1092        for (i = 0; i < early_hpet_map_size; ++i) {
1093                ret = add_special_device(IVHD_SPECIAL_HPET,
1094                                         early_hpet_map[i].id,
1095                                         &early_hpet_map[i].devid,
1096                                         early_hpet_map[i].cmd_line);
1097                if (ret)
1098                        return ret;
1099        }
1100
1101        for (i = 0; i < early_acpihid_map_size; ++i) {
1102                ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1103                                          early_acpihid_map[i].uid,
1104                                          &early_acpihid_map[i].devid,
1105                                          early_acpihid_map[i].cmd_line);
1106                if (ret)
1107                        return ret;
1108        }
1109
1110        return 0;
1111}
1112
1113/*
1114 * Reads the device exclusion range from ACPI and initializes the IOMMU with
1115 * it
1116 */
1117static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
1118{
1119        struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1120
1121        if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
1122                return;
1123
1124        if (iommu) {
1125                /*
 1126                 * We can only configure exclusion ranges per IOMMU, not
 1127                 * per device. But we can enable the exclusion range per
 1128                 * device. This is done here.
1129                 */
1130                set_dev_entry_bit(devid, DEV_ENTRY_EX);
1131                iommu->exclusion_start = m->range_start;
1132                iommu->exclusion_length = m->range_length;
1133        }
1134}
1135
1136/*
1137 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1138 * initializes the hardware and our data structures with it.
1139 */
1140static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1141                                        struct ivhd_header *h)
1142{
1143        u8 *p = (u8 *)h;
1144        u8 *end = p, flags = 0;
1145        u16 devid = 0, devid_start = 0, devid_to = 0;
1146        u32 dev_i, ext_flags = 0;
1147        bool alias = false;
1148        struct ivhd_entry *e;
1149        u32 ivhd_size;
1150        int ret;
1151
1152
1153        ret = add_early_maps();
1154        if (ret)
1155                return ret;
1156
1157        amd_iommu_apply_ivrs_quirks();
1158
1159        /*
1160         * First save the recommended feature enable bits from ACPI
1161         */
1162        iommu->acpi_flags = h->flags;
1163
1164        /*
1165         * Done. Now parse the device entries
1166         */
1167        ivhd_size = get_ivhd_header_size(h);
1168        if (!ivhd_size) {
1169                pr_err("Unsupported IVHD type %#x\n", h->type);
1170                return -EINVAL;
1171        }
1172
1173        p += ivhd_size;
1174
1175        end += h->length;
1176
1177
1178        while (p < end) {
1179                e = (struct ivhd_entry *)p;
1180                switch (e->type) {
1181                case IVHD_DEV_ALL:
1182
1183                        DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1184
1185                        for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
1186                                set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1187                        break;
1188                case IVHD_DEV_SELECT:
1189
1190                        DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
1191                                    "flags: %02x\n",
1192                                    PCI_BUS_NUM(e->devid),
1193                                    PCI_SLOT(e->devid),
1194                                    PCI_FUNC(e->devid),
1195                                    e->flags);
1196
1197                        devid = e->devid;
1198                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1199                        break;
1200                case IVHD_DEV_SELECT_RANGE_START:
1201
1202                        DUMP_printk("  DEV_SELECT_RANGE_START\t "
1203                                    "devid: %02x:%02x.%x flags: %02x\n",
1204                                    PCI_BUS_NUM(e->devid),
1205                                    PCI_SLOT(e->devid),
1206                                    PCI_FUNC(e->devid),
1207                                    e->flags);
1208
1209                        devid_start = e->devid;
1210                        flags = e->flags;
1211                        ext_flags = 0;
1212                        alias = false;
1213                        break;
1214                case IVHD_DEV_ALIAS:
1215
1216                        DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
1217                                    "flags: %02x devid_to: %02x:%02x.%x\n",
1218                                    PCI_BUS_NUM(e->devid),
1219                                    PCI_SLOT(e->devid),
1220                                    PCI_FUNC(e->devid),
1221                                    e->flags,
1222                                    PCI_BUS_NUM(e->ext >> 8),
1223                                    PCI_SLOT(e->ext >> 8),
1224                                    PCI_FUNC(e->ext >> 8));
1225
1226                        devid = e->devid;
1227                        devid_to = e->ext >> 8;
1228                        set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1229                        set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1230                        amd_iommu_alias_table[devid] = devid_to;
1231                        break;
1232                case IVHD_DEV_ALIAS_RANGE:
1233
1234                        DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1235                                    "devid: %02x:%02x.%x flags: %02x "
1236                                    "devid_to: %02x:%02x.%x\n",
1237                                    PCI_BUS_NUM(e->devid),
1238                                    PCI_SLOT(e->devid),
1239                                    PCI_FUNC(e->devid),
1240                                    e->flags,
1241                                    PCI_BUS_NUM(e->ext >> 8),
1242                                    PCI_SLOT(e->ext >> 8),
1243                                    PCI_FUNC(e->ext >> 8));
1244
1245                        devid_start = e->devid;
1246                        flags = e->flags;
1247                        devid_to = e->ext >> 8;
1248                        ext_flags = 0;
1249                        alias = true;
1250                        break;
1251                case IVHD_DEV_EXT_SELECT:
1252
1253                        DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
1254                                    "flags: %02x ext: %08x\n",
1255                                    PCI_BUS_NUM(e->devid),
1256                                    PCI_SLOT(e->devid),
1257                                    PCI_FUNC(e->devid),
1258                                    e->flags, e->ext);
1259
1260                        devid = e->devid;
1261                        set_dev_entry_from_acpi(iommu, devid, e->flags,
1262                                                e->ext);
1263                        break;
1264                case IVHD_DEV_EXT_SELECT_RANGE:
1265
1266                        DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1267                                    "%02x:%02x.%x flags: %02x ext: %08x\n",
1268                                    PCI_BUS_NUM(e->devid),
1269                                    PCI_SLOT(e->devid),
1270                                    PCI_FUNC(e->devid),
1271                                    e->flags, e->ext);
1272
1273                        devid_start = e->devid;
1274                        flags = e->flags;
1275                        ext_flags = e->ext;
1276                        alias = false;
1277                        break;
1278                case IVHD_DEV_RANGE_END:
1279
1280                        DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
1281                                    PCI_BUS_NUM(e->devid),
1282                                    PCI_SLOT(e->devid),
1283                                    PCI_FUNC(e->devid));
1284
1285                        devid = e->devid;
1286                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1287                                if (alias) {
1288                                        amd_iommu_alias_table[dev_i] = devid_to;
1289                                        set_dev_entry_from_acpi(iommu,
1290                                                devid_to, flags, ext_flags);
1291                                }
1292                                set_dev_entry_from_acpi(iommu, dev_i,
1293                                                        flags, ext_flags);
1294                        }
1295                        break;
1296                case IVHD_DEV_SPECIAL: {
1297                        u8 handle, type;
1298                        const char *var;
1299                        u16 devid;
1300                        int ret;
1301
1302                        handle = e->ext & 0xff;
1303                        devid  = (e->ext >>  8) & 0xffff;
1304                        type   = (e->ext >> 24) & 0xff;
1305
1306                        if (type == IVHD_SPECIAL_IOAPIC)
1307                                var = "IOAPIC";
1308                        else if (type == IVHD_SPECIAL_HPET)
1309                                var = "HPET";
1310                        else
1311                                var = "UNKNOWN";
1312
1313                        DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
1314                                    var, (int)handle,
1315                                    PCI_BUS_NUM(devid),
1316                                    PCI_SLOT(devid),
1317                                    PCI_FUNC(devid));
1318
1319                        ret = add_special_device(type, handle, &devid, false);
1320                        if (ret)
1321                                return ret;
1322
1323                        /*
1324                         * add_special_device might update the devid in case a
1325                         * command-line override is present. So call
1326                         * set_dev_entry_from_acpi after add_special_device.
1327                         */
1328                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1329
1330                        break;
1331                }
1332                case IVHD_DEV_ACPI_HID: {
1333                        u16 devid;
1334                        u8 hid[ACPIHID_HID_LEN] = {0};
1335                        u8 uid[ACPIHID_UID_LEN] = {0};
1336                        int ret;
1337
1338                        if (h->type != 0x40) {
1339                                pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1340                                       e->type);
1341                                break;
1342                        }
1343
1344                        memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
1345                        hid[ACPIHID_HID_LEN - 1] = '\0';
1346
1347                        if (!(*hid)) {
1348                                pr_err(FW_BUG "Invalid HID.\n");
1349                                break;
1350                        }
1351
1352                        switch (e->uidf) {
1353                        case UID_NOT_PRESENT:
1354
1355                                if (e->uidl != 0)
1356                                        pr_warn(FW_BUG "Invalid UID length.\n");
1357
1358                                break;
1359                        case UID_IS_INTEGER:
1360
1361                                sprintf(uid, "%d", e->uid);
1362
1363                                break;
1364                        case UID_IS_CHARACTER:
1365
1366                                memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1);
1367                                uid[ACPIHID_UID_LEN - 1] = '\0';
1368
1369                                break;
1370                        default:
1371                                break;
1372                        }
1373
1374                        devid = e->devid;
1375                        DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
1376                                    hid, uid,
1377                                    PCI_BUS_NUM(devid),
1378                                    PCI_SLOT(devid),
1379                                    PCI_FUNC(devid));
1380
1381                        flags = e->flags;
1382
1383                        ret = add_acpi_hid_device(hid, uid, &devid, false);
1384                        if (ret)
1385                                return ret;
1386
1387                        /*
 1388                         * add_acpi_hid_device might update the devid in case a
 1389                         * command-line override is present. So call
 1390                         * set_dev_entry_from_acpi after add_acpi_hid_device.
1391                         */
1392                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1393
1394                        break;
1395                }
1396                default:
1397                        break;
1398                }
1399
1400                p += ivhd_entry_length(p);
1401        }
1402
1403        return 0;
1404}
1405
1406static void __init free_iommu_one(struct amd_iommu *iommu)
1407{
1408        free_command_buffer(iommu);
1409        free_event_buffer(iommu);
1410        free_ppr_log(iommu);
1411        free_ga_log(iommu);
1412        iommu_unmap_mmio_space(iommu);
1413}
1414
1415static void __init free_iommu_all(void)
1416{
1417        struct amd_iommu *iommu, *next;
1418
1419        for_each_iommu_safe(iommu, next) {
1420                list_del(&iommu->list);
1421                free_iommu_one(iommu);
1422                kfree(iommu);
1423        }
1424}
1425
1426/*
1427 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1428 * Workaround:
 1429 *     BIOS should disable L2B miscellaneous clock gating by setting
1430 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1431 */
1432static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1433{
1434        u32 value;
1435
1436        if ((boot_cpu_data.x86 != 0x15) ||
1437            (boot_cpu_data.x86_model < 0x10) ||
1438            (boot_cpu_data.x86_model > 0x1f))
1439                return;
1440
1441        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1442        pci_read_config_dword(iommu->dev, 0xf4, &value);
1443
1444        if (value & BIT(2))
1445                return;
1446
1447        /* Select NB indirect register 0x90 and enable writing */
1448        pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1449
1450        pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1451        pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1452
1453        /* Clear the enable writing bit */
1454        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1455}
1456
1457/*
1458 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1459 * Workaround:
1460 *     BIOS should enable ATS write permission check by setting
1461 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1462 */
1463static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1464{
1465        u32 value;
1466
1467        if ((boot_cpu_data.x86 != 0x15) ||
1468            (boot_cpu_data.x86_model < 0x30) ||
1469            (boot_cpu_data.x86_model > 0x3f))
1470                return;
1471
1472        /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1473        value = iommu_read_l2(iommu, 0x47);
1474
1475        if (value & BIT(0))
1476                return;
1477
1478        /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1479        iommu_write_l2(iommu, 0x47, value | BIT(0));
1480
1481        pci_info(iommu->dev, "Applying ATS write check workaround\n");
1482}
1483
1484/*
 1485 * This function glues the initialization functions for one IOMMU
1486 * together and also allocates the command buffer and programs the
1487 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1488 */
1489static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1490{
1491        int ret;
1492
1493        raw_spin_lock_init(&iommu->lock);
1494
1495        /* Add IOMMU to internal data structures */
1496        list_add_tail(&iommu->list, &amd_iommu_list);
1497        iommu->index = amd_iommus_present++;
1498
1499        if (unlikely(iommu->index >= MAX_IOMMUS)) {
1500                WARN(1, "System has more IOMMUs than supported by this driver\n");
1501                return -ENOSYS;
1502        }
1503
1504        /* Index is fine - add IOMMU to the array */
1505        amd_iommus[iommu->index] = iommu;
1506
1507        /*
1508         * Copy data from ACPI table entry to the iommu struct
1509         */
1510        iommu->devid   = h->devid;
1511        iommu->cap_ptr = h->cap_ptr;
1512        iommu->pci_seg = h->pci_seg;
1513        iommu->mmio_phys = h->mmio_phys;
1514
1515        switch (h->type) {
1516        case 0x10:
1517                /* Check if IVHD EFR contains proper max banks/counters */
1518                if ((h->efr_attr != 0) &&
1519                    ((h->efr_attr & (0xF << 13)) != 0) &&
1520                    ((h->efr_attr & (0x3F << 17)) != 0))
1521                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1522                else
1523                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1524                if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1525                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1526                if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0))
1527                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1528                break;
1529        case 0x11:
1530        case 0x40:
1531                if (h->efr_reg & (1 << 9))
1532                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1533                else
1534                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1535                if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0))
1536                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1537                if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0))
1538                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1539                break;
1540        default:
1541                return -EINVAL;
1542        }
1543
1544        iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1545                                                iommu->mmio_phys_end);
1546        if (!iommu->mmio_base)
1547                return -ENOMEM;
1548
1549        if (alloc_command_buffer(iommu))
1550                return -ENOMEM;
1551
1552        if (alloc_event_buffer(iommu))
1553                return -ENOMEM;
1554
1555        iommu->int_enabled = false;
1556
1557        init_translation_status(iommu);
1558        if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1559                iommu_disable(iommu);
1560                clear_translation_pre_enabled(iommu);
1561                pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1562                        iommu->index);
1563        }
1564        if (amd_iommu_pre_enabled)
1565                amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1566
1567        ret = init_iommu_from_acpi(iommu, h);
1568        if (ret)
1569                return ret;
1570
1571        ret = amd_iommu_create_irq_domain(iommu);
1572        if (ret)
1573                return ret;
1574
1575        /*
1576         * Make sure IOMMU is not considered to translate itself. The IVRS
1577         * table tells us so, but this is a lie!
1578         */
1579        amd_iommu_rlookup_table[iommu->devid] = NULL;
1580
1581        return 0;
1582}
1583
1584/**
1585 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1586 * @ivrs: Pointer to the IVRS header
1587 *
1588 * Searches through all IVHD blocks and returns the highest supported IVHD type.
1589 */
1590static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1591{
1592        u8 *base = (u8 *)ivrs;
1593        struct ivhd_header *ivhd = (struct ivhd_header *)
1594                                        (base + IVRS_HEADER_LENGTH);
1595        u8 last_type = ivhd->type;
1596        u16 devid = ivhd->devid;
1597
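        /*
         * Walk all IVHD blocks describing the first IOMMU (same devid) and
         * keep the type of the last one within the supported range; that is
         * the most comprehensive layout the driver can use.
         */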
1598        while (((u8 *)ivhd - base < ivrs->length) &&
1599               (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1600                u8 *p = (u8 *) ivhd;
1601
1602                if (ivhd->devid == devid)
1603                        last_type = ivhd->type;
1604                ivhd = (struct ivhd_header *)(p + ivhd->length);
1605        }
1606
1607        return last_type;
1608}
1609
1610/*
1611 * Iterates over all IOMMU entries in the ACPI table, allocates the
1612 * IOMMU structure and initializes it with init_iommu_one()
1613 */
1614static int __init init_iommu_all(struct acpi_table_header *table)
1615{
1616        u8 *p = (u8 *)table, *end = (u8 *)table;
1617        struct ivhd_header *h;
1618        struct amd_iommu *iommu;
1619        int ret;
1620
1621        end += table->length;
1622        p += IVRS_HEADER_LENGTH;
1623
1624        while (p < end) {
1625                h = (struct ivhd_header *)p;
1626                if (*p == amd_iommu_target_ivhd_type) {
1627
1628                        DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1629                                    "seg: %d flags: %01x info %04x\n",
1630                                    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1631                                    PCI_FUNC(h->devid), h->cap_ptr,
1632                                    h->pci_seg, h->flags, h->info);
1633                        DUMP_printk("       mmio-addr: %016llx\n",
1634                                    h->mmio_phys);
1635
1636                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1637                        if (iommu == NULL)
1638                                return -ENOMEM;
1639
1640                        ret = init_iommu_one(iommu, h);
1641                        if (ret)
1642                                return ret;
1643                }
1644                p += h->length;
1645
1646        }
1647        WARN_ON(p != end);
1648
1649        return 0;
1650}
1651
1652static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
1653                                u8 fxn, u64 *value, bool is_write);
1654
1655static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1656{
1657        struct pci_dev *pdev = iommu->dev;
1658        u64 val = 0xabcd, val2 = 0, save_reg = 0;
1659
1660        if (!iommu_feature(iommu, FEATURE_PC))
1661                return;
1662
1663        amd_iommu_pc_present = true;
1664
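        /*
         * Probe counter bank 0, counter 0: save the register, write a test
         * pattern, read it back and restore the original value. Any failure
         * means the counters are not usable.
         */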
1665        /* save the value to restore, if writable */
1666        if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
1667                goto pc_false;
1668
1669        /* Check if the performance counters can be written to */
1670        if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
1671            (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
1672            (val != val2))
1673                goto pc_false;
1674
1675        /* restore */
1676        if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
1677                goto pc_false;
1678
1679        pci_info(pdev, "IOMMU performance counters supported\n");
1680
1681        val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1682        iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1683        iommu->max_counters = (u8) ((val >> 7) & 0xf);
1684
1685        return;
1686
1687pc_false:
1688        pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
1689        amd_iommu_pc_present = false;
1690        return;
1691}
1692
1693static ssize_t amd_iommu_show_cap(struct device *dev,
1694                                  struct device_attribute *attr,
1695                                  char *buf)
1696{
1697        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1698        return sprintf(buf, "%x\n", iommu->cap);
1699}
1700static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1701
1702static ssize_t amd_iommu_show_features(struct device *dev,
1703                                       struct device_attribute *attr,
1704                                       char *buf)
1705{
1706        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1707        return sprintf(buf, "%llx\n", iommu->features);
1708}
1709static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1710
1711static struct attribute *amd_iommu_attrs[] = {
1712        &dev_attr_cap.attr,
1713        &dev_attr_features.attr,
1714        NULL,
1715};
1716
1717static struct attribute_group amd_iommu_group = {
1718        .name = "amd-iommu",
1719        .attrs = amd_iommu_attrs,
1720};
1721
1722static const struct attribute_group *amd_iommu_groups[] = {
1723        &amd_iommu_group,
1724        NULL,
1725};
1726
1727static int __init iommu_init_pci(struct amd_iommu *iommu)
1728{
1729        int cap_ptr = iommu->cap_ptr;
1730        u32 range, misc, low, high;
1731        int ret;
1732
1733        iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
1734                                                 iommu->devid & 0xff);
1735        if (!iommu->dev)
1736                return -ENODEV;
1737
1738        /* Prevent binding other PCI device drivers to IOMMU devices */
1739        iommu->dev->match_driver = false;
1740
1741        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1742                              &iommu->cap);
1743        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
1744                              &range);
1745        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
1746                              &misc);
1747
1748        if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1749                amd_iommu_iotlb_sup = false;
1750
1751        /* read extended feature bits */
1752        low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
1753        high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
1754
1755        iommu->features = ((u64)high << 32) | low;
1756
1757        if (iommu_feature(iommu, FEATURE_GT)) {
1758                int glxval;
1759                u32 max_pasid;
1760                u64 pasmax;
1761
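                /*
                 * The PASID field encodes the supported PASID width minus
                 * one, so the largest usable PASID is 2^(pasmax + 1) - 1.
                 */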
1762                pasmax = iommu->features & FEATURE_PASID_MASK;
1763                pasmax >>= FEATURE_PASID_SHIFT;
1764                max_pasid  = (1 << (pasmax + 1)) - 1;
1765
1766                amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1767
1768                BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1769
1770                glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1771                glxval >>= FEATURE_GLXVAL_SHIFT;
1772
1773                if (amd_iommu_max_glx_val == -1)
1774                        amd_iommu_max_glx_val = glxval;
1775                else
1776                        amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1777        }
1778
1779        if (iommu_feature(iommu, FEATURE_GT) &&
1780            iommu_feature(iommu, FEATURE_PPR)) {
1781                iommu->is_iommu_v2   = true;
1782                amd_iommu_v2_present = true;
1783        }
1784
1785        if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
1786                return -ENOMEM;
1787
1788        ret = iommu_init_ga(iommu);
1789        if (ret)
1790                return ret;
1791
1792        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1793                amd_iommu_np_cache = true;
1794
1795        init_iommu_perf_ctr(iommu);
1796
1797        if (is_rd890_iommu(iommu->dev)) {
1798                int i, j;
1799
1800                iommu->root_pdev =
1801                        pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
1802                                                    PCI_DEVFN(0, 0));
1803
1804                /*
1805                 * Some rd890 systems may not be fully reconfigured by the
1806                 * BIOS, so we need to store this information so that it
1807                 * can be reprogrammed on resume
1808                 */
1809                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1810                                &iommu->stored_addr_lo);
1811                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1812                                &iommu->stored_addr_hi);
1813
1814                /* Low bit locks writes to configuration space */
1815                iommu->stored_addr_lo &= ~1;
1816
1817                for (i = 0; i < 6; i++)
1818                        for (j = 0; j < 0x12; j++)
1819                                iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1820
1821                for (i = 0; i < 0x83; i++)
1822                        iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1823        }
1824
1825        amd_iommu_erratum_746_workaround(iommu);
1826        amd_iommu_ats_write_check_workaround(iommu);
1827
1828        iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
1829                               amd_iommu_groups, "ivhd%d", iommu->index);
1830        iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
1831        iommu_device_register(&iommu->iommu);
1832
1833        return pci_enable_device(iommu->dev);
1834}
1835
1836static void print_iommu_info(void)
1837{
1838        static const char * const feat_str[] = {
1839                "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1840                "IA", "GA", "HE", "PC"
1841        };
1842        struct amd_iommu *iommu;
1843
1844        for_each_iommu(iommu) {
1845                struct pci_dev *pdev = iommu->dev;
1846                int i;
1847
1848                pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
1849
1850                if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1851                        pci_info(pdev, "Extended features (%#llx):\n",
1852                                 iommu->features);
1853                        for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1854                                if (iommu_feature(iommu, (1ULL << i)))
1855                                        pr_cont(" %s", feat_str[i]);
1856                        }
1857
1858                        if (iommu->features & FEATURE_GAM_VAPIC)
1859                                pr_cont(" GA_vAPIC");
1860
1861                        pr_cont("\n");
1862                }
1863        }
1864        if (irq_remapping_enabled) {
1865                pr_info("Interrupt remapping enabled\n");
1866                if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1867                        pr_info("Virtual APIC enabled\n");
1868                if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1869                        pr_info("X2APIC enabled\n");
1870        }
1871}
1872
1873static int __init amd_iommu_init_pci(void)
1874{
1875        struct amd_iommu *iommu;
1876        int ret = 0;
1877
1878        for_each_iommu(iommu) {
1879                ret = iommu_init_pci(iommu);
1880                if (ret)
1881                        break;
1882        }
1883
1884        /*
1885         * Order is important here to make sure any unity map requirements are
1886         * fulfilled. The unity mappings are created and written to the device
1887         * table during the amd_iommu_init_api() call.
1888         *
1889         * After that we call init_device_table_dma() to make sure any
1890         * uninitialized DTE will block DMA, and in the end we flush the caches
1891         * of all IOMMUs to make sure the changes to the device table are
1892         * active.
1893         */
1894        ret = amd_iommu_init_api();
1895
1896        init_device_table_dma();
1897
1898        for_each_iommu(iommu)
1899                iommu_flush_all_caches(iommu);
1900
1901        if (!ret)
1902                print_iommu_info();
1903
1904        return ret;
1905}
1906
1907/****************************************************************************
1908 *
1909 * The following functions initialize the MSI interrupts for all IOMMUs
1910 * in the system. It's a bit challenging because there could be multiple
1911 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1912 * pci_dev.
1913 *
1914 ****************************************************************************/
1915
1916static int iommu_setup_msi(struct amd_iommu *iommu)
1917{
1918        int r;
1919
1920        r = pci_enable_msi(iommu->dev);
1921        if (r)
1922                return r;
1923
1924        r = request_threaded_irq(iommu->dev->irq,
1925                                 amd_iommu_int_handler,
1926                                 amd_iommu_int_thread,
1927                                 0, "AMD-Vi",
1928                                 iommu);
1929
1930        if (r) {
1931                pci_disable_msi(iommu->dev);
1932                return r;
1933        }
1934
1935        iommu->int_enabled = true;
1936
1937        return 0;
1938}
1939
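/* Field positions within the 64-bit IntCapXT interrupt routing registers */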
1940#define XT_INT_DEST_MODE(x)     (((x) & 0x1ULL) << 2)
1941#define XT_INT_DEST_LO(x)       (((x) & 0xFFFFFFULL) << 8)
1942#define XT_INT_VEC(x)           (((x) & 0xFFULL) << 32)
1943#define XT_INT_DEST_HI(x)       ((((x) >> 24) & 0xFFULL) << 56)
1944
1945/*
1946 * Set up the IntCapXT registers with interrupt routing information
1947 * based on the PCI MSI capability block registers, accessed via
1948 * MMIO MSI address low/hi and MSI data registers.
1949 */
1950static void iommu_update_intcapxt(struct amd_iommu *iommu)
1951{
1952        u64 val;
1953        u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
1954        u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
1955        u32 data    = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
1956        bool dm     = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
1957        u32 dest    = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
1958
1959        if (x2apic_enabled())
1960                dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
1961
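        /*
         * Assemble the routing entry: vector and destination mode come from
         * the MSI data/address registers, the low 24 destination bits go
         * into DestLo and the remaining high bits into DestHi.
         */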
1962        val = XT_INT_VEC(data & 0xFF) |
1963              XT_INT_DEST_MODE(dm) |
1964              XT_INT_DEST_LO(dest) |
1965              XT_INT_DEST_HI(dest);
1966
1967        /*
1968         * The current IOMMU implementation uses the same IRQ for all
1969         * 3 IOMMU interrupts.
1970         */
1971        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
1972        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
1973        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
1974}
1975
1976static void _irq_notifier_notify(struct irq_affinity_notify *notify,
1977                                 const cpumask_t *mask)
1978{
1979        struct amd_iommu *iommu;
1980
1981        for_each_iommu(iommu) {
1982                if (iommu->dev->irq == notify->irq) {
1983                        iommu_update_intcapxt(iommu);
1984                        break;
1985                }
1986        }
1987}
1988
1989static void _irq_notifier_release(struct kref *ref)
1990{
1991}
1992
1993static int iommu_init_intcapxt(struct amd_iommu *iommu)
1994{
1995        int ret;
1996        struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
1997
1998        /*
1999         * IntCapXT requires XTSup=1, which can be inferred from
2000         * amd_iommu_xt_mode.
2001         */
2002        if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
2003                return 0;
2004
2005        /*
2006         * Also, we need to set up a notifier to update the IntCapXT registers
2007         * whenever the irq affinity is changed from user-space.
2008         */
2009        notify->irq = iommu->dev->irq;
2010        notify->notify = _irq_notifier_notify;
2011        notify->release = _irq_notifier_release;
2012        ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
2013        if (ret) {
2014                pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
2015                       iommu->devid, iommu->dev->irq);
2016                return ret;
2017        }
2018
2019        iommu_update_intcapxt(iommu);
2020        iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2021        return ret;
2022}
2023
2024static int iommu_init_msi(struct amd_iommu *iommu)
2025{
2026        int ret;
2027
2028        if (iommu->int_enabled)
2029                goto enable_faults;
2030
2031        if (iommu->dev->msi_cap)
2032                ret = iommu_setup_msi(iommu);
2033        else
2034                ret = -ENODEV;
2035
2036        if (ret)
2037                return ret;
2038
2039enable_faults:
2040        ret = iommu_init_intcapxt(iommu);
2041        if (ret)
2042                return ret;
2043
2044        iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2045
2046        if (iommu->ppr_log != NULL)
2047                iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
2048
2049        iommu_ga_log_enable(iommu);
2050
2051        return 0;
2052}
2053
2054/****************************************************************************
2055 *
2056 * The next functions belong to the third pass of parsing the ACPI
2057 * table. In this last pass the memory mapping requirements are
2058 * gathered (like exclusion and unity mapping ranges).
2059 *
2060 ****************************************************************************/
2061
2062static void __init free_unity_maps(void)
2063{
2064        struct unity_map_entry *entry, *next;
2065
2066        list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
2067                list_del(&entry->list);
2068                kfree(entry);
2069        }
2070}
2071
2072/* called when we find an exclusion range definition in ACPI */
2073static int __init init_exclusion_range(struct ivmd_header *m)
2074{
2075        int i;
2076
2077        switch (m->type) {
2078        case ACPI_IVMD_TYPE:
2079                set_device_exclusion_range(m->devid, m);
2080                break;
2081        case ACPI_IVMD_TYPE_ALL:
2082                for (i = 0; i <= amd_iommu_last_bdf; ++i)
2083                        set_device_exclusion_range(i, m);
2084                break;
2085        case ACPI_IVMD_TYPE_RANGE:
2086                for (i = m->devid; i <= m->aux; ++i)
2087                        set_device_exclusion_range(i, m);
2088                break;
2089        default:
2090                break;
2091        }
2092
2093        return 0;
2094}
2095
2096/* called for unity map ACPI definition */
2097static int __init init_unity_map_range(struct ivmd_header *m)
2098{
2099        struct unity_map_entry *e = NULL;
2100        char *s;
2101
2102        e = kzalloc(sizeof(*e), GFP_KERNEL);
2103        if (e == NULL)
2104                return -ENOMEM;
2105
2106        if (m->flags & IVMD_FLAG_EXCL_RANGE)
2107                init_exclusion_range(m);
2108
2109        switch (m->type) {
2110        default:
2111                kfree(e);
2112                return 0;
2113        case ACPI_IVMD_TYPE:
2114                s = "IVMD_TYPE\t\t\t";
2115                e->devid_start = e->devid_end = m->devid;
2116                break;
2117        case ACPI_IVMD_TYPE_ALL:
2118                s = "IVMD_TYPE_ALL\t\t";
2119                e->devid_start = 0;
2120                e->devid_end = amd_iommu_last_bdf;
2121                break;
2122        case ACPI_IVMD_TYPE_RANGE:
2123                s = "IVMD_TYPE_RANGE\t\t";
2124                e->devid_start = m->devid;
2125                e->devid_end = m->aux;
2126                break;
2127        }
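        /*
         * Page-align the range; the protection bits are taken from the IVMD
         * flags with the unity-map bit shifted out.
         */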
2128        e->address_start = PAGE_ALIGN(m->range_start);
2129        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2130        e->prot = m->flags >> 1;
2131
2132        DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
2133                    " range_start: %016llx range_end: %016llx flags: %x\n", s,
2134                    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2135                    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
2136                    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2137                    e->address_start, e->address_end, m->flags);
2138
2139        list_add_tail(&e->list, &amd_iommu_unity_map);
2140
2141        return 0;
2142}
2143
2144/* iterates over all memory definitions we find in the ACPI table */
2145static int __init init_memory_definitions(struct acpi_table_header *table)
2146{
2147        u8 *p = (u8 *)table, *end = (u8 *)table;
2148        struct ivmd_header *m;
2149
2150        end += table->length;
2151        p += IVRS_HEADER_LENGTH;
2152
2153        while (p < end) {
2154                m = (struct ivmd_header *)p;
2155                if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2156                        init_unity_map_range(m);
2157
2158                p += m->length;
2159        }
2160
2161        return 0;
2162}
2163
2164/*
2165 * Init the device table to not allow DMA access for devices
2166 */
2167static void init_device_table_dma(void)
2168{
2169        u32 devid;
2170
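        /*
         * A DTE with V and TV set but no page-table root and IR/IW clear
         * blocks all DMA from the device until a domain is attached.
         */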
2171        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2172                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
2173                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
2174        }
2175}
2176
2177static void __init uninit_device_table_dma(void)
2178{
2179        u32 devid;
2180
2181        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2182                amd_iommu_dev_table[devid].data[0] = 0ULL;
2183                amd_iommu_dev_table[devid].data[1] = 0ULL;
2184        }
2185}
2186
2187static void init_device_table(void)
2188{
2189        u32 devid;
2190
2191        if (!amd_iommu_irq_remap)
2192                return;
2193
2194        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2195                set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
2196}
2197
2198static void iommu_init_flags(struct amd_iommu *iommu)
2199{
2200        iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2201                iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2202                iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2203
2204        iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2205                iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2206                iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2207
2208        iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2209                iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2210                iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2211
2212        iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2213                iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2214                iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2215
2216        /*
2217         * make IOMMU memory accesses cache coherent
2218         */
2219        iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2220
2221        /* Set IOTLB invalidation timeout to 1s */
2222        iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2223}
2224
2225static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2226{
2227        int i, j;
2228        u32 ioc_feature_control;
2229        struct pci_dev *pdev = iommu->root_pdev;
2230
2231        /* RD890 BIOSes may not have completely reconfigured the iommu */
2232        if (!is_rd890_iommu(iommu->dev) || !pdev)
2233                return;
2234
2235        /*
2236         * First, we need to ensure that the iommu is enabled. This is
2237         * controlled by a register in the northbridge
2238         */
2239
2240        /* Select Northbridge indirect register 0x75 and enable writing */
2241        pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2242        pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2243
2244        /* Enable the iommu */
2245        if (!(ioc_feature_control & 0x1))
2246                pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2247
2248        /* Restore the iommu BAR */
2249        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2250                               iommu->stored_addr_lo);
2251        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2252                               iommu->stored_addr_hi);
2253
2254        /* Restore the l1 indirect regs for each of the 6 l1s */
2255        for (i = 0; i < 6; i++)
2256                for (j = 0; j < 0x12; j++)
2257                        iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2258
2259        /* Restore the l2 indirect regs */
2260        for (i = 0; i < 0x83; i++)
2261                iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2262
2263        /* Lock PCI setup registers */
2264        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2265                               iommu->stored_addr_lo | 1);
2266}
2267
2268static void iommu_enable_ga(struct amd_iommu *iommu)
2269{
2270#ifdef CONFIG_IRQ_REMAP
2271        switch (amd_iommu_guest_ir) {
2272        case AMD_IOMMU_GUEST_IR_VAPIC:
2273                iommu_feature_enable(iommu, CONTROL_GAM_EN);
2274                /* Fall through */
2275        case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2276                iommu_feature_enable(iommu, CONTROL_GA_EN);
2277                iommu->irte_ops = &irte_128_ops;
2278                break;
2279        default:
2280                iommu->irte_ops = &irte_32_ops;
2281                break;
2282        }
2283#endif
2284}
2285
2286static void early_enable_iommu(struct amd_iommu *iommu)
2287{
2288        iommu_disable(iommu);
2289        iommu_init_flags(iommu);
2290        iommu_set_device_table(iommu);
2291        iommu_enable_command_buffer(iommu);
2292        iommu_enable_event_buffer(iommu);
2293        iommu_set_exclusion_range(iommu);
2294        iommu_enable_ga(iommu);
2295        iommu_enable_xt(iommu);
2296        iommu_enable(iommu);
2297        iommu_flush_all_caches(iommu);
2298}
2299
2300/*
2301 * This function finally enables all IOMMUs found in the system after
2302 * they have been initialized.
2303 *
2304 * Or, if running in a kdump kernel with all IOMMUs pre-enabled, try to
2305 * copy the old contents of the device table entries. If that is not the
2306 * case, or the copy fails, just continue as a normal kernel would.
2307 */
2308static void early_enable_iommus(void)
2309{
2310        struct amd_iommu *iommu;
2311
2312
2313        if (!copy_device_table()) {
2314                /*
2315                 * If we get here because copying the device table from the
2316                 * old kernel failed while all IOMMUs were pre-enabled, print
2317                 * an error message and free the allocated old_dev_tbl_cpy.
2318                 */
2319                if (amd_iommu_pre_enabled)
2320                        pr_err("Failed to copy DEV table from previous kernel.\n");
2321                if (old_dev_tbl_cpy != NULL)
2322                        free_pages((unsigned long)old_dev_tbl_cpy,
2323                                        get_order(dev_table_size));
2324
2325                for_each_iommu(iommu) {
2326                        clear_translation_pre_enabled(iommu);
2327                        early_enable_iommu(iommu);
2328                }
2329        } else {
2330                pr_info("Copied DEV table from previous kernel.\n");
2331                free_pages((unsigned long)amd_iommu_dev_table,
2332                                get_order(dev_table_size));
2333                amd_iommu_dev_table = old_dev_tbl_cpy;
2334                for_each_iommu(iommu) {
2335                        iommu_disable_command_buffer(iommu);
2336                        iommu_disable_event_buffer(iommu);
2337                        iommu_enable_command_buffer(iommu);
2338                        iommu_enable_event_buffer(iommu);
2339                        iommu_enable_ga(iommu);
2340                        iommu_enable_xt(iommu);
2341                        iommu_set_device_table(iommu);
2342                        iommu_flush_all_caches(iommu);
2343                }
2344        }
2345
2346#ifdef CONFIG_IRQ_REMAP
2347        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2348                amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2349#endif
2350}
2351
2352static void enable_iommus_v2(void)
2353{
2354        struct amd_iommu *iommu;
2355
2356        for_each_iommu(iommu) {
2357                iommu_enable_ppr_log(iommu);
2358                iommu_enable_gt(iommu);
2359        }
2360}
2361
2362static void enable_iommus(void)
2363{
2364        early_enable_iommus();
2365
2366        enable_iommus_v2();
2367}
2368
2369static void disable_iommus(void)
2370{
2371        struct amd_iommu *iommu;
2372
2373        for_each_iommu(iommu)
2374                iommu_disable(iommu);
2375
2376#ifdef CONFIG_IRQ_REMAP
2377        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2378                amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2379#endif
2380}
2381
2382/*
2383 * Suspend/Resume support
2384 * (the IOMMUs are disabled on suspend and fully re-enabled on resume)
2385 */
2386
2387static void amd_iommu_resume(void)
2388{
2389        struct amd_iommu *iommu;
2390
2391        for_each_iommu(iommu)
2392                iommu_apply_resume_quirks(iommu);
2393
2394        /* re-load the hardware */
2395        enable_iommus();
2396
2397        amd_iommu_enable_interrupts();
2398}
2399
2400static int amd_iommu_suspend(void)
2401{
2402        /* disable IOMMUs to go out of the way for BIOS */
2403        disable_iommus();
2404
2405        return 0;
2406}
2407
2408static struct syscore_ops amd_iommu_syscore_ops = {
2409        .suspend = amd_iommu_suspend,
2410        .resume = amd_iommu_resume,
2411};
2412
2413static void __init free_iommu_resources(void)
2414{
2415        kmemleak_free(irq_lookup_table);
2416        free_pages((unsigned long)irq_lookup_table,
2417                   get_order(rlookup_table_size));
2418        irq_lookup_table = NULL;
2419
2420        kmem_cache_destroy(amd_iommu_irq_cache);
2421        amd_iommu_irq_cache = NULL;
2422
2423        free_pages((unsigned long)amd_iommu_rlookup_table,
2424                   get_order(rlookup_table_size));
2425        amd_iommu_rlookup_table = NULL;
2426
2427        free_pages((unsigned long)amd_iommu_alias_table,
2428                   get_order(alias_table_size));
2429        amd_iommu_alias_table = NULL;
2430
2431        free_pages((unsigned long)amd_iommu_dev_table,
2432                   get_order(dev_table_size));
2433        amd_iommu_dev_table = NULL;
2434
2435        free_iommu_all();
2436}
2437
2438/* SB IOAPIC is always on this device in AMD systems */
2439#define IOAPIC_SB_DEVID         ((0x00 << 8) | PCI_DEVFN(0x14, 0))
2440
2441static bool __init check_ioapic_information(void)
2442{
2443        const char *fw_bug = FW_BUG;
2444        bool ret, has_sb_ioapic;
2445        int idx;
2446
2447        has_sb_ioapic = false;
2448        ret           = false;
2449
2450        /*
2451         * If we have map overrides on the kernel command line the
2452         * messages in this function might not describe firmware bugs
2453         * anymore - so be careful
2454         */
2455        if (cmdline_maps)
2456                fw_bug = "";
2457
2458        for (idx = 0; idx < nr_ioapics; idx++) {
2459                int devid, id = mpc_ioapic_id(idx);
2460
2461                devid = get_ioapic_devid(id);
2462                if (devid < 0) {
2463                        pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2464                                fw_bug, id);
2465                        ret = false;
2466                } else if (devid == IOAPIC_SB_DEVID) {
2467                        has_sb_ioapic = true;
2468                        ret           = true;
2469                }
2470        }
2471
2472        if (!has_sb_ioapic) {
2473                /*
2474                 * We expect the SB IOAPIC to be listed in the IVRS
2475                 * table. The system timer is connected to the SB IOAPIC
2476                 * and if we don't have it in the list the system will
2477                 * panic at boot time.  This situation usually happens
2478                 * when the BIOS is buggy and provides us the wrong
2479                 * device id for the IOAPIC in the system.
2480                 */
2481                pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2482        }
2483
2484        if (!ret)
2485                pr_err("Disabling interrupt remapping\n");
2486
2487        return ret;
2488}
2489
2490static void __init free_dma_resources(void)
2491{
2492        free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2493                   get_order(MAX_DOMAIN_ID/8));
2494        amd_iommu_pd_alloc_bitmap = NULL;
2495
2496        free_unity_maps();
2497}
2498
2499/*
2500 * This is the hardware init function for AMD IOMMU in the system.
2501 * This function is called either from amd_iommu_init or from the interrupt
2502 * remapping setup code.
2503 *
2504 * This function parses the ACPI table for the AMD IOMMU (IVRS)
2505 * four times:
2506 *
2507 *      Pass 1) Discover the most comprehensive IVHD type to use.
2508 *
2509 *      Pass 2) Find the highest PCI device id the driver has to handle.
2510 *              Based on this information the sizes of the data structures
2511 *              that need to be allocated are determined.
2512 *
2513 *      Pass 3) Initialize the data structures just allocated with the
2514 *              information in the ACPI table about available AMD IOMMUs
2515 *              in the system. It also maps the PCI devices in the
2516 *              system to specific IOMMUs.
2517 *
2518 *      Pass 4) After the basic data structures are allocated and
2519 *              initialized, update them with information about the memory
2520 *              remapping requirements parsed out of the ACPI table in
2521 *              this last pass.
2522 *
2523 * After everything is set up the IOMMUs are enabled and the necessary
2524 * hotplug and suspend notifiers are registered.
2525 */
2526static int __init early_amd_iommu_init(void)
2527{
2528        struct acpi_table_header *ivrs_base;
2529        acpi_status status;
2530        int i, remap_cache_sz, ret = 0;
2531
2532        if (!amd_iommu_detected)
2533                return -ENODEV;
2534
2535        status = acpi_get_table("IVRS", 0, &ivrs_base);
2536        if (status == AE_NOT_FOUND)
2537                return -ENODEV;
2538        else if (ACPI_FAILURE(status)) {
2539                const char *err = acpi_format_exception(status);
2540                pr_err("IVRS table error: %s\n", err);
2541                return -EINVAL;
2542        }
2543
2544        /*
2545         * Validate checksum here so we don't need to do it when
2546         * we actually parse the table
2547         */
2548        ret = check_ivrs_checksum(ivrs_base);
2549        if (ret)
2550                goto out;
2551
2552        amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2553        DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
2554
2555        /*
2556         * First parse ACPI tables to find the largest Bus/Dev/Func
2557         * we need to handle. Based on this information the shared data
2558         * structures for the IOMMUs in the system will be allocated.
2559         */
2560        ret = find_last_devid_acpi(ivrs_base);
2561        if (ret)
2562                goto out;
2563
2564        dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
2565        alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
2566        rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
2567
2568        /* Device table - directly used by all IOMMUs */
2569        ret = -ENOMEM;
2570        amd_iommu_dev_table = (void *)__get_free_pages(
2571                                      GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
2572                                      get_order(dev_table_size));
2573        if (amd_iommu_dev_table == NULL)
2574                goto out;
2575
2576        /*
2577         * Alias table - maps a PCI Bus/Dev/Func to the Bus/Dev/Func the
2578         * IOMMU sees for that device.
2579         */
2580        amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
2581                        get_order(alias_table_size));
2582        if (amd_iommu_alias_table == NULL)
2583                goto out;
2584
2585        /* IOMMU rlookup table - find the IOMMU for a specific device */
2586        amd_iommu_rlookup_table = (void *)__get_free_pages(
2587                        GFP_KERNEL | __GFP_ZERO,
2588                        get_order(rlookup_table_size));
2589        if (amd_iommu_rlookup_table == NULL)
2590                goto out;
2591
2592        amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
2593                                            GFP_KERNEL | __GFP_ZERO,
2594                                            get_order(MAX_DOMAIN_ID/8));
2595        if (amd_iommu_pd_alloc_bitmap == NULL)
2596                goto out;
2597
2598        /*
2599         * let all alias entries point to themselves
2600         */
2601        for (i = 0; i <= amd_iommu_last_bdf; ++i)
2602                amd_iommu_alias_table[i] = i;
2603
2604        /*
2605         * never allocate domain 0 because it's used as the non-allocated and
2606         * error value placeholder
2607         */
2608        __set_bit(0, amd_iommu_pd_alloc_bitmap);
2609
2610        /*
2611         * now the data structures are allocated and basically initialized;
2612         * start the real ACPI table scan
2613         */
2614        ret = init_iommu_all(ivrs_base);
2615        if (ret)
2616                goto out;
2617
2618        /* Disable any previously enabled IOMMUs */
2619        if (!is_kdump_kernel() || amd_iommu_disabled)
2620                disable_iommus();
2621
2622        if (amd_iommu_irq_remap)
2623                amd_iommu_irq_remap = check_ioapic_information();
2624
2625        if (amd_iommu_irq_remap) {
2626                /*
2627                 * Interrupt remapping enabled, create kmem_cache for the
2628                 * remapping tables.
2629                 */
2630                ret = -ENOMEM;
2631                if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2632                        remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
2633                else
2634                        remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
2635                amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
2636                                                        remap_cache_sz,
2637                                                        IRQ_TABLE_ALIGNMENT,
2638                                                        0, NULL);
2639                if (!amd_iommu_irq_cache)
2640                        goto out;
2641
2642                irq_lookup_table = (void *)__get_free_pages(
2643                                GFP_KERNEL | __GFP_ZERO,
2644                                get_order(rlookup_table_size));
2645                kmemleak_alloc(irq_lookup_table, rlookup_table_size,
2646                               1, GFP_KERNEL);
2647                if (!irq_lookup_table)
2648                        goto out;
2649        }
2650
2651        ret = init_memory_definitions(ivrs_base);
2652        if (ret)
2653                goto out;
2654
2655        /* init the device table */
2656        init_device_table();
2657
2658out:
2659        /* Don't leak any ACPI memory */
2660        acpi_put_table(ivrs_base);
2661        ivrs_base = NULL;
2662
2663        return ret;
2664}
2665
2666static int amd_iommu_enable_interrupts(void)
2667{
2668        struct amd_iommu *iommu;
2669        int ret = 0;
2670
2671        for_each_iommu(iommu) {
2672                ret = iommu_init_msi(iommu);
2673                if (ret)
2674                        goto out;
2675        }
2676
2677out:
2678        return ret;
2679}
2680
2681static bool detect_ivrs(void)
2682{
2683        struct acpi_table_header *ivrs_base;
2684        acpi_status status;
2685
2686        status = acpi_get_table("IVRS", 0, &ivrs_base);
2687        if (status == AE_NOT_FOUND)
2688                return false;
2689        else if (ACPI_FAILURE(status)) {
2690                const char *err = acpi_format_exception(status);
2691                pr_err("IVRS table error: %s\n", err);
2692                return false;
2693        }
2694
2695        acpi_put_table(ivrs_base);
2696
2697        /* Make sure ACS will be enabled during PCI probe */
2698        pci_request_acs();
2699
2700        return true;
2701}
2702
2703/****************************************************************************
2704 *
2705 * AMD IOMMU Initialization State Machine
2706 *
2707 ****************************************************************************/
2708
2709static int __init state_next(void)
2710{
2711        int ret = 0;
2712
2713        switch (init_state) {
2714        case IOMMU_START_STATE:
2715                if (!detect_ivrs()) {
2716                        init_state      = IOMMU_NOT_FOUND;
2717                        ret             = -ENODEV;
2718                } else {
2719                        init_state      = IOMMU_IVRS_DETECTED;
2720                }
2721                break;
2722        case IOMMU_IVRS_DETECTED:
2723                ret = early_amd_iommu_init();
2724                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2725                if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2726                        pr_info("AMD IOMMU disabled on kernel command-line\n");
2727                        init_state = IOMMU_CMDLINE_DISABLED;
2728                        ret = -EINVAL;
2729                }
2730                break;
2731        case IOMMU_ACPI_FINISHED:
2732                early_enable_iommus();
2733                x86_platform.iommu_shutdown = disable_iommus;
2734                init_state = IOMMU_ENABLED;
2735                break;
2736        case IOMMU_ENABLED:
2737                register_syscore_ops(&amd_iommu_syscore_ops);
2738                ret = amd_iommu_init_pci();
2739                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2740                enable_iommus_v2();
2741                break;
2742        case IOMMU_PCI_INIT:
2743                ret = amd_iommu_enable_interrupts();
2744                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2745                break;
2746        case IOMMU_INTERRUPTS_EN:
2747                ret = amd_iommu_init_dma_ops();
2748                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2749                break;
2750        case IOMMU_DMA_OPS:
2751                init_state = IOMMU_INITIALIZED;
2752                break;
2753        case IOMMU_INITIALIZED:
2754                /* Nothing to do */
2755                break;
2756        case IOMMU_NOT_FOUND:
2757        case IOMMU_INIT_ERROR:
2758        case IOMMU_CMDLINE_DISABLED:
2759                /* Error states => do nothing */
2760                ret = -EINVAL;
2761                break;
2762        default:
2763                /* Unknown state */
2764                BUG();
2765        }
2766
2767        if (ret) {
2768                free_dma_resources();
2769                if (!irq_remapping_enabled) {
2770                        disable_iommus();
2771                        free_iommu_resources();
2772                } else {
2773                        struct amd_iommu *iommu;
2774
2775                        uninit_device_table_dma();
2776                        for_each_iommu(iommu)
2777                                iommu_flush_all_caches(iommu);
2778                }
2779        }
2780        return ret;
2781}
2782
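/*
 * Advance the initialization state machine until the requested state is
 * reached or a terminal error state stops further progress.
 */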
2783static int __init iommu_go_to_state(enum iommu_init_state state)
2784{
2785        int ret = -EINVAL;
2786
2787        while (init_state != state) {
2788                if (init_state == IOMMU_NOT_FOUND         ||
2789                    init_state == IOMMU_INIT_ERROR        ||
2790                    init_state == IOMMU_CMDLINE_DISABLED)
2791                        break;
2792                ret = state_next();
2793        }
2794
2795        return ret;
2796}
2797
2798#ifdef CONFIG_IRQ_REMAP
2799int __init amd_iommu_prepare(void)
2800{
2801        int ret;
2802
2803        amd_iommu_irq_remap = true;
2804
2805        ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
2806        if (ret)
2807                return ret;
2808        return amd_iommu_irq_remap ? 0 : -ENODEV;
2809}
2810
2811int __init amd_iommu_enable(void)
2812{
2813        int ret;
2814
2815        ret = iommu_go_to_state(IOMMU_ENABLED);
2816        if (ret)
2817                return ret;
2818
2819        irq_remapping_enabled = 1;
2820        return amd_iommu_xt_mode;
2821}
2822
2823void amd_iommu_disable(void)
2824{
2825        amd_iommu_suspend();
2826}
2827
2828int amd_iommu_reenable(int mode)
2829{
2830        amd_iommu_resume();
2831
2832        return 0;
2833}
2834
2835int __init amd_iommu_enable_faulting(void)
2836{
2837        /* We enable MSI later when PCI is initialized */
2838        return 0;
2839}
2840#endif
2841
2842/*
2843 * This is the core init function for AMD IOMMU hardware in the system.
2844 * This function is called from the generic x86 DMA layer initialization
2845 * code.
2846 */
2847static int __init amd_iommu_init(void)
2848{
2849        struct amd_iommu *iommu;
2850        int ret;
2851
2852        ret = iommu_go_to_state(IOMMU_INITIALIZED);
2853#ifdef CONFIG_GART_IOMMU
2854        if (ret && list_empty(&amd_iommu_list)) {
2855                /*
2856                 * We failed to initialize the AMD IOMMU - try fallback
2857                 * to GART if possible.
2858                 */
2859                gart_iommu_init();
2860        }
2861#endif
2862
2863        for_each_iommu(iommu)
2864                amd_iommu_debugfs_setup(iommu);
2865
2866        return ret;
2867}
2868
2869static bool amd_iommu_sme_check(void)
2870{
2871        if (!sme_active() || (boot_cpu_data.x86 != 0x17))
2872                return true;
2873
2874        /* For Fam17h, a specific level of support is required */
2875        if (boot_cpu_data.microcode >= 0x08001205)
2876                return true;
2877
2878        if ((boot_cpu_data.microcode >= 0x08001126) &&
2879            (boot_cpu_data.microcode <= 0x080011ff))
2880                return true;
2881
2882        pr_notice("IOMMU not currently supported when SME is active\n");
2883
2884        return false;
2885}
2886
2887/****************************************************************************
2888 *
2889 * Early detect code. This code runs at IOMMU detection time in the DMA
2890 * layer. It just checks whether there is an IVRS ACPI table to detect
2891 * AMD IOMMUs.
2892 *
2893 ****************************************************************************/
2894int __init amd_iommu_detect(void)
2895{
2896        int ret;
2897
2898        if (no_iommu || (iommu_detected && !gart_iommu_aperture))
2899                return -ENODEV;
2900
2901        if (!amd_iommu_sme_check())
2902                return -ENODEV;
2903
2904        ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
2905        if (ret)
2906                return ret;
2907
2908        amd_iommu_detected = true;
2909        iommu_detected = 1;
2910        x86_init.iommu.iommu_init = amd_iommu_init;
2911
2912        return 1;
2913}
2914
2915/****************************************************************************
2916 *
2917 * Parsing functions for the AMD IOMMU specific kernel command line
2918 * options.
2919 *
2920 ****************************************************************************/
2921
2922static int __init parse_amd_iommu_dump(char *str)
2923{
2924        amd_iommu_dump = true;
2925
2926        return 1;
2927}
2928
2929static int __init parse_amd_iommu_intr(char *str)
2930{
2931        for (; *str; ++str) {
2932                if (strncmp(str, "legacy", 6) == 0) {
2933                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
2934                        break;
2935                }
2936                if (strncmp(str, "vapic", 5) == 0) {
2937                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
2938                        break;
2939                }
2940        }
2941        return 1;
2942}
2943
2944static int __init parse_amd_iommu_options(char *str)
2945{
2946        for (; *str; ++str) {
2947                if (strncmp(str, "fullflush", 9) == 0)
2948                        amd_iommu_unmap_flush = true;
2949                if (strncmp(str, "off", 3) == 0)
2950                        amd_iommu_disabled = true;
2951                if (strncmp(str, "force_isolation", 15) == 0)
2952                        amd_iommu_force_isolation = true;
2953        }
2954
2955        return 1;
2956}
2957
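/*
 * Command-line override for the IVRS IOAPIC mapping,
 * e.g. ivrs_ioapic[10]=00:14.0 maps IOAPIC ID 10 to PCI device 00:14.0.
 */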
2958static int __init parse_ivrs_ioapic(char *str)
2959{
2960        unsigned int bus, dev, fn;
2961        int ret, id, i;
2962        u16 devid;
2963
2964        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2965
2966        if (ret != 4) {
2967                pr_err("Invalid command line: ivrs_ioapic%s\n", str);
2968                return 1;
2969        }
2970
2971        if (early_ioapic_map_size == EARLY_MAP_SIZE) {
2972                pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
2973                        str);
2974                return 1;
2975        }
2976
2977        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2978
2979        cmdline_maps                    = true;
2980        i                               = early_ioapic_map_size++;
2981        early_ioapic_map[i].id          = id;
2982        early_ioapic_map[i].devid       = devid;
2983        early_ioapic_map[i].cmd_line    = true;
2984
2985        return 1;
2986}
2987
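/*
 * Command-line override for the IVRS HPET mapping,
 * e.g. ivrs_hpet[0]=00:14.0 maps HPET ID 0 to PCI device 00:14.0.
 */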
2988static int __init parse_ivrs_hpet(char *str)
2989{
2990        unsigned int bus, dev, fn;
2991        int ret, id, i;
2992        u16 devid;
2993
2994        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2995
2996        if (ret != 4) {
2997                pr_err("Invalid command line: ivrs_hpet%s\n", str);
2998                return 1;
2999        }
3000
3001        if (early_hpet_map_size == EARLY_MAP_SIZE) {
3002                pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3003                        str);
3004                return 1;
3005        }
3006
3007        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3008
3009        cmdline_maps                    = true;
3010        i                               = early_hpet_map_size++;
3011        early_hpet_map[i].id            = id;
3012        early_hpet_map[i].devid         = devid;
3013        early_hpet_map[i].cmd_line      = true;
3014
3015        return 1;
3016}
3017
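/*
 * Command-line override for an IVRS ACPI HID device mapping,
 * e.g. ivrs_acpihid[00:14.5]=AMD0020:0.
 */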
3018static int __init parse_ivrs_acpihid(char *str)
3019{
3020        u32 bus, dev, fn;
3021        char *hid, *uid, *p;
3022        char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3023        int ret, i;
3024
3025        ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
3026        if (ret != 4) {
3027                pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
3028                return 1;
3029        }
3030
3031        p = acpiid;
3032        hid = strsep(&p, ":");
3033        uid = p;
3034
3035        if (!hid || !(*hid) || !uid) {
3036                pr_err("Invalid command line: hid or uid\n");
3037                return 1;
3038        }
3039
        /* Reject values that would overflow the fixed-size hid/uid buffers */
        if (strlen(hid) >= ACPIHID_HID_LEN || strlen(uid) >= ACPIHID_UID_LEN) {
                pr_err("Invalid command line: hid or uid too long\n");
                return 1;
        }

3040        i = early_acpihid_map_size++;
3041        memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3042        memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3043        early_acpihid_map[i].devid =
3044                ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3045        early_acpihid_map[i].cmd_line   = true;
3046
3047        return 1;
3048}
3049
3050__setup("amd_iommu_dump",       parse_amd_iommu_dump);
3051__setup("amd_iommu=",           parse_amd_iommu_options);
3052__setup("amd_iommu_intr=",      parse_amd_iommu_intr);
3053__setup("ivrs_ioapic",          parse_ivrs_ioapic);
3054__setup("ivrs_hpet",            parse_ivrs_hpet);
3055__setup("ivrs_acpihid",         parse_ivrs_acpihid);
3056
3057IOMMU_INIT_FINISH(amd_iommu_detect,
3058                  gart_iommu_hole_init,
3059                  NULL,
3060                  NULL);
3061
3062bool amd_iommu_v2_supported(void)
3063{
3064        return amd_iommu_v2_present;
3065}
3066EXPORT_SYMBOL(amd_iommu_v2_supported);
3067
3068struct amd_iommu *get_amd_iommu(unsigned int idx)
3069{
3070        unsigned int i = 0;
3071        struct amd_iommu *iommu;
3072
3073        for_each_iommu(iommu)
3074                if (i++ == idx)
3075                        return iommu;
3076        return NULL;
3077}
3078EXPORT_SYMBOL(get_amd_iommu);
3079
3080/****************************************************************************
3081 *
3082 * IOMMU EFR Performance Counter support functionality. This code allows
3083 * access to the IOMMU PC functionality.
3084 *
3085 ****************************************************************************/
3086
3087u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3088{
3089        struct amd_iommu *iommu = get_amd_iommu(idx);
3090
3091        if (iommu)
3092                return iommu->max_banks;
3093
3094        return 0;
3095}
3096EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3097
3098bool amd_iommu_pc_supported(void)
3099{
3100        return amd_iommu_pc_present;
3101}
3102EXPORT_SYMBOL(amd_iommu_pc_supported);
3103
3104u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3105{
3106        struct amd_iommu *iommu = get_amd_iommu(idx);
3107
3108        if (iommu)
3109                return iommu->max_counters;
3110
3111        return 0;
3112}
3113EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3114
3115static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3116                                u8 fxn, u64 *value, bool is_write)
3117{
3118        u32 offset;
3119        u32 max_offset_lim;
3120
3121        /* Make sure the IOMMU PC resource is available */
3122        if (!amd_iommu_pc_present)
3123                return -ENODEV;
3124
3125        /* Check for valid iommu and pc register indexing */
3126        if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3127                return -ENODEV;
3128
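        /*
         * The counter registers start at MMIO offset 0x40000: bank selects
         * a 4K block, cntr a 0x100 slice within it and fxn the register
         * within that slice.
         */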
3129        offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3130
3131        /* Limit the offset to the hw defined mmio region aperture */
3132        max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3133                                (iommu->max_counters << 8) | 0x28);
3134        if ((offset < MMIO_CNTR_REG_OFFSET) ||
3135            (offset > max_offset_lim))
3136                return -EINVAL;
3137
3138        if (is_write) {
3139                u64 val = *value & GENMASK_ULL(47, 0);
3140
3141                writel((u32)val, iommu->mmio_base + offset);
3142                writel((val >> 32), iommu->mmio_base + offset + 4);
3143        } else {
3144                *value = readl(iommu->mmio_base + offset + 4);
3145                *value <<= 32;
3146                *value |= readl(iommu->mmio_base + offset);
3147                *value &= GENMASK_ULL(47, 0);
3148        }
3149
3150        return 0;
3151}
3152
3153int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3154{
3155        if (!iommu)
3156                return -EINVAL;
3157
3158        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3159}
3160EXPORT_SYMBOL(amd_iommu_pc_get_reg);
3161
3162int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3163{
3164        if (!iommu)
3165                return -EINVAL;
3166
3167        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3168}
3169EXPORT_SYMBOL(amd_iommu_pc_set_reg);
3170