linux/drivers/iommu/amd_iommu_init.c
   1/*
   2 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   3 * Author: Joerg Roedel <jroedel@suse.de>
   4 *         Leo Duran <leo.duran@amd.com>
   5 *
   6 * This program is free software; you can redistribute it and/or modify it
   7 * under the terms of the GNU General Public License version 2 as published
   8 * by the Free Software Foundation.
   9 *
  10 * This program is distributed in the hope that it will be useful,
  11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 * GNU General Public License for more details.
  14 *
  15 * You should have received a copy of the GNU General Public License
  16 * along with this program; if not, write to the Free Software
  17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  18 */
  19
  20#include <linux/pci.h>
  21#include <linux/acpi.h>
  22#include <linux/list.h>
  23#include <linux/bitmap.h>
  24#include <linux/slab.h>
  25#include <linux/syscore_ops.h>
  26#include <linux/interrupt.h>
  27#include <linux/msi.h>
  28#include <linux/amd-iommu.h>
  29#include <linux/export.h>
  30#include <linux/iommu.h>
  31#include <linux/kmemleak.h>
  32#include <linux/mem_encrypt.h>
  33#include <asm/pci-direct.h>
  34#include <asm/iommu.h>
  35#include <asm/gart.h>
  36#include <asm/x86_init.h>
  37#include <asm/iommu_table.h>
  38#include <asm/io_apic.h>
  39#include <asm/irq_remapping.h>
  40
  41#include <linux/crash_dump.h>
  42#include "amd_iommu_proto.h"
  43#include "amd_iommu_types.h"
  44#include "irq_remapping.h"
  45
  46/*
  47 * definitions for the ACPI scanning code
  48 */
  49#define IVRS_HEADER_LENGTH 48
  50
  51#define ACPI_IVHD_TYPE_MAX_SUPPORTED    0x40
  52#define ACPI_IVMD_TYPE_ALL              0x20
  53#define ACPI_IVMD_TYPE                  0x21
  54#define ACPI_IVMD_TYPE_RANGE            0x22
  55
  56#define IVHD_DEV_ALL                    0x01
  57#define IVHD_DEV_SELECT                 0x02
  58#define IVHD_DEV_SELECT_RANGE_START     0x03
  59#define IVHD_DEV_RANGE_END              0x04
  60#define IVHD_DEV_ALIAS                  0x42
  61#define IVHD_DEV_ALIAS_RANGE            0x43
  62#define IVHD_DEV_EXT_SELECT             0x46
  63#define IVHD_DEV_EXT_SELECT_RANGE       0x47
  64#define IVHD_DEV_SPECIAL                0x48
  65#define IVHD_DEV_ACPI_HID               0xf0
  66
  67#define UID_NOT_PRESENT                 0
  68#define UID_IS_INTEGER                  1
  69#define UID_IS_CHARACTER                2
  70
  71#define IVHD_SPECIAL_IOAPIC             1
  72#define IVHD_SPECIAL_HPET               2
  73
  74#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
  75#define IVHD_FLAG_PASSPW_EN_MASK        0x02
  76#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
  77#define IVHD_FLAG_ISOC_EN_MASK          0x08
  78
  79#define IVMD_FLAG_EXCL_RANGE            0x08
  80#define IVMD_FLAG_UNITY_MAP             0x01
  81
  82#define ACPI_DEVFLAG_INITPASS           0x01
  83#define ACPI_DEVFLAG_EXTINT             0x02
  84#define ACPI_DEVFLAG_NMI                0x04
  85#define ACPI_DEVFLAG_SYSMGT1            0x10
  86#define ACPI_DEVFLAG_SYSMGT2            0x20
  87#define ACPI_DEVFLAG_LINT0              0x40
  88#define ACPI_DEVFLAG_LINT1              0x80
  89#define ACPI_DEVFLAG_ATSDIS             0x10000000
  90
  91#define LOOP_TIMEOUT    100000
  92/*
  93 * ACPI table definitions
  94 *
  95 * These data structures are laid over the table to parse the important values
  96 * out of it.
  97 */
  98
  99extern const struct iommu_ops amd_iommu_ops;
 100
 101/*
 102 * structure describing one IOMMU in the ACPI table. Typically followed by one
  103 * or more ivhd_entry structures.
 104 */
 105struct ivhd_header {
 106        u8 type;
 107        u8 flags;
 108        u16 length;
 109        u16 devid;
 110        u16 cap_ptr;
 111        u64 mmio_phys;
 112        u16 pci_seg;
 113        u16 info;
 114        u32 efr_attr;
 115
 116        /* Following only valid on IVHD type 11h and 40h */
 117        u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 118        u64 res;
 119} __attribute__((packed));
 120
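     /*
      * Because the structure is packed, its field offsets match the raw IVHD
      * byte layout: devid at offset 4, mmio_phys at offset 8, efr_attr ending
      * at offset 23 and efr_reg/res occupying offsets 24-39. This is where
      * the 24-byte (type 10h) and 40-byte (type 11h/40h) header sizes
      * returned by get_ivhd_header_size() come from.
      */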
 121/*
 122 * A device entry describing which devices a specific IOMMU translates and
 123 * which requestor ids they use.
 124 */
 125struct ivhd_entry {
 126        u8 type;
 127        u16 devid;
 128        u8 flags;
 129        u32 ext;
 130        u32 hidh;
 131        u64 cid;
 132        u8 uidf;
 133        u8 uidl;
 134        u8 uid;
 135} __attribute__((packed));
 136
 137/*
 138 * An AMD IOMMU memory definition structure. It defines things like exclusion
 139 * ranges for devices and regions that should be unity mapped.
 140 */
 141struct ivmd_header {
 142        u8 type;
 143        u8 flags;
 144        u16 length;
 145        u16 devid;
 146        u16 aux;
 147        u64 resv;
 148        u64 range_start;
 149        u64 range_length;
 150} __attribute__((packed));
 151
 152bool amd_iommu_dump;
 153bool amd_iommu_irq_remap __read_mostly;
 154
 155int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 156static int amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
 157
 158static bool amd_iommu_detected;
 159static bool __initdata amd_iommu_disabled;
 160static int amd_iommu_target_ivhd_type;
 161
 162u16 amd_iommu_last_bdf;                 /* largest PCI device id we have
 163                                           to handle */
 164LIST_HEAD(amd_iommu_unity_map);         /* a list of required unity mappings
 165                                           we find in ACPI */
 166bool amd_iommu_unmap_flush;             /* if true, flush on every unmap */
 167
 168LIST_HEAD(amd_iommu_list);              /* list of all AMD IOMMUs in the
 169                                           system */
 170
  171/* Array to assign indices to IOMMUs */
 172struct amd_iommu *amd_iommus[MAX_IOMMUS];
 173
 174/* Number of IOMMUs present in the system */
 175static int amd_iommus_present;
 176
 177/* IOMMUs have a non-present cache? */
 178bool amd_iommu_np_cache __read_mostly;
 179bool amd_iommu_iotlb_sup __read_mostly = true;
 180
 181u32 amd_iommu_max_pasid __read_mostly = ~0;
 182
 183bool amd_iommu_v2_present __read_mostly;
 184static bool amd_iommu_pc_present __read_mostly;
 185
 186bool amd_iommu_force_isolation __read_mostly;
 187
 188/*
 189 * List of protection domains - used during resume
 190 */
 191LIST_HEAD(amd_iommu_pd_list);
 192spinlock_t amd_iommu_pd_lock;
 193
 194/*
  195 * Pointer to the device table which is shared by all AMD IOMMUs.
  196 * It is indexed by the PCI device id or the HT unit id and contains
 197 * information about the domain the device belongs to as well as the
 198 * page table root pointer.
 199 */
 200struct dev_table_entry *amd_iommu_dev_table;
 201/*
  202 * Pointer to a device table to which the content of the old device
  203 * table will be copied. It is only used in the kdump kernel.
 204 */
 205static struct dev_table_entry *old_dev_tbl_cpy;
 206
 207/*
 208 * The alias table is a driver specific data structure which contains the
 209 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
 210 * More than one device can share the same requestor id.
 211 */
 212u16 *amd_iommu_alias_table;
 213
 214/*
 215 * The rlookup table is used to find the IOMMU which is responsible
 216 * for a specific device. It is also indexed by the PCI device id.
 217 */
 218struct amd_iommu **amd_iommu_rlookup_table;
 219EXPORT_SYMBOL(amd_iommu_rlookup_table);
 220
 221/*
 222 * This table is used to find the irq remapping table for a given device id
 223 * quickly.
 224 */
 225struct irq_remap_table **irq_lookup_table;
 226
 227/*
 228 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 229 * to know which ones are already in use.
 230 */
 231unsigned long *amd_iommu_pd_alloc_bitmap;
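     /*
      * A minimal sketch (assumed here for illustration, not code from this
      * file) of how a free domain id can be claimed from this bitmap with
      * the generic bitmap helpers:
      *
      *     unsigned long id;
      *
      *     id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
      *     if (id < MAX_DOMAIN_ID)
      *             __set_bit(id, amd_iommu_pd_alloc_bitmap);
      */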
 232
 233static u32 dev_table_size;      /* size of the device table */
 234static u32 alias_table_size;    /* size of the alias table */
  235static u32 rlookup_table_size;  /* size of the rlookup table */
 236
 237enum iommu_init_state {
 238        IOMMU_START_STATE,
 239        IOMMU_IVRS_DETECTED,
 240        IOMMU_ACPI_FINISHED,
 241        IOMMU_ENABLED,
 242        IOMMU_PCI_INIT,
 243        IOMMU_INTERRUPTS_EN,
 244        IOMMU_DMA_OPS,
 245        IOMMU_INITIALIZED,
 246        IOMMU_NOT_FOUND,
 247        IOMMU_INIT_ERROR,
 248        IOMMU_CMDLINE_DISABLED,
 249};
 250
 251/* Early ioapic and hpet maps from kernel command line */
 252#define EARLY_MAP_SIZE          4
 253static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 254static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 255static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 256
 257static int __initdata early_ioapic_map_size;
 258static int __initdata early_hpet_map_size;
 259static int __initdata early_acpihid_map_size;
 260
 261static bool __initdata cmdline_maps;
 262
 263static enum iommu_init_state init_state = IOMMU_START_STATE;
 264
 265static int amd_iommu_enable_interrupts(void);
 266static int __init iommu_go_to_state(enum iommu_init_state state);
 267static void init_device_table_dma(void);
 268
 269static bool amd_iommu_pre_enabled = true;
 270
 271bool translation_pre_enabled(struct amd_iommu *iommu)
 272{
 273        return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 274}
 275EXPORT_SYMBOL(translation_pre_enabled);
 276
 277static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 278{
 279        iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 280}
 281
 282static void init_translation_status(struct amd_iommu *iommu)
 283{
 284        u64 ctrl;
 285
 286        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 287        if (ctrl & (1<<CONTROL_IOMMU_EN))
 288                iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 289}
 290
 291static inline void update_last_devid(u16 devid)
 292{
 293        if (devid > amd_iommu_last_bdf)
 294                amd_iommu_last_bdf = devid;
 295}
 296
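     /*
      * tbl_size() below rounds up to a whole power-of-two number of pages.
      * Worked example (4 KiB pages, 32-byte device table entries,
      * amd_iommu_last_bdf = 0xffff): (0xffff + 1) * 32 = 2 MiB,
      * get_order(2 MiB) = 9, so the result is 1UL << (12 + 9) = 2 MiB.
      */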
 297static inline unsigned long tbl_size(int entry_size)
 298{
 299        unsigned shift = PAGE_SHIFT +
 300                         get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 301
 302        return 1UL << shift;
 303}
 304
 305int amd_iommu_get_num_iommus(void)
 306{
 307        return amd_iommus_present;
 308}
 309
 310/* Access to l1 and l2 indexed register spaces */
 311
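     /*
      * Both register spaces are reached through an indirect window in the
      * IOMMU's PCI config space: the index register (0xf8 for L1, 0xf0 for
      * L2) selects the target register, the data register (0xfc / 0xf4)
      * transfers the value, and a write-enable bit (bit 31 for L1, bit 8
      * for L2) must be set in the index for stores.
      */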
 312static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 313{
 314        u32 val;
 315
 316        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 317        pci_read_config_dword(iommu->dev, 0xfc, &val);
 318        return val;
 319}
 320
 321static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 322{
 323        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 324        pci_write_config_dword(iommu->dev, 0xfc, val);
 325        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 326}
 327
 328static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 329{
 330        u32 val;
 331
 332        pci_write_config_dword(iommu->dev, 0xf0, address);
 333        pci_read_config_dword(iommu->dev, 0xf4, &val);
 334        return val;
 335}
 336
 337static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 338{
 339        pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 340        pci_write_config_dword(iommu->dev, 0xf4, val);
 341}
 342
 343/****************************************************************************
 344 *
 345 * AMD IOMMU MMIO register space handling functions
 346 *
 347 * These functions are used to program the IOMMU device registers in
 348 * MMIO space required for that driver.
 349 *
 350 ****************************************************************************/
 351
 352/*
  353 * This function sets the exclusion range in the IOMMU. DMA accesses to the
  354 * exclusion range are passed through untranslated.
 355 */
 356static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 357{
 358        u64 start = iommu->exclusion_start & PAGE_MASK;
 359        u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
 360        u64 entry;
 361
 362        if (!iommu->exclusion_start)
 363                return;
 364
 365        entry = start | MMIO_EXCL_ENABLE_MASK;
 366        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 367                        &entry, sizeof(entry));
 368
 369        entry = limit;
 370        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 371                        &entry, sizeof(entry));
 372}
 373
 374/* Programs the physical address of the device table into the IOMMU hardware */
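     /*
      * The low bits of the base register encode the table size in 4 KiB
      * pages minus one, e.g. a 2 MiB device table is programmed as
      * base | 0x1ff.
      */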
 375static void iommu_set_device_table(struct amd_iommu *iommu)
 376{
 377        u64 entry;
 378
 379        BUG_ON(iommu->mmio_base == NULL);
 380
 381        entry = iommu_virt_to_phys(amd_iommu_dev_table);
 382        entry |= (dev_table_size >> 12) - 1;
 383        memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 384                        &entry, sizeof(entry));
 385}
 386
 387/* Generic functions to enable/disable certain features of the IOMMU. */
 388static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 389{
 390        u64 ctrl;
 391
 392        ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 393        ctrl |= (1ULL << bit);
 394        writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 395}
 396
 397static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 398{
 399        u64 ctrl;
 400
 401        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 402        ctrl &= ~(1ULL << bit);
 403        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 404}
 405
 406static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 407{
 408        u64 ctrl;
 409
 410        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 411        ctrl &= ~CTRL_INV_TO_MASK;
 412        ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 413        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 414}
 415
 416/* Function to enable the hardware */
 417static void iommu_enable(struct amd_iommu *iommu)
 418{
 419        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 420}
 421
 422static void iommu_disable(struct amd_iommu *iommu)
 423{
 424        /* Disable command buffer */
 425        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 426
 427        /* Disable event logging and event interrupts */
 428        iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 429        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 430
 431        /* Disable IOMMU GA_LOG */
 432        iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 433        iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 434
 435        /* Disable IOMMU hardware itself */
 436        iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 437}
 438
 439/*
 440 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 441 * the system has one.
 442 */
 443static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 444{
 445        if (!request_mem_region(address, end, "amd_iommu")) {
 446                pr_err("AMD-Vi: Can not reserve memory region %llx-%llx for mmio\n",
 447                        address, end);
 448                pr_err("AMD-Vi: This is a BIOS bug. Please contact your hardware vendor\n");
 449                return NULL;
 450        }
 451
 452        return (u8 __iomem *)ioremap_nocache(address, end);
 453}
 454
 455static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 456{
 457        if (iommu->mmio_base)
 458                iounmap(iommu->mmio_base);
 459        release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 460}
 461
 462static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 463{
 464        u32 size = 0;
 465
 466        switch (h->type) {
 467        case 0x10:
 468                size = 24;
 469                break;
 470        case 0x11:
 471        case 0x40:
 472                size = 40;
 473                break;
 474        }
 475        return size;
 476}
 477
 478/****************************************************************************
 479 *
 480 * The functions below belong to the first pass of AMD IOMMU ACPI table
 481 * parsing. In this pass we try to find out the highest device id this
  482 * code has to handle. Based on this information the size of the shared
  483 * data structures is determined later.
 484 *
 485 ****************************************************************************/
 486
 487/*
 488 * This function calculates the length of a given IVHD entry
 489 */
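     /*
      * For entry types below 0x80 the top two bits of the type byte encode
      * the size, e.g. IVHD_DEV_ALL (0x01) is a 4-byte entry (0x04 << 0)
      * while IVHD_DEV_ALIAS (0x42) and IVHD_DEV_SPECIAL (0x48) are 8-byte
      * entries (0x04 << 1).
      */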
 490static inline int ivhd_entry_length(u8 *ivhd)
 491{
 492        u32 type = ((struct ivhd_entry *)ivhd)->type;
 493
 494        if (type < 0x80) {
 495                return 0x04 << (*ivhd >> 6);
 496        } else if (type == IVHD_DEV_ACPI_HID) {
 497                /* For ACPI_HID, offset 21 is uid len */
 498                return *((u8 *)ivhd + 21) + 22;
 499        }
 500        return 0;
 501}
 502
 503/*
 504 * After reading the highest device id from the IOMMU PCI capability header
  505 * this function checks whether a higher device id is defined in the ACPI table.
 506 */
 507static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 508{
 509        u8 *p = (void *)h, *end = (void *)h;
 510        struct ivhd_entry *dev;
 511
 512        u32 ivhd_size = get_ivhd_header_size(h);
 513
 514        if (!ivhd_size) {
 515                pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
 516                return -EINVAL;
 517        }
 518
 519        p += ivhd_size;
 520        end += h->length;
 521
 522        while (p < end) {
 523                dev = (struct ivhd_entry *)p;
 524                switch (dev->type) {
 525                case IVHD_DEV_ALL:
 526                        /* Use maximum BDF value for DEV_ALL */
 527                        update_last_devid(0xffff);
 528                        break;
 529                case IVHD_DEV_SELECT:
 530                case IVHD_DEV_RANGE_END:
 531                case IVHD_DEV_ALIAS:
 532                case IVHD_DEV_EXT_SELECT:
 533                        /* all the above subfield types refer to device ids */
 534                        update_last_devid(dev->devid);
 535                        break;
 536                default:
 537                        break;
 538                }
 539                p += ivhd_entry_length(p);
 540        }
 541
 542        WARN_ON(p != end);
 543
 544        return 0;
 545}
 546
 547static int __init check_ivrs_checksum(struct acpi_table_header *table)
 548{
 549        int i;
 550        u8 checksum = 0, *p = (u8 *)table;
 551
 552        for (i = 0; i < table->length; ++i)
 553                checksum += p[i];
 554        if (checksum != 0) {
 555                /* ACPI table corrupt */
 556                pr_err(FW_BUG "AMD-Vi: IVRS invalid checksum\n");
 557                return -ENODEV;
 558        }
 559
 560        return 0;
 561}
 562
 563/*
 564 * Iterate over all IVHD entries in the ACPI table and find the highest device
 565 * id which we need to handle. This is the first of three functions which parse
 566 * the ACPI table. So we check the checksum here.
 567 */
 568static int __init find_last_devid_acpi(struct acpi_table_header *table)
 569{
 570        u8 *p = (u8 *)table, *end = (u8 *)table;
 571        struct ivhd_header *h;
 572
 573        p += IVRS_HEADER_LENGTH;
 574
 575        end += table->length;
 576        while (p < end) {
 577                h = (struct ivhd_header *)p;
 578                if (h->type == amd_iommu_target_ivhd_type) {
 579                        int ret = find_last_devid_from_ivhd(h);
 580
 581                        if (ret)
 582                                return ret;
 583                }
 584                p += h->length;
 585        }
 586        WARN_ON(p != end);
 587
 588        return 0;
 589}
 590
 591/****************************************************************************
 592 *
 593 * The following functions belong to the code path which parses the ACPI table
 594 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
  595 * data structures, initialize the device/alias/rlookup tables and
  596 * set up the hardware.
 597 *
 598 ****************************************************************************/
 599
 600/*
 601 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 602 * write commands to that buffer later and the IOMMU will execute them
 603 * asynchronously
 604 */
 605static int __init alloc_command_buffer(struct amd_iommu *iommu)
 606{
 607        iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 608                                                  get_order(CMD_BUFFER_SIZE));
 609
 610        return iommu->cmd_buf ? 0 : -ENOMEM;
 611}
 612
 613/*
 614 * This function resets the command buffer if the IOMMU stopped fetching
 615 * commands from it.
 616 */
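     /*
      * The command buffer is a ring: the driver queues commands at the tail
      * pointer while the IOMMU fetches from the head pointer, so zeroing
      * both registers while CMDBUF_EN is clear discards anything that was
      * still queued.
      */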
 617void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 618{
 619        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 620
 621        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 622        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 623        iommu->cmd_buf_head = 0;
 624        iommu->cmd_buf_tail = 0;
 625
 626        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 627}
 628
 629/*
 630 * This function writes the command buffer address to the hardware and
 631 * enables it.
 632 */
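     /*
      * CMD_BUFFER_SIZE is 8 KiB, i.e. 512 of the 16-byte commands, which is
      * the length that MMIO_CMD_SIZE_512 advertises in the base register.
      */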
 633static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 634{
 635        u64 entry;
 636
 637        BUG_ON(iommu->cmd_buf == NULL);
 638
 639        entry = iommu_virt_to_phys(iommu->cmd_buf);
 640        entry |= MMIO_CMD_SIZE_512;
 641
 642        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 643                    &entry, sizeof(entry));
 644
 645        amd_iommu_reset_cmd_buffer(iommu);
 646}
 647
 648/*
 649 * This function disables the command buffer
 650 */
 651static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 652{
 653        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 654}
 655
 656static void __init free_command_buffer(struct amd_iommu *iommu)
 657{
 658        free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 659}
 660
 661/* allocates the memory where the IOMMU will log its events to */
 662static int __init alloc_event_buffer(struct amd_iommu *iommu)
 663{
 664        iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 665                                                  get_order(EVT_BUFFER_SIZE));
 666
 667        return iommu->evt_buf ? 0 : -ENOMEM;
 668}
 669
 670static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 671{
 672        u64 entry;
 673
 674        BUG_ON(iommu->evt_buf == NULL);
 675
 676        entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 677
 678        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 679                    &entry, sizeof(entry));
 680
 681        /* set head and tail to zero manually */
 682        writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 683        writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 684
 685        iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 686}
 687
 688/*
 689 * This function disables the event log buffer
 690 */
 691static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 692{
 693        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 694}
 695
 696static void __init free_event_buffer(struct amd_iommu *iommu)
 697{
 698        free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 699}
 700
  701/* allocates the memory where the IOMMU will log peripheral page requests to */
 702static int __init alloc_ppr_log(struct amd_iommu *iommu)
 703{
 704        iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 705                                                  get_order(PPR_LOG_SIZE));
 706
 707        return iommu->ppr_log ? 0 : -ENOMEM;
 708}
 709
 710static void iommu_enable_ppr_log(struct amd_iommu *iommu)
 711{
 712        u64 entry;
 713
 714        if (iommu->ppr_log == NULL)
 715                return;
 716
 717        entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
 718
 719        memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
 720                    &entry, sizeof(entry));
 721
 722        /* set head and tail to zero manually */
 723        writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 724        writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 725
 726        iommu_feature_enable(iommu, CONTROL_PPFLOG_EN);
 727        iommu_feature_enable(iommu, CONTROL_PPR_EN);
 728}
 729
 730static void __init free_ppr_log(struct amd_iommu *iommu)
 731{
 732        if (iommu->ppr_log == NULL)
 733                return;
 734
 735        free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
 736}
 737
 738static void free_ga_log(struct amd_iommu *iommu)
 739{
 740#ifdef CONFIG_IRQ_REMAP
 741        if (iommu->ga_log)
 742                free_pages((unsigned long)iommu->ga_log,
 743                            get_order(GA_LOG_SIZE));
 744        if (iommu->ga_log_tail)
 745                free_pages((unsigned long)iommu->ga_log_tail,
 746                            get_order(8));
 747#endif
 748}
 749
 750static int iommu_ga_log_enable(struct amd_iommu *iommu)
 751{
 752#ifdef CONFIG_IRQ_REMAP
 753        u32 status, i;
 754
 755        if (!iommu->ga_log)
 756                return -EINVAL;
 757
 758        status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 759
 760        /* Check if already running */
 761        if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 762                return 0;
 763
 764        iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 765        iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 766
 767        for (i = 0; i < LOOP_TIMEOUT; ++i) {
 768                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 769                if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 770                        break;
 771        }
 772
 773        if (i >= LOOP_TIMEOUT)
 774                return -EINVAL;
 775#endif /* CONFIG_IRQ_REMAP */
 776        return 0;
 777}
 778
 779#ifdef CONFIG_IRQ_REMAP
 780static int iommu_init_ga_log(struct amd_iommu *iommu)
 781{
 782        u64 entry;
 783
 784        if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 785                return 0;
 786
 787        iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 788                                        get_order(GA_LOG_SIZE));
 789        if (!iommu->ga_log)
 790                goto err_out;
 791
 792        iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 793                                        get_order(8));
 794        if (!iommu->ga_log_tail)
 795                goto err_out;
 796
 797        entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 798        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 799                    &entry, sizeof(entry));
 800        entry = (iommu_virt_to_phys(iommu->ga_log) & 0xFFFFFFFFFFFFFULL) & ~7ULL;
 801        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 802                    &entry, sizeof(entry));
 803        writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 804        writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 805
 806        return 0;
 807err_out:
 808        free_ga_log(iommu);
 809        return -EINVAL;
 810}
 811#endif /* CONFIG_IRQ_REMAP */
 812
 813static int iommu_init_ga(struct amd_iommu *iommu)
 814{
 815        int ret = 0;
 816
 817#ifdef CONFIG_IRQ_REMAP
 818        /* Note: We have already checked GASup from IVRS table.
 819         *       Now, we need to make sure that GAMSup is set.
 820         */
 821        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
 822            !iommu_feature(iommu, FEATURE_GAM_VAPIC))
 823                amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
 824
 825        ret = iommu_init_ga_log(iommu);
 826#endif /* CONFIG_IRQ_REMAP */
 827
 828        return ret;
 829}
 830
 831static void iommu_enable_xt(struct amd_iommu *iommu)
 832{
 833#ifdef CONFIG_IRQ_REMAP
 834        /*
 835         * XT mode (32-bit APIC destination ID) requires
 836         * GA mode (128-bit IRTE support) as a prerequisite.
 837         */
 838        if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 839            amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 840                iommu_feature_enable(iommu, CONTROL_XT_EN);
 841#endif /* CONFIG_IRQ_REMAP */
 842}
 843
 844static void iommu_enable_gt(struct amd_iommu *iommu)
 845{
 846        if (!iommu_feature(iommu, FEATURE_GT))
 847                return;
 848
 849        iommu_feature_enable(iommu, CONTROL_GT_EN);
 850}
 851
 852/* sets a specific bit in the device table entry. */
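     /*
      * Worked example: a device table entry is four u64 words, so bit 70
      * lands in data[70 >> 6] = data[1] at bit position 70 & 0x3f = 6.
      */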
 853static void set_dev_entry_bit(u16 devid, u8 bit)
 854{
 855        int i = (bit >> 6) & 0x03;
 856        int _bit = bit & 0x3f;
 857
 858        amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 859}
 860
 861static int get_dev_entry_bit(u16 devid, u8 bit)
 862{
 863        int i = (bit >> 6) & 0x03;
 864        int _bit = bit & 0x3f;
 865
 866        return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 867}
 868
 869
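     /*
      * In a kdump kernel the IOMMU can still be live with the old kernel's
      * device table. copy_device_table() locates that table through the
      * MMIO base registers, copies the valid entries (including interrupt
      * remapping state) into old_dev_tbl_cpy and reserves the domain ids it
      * finds in amd_iommu_pd_alloc_bitmap.
      */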
 870static bool copy_device_table(void)
 871{
 872        u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
 873        struct dev_table_entry *old_devtb = NULL;
 874        u32 lo, hi, devid, old_devtb_size;
 875        phys_addr_t old_devtb_phys;
 876        struct amd_iommu *iommu;
 877        u16 dom_id, dte_v, irq_v;
 878        gfp_t gfp_flag;
 879        u64 tmp;
 880
 881        if (!amd_iommu_pre_enabled)
 882                return false;
 883
 884        pr_warn("Translation is already enabled - trying to copy translation structures\n");
 885        for_each_iommu(iommu) {
 886                /* All IOMMUs should use the same device table with the same size */
 887                lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
 888                hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
 889                entry = (((u64) hi) << 32) + lo;
 890                if (last_entry && last_entry != entry) {
 891                        pr_err("IOMMU:%d should use the same dev table as others!\n",
 892                                iommu->index);
 893                        return false;
 894                }
 895                last_entry = entry;
 896
 897                old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
 898                if (old_devtb_size != dev_table_size) {
 899                        pr_err("The device table size of IOMMU:%d is not expected!\n",
 900                                iommu->index);
 901                        return false;
 902                }
 903        }
 904
 905        old_devtb_phys = entry & PAGE_MASK;
 906        if (old_devtb_phys >= 0x100000000ULL) {
 907                pr_err("The address of old device table is above 4G, not trustworthy!\n");
 908                return false;
 909        }
 910        old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
 911        if (!old_devtb)
 912                return false;
 913
 914        gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
 915        old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
 916                                get_order(dev_table_size));
 917        if (old_dev_tbl_cpy == NULL) {
 918                pr_err("Failed to allocate memory for copying old device table!\n");
 919                return false;
 920        }
 921
 922        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 923                old_dev_tbl_cpy[devid] = old_devtb[devid];
 924                dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
 925                dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
 926
 927                if (dte_v && dom_id) {
 928                        old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
 929                        old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
 930                        __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 931                        /* If gcr3 table existed, mask it out */
 932                        if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
 933                                tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
 934                                tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
 935                                old_dev_tbl_cpy[devid].data[1] &= ~tmp;
 936                                tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
 937                                tmp |= DTE_FLAG_GV;
 938                                old_dev_tbl_cpy[devid].data[0] &= ~tmp;
 939                        }
 940                }
 941
 942                irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
 943                int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
 944                int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
 945                if (irq_v && (int_ctl || int_tab_len)) {
 946                        if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
 947                            (int_tab_len != DTE_IRQ_TABLE_LEN)) {
 948                                pr_err("Wrong old irq remapping flag: %#x\n", devid);
 949                                return false;
 950                        }
 951
 952                        old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
 953                }
 954        }
 955        memunmap(old_devtb);
 956
 957        return true;
 958}
 959
 960void amd_iommu_apply_erratum_63(u16 devid)
 961{
 962        int sysmgt;
 963
 964        sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
 965                 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
 966
 967        if (sysmgt == 0x01)
 968                set_dev_entry_bit(devid, DEV_ENTRY_IW);
 969}
 970
 971/* Writes the specific IOMMU for a device into the rlookup table */
 972static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
 973{
 974        amd_iommu_rlookup_table[devid] = iommu;
 975}
 976
 977/*
 978 * This function takes the device specific flags read from the ACPI
 979 * table and sets up the device table entry with that information
 980 */
 981static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
 982                                           u16 devid, u32 flags, u32 ext_flags)
 983{
 984        if (flags & ACPI_DEVFLAG_INITPASS)
 985                set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
 986        if (flags & ACPI_DEVFLAG_EXTINT)
 987                set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
 988        if (flags & ACPI_DEVFLAG_NMI)
 989                set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
 990        if (flags & ACPI_DEVFLAG_SYSMGT1)
 991                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
 992        if (flags & ACPI_DEVFLAG_SYSMGT2)
 993                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
 994        if (flags & ACPI_DEVFLAG_LINT0)
 995                set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
 996        if (flags & ACPI_DEVFLAG_LINT1)
 997                set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
 998
 999        amd_iommu_apply_erratum_63(devid);
1000
1001        set_iommu_for_device(iommu, devid);
1002}
1003
1004static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1005{
1006        struct devid_map *entry;
1007        struct list_head *list;
1008
1009        if (type == IVHD_SPECIAL_IOAPIC)
1010                list = &ioapic_map;
1011        else if (type == IVHD_SPECIAL_HPET)
1012                list = &hpet_map;
1013        else
1014                return -EINVAL;
1015
1016        list_for_each_entry(entry, list, list) {
1017                if (!(entry->id == id && entry->cmd_line))
1018                        continue;
1019
1020                pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n",
1021                        type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1022
1023                *devid = entry->devid;
1024
1025                return 0;
1026        }
1027
1028        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1029        if (!entry)
1030                return -ENOMEM;
1031
1032        entry->id       = id;
1033        entry->devid    = *devid;
1034        entry->cmd_line = cmd_line;
1035
1036        list_add_tail(&entry->list, list);
1037
1038        return 0;
1039}
1040
1041static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1042                                      bool cmd_line)
1043{
1044        struct acpihid_map_entry *entry;
1045        struct list_head *list = &acpihid_map;
1046
1047        list_for_each_entry(entry, list, list) {
1048                if (strcmp(entry->hid, hid) ||
1049                    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1050                    !entry->cmd_line)
1051                        continue;
1052
1053                pr_info("AMD-Vi: Command-line override for hid:%s uid:%s\n",
1054                        hid, uid);
1055                *devid = entry->devid;
1056                return 0;
1057        }
1058
1059        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1060        if (!entry)
1061                return -ENOMEM;
1062
1063        memcpy(entry->uid, uid, strlen(uid));
1064        memcpy(entry->hid, hid, strlen(hid));
1065        entry->devid = *devid;
1066        entry->cmd_line = cmd_line;
1067        entry->root_devid = (entry->devid & (~0x7));
1068
1069        pr_info("AMD-Vi:%s, add hid:%s, uid:%s, rdevid:%d\n",
1070                entry->cmd_line ? "cmd" : "ivrs",
1071                entry->hid, entry->uid, entry->root_devid);
1072
1073        list_add_tail(&entry->list, list);
1074        return 0;
1075}
1076
1077static int __init add_early_maps(void)
1078{
1079        int i, ret;
1080
1081        for (i = 0; i < early_ioapic_map_size; ++i) {
1082                ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1083                                         early_ioapic_map[i].id,
1084                                         &early_ioapic_map[i].devid,
1085                                         early_ioapic_map[i].cmd_line);
1086                if (ret)
1087                        return ret;
1088        }
1089
1090        for (i = 0; i < early_hpet_map_size; ++i) {
1091                ret = add_special_device(IVHD_SPECIAL_HPET,
1092                                         early_hpet_map[i].id,
1093                                         &early_hpet_map[i].devid,
1094                                         early_hpet_map[i].cmd_line);
1095                if (ret)
1096                        return ret;
1097        }
1098
1099        for (i = 0; i < early_acpihid_map_size; ++i) {
1100                ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1101                                          early_acpihid_map[i].uid,
1102                                          &early_acpihid_map[i].devid,
1103                                          early_acpihid_map[i].cmd_line);
1104                if (ret)
1105                        return ret;
1106        }
1107
1108        return 0;
1109}
1110
1111/*
1112 * Reads the device exclusion range from ACPI and initializes the IOMMU with
1113 * it
1114 */
1115static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m)
1116{
1117        struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
1118
1119        if (!(m->flags & IVMD_FLAG_EXCL_RANGE))
1120                return;
1121
1122        if (iommu) {
1123                /*
 1124                 * We can only configure exclusion ranges per IOMMU, not
 1125                 * per device. But we can enable the exclusion range per
 1126                 * device. This is done here.
1127                 */
1128                set_dev_entry_bit(devid, DEV_ENTRY_EX);
1129                iommu->exclusion_start = m->range_start;
1130                iommu->exclusion_length = m->range_length;
1131        }
1132}
1133
1134/*
1135 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1136 * initializes the hardware and our data structures with it.
1137 */
1138static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1139                                        struct ivhd_header *h)
1140{
1141        u8 *p = (u8 *)h;
1142        u8 *end = p, flags = 0;
1143        u16 devid = 0, devid_start = 0, devid_to = 0;
1144        u32 dev_i, ext_flags = 0;
1145        bool alias = false;
1146        struct ivhd_entry *e;
1147        u32 ivhd_size;
1148        int ret;
1149
1150
1151        ret = add_early_maps();
1152        if (ret)
1153                return ret;
1154
1155        /*
1156         * First save the recommended feature enable bits from ACPI
1157         */
1158        iommu->acpi_flags = h->flags;
1159
1160        /*
1161         * Done. Now parse the device entries
1162         */
1163        ivhd_size = get_ivhd_header_size(h);
1164        if (!ivhd_size) {
1165                pr_err("AMD-Vi: Unsupported IVHD type %#x\n", h->type);
1166                return -EINVAL;
1167        }
1168
1169        p += ivhd_size;
1170
1171        end += h->length;
1172
1173
1174        while (p < end) {
1175                e = (struct ivhd_entry *)p;
1176                switch (e->type) {
1177                case IVHD_DEV_ALL:
1178
1179                        DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1180
1181                        for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
1182                                set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1183                        break;
1184                case IVHD_DEV_SELECT:
1185
1186                        DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
1187                                    "flags: %02x\n",
1188                                    PCI_BUS_NUM(e->devid),
1189                                    PCI_SLOT(e->devid),
1190                                    PCI_FUNC(e->devid),
1191                                    e->flags);
1192
1193                        devid = e->devid;
1194                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1195                        break;
1196                case IVHD_DEV_SELECT_RANGE_START:
1197
1198                        DUMP_printk("  DEV_SELECT_RANGE_START\t "
1199                                    "devid: %02x:%02x.%x flags: %02x\n",
1200                                    PCI_BUS_NUM(e->devid),
1201                                    PCI_SLOT(e->devid),
1202                                    PCI_FUNC(e->devid),
1203                                    e->flags);
1204
1205                        devid_start = e->devid;
1206                        flags = e->flags;
1207                        ext_flags = 0;
1208                        alias = false;
1209                        break;
1210                case IVHD_DEV_ALIAS:
1211
1212                        DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
1213                                    "flags: %02x devid_to: %02x:%02x.%x\n",
1214                                    PCI_BUS_NUM(e->devid),
1215                                    PCI_SLOT(e->devid),
1216                                    PCI_FUNC(e->devid),
1217                                    e->flags,
1218                                    PCI_BUS_NUM(e->ext >> 8),
1219                                    PCI_SLOT(e->ext >> 8),
1220                                    PCI_FUNC(e->ext >> 8));
1221
1222                        devid = e->devid;
1223                        devid_to = e->ext >> 8;
1224                        set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1225                        set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1226                        amd_iommu_alias_table[devid] = devid_to;
1227                        break;
1228                case IVHD_DEV_ALIAS_RANGE:
1229
1230                        DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1231                                    "devid: %02x:%02x.%x flags: %02x "
1232                                    "devid_to: %02x:%02x.%x\n",
1233                                    PCI_BUS_NUM(e->devid),
1234                                    PCI_SLOT(e->devid),
1235                                    PCI_FUNC(e->devid),
1236                                    e->flags,
1237                                    PCI_BUS_NUM(e->ext >> 8),
1238                                    PCI_SLOT(e->ext >> 8),
1239                                    PCI_FUNC(e->ext >> 8));
1240
1241                        devid_start = e->devid;
1242                        flags = e->flags;
1243                        devid_to = e->ext >> 8;
1244                        ext_flags = 0;
1245                        alias = true;
1246                        break;
1247                case IVHD_DEV_EXT_SELECT:
1248
1249                        DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
1250                                    "flags: %02x ext: %08x\n",
1251                                    PCI_BUS_NUM(e->devid),
1252                                    PCI_SLOT(e->devid),
1253                                    PCI_FUNC(e->devid),
1254                                    e->flags, e->ext);
1255
1256                        devid = e->devid;
1257                        set_dev_entry_from_acpi(iommu, devid, e->flags,
1258                                                e->ext);
1259                        break;
1260                case IVHD_DEV_EXT_SELECT_RANGE:
1261
1262                        DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1263                                    "%02x:%02x.%x flags: %02x ext: %08x\n",
1264                                    PCI_BUS_NUM(e->devid),
1265                                    PCI_SLOT(e->devid),
1266                                    PCI_FUNC(e->devid),
1267                                    e->flags, e->ext);
1268
1269                        devid_start = e->devid;
1270                        flags = e->flags;
1271                        ext_flags = e->ext;
1272                        alias = false;
1273                        break;
1274                case IVHD_DEV_RANGE_END:
1275
1276                        DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
1277                                    PCI_BUS_NUM(e->devid),
1278                                    PCI_SLOT(e->devid),
1279                                    PCI_FUNC(e->devid));
1280
1281                        devid = e->devid;
1282                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1283                                if (alias) {
1284                                        amd_iommu_alias_table[dev_i] = devid_to;
1285                                        set_dev_entry_from_acpi(iommu,
1286                                                devid_to, flags, ext_flags);
1287                                }
1288                                set_dev_entry_from_acpi(iommu, dev_i,
1289                                                        flags, ext_flags);
1290                        }
1291                        break;
1292                case IVHD_DEV_SPECIAL: {
1293                        u8 handle, type;
1294                        const char *var;
1295                        u16 devid;
1296                        int ret;
1297
1298                        handle = e->ext & 0xff;
1299                        devid  = (e->ext >>  8) & 0xffff;
1300                        type   = (e->ext >> 24) & 0xff;
1301
1302                        if (type == IVHD_SPECIAL_IOAPIC)
1303                                var = "IOAPIC";
1304                        else if (type == IVHD_SPECIAL_HPET)
1305                                var = "HPET";
1306                        else
1307                                var = "UNKNOWN";
1308
1309                        DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
1310                                    var, (int)handle,
1311                                    PCI_BUS_NUM(devid),
1312                                    PCI_SLOT(devid),
1313                                    PCI_FUNC(devid));
1314
1315                        ret = add_special_device(type, handle, &devid, false);
1316                        if (ret)
1317                                return ret;
1318
1319                        /*
1320                         * add_special_device might update the devid in case a
1321                         * command-line override is present. So call
1322                         * set_dev_entry_from_acpi after add_special_device.
1323                         */
1324                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1325
1326                        break;
1327                }
1328                case IVHD_DEV_ACPI_HID: {
1329                        u16 devid;
1330                        u8 hid[ACPIHID_HID_LEN] = {0};
1331                        u8 uid[ACPIHID_UID_LEN] = {0};
1332                        int ret;
1333
1334                        if (h->type != 0x40) {
1335                                pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1336                                       e->type);
1337                                break;
1338                        }
1339
1340                        memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
1341                        hid[ACPIHID_HID_LEN - 1] = '\0';
1342
1343                        if (!(*hid)) {
1344                                pr_err(FW_BUG "Invalid HID.\n");
1345                                break;
1346                        }
1347
1348                        switch (e->uidf) {
1349                        case UID_NOT_PRESENT:
1350
1351                                if (e->uidl != 0)
1352                                        pr_warn(FW_BUG "Invalid UID length.\n");
1353
1354                                break;
1355                        case UID_IS_INTEGER:
1356
1357                                sprintf(uid, "%d", e->uid);
1358
1359                                break;
1360                        case UID_IS_CHARACTER:
1361
1362                                memcpy(uid, (u8 *)(&e->uid), ACPIHID_UID_LEN - 1);
1363                                uid[ACPIHID_UID_LEN - 1] = '\0';
1364
1365                                break;
1366                        default:
1367                                break;
1368                        }
1369
1370                        devid = e->devid;
1371                        DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
1372                                    hid, uid,
1373                                    PCI_BUS_NUM(devid),
1374                                    PCI_SLOT(devid),
1375                                    PCI_FUNC(devid));
1376
1377                        flags = e->flags;
1378
1379                        ret = add_acpi_hid_device(hid, uid, &devid, false);
1380                        if (ret)
1381                                return ret;
1382
1383                        /*
 1384                         * add_acpi_hid_device might update the devid in case a
 1385                         * command-line override is present. So call
 1386                         * set_dev_entry_from_acpi after add_acpi_hid_device.
1387                         */
1388                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1389
1390                        break;
1391                }
1392                default:
1393                        break;
1394                }
1395
1396                p += ivhd_entry_length(p);
1397        }
1398
1399        return 0;
1400}
1401
1402static void __init free_iommu_one(struct amd_iommu *iommu)
1403{
1404        free_command_buffer(iommu);
1405        free_event_buffer(iommu);
1406        free_ppr_log(iommu);
1407        free_ga_log(iommu);
1408        iommu_unmap_mmio_space(iommu);
1409}
1410
1411static void __init free_iommu_all(void)
1412{
1413        struct amd_iommu *iommu, *next;
1414
1415        for_each_iommu_safe(iommu, next) {
1416                list_del(&iommu->list);
1417                free_iommu_one(iommu);
1418                kfree(iommu);
1419        }
1420}
1421
1422/*
1423 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1424 * Workaround:
 1425 *     BIOS should disable L2B miscellaneous clock gating by setting
1426 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1427 */
1428static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1429{
1430        u32 value;
1431
1432        if ((boot_cpu_data.x86 != 0x15) ||
1433            (boot_cpu_data.x86_model < 0x10) ||
1434            (boot_cpu_data.x86_model > 0x1f))
1435                return;
1436
1437        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1438        pci_read_config_dword(iommu->dev, 0xf4, &value);
1439
1440        if (value & BIT(2))
1441                return;
1442
1443        /* Select NB indirect register 0x90 and enable writing */
1444        pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1445
1446        pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1447        pr_info("AMD-Vi: Applying erratum 746 workaround for IOMMU at %s\n",
1448                dev_name(&iommu->dev->dev));
1449
1450        /* Clear the enable writing bit */
1451        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1452}
1453
1454/*
1455 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1456 * Workaround:
1457 *     BIOS should enable ATS write permission check by setting
1458 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1459 */
1460static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1461{
1462        u32 value;
1463
1464        if ((boot_cpu_data.x86 != 0x15) ||
1465            (boot_cpu_data.x86_model < 0x30) ||
1466            (boot_cpu_data.x86_model > 0x3f))
1467                return;
1468
1469        /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1470        value = iommu_read_l2(iommu, 0x47);
1471
1472        if (value & BIT(0))
1473                return;
1474
1475        /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1476        iommu_write_l2(iommu, 0x47, value | BIT(0));
1477
1478        pr_info("AMD-Vi: Applying ATS write check workaround for IOMMU at %s\n",
1479                dev_name(&iommu->dev->dev));
1480}
1481
1482/*
 1483 * This function glues the initialization functions for one IOMMU
1484 * together and also allocates the command buffer and programs the
1485 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1486 */
1487static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1488{
1489        int ret;
1490
1491        raw_spin_lock_init(&iommu->lock);
1492
1493        /* Add IOMMU to internal data structures */
1494        list_add_tail(&iommu->list, &amd_iommu_list);
1495        iommu->index = amd_iommus_present++;
1496
1497        if (unlikely(iommu->index >= MAX_IOMMUS)) {
1498                WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n");
1499                return -ENOSYS;
1500        }
1501
1502        /* Index is fine - add IOMMU to the array */
1503        amd_iommus[iommu->index] = iommu;
1504
1505        /*
1506         * Copy data from ACPI table entry to the iommu struct
1507         */
1508        iommu->devid   = h->devid;
1509        iommu->cap_ptr = h->cap_ptr;
1510        iommu->pci_seg = h->pci_seg;
1511        iommu->mmio_phys = h->mmio_phys;
1512
1513        switch (h->type) {
1514        case 0x10:
1515                /* Check if IVHD EFR contains proper max banks/counters */
1516                if ((h->efr_attr != 0) &&
1517                    ((h->efr_attr & (0xF << 13)) != 0) &&
1518                    ((h->efr_attr & (0x3F << 17)) != 0))
1519                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1520                else
1521                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1522                if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1523                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1524                if (((h->efr_attr & (0x1 << IOMMU_FEAT_XTSUP_SHIFT)) == 0))
1525                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1526                break;
1527        case 0x11:
1528        case 0x40:
1529                if (h->efr_reg & (1 << 9))
1530                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1531                else
1532                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1533                if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0))
1534                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1535                if (((h->efr_reg & (0x1 << IOMMU_EFR_XTSUP_SHIFT)) == 0))
1536                        amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
1537                break;
1538        default:
1539                return -EINVAL;
1540        }
1541
1542        iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1543                                                iommu->mmio_phys_end);
1544        if (!iommu->mmio_base)
1545                return -ENOMEM;
1546
1547        if (alloc_command_buffer(iommu))
1548                return -ENOMEM;
1549
1550        if (alloc_event_buffer(iommu))
1551                return -ENOMEM;
1552
1553        iommu->int_enabled = false;
1554
1555        init_translation_status(iommu);
1556        if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1557                iommu_disable(iommu);
1558                clear_translation_pre_enabled(iommu);
1559                pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1560                        iommu->index);
1561        }
1562        if (amd_iommu_pre_enabled)
1563                amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1564
1565        ret = init_iommu_from_acpi(iommu, h);
1566        if (ret)
1567                return ret;
1568
1569        ret = amd_iommu_create_irq_domain(iommu);
1570        if (ret)
1571                return ret;
1572
1573        /*
1574         * Make sure IOMMU is not considered to translate itself. The IVRS
1575         * table tells us so, but this is a lie!
1576         */
1577        amd_iommu_rlookup_table[iommu->devid] = NULL;
1578
1579        return 0;
1580}
1581
1582/**
1583 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1584 * @ivrs: Pointer to the IVRS header
1585 *
1586 * Search through all IVHD blocks and return the highest supported IVHD type.
1587 */
1588static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1589{
1590        u8 *base = (u8 *)ivrs;
1591        struct ivhd_header *ivhd = (struct ivhd_header *)
1592                                        (base + IVRS_HEADER_LENGTH);
1593        u8 last_type = ivhd->type;
1594        u16 devid = ivhd->devid;
1595
1596        while (((u8 *)ivhd - base < ivrs->length) &&
1597               (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1598                u8 *p = (u8 *) ivhd;
1599
1600                if (ivhd->devid == devid)
1601                        last_type = ivhd->type;
1602                ivhd = (struct ivhd_header *)(p + ivhd->length);
1603        }
1604
1605        return last_type;
1606}
1607
1608/*
1609 * Iterates over all IOMMU entries in the ACPI table, allocates the
1610 * IOMMU structure and initializes it with init_iommu_one()
1611 */
1612static int __init init_iommu_all(struct acpi_table_header *table)
1613{
1614        u8 *p = (u8 *)table, *end = (u8 *)table;
1615        struct ivhd_header *h;
1616        struct amd_iommu *iommu;
1617        int ret;
1618
1619        end += table->length;
1620        p += IVRS_HEADER_LENGTH;
1621
1622        while (p < end) {
1623                h = (struct ivhd_header *)p;
1624                if (*p == amd_iommu_target_ivhd_type) {
1625
1626                        DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1627                                    "seg: %d flags: %01x info %04x\n",
1628                                    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1629                                    PCI_FUNC(h->devid), h->cap_ptr,
1630                                    h->pci_seg, h->flags, h->info);
1631                        DUMP_printk("       mmio-addr: %016llx\n",
1632                                    h->mmio_phys);
1633
1634                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1635                        if (iommu == NULL)
1636                                return -ENOMEM;
1637
1638                        ret = init_iommu_one(iommu, h);
1639                        if (ret)
1640                                return ret;
1641                }
1642                p += h->length;
1643
1644        }
1645        WARN_ON(p != end);
1646
1647        return 0;
1648}
1649
1650static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
1651                                u8 fxn, u64 *value, bool is_write);
1652
1653static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1654{
1655        u64 val = 0xabcd, val2 = 0;
1656
1657        if (!iommu_feature(iommu, FEATURE_PC))
1658                return;
1659
1660        amd_iommu_pc_present = true;
1661
1662        /* Check if the performance counters can be written to */
1663        if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
1664            (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
1665            (val != val2)) {
1666                pr_err("AMD-Vi: Unable to write to IOMMU perf counter.\n");
1667                amd_iommu_pc_present = false;
1668                return;
1669        }
1670
1671        pr_info("AMD-Vi: IOMMU performance counters supported\n");
1672
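            /* Bits 17:12 report the number of banks, bits 10:7 the counters per bank */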
1673        val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1674        iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1675        iommu->max_counters = (u8) ((val >> 7) & 0xf);
1676}
1677
1678static ssize_t amd_iommu_show_cap(struct device *dev,
1679                                  struct device_attribute *attr,
1680                                  char *buf)
1681{
1682        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1683        return sprintf(buf, "%x\n", iommu->cap);
1684}
1685static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1686
1687static ssize_t amd_iommu_show_features(struct device *dev,
1688                                       struct device_attribute *attr,
1689                                       char *buf)
1690{
1691        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1692        return sprintf(buf, "%llx\n", iommu->features);
1693}
1694static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1695
1696static struct attribute *amd_iommu_attrs[] = {
1697        &dev_attr_cap.attr,
1698        &dev_attr_features.attr,
1699        NULL,
1700};
1701
1702static struct attribute_group amd_iommu_group = {
1703        .name = "amd-iommu",
1704        .attrs = amd_iommu_attrs,
1705};
1706
1707static const struct attribute_group *amd_iommu_groups[] = {
1708        &amd_iommu_group,
1709        NULL,
1710};
1711
1712static int iommu_init_pci(struct amd_iommu *iommu)
1713{
1714        int cap_ptr = iommu->cap_ptr;
1715        u32 range, misc, low, high;
1716        int ret;
1717
1718        iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
1719                                                 iommu->devid & 0xff);
1720        if (!iommu->dev)
1721                return -ENODEV;
1722
1723        /* Prevent binding other PCI device drivers to IOMMU devices */
1724        iommu->dev->match_driver = false;
1725
1726        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1727                              &iommu->cap);
1728        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_RANGE_OFFSET,
1729                              &range);
1730        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_MISC_OFFSET,
1731                              &misc);
1732
1733        if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1734                amd_iommu_iotlb_sup = false;
1735
1736        /* read extended feature bits */
1737        low  = readl(iommu->mmio_base + MMIO_EXT_FEATURES);
1738        high = readl(iommu->mmio_base + MMIO_EXT_FEATURES + 4);
1739
1740        iommu->features = ((u64)high << 32) | low;
1741
1742        if (iommu_feature(iommu, FEATURE_GT)) {
1743                int glxval;
1744                u32 max_pasid;
1745                u64 pasmax;
1746
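                    /*
                     * PASmax encodes the supported PASID width minus one, so
                     * the largest valid PASID is (1 << (pasmax + 1)) - 1.
                     */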
1747                pasmax = iommu->features & FEATURE_PASID_MASK;
1748                pasmax >>= FEATURE_PASID_SHIFT;
1749                max_pasid  = (1 << (pasmax + 1)) - 1;
1750
1751                amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1752
1753                BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1754
1755                glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1756                glxval >>= FEATURE_GLXVAL_SHIFT;
1757
1758                if (amd_iommu_max_glx_val == -1)
1759                        amd_iommu_max_glx_val = glxval;
1760                else
1761                        amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1762        }
1763
1764        if (iommu_feature(iommu, FEATURE_GT) &&
1765            iommu_feature(iommu, FEATURE_PPR)) {
1766                iommu->is_iommu_v2   = true;
1767                amd_iommu_v2_present = true;
1768        }
1769
1770        if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
1771                return -ENOMEM;
1772
1773        ret = iommu_init_ga(iommu);
1774        if (ret)
1775                return ret;
1776
1777        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1778                amd_iommu_np_cache = true;
1779
1780        init_iommu_perf_ctr(iommu);
1781
1782        if (is_rd890_iommu(iommu->dev)) {
1783                int i, j;
1784
1785                iommu->root_pdev =
1786                        pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
1787                                                    PCI_DEVFN(0, 0));
1788
1789                /*
1790                 * Some rd890 systems may not be fully reconfigured by the
1791                 * BIOS, so it's necessary for us to store this information so
1792                 * it can be reprogrammed on resume
1793                 */
1794                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1795                                &iommu->stored_addr_lo);
1796                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1797                                &iommu->stored_addr_hi);
1798
1799                /* Low bit locks writes to configuration space */
1800                iommu->stored_addr_lo &= ~1;
1801
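                    /* Save the L1 (6 units x 0x12 regs) and L2 (0x83 regs) indirect registers */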
1802                for (i = 0; i < 6; i++)
1803                        for (j = 0; j < 0x12; j++)
1804                                iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1805
1806                for (i = 0; i < 0x83; i++)
1807                        iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1808        }
1809
1810        amd_iommu_erratum_746_workaround(iommu);
1811        amd_iommu_ats_write_check_workaround(iommu);
1812
1813        iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
1814                               amd_iommu_groups, "ivhd%d", iommu->index);
1815        iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
1816        iommu_device_register(&iommu->iommu);
1817
1818        return pci_enable_device(iommu->dev);
1819}
1820
1821static void print_iommu_info(void)
1822{
1823        static const char * const feat_str[] = {
1824                "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1825                "IA", "GA", "HE", "PC"
1826        };
1827        struct amd_iommu *iommu;
1828
1829        for_each_iommu(iommu) {
1830                int i;
1831
1832                pr_info("AMD-Vi: Found IOMMU at %s cap 0x%hx\n",
1833                        dev_name(&iommu->dev->dev), iommu->cap_ptr);
1834
1835                if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1836                        pr_info("AMD-Vi: Extended features (%#llx):\n",
1837                                iommu->features);
1838                        for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1839                                if (iommu_feature(iommu, (1ULL << i)))
1840                                        pr_cont(" %s", feat_str[i]);
1841                        }
1842
1843                        if (iommu->features & FEATURE_GAM_VAPIC)
1844                                pr_cont(" GA_vAPIC");
1845
1846                        pr_cont("\n");
1847                }
1848        }
1849        if (irq_remapping_enabled) {
1850                pr_info("AMD-Vi: Interrupt remapping enabled\n");
1851                if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1852                        pr_info("AMD-Vi: virtual APIC enabled\n");
1853                if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1854                        pr_info("AMD-Vi: X2APIC enabled\n");
1855        }
1856}
1857
1858static int __init amd_iommu_init_pci(void)
1859{
1860        struct amd_iommu *iommu;
1861        int ret = 0;
1862
1863        for_each_iommu(iommu) {
1864                ret = iommu_init_pci(iommu);
1865                if (ret)
1866                        break;
1867        }
1868
1869        /*
1870         * Order is important here to make sure any unity map requirements are
1871         * fulfilled. The unity mappings are created and written to the device
1872         * table during the amd_iommu_init_api() call.
1873         *
1874         * After that we call init_device_table_dma() to make sure any
1875         * uninitialized DTE will block DMA, and in the end we flush the caches
1876         * of all IOMMUs to make sure the changes to the device table are
1877         * active.
1878         */
1879        ret = amd_iommu_init_api();
1880
1881        init_device_table_dma();
1882
1883        for_each_iommu(iommu)
1884                iommu_flush_all_caches(iommu);
1885
1886        if (!ret)
1887                print_iommu_info();
1888
1889        return ret;
1890}
1891
1892/****************************************************************************
1893 *
1894 * The following functions initialize the MSI interrupts for all IOMMUs
1895 * in the system. This is a bit challenging because there could be multiple
1896 * IOMMUs per PCI BDF, but we can call pci_enable_msi(x) only once per
1897 * pci_dev.
1898 *
1899 ****************************************************************************/
1900
1901static int iommu_setup_msi(struct amd_iommu *iommu)
1902{
1903        int r;
1904
1905        r = pci_enable_msi(iommu->dev);
1906        if (r)
1907                return r;
1908
1909        r = request_threaded_irq(iommu->dev->irq,
1910                                 amd_iommu_int_handler,
1911                                 amd_iommu_int_thread,
1912                                 0, "AMD-Vi",
1913                                 iommu);
1914
1915        if (r) {
1916                pci_disable_msi(iommu->dev);
1917                return r;
1918        }
1919
1920        iommu->int_enabled = true;
1921
1922        return 0;
1923}
1924
1925static int iommu_init_msi(struct amd_iommu *iommu)
1926{
1927        int ret;
1928
1929        if (iommu->int_enabled)
1930                goto enable_faults;
1931
1932        if (iommu->dev->msi_cap)
1933                ret = iommu_setup_msi(iommu);
1934        else
1935                ret = -ENODEV;
1936
1937        if (ret)
1938                return ret;
1939
1940enable_faults:
1941        iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
1942
1943        if (iommu->ppr_log != NULL)
1944                iommu_feature_enable(iommu, CONTROL_PPFINT_EN);
1945
1946        iommu_ga_log_enable(iommu);
1947
1948        return 0;
1949}
1950
1951/****************************************************************************
1952 *
1953 * The next functions belong to the last pass of parsing the ACPI
1954 * table. In this pass the memory mapping requirements are
1955 * gathered (like exclusion and unity mapping ranges).
1956 *
1957 ****************************************************************************/
1958
1959static void __init free_unity_maps(void)
1960{
1961        struct unity_map_entry *entry, *next;
1962
1963        list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
1964                list_del(&entry->list);
1965                kfree(entry);
1966        }
1967}
1968
1969/* called when we find an exclusion range definition in ACPI */
1970static int __init init_exclusion_range(struct ivmd_header *m)
1971{
1972        int i;
1973
1974        switch (m->type) {
1975        case ACPI_IVMD_TYPE:
1976                set_device_exclusion_range(m->devid, m);
1977                break;
1978        case ACPI_IVMD_TYPE_ALL:
1979                for (i = 0; i <= amd_iommu_last_bdf; ++i)
1980                        set_device_exclusion_range(i, m);
1981                break;
1982        case ACPI_IVMD_TYPE_RANGE:
1983                for (i = m->devid; i <= m->aux; ++i)
1984                        set_device_exclusion_range(i, m);
1985                break;
1986        default:
1987                break;
1988        }
1989
1990        return 0;
1991}
1992
1993/* called for unity map ACPI definition */
1994static int __init init_unity_map_range(struct ivmd_header *m)
1995{
1996        struct unity_map_entry *e = NULL;
1997        char *s;
1998
1999        e = kzalloc(sizeof(*e), GFP_KERNEL);
2000        if (e == NULL)
2001                return -ENOMEM;
2002
2003        switch (m->type) {
2004        default:
2005                kfree(e);
2006                return 0;
2007        case ACPI_IVMD_TYPE:
2008                s = "IVMD_TYPE\t\t\t";
2009                e->devid_start = e->devid_end = m->devid;
2010                break;
2011        case ACPI_IVMD_TYPE_ALL:
2012                s = "IVMD_TYPE_ALL\t\t";
2013                e->devid_start = 0;
2014                e->devid_end = amd_iommu_last_bdf;
2015                break;
2016        case ACPI_IVMD_TYPE_RANGE:
2017                s = "IVMD_TYPE_RANGE\t\t";
2018                e->devid_start = m->devid;
2019                e->devid_end = m->aux;
2020                break;
2021        }
2022        e->address_start = PAGE_ALIGN(m->range_start);
2023        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
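            /* Drop the unity-map bit so IR/IW line up with the IOMMU_PROT_* flags */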
2024        e->prot = m->flags >> 1;
2025
2026        DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
2027                    " range_start: %016llx range_end: %016llx flags: %x\n", s,
2028                    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2029                    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
2030                    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2031                    e->address_start, e->address_end, m->flags);
2032
2033        list_add_tail(&e->list, &amd_iommu_unity_map);
2034
2035        return 0;
2036}
2037
2038/* iterates over all memory definitions we find in the ACPI table */
2039static int __init init_memory_definitions(struct acpi_table_header *table)
2040{
2041        u8 *p = (u8 *)table, *end = (u8 *)table;
2042        struct ivmd_header *m;
2043
2044        end += table->length;
2045        p += IVRS_HEADER_LENGTH;
2046
2047        while (p < end) {
2048                m = (struct ivmd_header *)p;
2049                if (m->flags & IVMD_FLAG_EXCL_RANGE)
2050                        init_exclusion_range(m);
2051                else if (m->flags & IVMD_FLAG_UNITY_MAP)
2052                        init_unity_map_range(m);
2053
2054                p += m->length;
2055        }
2056
2057        return 0;
2058}
2059
2060/*
2061 * Init the device table so that DMA access is blocked for all devices
2062 */
2063static void init_device_table_dma(void)
2064{
2065        u32 devid;
2066
2067        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2068                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
2069                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
2070        }
2071}
2072
2073static void __init uninit_device_table_dma(void)
2074{
2075        u32 devid;
2076
2077        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2078                amd_iommu_dev_table[devid].data[0] = 0ULL;
2079                amd_iommu_dev_table[devid].data[1] = 0ULL;
2080        }
2081}
2082
2083static void init_device_table(void)
2084{
2085        u32 devid;
2086
2087        if (!amd_iommu_irq_remap)
2088                return;
2089
2090        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2091                set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
2092}
2093
2094static void iommu_init_flags(struct amd_iommu *iommu)
2095{
2096        iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2097                iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2098                iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2099
2100        iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2101                iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2102                iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2103
2104        iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2105                iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2106                iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2107
2108        iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2109                iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2110                iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2111
2112        /*
2113         * make IOMMU memory accesses cache coherent
2114         */
2115        iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2116
2117        /* Set IOTLB invalidation timeout to 1s */
2118        iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2119}
2120
2121static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2122{
2123        int i, j;
2124        u32 ioc_feature_control;
2125        struct pci_dev *pdev = iommu->root_pdev;
2126
2127        /* RD890 BIOSes may not have completely reconfigured the iommu */
2128        if (!is_rd890_iommu(iommu->dev) || !pdev)
2129                return;
2130
2131        /*
2132         * First, we need to ensure that the iommu is enabled. This is
2133         * controlled by a register in the northbridge
2134         */
2135
2136        /* Select Northbridge indirect register 0x75 and enable writing */
2137        pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2138        pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2139
2140        /* Enable the iommu */
2141        if (!(ioc_feature_control & 0x1))
2142                pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2143
2144        /* Restore the iommu BAR */
2145        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2146                               iommu->stored_addr_lo);
2147        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2148                               iommu->stored_addr_hi);
2149
2150        /* Restore the l1 indirect regs for each of the 6 l1s */
2151        for (i = 0; i < 6; i++)
2152                for (j = 0; j < 0x12; j++)
2153                        iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2154
2155        /* Restore the l2 indirect regs */
2156        for (i = 0; i < 0x83; i++)
2157                iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2158
2159        /* Lock PCI setup registers */
2160        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2161                               iommu->stored_addr_lo | 1);
2162}
2163
2164static void iommu_enable_ga(struct amd_iommu *iommu)
2165{
2166#ifdef CONFIG_IRQ_REMAP
2167        switch (amd_iommu_guest_ir) {
2168        case AMD_IOMMU_GUEST_IR_VAPIC:
2169                iommu_feature_enable(iommu, CONTROL_GAM_EN);
2170                /* Fall through */
2171        case AMD_IOMMU_GUEST_IR_LEGACY_GA:
2172                iommu_feature_enable(iommu, CONTROL_GA_EN);
2173                iommu->irte_ops = &irte_128_ops;
2174                break;
2175        default:
2176                iommu->irte_ops = &irte_32_ops;
2177                break;
2178        }
2179#endif
2180}
2181
2182static void early_enable_iommu(struct amd_iommu *iommu)
2183{
2184        iommu_disable(iommu);
2185        iommu_init_flags(iommu);
2186        iommu_set_device_table(iommu);
2187        iommu_enable_command_buffer(iommu);
2188        iommu_enable_event_buffer(iommu);
2189        iommu_set_exclusion_range(iommu);
2190        iommu_enable_ga(iommu);
2191        iommu_enable_xt(iommu);
2192        iommu_enable(iommu);
2193        iommu_flush_all_caches(iommu);
2194}
2195
2196/*
2197 * This function finally enables all IOMMUs found in the system after
2198 * they have been initialized.
2199 *
2200 * Or, when running in a kdump kernel with all IOMMUs pre-enabled, try to
2201 * copy the old contents of the device table entries. If that is not the
2202 * case, or the copy failed, just continue as a normal kernel would.
2203 */
2204static void early_enable_iommus(void)
2205{
2206        struct amd_iommu *iommu;
2207
2208
2209        if (!copy_device_table()) {
2210                /*
2211                 * If we got here because copying the device table from the
2212                 * old kernel failed with all IOMMUs pre-enabled, print an
2213                 * error message and free the allocated old_dev_tbl_cpy.
2214                 */
2215                if (amd_iommu_pre_enabled)
2216                        pr_err("Failed to copy DEV table from previous kernel.\n");
2217                if (old_dev_tbl_cpy != NULL)
2218                        free_pages((unsigned long)old_dev_tbl_cpy,
2219                                        get_order(dev_table_size));
2220
2221                for_each_iommu(iommu) {
2222                        clear_translation_pre_enabled(iommu);
2223                        early_enable_iommu(iommu);
2224                }
2225        } else {
2226                pr_info("Copied DEV table from previous kernel.\n");
2227                free_pages((unsigned long)amd_iommu_dev_table,
2228                                get_order(dev_table_size));
2229                amd_iommu_dev_table = old_dev_tbl_cpy;
2230                for_each_iommu(iommu) {
2231                        iommu_disable_command_buffer(iommu);
2232                        iommu_disable_event_buffer(iommu);
2233                        iommu_enable_command_buffer(iommu);
2234                        iommu_enable_event_buffer(iommu);
2235                        iommu_enable_ga(iommu);
2236                        iommu_enable_xt(iommu);
2237                        iommu_set_device_table(iommu);
2238                        iommu_flush_all_caches(iommu);
2239                }
2240        }
2241
2242#ifdef CONFIG_IRQ_REMAP
2243        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2244                amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2245#endif
2246}
2247
2248static void enable_iommus_v2(void)
2249{
2250        struct amd_iommu *iommu;
2251
2252        for_each_iommu(iommu) {
2253                iommu_enable_ppr_log(iommu);
2254                iommu_enable_gt(iommu);
2255        }
2256}
2257
2258static void enable_iommus(void)
2259{
2260        early_enable_iommus();
2261
2262        enable_iommus_v2();
2263}
2264
2265static void disable_iommus(void)
2266{
2267        struct amd_iommu *iommu;
2268
2269        for_each_iommu(iommu)
2270                iommu_disable(iommu);
2271
2272#ifdef CONFIG_IRQ_REMAP
2273        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2274                amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2275#endif
2276}
2277
2278/*
2279 * Suspend/Resume support
2280 * The IOMMUs are disabled on suspend and fully re-enabled on resume
2281 */
2282
2283static void amd_iommu_resume(void)
2284{
2285        struct amd_iommu *iommu;
2286
2287        for_each_iommu(iommu)
2288                iommu_apply_resume_quirks(iommu);
2289
2290        /* re-load the hardware */
2291        enable_iommus();
2292
2293        amd_iommu_enable_interrupts();
2294}
2295
2296static int amd_iommu_suspend(void)
2297{
2298        /* disable IOMMUs to go out of the way for BIOS */
2299        disable_iommus();
2300
2301        return 0;
2302}
2303
2304static struct syscore_ops amd_iommu_syscore_ops = {
2305        .suspend = amd_iommu_suspend,
2306        .resume = amd_iommu_resume,
2307};
2308
2309static void __init free_iommu_resources(void)
2310{
2311        kmemleak_free(irq_lookup_table);
2312        free_pages((unsigned long)irq_lookup_table,
2313                   get_order(rlookup_table_size));
2314        irq_lookup_table = NULL;
2315
2316        kmem_cache_destroy(amd_iommu_irq_cache);
2317        amd_iommu_irq_cache = NULL;
2318
2319        free_pages((unsigned long)amd_iommu_rlookup_table,
2320                   get_order(rlookup_table_size));
2321        amd_iommu_rlookup_table = NULL;
2322
2323        free_pages((unsigned long)amd_iommu_alias_table,
2324                   get_order(alias_table_size));
2325        amd_iommu_alias_table = NULL;
2326
2327        free_pages((unsigned long)amd_iommu_dev_table,
2328                   get_order(dev_table_size));
2329        amd_iommu_dev_table = NULL;
2330
2331        free_iommu_all();
2332
2333#ifdef CONFIG_GART_IOMMU
2334        /*
2335         * We failed to initialize the AMD IOMMU - try fallback to GART
2336         * if possible.
2337         */
2338        gart_iommu_init();
2339
2340#endif
2341}
2342
2343/* SB IOAPIC is always on this device in AMD systems */
2344#define IOAPIC_SB_DEVID         ((0x00 << 8) | PCI_DEVFN(0x14, 0))
2345
2346static bool __init check_ioapic_information(void)
2347{
2348        const char *fw_bug = FW_BUG;
2349        bool ret, has_sb_ioapic;
2350        int idx;
2351
2352        has_sb_ioapic = false;
2353        ret           = false;
2354
2355        /*
2356         * If we have map overrides on the kernel command line the
2357         * messages in this function might not describe firmware bugs
2358         * anymore - so be careful
2359         */
2360        if (cmdline_maps)
2361                fw_bug = "";
2362
2363        for (idx = 0; idx < nr_ioapics; idx++) {
2364                int devid, id = mpc_ioapic_id(idx);
2365
2366                devid = get_ioapic_devid(id);
2367                if (devid < 0) {
2368                        pr_err("%sAMD-Vi: IOAPIC[%d] not in IVRS table\n",
2369                                fw_bug, id);
2370                        ret = false;
2371                } else if (devid == IOAPIC_SB_DEVID) {
2372                        has_sb_ioapic = true;
2373                        ret           = true;
2374                }
2375        }
2376
2377        if (!has_sb_ioapic) {
2378                /*
2379                 * We expect the SB IOAPIC to be listed in the IVRS
2380                 * table. The system timer is connected to the SB IOAPIC
2381                 * and if we don't have it in the list the system will
2382                 * panic at boot time.  This situation usually happens
2383                 * when the BIOS is buggy and provides us the wrong
2384                 * device id for the IOAPIC in the system.
2385                 */
2386                pr_err("%sAMD-Vi: No southbridge IOAPIC found\n", fw_bug);
2387        }
2388
2389        if (!ret)
2390                pr_err("AMD-Vi: Disabling interrupt remapping\n");
2391
2392        return ret;
2393}
2394
2395static void __init free_dma_resources(void)
2396{
2397        free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2398                   get_order(MAX_DOMAIN_ID/8));
2399        amd_iommu_pd_alloc_bitmap = NULL;
2400
2401        free_unity_maps();
2402}
2403
2404/*
2405 * This is the hardware init function for AMD IOMMU in the system.
2406 * This function is called either from amd_iommu_init or from the interrupt
2407 * remapping setup code.
2408 *
2409 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2410 * four times:
2411 *
2412 *      Pass 1) Discover the most comprehensive IVHD type to use.
2413 *
2414 *      Pass 2) Find the highest PCI device id the driver has to handle.
2415 *              Based on this information the sizes of the data structures
2416 *              that need to be allocated are determined.
2417 *
2418 *      Pass 3) Initialize the data structures just allocated with the
2419 *              information in the ACPI table about available AMD IOMMUs
2420 *              in the system. It also maps the PCI devices in the
2421 *              system to specific IOMMUs
2422 *
2423 *      Pass 4) After the basic data structures are allocated and
2424 *              initialized we update them with information about memory
2425 *              remapping requirements parsed out of the ACPI table in
2426 *              this last pass.
2427 *
2428 * After everything is set up the IOMMUs are enabled and the necessary
2429 * hotplug and suspend notifiers are registered.
2430 */
2431static int __init early_amd_iommu_init(void)
2432{
2433        struct acpi_table_header *ivrs_base;
2434        acpi_status status;
2435        int i, remap_cache_sz, ret = 0;
2436
2437        if (!amd_iommu_detected)
2438                return -ENODEV;
2439
2440        status = acpi_get_table("IVRS", 0, &ivrs_base);
2441        if (status == AE_NOT_FOUND)
2442                return -ENODEV;
2443        else if (ACPI_FAILURE(status)) {
2444                const char *err = acpi_format_exception(status);
2445                pr_err("AMD-Vi: IVRS table error: %s\n", err);
2446                return -EINVAL;
2447        }
2448
2449        /*
2450         * Validate checksum here so we don't need to do it when
2451         * we actually parse the table
2452         */
2453        ret = check_ivrs_checksum(ivrs_base);
2454        if (ret)
2455                goto out;
2456
2457        amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2458        DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
2459
2460        /*
2461         * First parse ACPI tables to find the largest Bus/Dev/Func
2462         * we need to handle. Upon this information the shared data
2463         * structures for the IOMMUs in the system will be allocated
2464         */
2465        ret = find_last_devid_acpi(ivrs_base);
2466        if (ret)
2467                goto out;
2468
2469        dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
2470        alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
2471        rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
2472
2473        /* Device table - directly used by all IOMMUs */
2474        ret = -ENOMEM;
2475        amd_iommu_dev_table = (void *)__get_free_pages(
2476                                      GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
2477                                      get_order(dev_table_size));
2478        if (amd_iommu_dev_table == NULL)
2479                goto out;
2480
2481        /*
2482         * Alias table - map a PCI Bus/Dev/Func to the Bus/Dev/Func the
2483         * IOMMU sees for that device
2484         */
2485        amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
2486                        get_order(alias_table_size));
2487        if (amd_iommu_alias_table == NULL)
2488                goto out;
2489
2490        /* IOMMU rlookup table - find the IOMMU for a specific device */
2491        amd_iommu_rlookup_table = (void *)__get_free_pages(
2492                        GFP_KERNEL | __GFP_ZERO,
2493                        get_order(rlookup_table_size));
2494        if (amd_iommu_rlookup_table == NULL)
2495                goto out;
2496
2497        amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
2498                                            GFP_KERNEL | __GFP_ZERO,
2499                                            get_order(MAX_DOMAIN_ID/8));
2500        if (amd_iommu_pd_alloc_bitmap == NULL)
2501                goto out;
2502
2503        /*
2504         * let all alias entries point to themselves
2505         */
2506        for (i = 0; i <= amd_iommu_last_bdf; ++i)
2507                amd_iommu_alias_table[i] = i;
2508
2509        /*
2510         * never allocate domain 0 because it's used as the non-allocated and
2511         * error value placeholder
2512         */
2513        __set_bit(0, amd_iommu_pd_alloc_bitmap);
2514
2515        spin_lock_init(&amd_iommu_pd_lock);
2516
2517        /*
2518         * now the data structures are allocated and basically initialized
2519         * start the real acpi table scan
2520         */
2521        ret = init_iommu_all(ivrs_base);
2522        if (ret)
2523                goto out;
2524
2525        /* Disable any previously enabled IOMMUs */
2526        if (!is_kdump_kernel() || amd_iommu_disabled)
2527                disable_iommus();
2528
2529        if (amd_iommu_irq_remap)
2530                amd_iommu_irq_remap = check_ioapic_information();
2531
2532        if (amd_iommu_irq_remap) {
2533                /*
2534                 * Interrupt remapping enabled, create kmem_cache for the
2535                 * remapping tables.
2536                 */
2537                ret = -ENOMEM;
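                    /* Legacy IRTEs are 32 bits wide, GA-mode IRTEs are 128 bits */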
2538                if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2539                        remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
2540                else
2541                        remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
2542                amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
2543                                                        remap_cache_sz,
2544                                                        IRQ_TABLE_ALIGNMENT,
2545                                                        0, NULL);
2546                if (!amd_iommu_irq_cache)
2547                        goto out;
2548
2549                irq_lookup_table = (void *)__get_free_pages(
2550                                GFP_KERNEL | __GFP_ZERO,
2551                                get_order(rlookup_table_size));
2552                kmemleak_alloc(irq_lookup_table, rlookup_table_size,
2553                               1, GFP_KERNEL);
2554                if (!irq_lookup_table)
2555                        goto out;
2556        }
2557
2558        ret = init_memory_definitions(ivrs_base);
2559        if (ret)
2560                goto out;
2561
2562        /* init the device table */
2563        init_device_table();
2564
2565out:
2566        /* Don't leak any ACPI memory */
2567        acpi_put_table(ivrs_base);
2568        ivrs_base = NULL;
2569
2570        return ret;
2571}
2572
2573static int amd_iommu_enable_interrupts(void)
2574{
2575        struct amd_iommu *iommu;
2576        int ret = 0;
2577
2578        for_each_iommu(iommu) {
2579                ret = iommu_init_msi(iommu);
2580                if (ret)
2581                        goto out;
2582        }
2583
2584out:
2585        return ret;
2586}
2587
2588static bool detect_ivrs(void)
2589{
2590        struct acpi_table_header *ivrs_base;
2591        acpi_status status;
2592
2593        status = acpi_get_table("IVRS", 0, &ivrs_base);
2594        if (status == AE_NOT_FOUND)
2595                return false;
2596        else if (ACPI_FAILURE(status)) {
2597                const char *err = acpi_format_exception(status);
2598                pr_err("AMD-Vi: IVRS table error: %s\n", err);
2599                return false;
2600        }
2601
2602        acpi_put_table(ivrs_base);
2603
2604        /* Make sure ACS will be enabled during PCI probe */
2605        pci_request_acs();
2606
2607        return true;
2608}
2609
2610/****************************************************************************
2611 *
2612 * AMD IOMMU Initialization State Machine
2613 *
2614 ****************************************************************************/
2615
2616static int __init state_next(void)
2617{
2618        int ret = 0;
2619
2620        switch (init_state) {
2621        case IOMMU_START_STATE:
2622                if (!detect_ivrs()) {
2623                        init_state      = IOMMU_NOT_FOUND;
2624                        ret             = -ENODEV;
2625                } else {
2626                        init_state      = IOMMU_IVRS_DETECTED;
2627                }
2628                break;
2629        case IOMMU_IVRS_DETECTED:
2630                ret = early_amd_iommu_init();
2631                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2632                if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2633                        pr_info("AMD-Vi: AMD IOMMU disabled on kernel command-line\n");
2634                        free_dma_resources();
2635                        free_iommu_resources();
2636                        init_state = IOMMU_CMDLINE_DISABLED;
2637                        ret = -EINVAL;
2638                }
2639                break;
2640        case IOMMU_ACPI_FINISHED:
2641                early_enable_iommus();
2642                x86_platform.iommu_shutdown = disable_iommus;
2643                init_state = IOMMU_ENABLED;
2644                break;
2645        case IOMMU_ENABLED:
2646                register_syscore_ops(&amd_iommu_syscore_ops);
2647                ret = amd_iommu_init_pci();
2648                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2649                enable_iommus_v2();
2650                break;
2651        case IOMMU_PCI_INIT:
2652                ret = amd_iommu_enable_interrupts();
2653                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2654                break;
2655        case IOMMU_INTERRUPTS_EN:
2656                ret = amd_iommu_init_dma_ops();
2657                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2658                break;
2659        case IOMMU_DMA_OPS:
2660                init_state = IOMMU_INITIALIZED;
2661                break;
2662        case IOMMU_INITIALIZED:
2663                /* Nothing to do */
2664                break;
2665        case IOMMU_NOT_FOUND:
2666        case IOMMU_INIT_ERROR:
2667        case IOMMU_CMDLINE_DISABLED:
2668                /* Error states => do nothing */
2669                ret = -EINVAL;
2670                break;
2671        default:
2672                /* Unknown state */
2673                BUG();
2674        }
2675
2676        return ret;
2677}
2678
2679static int __init iommu_go_to_state(enum iommu_init_state state)
2680{
2681        int ret = -EINVAL;
2682
2683        while (init_state != state) {
2684                if (init_state == IOMMU_NOT_FOUND         ||
2685                    init_state == IOMMU_INIT_ERROR        ||
2686                    init_state == IOMMU_CMDLINE_DISABLED)
2687                        break;
2688                ret = state_next();
2689        }
2690
2691        return ret;
2692}
2693
2694#ifdef CONFIG_IRQ_REMAP
2695int __init amd_iommu_prepare(void)
2696{
2697        int ret;
2698
2699        amd_iommu_irq_remap = true;
2700
2701        ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
2702        if (ret)
2703                return ret;
2704        return amd_iommu_irq_remap ? 0 : -ENODEV;
2705}
2706
2707int __init amd_iommu_enable(void)
2708{
2709        int ret;
2710
2711        ret = iommu_go_to_state(IOMMU_ENABLED);
2712        if (ret)
2713                return ret;
2714
2715        irq_remapping_enabled = 1;
2716        return amd_iommu_xt_mode;
2717}
2718
2719void amd_iommu_disable(void)
2720{
2721        amd_iommu_suspend();
2722}
2723
2724int amd_iommu_reenable(int mode)
2725{
2726        amd_iommu_resume();
2727
2728        return 0;
2729}
2730
2731int __init amd_iommu_enable_faulting(void)
2732{
2733        /* We enable MSI later when PCI is initialized */
2734        return 0;
2735}
2736#endif
2737
2738/*
2739 * This is the core init function for AMD IOMMU hardware in the system.
2740 * This function is called from the generic x86 DMA layer initialization
2741 * code.
2742 */
2743static int __init amd_iommu_init(void)
2744{
2745        struct amd_iommu *iommu;
2746        int ret;
2747
2748        ret = iommu_go_to_state(IOMMU_INITIALIZED);
2749        if (ret) {
2750                free_dma_resources();
2751                if (!irq_remapping_enabled) {
2752                        disable_iommus();
2753                        free_iommu_resources();
2754                } else {
2755                        uninit_device_table_dma();
2756                        for_each_iommu(iommu)
2757                                iommu_flush_all_caches(iommu);
2758                }
2759        }
2760
2761        for_each_iommu(iommu)
2762                amd_iommu_debugfs_setup(iommu);
2763
2764        return ret;
2765}
2766
2767static bool amd_iommu_sme_check(void)
2768{
2769        if (!sme_active() || (boot_cpu_data.x86 != 0x17))
2770                return true;
2771
2772        /* For Fam17h, a specific level of support is required */
2773        if (boot_cpu_data.microcode >= 0x08001205)
2774                return true;
2775
2776        if ((boot_cpu_data.microcode >= 0x08001126) &&
2777            (boot_cpu_data.microcode <= 0x080011ff))
2778                return true;
2779
2780        pr_notice("AMD-Vi: IOMMU not currently supported when SME is active\n");
2781
2782        return false;
2783}
2784
2785/****************************************************************************
2786 *
2787 * Early detect code. This code runs at IOMMU detection time in the DMA
2788 * layer. It just checks whether an IVRS ACPI table is present to detect AMD
2789 * IOMMUs
2790 *
2791 ****************************************************************************/
2792int __init amd_iommu_detect(void)
2793{
2794        int ret;
2795
2796        if (no_iommu || (iommu_detected && !gart_iommu_aperture))
2797                return -ENODEV;
2798
2799        if (!amd_iommu_sme_check())
2800                return -ENODEV;
2801
2802        ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
2803        if (ret)
2804                return ret;
2805
2806        amd_iommu_detected = true;
2807        iommu_detected = 1;
2808        x86_init.iommu.iommu_init = amd_iommu_init;
2809
2810        return 1;
2811}
2812
2813/****************************************************************************
2814 *
2815 * Parsing functions for the AMD IOMMU specific kernel command line
2816 * options.
2817 *
2818 ****************************************************************************/
2819
2820static int __init parse_amd_iommu_dump(char *str)
2821{
2822        amd_iommu_dump = true;
2823
2824        return 1;
2825}
2826
2827static int __init parse_amd_iommu_intr(char *str)
2828{
2829        for (; *str; ++str) {
2830                if (strncmp(str, "legacy", 6) == 0) {
2831                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
2832                        break;
2833                }
2834                if (strncmp(str, "vapic", 5) == 0) {
2835                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
2836                        break;
2837                }
2838        }
2839        return 1;
2840}
2841
2842static int __init parse_amd_iommu_options(char *str)
2843{
2844        for (; *str; ++str) {
2845                if (strncmp(str, "fullflush", 9) == 0)
2846                        amd_iommu_unmap_flush = true;
2847                if (strncmp(str, "off", 3) == 0)
2848                        amd_iommu_disabled = true;
2849                if (strncmp(str, "force_isolation", 15) == 0)
2850                        amd_iommu_force_isolation = true;
2851        }
2852
2853        return 1;
2854}
2855
2856static int __init parse_ivrs_ioapic(char *str)
2857{
2858        unsigned int bus, dev, fn;
2859        int ret, id, i;
2860        u16 devid;
2861
2862        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2863
2864        if (ret != 4) {
2865                pr_err("AMD-Vi: Invalid command line: ivrs_ioapic%s\n", str);
2866                return 1;
2867        }
2868
2869        if (early_ioapic_map_size == EARLY_MAP_SIZE) {
2870                pr_err("AMD-Vi: Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
2871                        str);
2872                return 1;
2873        }
2874
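            /* Compose the device id: bus in bits 15:8, slot in bits 7:3, function in bits 2:0 */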
2875        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2876
2877        cmdline_maps                    = true;
2878        i                               = early_ioapic_map_size++;
2879        early_ioapic_map[i].id          = id;
2880        early_ioapic_map[i].devid       = devid;
2881        early_ioapic_map[i].cmd_line    = true;
2882
2883        return 1;
2884}
2885
2886static int __init parse_ivrs_hpet(char *str)
2887{
2888        unsigned int bus, dev, fn;
2889        int ret, id, i;
2890        u16 devid;
2891
2892        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
2893
2894        if (ret != 4) {
2895                pr_err("AMD-Vi: Invalid command line: ivrs_hpet%s\n", str);
2896                return 1;
2897        }
2898
2899        if (early_hpet_map_size == EARLY_MAP_SIZE) {
2900                pr_err("AMD-Vi: Early HPET map overflow - ignoring ivrs_hpet%s\n",
2901                        str);
2902                return 1;
2903        }
2904
2905        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2906
2907        cmdline_maps                    = true;
2908        i                               = early_hpet_map_size++;
2909        early_hpet_map[i].id            = id;
2910        early_hpet_map[i].devid         = devid;
2911        early_hpet_map[i].cmd_line      = true;
2912
2913        return 1;
2914}
2915
2916static int __init parse_ivrs_acpihid(char *str)
2917{
2918        u32 bus, dev, fn;
2919        char *hid, *uid, *p;
2920        char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
2921        int ret, i;
2922
2923        ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
2924        if (ret != 4) {
2925                pr_err("AMD-Vi: Invalid command line: ivrs_acpihid(%s)\n", str);
2926                return 1;
2927        }
2928
2929        p = acpiid;
2930        hid = strsep(&p, ":");
2931        uid = p;
2932
2933        if (!hid || !(*hid) || !uid) {
2934                pr_err("AMD-Vi: Invalid command line: hid or uid\n");
2935                return 1;
2936        }
2937
2938        if (early_acpihid_map_size == EARLY_MAP_SIZE) {
                    pr_err("AMD-Vi: Early ACPIHID map overflow - ignoring ivrs_acpihid%s\n",
                            str);
                    return 1;
            }

            i = early_acpihid_map_size++;
2939        memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
2940        memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
2941        early_acpihid_map[i].devid =
2942                ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
2943        early_acpihid_map[i].cmd_line   = true;
2944
2945        return 1;
2946}
2947
2948__setup("amd_iommu_dump",       parse_amd_iommu_dump);
2949__setup("amd_iommu=",           parse_amd_iommu_options);
2950__setup("amd_iommu_intr=",      parse_amd_iommu_intr);
2951__setup("ivrs_ioapic",          parse_ivrs_ioapic);
2952__setup("ivrs_hpet",            parse_ivrs_hpet);
2953__setup("ivrs_acpihid",         parse_ivrs_acpihid);
2954
2955IOMMU_INIT_FINISH(amd_iommu_detect,
2956                  gart_iommu_hole_init,
2957                  NULL,
2958                  NULL);
2959
2960bool amd_iommu_v2_supported(void)
2961{
2962        return amd_iommu_v2_present;
2963}
2964EXPORT_SYMBOL(amd_iommu_v2_supported);
2965
2966struct amd_iommu *get_amd_iommu(unsigned int idx)
2967{
2968        unsigned int i = 0;
2969        struct amd_iommu *iommu;
2970
2971        for_each_iommu(iommu)
2972                if (i++ == idx)
2973                        return iommu;
2974        return NULL;
2975}
2976EXPORT_SYMBOL(get_amd_iommu);
2977
2978/****************************************************************************
2979 *
2980 * IOMMU EFR Performance Counter support. This code allows access to the
2981 * IOMMU PC functionality.
2982 *
2983 ****************************************************************************/
2984
2985u8 amd_iommu_pc_get_max_banks(unsigned int idx)
2986{
2987        struct amd_iommu *iommu = get_amd_iommu(idx);
2988
2989        if (iommu)
2990                return iommu->max_banks;
2991
2992        return 0;
2993}
2994EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
2995
2996bool amd_iommu_pc_supported(void)
2997{
2998        return amd_iommu_pc_present;
2999}
3000EXPORT_SYMBOL(amd_iommu_pc_supported);
3001
3002u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3003{
3004        struct amd_iommu *iommu = get_amd_iommu(idx);
3005
3006        if (iommu)
3007                return iommu->max_counters;
3008
3009        return 0;
3010}
3011EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3012
3013static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3014                                u8 fxn, u64 *value, bool is_write)
3015{
3016        u32 offset;
3017        u32 max_offset_lim;
3018
3019        /* Make sure the IOMMU PC resource is available */
3020        if (!amd_iommu_pc_present)
3021                return -ENODEV;
3022
3023        /* Check for valid iommu and pc register indexing */
3024        if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3025                return -ENODEV;
3026
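            /*
             * Counter registers start at MMIO offset 0x40000; each bank takes
             * a 4K page, each counter a 256-byte stride, and fxn selects the
             * register within the counter.
             */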
3027        offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3028
3029        /* Limit the offset to the hw defined mmio region aperture */
3030        max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3031                                (iommu->max_counters << 8) | 0x28);
3032        if ((offset < MMIO_CNTR_REG_OFFSET) ||
3033            (offset > max_offset_lim))
3034                return -EINVAL;
3035
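            /* The performance counters are 48 bits wide */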
3036        if (is_write) {
3037                u64 val = *value & GENMASK_ULL(47, 0);
3038
3039                writel((u32)val, iommu->mmio_base + offset);
3040                writel((val >> 32), iommu->mmio_base + offset + 4);
3041        } else {
3042                *value = readl(iommu->mmio_base + offset + 4);
3043                *value <<= 32;
3044                *value |= readl(iommu->mmio_base + offset);
3045                *value &= GENMASK_ULL(47, 0);
3046        }
3047
3048        return 0;
3049}
3050
3051int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3052{
3053        if (!iommu)
3054                return -EINVAL;
3055
3056        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3057}
3058EXPORT_SYMBOL(amd_iommu_pc_get_reg);
3059
3060int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3061{
3062        if (!iommu)
3063                return -EINVAL;
3064
3065        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3066}
3067EXPORT_SYMBOL(amd_iommu_pc_set_reg);
3068