linux/drivers/iommu/amd/init.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Copyright (C) 2007-2010 Advanced Micro Devices, Inc.
   4 * Author: Joerg Roedel <jroedel@suse.de>
   5 *         Leo Duran <leo.duran@amd.com>
   6 */
   7
   8#define pr_fmt(fmt)     "AMD-Vi: " fmt
   9#define dev_fmt(fmt)    pr_fmt(fmt)
  10
   #include <linux/pci.h>
   #include <linux/acpi.h>
   #include <linux/list.h>
   #include <linux/bitmap.h>
   #include <linux/slab.h>
   #include <linux/syscore_ops.h>
   #include <linux/interrupt.h>
   #include <linux/msi.h>
   #include <linux/amd-iommu.h>
   #include <linux/export.h>
   #include <linux/kmemleak.h>
   #include <linux/mem_encrypt.h>
   #include <linux/delay.h>
   #include <asm/pci-direct.h>
   #include <asm/iommu.h>
   #include <asm/apic.h>
   #include <asm/msidef.h>
   #include <asm/gart.h>
   #include <asm/x86_init.h>
   #include <asm/iommu_table.h>
   #include <asm/io_apic.h>
   #include <asm/irq_remapping.h>
   #include <asm/set_memory.h>

   #include <linux/crash_dump.h>

   #include "amd_iommu.h"
   #include "../irq_remapping.h"
  38
  39/*
  40 * definitions for the ACPI scanning code
  41 */
  42#define IVRS_HEADER_LENGTH 48
  43
  44#define ACPI_IVHD_TYPE_MAX_SUPPORTED    0x40
  45#define ACPI_IVMD_TYPE_ALL              0x20
  46#define ACPI_IVMD_TYPE                  0x21
  47#define ACPI_IVMD_TYPE_RANGE            0x22
  48
  49#define IVHD_DEV_ALL                    0x01
  50#define IVHD_DEV_SELECT                 0x02
  51#define IVHD_DEV_SELECT_RANGE_START     0x03
  52#define IVHD_DEV_RANGE_END              0x04
  53#define IVHD_DEV_ALIAS                  0x42
  54#define IVHD_DEV_ALIAS_RANGE            0x43
  55#define IVHD_DEV_EXT_SELECT             0x46
  56#define IVHD_DEV_EXT_SELECT_RANGE       0x47
  57#define IVHD_DEV_SPECIAL                0x48
  58#define IVHD_DEV_ACPI_HID               0xf0
  59
  60#define UID_NOT_PRESENT                 0
  61#define UID_IS_INTEGER                  1
  62#define UID_IS_CHARACTER                2
  63
  64#define IVHD_SPECIAL_IOAPIC             1
  65#define IVHD_SPECIAL_HPET               2
  66
  67#define IVHD_FLAG_HT_TUN_EN_MASK        0x01
  68#define IVHD_FLAG_PASSPW_EN_MASK        0x02
  69#define IVHD_FLAG_RESPASSPW_EN_MASK     0x04
  70#define IVHD_FLAG_ISOC_EN_MASK          0x08
  71
  72#define IVMD_FLAG_EXCL_RANGE            0x08
  73#define IVMD_FLAG_IW                    0x04
  74#define IVMD_FLAG_IR                    0x02
  75#define IVMD_FLAG_UNITY_MAP             0x01
  76
  77#define ACPI_DEVFLAG_INITPASS           0x01
  78#define ACPI_DEVFLAG_EXTINT             0x02
  79#define ACPI_DEVFLAG_NMI                0x04
  80#define ACPI_DEVFLAG_SYSMGT1            0x10
  81#define ACPI_DEVFLAG_SYSMGT2            0x20
  82#define ACPI_DEVFLAG_LINT0              0x40
  83#define ACPI_DEVFLAG_LINT1              0x80
  84#define ACPI_DEVFLAG_ATSDIS             0x10000000
  85
  86#define LOOP_TIMEOUT    100000
  87/*
  88 * ACPI table definitions
  89 *
  90 * These data structures are laid over the table to parse the important values
  91 * out of it.
  92 */
  93
  94extern const struct iommu_ops amd_iommu_ops;
  95
  96/*
  97 * structure describing one IOMMU in the ACPI table. Typically followed by one
  98 * or more ivhd_entrys.
  99 */
 100struct ivhd_header {
 101        u8 type;
 102        u8 flags;
 103        u16 length;
 104        u16 devid;
 105        u16 cap_ptr;
 106        u64 mmio_phys;
 107        u16 pci_seg;
 108        u16 info;
 109        u32 efr_attr;
 110
 111        /* Following only valid on IVHD type 11h and 40h */
 112        u64 efr_reg; /* Exact copy of MMIO_EXT_FEATURES */
 113        u64 res;
 114} __attribute__((packed));
 115
 116/*
 117 * A device entry describing which devices a specific IOMMU translates and
 118 * which requestor ids they use.
 119 */
 120struct ivhd_entry {
 121        u8 type;
 122        u16 devid;
 123        u8 flags;
 124        u32 ext;
 125        u32 hidh;
 126        u64 cid;
 127        u8 uidf;
 128        u8 uidl;
 129        u8 uid;
 130} __attribute__((packed));
 131
 132/*
 133 * An AMD IOMMU memory definition structure. It defines things like exclusion
 134 * ranges for devices and regions that should be unity mapped.
 135 */
 136struct ivmd_header {
 137        u8 type;
 138        u8 flags;
 139        u16 length;
 140        u16 devid;
 141        u16 aux;
 142        u64 resv;
 143        u64 range_start;
 144        u64 range_length;
 145} __attribute__((packed));
 146
 147bool amd_iommu_dump;
 148bool amd_iommu_irq_remap __read_mostly;
 149
 150int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
 151static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 152
 153static bool amd_iommu_detected;
 154static bool __initdata amd_iommu_disabled;
 155static int amd_iommu_target_ivhd_type;
 156
 157u16 amd_iommu_last_bdf;                 /* largest PCI device id we have
 158                                           to handle */
 159LIST_HEAD(amd_iommu_unity_map);         /* a list of required unity mappings
 160                                           we find in ACPI */
 161bool amd_iommu_unmap_flush;             /* if true, flush on every unmap */
 162
 163LIST_HEAD(amd_iommu_list);              /* list of all AMD IOMMUs in the
 164                                           system */
 165
 166/* Array to assign indices to IOMMUs*/
 167struct amd_iommu *amd_iommus[MAX_IOMMUS];
 168
 169/* Number of IOMMUs present in the system */
 170static int amd_iommus_present;
 171
 172/* IOMMUs have a non-present cache? */
 173bool amd_iommu_np_cache __read_mostly;
 174bool amd_iommu_iotlb_sup __read_mostly = true;
 175
 176u32 amd_iommu_max_pasid __read_mostly = ~0;
 177
 178bool amd_iommu_v2_present __read_mostly;
 179static bool amd_iommu_pc_present __read_mostly;
 180
 181bool amd_iommu_force_isolation __read_mostly;
 182
 183/*
 184 * Pointer to the device table which is shared by all AMD IOMMUs
 185 * it is indexed by the PCI device id or the HT unit id and contains
 186 * information about the domain the device belongs to as well as the
 187 * page table root pointer.
 188 */
 189struct dev_table_entry *amd_iommu_dev_table;
 190/*
 191 * Pointer to a device table which the content of old device table
 192 * will be copied to. It's only be used in kdump kernel.
 193 */
 194static struct dev_table_entry *old_dev_tbl_cpy;
 195
 196/*
 197 * The alias table is a driver specific data structure which contains the
 198 * mappings of the PCI device ids to the actual requestor ids on the IOMMU.
 199 * More than one device can share the same requestor id.
 200 */
 201u16 *amd_iommu_alias_table;
 202
 203/*
 204 * The rlookup table is used to find the IOMMU which is responsible
 205 * for a specific device. It is also indexed by the PCI device id.
 206 */
 207struct amd_iommu **amd_iommu_rlookup_table;
 208EXPORT_SYMBOL(amd_iommu_rlookup_table);
 209
 210/*
 211 * This table is used to find the irq remapping table for a given device id
 212 * quickly.
 213 */
 214struct irq_remap_table **irq_lookup_table;
 215
 216/*
 217 * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap
 218 * to know which ones are already in use.
 219 */
 220unsigned long *amd_iommu_pd_alloc_bitmap;
 221
 222static u32 dev_table_size;      /* size of the device table */
 223static u32 alias_table_size;    /* size of the alias table */
 224static u32 rlookup_table_size;  /* size if the rlookup table */
 225
 226enum iommu_init_state {
 227        IOMMU_START_STATE,
 228        IOMMU_IVRS_DETECTED,
 229        IOMMU_ACPI_FINISHED,
 230        IOMMU_ENABLED,
 231        IOMMU_PCI_INIT,
 232        IOMMU_INTERRUPTS_EN,
 233        IOMMU_DMA_OPS,
 234        IOMMU_INITIALIZED,
 235        IOMMU_NOT_FOUND,
 236        IOMMU_INIT_ERROR,
 237        IOMMU_CMDLINE_DISABLED,
 238};
 239
 240/* Early ioapic and hpet maps from kernel command line */
 241#define EARLY_MAP_SIZE          4
 242static struct devid_map __initdata early_ioapic_map[EARLY_MAP_SIZE];
 243static struct devid_map __initdata early_hpet_map[EARLY_MAP_SIZE];
 244static struct acpihid_map_entry __initdata early_acpihid_map[EARLY_MAP_SIZE];
 245
 246static int __initdata early_ioapic_map_size;
 247static int __initdata early_hpet_map_size;
 248static int __initdata early_acpihid_map_size;
 249
 250static bool __initdata cmdline_maps;
 251
 252static enum iommu_init_state init_state = IOMMU_START_STATE;
 253
 254static int amd_iommu_enable_interrupts(void);
 255static int __init iommu_go_to_state(enum iommu_init_state state);
 256static void init_device_table_dma(void);
 257
 258static bool amd_iommu_pre_enabled = true;
 259
 260bool translation_pre_enabled(struct amd_iommu *iommu)
 261{
 262        return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
 263}
 264EXPORT_SYMBOL(translation_pre_enabled);
 265
 266static void clear_translation_pre_enabled(struct amd_iommu *iommu)
 267{
 268        iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 269}
 270
 271static void init_translation_status(struct amd_iommu *iommu)
 272{
 273        u64 ctrl;
 274
 275        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 276        if (ctrl & (1<<CONTROL_IOMMU_EN))
 277                iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
 278}
 279
 280static inline void update_last_devid(u16 devid)
 281{
 282        if (devid > amd_iommu_last_bdf)
 283                amd_iommu_last_bdf = devid;
 284}
 285
 286static inline unsigned long tbl_size(int entry_size)
 287{
 288        unsigned shift = PAGE_SHIFT +
 289                         get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 290
 291        return 1UL << shift;
 292}
 293
 294int amd_iommu_get_num_iommus(void)
 295{
 296        return amd_iommus_present;
 297}
 298
 299/* Access to l1 and l2 indexed register spaces */
 300
 301static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
 302{
 303        u32 val;
 304
 305        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 306        pci_read_config_dword(iommu->dev, 0xfc, &val);
 307        return val;
 308}
 309
 310static void iommu_write_l1(struct amd_iommu *iommu, u16 l1, u8 address, u32 val)
 311{
 312        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16 | 1 << 31));
 313        pci_write_config_dword(iommu->dev, 0xfc, val);
 314        pci_write_config_dword(iommu->dev, 0xf8, (address | l1 << 16));
 315}
 316
 317static u32 iommu_read_l2(struct amd_iommu *iommu, u8 address)
 318{
 319        u32 val;
 320
 321        pci_write_config_dword(iommu->dev, 0xf0, address);
 322        pci_read_config_dword(iommu->dev, 0xf4, &val);
 323        return val;
 324}
 325
 326static void iommu_write_l2(struct amd_iommu *iommu, u8 address, u32 val)
 327{
 328        pci_write_config_dword(iommu->dev, 0xf0, (address | 1 << 8));
 329        pci_write_config_dword(iommu->dev, 0xf4, val);
 330}
 331
 332/****************************************************************************
 333 *
 334 * AMD IOMMU MMIO register space handling functions
 335 *
 336 * These functions are used to program the IOMMU device registers in
 337 * MMIO space required for that driver.
 338 *
 339 ****************************************************************************/
 340
 341/*
 342 * This function set the exclusion range in the IOMMU. DMA accesses to the
 343 * exclusion range are passed through untranslated
 344 */
 345static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 346{
 347        u64 start = iommu->exclusion_start & PAGE_MASK;
 348        u64 limit = (start + iommu->exclusion_length - 1) & PAGE_MASK;
 349        u64 entry;
 350
 351        if (!iommu->exclusion_start)
 352                return;
 353
 354        entry = start | MMIO_EXCL_ENABLE_MASK;
 355        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 356                        &entry, sizeof(entry));
 357
 358        entry = limit;
 359        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 360                        &entry, sizeof(entry));
 361}
 362
 363static void iommu_set_cwwb_range(struct amd_iommu *iommu)
 364{
 365        u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
 366        u64 entry = start & PM_ADDR_MASK;
 367
 368        if (!iommu_feature(iommu, FEATURE_SNP))
 369                return;
 370
 371        /* Note:
 372         * Re-purpose Exclusion base/limit registers for Completion wait
 373         * write-back base/limit.
 374         */
 375        memcpy_toio(iommu->mmio_base + MMIO_EXCL_BASE_OFFSET,
 376                    &entry, sizeof(entry));
 377
 378        /* Note:
 379         * Default to 4 Kbytes, which can be specified by setting base
 380         * address equal to the limit address.
 381         */
 382        memcpy_toio(iommu->mmio_base + MMIO_EXCL_LIMIT_OFFSET,
 383                    &entry, sizeof(entry));
 384}
 385
 386/* Programs the physical address of the device table into the IOMMU hardware */
 387static void iommu_set_device_table(struct amd_iommu *iommu)
 388{
 389        u64 entry;
 390
 391        BUG_ON(iommu->mmio_base == NULL);
 392
 393        entry = iommu_virt_to_phys(amd_iommu_dev_table);
 394        entry |= (dev_table_size >> 12) - 1;
 395        memcpy_toio(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET,
 396                        &entry, sizeof(entry));
 397}
 398
 399/* Generic functions to enable/disable certain features of the IOMMU. */
 400static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 401{
 402        u64 ctrl;
 403
 404        ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 405        ctrl |= (1ULL << bit);
 406        writeq(ctrl, iommu->mmio_base +  MMIO_CONTROL_OFFSET);
 407}
 408
 409static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 410{
 411        u64 ctrl;
 412
 413        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 414        ctrl &= ~(1ULL << bit);
 415        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 416}
 417
 418static void iommu_set_inv_tlb_timeout(struct amd_iommu *iommu, int timeout)
 419{
 420        u64 ctrl;
 421
 422        ctrl = readq(iommu->mmio_base + MMIO_CONTROL_OFFSET);
 423        ctrl &= ~CTRL_INV_TO_MASK;
 424        ctrl |= (timeout << CONTROL_INV_TIMEOUT) & CTRL_INV_TO_MASK;
 425        writeq(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
 426}
 427
 428/* Function to enable the hardware */
 429static void iommu_enable(struct amd_iommu *iommu)
 430{
 431        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 432}
 433
 434static void iommu_disable(struct amd_iommu *iommu)
 435{
 436        if (!iommu->mmio_base)
 437                return;
 438
 439        /* Disable command buffer */
 440        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 441
 442        /* Disable event logging and event interrupts */
 443        iommu_feature_disable(iommu, CONTROL_EVT_INT_EN);
 444        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 445
 446        /* Disable IOMMU GA_LOG */
 447        iommu_feature_disable(iommu, CONTROL_GALOG_EN);
 448        iommu_feature_disable(iommu, CONTROL_GAINT_EN);
 449
 450        /* Disable IOMMU hardware itself */
 451        iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 452}
 453
 454/*
 455 * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in
 456 * the system has one.
 457 */
 458static u8 __iomem * __init iommu_map_mmio_space(u64 address, u64 end)
 459{
 460        if (!request_mem_region(address, end, "amd_iommu")) {
 461                pr_err("Can not reserve memory region %llx-%llx for mmio\n",
 462                        address, end);
 463                pr_err("This is a BIOS bug. Please contact your hardware vendor\n");
 464                return NULL;
 465        }
 466
 467        return (u8 __iomem *)ioremap(address, end);
 468}
 469
 470static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu)
 471{
 472        if (iommu->mmio_base)
 473                iounmap(iommu->mmio_base);
 474        release_mem_region(iommu->mmio_phys, iommu->mmio_phys_end);
 475}
 476
 477static inline u32 get_ivhd_header_size(struct ivhd_header *h)
 478{
 479        u32 size = 0;
 480
 481        switch (h->type) {
 482        case 0x10:
 483                size = 24;
 484                break;
 485        case 0x11:
 486        case 0x40:
 487                size = 40;
 488                break;
 489        }
 490        return size;
 491}
 492
 493/****************************************************************************
 494 *
 495 * The functions below belong to the first pass of AMD IOMMU ACPI table
 496 * parsing. In this pass we try to find out the highest device id this
 497 * code has to handle. Upon this information the size of the shared data
 498 * structures is determined later.
 499 *
 500 ****************************************************************************/
 501
 502/*
 503 * This function calculates the length of a given IVHD entry
 504 */
 505static inline int ivhd_entry_length(u8 *ivhd)
 506{
 507        u32 type = ((struct ivhd_entry *)ivhd)->type;
 508
 509        if (type < 0x80) {
 510                return 0x04 << (*ivhd >> 6);
 511        } else if (type == IVHD_DEV_ACPI_HID) {
 512                /* For ACPI_HID, offset 21 is uid len */
 513                return *((u8 *)ivhd + 21) + 22;
 514        }
 515        return 0;
 516}
 517
 518/*
 519 * After reading the highest device id from the IOMMU PCI capability header
 520 * this function looks if there is a higher device id defined in the ACPI table
 521 */
 522static int __init find_last_devid_from_ivhd(struct ivhd_header *h)
 523{
 524        u8 *p = (void *)h, *end = (void *)h;
 525        struct ivhd_entry *dev;
 526
 527        u32 ivhd_size = get_ivhd_header_size(h);
 528
 529        if (!ivhd_size) {
 530                pr_err("Unsupported IVHD type %#x\n", h->type);
 531                return -EINVAL;
 532        }
 533
 534        p += ivhd_size;
 535        end += h->length;
 536
 537        while (p < end) {
 538                dev = (struct ivhd_entry *)p;
 539                switch (dev->type) {
 540                case IVHD_DEV_ALL:
 541                        /* Use maximum BDF value for DEV_ALL */
 542                        update_last_devid(0xffff);
 543                        break;
 544                case IVHD_DEV_SELECT:
 545                case IVHD_DEV_RANGE_END:
 546                case IVHD_DEV_ALIAS:
 547                case IVHD_DEV_EXT_SELECT:
 548                        /* all the above subfield types refer to device ids */
 549                        update_last_devid(dev->devid);
 550                        break;
 551                default:
 552                        break;
 553                }
 554                p += ivhd_entry_length(p);
 555        }
 556
 557        WARN_ON(p != end);
 558
 559        return 0;
 560}
 561
 562static int __init check_ivrs_checksum(struct acpi_table_header *table)
 563{
 564        int i;
 565        u8 checksum = 0, *p = (u8 *)table;
 566
 567        for (i = 0; i < table->length; ++i)
 568                checksum += p[i];
 569        if (checksum != 0) {
 570                /* ACPI table corrupt */
 571                pr_err(FW_BUG "IVRS invalid checksum\n");
 572                return -ENODEV;
 573        }
 574
 575        return 0;
 576}
 577
 578/*
 579 * Iterate over all IVHD entries in the ACPI table and find the highest device
 580 * id which we need to handle. This is the first of three functions which parse
 581 * the ACPI table. So we check the checksum here.
 582 */
 583static int __init find_last_devid_acpi(struct acpi_table_header *table)
 584{
 585        u8 *p = (u8 *)table, *end = (u8 *)table;
 586        struct ivhd_header *h;
 587
 588        p += IVRS_HEADER_LENGTH;
 589
 590        end += table->length;
 591        while (p < end) {
 592                h = (struct ivhd_header *)p;
 593                if (h->type == amd_iommu_target_ivhd_type) {
 594                        int ret = find_last_devid_from_ivhd(h);
 595
 596                        if (ret)
 597                                return ret;
 598                }
 599                p += h->length;
 600        }
 601        WARN_ON(p != end);
 602
 603        return 0;
 604}
 605
 606/****************************************************************************
 607 *
 608 * The following functions belong to the code path which parses the ACPI table
 609 * the second time. In this ACPI parsing iteration we allocate IOMMU specific
 610 * data structures, initialize the device/alias/rlookup table and also
 611 * basically initialize the hardware.
 612 *
 613 ****************************************************************************/
 614
 615/*
 616 * Allocates the command buffer. This buffer is per AMD IOMMU. We can
 617 * write commands to that buffer later and the IOMMU will execute them
 618 * asynchronously
 619 */
 620static int __init alloc_command_buffer(struct amd_iommu *iommu)
 621{
 622        iommu->cmd_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 623                                                  get_order(CMD_BUFFER_SIZE));
 624
 625        return iommu->cmd_buf ? 0 : -ENOMEM;
 626}
 627
 628/*
 629 * This function resets the command buffer if the IOMMU stopped fetching
 630 * commands from it.
 631 */
 632void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
 633{
 634        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 635
 636        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
 637        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 638        iommu->cmd_buf_head = 0;
 639        iommu->cmd_buf_tail = 0;
 640
 641        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
 642}
 643
 644/*
 645 * This function writes the command buffer address to the hardware and
 646 * enables it.
 647 */
 648static void iommu_enable_command_buffer(struct amd_iommu *iommu)
 649{
 650        u64 entry;
 651
 652        BUG_ON(iommu->cmd_buf == NULL);
 653
 654        entry = iommu_virt_to_phys(iommu->cmd_buf);
 655        entry |= MMIO_CMD_SIZE_512;
 656
 657        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
 658                    &entry, sizeof(entry));
 659
 660        amd_iommu_reset_cmd_buffer(iommu);
 661}
 662
 663/*
 664 * This function disables the command buffer
 665 */
 666static void iommu_disable_command_buffer(struct amd_iommu *iommu)
 667{
 668        iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
 669}
 670
 671static void __init free_command_buffer(struct amd_iommu *iommu)
 672{
 673        free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
 674}
 675
 676static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
 677                                         gfp_t gfp, size_t size)
 678{
 679        int order = get_order(size);
 680        void *buf = (void *)__get_free_pages(gfp, order);
 681
 682        if (buf &&
 683            iommu_feature(iommu, FEATURE_SNP) &&
 684            set_memory_4k((unsigned long)buf, (1 << order))) {
 685                free_pages((unsigned long)buf, order);
 686                buf = NULL;
 687        }
 688
 689        return buf;
 690}
 691
 692/* allocates the memory where the IOMMU will log its events to */
 693static int __init alloc_event_buffer(struct amd_iommu *iommu)
 694{
 695        iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
 696                                              EVT_BUFFER_SIZE);
 697
 698        return iommu->evt_buf ? 0 : -ENOMEM;
 699}
 700
 701static void iommu_enable_event_buffer(struct amd_iommu *iommu)
 702{
 703        u64 entry;
 704
 705        BUG_ON(iommu->evt_buf == NULL);
 706
 707        entry = iommu_virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
 708
 709        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
 710                    &entry, sizeof(entry));
 711
 712        /* set head and tail to zero manually */
 713        writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 714        writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 715
 716        iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 717}
 718
 719/*
 720 * This function disables the event log buffer
 721 */
 722static void iommu_disable_event_buffer(struct amd_iommu *iommu)
 723{
 724        iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
 725}
 726
 727static void __init free_event_buffer(struct amd_iommu *iommu)
 728{
 729        free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
 730}
 731
 732/* allocates the memory where the IOMMU will log its events to */
 733static int __init alloc_ppr_log(struct amd_iommu *iommu)
 734{
 735        iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
 736                                              PPR_LOG_SIZE);
 737
 738        return iommu->ppr_log ? 0 : -ENOMEM;
 739}
 740
 741static void iommu_enable_ppr_log(struct amd_iommu *iommu)
 742{
 743        u64 entry;
 744
 745        if (iommu->ppr_log == NULL)
 746                return;
 747
 748        entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512;
 749
 750        memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET,
 751                    &entry, sizeof(entry));
 752
 753        /* set head and tail to zero manually */
 754        writel(0x00, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 755        writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 756
 757        iommu_feature_enable(iommu, CONTROL_PPRLOG_EN);
 758        iommu_feature_enable(iommu, CONTROL_PPR_EN);
 759}
 760
 761static void __init free_ppr_log(struct amd_iommu *iommu)
 762{
 763        free_pages((unsigned long)iommu->ppr_log, get_order(PPR_LOG_SIZE));
 764}
 765
 766static void free_ga_log(struct amd_iommu *iommu)
 767{
 768#ifdef CONFIG_IRQ_REMAP
 769        free_pages((unsigned long)iommu->ga_log, get_order(GA_LOG_SIZE));
 770        free_pages((unsigned long)iommu->ga_log_tail, get_order(8));
 771#endif
 772}
 773
 774static int iommu_ga_log_enable(struct amd_iommu *iommu)
 775{
 776#ifdef CONFIG_IRQ_REMAP
 777        u32 status, i;
 778
 779        if (!iommu->ga_log)
 780                return -EINVAL;
 781
 782        status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 783
 784        /* Check if already running */
 785        if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 786                return 0;
 787
 788        iommu_feature_enable(iommu, CONTROL_GAINT_EN);
 789        iommu_feature_enable(iommu, CONTROL_GALOG_EN);
 790
 791        for (i = 0; i < LOOP_TIMEOUT; ++i) {
 792                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 793                if (status & (MMIO_STATUS_GALOG_RUN_MASK))
 794                        break;
 795        }
 796
 797        if (i >= LOOP_TIMEOUT)
 798                return -EINVAL;
 799#endif /* CONFIG_IRQ_REMAP */
 800        return 0;
 801}
 802
 803#ifdef CONFIG_IRQ_REMAP
 804static int iommu_init_ga_log(struct amd_iommu *iommu)
 805{
 806        u64 entry;
 807
 808        if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 809                return 0;
 810
 811        iommu->ga_log = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 812                                        get_order(GA_LOG_SIZE));
 813        if (!iommu->ga_log)
 814                goto err_out;
 815
 816        iommu->ga_log_tail = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
 817                                        get_order(8));
 818        if (!iommu->ga_log_tail)
 819                goto err_out;
 820
 821        entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512;
 822        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET,
 823                    &entry, sizeof(entry));
 824        entry = (iommu_virt_to_phys(iommu->ga_log_tail) &
 825                 (BIT_ULL(52)-1)) & ~7ULL;
 826        memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET,
 827                    &entry, sizeof(entry));
 828        writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET);
 829        writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET);
 830
 831        return 0;
 832err_out:
 833        free_ga_log(iommu);
 834        return -EINVAL;
 835}
 836#endif /* CONFIG_IRQ_REMAP */
 837
 838static int iommu_init_ga(struct amd_iommu *iommu)
 839{
 840        int ret = 0;
 841
 842#ifdef CONFIG_IRQ_REMAP
 843        /* Note: We have already checked GASup from IVRS table.
 844         *       Now, we need to make sure that GAMSup is set.
 845         */
 846        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
 847            !iommu_feature(iommu, FEATURE_GAM_VAPIC))
 848                amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
 849
 850        ret = iommu_init_ga_log(iommu);
 851#endif /* CONFIG_IRQ_REMAP */
 852
 853        return ret;
 854}
 855
 856static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
 857{
 858        iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
 859
 860        return iommu->cmd_sem ? 0 : -ENOMEM;
 861}
 862
 863static void __init free_cwwb_sem(struct amd_iommu *iommu)
 864{
 865        if (iommu->cmd_sem)
 866                free_page((unsigned long)iommu->cmd_sem);
 867}
 868
 869static void iommu_enable_xt(struct amd_iommu *iommu)
 870{
 871#ifdef CONFIG_IRQ_REMAP
 872        /*
 873         * XT mode (32-bit APIC destination ID) requires
 874         * GA mode (128-bit IRTE support) as a prerequisite.
 875         */
 876        if (AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir) &&
 877            amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
 878                iommu_feature_enable(iommu, CONTROL_XT_EN);
 879#endif /* CONFIG_IRQ_REMAP */
 880}
 881
 882static void iommu_enable_gt(struct amd_iommu *iommu)
 883{
 884        if (!iommu_feature(iommu, FEATURE_GT))
 885                return;
 886
 887        iommu_feature_enable(iommu, CONTROL_GT_EN);
 888}
 889
 890/* sets a specific bit in the device table entry. */
 891static void set_dev_entry_bit(u16 devid, u8 bit)
 892{
 893        int i = (bit >> 6) & 0x03;
 894        int _bit = bit & 0x3f;
 895
 896        amd_iommu_dev_table[devid].data[i] |= (1UL << _bit);
 897}
 898
 899static int get_dev_entry_bit(u16 devid, u8 bit)
 900{
 901        int i = (bit >> 6) & 0x03;
 902        int _bit = bit & 0x3f;
 903
 904        return (amd_iommu_dev_table[devid].data[i] & (1UL << _bit)) >> _bit;
 905}
 906
 907
 908static bool copy_device_table(void)
 909{
 910        u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
 911        struct dev_table_entry *old_devtb = NULL;
 912        u32 lo, hi, devid, old_devtb_size;
 913        phys_addr_t old_devtb_phys;
 914        struct amd_iommu *iommu;
 915        u16 dom_id, dte_v, irq_v;
 916        gfp_t gfp_flag;
 917        u64 tmp;
 918
 919        if (!amd_iommu_pre_enabled)
 920                return false;
 921
 922        pr_warn("Translation is already enabled - trying to copy translation structures\n");
 923        for_each_iommu(iommu) {
 924                /* All IOMMUs should use the same device table with the same size */
 925                lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
 926                hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
 927                entry = (((u64) hi) << 32) + lo;
 928                if (last_entry && last_entry != entry) {
 929                        pr_err("IOMMU:%d should use the same dev table as others!\n",
 930                                iommu->index);
 931                        return false;
 932                }
 933                last_entry = entry;
 934
 935                old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
 936                if (old_devtb_size != dev_table_size) {
 937                        pr_err("The device table size of IOMMU:%d is not expected!\n",
 938                                iommu->index);
 939                        return false;
 940                }
 941        }
 942
 943        /*
 944         * When SME is enabled in the first kernel, the entry includes the
 945         * memory encryption mask(sme_me_mask), we must remove the memory
 946         * encryption mask to obtain the true physical address in kdump kernel.
 947         */
 948        old_devtb_phys = __sme_clr(entry) & PAGE_MASK;
 949
 950        if (old_devtb_phys >= 0x100000000ULL) {
 951                pr_err("The address of old device table is above 4G, not trustworthy!\n");
 952                return false;
 953        }
 954        old_devtb = (sme_active() && is_kdump_kernel())
 955                    ? (__force void *)ioremap_encrypted(old_devtb_phys,
 956                                                        dev_table_size)
 957                    : memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
 958
 959        if (!old_devtb)
 960                return false;
 961
 962        gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
 963        old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
 964                                get_order(dev_table_size));
 965        if (old_dev_tbl_cpy == NULL) {
 966                pr_err("Failed to allocate memory for copying old device table!\n");
 967                return false;
 968        }
 969
 970        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
 971                old_dev_tbl_cpy[devid] = old_devtb[devid];
 972                dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
 973                dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
 974
 975                if (dte_v && dom_id) {
 976                        old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
 977                        old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
 978                        __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
 979                        /* If gcr3 table existed, mask it out */
 980                        if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
 981                                tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
 982                                tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
 983                                old_dev_tbl_cpy[devid].data[1] &= ~tmp;
 984                                tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
 985                                tmp |= DTE_FLAG_GV;
 986                                old_dev_tbl_cpy[devid].data[0] &= ~tmp;
 987                        }
 988                }
 989
 990                irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
 991                int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
 992                int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
 993                if (irq_v && (int_ctl || int_tab_len)) {
 994                        if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
 995                            (int_tab_len != DTE_IRQ_TABLE_LEN)) {
 996                                pr_err("Wrong old irq remapping flag: %#x\n", devid);
 997                                return false;
 998                        }
 999
1000                        old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
1001                }
1002        }
1003        memunmap(old_devtb);
1004
1005        return true;
1006}
1007
1008void amd_iommu_apply_erratum_63(u16 devid)
1009{
1010        int sysmgt;
1011
1012        sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
1013                 (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
1014
1015        if (sysmgt == 0x01)
1016                set_dev_entry_bit(devid, DEV_ENTRY_IW);
1017}
1018
1019/* Writes the specific IOMMU for a device into the rlookup table */
1020static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
1021{
1022        amd_iommu_rlookup_table[devid] = iommu;
1023}
1024
1025/*
1026 * This function takes the device specific flags read from the ACPI
1027 * table and sets up the device table entry with that information
1028 */
1029static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
1030                                           u16 devid, u32 flags, u32 ext_flags)
1031{
1032        if (flags & ACPI_DEVFLAG_INITPASS)
1033                set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS);
1034        if (flags & ACPI_DEVFLAG_EXTINT)
1035                set_dev_entry_bit(devid, DEV_ENTRY_EINT_PASS);
1036        if (flags & ACPI_DEVFLAG_NMI)
1037                set_dev_entry_bit(devid, DEV_ENTRY_NMI_PASS);
1038        if (flags & ACPI_DEVFLAG_SYSMGT1)
1039                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1);
1040        if (flags & ACPI_DEVFLAG_SYSMGT2)
1041                set_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2);
1042        if (flags & ACPI_DEVFLAG_LINT0)
1043                set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS);
1044        if (flags & ACPI_DEVFLAG_LINT1)
1045                set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
1046
1047        amd_iommu_apply_erratum_63(devid);
1048
1049        set_iommu_for_device(iommu, devid);
1050}
1051
1052int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line)
1053{
1054        struct devid_map *entry;
1055        struct list_head *list;
1056
1057        if (type == IVHD_SPECIAL_IOAPIC)
1058                list = &ioapic_map;
1059        else if (type == IVHD_SPECIAL_HPET)
1060                list = &hpet_map;
1061        else
1062                return -EINVAL;
1063
1064        list_for_each_entry(entry, list, list) {
1065                if (!(entry->id == id && entry->cmd_line))
1066                        continue;
1067
1068                pr_info("Command-line override present for %s id %d - ignoring\n",
1069                        type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id);
1070
1071                *devid = entry->devid;
1072
1073                return 0;
1074        }
1075
1076        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1077        if (!entry)
1078                return -ENOMEM;
1079
1080        entry->id       = id;
1081        entry->devid    = *devid;
1082        entry->cmd_line = cmd_line;
1083
1084        list_add_tail(&entry->list, list);
1085
1086        return 0;
1087}
1088
1089static int __init add_acpi_hid_device(u8 *hid, u8 *uid, u16 *devid,
1090                                      bool cmd_line)
1091{
1092        struct acpihid_map_entry *entry;
1093        struct list_head *list = &acpihid_map;
1094
1095        list_for_each_entry(entry, list, list) {
1096                if (strcmp(entry->hid, hid) ||
1097                    (*uid && *entry->uid && strcmp(entry->uid, uid)) ||
1098                    !entry->cmd_line)
1099                        continue;
1100
1101                pr_info("Command-line override for hid:%s uid:%s\n",
1102                        hid, uid);
1103                *devid = entry->devid;
1104                return 0;
1105        }
1106
1107        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1108        if (!entry)
1109                return -ENOMEM;
1110
1111        memcpy(entry->uid, uid, strlen(uid));
1112        memcpy(entry->hid, hid, strlen(hid));
1113        entry->devid = *devid;
1114        entry->cmd_line = cmd_line;
1115        entry->root_devid = (entry->devid & (~0x7));
1116
1117        pr_info("%s, add hid:%s, uid:%s, rdevid:%d\n",
1118                entry->cmd_line ? "cmd" : "ivrs",
1119                entry->hid, entry->uid, entry->root_devid);
1120
1121        list_add_tail(&entry->list, list);
1122        return 0;
1123}
1124
1125static int __init add_early_maps(void)
1126{
1127        int i, ret;
1128
1129        for (i = 0; i < early_ioapic_map_size; ++i) {
1130                ret = add_special_device(IVHD_SPECIAL_IOAPIC,
1131                                         early_ioapic_map[i].id,
1132                                         &early_ioapic_map[i].devid,
1133                                         early_ioapic_map[i].cmd_line);
1134                if (ret)
1135                        return ret;
1136        }
1137
1138        for (i = 0; i < early_hpet_map_size; ++i) {
1139                ret = add_special_device(IVHD_SPECIAL_HPET,
1140                                         early_hpet_map[i].id,
1141                                         &early_hpet_map[i].devid,
1142                                         early_hpet_map[i].cmd_line);
1143                if (ret)
1144                        return ret;
1145        }
1146
1147        for (i = 0; i < early_acpihid_map_size; ++i) {
1148                ret = add_acpi_hid_device(early_acpihid_map[i].hid,
1149                                          early_acpihid_map[i].uid,
1150                                          &early_acpihid_map[i].devid,
1151                                          early_acpihid_map[i].cmd_line);
1152                if (ret)
1153                        return ret;
1154        }
1155
1156        return 0;
1157}
1158
1159/*
1160 * Takes a pointer to an AMD IOMMU entry in the ACPI table and
1161 * initializes the hardware and our data structures with it.
1162 */
1163static int __init init_iommu_from_acpi(struct amd_iommu *iommu,
1164                                        struct ivhd_header *h)
1165{
1166        u8 *p = (u8 *)h;
1167        u8 *end = p, flags = 0;
1168        u16 devid = 0, devid_start = 0, devid_to = 0;
1169        u32 dev_i, ext_flags = 0;
1170        bool alias = false;
1171        struct ivhd_entry *e;
1172        u32 ivhd_size;
1173        int ret;
1174
1175
1176        ret = add_early_maps();
1177        if (ret)
1178                return ret;
1179
1180        amd_iommu_apply_ivrs_quirks();
1181
1182        /*
1183         * First save the recommended feature enable bits from ACPI
1184         */
1185        iommu->acpi_flags = h->flags;
1186
1187        /*
1188         * Done. Now parse the device entries
1189         */
1190        ivhd_size = get_ivhd_header_size(h);
1191        if (!ivhd_size) {
1192                pr_err("Unsupported IVHD type %#x\n", h->type);
1193                return -EINVAL;
1194        }
1195
1196        p += ivhd_size;
1197
1198        end += h->length;
1199
1200
1201        while (p < end) {
1202                e = (struct ivhd_entry *)p;
1203                switch (e->type) {
1204                case IVHD_DEV_ALL:
1205
1206                        DUMP_printk("  DEV_ALL\t\t\tflags: %02x\n", e->flags);
1207
1208                        for (dev_i = 0; dev_i <= amd_iommu_last_bdf; ++dev_i)
1209                                set_dev_entry_from_acpi(iommu, dev_i, e->flags, 0);
1210                        break;
1211                case IVHD_DEV_SELECT:
1212
1213                        DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
1214                                    "flags: %02x\n",
1215                                    PCI_BUS_NUM(e->devid),
1216                                    PCI_SLOT(e->devid),
1217                                    PCI_FUNC(e->devid),
1218                                    e->flags);
1219
1220                        devid = e->devid;
1221                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1222                        break;
1223                case IVHD_DEV_SELECT_RANGE_START:
1224
1225                        DUMP_printk("  DEV_SELECT_RANGE_START\t "
1226                                    "devid: %02x:%02x.%x flags: %02x\n",
1227                                    PCI_BUS_NUM(e->devid),
1228                                    PCI_SLOT(e->devid),
1229                                    PCI_FUNC(e->devid),
1230                                    e->flags);
1231
1232                        devid_start = e->devid;
1233                        flags = e->flags;
1234                        ext_flags = 0;
1235                        alias = false;
1236                        break;
1237                case IVHD_DEV_ALIAS:
1238
1239                        DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
1240                                    "flags: %02x devid_to: %02x:%02x.%x\n",
1241                                    PCI_BUS_NUM(e->devid),
1242                                    PCI_SLOT(e->devid),
1243                                    PCI_FUNC(e->devid),
1244                                    e->flags,
1245                                    PCI_BUS_NUM(e->ext >> 8),
1246                                    PCI_SLOT(e->ext >> 8),
1247                                    PCI_FUNC(e->ext >> 8));
1248
1249                        devid = e->devid;
1250                        devid_to = e->ext >> 8;
1251                        set_dev_entry_from_acpi(iommu, devid   , e->flags, 0);
1252                        set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
1253                        amd_iommu_alias_table[devid] = devid_to;
1254                        break;
1255                case IVHD_DEV_ALIAS_RANGE:
1256
1257                        DUMP_printk("  DEV_ALIAS_RANGE\t\t "
1258                                    "devid: %02x:%02x.%x flags: %02x "
1259                                    "devid_to: %02x:%02x.%x\n",
1260                                    PCI_BUS_NUM(e->devid),
1261                                    PCI_SLOT(e->devid),
1262                                    PCI_FUNC(e->devid),
1263                                    e->flags,
1264                                    PCI_BUS_NUM(e->ext >> 8),
1265                                    PCI_SLOT(e->ext >> 8),
1266                                    PCI_FUNC(e->ext >> 8));
1267
1268                        devid_start = e->devid;
1269                        flags = e->flags;
1270                        devid_to = e->ext >> 8;
1271                        ext_flags = 0;
1272                        alias = true;
1273                        break;
1274                case IVHD_DEV_EXT_SELECT:
1275
1276                        DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
1277                                    "flags: %02x ext: %08x\n",
1278                                    PCI_BUS_NUM(e->devid),
1279                                    PCI_SLOT(e->devid),
1280                                    PCI_FUNC(e->devid),
1281                                    e->flags, e->ext);
1282
1283                        devid = e->devid;
1284                        set_dev_entry_from_acpi(iommu, devid, e->flags,
1285                                                e->ext);
1286                        break;
1287                case IVHD_DEV_EXT_SELECT_RANGE:
1288
1289                        DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
1290                                    "%02x:%02x.%x flags: %02x ext: %08x\n",
1291                                    PCI_BUS_NUM(e->devid),
1292                                    PCI_SLOT(e->devid),
1293                                    PCI_FUNC(e->devid),
1294                                    e->flags, e->ext);
1295
1296                        devid_start = e->devid;
1297                        flags = e->flags;
1298                        ext_flags = e->ext;
1299                        alias = false;
1300                        break;
1301                case IVHD_DEV_RANGE_END:
1302
1303                        DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
1304                                    PCI_BUS_NUM(e->devid),
1305                                    PCI_SLOT(e->devid),
1306                                    PCI_FUNC(e->devid));
1307
1308                        devid = e->devid;
1309                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
1310                                if (alias) {
1311                                        amd_iommu_alias_table[dev_i] = devid_to;
1312                                        set_dev_entry_from_acpi(iommu,
1313                                                devid_to, flags, ext_flags);
1314                                }
1315                                set_dev_entry_from_acpi(iommu, dev_i,
1316                                                        flags, ext_flags);
1317                        }
1318                        break;
1319                case IVHD_DEV_SPECIAL: {
1320                        u8 handle, type;
1321                        const char *var;
1322                        u16 devid;
1323                        int ret;
1324
1325                        handle = e->ext & 0xff;
1326                        devid  = (e->ext >>  8) & 0xffff;
1327                        type   = (e->ext >> 24) & 0xff;
1328
1329                        if (type == IVHD_SPECIAL_IOAPIC)
1330                                var = "IOAPIC";
1331                        else if (type == IVHD_SPECIAL_HPET)
1332                                var = "HPET";
1333                        else
1334                                var = "UNKNOWN";
1335
1336                        DUMP_printk("  DEV_SPECIAL(%s[%d])\t\tdevid: %02x:%02x.%x\n",
1337                                    var, (int)handle,
1338                                    PCI_BUS_NUM(devid),
1339                                    PCI_SLOT(devid),
1340                                    PCI_FUNC(devid));
1341
1342                        ret = add_special_device(type, handle, &devid, false);
1343                        if (ret)
1344                                return ret;
1345
1346                        /*
1347                         * add_special_device might update the devid in case a
1348                         * command-line override is present. So call
1349                         * set_dev_entry_from_acpi after add_special_device.
1350                         */
1351                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1352
1353                        break;
1354                }
1355                case IVHD_DEV_ACPI_HID: {
1356                        u16 devid;
1357                        u8 hid[ACPIHID_HID_LEN];
1358                        u8 uid[ACPIHID_UID_LEN];
1359                        int ret;
1360
1361                        if (h->type != 0x40) {
1362                                pr_err(FW_BUG "Invalid IVHD device type %#x\n",
1363                                       e->type);
1364                                break;
1365                        }
1366
1367                        memcpy(hid, (u8 *)(&e->ext), ACPIHID_HID_LEN - 1);
1368                        hid[ACPIHID_HID_LEN - 1] = '\0';
1369
1370                        if (!(*hid)) {
1371                                pr_err(FW_BUG "Invalid HID.\n");
1372                                break;
1373                        }
1374
1375                        uid[0] = '\0';
1376                        switch (e->uidf) {
1377                        case UID_NOT_PRESENT:
1378
1379                                if (e->uidl != 0)
1380                                        pr_warn(FW_BUG "Invalid UID length.\n");
1381
1382                                break;
1383                        case UID_IS_INTEGER:
1384
1385                                sprintf(uid, "%d", e->uid);
1386
1387                                break;
1388                        case UID_IS_CHARACTER:
1389
1390                                memcpy(uid, &e->uid, e->uidl);
1391                                uid[e->uidl] = '\0';
1392
1393                                break;
1394                        default:
1395                                break;
1396                        }
1397
1398                        devid = e->devid;
1399                        DUMP_printk("  DEV_ACPI_HID(%s[%s])\t\tdevid: %02x:%02x.%x\n",
1400                                    hid, uid,
1401                                    PCI_BUS_NUM(devid),
1402                                    PCI_SLOT(devid),
1403                                    PCI_FUNC(devid));
1404
1405                        flags = e->flags;
1406
1407                        ret = add_acpi_hid_device(hid, uid, &devid, false);
1408                        if (ret)
1409                                return ret;
1410
1411                        /*
1412                         * add_special_device might update the devid in case a
1413                         * command-line override is present. So call
1414                         * set_dev_entry_from_acpi after add_special_device.
1415                         */
1416                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
1417
1418                        break;
1419                }
1420                default:
1421                        break;
1422                }
1423
1424                p += ivhd_entry_length(p);
1425        }
1426
1427        return 0;
1428}
1429
1430static void __init free_iommu_one(struct amd_iommu *iommu)
1431{
1432        free_cwwb_sem(iommu);
1433        free_command_buffer(iommu);
1434        free_event_buffer(iommu);
1435        free_ppr_log(iommu);
1436        free_ga_log(iommu);
1437        iommu_unmap_mmio_space(iommu);
1438}
1439
1440static void __init free_iommu_all(void)
1441{
1442        struct amd_iommu *iommu, *next;
1443
1444        for_each_iommu_safe(iommu, next) {
1445                list_del(&iommu->list);
1446                free_iommu_one(iommu);
1447                kfree(iommu);
1448        }
1449}
1450
1451/*
1452 * Family15h Model 10h-1fh erratum 746 (IOMMU Logging May Stall Translations)
1453 * Workaround:
1454 *     BIOS should disable L2B micellaneous clock gating by setting
1455 *     L2_L2B_CK_GATE_CONTROL[CKGateL2BMiscDisable](D0F2xF4_x90[2]) = 1b
1456 */
1457static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu)
1458{
1459        u32 value;
1460
1461        if ((boot_cpu_data.x86 != 0x15) ||
1462            (boot_cpu_data.x86_model < 0x10) ||
1463            (boot_cpu_data.x86_model > 0x1f))
1464                return;
1465
1466        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1467        pci_read_config_dword(iommu->dev, 0xf4, &value);
1468
1469        if (value & BIT(2))
1470                return;
1471
1472        /* Select NB indirect register 0x90 and enable writing */
1473        pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8));
1474
1475        pci_write_config_dword(iommu->dev, 0xf4, value | 0x4);
1476        pci_info(iommu->dev, "Applying erratum 746 workaround\n");
1477
1478        /* Clear the enable writing bit */
1479        pci_write_config_dword(iommu->dev, 0xf0, 0x90);
1480}
1481
1482/*
1483 * Family15h Model 30h-3fh (IOMMU Mishandles ATS Write Permission)
1484 * Workaround:
1485 *     BIOS should enable ATS write permission check by setting
1486 *     L2_DEBUG_3[AtsIgnoreIWDis](D0F2xF4_x47[0]) = 1b
1487 */
1488static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu)
1489{
1490        u32 value;
1491
1492        if ((boot_cpu_data.x86 != 0x15) ||
1493            (boot_cpu_data.x86_model < 0x30) ||
1494            (boot_cpu_data.x86_model > 0x3f))
1495                return;
1496
1497        /* Test L2_DEBUG_3[AtsIgnoreIWDis] == 1 */
1498        value = iommu_read_l2(iommu, 0x47);
1499
1500        if (value & BIT(0))
1501                return;
1502
1503        /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */
1504        iommu_write_l2(iommu, 0x47, value | BIT(0));
1505
1506        pci_info(iommu->dev, "Applying ATS write check workaround\n");
1507}
1508
1509/*
1510 * This function clues the initialization function for one IOMMU
1511 * together and also allocates the command buffer and programs the
1512 * hardware. It does NOT enable the IOMMU. This is done afterwards.
1513 */
1514static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
1515{
1516        int ret;
1517
1518        raw_spin_lock_init(&iommu->lock);
1519        iommu->cmd_sem_val = 0;
1520
1521        /* Add IOMMU to internal data structures */
1522        list_add_tail(&iommu->list, &amd_iommu_list);
1523        iommu->index = amd_iommus_present++;
1524
1525        if (unlikely(iommu->index >= MAX_IOMMUS)) {
1526                WARN(1, "System has more IOMMUs than supported by this driver\n");
1527                return -ENOSYS;
1528        }
1529
1530        /* Index is fine - add IOMMU to the array */
1531        amd_iommus[iommu->index] = iommu;
1532
1533        /*
1534         * Copy data from ACPI table entry to the iommu struct
1535         */
1536        iommu->devid   = h->devid;
1537        iommu->cap_ptr = h->cap_ptr;
1538        iommu->pci_seg = h->pci_seg;
1539        iommu->mmio_phys = h->mmio_phys;
1540
1541        switch (h->type) {
1542        case 0x10:
1543                /* Check if IVHD EFR contains proper max banks/counters */
1544                if ((h->efr_attr != 0) &&
1545                    ((h->efr_attr & (0xF << 13)) != 0) &&
1546                    ((h->efr_attr & (0x3F << 17)) != 0))
1547                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1548                else
1549                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1550
1551                /*
1552                 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1553                 * GAM also requires GA mode. Therefore, we need to
1554                 * check cmpxchg16b support before enabling it.
1555                 */
1556                if (!boot_cpu_has(X86_FEATURE_CX16) ||
1557                    ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0))
1558                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1559                break;
1560        case 0x11:
1561        case 0x40:
1562                if (h->efr_reg & (1 << 9))
1563                        iommu->mmio_phys_end = MMIO_REG_END_OFFSET;
1564                else
1565                        iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET;
1566
1567                /*
1568                 * Note: GA (128-bit IRTE) mode requires cmpxchg16b supports.
1569                 * XT, GAM also requires GA mode. Therefore, we need to
1570                 * check cmpxchg16b support before enabling them.
1571                 */
1572                if (!boot_cpu_has(X86_FEATURE_CX16) ||
1573                    ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) {
1574                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY;
1575                        break;
1576                }
1577
1578                /*
1579                 * Note: Since iommu_update_intcapxt() leverages
1580                 * the IOMMU MMIO access to MSI capability block registers
1581                 * for MSI address lo/hi/data, we need to check both
1582                 * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support.
1583                 */
1584                if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) &&
1585                    (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT)))
1586                        amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
1587                break;
1588        default:
1589                return -EINVAL;
1590        }
1591
1592        iommu->mmio_base = iommu_map_mmio_space(iommu->mmio_phys,
1593                                                iommu->mmio_phys_end);
1594        if (!iommu->mmio_base)
1595                return -ENOMEM;
1596
1597        if (alloc_cwwb_sem(iommu))
1598                return -ENOMEM;
1599
1600        if (alloc_command_buffer(iommu))
1601                return -ENOMEM;
1602
1603        if (alloc_event_buffer(iommu))
1604                return -ENOMEM;
1605
1606        iommu->int_enabled = false;
1607
1608        init_translation_status(iommu);
1609        if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
1610                iommu_disable(iommu);
1611                clear_translation_pre_enabled(iommu);
1612                pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
1613                        iommu->index);
1614        }
1615        if (amd_iommu_pre_enabled)
1616                amd_iommu_pre_enabled = translation_pre_enabled(iommu);
1617
1618        ret = init_iommu_from_acpi(iommu, h);
1619        if (ret)
1620                return ret;
1621
1622        ret = amd_iommu_create_irq_domain(iommu);
1623        if (ret)
1624                return ret;
1625
1626        /*
1627         * Make sure IOMMU is not considered to translate itself. The IVRS
1628         * table tells us so, but this is a lie!
1629         */
1630        amd_iommu_rlookup_table[iommu->devid] = NULL;
1631
1632        return 0;
1633}
1634
1635/**
1636 * get_highest_supported_ivhd_type - Look up the appropriate IVHD type
1637 * @ivrs: Pointer to the IVRS header
1638 *
1639 * This function search through all IVDB of the maximum supported IVHD
1640 */
1641static u8 get_highest_supported_ivhd_type(struct acpi_table_header *ivrs)
1642{
1643        u8 *base = (u8 *)ivrs;
1644        struct ivhd_header *ivhd = (struct ivhd_header *)
1645                                        (base + IVRS_HEADER_LENGTH);
1646        u8 last_type = ivhd->type;
1647        u16 devid = ivhd->devid;
1648
1649        while (((u8 *)ivhd - base < ivrs->length) &&
1650               (ivhd->type <= ACPI_IVHD_TYPE_MAX_SUPPORTED)) {
1651                u8 *p = (u8 *) ivhd;
1652
1653                if (ivhd->devid == devid)
1654                        last_type = ivhd->type;
1655                ivhd = (struct ivhd_header *)(p + ivhd->length);
1656        }
1657
1658        return last_type;
1659}
1660
1661/*
1662 * Iterates over all IOMMU entries in the ACPI table, allocates the
1663 * IOMMU structure and initializes it with init_iommu_one()
1664 */
1665static int __init init_iommu_all(struct acpi_table_header *table)
1666{
1667        u8 *p = (u8 *)table, *end = (u8 *)table;
1668        struct ivhd_header *h;
1669        struct amd_iommu *iommu;
1670        int ret;
1671
1672        end += table->length;
1673        p += IVRS_HEADER_LENGTH;
1674
1675        while (p < end) {
1676                h = (struct ivhd_header *)p;
1677                if (*p == amd_iommu_target_ivhd_type) {
1678
1679                        DUMP_printk("device: %02x:%02x.%01x cap: %04x "
1680                                    "seg: %d flags: %01x info %04x\n",
1681                                    PCI_BUS_NUM(h->devid), PCI_SLOT(h->devid),
1682                                    PCI_FUNC(h->devid), h->cap_ptr,
1683                                    h->pci_seg, h->flags, h->info);
1684                        DUMP_printk("       mmio-addr: %016llx\n",
1685                                    h->mmio_phys);
1686
1687                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
1688                        if (iommu == NULL)
1689                                return -ENOMEM;
1690
1691                        ret = init_iommu_one(iommu, h);
1692                        if (ret)
1693                                return ret;
1694                }
1695                p += h->length;
1696
1697        }
1698        WARN_ON(p != end);
1699
1700        return 0;
1701}
1702
1703static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
1704                                u8 fxn, u64 *value, bool is_write);
1705
1706static void init_iommu_perf_ctr(struct amd_iommu *iommu)
1707{
1708        struct pci_dev *pdev = iommu->dev;
1709        u64 val = 0xabcd, val2 = 0, save_reg = 0;
1710
1711        if (!iommu_feature(iommu, FEATURE_PC))
1712                return;
1713
1714        amd_iommu_pc_present = true;
1715
1716        /* save the value to restore, if writable */
1717        if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
1718                goto pc_false;
1719
1720        /* Check if the performance counters can be written to */
1721        if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
1722            (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
1723            (val != val2))
1724                goto pc_false;
1725
1726        /* restore */
1727        if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
1728                goto pc_false;
1729
1730        pci_info(pdev, "IOMMU performance counters supported\n");
1731
1732        val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
1733        iommu->max_banks = (u8) ((val >> 12) & 0x3f);
1734        iommu->max_counters = (u8) ((val >> 7) & 0xf);
1735
1736        return;
1737
1738pc_false:
1739        pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
1740        amd_iommu_pc_present = false;
1741        return;
1742}
1743
1744static ssize_t amd_iommu_show_cap(struct device *dev,
1745                                  struct device_attribute *attr,
1746                                  char *buf)
1747{
1748        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1749        return sprintf(buf, "%x\n", iommu->cap);
1750}
1751static DEVICE_ATTR(cap, S_IRUGO, amd_iommu_show_cap, NULL);
1752
1753static ssize_t amd_iommu_show_features(struct device *dev,
1754                                       struct device_attribute *attr,
1755                                       char *buf)
1756{
1757        struct amd_iommu *iommu = dev_to_amd_iommu(dev);
1758        return sprintf(buf, "%llx\n", iommu->features);
1759}
1760static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
1761
1762static struct attribute *amd_iommu_attrs[] = {
1763        &dev_attr_cap.attr,
1764        &dev_attr_features.attr,
1765        NULL,
1766};
1767
1768static struct attribute_group amd_iommu_group = {
1769        .name = "amd-iommu",
1770        .attrs = amd_iommu_attrs,
1771};
1772
1773static const struct attribute_group *amd_iommu_groups[] = {
1774        &amd_iommu_group,
1775        NULL,
1776};
1777
1778static int __init iommu_init_pci(struct amd_iommu *iommu)
1779{
1780        int cap_ptr = iommu->cap_ptr;
1781        int ret;
1782
1783        iommu->dev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(iommu->devid),
1784                                                 iommu->devid & 0xff);
1785        if (!iommu->dev)
1786                return -ENODEV;
1787
1788        /* Prevent binding other PCI device drivers to IOMMU devices */
1789        iommu->dev->match_driver = false;
1790
1791        pci_read_config_dword(iommu->dev, cap_ptr + MMIO_CAP_HDR_OFFSET,
1792                              &iommu->cap);
1793
1794        if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
1795                amd_iommu_iotlb_sup = false;
1796
1797        /* read extended feature bits */
1798        iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
1799
1800        if (iommu_feature(iommu, FEATURE_GT)) {
1801                int glxval;
1802                u32 max_pasid;
1803                u64 pasmax;
1804
1805                pasmax = iommu->features & FEATURE_PASID_MASK;
1806                pasmax >>= FEATURE_PASID_SHIFT;
1807                max_pasid  = (1 << (pasmax + 1)) - 1;
1808
1809                amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
1810
1811                BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
1812
1813                glxval   = iommu->features & FEATURE_GLXVAL_MASK;
1814                glxval >>= FEATURE_GLXVAL_SHIFT;
1815
1816                if (amd_iommu_max_glx_val == -1)
1817                        amd_iommu_max_glx_val = glxval;
1818                else
1819                        amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
1820        }
1821
1822        if (iommu_feature(iommu, FEATURE_GT) &&
1823            iommu_feature(iommu, FEATURE_PPR)) {
1824                iommu->is_iommu_v2   = true;
1825                amd_iommu_v2_present = true;
1826        }
1827
1828        if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
1829                return -ENOMEM;
1830
1831        ret = iommu_init_ga(iommu);
1832        if (ret)
1833                return ret;
1834
1835        if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE))
1836                amd_iommu_np_cache = true;
1837
1838        init_iommu_perf_ctr(iommu);
1839
1840        if (is_rd890_iommu(iommu->dev)) {
1841                int i, j;
1842
1843                iommu->root_pdev =
1844                        pci_get_domain_bus_and_slot(0, iommu->dev->bus->number,
1845                                                    PCI_DEVFN(0, 0));
1846
1847                /*
1848                 * Some rd890 systems may not be fully reconfigured by the
1849                 * BIOS, so it's necessary for us to store this information so
1850                 * it can be reprogrammed on resume
1851                 */
1852                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 4,
1853                                &iommu->stored_addr_lo);
1854                pci_read_config_dword(iommu->dev, iommu->cap_ptr + 8,
1855                                &iommu->stored_addr_hi);
1856
1857                /* Low bit locks writes to configuration space */
1858                iommu->stored_addr_lo &= ~1;
1859
1860                for (i = 0; i < 6; i++)
1861                        for (j = 0; j < 0x12; j++)
1862                                iommu->stored_l1[i][j] = iommu_read_l1(iommu, i, j);
1863
1864                for (i = 0; i < 0x83; i++)
1865                        iommu->stored_l2[i] = iommu_read_l2(iommu, i);
1866        }
1867
1868        amd_iommu_erratum_746_workaround(iommu);
1869        amd_iommu_ats_write_check_workaround(iommu);
1870
1871        iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
1872                               amd_iommu_groups, "ivhd%d", iommu->index);
1873        iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
1874        iommu_device_register(&iommu->iommu);
1875
1876        return pci_enable_device(iommu->dev);
1877}
1878
1879static void print_iommu_info(void)
1880{
1881        static const char * const feat_str[] = {
1882                "PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
1883                "IA", "GA", "HE", "PC"
1884        };
1885        struct amd_iommu *iommu;
1886
1887        for_each_iommu(iommu) {
1888                struct pci_dev *pdev = iommu->dev;
1889                int i;
1890
1891                pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
1892
1893                if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
1894                        pci_info(pdev, "Extended features (%#llx):",
1895                                 iommu->features);
1896                        for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
1897                                if (iommu_feature(iommu, (1ULL << i)))
1898                                        pr_cont(" %s", feat_str[i]);
1899                        }
1900
1901                        if (iommu->features & FEATURE_GAM_VAPIC)
1902                                pr_cont(" GA_vAPIC");
1903
1904                        pr_cont("\n");
1905                }
1906        }
1907        if (irq_remapping_enabled) {
1908                pr_info("Interrupt remapping enabled\n");
1909                if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
1910                        pr_info("Virtual APIC enabled\n");
1911                if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
1912                        pr_info("X2APIC enabled\n");
1913        }
1914}
1915
1916static int __init amd_iommu_init_pci(void)
1917{
1918        struct amd_iommu *iommu;
1919        int ret = 0;
1920
1921        for_each_iommu(iommu) {
1922                ret = iommu_init_pci(iommu);
1923                if (ret)
1924                        break;
1925
1926                /* Need to setup range after PCI init */
1927                iommu_set_cwwb_range(iommu);
1928        }
1929
1930        /*
1931         * Order is important here to make sure any unity map requirements are
1932         * fulfilled. The unity mappings are created and written to the device
1933         * table during the amd_iommu_init_api() call.
1934         *
1935         * After that we call init_device_table_dma() to make sure any
1936         * uninitialized DTE will block DMA, and in the end we flush the caches
1937         * of all IOMMUs to make sure the changes to the device table are
1938         * active.
1939         */
1940        ret = amd_iommu_init_api();
1941
1942        init_device_table_dma();
1943
1944        for_each_iommu(iommu)
1945                iommu_flush_all_caches(iommu);
1946
1947        if (!ret)
1948                print_iommu_info();
1949
1950        return ret;
1951}
1952
1953/****************************************************************************
1954 *
1955 * The following functions initialize the MSI interrupts for all IOMMUs
1956 * in the system. It's a bit challenging because there could be multiple
1957 * IOMMUs per PCI BDF but we can call pci_enable_msi(x) only once per
1958 * pci_dev.
1959 *
1960 ****************************************************************************/
1961
1962static int iommu_setup_msi(struct amd_iommu *iommu)
1963{
1964        int r;
1965
1966        r = pci_enable_msi(iommu->dev);
1967        if (r)
1968                return r;
1969
1970        r = request_threaded_irq(iommu->dev->irq,
1971                                 amd_iommu_int_handler,
1972                                 amd_iommu_int_thread,
1973                                 0, "AMD-Vi",
1974                                 iommu);
1975
1976        if (r) {
1977                pci_disable_msi(iommu->dev);
1978                return r;
1979        }
1980
1981        iommu->int_enabled = true;
1982
1983        return 0;
1984}
1985
/*
 * Field encoders for the 64-bit value written to the IntCapXT registers.
 * Each macro masks its argument and shifts it to the field position:
 *   destination mode      - 1 bit at bit 2
 *   destination, low part - low 24 bits of x at bits 8..31
 *   vector                - 8 bits at bits 32..39
 *   destination, high part- bits 24..31 of x at bits 56..63
 */
#define XT_INT_DEST_MODE(x)     (((x) & 0x1ULL) << 2)
#define XT_INT_DEST_LO(x)       (((x) & 0xFFFFFFULL) << 8)
#define XT_INT_VEC(x)           (((x) & 0xFFULL) << 32)
#define XT_INT_DEST_HI(x)       ((((x) >> 24) & 0xFFULL) << 56)
1990
1991/*
1992 * Setup the IntCapXT registers with interrupt routing information
1993 * based on the PCI MSI capability block registers, accessed via
1994 * MMIO MSI address low/hi and MSI data registers.
1995 */
1996static void iommu_update_intcapxt(struct amd_iommu *iommu)
1997{
1998        u64 val;
1999        u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
2000        u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
2001        u32 data    = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
2002        bool dm     = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
2003        u32 dest    = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
2004
2005        if (x2apic_enabled())
2006                dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
2007
2008        val = XT_INT_VEC(data & 0xFF) |
2009              XT_INT_DEST_MODE(dm) |
2010              XT_INT_DEST_LO(dest) |
2011              XT_INT_DEST_HI(dest);
2012
2013        /**
2014         * Current IOMMU implemtation uses the same IRQ for all
2015         * 3 IOMMU interrupts.
2016         */
2017        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
2018        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
2019        writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
2020}
2021
2022static void _irq_notifier_notify(struct irq_affinity_notify *notify,
2023                                 const cpumask_t *mask)
2024{
2025        struct amd_iommu *iommu;
2026
2027        for_each_iommu(iommu) {
2028                if (iommu->dev->irq == notify->irq) {
2029                        iommu_update_intcapxt(iommu);
2030                        break;
2031                }
2032        }
2033}
2034
/* kref release callback of the affinity notifier; intentionally a no-op. */
static void _irq_notifier_release(struct kref *ref)
{
        /* Nothing to release. */
}
2038
2039static int iommu_init_intcapxt(struct amd_iommu *iommu)
2040{
2041        int ret;
2042        struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
2043
2044        /**
2045         * IntCapXT requires XTSup=1 and MsiCapMmioSup=1,
2046         * which can be inferred from amd_iommu_xt_mode.
2047         */
2048        if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
2049                return 0;
2050
2051        /**
2052         * Also, we need to setup notifier to update the IntCapXT registers
2053         * whenever the irq affinity is changed from user-space.
2054         */
2055        notify->irq = iommu->dev->irq;
2056        notify->notify = _irq_notifier_notify,
2057        notify->release = _irq_notifier_release,
2058        ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
2059        if (ret) {
2060                pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
2061                       iommu->devid, iommu->dev->irq);
2062                return ret;
2063        }
2064
2065        iommu_update_intcapxt(iommu);
2066        iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
2067        return ret;
2068}
2069
2070static int iommu_init_msi(struct amd_iommu *iommu)
2071{
2072        int ret;
2073
2074        if (iommu->int_enabled)
2075                goto enable_faults;
2076
2077        if (iommu->dev->msi_cap)
2078                ret = iommu_setup_msi(iommu);
2079        else
2080                ret = -ENODEV;
2081
2082        if (ret)
2083                return ret;
2084
2085enable_faults:
2086        ret = iommu_init_intcapxt(iommu);
2087        if (ret)
2088                return ret;
2089
2090        iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
2091
2092        if (iommu->ppr_log != NULL)
2093                iommu_feature_enable(iommu, CONTROL_PPRINT_EN);
2094
2095        iommu_ga_log_enable(iommu);
2096
2097        return 0;
2098}
2099
/****************************************************************************
 *
 * The next functions belong to the fourth and last pass of parsing the ACPI
 * table. In this pass the memory mapping requirements are gathered (like
 * exclusion and unity mapping ranges).
 *
 ****************************************************************************/
2107
2108static void __init free_unity_maps(void)
2109{
2110        struct unity_map_entry *entry, *next;
2111
2112        list_for_each_entry_safe(entry, next, &amd_iommu_unity_map, list) {
2113                list_del(&entry->list);
2114                kfree(entry);
2115        }
2116}
2117
2118/* called for unity map ACPI definition */
2119static int __init init_unity_map_range(struct ivmd_header *m)
2120{
2121        struct unity_map_entry *e = NULL;
2122        char *s;
2123
2124        e = kzalloc(sizeof(*e), GFP_KERNEL);
2125        if (e == NULL)
2126                return -ENOMEM;
2127
2128        switch (m->type) {
2129        default:
2130                kfree(e);
2131                return 0;
2132        case ACPI_IVMD_TYPE:
2133                s = "IVMD_TYPEi\t\t\t";
2134                e->devid_start = e->devid_end = m->devid;
2135                break;
2136        case ACPI_IVMD_TYPE_ALL:
2137                s = "IVMD_TYPE_ALL\t\t";
2138                e->devid_start = 0;
2139                e->devid_end = amd_iommu_last_bdf;
2140                break;
2141        case ACPI_IVMD_TYPE_RANGE:
2142                s = "IVMD_TYPE_RANGE\t\t";
2143                e->devid_start = m->devid;
2144                e->devid_end = m->aux;
2145                break;
2146        }
2147        e->address_start = PAGE_ALIGN(m->range_start);
2148        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
2149        e->prot = m->flags >> 1;
2150
2151        /*
2152         * Treat per-device exclusion ranges as r/w unity-mapped regions
2153         * since some buggy BIOSes might lead to the overwritten exclusion
2154         * range (exclusion_start and exclusion_length members). This
2155         * happens when there are multiple exclusion ranges (IVMD entries)
2156         * defined in ACPI table.
2157         */
2158        if (m->flags & IVMD_FLAG_EXCL_RANGE)
2159                e->prot = (IVMD_FLAG_IW | IVMD_FLAG_IR) >> 1;
2160
2161        DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
2162                    " range_start: %016llx range_end: %016llx flags: %x\n", s,
2163                    PCI_BUS_NUM(e->devid_start), PCI_SLOT(e->devid_start),
2164                    PCI_FUNC(e->devid_start), PCI_BUS_NUM(e->devid_end),
2165                    PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
2166                    e->address_start, e->address_end, m->flags);
2167
2168        list_add_tail(&e->list, &amd_iommu_unity_map);
2169
2170        return 0;
2171}
2172
2173/* iterates over all memory definitions we find in the ACPI table */
2174static int __init init_memory_definitions(struct acpi_table_header *table)
2175{
2176        u8 *p = (u8 *)table, *end = (u8 *)table;
2177        struct ivmd_header *m;
2178
2179        end += table->length;
2180        p += IVRS_HEADER_LENGTH;
2181
2182        while (p < end) {
2183                m = (struct ivmd_header *)p;
2184                if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE))
2185                        init_unity_map_range(m);
2186
2187                p += m->length;
2188        }
2189
2190        return 0;
2191}
2192
2193/*
2194 * Init the device table to not allow DMA access for devices
2195 */
2196static void init_device_table_dma(void)
2197{
2198        u32 devid;
2199
2200        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2201                set_dev_entry_bit(devid, DEV_ENTRY_VALID);
2202                set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
2203        }
2204}
2205
2206static void __init uninit_device_table_dma(void)
2207{
2208        u32 devid;
2209
2210        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
2211                amd_iommu_dev_table[devid].data[0] = 0ULL;
2212                amd_iommu_dev_table[devid].data[1] = 0ULL;
2213        }
2214}
2215
2216static void init_device_table(void)
2217{
2218        u32 devid;
2219
2220        if (!amd_iommu_irq_remap)
2221                return;
2222
2223        for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
2224                set_dev_entry_bit(devid, DEV_ENTRY_IRQ_TBL_EN);
2225}
2226
2227static void iommu_init_flags(struct amd_iommu *iommu)
2228{
2229        iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ?
2230                iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) :
2231                iommu_feature_disable(iommu, CONTROL_HT_TUN_EN);
2232
2233        iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ?
2234                iommu_feature_enable(iommu, CONTROL_PASSPW_EN) :
2235                iommu_feature_disable(iommu, CONTROL_PASSPW_EN);
2236
2237        iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ?
2238                iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) :
2239                iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN);
2240
2241        iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ?
2242                iommu_feature_enable(iommu, CONTROL_ISOC_EN) :
2243                iommu_feature_disable(iommu, CONTROL_ISOC_EN);
2244
2245        /*
2246         * make IOMMU memory accesses cache coherent
2247         */
2248        iommu_feature_enable(iommu, CONTROL_COHERENT_EN);
2249
2250        /* Set IOTLB invalidation timeout to 1s */
2251        iommu_set_inv_tlb_timeout(iommu, CTRL_INV_TO_1S);
2252}
2253
2254static void iommu_apply_resume_quirks(struct amd_iommu *iommu)
2255{
2256        int i, j;
2257        u32 ioc_feature_control;
2258        struct pci_dev *pdev = iommu->root_pdev;
2259
2260        /* RD890 BIOSes may not have completely reconfigured the iommu */
2261        if (!is_rd890_iommu(iommu->dev) || !pdev)
2262                return;
2263
2264        /*
2265         * First, we need to ensure that the iommu is enabled. This is
2266         * controlled by a register in the northbridge
2267         */
2268
2269        /* Select Northbridge indirect register 0x75 and enable writing */
2270        pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7));
2271        pci_read_config_dword(pdev, 0x64, &ioc_feature_control);
2272
2273        /* Enable the iommu */
2274        if (!(ioc_feature_control & 0x1))
2275                pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1);
2276
2277        /* Restore the iommu BAR */
2278        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2279                               iommu->stored_addr_lo);
2280        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 8,
2281                               iommu->stored_addr_hi);
2282
2283        /* Restore the l1 indirect regs for each of the 6 l1s */
2284        for (i = 0; i < 6; i++)
2285                for (j = 0; j < 0x12; j++)
2286                        iommu_write_l1(iommu, i, j, iommu->stored_l1[i][j]);
2287
2288        /* Restore the l2 indirect regs */
2289        for (i = 0; i < 0x83; i++)
2290                iommu_write_l2(iommu, i, iommu->stored_l2[i]);
2291
2292        /* Lock PCI setup registers */
2293        pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4,
2294                               iommu->stored_addr_lo | 1);
2295}
2296
/*
 * Select the IRTE operations for this IOMMU according to
 * amd_iommu_guest_ir and enable the matching GA control bits. Compiles to
 * an empty function without CONFIG_IRQ_REMAP.
 */
static void iommu_enable_ga(struct amd_iommu *iommu)
{
#ifdef CONFIG_IRQ_REMAP
        /* Any mode other than vAPIC or legacy GA uses the 32-bit IRTE ops. */
        if (amd_iommu_guest_ir != AMD_IOMMU_GUEST_IR_VAPIC &&
            amd_iommu_guest_ir != AMD_IOMMU_GUEST_IR_LEGACY_GA) {
                iommu->irte_ops = &irte_32_ops;
                return;
        }

        /* vAPIC mode enables GAM in addition to everything legacy GA enables. */
        if (amd_iommu_guest_ir == AMD_IOMMU_GUEST_IR_VAPIC)
                iommu_feature_enable(iommu, CONTROL_GAM_EN);

        iommu_feature_enable(iommu, CONTROL_GA_EN);
        iommu->irte_ops = &irte_128_ops;
#endif
}
2314
/*
 * Run the enable sequence for a single IOMMU. The IOMMU is disabled first,
 * everything in between is programmed while it is off, and the caches are
 * flushed once it has been enabled again.
 */
static void early_enable_iommu(struct amd_iommu *iommu)
{
        iommu_disable(iommu);

        /* Programmed between the disable above and the enable below. */
        iommu_init_flags(iommu);
        iommu_set_device_table(iommu);
        iommu_enable_command_buffer(iommu);
        iommu_enable_event_buffer(iommu);
        iommu_set_exclusion_range(iommu);
        iommu_enable_ga(iommu);
        iommu_enable_xt(iommu);

        iommu_enable(iommu);
        iommu_flush_all_caches(iommu);
}
2328
2329/*
2330 * This function finally enables all IOMMUs found in the system after
2331 * they have been initialized.
2332 *
2333 * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
2334 * the old content of device table entries. Not this case or copy failed,
2335 * just continue as normal kernel does.
2336 */
2337static void early_enable_iommus(void)
2338{
2339        struct amd_iommu *iommu;
2340
2341
2342        if (!copy_device_table()) {
2343                /*
2344                 * If come here because of failure in copying device table from old
2345                 * kernel with all IOMMUs enabled, print error message and try to
2346                 * free allocated old_dev_tbl_cpy.
2347                 */
2348                if (amd_iommu_pre_enabled)
2349                        pr_err("Failed to copy DEV table from previous kernel.\n");
2350                if (old_dev_tbl_cpy != NULL)
2351                        free_pages((unsigned long)old_dev_tbl_cpy,
2352                                        get_order(dev_table_size));
2353
2354                for_each_iommu(iommu) {
2355                        clear_translation_pre_enabled(iommu);
2356                        early_enable_iommu(iommu);
2357                }
2358        } else {
2359                pr_info("Copied DEV table from previous kernel.\n");
2360                free_pages((unsigned long)amd_iommu_dev_table,
2361                                get_order(dev_table_size));
2362                amd_iommu_dev_table = old_dev_tbl_cpy;
2363                for_each_iommu(iommu) {
2364                        iommu_disable_command_buffer(iommu);
2365                        iommu_disable_event_buffer(iommu);
2366                        iommu_enable_command_buffer(iommu);
2367                        iommu_enable_event_buffer(iommu);
2368                        iommu_enable_ga(iommu);
2369                        iommu_enable_xt(iommu);
2370                        iommu_set_device_table(iommu);
2371                        iommu_flush_all_caches(iommu);
2372                }
2373        }
2374
2375#ifdef CONFIG_IRQ_REMAP
2376        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2377                amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
2378#endif
2379}
2380
2381static void enable_iommus_v2(void)
2382{
2383        struct amd_iommu *iommu;
2384
2385        for_each_iommu(iommu) {
2386                iommu_enable_ppr_log(iommu);
2387                iommu_enable_gt(iommu);
2388        }
2389}
2390
/* Full enable path: the early enable step followed by the v2 features. */
static void enable_iommus(void)
{
        early_enable_iommus();
        enable_iommus_v2();
}
2397
2398static void disable_iommus(void)
2399{
2400        struct amd_iommu *iommu;
2401
2402        for_each_iommu(iommu)
2403                iommu_disable(iommu);
2404
2405#ifdef CONFIG_IRQ_REMAP
2406        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
2407                amd_iommu_irq_ops.capability &= ~(1 << IRQ_POSTING_CAP);
2408#endif
2409}
2410
2411/*
2412 * Suspend/Resume support
2413 * disable suspend until real resume implemented
2414 */
2415
2416static void amd_iommu_resume(void)
2417{
2418        struct amd_iommu *iommu;
2419
2420        for_each_iommu(iommu)
2421                iommu_apply_resume_quirks(iommu);
2422
2423        /* re-load the hardware */
2424        enable_iommus();
2425
2426        amd_iommu_enable_interrupts();
2427}
2428
/* syscore suspend callback; always returns 0. */
static int amd_iommu_suspend(void)
{
        /* Disable the IOMMUs so that they are out of the way for the BIOS. */
        disable_iommus();

        return 0;
}
2436
2437static struct syscore_ops amd_iommu_syscore_ops = {
2438        .suspend = amd_iommu_suspend,
2439        .resume = amd_iommu_resume,
2440};
2441
2442static void __init free_iommu_resources(void)
2443{
2444        kmemleak_free(irq_lookup_table);
2445        free_pages((unsigned long)irq_lookup_table,
2446                   get_order(rlookup_table_size));
2447        irq_lookup_table = NULL;
2448
2449        kmem_cache_destroy(amd_iommu_irq_cache);
2450        amd_iommu_irq_cache = NULL;
2451
2452        free_pages((unsigned long)amd_iommu_rlookup_table,
2453                   get_order(rlookup_table_size));
2454        amd_iommu_rlookup_table = NULL;
2455
2456        free_pages((unsigned long)amd_iommu_alias_table,
2457                   get_order(alias_table_size));
2458        amd_iommu_alias_table = NULL;
2459
2460        free_pages((unsigned long)amd_iommu_dev_table,
2461                   get_order(dev_table_size));
2462        amd_iommu_dev_table = NULL;
2463
2464        free_iommu_all();
2465}
2466
2467/* SB IOAPIC is always on this device in AMD systems */
2468#define IOAPIC_SB_DEVID         ((0x00 << 8) | PCI_DEVFN(0x14, 0))
2469
2470static bool __init check_ioapic_information(void)
2471{
2472        const char *fw_bug = FW_BUG;
2473        bool ret, has_sb_ioapic;
2474        int idx;
2475
2476        has_sb_ioapic = false;
2477        ret           = false;
2478
2479        /*
2480         * If we have map overrides on the kernel command line the
2481         * messages in this function might not describe firmware bugs
2482         * anymore - so be careful
2483         */
2484        if (cmdline_maps)
2485                fw_bug = "";
2486
2487        for (idx = 0; idx < nr_ioapics; idx++) {
2488                int devid, id = mpc_ioapic_id(idx);
2489
2490                devid = get_ioapic_devid(id);
2491                if (devid < 0) {
2492                        pr_err("%s: IOAPIC[%d] not in IVRS table\n",
2493                                fw_bug, id);
2494                        ret = false;
2495                } else if (devid == IOAPIC_SB_DEVID) {
2496                        has_sb_ioapic = true;
2497                        ret           = true;
2498                }
2499        }
2500
2501        if (!has_sb_ioapic) {
2502                /*
2503                 * We expect the SB IOAPIC to be listed in the IVRS
2504                 * table. The system timer is connected to the SB IOAPIC
2505                 * and if we don't have it in the list the system will
2506                 * panic at boot time.  This situation usually happens
2507                 * when the BIOS is buggy and provides us the wrong
2508                 * device id for the IOAPIC in the system.
2509                 */
2510                pr_err("%s: No southbridge IOAPIC found\n", fw_bug);
2511        }
2512
2513        if (!ret)
2514                pr_err("Disabling interrupt remapping\n");
2515
2516        return ret;
2517}
2518
2519static void __init free_dma_resources(void)
2520{
2521        free_pages((unsigned long)amd_iommu_pd_alloc_bitmap,
2522                   get_order(MAX_DOMAIN_ID/8));
2523        amd_iommu_pd_alloc_bitmap = NULL;
2524
2525        free_unity_maps();
2526}
2527
2528/*
2529 * This is the hardware init function for AMD IOMMU in the system.
2530 * This function is called either from amd_iommu_init or from the interrupt
2531 * remapping setup code.
2532 *
2533 * This function basically parses the ACPI table for AMD IOMMU (IVRS)
2534 * four times:
2535 *
2536 *      1 pass) Discover the most comprehensive IVHD type to use.
2537 *
2538 *      2 pass) Find the highest PCI device id the driver has to handle.
2539 *              Upon this information the size of the data structures is
2540 *              determined that needs to be allocated.
2541 *
2542 *      3 pass) Initialize the data structures just allocated with the
2543 *              information in the ACPI table about available AMD IOMMUs
2544 *              in the system. It also maps the PCI devices in the
2545 *              system to specific IOMMUs
2546 *
2547 *      4 pass) After the basic data structures are allocated and
2548 *              initialized we update them with information about memory
2549 *              remapping requirements parsed out of the ACPI table in
2550 *              this last pass.
2551 *
2552 * After everything is set up the IOMMUs are enabled and the necessary
2553 * hotplug and suspend notifiers are registered.
2554 */
2555static int __init early_amd_iommu_init(void)
2556{
2557        struct acpi_table_header *ivrs_base;
2558        acpi_status status;
2559        int i, remap_cache_sz, ret = 0;
2560        u32 pci_id;
2561
2562        if (!amd_iommu_detected)
2563                return -ENODEV;
2564
2565        status = acpi_get_table("IVRS", 0, &ivrs_base);
2566        if (status == AE_NOT_FOUND)
2567                return -ENODEV;
2568        else if (ACPI_FAILURE(status)) {
2569                const char *err = acpi_format_exception(status);
2570                pr_err("IVRS table error: %s\n", err);
2571                return -EINVAL;
2572        }
2573
2574        /*
2575         * Validate checksum here so we don't need to do it when
2576         * we actually parse the table
2577         */
2578        ret = check_ivrs_checksum(ivrs_base);
2579        if (ret)
2580                goto out;
2581
2582        amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
2583        DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
2584
2585        /*
2586         * First parse ACPI tables to find the largest Bus/Dev/Func
2587         * we need to handle. Upon this information the shared data
2588         * structures for the IOMMUs in the system will be allocated
2589         */
2590        ret = find_last_devid_acpi(ivrs_base);
2591        if (ret)
2592                goto out;
2593
2594        dev_table_size     = tbl_size(DEV_TABLE_ENTRY_SIZE);
2595        alias_table_size   = tbl_size(ALIAS_TABLE_ENTRY_SIZE);
2596        rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE);
2597
2598        /* Device table - directly used by all IOMMUs */
2599        ret = -ENOMEM;
2600        amd_iommu_dev_table = (void *)__get_free_pages(
2601                                      GFP_KERNEL | __GFP_ZERO | GFP_DMA32,
2602                                      get_order(dev_table_size));
2603        if (amd_iommu_dev_table == NULL)
2604                goto out;
2605
2606        /*
2607         * Alias table - map PCI Bus/Dev/Func to Bus/Dev/Func the
2608         * IOMMU see for that device
2609         */
2610        amd_iommu_alias_table = (void *)__get_free_pages(GFP_KERNEL,
2611                        get_order(alias_table_size));
2612        if (amd_iommu_alias_table == NULL)
2613                goto out;
2614
2615        /* IOMMU rlookup table - find the IOMMU for a specific device */
2616        amd_iommu_rlookup_table = (void *)__get_free_pages(
2617                        GFP_KERNEL | __GFP_ZERO,
2618                        get_order(rlookup_table_size));
2619        if (amd_iommu_rlookup_table == NULL)
2620                goto out;
2621
2622        amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(
2623                                            GFP_KERNEL | __GFP_ZERO,
2624                                            get_order(MAX_DOMAIN_ID/8));
2625        if (amd_iommu_pd_alloc_bitmap == NULL)
2626                goto out;
2627
2628        /*
2629         * let all alias entries point to itself
2630         */
2631        for (i = 0; i <= amd_iommu_last_bdf; ++i)
2632                amd_iommu_alias_table[i] = i;
2633
2634        /*
2635         * never allocate domain 0 because its used as the non-allocated and
2636         * error value placeholder
2637         */
2638        __set_bit(0, amd_iommu_pd_alloc_bitmap);
2639
2640        /*
2641         * now the data structures are allocated and basically initialized
2642         * start the real acpi table scan
2643         */
2644        ret = init_iommu_all(ivrs_base);
2645        if (ret)
2646                goto out;
2647
2648        /* Disable IOMMU if there's Stoney Ridge graphics */
2649        for (i = 0; i < 32; i++) {
2650                pci_id = read_pci_config(0, i, 0, 0);
2651                if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
2652                        pr_info("Disable IOMMU on Stoney Ridge\n");
2653                        amd_iommu_disabled = true;
2654                        break;
2655                }
2656        }
2657
2658        /* Disable any previously enabled IOMMUs */
2659        if (!is_kdump_kernel() || amd_iommu_disabled)
2660                disable_iommus();
2661
2662        if (amd_iommu_irq_remap)
2663                amd_iommu_irq_remap = check_ioapic_information();
2664
2665        if (amd_iommu_irq_remap) {
2666                /*
2667                 * Interrupt remapping enabled, create kmem_cache for the
2668                 * remapping tables.
2669                 */
2670                ret = -ENOMEM;
2671                if (!AMD_IOMMU_GUEST_IR_GA(amd_iommu_guest_ir))
2672                        remap_cache_sz = MAX_IRQS_PER_TABLE * sizeof(u32);
2673                else
2674                        remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
2675                amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
2676                                                        remap_cache_sz,
2677                                                        IRQ_TABLE_ALIGNMENT,
2678                                                        0, NULL);
2679                if (!amd_iommu_irq_cache)
2680                        goto out;
2681
2682                irq_lookup_table = (void *)__get_free_pages(
2683                                GFP_KERNEL | __GFP_ZERO,
2684                                get_order(rlookup_table_size));
2685                kmemleak_alloc(irq_lookup_table, rlookup_table_size,
2686                               1, GFP_KERNEL);
2687                if (!irq_lookup_table)
2688                        goto out;
2689        }
2690
2691        ret = init_memory_definitions(ivrs_base);
2692        if (ret)
2693                goto out;
2694
2695        /* init the device table */
2696        init_device_table();
2697
2698out:
2699        /* Don't leak any ACPI memory */
2700        acpi_put_table(ivrs_base);
2701        ivrs_base = NULL;
2702
2703        return ret;
2704}
2705
2706static int amd_iommu_enable_interrupts(void)
2707{
2708        struct amd_iommu *iommu;
2709        int ret = 0;
2710
2711        for_each_iommu(iommu) {
2712                ret = iommu_init_msi(iommu);
2713                if (ret)
2714                        goto out;
2715        }
2716
2717out:
2718        return ret;
2719}
2720
2721static bool detect_ivrs(void)
2722{
2723        struct acpi_table_header *ivrs_base;
2724        acpi_status status;
2725
2726        status = acpi_get_table("IVRS", 0, &ivrs_base);
2727        if (status == AE_NOT_FOUND)
2728                return false;
2729        else if (ACPI_FAILURE(status)) {
2730                const char *err = acpi_format_exception(status);
2731                pr_err("IVRS table error: %s\n", err);
2732                return false;
2733        }
2734
2735        acpi_put_table(ivrs_base);
2736
2737        /* Make sure ACS will be enabled during PCI probe */
2738        pci_request_acs();
2739
2740        return true;
2741}
2742
2743/****************************************************************************
2744 *
2745 * AMD IOMMU Initialization State Machine
2746 *
2747 ****************************************************************************/
2748
2749static int __init state_next(void)
2750{
2751        int ret = 0;
2752
2753        switch (init_state) {
2754        case IOMMU_START_STATE:
2755                if (!detect_ivrs()) {
2756                        init_state      = IOMMU_NOT_FOUND;
2757                        ret             = -ENODEV;
2758                } else {
2759                        init_state      = IOMMU_IVRS_DETECTED;
2760                }
2761                break;
2762        case IOMMU_IVRS_DETECTED:
2763                ret = early_amd_iommu_init();
2764                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
2765                if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
2766                        pr_info("AMD IOMMU disabled\n");
2767                        init_state = IOMMU_CMDLINE_DISABLED;
2768                        ret = -EINVAL;
2769                }
2770                break;
2771        case IOMMU_ACPI_FINISHED:
2772                early_enable_iommus();
2773                x86_platform.iommu_shutdown = disable_iommus;
2774                init_state = IOMMU_ENABLED;
2775                break;
2776        case IOMMU_ENABLED:
2777                register_syscore_ops(&amd_iommu_syscore_ops);
2778                ret = amd_iommu_init_pci();
2779                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
2780                enable_iommus_v2();
2781                break;
2782        case IOMMU_PCI_INIT:
2783                ret = amd_iommu_enable_interrupts();
2784                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
2785                break;
2786        case IOMMU_INTERRUPTS_EN:
2787                ret = amd_iommu_init_dma_ops();
2788                init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
2789                break;
2790        case IOMMU_DMA_OPS:
2791                init_state = IOMMU_INITIALIZED;
2792                break;
2793        case IOMMU_INITIALIZED:
2794                /* Nothing to do */
2795                break;
2796        case IOMMU_NOT_FOUND:
2797        case IOMMU_INIT_ERROR:
2798        case IOMMU_CMDLINE_DISABLED:
2799                /* Error states => do nothing */
2800                ret = -EINVAL;
2801                break;
2802        default:
2803                /* Unknown state */
2804                BUG();
2805        }
2806
2807        if (ret) {
2808                free_dma_resources();
2809                if (!irq_remapping_enabled) {
2810                        disable_iommus();
2811                        free_iommu_resources();
2812                } else {
2813                        struct amd_iommu *iommu;
2814
2815                        uninit_device_table_dma();
2816                        for_each_iommu(iommu)
2817                                iommu_flush_all_caches(iommu);
2818                }
2819        }
2820        return ret;
2821}
2822
2823static int __init iommu_go_to_state(enum iommu_init_state state)
2824{
2825        int ret = -EINVAL;
2826
2827        while (init_state != state) {
2828                if (init_state == IOMMU_NOT_FOUND         ||
2829                    init_state == IOMMU_INIT_ERROR        ||
2830                    init_state == IOMMU_CMDLINE_DISABLED)
2831                        break;
2832                ret = state_next();
2833        }
2834
2835        return ret;
2836}
2837
2838#ifdef CONFIG_IRQ_REMAP
2839int __init amd_iommu_prepare(void)
2840{
2841        int ret;
2842
2843        amd_iommu_irq_remap = true;
2844
2845        ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
2846        if (ret)
2847                return ret;
2848        return amd_iommu_irq_remap ? 0 : -ENODEV;
2849}
2850
2851int __init amd_iommu_enable(void)
2852{
2853        int ret;
2854
2855        ret = iommu_go_to_state(IOMMU_ENABLED);
2856        if (ret)
2857                return ret;
2858
2859        irq_remapping_enabled = 1;
2860        return amd_iommu_xt_mode;
2861}
2862
/* Disable hook: simply delegates to amd_iommu_suspend(). */
void amd_iommu_disable(void)
{
        amd_iommu_suspend();
}
2867
/*
 * Re-enable hook: delegates to amd_iommu_resume(). @mode is not used.
 * Always returns 0.
 */
int amd_iommu_reenable(int mode)
{
        amd_iommu_resume();
        return 0;
}
2874
2875int __init amd_iommu_enable_faulting(void)
2876{
2877        /* We enable MSI later when PCI is initialized */
2878        return 0;
2879}
2880#endif
2881
2882/*
2883 * This is the core init function for AMD IOMMU hardware in the system.
2884 * This function is called from the generic x86 DMA layer initialization
2885 * code.
2886 */
2887static int __init amd_iommu_init(void)
2888{
2889        struct amd_iommu *iommu;
2890        int ret;
2891
2892        ret = iommu_go_to_state(IOMMU_INITIALIZED);
2893#ifdef CONFIG_GART_IOMMU
2894        if (ret && list_empty(&amd_iommu_list)) {
2895                /*
2896                 * We failed to initialize the AMD IOMMU - try fallback
2897                 * to GART if possible.
2898                 */
2899                gart_iommu_init();
2900        }
2901#endif
2902
2903        for_each_iommu(iommu)
2904                amd_iommu_debugfs_setup(iommu);
2905
2906        return ret;
2907}
2908
2909static bool amd_iommu_sme_check(void)
2910{
2911        if (!sme_active() || (boot_cpu_data.x86 != 0x17))
2912                return true;
2913
2914        /* For Fam17h, a specific level of support is required */
2915        if (boot_cpu_data.microcode >= 0x08001205)
2916                return true;
2917
2918        if ((boot_cpu_data.microcode >= 0x08001126) &&
2919            (boot_cpu_data.microcode <= 0x080011ff))
2920                return true;
2921
2922        pr_notice("IOMMU not currently supported when SME is active\n");
2923
2924        return false;
2925}
2926
2927/****************************************************************************
2928 *
2929 * Early detect code. This code runs at IOMMU detection time in the DMA
2930 * layer. It just looks if there is an IVRS ACPI table to detect AMD
2931 * IOMMUs
2932 *
2933 ****************************************************************************/
2934int __init amd_iommu_detect(void)
2935{
2936        int ret;
2937
2938        if (no_iommu || (iommu_detected && !gart_iommu_aperture))
2939                return -ENODEV;
2940
2941        if (!amd_iommu_sme_check())
2942                return -ENODEV;
2943
2944        ret = iommu_go_to_state(IOMMU_IVRS_DETECTED);
2945        if (ret)
2946                return ret;
2947
2948        amd_iommu_detected = true;
2949        iommu_detected = 1;
2950        x86_init.iommu.iommu_init = amd_iommu_init;
2951
2952        return 1;
2953}
2954
2955/****************************************************************************
2956 *
2957 * Parsing functions for the AMD IOMMU specific kernel command line
2958 * options.
2959 *
2960 ****************************************************************************/
2961
2962static int __init parse_amd_iommu_dump(char *str)
2963{
2964        amd_iommu_dump = true;
2965
2966        return 1;
2967}
2968
2969static int __init parse_amd_iommu_intr(char *str)
2970{
2971        for (; *str; ++str) {
2972                if (strncmp(str, "legacy", 6) == 0) {
2973                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
2974                        break;
2975                }
2976                if (strncmp(str, "vapic", 5) == 0) {
2977                        amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
2978                        break;
2979                }
2980        }
2981        return 1;
2982}
2983
2984static int __init parse_amd_iommu_options(char *str)
2985{
2986        for (; *str; ++str) {
2987                if (strncmp(str, "fullflush", 9) == 0)
2988                        amd_iommu_unmap_flush = true;
2989                if (strncmp(str, "off", 3) == 0)
2990                        amd_iommu_disabled = true;
2991                if (strncmp(str, "force_isolation", 15) == 0)
2992                        amd_iommu_force_isolation = true;
2993        }
2994
2995        return 1;
2996}
2997
2998static int __init parse_ivrs_ioapic(char *str)
2999{
3000        unsigned int bus, dev, fn;
3001        int ret, id, i;
3002        u16 devid;
3003
3004        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3005
3006        if (ret != 4) {
3007                pr_err("Invalid command line: ivrs_ioapic%s\n", str);
3008                return 1;
3009        }
3010
3011        if (early_ioapic_map_size == EARLY_MAP_SIZE) {
3012                pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
3013                        str);
3014                return 1;
3015        }
3016
3017        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3018
3019        cmdline_maps                    = true;
3020        i                               = early_ioapic_map_size++;
3021        early_ioapic_map[i].id          = id;
3022        early_ioapic_map[i].devid       = devid;
3023        early_ioapic_map[i].cmd_line    = true;
3024
3025        return 1;
3026}
3027
3028static int __init parse_ivrs_hpet(char *str)
3029{
3030        unsigned int bus, dev, fn;
3031        int ret, id, i;
3032        u16 devid;
3033
3034        ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
3035
3036        if (ret != 4) {
3037                pr_err("Invalid command line: ivrs_hpet%s\n", str);
3038                return 1;
3039        }
3040
3041        if (early_hpet_map_size == EARLY_MAP_SIZE) {
3042                pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
3043                        str);
3044                return 1;
3045        }
3046
3047        devid = ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3048
3049        cmdline_maps                    = true;
3050        i                               = early_hpet_map_size++;
3051        early_hpet_map[i].id            = id;
3052        early_hpet_map[i].devid         = devid;
3053        early_hpet_map[i].cmd_line      = true;
3054
3055        return 1;
3056}
3057
3058static int __init parse_ivrs_acpihid(char *str)
3059{
3060        u32 bus, dev, fn;
3061        char *hid, *uid, *p;
3062        char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
3063        int ret, i;
3064
3065        ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
3066        if (ret != 4) {
3067                pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
3068                return 1;
3069        }
3070
3071        p = acpiid;
3072        hid = strsep(&p, ":");
3073        uid = p;
3074
3075        if (!hid || !(*hid) || !uid) {
3076                pr_err("Invalid command line: hid or uid\n");
3077                return 1;
3078        }
3079
3080        i = early_acpihid_map_size++;
3081        memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
3082        memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
3083        early_acpihid_map[i].devid =
3084                ((bus & 0xff) << 8) | ((dev & 0x1f) << 3) | (fn & 0x7);
3085        early_acpihid_map[i].cmd_line   = true;
3086
3087        return 1;
3088}
3089
/*
 * Bind the kernel command-line option strings to the parsers defined
 * above. The three ivrs_* option strings carry no trailing '=': their
 * parsers' sscanf() formats begin with '[', i.e. the handler receives the
 * text that immediately follows the option name.
 */
__setup("amd_iommu_dump",       parse_amd_iommu_dump);
__setup("amd_iommu=",           parse_amd_iommu_options);
__setup("amd_iommu_intr=",      parse_amd_iommu_intr);
__setup("ivrs_ioapic",          parse_ivrs_ioapic);
__setup("ivrs_hpet",            parse_ivrs_hpet);
__setup("ivrs_acpihid",         parse_ivrs_acpihid);

/*
 * Hook amd_iommu_detect() into the x86 IOMMU init table.
 * NOTE(review): the arguments are presumably (detect, depend, init,
 * late_init), which would order this entry relative to
 * gart_iommu_hole_init - confirm against asm/iommu_table.h.
 */
IOMMU_INIT_FINISH(amd_iommu_detect,
                  gart_iommu_hole_init,
                  NULL,
                  NULL);
3101
3102bool amd_iommu_v2_supported(void)
3103{
3104        return amd_iommu_v2_present;
3105}
3106EXPORT_SYMBOL(amd_iommu_v2_supported);
3107
3108struct amd_iommu *get_amd_iommu(unsigned int idx)
3109{
3110        unsigned int i = 0;
3111        struct amd_iommu *iommu;
3112
3113        for_each_iommu(iommu)
3114                if (i++ == idx)
3115                        return iommu;
3116        return NULL;
3117}
3118EXPORT_SYMBOL(get_amd_iommu);
3119
3120/****************************************************************************
3121 *
3122 * IOMMU EFR Performance Counter support functionality. This code allows
3123 * access to the IOMMU PC functionality.
3124 *
3125 ****************************************************************************/
3126
3127u8 amd_iommu_pc_get_max_banks(unsigned int idx)
3128{
3129        struct amd_iommu *iommu = get_amd_iommu(idx);
3130
3131        if (iommu)
3132                return iommu->max_banks;
3133
3134        return 0;
3135}
3136EXPORT_SYMBOL(amd_iommu_pc_get_max_banks);
3137
3138bool amd_iommu_pc_supported(void)
3139{
3140        return amd_iommu_pc_present;
3141}
3142EXPORT_SYMBOL(amd_iommu_pc_supported);
3143
3144u8 amd_iommu_pc_get_max_counters(unsigned int idx)
3145{
3146        struct amd_iommu *iommu = get_amd_iommu(idx);
3147
3148        if (iommu)
3149                return iommu->max_counters;
3150
3151        return 0;
3152}
3153EXPORT_SYMBOL(amd_iommu_pc_get_max_counters);
3154
3155static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
3156                                u8 fxn, u64 *value, bool is_write)
3157{
3158        u32 offset;
3159        u32 max_offset_lim;
3160
3161        /* Make sure the IOMMU PC resource is available */
3162        if (!amd_iommu_pc_present)
3163                return -ENODEV;
3164
3165        /* Check for valid iommu and pc register indexing */
3166        if (WARN_ON(!iommu || (fxn > 0x28) || (fxn & 7)))
3167                return -ENODEV;
3168
3169        offset = (u32)(((0x40 | bank) << 12) | (cntr << 8) | fxn);
3170
3171        /* Limit the offset to the hw defined mmio region aperture */
3172        max_offset_lim = (u32)(((0x40 | iommu->max_banks) << 12) |
3173                                (iommu->max_counters << 8) | 0x28);
3174        if ((offset < MMIO_CNTR_REG_OFFSET) ||
3175            (offset > max_offset_lim))
3176                return -EINVAL;
3177
3178        if (is_write) {
3179                u64 val = *value & GENMASK_ULL(47, 0);
3180
3181                writel((u32)val, iommu->mmio_base + offset);
3182                writel((val >> 32), iommu->mmio_base + offset + 4);
3183        } else {
3184                *value = readl(iommu->mmio_base + offset + 4);
3185                *value <<= 32;
3186                *value |= readl(iommu->mmio_base + offset);
3187                *value &= GENMASK_ULL(47, 0);
3188        }
3189
3190        return 0;
3191}
3192
3193int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3194{
3195        if (!iommu)
3196                return -EINVAL;
3197
3198        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
3199}
3200EXPORT_SYMBOL(amd_iommu_pc_get_reg);
3201
3202int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
3203{
3204        if (!iommu)
3205                return -EINVAL;
3206
3207        return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
3208}
3209EXPORT_SYMBOL(amd_iommu_pc_set_reg);
3210