linux/drivers/iommu/intel_irq_remapping.c
<<
>>
Prefs
   1#include <linux/interrupt.h>
   2#include <linux/dmar.h>
   3#include <linux/spinlock.h>
   4#include <linux/slab.h>
   5#include <linux/jiffies.h>
   6#include <linux/hpet.h>
   7#include <linux/pci.h>
   8#include <linux/irq.h>
   9#include <linux/intel-iommu.h>
  10#include <linux/acpi.h>
  11#include <asm/io_apic.h>
  12#include <asm/smp.h>
  13#include <asm/cpu.h>
  14#include <asm/irq_remapping.h>
  15#include <asm/pci-direct.h>
  16#include <asm/msidef.h>
  17
  18#include "irq_remapping.h"
  19
  20struct ioapic_scope {
  21        struct intel_iommu *iommu;
  22        unsigned int id;
  23        unsigned int bus;       /* PCI bus number */
  24        unsigned int devfn;     /* PCI devfn number */
  25};
  26
  27struct hpet_scope {
  28        struct intel_iommu *iommu;
  29        u8 id;
  30        unsigned int bus;
  31        unsigned int devfn;
  32};
  33
  34#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
  35#define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8)
  36
  37static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
  38static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  39
  40/*
  41 * Lock ordering:
  42 * ->dmar_global_lock
  43 *      ->irq_2_ir_lock
  44 *              ->qi->q_lock
  45 *      ->iommu->register_lock
  46 * Note:
  47 * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called
  48 * in single-threaded environment with interrupt disabled, so no need to take
  49 * the dmar_global_lock.
  50 */
  51static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
  52
  53static int __init parse_ioapics_under_ir(void);
  54
  55static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
  56{
  57        struct irq_cfg *cfg = irq_cfg(irq);
  58        return cfg ? &cfg->irq_2_iommu : NULL;
  59}
  60
  61static int get_irte(int irq, struct irte *entry)
  62{
  63        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
  64        unsigned long flags;
  65        int index;
  66
  67        if (!entry || !irq_iommu)
  68                return -1;
  69
  70        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
  71
  72        if (unlikely(!irq_iommu->iommu)) {
  73                raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
  74                return -1;
  75        }
  76
  77        index = irq_iommu->irte_index + irq_iommu->sub_handle;
  78        *entry = *(irq_iommu->iommu->ir_table->base + index);
  79
  80        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
  81        return 0;
  82}
  83
  84static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
  85{
  86        struct ir_table *table = iommu->ir_table;
  87        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
  88        struct irq_cfg *cfg = irq_cfg(irq);
  89        unsigned int mask = 0;
  90        unsigned long flags;
  91        int index;
  92
  93        if (!count || !irq_iommu)
  94                return -1;
  95
  96        if (count > 1) {
  97                count = __roundup_pow_of_two(count);
  98                mask = ilog2(count);
  99        }
 100
 101        if (mask > ecap_max_handle_mask(iommu->ecap)) {
 102                printk(KERN_ERR
 103                       "Requested mask %x exceeds the max invalidation handle"
 104                       " mask value %Lx\n", mask,
 105                       ecap_max_handle_mask(iommu->ecap));
 106                return -1;
 107        }
 108
 109        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 110        index = bitmap_find_free_region(table->bitmap,
 111                                        INTR_REMAP_TABLE_ENTRIES, mask);
 112        if (index < 0) {
 113                pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
 114        } else {
 115                cfg->remapped = 1;
 116                irq_iommu->iommu = iommu;
 117                irq_iommu->irte_index =  index;
 118                irq_iommu->sub_handle = 0;
 119                irq_iommu->irte_mask = mask;
 120        }
 121        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 122
 123        return index;
 124}
 125
 126static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
 127{
 128        struct qi_desc desc;
 129
 130        desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
 131                   | QI_IEC_SELECTIVE;
 132        desc.high = 0;
 133
 134        return qi_submit_sync(&desc, iommu);
 135}
 136
 137static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
 138{
 139        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 140        unsigned long flags;
 141        int index;
 142
 143        if (!irq_iommu)
 144                return -1;
 145
 146        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 147        *sub_handle = irq_iommu->sub_handle;
 148        index = irq_iommu->irte_index;
 149        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 150        return index;
 151}
 152
 153static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 154{
 155        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 156        struct irq_cfg *cfg = irq_cfg(irq);
 157        unsigned long flags;
 158
 159        if (!irq_iommu)
 160                return -1;
 161
 162        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 163
 164        cfg->remapped = 1;
 165        irq_iommu->iommu = iommu;
 166        irq_iommu->irte_index = index;
 167        irq_iommu->sub_handle = subhandle;
 168        irq_iommu->irte_mask = 0;
 169
 170        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 171
 172        return 0;
 173}
 174
 175static int modify_irte(int irq, struct irte *irte_modified)
 176{
 177        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 178        struct intel_iommu *iommu;
 179        unsigned long flags;
 180        struct irte *irte;
 181        int rc, index;
 182
 183        if (!irq_iommu)
 184                return -1;
 185
 186        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 187
 188        iommu = irq_iommu->iommu;
 189
 190        index = irq_iommu->irte_index + irq_iommu->sub_handle;
 191        irte = &iommu->ir_table->base[index];
 192
 193        set_64bit(&irte->low, irte_modified->low);
 194        set_64bit(&irte->high, irte_modified->high);
 195        __iommu_flush_cache(iommu, irte, sizeof(*irte));
 196
 197        rc = qi_flush_iec(iommu, index, 0);
 198        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 199
 200        return rc;
 201}
 202
 203static struct intel_iommu *map_hpet_to_ir(u8 hpet_id)
 204{
 205        int i;
 206
 207        for (i = 0; i < MAX_HPET_TBS; i++)
 208                if (ir_hpet[i].id == hpet_id && ir_hpet[i].iommu)
 209                        return ir_hpet[i].iommu;
 210        return NULL;
 211}
 212
 213static struct intel_iommu *map_ioapic_to_ir(int apic)
 214{
 215        int i;
 216
 217        for (i = 0; i < MAX_IO_APICS; i++)
 218                if (ir_ioapic[i].id == apic && ir_ioapic[i].iommu)
 219                        return ir_ioapic[i].iommu;
 220        return NULL;
 221}
 222
 223static struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
 224{
 225        struct dmar_drhd_unit *drhd;
 226
 227        drhd = dmar_find_matched_drhd_unit(dev);
 228        if (!drhd)
 229                return NULL;
 230
 231        return drhd->iommu;
 232}
 233
 234static int clear_entries(struct irq_2_iommu *irq_iommu)
 235{
 236        struct irte *start, *entry, *end;
 237        struct intel_iommu *iommu;
 238        int index;
 239
 240        if (irq_iommu->sub_handle)
 241                return 0;
 242
 243        iommu = irq_iommu->iommu;
 244        index = irq_iommu->irte_index + irq_iommu->sub_handle;
 245
 246        start = iommu->ir_table->base + index;
 247        end = start + (1 << irq_iommu->irte_mask);
 248
 249        for (entry = start; entry < end; entry++) {
 250                set_64bit(&entry->low, 0);
 251                set_64bit(&entry->high, 0);
 252        }
 253        bitmap_release_region(iommu->ir_table->bitmap, index,
 254                              irq_iommu->irte_mask);
 255
 256        return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
 257}
 258
 259static int free_irte(int irq)
 260{
 261        struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
 262        unsigned long flags;
 263        int rc;
 264
 265        if (!irq_iommu)
 266                return -1;
 267
 268        raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
 269
 270        rc = clear_entries(irq_iommu);
 271
 272        irq_iommu->iommu = NULL;
 273        irq_iommu->irte_index = 0;
 274        irq_iommu->sub_handle = 0;
 275        irq_iommu->irte_mask = 0;
 276
 277        raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
 278
 279        return rc;
 280}
 281
 282/*
 283 * source validation type
 284 */
 285#define SVT_NO_VERIFY           0x0  /* no verification is required */
 286#define SVT_VERIFY_SID_SQ       0x1  /* verify using SID and SQ fields */
 287#define SVT_VERIFY_BUS          0x2  /* verify bus of request-id */
 288
 289/*
 290 * source-id qualifier
 291 */
 292#define SQ_ALL_16       0x0  /* verify all 16 bits of request-id */
 293#define SQ_13_IGNORE_1  0x1  /* verify most significant 13 bits, ignore
 294                              * the third least significant bit
 295                              */
 296#define SQ_13_IGNORE_2  0x2  /* verify most significant 13 bits, ignore
 297                              * the second and third least significant bits
 298                              */
 299#define SQ_13_IGNORE_3  0x3  /* verify most significant 13 bits, ignore
 300                              * the least three significant bits
 301                              */
 302
 303/*
 304 * set SVT, SQ and SID fields of irte to verify
 305 * source ids of interrupt requests
 306 */
 307static void set_irte_sid(struct irte *irte, unsigned int svt,
 308                         unsigned int sq, unsigned int sid)
 309{
 310        if (disable_sourceid_checking)
 311                svt = SVT_NO_VERIFY;
 312        irte->svt = svt;
 313        irte->sq = sq;
 314        irte->sid = sid;
 315}
 316
 317static int set_ioapic_sid(struct irte *irte, int apic)
 318{
 319        int i;
 320        u16 sid = 0;
 321
 322        if (!irte)
 323                return -1;
 324
 325        down_read(&dmar_global_lock);
 326        for (i = 0; i < MAX_IO_APICS; i++) {
 327                if (ir_ioapic[i].iommu && ir_ioapic[i].id == apic) {
 328                        sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn;
 329                        break;
 330                }
 331        }
 332        up_read(&dmar_global_lock);
 333
 334        if (sid == 0) {
 335                pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic);
 336                return -1;
 337        }
 338
 339        set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, sid);
 340
 341        return 0;
 342}
 343
 344static int set_hpet_sid(struct irte *irte, u8 id)
 345{
 346        int i;
 347        u16 sid = 0;
 348
 349        if (!irte)
 350                return -1;
 351
 352        down_read(&dmar_global_lock);
 353        for (i = 0; i < MAX_HPET_TBS; i++) {
 354                if (ir_hpet[i].iommu && ir_hpet[i].id == id) {
 355                        sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn;
 356                        break;
 357                }
 358        }
 359        up_read(&dmar_global_lock);
 360
 361        if (sid == 0) {
 362                pr_warning("Failed to set source-id of HPET block (%d)\n", id);
 363                return -1;
 364        }
 365
 366        /*
 367         * Should really use SQ_ALL_16. Some platforms are broken.
 368         * While we figure out the right quirks for these broken platforms, use
 369         * SQ_13_IGNORE_3 for now.
 370         */
 371        set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_13_IGNORE_3, sid);
 372
 373        return 0;
 374}
 375
 376struct set_msi_sid_data {
 377        struct pci_dev *pdev;
 378        u16 alias;
 379};
 380
 381static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque)
 382{
 383        struct set_msi_sid_data *data = opaque;
 384
 385        data->pdev = pdev;
 386        data->alias = alias;
 387
 388        return 0;
 389}
 390
 391static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 392{
 393        struct set_msi_sid_data data;
 394
 395        if (!irte || !dev)
 396                return -1;
 397
 398        pci_for_each_dma_alias(dev, set_msi_sid_cb, &data);
 399
 400        /*
 401         * DMA alias provides us with a PCI device and alias.  The only case
 402         * where the it will return an alias on a different bus than the
 403         * device is the case of a PCIe-to-PCI bridge, where the alias is for
 404         * the subordinate bus.  In this case we can only verify the bus.
 405         *
 406         * If the alias device is on a different bus than our source device
 407         * then we have a topology based alias, use it.
 408         *
 409         * Otherwise, the alias is for a device DMA quirk and we cannot
 410         * assume that MSI uses the same requester ID.  Therefore use the
 411         * original device.
 412         */
 413        if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number)
 414                set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16,
 415                             PCI_DEVID(PCI_BUS_NUM(data.alias),
 416                                       dev->bus->number));
 417        else if (data.pdev->bus->number != dev->bus->number)
 418                set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias);
 419        else
 420                set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16,
 421                             PCI_DEVID(dev->bus->number, dev->devfn));
 422
 423        return 0;
 424}
 425
 426static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode)
 427{
 428        u64 addr;
 429        u32 sts;
 430        unsigned long flags;
 431
 432        addr = virt_to_phys((void *)iommu->ir_table->base);
 433
 434        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 435
 436        dmar_writeq(iommu->reg + DMAR_IRTA_REG,
 437                    (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
 438
 439        /* Set interrupt-remapping table pointer */
 440        writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG);
 441
 442        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 443                      readl, (sts & DMA_GSTS_IRTPS), sts);
 444        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 445
 446        /*
 447         * global invalidation of interrupt entry cache before enabling
 448         * interrupt-remapping.
 449         */
 450        qi_global_iec(iommu);
 451
 452        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 453
 454        /* Enable interrupt-remapping */
 455        iommu->gcmd |= DMA_GCMD_IRE;
 456        iommu->gcmd &= ~DMA_GCMD_CFI;  /* Block compatibility-format MSIs */
 457        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 458
 459        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 460                      readl, (sts & DMA_GSTS_IRES), sts);
 461
 462        /*
 463         * With CFI clear in the Global Command register, we should be
 464         * protected from dangerous (i.e. compatibility) interrupts
 465         * regardless of x2apic status.  Check just to be sure.
 466         */
 467        if (sts & DMA_GSTS_CFIS)
 468                WARN(1, KERN_WARNING
 469                        "Compatibility-format IRQs enabled despite intr remapping;\n"
 470                        "you are vulnerable to IRQ injection.\n");
 471
 472        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 473}
 474
 475static int intel_setup_irq_remapping(struct intel_iommu *iommu)
 476{
 477        struct ir_table *ir_table;
 478        struct page *pages;
 479        unsigned long *bitmap;
 480
 481        if (iommu->ir_table)
 482                return 0;
 483
 484        ir_table = kzalloc(sizeof(struct ir_table), GFP_ATOMIC);
 485        if (!ir_table)
 486                return -ENOMEM;
 487
 488        pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
 489                                 INTR_REMAP_PAGE_ORDER);
 490
 491        if (!pages) {
 492                pr_err("IR%d: failed to allocate pages of order %d\n",
 493                       iommu->seq_id, INTR_REMAP_PAGE_ORDER);
 494                goto out_free_table;
 495        }
 496
 497        bitmap = kcalloc(BITS_TO_LONGS(INTR_REMAP_TABLE_ENTRIES),
 498                         sizeof(long), GFP_ATOMIC);
 499        if (bitmap == NULL) {
 500                pr_err("IR%d: failed to allocate bitmap\n", iommu->seq_id);
 501                goto out_free_pages;
 502        }
 503
 504        ir_table->base = page_address(pages);
 505        ir_table->bitmap = bitmap;
 506        iommu->ir_table = ir_table;
 507        return 0;
 508
 509out_free_pages:
 510        __free_pages(pages, INTR_REMAP_PAGE_ORDER);
 511out_free_table:
 512        kfree(ir_table);
 513        return -ENOMEM;
 514}
 515
 516static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
 517{
 518        if (iommu && iommu->ir_table) {
 519                free_pages((unsigned long)iommu->ir_table->base,
 520                           INTR_REMAP_PAGE_ORDER);
 521                kfree(iommu->ir_table->bitmap);
 522                kfree(iommu->ir_table);
 523                iommu->ir_table = NULL;
 524        }
 525}
 526
 527/*
 528 * Disable Interrupt Remapping.
 529 */
 530static void iommu_disable_irq_remapping(struct intel_iommu *iommu)
 531{
 532        unsigned long flags;
 533        u32 sts;
 534
 535        if (!ecap_ir_support(iommu->ecap))
 536                return;
 537
 538        /*
 539         * global invalidation of interrupt entry cache before disabling
 540         * interrupt-remapping.
 541         */
 542        qi_global_iec(iommu);
 543
 544        raw_spin_lock_irqsave(&iommu->register_lock, flags);
 545
 546        sts = dmar_readq(iommu->reg + DMAR_GSTS_REG);
 547        if (!(sts & DMA_GSTS_IRES))
 548                goto end;
 549
 550        iommu->gcmd &= ~DMA_GCMD_IRE;
 551        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
 552
 553        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
 554                      readl, !(sts & DMA_GSTS_IRES), sts);
 555
 556end:
 557        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
 558}
 559
 560static int __init dmar_x2apic_optout(void)
 561{
 562        struct acpi_table_dmar *dmar;
 563        dmar = (struct acpi_table_dmar *)dmar_tbl;
 564        if (!dmar || no_x2apic_optout)
 565                return 0;
 566        return dmar->flags & DMAR_X2APIC_OPT_OUT;
 567}
 568
 569static int __init intel_irq_remapping_supported(void)
 570{
 571        struct dmar_drhd_unit *drhd;
 572        struct intel_iommu *iommu;
 573
 574        if (disable_irq_remap)
 575                return 0;
 576        if (irq_remap_broken) {
 577                printk(KERN_WARNING
 578                        "This system BIOS has enabled interrupt remapping\n"
 579                        "on a chipset that contains an erratum making that\n"
 580                        "feature unstable.  To maintain system stability\n"
 581                        "interrupt remapping is being disabled.  Please\n"
 582                        "contact your BIOS vendor for an update\n");
 583                add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
 584                disable_irq_remap = 1;
 585                return 0;
 586        }
 587
 588        if (!dmar_ir_support())
 589                return 0;
 590
 591        for_each_iommu(iommu, drhd)
 592                if (!ecap_ir_support(iommu->ecap))
 593                        return 0;
 594
 595        return 1;
 596}
 597
 598static int __init intel_enable_irq_remapping(void)
 599{
 600        struct dmar_drhd_unit *drhd;
 601        struct intel_iommu *iommu;
 602        bool x2apic_present;
 603        int setup = 0;
 604        int eim = 0;
 605
 606        x2apic_present = x2apic_supported();
 607
 608        if (parse_ioapics_under_ir() != 1) {
 609                printk(KERN_INFO "Not enable interrupt remapping\n");
 610                goto error;
 611        }
 612
 613        if (x2apic_present) {
 614                pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
 615
 616                eim = !dmar_x2apic_optout();
 617                if (!eim)
 618                        printk(KERN_WARNING
 619                                "Your BIOS is broken and requested that x2apic be disabled.\n"
 620                                "This will slightly decrease performance.\n"
 621                                "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
 622        }
 623
 624        for_each_iommu(iommu, drhd) {
 625                /*
 626                 * If the queued invalidation is already initialized,
 627                 * shouldn't disable it.
 628                 */
 629                if (iommu->qi)
 630                        continue;
 631
 632                /*
 633                 * Clear previous faults.
 634                 */
 635                dmar_fault(-1, iommu);
 636
 637                /*
 638                 * Disable intr remapping and queued invalidation, if already
 639                 * enabled prior to OS handover.
 640                 */
 641                iommu_disable_irq_remapping(iommu);
 642
 643                dmar_disable_qi(iommu);
 644        }
 645
 646        /*
 647         * check for the Interrupt-remapping support
 648         */
 649        for_each_iommu(iommu, drhd) {
 650                if (!ecap_ir_support(iommu->ecap))
 651                        continue;
 652
 653                if (eim && !ecap_eim_support(iommu->ecap)) {
 654                        printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
 655                               " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
 656                        goto error;
 657                }
 658        }
 659
 660        /*
 661         * Enable queued invalidation for all the DRHD's.
 662         */
 663        for_each_iommu(iommu, drhd) {
 664                int ret = dmar_enable_qi(iommu);
 665
 666                if (ret) {
 667                        printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
 668                               " invalidation, ecap %Lx, ret %d\n",
 669                               drhd->reg_base_addr, iommu->ecap, ret);
 670                        goto error;
 671                }
 672        }
 673
 674        /*
 675         * Setup Interrupt-remapping for all the DRHD's now.
 676         */
 677        for_each_iommu(iommu, drhd) {
 678                if (!ecap_ir_support(iommu->ecap))
 679                        continue;
 680
 681                if (intel_setup_irq_remapping(iommu))
 682                        goto error;
 683
 684                iommu_set_irq_remapping(iommu, eim);
 685                setup = 1;
 686        }
 687
 688        if (!setup)
 689                goto error;
 690
 691        irq_remapping_enabled = 1;
 692
 693        /*
 694         * VT-d has a different layout for IO-APIC entries when
 695         * interrupt remapping is enabled. So it needs a special routine
 696         * to print IO-APIC entries for debugging purposes too.
 697         */
 698        x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
 699
 700        pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
 701
 702        return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
 703
 704error:
 705        for_each_iommu(iommu, drhd)
 706                if (ecap_ir_support(iommu->ecap)) {
 707                        iommu_disable_irq_remapping(iommu);
 708                        intel_teardown_irq_remapping(iommu);
 709                }
 710
 711        if (x2apic_present)
 712                pr_warn("Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");
 713
 714        return -1;
 715}
 716
 717static int ir_parse_one_hpet_scope(struct acpi_dmar_device_scope *scope,
 718                                   struct intel_iommu *iommu,
 719                                   struct acpi_dmar_hardware_unit *drhd)
 720{
 721        struct acpi_dmar_pci_path *path;
 722        u8 bus;
 723        int count, free = -1;
 724
 725        bus = scope->bus;
 726        path = (struct acpi_dmar_pci_path *)(scope + 1);
 727        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
 728                / sizeof(struct acpi_dmar_pci_path);
 729
 730        while (--count > 0) {
 731                /*
 732                 * Access PCI directly due to the PCI
 733                 * subsystem isn't initialized yet.
 734                 */
 735                bus = read_pci_config_byte(bus, path->device, path->function,
 736                                           PCI_SECONDARY_BUS);
 737                path++;
 738        }
 739
 740        for (count = 0; count < MAX_HPET_TBS; count++) {
 741                if (ir_hpet[count].iommu == iommu &&
 742                    ir_hpet[count].id == scope->enumeration_id)
 743                        return 0;
 744                else if (ir_hpet[count].iommu == NULL && free == -1)
 745                        free = count;
 746        }
 747        if (free == -1) {
 748                pr_warn("Exceeded Max HPET blocks\n");
 749                return -ENOSPC;
 750        }
 751
 752        ir_hpet[free].iommu = iommu;
 753        ir_hpet[free].id    = scope->enumeration_id;
 754        ir_hpet[free].bus   = bus;
 755        ir_hpet[free].devfn = PCI_DEVFN(path->device, path->function);
 756        pr_info("HPET id %d under DRHD base 0x%Lx\n",
 757                scope->enumeration_id, drhd->address);
 758
 759        return 0;
 760}
 761
 762static int ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
 763                                     struct intel_iommu *iommu,
 764                                     struct acpi_dmar_hardware_unit *drhd)
 765{
 766        struct acpi_dmar_pci_path *path;
 767        u8 bus;
 768        int count, free = -1;
 769
 770        bus = scope->bus;
 771        path = (struct acpi_dmar_pci_path *)(scope + 1);
 772        count = (scope->length - sizeof(struct acpi_dmar_device_scope))
 773                / sizeof(struct acpi_dmar_pci_path);
 774
 775        while (--count > 0) {
 776                /*
 777                 * Access PCI directly due to the PCI
 778                 * subsystem isn't initialized yet.
 779                 */
 780                bus = read_pci_config_byte(bus, path->device, path->function,
 781                                           PCI_SECONDARY_BUS);
 782                path++;
 783        }
 784
 785        for (count = 0; count < MAX_IO_APICS; count++) {
 786                if (ir_ioapic[count].iommu == iommu &&
 787                    ir_ioapic[count].id == scope->enumeration_id)
 788                        return 0;
 789                else if (ir_ioapic[count].iommu == NULL && free == -1)
 790                        free = count;
 791        }
 792        if (free == -1) {
 793                pr_warn("Exceeded Max IO APICS\n");
 794                return -ENOSPC;
 795        }
 796
 797        ir_ioapic[free].bus   = bus;
 798        ir_ioapic[free].devfn = PCI_DEVFN(path->device, path->function);
 799        ir_ioapic[free].iommu = iommu;
 800        ir_ioapic[free].id    = scope->enumeration_id;
 801        pr_info("IOAPIC id %d under DRHD base  0x%Lx IOMMU %d\n",
 802                scope->enumeration_id, drhd->address, iommu->seq_id);
 803
 804        return 0;
 805}
 806
 807static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
 808                                      struct intel_iommu *iommu)
 809{
 810        int ret = 0;
 811        struct acpi_dmar_hardware_unit *drhd;
 812        struct acpi_dmar_device_scope *scope;
 813        void *start, *end;
 814
 815        drhd = (struct acpi_dmar_hardware_unit *)header;
 816        start = (void *)(drhd + 1);
 817        end = ((void *)drhd) + header->length;
 818
 819        while (start < end && ret == 0) {
 820                scope = start;
 821                if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC)
 822                        ret = ir_parse_one_ioapic_scope(scope, iommu, drhd);
 823                else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET)
 824                        ret = ir_parse_one_hpet_scope(scope, iommu, drhd);
 825                start += scope->length;
 826        }
 827
 828        return ret;
 829}
 830
 831static void ir_remove_ioapic_hpet_scope(struct intel_iommu *iommu)
 832{
 833        int i;
 834
 835        for (i = 0; i < MAX_HPET_TBS; i++)
 836                if (ir_hpet[i].iommu == iommu)
 837                        ir_hpet[i].iommu = NULL;
 838
 839        for (i = 0; i < MAX_IO_APICS; i++)
 840                if (ir_ioapic[i].iommu == iommu)
 841                        ir_ioapic[i].iommu = NULL;
 842}
 843
 844/*
 845 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
 846 * hardware unit.
 847 */
 848static int __init parse_ioapics_under_ir(void)
 849{
 850        struct dmar_drhd_unit *drhd;
 851        struct intel_iommu *iommu;
 852        int ir_supported = 0;
 853        int ioapic_idx;
 854
 855        for_each_iommu(iommu, drhd)
 856                if (ecap_ir_support(iommu->ecap)) {
 857                        if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
 858                                return -1;
 859
 860                        ir_supported = 1;
 861                }
 862
 863        if (!ir_supported)
 864                return 0;
 865
 866        for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
 867                int ioapic_id = mpc_ioapic_id(ioapic_idx);
 868                if (!map_ioapic_to_ir(ioapic_id)) {
 869                        pr_err(FW_BUG "ioapic %d has no mapping iommu, "
 870                               "interrupt remapping will be disabled\n",
 871                               ioapic_id);
 872                        return -1;
 873                }
 874        }
 875
 876        return 1;
 877}
 878
 879static int __init ir_dev_scope_init(void)
 880{
 881        int ret;
 882
 883        if (!irq_remapping_enabled)
 884                return 0;
 885
 886        down_write(&dmar_global_lock);
 887        ret = dmar_dev_scope_init();
 888        up_write(&dmar_global_lock);
 889
 890        return ret;
 891}
 892rootfs_initcall(ir_dev_scope_init);
 893
 894static void disable_irq_remapping(void)
 895{
 896        struct dmar_drhd_unit *drhd;
 897        struct intel_iommu *iommu = NULL;
 898
 899        /*
 900         * Disable Interrupt-remapping for all the DRHD's now.
 901         */
 902        for_each_iommu(iommu, drhd) {
 903                if (!ecap_ir_support(iommu->ecap))
 904                        continue;
 905
 906                iommu_disable_irq_remapping(iommu);
 907        }
 908}
 909
 910static int reenable_irq_remapping(int eim)
 911{
 912        struct dmar_drhd_unit *drhd;
 913        int setup = 0;
 914        struct intel_iommu *iommu = NULL;
 915
 916        for_each_iommu(iommu, drhd)
 917                if (iommu->qi)
 918                        dmar_reenable_qi(iommu);
 919
 920        /*
 921         * Setup Interrupt-remapping for all the DRHD's now.
 922         */
 923        for_each_iommu(iommu, drhd) {
 924                if (!ecap_ir_support(iommu->ecap))
 925                        continue;
 926
 927                /* Set up interrupt remapping for iommu.*/
 928                iommu_set_irq_remapping(iommu, eim);
 929                setup = 1;
 930        }
 931
 932        if (!setup)
 933                goto error;
 934
 935        return 0;
 936
 937error:
 938        /*
 939         * handle error condition gracefully here!
 940         */
 941        return -1;
 942}
 943
 944static void prepare_irte(struct irte *irte, int vector,
 945                         unsigned int dest)
 946{
 947        memset(irte, 0, sizeof(*irte));
 948
 949        irte->present = 1;
 950        irte->dst_mode = apic->irq_dest_mode;
 951        /*
 952         * Trigger mode in the IRTE will always be edge, and for IO-APIC, the
 953         * actual level or edge trigger will be setup in the IO-APIC
 954         * RTE. This will help simplify level triggered irq migration.
 955         * For more details, see the comments (in io_apic.c) explainig IO-APIC
 956         * irq migration in the presence of interrupt-remapping.
 957        */
 958        irte->trigger_mode = 0;
 959        irte->dlvry_mode = apic->irq_delivery_mode;
 960        irte->vector = vector;
 961        irte->dest_id = IRTE_DEST(dest);
 962        irte->redir_hint = 1;
 963}
 964
 965static int intel_setup_ioapic_entry(int irq,
 966                                    struct IO_APIC_route_entry *route_entry,
 967                                    unsigned int destination, int vector,
 968                                    struct io_apic_irq_attr *attr)
 969{
 970        int ioapic_id = mpc_ioapic_id(attr->ioapic);
 971        struct intel_iommu *iommu;
 972        struct IR_IO_APIC_route_entry *entry;
 973        struct irte irte;
 974        int index;
 975
 976        down_read(&dmar_global_lock);
 977        iommu = map_ioapic_to_ir(ioapic_id);
 978        if (!iommu) {
 979                pr_warn("No mapping iommu for ioapic %d\n", ioapic_id);
 980                index = -ENODEV;
 981        } else {
 982                index = alloc_irte(iommu, irq, 1);
 983                if (index < 0) {
 984                        pr_warn("Failed to allocate IRTE for ioapic %d\n",
 985                                ioapic_id);
 986                        index = -ENOMEM;
 987                }
 988        }
 989        up_read(&dmar_global_lock);
 990        if (index < 0)
 991                return index;
 992
 993        prepare_irte(&irte, vector, destination);
 994
 995        /* Set source-id of interrupt request */
 996        set_ioapic_sid(&irte, ioapic_id);
 997
 998        modify_irte(irq, &irte);
 999
1000        apic_printk(APIC_VERBOSE, KERN_DEBUG "IOAPIC[%d]: "
1001                "Set IRTE entry (P:%d FPD:%d Dst_Mode:%d "
1002                "Redir_hint:%d Trig_Mode:%d Dlvry_Mode:%X "
1003                "Avail:%X Vector:%02X Dest:%08X "
1004                "SID:%04X SQ:%X SVT:%X)\n",
1005                attr->ioapic, irte.present, irte.fpd, irte.dst_mode,
1006                irte.redir_hint, irte.trigger_mode, irte.dlvry_mode,
1007                irte.avail, irte.vector, irte.dest_id,
1008                irte.sid, irte.sq, irte.svt);
1009
1010        entry = (struct IR_IO_APIC_route_entry *)route_entry;
1011        memset(entry, 0, sizeof(*entry));
1012
1013        entry->index2   = (index >> 15) & 0x1;
1014        entry->zero     = 0;
1015        entry->format   = 1;
1016        entry->index    = (index & 0x7fff);
1017        /*
1018         * IO-APIC RTE will be configured with virtual vector.
1019         * irq handler will do the explicit EOI to the io-apic.
1020         */
1021        entry->vector   = attr->ioapic_pin;
1022        entry->mask     = 0;                    /* enable IRQ */
1023        entry->trigger  = attr->trigger;
1024        entry->polarity = attr->polarity;
1025
1026        /* Mask level triggered irqs.
1027         * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1028         */
1029        if (attr->trigger)
1030                entry->mask = 1;
1031
1032        return 0;
1033}
1034
1035/*
1036 * Migrate the IO-APIC irq in the presence of intr-remapping.
1037 *
1038 * For both level and edge triggered, irq migration is a simple atomic
1039 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
1040 *
1041 * For level triggered, we eliminate the io-apic RTE modification (with the
1042 * updated vector information), by using a virtual vector (io-apic pin number).
1043 * Real vector that is used for interrupting cpu will be coming from
1044 * the interrupt-remapping table entry.
1045 *
1046 * As the migration is a simple atomic update of IRTE, the same mechanism
1047 * is used to migrate MSI irq's in the presence of interrupt-remapping.
1048 */
1049static int
1050intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
1051                          bool force)
1052{
1053        struct irq_cfg *cfg = irqd_cfg(data);
1054        unsigned int dest, irq = data->irq;
1055        struct irte irte;
1056        int err;
1057
1058        if (!config_enabled(CONFIG_SMP))
1059                return -EINVAL;
1060
1061        if (!cpumask_intersects(mask, cpu_online_mask))
1062                return -EINVAL;
1063
1064        if (get_irte(irq, &irte))
1065                return -EBUSY;
1066
1067        err = assign_irq_vector(irq, cfg, mask);
1068        if (err)
1069                return err;
1070
1071        err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
1072        if (err) {
1073                if (assign_irq_vector(irq, cfg, data->affinity))
1074                        pr_err("Failed to recover vector for irq %d\n", irq);
1075                return err;
1076        }
1077
1078        irte.vector = cfg->vector;
1079        irte.dest_id = IRTE_DEST(dest);
1080
1081        /*
1082         * Atomically updates the IRTE with the new destination, vector
1083         * and flushes the interrupt entry cache.
1084         */
1085        modify_irte(irq, &irte);
1086
1087        /*
1088         * After this point, all the interrupts will start arriving
1089         * at the new destination. So, time to cleanup the previous
1090         * vector allocation.
1091         */
1092        if (cfg->move_in_progress)
1093                send_cleanup_vector(cfg);
1094
1095        cpumask_copy(data->affinity, mask);
1096        return 0;
1097}
1098
1099static void intel_compose_msi_msg(struct pci_dev *pdev,
1100                                  unsigned int irq, unsigned int dest,
1101                                  struct msi_msg *msg, u8 hpet_id)
1102{
1103        struct irq_cfg *cfg;
1104        struct irte irte;
1105        u16 sub_handle = 0;
1106        int ir_index;
1107
1108        cfg = irq_cfg(irq);
1109
1110        ir_index = map_irq_to_irte_handle(irq, &sub_handle);
1111        BUG_ON(ir_index == -1);
1112
1113        prepare_irte(&irte, cfg->vector, dest);
1114
1115        /* Set source-id of interrupt request */
1116        if (pdev)
1117                set_msi_sid(&irte, pdev);
1118        else
1119                set_hpet_sid(&irte, hpet_id);
1120
1121        modify_irte(irq, &irte);
1122
1123        msg->address_hi = MSI_ADDR_BASE_HI;
1124        msg->data = sub_handle;
1125        msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
1126                          MSI_ADDR_IR_SHV |
1127                          MSI_ADDR_IR_INDEX1(ir_index) |
1128                          MSI_ADDR_IR_INDEX2(ir_index);
1129}
1130
1131/*
1132 * Map the PCI dev to the corresponding remapping hardware unit
1133 * and allocate 'nvec' consecutive interrupt-remapping table entries
1134 * in it.
1135 */
1136static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec)
1137{
1138        struct intel_iommu *iommu;
1139        int index;
1140
1141        down_read(&dmar_global_lock);
1142        iommu = map_dev_to_ir(dev);
1143        if (!iommu) {
1144                printk(KERN_ERR
1145                       "Unable to map PCI %s to iommu\n", pci_name(dev));
1146                index = -ENOENT;
1147        } else {
1148                index = alloc_irte(iommu, irq, nvec);
1149                if (index < 0) {
1150                        printk(KERN_ERR
1151                               "Unable to allocate %d IRTE for PCI %s\n",
1152                               nvec, pci_name(dev));
1153                        index = -ENOSPC;
1154                }
1155        }
1156        up_read(&dmar_global_lock);
1157
1158        return index;
1159}
1160
1161static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
1162                               int index, int sub_handle)
1163{
1164        struct intel_iommu *iommu;
1165        int ret = -ENOENT;
1166
1167        down_read(&dmar_global_lock);
1168        iommu = map_dev_to_ir(pdev);
1169        if (iommu) {
1170                /*
1171                 * setup the mapping between the irq and the IRTE
1172                 * base index, the sub_handle pointing to the
1173                 * appropriate interrupt remap table entry.
1174                 */
1175                set_irte_irq(irq, iommu, index, sub_handle);
1176                ret = 0;
1177        }
1178        up_read(&dmar_global_lock);
1179
1180        return ret;
1181}
1182
1183static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id)
1184{
1185        int ret = -1;
1186        struct intel_iommu *iommu;
1187        int index;
1188
1189        down_read(&dmar_global_lock);
1190        iommu = map_hpet_to_ir(id);
1191        if (iommu) {
1192                index = alloc_irte(iommu, irq, 1);
1193                if (index >= 0)
1194                        ret = 0;
1195        }
1196        up_read(&dmar_global_lock);
1197
1198        return ret;
1199}
1200
1201struct irq_remap_ops intel_irq_remap_ops = {
1202        .supported              = intel_irq_remapping_supported,
1203        .prepare                = dmar_table_init,
1204        .enable                 = intel_enable_irq_remapping,
1205        .disable                = disable_irq_remapping,
1206        .reenable               = reenable_irq_remapping,
1207        .enable_faulting        = enable_drhd_fault_handling,
1208        .setup_ioapic_entry     = intel_setup_ioapic_entry,
1209        .set_affinity           = intel_ioapic_set_affinity,
1210        .free_irq               = free_irte,
1211        .compose_msi_msg        = intel_compose_msi_msg,
1212        .msi_alloc_irq          = intel_msi_alloc_irq,
1213        .msi_setup_irq          = intel_msi_setup_irq,
1214        .alloc_hpet_msi         = intel_alloc_hpet_msi,
1215};
1216
1217/*
1218 * Support of Interrupt Remapping Unit Hotplug
1219 */
1220static int dmar_ir_add(struct dmar_drhd_unit *dmaru, struct intel_iommu *iommu)
1221{
1222        int ret;
1223        int eim = x2apic_enabled();
1224
1225        if (eim && !ecap_eim_support(iommu->ecap)) {
1226                pr_info("DRHD %Lx: EIM not supported by DRHD, ecap %Lx\n",
1227                        iommu->reg_phys, iommu->ecap);
1228                return -ENODEV;
1229        }
1230
1231        if (ir_parse_ioapic_hpet_scope(dmaru->hdr, iommu)) {
1232                pr_warn("DRHD %Lx: failed to parse managed IOAPIC/HPET\n",
1233                        iommu->reg_phys);
1234                return -ENODEV;
1235        }
1236
1237        /* TODO: check all IOAPICs are covered by IOMMU */
1238
1239        /* Setup Interrupt-remapping now. */
1240        ret = intel_setup_irq_remapping(iommu);
1241        if (ret) {
1242                pr_err("DRHD %Lx: failed to allocate resource\n",
1243                       iommu->reg_phys);
1244                ir_remove_ioapic_hpet_scope(iommu);
1245                return ret;
1246        }
1247
1248        if (!iommu->qi) {
1249                /* Clear previous faults. */
1250                dmar_fault(-1, iommu);
1251                iommu_disable_irq_remapping(iommu);
1252                dmar_disable_qi(iommu);
1253        }
1254
1255        /* Enable queued invalidation */
1256        ret = dmar_enable_qi(iommu);
1257        if (!ret) {
1258                iommu_set_irq_remapping(iommu, eim);
1259        } else {
1260                pr_err("DRHD %Lx: failed to enable queued invalidation, ecap %Lx, ret %d\n",
1261                       iommu->reg_phys, iommu->ecap, ret);
1262                intel_teardown_irq_remapping(iommu);
1263                ir_remove_ioapic_hpet_scope(iommu);
1264        }
1265
1266        return ret;
1267}
1268
1269int dmar_ir_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
1270{
1271        int ret = 0;
1272        struct intel_iommu *iommu = dmaru->iommu;
1273
1274        if (!irq_remapping_enabled)
1275                return 0;
1276        if (iommu == NULL)
1277                return -EINVAL;
1278        if (!ecap_ir_support(iommu->ecap))
1279                return 0;
1280
1281        if (insert) {
1282                if (!iommu->ir_table)
1283                        ret = dmar_ir_add(dmaru, iommu);
1284        } else {
1285                if (iommu->ir_table) {
1286                        if (!bitmap_empty(iommu->ir_table->bitmap,
1287                                          INTR_REMAP_TABLE_ENTRIES)) {
1288                                ret = -EBUSY;
1289                        } else {
1290                                iommu_disable_irq_remapping(iommu);
1291                                intel_teardown_irq_remapping(iommu);
1292                                ir_remove_ioapic_hpet_scope(iommu);
1293                        }
1294                }
1295        }
1296
1297        return ret;
1298}
1299