linux/drivers/iommu/intel-iommu.c
<<
>>
Prefs
   1/*
   2 * Copyright © 2006-2014 Intel Corporation.
   3 *
   4 * This program is free software; you can redistribute it and/or modify it
   5 * under the terms and conditions of the GNU General Public License,
   6 * version 2, as published by the Free Software Foundation.
   7 *
   8 * This program is distributed in the hope it will be useful, but WITHOUT
   9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  11 * more details.
  12 *
  13 * Authors: David Woodhouse <dwmw2@infradead.org>,
  14 *          Ashok Raj <ashok.raj@intel.com>,
  15 *          Shaohua Li <shaohua.li@intel.com>,
  16 *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
  17 *          Fenghua Yu <fenghua.yu@intel.com>
  18 */
  19
  20#include <linux/init.h>
  21#include <linux/bitmap.h>
  22#include <linux/debugfs.h>
  23#include <linux/export.h>
  24#include <linux/slab.h>
  25#include <linux/irq.h>
  26#include <linux/interrupt.h>
  27#include <linux/spinlock.h>
  28#include <linux/pci.h>
  29#include <linux/dmar.h>
  30#include <linux/dma-mapping.h>
  31#include <linux/mempool.h>
  32#include <linux/memory.h>
  33#include <linux/timer.h>
  34#include <linux/iova.h>
  35#include <linux/iommu.h>
  36#include <linux/intel-iommu.h>
  37#include <linux/syscore_ops.h>
  38#include <linux/tboot.h>
  39#include <linux/dmi.h>
  40#include <linux/pci-ats.h>
  41#include <linux/memblock.h>
  42#include <asm/irq_remapping.h>
  43#include <asm/cacheflush.h>
  44#include <asm/iommu.h>
  45
  46#include "irq_remapping.h"
  47#include "pci.h"
  48
  49#define ROOT_SIZE               VTD_PAGE_SIZE
  50#define CONTEXT_SIZE            VTD_PAGE_SIZE
  51
  52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
  53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
  54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
  55
  56#define IOAPIC_RANGE_START      (0xfee00000)
  57#define IOAPIC_RANGE_END        (0xfeefffff)
  58#define IOVA_START_ADDR         (0x1000)
  59
  60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
  61
  62#define MAX_AGAW_WIDTH 64
  63#define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
  64
  65#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
  66#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
  67
  68/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
  69   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
  70#define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
  71                                __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
  72#define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
  73
  74#define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
  75#define DMA_32BIT_PFN           IOVA_PFN(DMA_BIT_MASK(32))
  76#define DMA_64BIT_PFN           IOVA_PFN(DMA_BIT_MASK(64))
  77
  78/* page table handling */
  79#define LEVEL_STRIDE            (9)
  80#define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
  81
  82/*
  83 * This bitmap is used to advertise the page sizes our hardware support
  84 * to the IOMMU core, which will then use this information to split
  85 * physically contiguous memory regions it is mapping into page sizes
  86 * that we support.
  87 *
  88 * Traditionally the IOMMU core just handed us the mappings directly,
  89 * after making sure the size is an order of a 4KiB page and that the
  90 * mapping has natural alignment.
  91 *
  92 * To retain this behavior, we currently advertise that we support
  93 * all page sizes that are an order of 4KiB.
  94 *
  95 * If at some point we'd like to utilize the IOMMU core's new behavior,
  96 * we could change this to advertise the real page sizes we support.
  97 */
  98#define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
  99
 100static inline int agaw_to_level(int agaw)
 101{
 102        return agaw + 2;
 103}
 104
 105static inline int agaw_to_width(int agaw)
 106{
 107        return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
 108}
 109
 110static inline int width_to_agaw(int width)
 111{
 112        return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
 113}
 114
 115static inline unsigned int level_to_offset_bits(int level)
 116{
 117        return (level - 1) * LEVEL_STRIDE;
 118}
 119
 120static inline int pfn_level_offset(unsigned long pfn, int level)
 121{
 122        return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
 123}
 124
 125static inline unsigned long level_mask(int level)
 126{
 127        return -1UL << level_to_offset_bits(level);
 128}
 129
 130static inline unsigned long level_size(int level)
 131{
 132        return 1UL << level_to_offset_bits(level);
 133}
 134
 135static inline unsigned long align_to_level(unsigned long pfn, int level)
 136{
 137        return (pfn + level_size(level) - 1) & level_mask(level);
 138}
 139
 140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
 141{
 142        return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
 143}
 144
 145/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
 146   are never going to work. */
 147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
 148{
 149        return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
 150}
 151
 152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
 153{
 154        return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
 155}
 156static inline unsigned long page_to_dma_pfn(struct page *pg)
 157{
 158        return mm_to_dma_pfn(page_to_pfn(pg));
 159}
 160static inline unsigned long virt_to_dma_pfn(void *p)
 161{
 162        return page_to_dma_pfn(virt_to_page(p));
 163}
 164
 165/* global iommu list, set NULL for ignored DMAR units */
 166static struct intel_iommu **g_iommus;
 167
 168static void __init check_tylersburg_isoch(void);
 169static int rwbf_quirk;
 170
 171/*
 172 * set to 1 to panic kernel if can't successfully enable VT-d
 173 * (used when kernel is launched w/ TXT)
 174 */
 175static int force_on = 0;
 176
 177/*
 178 * 0: Present
 179 * 1-11: Reserved
 180 * 12-63: Context Ptr (12 - (haw-1))
 181 * 64-127: Reserved
 182 */
 183struct root_entry {
 184        u64     val;
 185        u64     rsvd1;
 186};
 187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 188static inline bool root_present(struct root_entry *root)
 189{
 190        return (root->val & 1);
 191}
 192static inline void set_root_present(struct root_entry *root)
 193{
 194        root->val |= 1;
 195}
 196static inline void set_root_value(struct root_entry *root, unsigned long value)
 197{
 198        root->val |= value & VTD_PAGE_MASK;
 199}
 200
 201static inline struct context_entry *
 202get_context_addr_from_root(struct root_entry *root)
 203{
 204        return (struct context_entry *)
 205                (root_present(root)?phys_to_virt(
 206                root->val & VTD_PAGE_MASK) :
 207                NULL);
 208}
 209
 210/*
 211 * low 64 bits:
 212 * 0: present
 213 * 1: fault processing disable
 214 * 2-3: translation type
 215 * 12-63: address space root
 216 * high 64 bits:
 217 * 0-2: address width
 218 * 3-6: aval
 219 * 8-23: domain id
 220 */
 221struct context_entry {
 222        u64 lo;
 223        u64 hi;
 224};
 225
 226static inline bool context_present(struct context_entry *context)
 227{
 228        return (context->lo & 1);
 229}
 230static inline void context_set_present(struct context_entry *context)
 231{
 232        context->lo |= 1;
 233}
 234
 235static inline void context_set_fault_enable(struct context_entry *context)
 236{
 237        context->lo &= (((u64)-1) << 2) | 1;
 238}
 239
 240static inline void context_set_translation_type(struct context_entry *context,
 241                                                unsigned long value)
 242{
 243        context->lo &= (((u64)-1) << 4) | 3;
 244        context->lo |= (value & 3) << 2;
 245}
 246
 247static inline void context_set_address_root(struct context_entry *context,
 248                                            unsigned long value)
 249{
 250        context->lo |= value & VTD_PAGE_MASK;
 251}
 252
 253static inline void context_set_address_width(struct context_entry *context,
 254                                             unsigned long value)
 255{
 256        context->hi |= value & 7;
 257}
 258
 259static inline void context_set_domain_id(struct context_entry *context,
 260                                         unsigned long value)
 261{
 262        context->hi |= (value & ((1 << 16) - 1)) << 8;
 263}
 264
 265static inline void context_clear_entry(struct context_entry *context)
 266{
 267        context->lo = 0;
 268        context->hi = 0;
 269}
 270
 271/*
 272 * 0: readable
 273 * 1: writable
 274 * 2-6: reserved
 275 * 7: super page
 276 * 8-10: available
 277 * 11: snoop behavior
 278 * 12-63: Host physcial address
 279 */
 280struct dma_pte {
 281        u64 val;
 282};
 283
 284static inline void dma_clear_pte(struct dma_pte *pte)
 285{
 286        pte->val = 0;
 287}
 288
 289static inline u64 dma_pte_addr(struct dma_pte *pte)
 290{
 291#ifdef CONFIG_64BIT
 292        return pte->val & VTD_PAGE_MASK;
 293#else
 294        /* Must have a full atomic 64-bit read */
 295        return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
 296#endif
 297}
 298
 299static inline bool dma_pte_present(struct dma_pte *pte)
 300{
 301        return (pte->val & 3) != 0;
 302}
 303
 304static inline bool dma_pte_superpage(struct dma_pte *pte)
 305{
 306        return (pte->val & (1 << 7));
 307}
 308
 309static inline int first_pte_in_page(struct dma_pte *pte)
 310{
 311        return !((unsigned long)pte & ~VTD_PAGE_MASK);
 312}
 313
 314/*
 315 * This domain is a statically identity mapping domain.
 316 *      1. This domain creats a static 1:1 mapping to all usable memory.
 317 *      2. It maps to each iommu if successful.
 318 *      3. Each iommu mapps to this domain if successful.
 319 */
 320static struct dmar_domain *si_domain;
 321static int hw_pass_through = 1;
 322
 323/* devices under the same p2p bridge are owned in one domain */
 324#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
 325
 326/* domain represents a virtual machine, more than one devices
 327 * across iommus may be owned in one domain, e.g. kvm guest.
 328 */
 329#define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 1)
 330
 331/* si_domain contains mulitple devices */
 332#define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 2)
 333
 334/* define the limit of IOMMUs supported in each domain */
 335#ifdef  CONFIG_X86
 336# define        IOMMU_UNITS_SUPPORTED   MAX_IO_APICS
 337#else
 338# define        IOMMU_UNITS_SUPPORTED   64
 339#endif
 340
 341struct dmar_domain {
 342        int     id;                     /* domain id */
 343        int     nid;                    /* node id */
 344        DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
 345                                        /* bitmap of iommus this domain uses*/
 346
 347        struct list_head devices;       /* all devices' list */
 348        struct iova_domain iovad;       /* iova's that belong to this domain */
 349
 350        struct dma_pte  *pgd;           /* virtual address */
 351        int             gaw;            /* max guest address width */
 352
 353        /* adjusted guest address width, 0 is level 2 30-bit */
 354        int             agaw;
 355
 356        int             flags;          /* flags to find out type of domain */
 357
 358        int             iommu_coherency;/* indicate coherency of iommu access */
 359        int             iommu_snooping; /* indicate snooping control feature*/
 360        int             iommu_count;    /* reference count of iommu */
 361        int             iommu_superpage;/* Level of superpages supported:
 362                                           0 == 4KiB (no superpages), 1 == 2MiB,
 363                                           2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
 364        spinlock_t      iommu_lock;     /* protect iommu set in domain */
 365        u64             max_addr;       /* maximum mapped address */
 366};
 367
 368/* PCI domain-device relationship */
 369struct device_domain_info {
 370        struct list_head link;  /* link to domain siblings */
 371        struct list_head global; /* link to global list */
 372        u8 bus;                 /* PCI bus number */
 373        u8 devfn;               /* PCI devfn number */
 374        struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
 375        struct intel_iommu *iommu; /* IOMMU used by this device */
 376        struct dmar_domain *domain; /* pointer to domain */
 377};
 378
 379struct dmar_rmrr_unit {
 380        struct list_head list;          /* list of rmrr units   */
 381        struct acpi_dmar_header *hdr;   /* ACPI header          */
 382        u64     base_address;           /* reserved base address*/
 383        u64     end_address;            /* reserved end address */
 384        struct dmar_dev_scope *devices; /* target devices */
 385        int     devices_cnt;            /* target device count */
 386};
 387
 388struct dmar_atsr_unit {
 389        struct list_head list;          /* list of ATSR units */
 390        struct acpi_dmar_header *hdr;   /* ACPI header */
 391        struct dmar_dev_scope *devices; /* target devices */
 392        int devices_cnt;                /* target device count */
 393        u8 include_all:1;               /* include all ports */
 394};
 395
 396static LIST_HEAD(dmar_atsr_units);
 397static LIST_HEAD(dmar_rmrr_units);
 398
 399#define for_each_rmrr_units(rmrr) \
 400        list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 401
 402static void flush_unmaps_timeout(unsigned long data);
 403
 404static DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
 405
 406#define HIGH_WATER_MARK 250
 407struct deferred_flush_tables {
 408        int next;
 409        struct iova *iova[HIGH_WATER_MARK];
 410        struct dmar_domain *domain[HIGH_WATER_MARK];
 411        struct page *freelist[HIGH_WATER_MARK];
 412};
 413
 414static struct deferred_flush_tables *deferred_flush;
 415
 416/* bitmap for indexing intel_iommus */
 417static int g_num_of_iommus;
 418
 419static DEFINE_SPINLOCK(async_umap_flush_lock);
 420static LIST_HEAD(unmaps_to_do);
 421
 422static int timer_on;
 423static long list_size;
 424
 425static void domain_exit(struct dmar_domain *domain);
 426static void domain_remove_dev_info(struct dmar_domain *domain);
 427static void domain_remove_one_dev_info(struct dmar_domain *domain,
 428                                       struct device *dev);
 429static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
 430                                           struct device *dev);
 431
 432#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
 433int dmar_disabled = 0;
 434#else
 435int dmar_disabled = 1;
 436#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
 437
 438int intel_iommu_enabled = 0;
 439EXPORT_SYMBOL_GPL(intel_iommu_enabled);
 440
 441static int dmar_map_gfx = 1;
 442static int dmar_forcedac;
 443static int intel_iommu_strict;
 444static int intel_iommu_superpage = 1;
 445
 446int intel_iommu_gfx_mapped;
 447EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
 448
 449#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
 450static DEFINE_SPINLOCK(device_domain_lock);
 451static LIST_HEAD(device_domain_list);
 452
 453static struct iommu_ops intel_iommu_ops;
 454
 455static int __init intel_iommu_setup(char *str)
 456{
 457        if (!str)
 458                return -EINVAL;
 459        while (*str) {
 460                if (!strncmp(str, "on", 2)) {
 461                        dmar_disabled = 0;
 462                        printk(KERN_INFO "Intel-IOMMU: enabled\n");
 463                } else if (!strncmp(str, "off", 3)) {
 464                        dmar_disabled = 1;
 465                        printk(KERN_INFO "Intel-IOMMU: disabled\n");
 466                } else if (!strncmp(str, "igfx_off", 8)) {
 467                        dmar_map_gfx = 0;
 468                        printk(KERN_INFO
 469                                "Intel-IOMMU: disable GFX device mapping\n");
 470                } else if (!strncmp(str, "forcedac", 8)) {
 471                        printk(KERN_INFO
 472                                "Intel-IOMMU: Forcing DAC for PCI devices\n");
 473                        dmar_forcedac = 1;
 474                } else if (!strncmp(str, "strict", 6)) {
 475                        printk(KERN_INFO
 476                                "Intel-IOMMU: disable batched IOTLB flush\n");
 477                        intel_iommu_strict = 1;
 478                } else if (!strncmp(str, "sp_off", 6)) {
 479                        printk(KERN_INFO
 480                                "Intel-IOMMU: disable supported super page\n");
 481                        intel_iommu_superpage = 0;
 482                }
 483
 484                str += strcspn(str, ",");
 485                while (*str == ',')
 486                        str++;
 487        }
 488        return 0;
 489}
 490__setup("intel_iommu=", intel_iommu_setup);
 491
 492static struct kmem_cache *iommu_domain_cache;
 493static struct kmem_cache *iommu_devinfo_cache;
 494static struct kmem_cache *iommu_iova_cache;
 495
 496static inline void *alloc_pgtable_page(int node)
 497{
 498        struct page *page;
 499        void *vaddr = NULL;
 500
 501        page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
 502        if (page)
 503                vaddr = page_address(page);
 504        return vaddr;
 505}
 506
 507static inline void free_pgtable_page(void *vaddr)
 508{
 509        free_page((unsigned long)vaddr);
 510}
 511
 512static inline void *alloc_domain_mem(void)
 513{
 514        return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 515}
 516
 517static void free_domain_mem(void *vaddr)
 518{
 519        kmem_cache_free(iommu_domain_cache, vaddr);
 520}
 521
 522static inline void * alloc_devinfo_mem(void)
 523{
 524        return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
 525}
 526
 527static inline void free_devinfo_mem(void *vaddr)
 528{
 529        kmem_cache_free(iommu_devinfo_cache, vaddr);
 530}
 531
 532struct iova *alloc_iova_mem(void)
 533{
 534        return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
 535}
 536
 537void free_iova_mem(struct iova *iova)
 538{
 539        kmem_cache_free(iommu_iova_cache, iova);
 540}
 541
 542
 543static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
 544{
 545        unsigned long sagaw;
 546        int agaw = -1;
 547
 548        sagaw = cap_sagaw(iommu->cap);
 549        for (agaw = width_to_agaw(max_gaw);
 550             agaw >= 0; agaw--) {
 551                if (test_bit(agaw, &sagaw))
 552                        break;
 553        }
 554
 555        return agaw;
 556}
 557
 558/*
 559 * Calculate max SAGAW for each iommu.
 560 */
 561int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 562{
 563        return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
 564}
 565
 566/*
 567 * calculate agaw for each iommu.
 568 * "SAGAW" may be different across iommus, use a default agaw, and
 569 * get a supported less agaw for iommus that don't support the default agaw.
 570 */
 571int iommu_calculate_agaw(struct intel_iommu *iommu)
 572{
 573        return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 574}
 575
 576/* This functionin only returns single iommu in a domain */
 577static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
 578{
 579        int iommu_id;
 580
 581        /* si_domain and vm domain should not get here. */
 582        BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
 583        BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
 584
 585        iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
 586        if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
 587                return NULL;
 588
 589        return g_iommus[iommu_id];
 590}
 591
 592static void domain_update_iommu_coherency(struct dmar_domain *domain)
 593{
 594        struct dmar_drhd_unit *drhd;
 595        struct intel_iommu *iommu;
 596        int i, found = 0;
 597
 598        domain->iommu_coherency = 1;
 599
 600        for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
 601                found = 1;
 602                if (!ecap_coherent(g_iommus[i]->ecap)) {
 603                        domain->iommu_coherency = 0;
 604                        break;
 605                }
 606        }
 607        if (found)
 608                return;
 609
 610        /* No hardware attached; use lowest common denominator */
 611        rcu_read_lock();
 612        for_each_active_iommu(iommu, drhd) {
 613                if (!ecap_coherent(iommu->ecap)) {
 614                        domain->iommu_coherency = 0;
 615                        break;
 616                }
 617        }
 618        rcu_read_unlock();
 619}
 620
 621static void domain_update_iommu_snooping(struct dmar_domain *domain)
 622{
 623        int i;
 624
 625        domain->iommu_snooping = 1;
 626
 627        for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
 628                if (!ecap_sc_support(g_iommus[i]->ecap)) {
 629                        domain->iommu_snooping = 0;
 630                        break;
 631                }
 632        }
 633}
 634
 635static void domain_update_iommu_superpage(struct dmar_domain *domain)
 636{
 637        struct dmar_drhd_unit *drhd;
 638        struct intel_iommu *iommu = NULL;
 639        int mask = 0xf;
 640
 641        if (!intel_iommu_superpage) {
 642                domain->iommu_superpage = 0;
 643                return;
 644        }
 645
 646        /* set iommu_superpage to the smallest common denominator */
 647        rcu_read_lock();
 648        for_each_active_iommu(iommu, drhd) {
 649                mask &= cap_super_page_val(iommu->cap);
 650                if (!mask) {
 651                        break;
 652                }
 653        }
 654        rcu_read_unlock();
 655
 656        domain->iommu_superpage = fls(mask);
 657}
 658
 659/* Some capabilities may be different across iommus */
 660static void domain_update_iommu_cap(struct dmar_domain *domain)
 661{
 662        domain_update_iommu_coherency(domain);
 663        domain_update_iommu_snooping(domain);
 664        domain_update_iommu_superpage(domain);
 665}
 666
 667static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 668{
 669        struct dmar_drhd_unit *drhd = NULL;
 670        struct intel_iommu *iommu;
 671        struct device *tmp;
 672        struct pci_dev *ptmp, *pdev = NULL;
 673        u16 segment;
 674        int i;
 675
 676        if (dev_is_pci(dev)) {
 677                pdev = to_pci_dev(dev);
 678                segment = pci_domain_nr(pdev->bus);
 679        } else if (ACPI_COMPANION(dev))
 680                dev = &ACPI_COMPANION(dev)->dev;
 681
 682        rcu_read_lock();
 683        for_each_active_iommu(iommu, drhd) {
 684                if (pdev && segment != drhd->segment)
 685                        continue;
 686
 687                for_each_active_dev_scope(drhd->devices,
 688                                          drhd->devices_cnt, i, tmp) {
 689                        if (tmp == dev) {
 690                                *bus = drhd->devices[i].bus;
 691                                *devfn = drhd->devices[i].devfn;
 692                                goto out;
 693                        }
 694
 695                        if (!pdev || !dev_is_pci(tmp))
 696                                continue;
 697
 698                        ptmp = to_pci_dev(tmp);
 699                        if (ptmp->subordinate &&
 700                            ptmp->subordinate->number <= pdev->bus->number &&
 701                            ptmp->subordinate->busn_res.end >= pdev->bus->number)
 702                                goto got_pdev;
 703                }
 704
 705                if (pdev && drhd->include_all) {
 706                got_pdev:
 707                        *bus = pdev->bus->number;
 708                        *devfn = pdev->devfn;
 709                        goto out;
 710                }
 711        }
 712        iommu = NULL;
 713 out:
 714        rcu_read_unlock();
 715
 716        return iommu;
 717}
 718
 719static void domain_flush_cache(struct dmar_domain *domain,
 720                               void *addr, int size)
 721{
 722        if (!domain->iommu_coherency)
 723                clflush_cache_range(addr, size);
 724}
 725
 726/* Gets context entry for a given bus and devfn */
 727static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 728                u8 bus, u8 devfn)
 729{
 730        struct root_entry *root;
 731        struct context_entry *context;
 732        unsigned long phy_addr;
 733        unsigned long flags;
 734
 735        spin_lock_irqsave(&iommu->lock, flags);
 736        root = &iommu->root_entry[bus];
 737        context = get_context_addr_from_root(root);
 738        if (!context) {
 739                context = (struct context_entry *)
 740                                alloc_pgtable_page(iommu->node);
 741                if (!context) {
 742                        spin_unlock_irqrestore(&iommu->lock, flags);
 743                        return NULL;
 744                }
 745                __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
 746                phy_addr = virt_to_phys((void *)context);
 747                set_root_value(root, phy_addr);
 748                set_root_present(root);
 749                __iommu_flush_cache(iommu, root, sizeof(*root));
 750        }
 751        spin_unlock_irqrestore(&iommu->lock, flags);
 752        return &context[devfn];
 753}
 754
 755static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 756{
 757        struct root_entry *root;
 758        struct context_entry *context;
 759        int ret;
 760        unsigned long flags;
 761
 762        spin_lock_irqsave(&iommu->lock, flags);
 763        root = &iommu->root_entry[bus];
 764        context = get_context_addr_from_root(root);
 765        if (!context) {
 766                ret = 0;
 767                goto out;
 768        }
 769        ret = context_present(&context[devfn]);
 770out:
 771        spin_unlock_irqrestore(&iommu->lock, flags);
 772        return ret;
 773}
 774
 775static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
 776{
 777        struct root_entry *root;
 778        struct context_entry *context;
 779        unsigned long flags;
 780
 781        spin_lock_irqsave(&iommu->lock, flags);
 782        root = &iommu->root_entry[bus];
 783        context = get_context_addr_from_root(root);
 784        if (context) {
 785                context_clear_entry(&context[devfn]);
 786                __iommu_flush_cache(iommu, &context[devfn], \
 787                        sizeof(*context));
 788        }
 789        spin_unlock_irqrestore(&iommu->lock, flags);
 790}
 791
 792static void free_context_table(struct intel_iommu *iommu)
 793{
 794        struct root_entry *root;
 795        int i;
 796        unsigned long flags;
 797        struct context_entry *context;
 798
 799        spin_lock_irqsave(&iommu->lock, flags);
 800        if (!iommu->root_entry) {
 801                goto out;
 802        }
 803        for (i = 0; i < ROOT_ENTRY_NR; i++) {
 804                root = &iommu->root_entry[i];
 805                context = get_context_addr_from_root(root);
 806                if (context)
 807                        free_pgtable_page(context);
 808        }
 809        free_pgtable_page(iommu->root_entry);
 810        iommu->root_entry = NULL;
 811out:
 812        spin_unlock_irqrestore(&iommu->lock, flags);
 813}
 814
 815static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 816                                      unsigned long pfn, int *target_level)
 817{
 818        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 819        struct dma_pte *parent, *pte = NULL;
 820        int level = agaw_to_level(domain->agaw);
 821        int offset;
 822
 823        BUG_ON(!domain->pgd);
 824
 825        if (addr_width < BITS_PER_LONG && pfn >> addr_width)
 826                /* Address beyond IOMMU's addressing capabilities. */
 827                return NULL;
 828
 829        parent = domain->pgd;
 830
 831        while (1) {
 832                void *tmp_page;
 833
 834                offset = pfn_level_offset(pfn, level);
 835                pte = &parent[offset];
 836                if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
 837                        break;
 838                if (level == *target_level)
 839                        break;
 840
 841                if (!dma_pte_present(pte)) {
 842                        uint64_t pteval;
 843
 844                        tmp_page = alloc_pgtable_page(domain->nid);
 845
 846                        if (!tmp_page)
 847                                return NULL;
 848
 849                        domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 850                        pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
 851                        if (cmpxchg64(&pte->val, 0ULL, pteval)) {
 852                                /* Someone else set it while we were thinking; use theirs. */
 853                                free_pgtable_page(tmp_page);
 854                        } else {
 855                                dma_pte_addr(pte);
 856                                domain_flush_cache(domain, pte, sizeof(*pte));
 857                        }
 858                }
 859                if (level == 1)
 860                        break;
 861
 862                parent = phys_to_virt(dma_pte_addr(pte));
 863                level--;
 864        }
 865
 866        if (!*target_level)
 867                *target_level = level;
 868
 869        return pte;
 870}
 871
 872
 873/* return address's pte at specific level */
 874static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
 875                                         unsigned long pfn,
 876                                         int level, int *large_page)
 877{
 878        struct dma_pte *parent, *pte = NULL;
 879        int total = agaw_to_level(domain->agaw);
 880        int offset;
 881
 882        parent = domain->pgd;
 883        while (level <= total) {
 884                offset = pfn_level_offset(pfn, total);
 885                pte = &parent[offset];
 886                if (level == total)
 887                        return pte;
 888
 889                if (!dma_pte_present(pte)) {
 890                        *large_page = total;
 891                        break;
 892                }
 893
 894                if (pte->val & DMA_PTE_LARGE_PAGE) {
 895                        *large_page = total;
 896                        return pte;
 897                }
 898
 899                parent = phys_to_virt(dma_pte_addr(pte));
 900                total--;
 901        }
 902        return NULL;
 903}
 904
 905/* clear last level pte, a tlb flush should be followed */
 906static void dma_pte_clear_range(struct dmar_domain *domain,
 907                                unsigned long start_pfn,
 908                                unsigned long last_pfn)
 909{
 910        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 911        unsigned int large_page = 1;
 912        struct dma_pte *first_pte, *pte;
 913
 914        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 915        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 916        BUG_ON(start_pfn > last_pfn);
 917
 918        /* we don't need lock here; nobody else touches the iova range */
 919        do {
 920                large_page = 1;
 921                first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
 922                if (!pte) {
 923                        start_pfn = align_to_level(start_pfn + 1, large_page + 1);
 924                        continue;
 925                }
 926                do {
 927                        dma_clear_pte(pte);
 928                        start_pfn += lvl_to_nr_pages(large_page);
 929                        pte++;
 930                } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
 931
 932                domain_flush_cache(domain, first_pte,
 933                                   (void *)pte - (void *)first_pte);
 934
 935        } while (start_pfn && start_pfn <= last_pfn);
 936}
 937
 938static void dma_pte_free_level(struct dmar_domain *domain, int level,
 939                               struct dma_pte *pte, unsigned long pfn,
 940                               unsigned long start_pfn, unsigned long last_pfn)
 941{
 942        pfn = max(start_pfn, pfn);
 943        pte = &pte[pfn_level_offset(pfn, level)];
 944
 945        do {
 946                unsigned long level_pfn;
 947                struct dma_pte *level_pte;
 948
 949                if (!dma_pte_present(pte) || dma_pte_superpage(pte))
 950                        goto next;
 951
 952                level_pfn = pfn & level_mask(level - 1);
 953                level_pte = phys_to_virt(dma_pte_addr(pte));
 954
 955                if (level > 2)
 956                        dma_pte_free_level(domain, level - 1, level_pte,
 957                                           level_pfn, start_pfn, last_pfn);
 958
 959                /* If range covers entire pagetable, free it */
 960                if (!(start_pfn > level_pfn ||
 961                      last_pfn < level_pfn + level_size(level) - 1)) {
 962                        dma_clear_pte(pte);
 963                        domain_flush_cache(domain, pte, sizeof(*pte));
 964                        free_pgtable_page(level_pte);
 965                }
 966next:
 967                pfn += level_size(level);
 968        } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
 969}
 970
 971/* free page table pages. last level pte should already be cleared */
 972static void dma_pte_free_pagetable(struct dmar_domain *domain,
 973                                   unsigned long start_pfn,
 974                                   unsigned long last_pfn)
 975{
 976        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
 977
 978        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 979        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 980        BUG_ON(start_pfn > last_pfn);
 981
 982        /* We don't need lock here; nobody else touches the iova range */
 983        dma_pte_free_level(domain, agaw_to_level(domain->agaw),
 984                           domain->pgd, 0, start_pfn, last_pfn);
 985
 986        /* free pgd */
 987        if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 988                free_pgtable_page(domain->pgd);
 989                domain->pgd = NULL;
 990        }
 991}
 992
 993/* When a page at a given level is being unlinked from its parent, we don't
 994   need to *modify* it at all. All we need to do is make a list of all the
 995   pages which can be freed just as soon as we've flushed the IOTLB and we
 996   know the hardware page-walk will no longer touch them.
 997   The 'pte' argument is the *parent* PTE, pointing to the page that is to
 998   be freed. */
 999static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1000                                            int level, struct dma_pte *pte,
1001                                            struct page *freelist)
1002{
1003        struct page *pg;
1004
1005        pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1006        pg->freelist = freelist;
1007        freelist = pg;
1008
1009        if (level == 1)
1010                return freelist;
1011
1012        pte = page_address(pg);
1013        do {
1014                if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1015                        freelist = dma_pte_list_pagetables(domain, level - 1,
1016                                                           pte, freelist);
1017                pte++;
1018        } while (!first_pte_in_page(pte));
1019
1020        return freelist;
1021}
1022
1023static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1024                                        struct dma_pte *pte, unsigned long pfn,
1025                                        unsigned long start_pfn,
1026                                        unsigned long last_pfn,
1027                                        struct page *freelist)
1028{
1029        struct dma_pte *first_pte = NULL, *last_pte = NULL;
1030
1031        pfn = max(start_pfn, pfn);
1032        pte = &pte[pfn_level_offset(pfn, level)];
1033
1034        do {
1035                unsigned long level_pfn;
1036
1037                if (!dma_pte_present(pte))
1038                        goto next;
1039
1040                level_pfn = pfn & level_mask(level);
1041
1042                /* If range covers entire pagetable, free it */
1043                if (start_pfn <= level_pfn &&
1044                    last_pfn >= level_pfn + level_size(level) - 1) {
1045                        /* These suborbinate page tables are going away entirely. Don't
1046                           bother to clear them; we're just going to *free* them. */
1047                        if (level > 1 && !dma_pte_superpage(pte))
1048                                freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1049
1050                        dma_clear_pte(pte);
1051                        if (!first_pte)
1052                                first_pte = pte;
1053                        last_pte = pte;
1054                } else if (level > 1) {
1055                        /* Recurse down into a level that isn't *entirely* obsolete */
1056                        freelist = dma_pte_clear_level(domain, level - 1,
1057                                                       phys_to_virt(dma_pte_addr(pte)),
1058                                                       level_pfn, start_pfn, last_pfn,
1059                                                       freelist);
1060                }
1061next:
1062                pfn += level_size(level);
1063        } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1064
1065        if (first_pte)
1066                domain_flush_cache(domain, first_pte,
1067                                   (void *)++last_pte - (void *)first_pte);
1068
1069        return freelist;
1070}
1071
1072/* We can't just free the pages because the IOMMU may still be walking
1073   the page tables, and may have cached the intermediate levels. The
1074   pages can only be freed after the IOTLB flush has been done. */
1075struct page *domain_unmap(struct dmar_domain *domain,
1076                          unsigned long start_pfn,
1077                          unsigned long last_pfn)
1078{
1079        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1080        struct page *freelist = NULL;
1081
1082        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1083        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1084        BUG_ON(start_pfn > last_pfn);
1085
1086        /* we don't need lock here; nobody else touches the iova range */
1087        freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1088                                       domain->pgd, 0, start_pfn, last_pfn, NULL);
1089
1090        /* free pgd */
1091        if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1092                struct page *pgd_page = virt_to_page(domain->pgd);
1093                pgd_page->freelist = freelist;
1094                freelist = pgd_page;
1095
1096                domain->pgd = NULL;
1097        }
1098
1099        return freelist;
1100}
1101
1102void dma_free_pagelist(struct page *freelist)
1103{
1104        struct page *pg;
1105
1106        while ((pg = freelist)) {
1107                freelist = pg->freelist;
1108                free_pgtable_page(page_address(pg));
1109        }
1110}
1111
1112/* iommu handling */
1113static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1114{
1115        struct root_entry *root;
1116        unsigned long flags;
1117
1118        root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1119        if (!root)
1120                return -ENOMEM;
1121
1122        __iommu_flush_cache(iommu, root, ROOT_SIZE);
1123
1124        spin_lock_irqsave(&iommu->lock, flags);
1125        iommu->root_entry = root;
1126        spin_unlock_irqrestore(&iommu->lock, flags);
1127
1128        return 0;
1129}
1130
1131static void iommu_set_root_entry(struct intel_iommu *iommu)
1132{
1133        void *addr;
1134        u32 sts;
1135        unsigned long flag;
1136
1137        addr = iommu->root_entry;
1138
1139        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1140        dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1141
1142        writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1143
1144        /* Make sure hardware complete it */
1145        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1146                      readl, (sts & DMA_GSTS_RTPS), sts);
1147
1148        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1149}
1150
1151static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1152{
1153        u32 val;
1154        unsigned long flag;
1155
1156        if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1157                return;
1158
1159        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1160        writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1161
1162        /* Make sure hardware complete it */
1163        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1164                      readl, (!(val & DMA_GSTS_WBFS)), val);
1165
1166        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1167}
1168
1169/* return value determine if we need a write buffer flush */
1170static void __iommu_flush_context(struct intel_iommu *iommu,
1171                                  u16 did, u16 source_id, u8 function_mask,
1172                                  u64 type)
1173{
1174        u64 val = 0;
1175        unsigned long flag;
1176
1177        switch (type) {
1178        case DMA_CCMD_GLOBAL_INVL:
1179                val = DMA_CCMD_GLOBAL_INVL;
1180                break;
1181        case DMA_CCMD_DOMAIN_INVL:
1182                val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1183                break;
1184        case DMA_CCMD_DEVICE_INVL:
1185                val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1186                        | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1187                break;
1188        default:
1189                BUG();
1190        }
1191        val |= DMA_CCMD_ICC;
1192
1193        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1194        dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1195
1196        /* Make sure hardware complete it */
1197        IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1198                dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1199
1200        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1201}
1202
1203/* return value determine if we need a write buffer flush */
1204static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1205                                u64 addr, unsigned int size_order, u64 type)
1206{
1207        int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1208        u64 val = 0, val_iva = 0;
1209        unsigned long flag;
1210
1211        switch (type) {
1212        case DMA_TLB_GLOBAL_FLUSH:
1213                /* global flush doesn't need set IVA_REG */
1214                val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1215                break;
1216        case DMA_TLB_DSI_FLUSH:
1217                val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1218                break;
1219        case DMA_TLB_PSI_FLUSH:
1220                val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1221                /* IH bit is passed in as part of address */
1222                val_iva = size_order | addr;
1223                break;
1224        default:
1225                BUG();
1226        }
1227        /* Note: set drain read/write */
1228#if 0
1229        /*
1230         * This is probably to be super secure.. Looks like we can
1231         * ignore it without any impact.
1232         */
1233        if (cap_read_drain(iommu->cap))
1234                val |= DMA_TLB_READ_DRAIN;
1235#endif
1236        if (cap_write_drain(iommu->cap))
1237                val |= DMA_TLB_WRITE_DRAIN;
1238
1239        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1240        /* Note: Only uses first TLB reg currently */
1241        if (val_iva)
1242                dmar_writeq(iommu->reg + tlb_offset, val_iva);
1243        dmar_writeq(iommu->reg + tlb_offset + 8, val);
1244
1245        /* Make sure hardware complete it */
1246        IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1247                dmar_readq, (!(val & DMA_TLB_IVT)), val);
1248
1249        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1250
1251        /* check IOTLB invalidation granularity */
1252        if (DMA_TLB_IAIG(val) == 0)
1253                printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1254        if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1255                pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1256                        (unsigned long long)DMA_TLB_IIRG(type),
1257                        (unsigned long long)DMA_TLB_IAIG(val));
1258}
1259
1260static struct device_domain_info *
1261iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1262                         u8 bus, u8 devfn)
1263{
1264        int found = 0;
1265        unsigned long flags;
1266        struct device_domain_info *info;
1267        struct pci_dev *pdev;
1268
1269        if (!ecap_dev_iotlb_support(iommu->ecap))
1270                return NULL;
1271
1272        if (!iommu->qi)
1273                return NULL;
1274
1275        spin_lock_irqsave(&device_domain_lock, flags);
1276        list_for_each_entry(info, &domain->devices, link)
1277                if (info->bus == bus && info->devfn == devfn) {
1278                        found = 1;
1279                        break;
1280                }
1281        spin_unlock_irqrestore(&device_domain_lock, flags);
1282
1283        if (!found || !info->dev || !dev_is_pci(info->dev))
1284                return NULL;
1285
1286        pdev = to_pci_dev(info->dev);
1287
1288        if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1289                return NULL;
1290
1291        if (!dmar_find_matched_atsr_unit(pdev))
1292                return NULL;
1293
1294        return info;
1295}
1296
1297static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1298{
1299        if (!info || !dev_is_pci(info->dev))
1300                return;
1301
1302        pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1303}
1304
1305static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1306{
1307        if (!info->dev || !dev_is_pci(info->dev) ||
1308            !pci_ats_enabled(to_pci_dev(info->dev)))
1309                return;
1310
1311        pci_disable_ats(to_pci_dev(info->dev));
1312}
1313
1314static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1315                                  u64 addr, unsigned mask)
1316{
1317        u16 sid, qdep;
1318        unsigned long flags;
1319        struct device_domain_info *info;
1320
1321        spin_lock_irqsave(&device_domain_lock, flags);
1322        list_for_each_entry(info, &domain->devices, link) {
1323                struct pci_dev *pdev;
1324                if (!info->dev || !dev_is_pci(info->dev))
1325                        continue;
1326
1327                pdev = to_pci_dev(info->dev);
1328                if (!pci_ats_enabled(pdev))
1329                        continue;
1330
1331                sid = info->bus << 8 | info->devfn;
1332                qdep = pci_ats_queue_depth(pdev);
1333                qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1334        }
1335        spin_unlock_irqrestore(&device_domain_lock, flags);
1336}
1337
1338static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1339                                  unsigned long pfn, unsigned int pages, int ih, int map)
1340{
1341        unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1342        uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1343
1344        BUG_ON(pages == 0);
1345
1346        if (ih)
1347                ih = 1 << 6;
1348        /*
1349         * Fallback to domain selective flush if no PSI support or the size is
1350         * too big.
1351         * PSI requires page size to be 2 ^ x, and the base address is naturally
1352         * aligned to the size
1353         */
1354        if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1355                iommu->flush.flush_iotlb(iommu, did, 0, 0,
1356                                                DMA_TLB_DSI_FLUSH);
1357        else
1358                iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1359                                                DMA_TLB_PSI_FLUSH);
1360
1361        /*
1362         * In caching mode, changes of pages from non-present to present require
1363         * flush. However, device IOTLB doesn't need to be flushed in this case.
1364         */
1365        if (!cap_caching_mode(iommu->cap) || !map)
1366                iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1367}
1368
1369static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1370{
1371        u32 pmen;
1372        unsigned long flags;
1373
1374        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1375        pmen = readl(iommu->reg + DMAR_PMEN_REG);
1376        pmen &= ~DMA_PMEN_EPM;
1377        writel(pmen, iommu->reg + DMAR_PMEN_REG);
1378
1379        /* wait for the protected region status bit to clear */
1380        IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1381                readl, !(pmen & DMA_PMEN_PRS), pmen);
1382
1383        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1384}
1385
1386static int iommu_enable_translation(struct intel_iommu *iommu)
1387{
1388        u32 sts;
1389        unsigned long flags;
1390
1391        raw_spin_lock_irqsave(&iommu->register_lock, flags);
1392        iommu->gcmd |= DMA_GCMD_TE;
1393        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1394
1395        /* Make sure hardware complete it */
1396        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1397                      readl, (sts & DMA_GSTS_TES), sts);
1398
1399        raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1400        return 0;
1401}
1402
1403static int iommu_disable_translation(struct intel_iommu *iommu)
1404{
1405        u32 sts;
1406        unsigned long flag;
1407
1408        raw_spin_lock_irqsave(&iommu->register_lock, flag);
1409        iommu->gcmd &= ~DMA_GCMD_TE;
1410        writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1411
1412        /* Make sure hardware complete it */
1413        IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1414                      readl, (!(sts & DMA_GSTS_TES)), sts);
1415
1416        raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1417        return 0;
1418}
1419
1420
1421static int iommu_init_domains(struct intel_iommu *iommu)
1422{
1423        unsigned long ndomains;
1424        unsigned long nlongs;
1425
1426        ndomains = cap_ndoms(iommu->cap);
1427        pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1428                 iommu->seq_id, ndomains);
1429        nlongs = BITS_TO_LONGS(ndomains);
1430
1431        spin_lock_init(&iommu->lock);
1432
1433        /* TBD: there might be 64K domains,
1434         * consider other allocation for future chip
1435         */
1436        iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1437        if (!iommu->domain_ids) {
1438                pr_err("IOMMU%d: allocating domain id array failed\n",
1439                       iommu->seq_id);
1440                return -ENOMEM;
1441        }
1442        iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1443                        GFP_KERNEL);
1444        if (!iommu->domains) {
1445                pr_err("IOMMU%d: allocating domain array failed\n",
1446                       iommu->seq_id);
1447                kfree(iommu->domain_ids);
1448                iommu->domain_ids = NULL;
1449                return -ENOMEM;
1450        }
1451
1452        /*
1453         * if Caching mode is set, then invalid translations are tagged
1454         * with domainid 0. Hence we need to pre-allocate it.
1455         */
1456        if (cap_caching_mode(iommu->cap))
1457                set_bit(0, iommu->domain_ids);
1458        return 0;
1459}
1460
1461static void free_dmar_iommu(struct intel_iommu *iommu)
1462{
1463        struct dmar_domain *domain;
1464        int i, count;
1465        unsigned long flags;
1466
1467        if ((iommu->domains) && (iommu->domain_ids)) {
1468                for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1469                        /*
1470                         * Domain id 0 is reserved for invalid translation
1471                         * if hardware supports caching mode.
1472                         */
1473                        if (cap_caching_mode(iommu->cap) && i == 0)
1474                                continue;
1475
1476                        domain = iommu->domains[i];
1477                        clear_bit(i, iommu->domain_ids);
1478
1479                        spin_lock_irqsave(&domain->iommu_lock, flags);
1480                        count = --domain->iommu_count;
1481                        spin_unlock_irqrestore(&domain->iommu_lock, flags);
1482                        if (count == 0)
1483                                domain_exit(domain);
1484                }
1485        }
1486
1487        if (iommu->gcmd & DMA_GCMD_TE)
1488                iommu_disable_translation(iommu);
1489
1490        kfree(iommu->domains);
1491        kfree(iommu->domain_ids);
1492        iommu->domains = NULL;
1493        iommu->domain_ids = NULL;
1494
1495        g_iommus[iommu->seq_id] = NULL;
1496
1497        /* free context mapping */
1498        free_context_table(iommu);
1499}
1500
1501static struct dmar_domain *alloc_domain(bool vm)
1502{
1503        /* domain id for virtual machine, it won't be set in context */
1504        static atomic_t vm_domid = ATOMIC_INIT(0);
1505        struct dmar_domain *domain;
1506
1507        domain = alloc_domain_mem();
1508        if (!domain)
1509                return NULL;
1510
1511        domain->nid = -1;
1512        domain->iommu_count = 0;
1513        memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1514        domain->flags = 0;
1515        spin_lock_init(&domain->iommu_lock);
1516        INIT_LIST_HEAD(&domain->devices);
1517        if (vm) {
1518                domain->id = atomic_inc_return(&vm_domid);
1519                domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
1520        }
1521
1522        return domain;
1523}
1524
1525static int iommu_attach_domain(struct dmar_domain *domain,
1526                               struct intel_iommu *iommu)
1527{
1528        int num;
1529        unsigned long ndomains;
1530        unsigned long flags;
1531
1532        ndomains = cap_ndoms(iommu->cap);
1533
1534        spin_lock_irqsave(&iommu->lock, flags);
1535
1536        num = find_first_zero_bit(iommu->domain_ids, ndomains);
1537        if (num >= ndomains) {
1538                spin_unlock_irqrestore(&iommu->lock, flags);
1539                printk(KERN_ERR "IOMMU: no free domain ids\n");
1540                return -ENOMEM;
1541        }
1542
1543        domain->id = num;
1544        domain->iommu_count++;
1545        set_bit(num, iommu->domain_ids);
1546        set_bit(iommu->seq_id, domain->iommu_bmp);
1547        iommu->domains[num] = domain;
1548        spin_unlock_irqrestore(&iommu->lock, flags);
1549
1550        return 0;
1551}
1552
1553static void iommu_detach_domain(struct dmar_domain *domain,
1554                                struct intel_iommu *iommu)
1555{
1556        unsigned long flags;
1557        int num, ndomains;
1558
1559        spin_lock_irqsave(&iommu->lock, flags);
1560        ndomains = cap_ndoms(iommu->cap);
1561        for_each_set_bit(num, iommu->domain_ids, ndomains) {
1562                if (iommu->domains[num] == domain) {
1563                        clear_bit(num, iommu->domain_ids);
1564                        iommu->domains[num] = NULL;
1565                        break;
1566                }
1567        }
1568        spin_unlock_irqrestore(&iommu->lock, flags);
1569}
1570
1571static struct iova_domain reserved_iova_list;
1572static struct lock_class_key reserved_rbtree_key;
1573
1574static int dmar_init_reserved_ranges(void)
1575{
1576        struct pci_dev *pdev = NULL;
1577        struct iova *iova;
1578        int i;
1579
1580        init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1581
1582        lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1583                &reserved_rbtree_key);
1584
1585        /* IOAPIC ranges shouldn't be accessed by DMA */
1586        iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1587                IOVA_PFN(IOAPIC_RANGE_END));
1588        if (!iova) {
1589                printk(KERN_ERR "Reserve IOAPIC range failed\n");
1590                return -ENODEV;
1591        }
1592
1593        /* Reserve all PCI MMIO to avoid peer-to-peer access */
1594        for_each_pci_dev(pdev) {
1595                struct resource *r;
1596
1597                for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1598                        r = &pdev->resource[i];
1599                        if (!r->flags || !(r->flags & IORESOURCE_MEM))
1600                                continue;
1601                        iova = reserve_iova(&reserved_iova_list,
1602                                            IOVA_PFN(r->start),
1603                                            IOVA_PFN(r->end));
1604                        if (!iova) {
1605                                printk(KERN_ERR "Reserve iova failed\n");
1606                                return -ENODEV;
1607                        }
1608                }
1609        }
1610        return 0;
1611}
1612
1613static void domain_reserve_special_ranges(struct dmar_domain *domain)
1614{
1615        copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1616}
1617
1618static inline int guestwidth_to_adjustwidth(int gaw)
1619{
1620        int agaw;
1621        int r = (gaw - 12) % 9;
1622
1623        if (r == 0)
1624                agaw = gaw;
1625        else
1626                agaw = gaw + 9 - r;
1627        if (agaw > 64)
1628                agaw = 64;
1629        return agaw;
1630}
1631
1632static int domain_init(struct dmar_domain *domain, int guest_width)
1633{
1634        struct intel_iommu *iommu;
1635        int adjust_width, agaw;
1636        unsigned long sagaw;
1637
1638        init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1639        domain_reserve_special_ranges(domain);
1640
1641        /* calculate AGAW */
1642        iommu = domain_get_iommu(domain);
1643        if (guest_width > cap_mgaw(iommu->cap))
1644                guest_width = cap_mgaw(iommu->cap);
1645        domain->gaw = guest_width;
1646        adjust_width = guestwidth_to_adjustwidth(guest_width);
1647        agaw = width_to_agaw(adjust_width);
1648        sagaw = cap_sagaw(iommu->cap);
1649        if (!test_bit(agaw, &sagaw)) {
1650                /* hardware doesn't support it, choose a bigger one */
1651                pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1652                agaw = find_next_bit(&sagaw, 5, agaw);
1653                if (agaw >= 5)
1654                        return -ENODEV;
1655        }
1656        domain->agaw = agaw;
1657
1658        if (ecap_coherent(iommu->ecap))
1659                domain->iommu_coherency = 1;
1660        else
1661                domain->iommu_coherency = 0;
1662
1663        if (ecap_sc_support(iommu->ecap))
1664                domain->iommu_snooping = 1;
1665        else
1666                domain->iommu_snooping = 0;
1667
1668        if (intel_iommu_superpage)
1669                domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1670        else
1671                domain->iommu_superpage = 0;
1672
1673        domain->nid = iommu->node;
1674
1675        /* always allocate the top pgd */
1676        domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1677        if (!domain->pgd)
1678                return -ENOMEM;
1679        __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1680        return 0;
1681}
1682
1683static void domain_exit(struct dmar_domain *domain)
1684{
1685        struct dmar_drhd_unit *drhd;
1686        struct intel_iommu *iommu;
1687        struct page *freelist = NULL;
1688
1689        /* Domain 0 is reserved, so dont process it */
1690        if (!domain)
1691                return;
1692
1693        /* Flush any lazy unmaps that may reference this domain */
1694        if (!intel_iommu_strict)
1695                flush_unmaps_timeout(0);
1696
1697        /* remove associated devices */
1698        domain_remove_dev_info(domain);
1699
1700        /* destroy iovas */
1701        put_iova_domain(&domain->iovad);
1702
1703        freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1704
1705        /* clear attached or cached domains */
1706        rcu_read_lock();
1707        for_each_active_iommu(iommu, drhd)
1708                if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1709                    test_bit(iommu->seq_id, domain->iommu_bmp))
1710                        iommu_detach_domain(domain, iommu);
1711        rcu_read_unlock();
1712
1713        dma_free_pagelist(freelist);
1714
1715        free_domain_mem(domain);
1716}
1717
1718static int domain_context_mapping_one(struct dmar_domain *domain,
1719                                      struct intel_iommu *iommu,
1720                                      u8 bus, u8 devfn, int translation)
1721{
1722        struct context_entry *context;
1723        unsigned long flags;
1724        struct dma_pte *pgd;
1725        unsigned long num;
1726        unsigned long ndomains;
1727        int id;
1728        int agaw;
1729        struct device_domain_info *info = NULL;
1730
1731        pr_debug("Set context mapping for %02x:%02x.%d\n",
1732                bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1733
1734        BUG_ON(!domain->pgd);
1735        BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1736               translation != CONTEXT_TT_MULTI_LEVEL);
1737
1738        context = device_to_context_entry(iommu, bus, devfn);
1739        if (!context)
1740                return -ENOMEM;
1741        spin_lock_irqsave(&iommu->lock, flags);
1742        if (context_present(context)) {
1743                spin_unlock_irqrestore(&iommu->lock, flags);
1744                return 0;
1745        }
1746
1747        id = domain->id;
1748        pgd = domain->pgd;
1749
1750        if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1751            domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1752                int found = 0;
1753
1754                /* find an available domain id for this device in iommu */
1755                ndomains = cap_ndoms(iommu->cap);
1756                for_each_set_bit(num, iommu->domain_ids, ndomains) {
1757                        if (iommu->domains[num] == domain) {
1758                                id = num;
1759                                found = 1;
1760                                break;
1761                        }
1762                }
1763
1764                if (found == 0) {
1765                        num = find_first_zero_bit(iommu->domain_ids, ndomains);
1766                        if (num >= ndomains) {
1767                                spin_unlock_irqrestore(&iommu->lock, flags);
1768                                printk(KERN_ERR "IOMMU: no free domain ids\n");
1769                                return -EFAULT;
1770                        }
1771
1772                        set_bit(num, iommu->domain_ids);
1773                        iommu->domains[num] = domain;
1774                        id = num;
1775                }
1776
1777                /* Skip top levels of page tables for
1778                 * iommu which has less agaw than default.
1779                 * Unnecessary for PT mode.
1780                 */
1781                if (translation != CONTEXT_TT_PASS_THROUGH) {
1782                        for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1783                                pgd = phys_to_virt(dma_pte_addr(pgd));
1784                                if (!dma_pte_present(pgd)) {
1785                                        spin_unlock_irqrestore(&iommu->lock, flags);
1786                                        return -ENOMEM;
1787                                }
1788                        }
1789                }
1790        }
1791
1792        context_set_domain_id(context, id);
1793
1794        if (translation != CONTEXT_TT_PASS_THROUGH) {
1795                info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1796                translation = info ? CONTEXT_TT_DEV_IOTLB :
1797                                     CONTEXT_TT_MULTI_LEVEL;
1798        }
1799        /*
1800         * In pass through mode, AW must be programmed to indicate the largest
1801         * AGAW value supported by hardware. And ASR is ignored by hardware.
1802         */
1803        if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1804                context_set_address_width(context, iommu->msagaw);
1805        else {
1806                context_set_address_root(context, virt_to_phys(pgd));
1807                context_set_address_width(context, iommu->agaw);
1808        }
1809
1810        context_set_translation_type(context, translation);
1811        context_set_fault_enable(context);
1812        context_set_present(context);
1813        domain_flush_cache(domain, context, sizeof(*context));
1814
1815        /*
1816         * It's a non-present to present mapping. If hardware doesn't cache
1817         * non-present entry we only need to flush the write-buffer. If the
1818         * _does_ cache non-present entries, then it does so in the special
1819         * domain #0, which we have to flush:
1820         */
1821        if (cap_caching_mode(iommu->cap)) {
1822                iommu->flush.flush_context(iommu, 0,
1823                                           (((u16)bus) << 8) | devfn,
1824                                           DMA_CCMD_MASK_NOBIT,
1825                                           DMA_CCMD_DEVICE_INVL);
1826                iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1827        } else {
1828                iommu_flush_write_buffer(iommu);
1829        }
1830        iommu_enable_dev_iotlb(info);
1831        spin_unlock_irqrestore(&iommu->lock, flags);
1832
1833        spin_lock_irqsave(&domain->iommu_lock, flags);
1834        if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1835                domain->iommu_count++;
1836                if (domain->iommu_count == 1)
1837                        domain->nid = iommu->node;
1838                domain_update_iommu_cap(domain);
1839        }
1840        spin_unlock_irqrestore(&domain->iommu_lock, flags);
1841        return 0;
1842}
1843
1844static int
1845domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1846                       int translation)
1847{
1848        int ret;
1849        struct pci_dev *pdev, *tmp, *parent;
1850        struct intel_iommu *iommu;
1851        u8 bus, devfn;
1852
1853        iommu = device_to_iommu(dev, &bus, &devfn);
1854        if (!iommu)
1855                return -ENODEV;
1856
1857        ret = domain_context_mapping_one(domain, iommu, bus, devfn,
1858                                         translation);
1859        if (ret || !dev_is_pci(dev))
1860                return ret;
1861
1862        /* dependent device mapping */
1863        pdev = to_pci_dev(dev);
1864        tmp = pci_find_upstream_pcie_bridge(pdev);
1865        if (!tmp)
1866                return 0;
1867        /* Secondary interface's bus number and devfn 0 */
1868        parent = pdev->bus->self;
1869        while (parent != tmp) {
1870                ret = domain_context_mapping_one(domain, iommu,
1871                                                 parent->bus->number,
1872                                                 parent->devfn, translation);
1873                if (ret)
1874                        return ret;
1875                parent = parent->bus->self;
1876        }
1877        if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
1878                return domain_context_mapping_one(domain, iommu,
1879                                        tmp->subordinate->number, 0,
1880                                        translation);
1881        else /* this is a legacy PCI bridge */
1882                return domain_context_mapping_one(domain, iommu,
1883                                                  tmp->bus->number,
1884                                                  tmp->devfn,
1885                                                  translation);
1886}
1887
1888static int domain_context_mapped(struct device *dev)
1889{
1890        int ret;
1891        struct pci_dev *pdev, *tmp, *parent;
1892        struct intel_iommu *iommu;
1893        u8 bus, devfn;
1894
1895        iommu = device_to_iommu(dev, &bus, &devfn);
1896        if (!iommu)
1897                return -ENODEV;
1898
1899        ret = device_context_mapped(iommu, bus, devfn);
1900        if (!ret || !dev_is_pci(dev))
1901                return ret;
1902
1903        /* dependent device mapping */
1904        pdev = to_pci_dev(dev);
1905        tmp = pci_find_upstream_pcie_bridge(pdev);
1906        if (!tmp)
1907                return ret;
1908        /* Secondary interface's bus number and devfn 0 */
1909        parent = pdev->bus->self;
1910        while (parent != tmp) {
1911                ret = device_context_mapped(iommu, parent->bus->number,
1912                                            parent->devfn);
1913                if (!ret)
1914                        return ret;
1915                parent = parent->bus->self;
1916        }
1917        if (pci_is_pcie(tmp))
1918                return device_context_mapped(iommu, tmp->subordinate->number,
1919                                             0);
1920        else
1921                return device_context_mapped(iommu, tmp->bus->number,
1922                                             tmp->devfn);
1923}
1924
1925/* Returns a number of VTD pages, but aligned to MM page size */
1926static inline unsigned long aligned_nrpages(unsigned long host_addr,
1927                                            size_t size)
1928{
1929        host_addr &= ~PAGE_MASK;
1930        return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1931}
1932
1933/* Return largest possible superpage level for a given mapping */
1934static inline int hardware_largepage_caps(struct dmar_domain *domain,
1935                                          unsigned long iov_pfn,
1936                                          unsigned long phy_pfn,
1937                                          unsigned long pages)
1938{
1939        int support, level = 1;
1940        unsigned long pfnmerge;
1941
1942        support = domain->iommu_superpage;
1943
1944        /* To use a large page, the virtual *and* physical addresses
1945           must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1946           of them will mean we have to use smaller pages. So just
1947           merge them and check both at once. */
1948        pfnmerge = iov_pfn | phy_pfn;
1949
1950        while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1951                pages >>= VTD_STRIDE_SHIFT;
1952                if (!pages)
1953                        break;
1954                pfnmerge >>= VTD_STRIDE_SHIFT;
1955                level++;
1956                support--;
1957        }
1958        return level;
1959}
1960
1961static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1962                            struct scatterlist *sg, unsigned long phys_pfn,
1963                            unsigned long nr_pages, int prot)
1964{
1965        struct dma_pte *first_pte = NULL, *pte = NULL;
1966        phys_addr_t uninitialized_var(pteval);
1967        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1968        unsigned long sg_res;
1969        unsigned int largepage_lvl = 0;
1970        unsigned long lvl_pages = 0;
1971
1972        BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1973
1974        if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1975                return -EINVAL;
1976
1977        prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1978
1979        if (sg)
1980                sg_res = 0;
1981        else {
1982                sg_res = nr_pages + 1;
1983                pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1984        }
1985
1986        while (nr_pages > 0) {
1987                uint64_t tmp;
1988
1989                if (!sg_res) {
1990                        sg_res = aligned_nrpages(sg->offset, sg->length);
1991                        sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1992                        sg->dma_length = sg->length;
1993                        pteval = page_to_phys(sg_page(sg)) | prot;
1994                        phys_pfn = pteval >> VTD_PAGE_SHIFT;
1995                }
1996
1997                if (!pte) {
1998                        largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1999
2000                        first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2001                        if (!pte)
2002                                return -ENOMEM;
2003                        /* It is large page*/
2004                        if (largepage_lvl > 1) {
2005                                pteval |= DMA_PTE_LARGE_PAGE;
2006                                /* Ensure that old small page tables are removed to make room
2007                                   for superpage, if they exist. */
2008                                dma_pte_clear_range(domain, iov_pfn,
2009                                                    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2010                                dma_pte_free_pagetable(domain, iov_pfn,
2011                                                       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2012                        } else {
2013                                pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2014                        }
2015
2016                }
2017                /* We don't need lock here, nobody else
2018                 * touches the iova range
2019                 */
2020                tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2021                if (tmp) {
2022                        static int dumps = 5;
2023                        printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2024                               iov_pfn, tmp, (unsigned long long)pteval);
2025                        if (dumps) {
2026                                dumps--;
2027                                debug_dma_dump_mappings(NULL);
2028                        }
2029                        WARN_ON(1);
2030                }
2031
2032                lvl_pages = lvl_to_nr_pages(largepage_lvl);
2033
2034                BUG_ON(nr_pages < lvl_pages);
2035                BUG_ON(sg_res < lvl_pages);
2036
2037                nr_pages -= lvl_pages;
2038                iov_pfn += lvl_pages;
2039                phys_pfn += lvl_pages;
2040                pteval += lvl_pages * VTD_PAGE_SIZE;
2041                sg_res -= lvl_pages;
2042
2043                /* If the next PTE would be the first in a new page, then we
2044                   need to flush the cache on the entries we've just written.
2045                   And then we'll need to recalculate 'pte', so clear it and
2046                   let it get set again in the if (!pte) block above.
2047
2048                   If we're done (!nr_pages) we need to flush the cache too.
2049
2050                   Also if we've been setting superpages, we may need to
2051                   recalculate 'pte' and switch back to smaller pages for the
2052                   end of the mapping, if the trailing size is not enough to
2053                   use another superpage (i.e. sg_res < lvl_pages). */
2054                pte++;
2055                if (!nr_pages || first_pte_in_page(pte) ||
2056                    (largepage_lvl > 1 && sg_res < lvl_pages)) {
2057                        domain_flush_cache(domain, first_pte,
2058                                           (void *)pte - (void *)first_pte);
2059                        pte = NULL;
2060                }
2061
2062                if (!sg_res && nr_pages)
2063                        sg = sg_next(sg);
2064        }
2065        return 0;
2066}
2067
2068static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2069                                    struct scatterlist *sg, unsigned long nr_pages,
2070                                    int prot)
2071{
2072        return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2073}
2074
2075static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2076                                     unsigned long phys_pfn, unsigned long nr_pages,
2077                                     int prot)
2078{
2079        return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2080}
2081
2082static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2083{
2084        if (!iommu)
2085                return;
2086
2087        clear_context_table(iommu, bus, devfn);
2088        iommu->flush.flush_context(iommu, 0, 0, 0,
2089                                           DMA_CCMD_GLOBAL_INVL);
2090        iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2091}
2092
2093static inline void unlink_domain_info(struct device_domain_info *info)
2094{
2095        assert_spin_locked(&device_domain_lock);
2096        list_del(&info->link);
2097        list_del(&info->global);
2098        if (info->dev)
2099                info->dev->archdata.iommu = NULL;
2100}
2101
2102static void domain_remove_dev_info(struct dmar_domain *domain)
2103{
2104        struct device_domain_info *info;
2105        unsigned long flags, flags2;
2106
2107        spin_lock_irqsave(&device_domain_lock, flags);
2108        while (!list_empty(&domain->devices)) {
2109                info = list_entry(domain->devices.next,
2110                        struct device_domain_info, link);
2111                unlink_domain_info(info);
2112                spin_unlock_irqrestore(&device_domain_lock, flags);
2113
2114                iommu_disable_dev_iotlb(info);
2115                iommu_detach_dev(info->iommu, info->bus, info->devfn);
2116
2117                if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
2118                        iommu_detach_dependent_devices(info->iommu, info->dev);
2119                        /* clear this iommu in iommu_bmp, update iommu count
2120                         * and capabilities
2121                         */
2122                        spin_lock_irqsave(&domain->iommu_lock, flags2);
2123                        if (test_and_clear_bit(info->iommu->seq_id,
2124                                               domain->iommu_bmp)) {
2125                                domain->iommu_count--;
2126                                domain_update_iommu_cap(domain);
2127                        }
2128                        spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2129                }
2130
2131                free_devinfo_mem(info);
2132                spin_lock_irqsave(&device_domain_lock, flags);
2133        }
2134        spin_unlock_irqrestore(&device_domain_lock, flags);
2135}
2136
2137/*
2138 * find_domain
2139 * Note: we use struct device->archdata.iommu stores the info
2140 */
2141static struct dmar_domain *find_domain(struct device *dev)
2142{
2143        struct device_domain_info *info;
2144
2145        /* No lock here, assumes no domain exit in normal case */
2146        info = dev->archdata.iommu;
2147        if (info)
2148                return info->domain;
2149        return NULL;
2150}
2151
2152static inline struct device_domain_info *
2153dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2154{
2155        struct device_domain_info *info;
2156
2157        list_for_each_entry(info, &device_domain_list, global)
2158                if (info->iommu->segment == segment && info->bus == bus &&
2159                    info->devfn == devfn)
2160                        return info;
2161
2162        return NULL;
2163}
2164
2165static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2166                                                int bus, int devfn,
2167                                                struct device *dev,
2168                                                struct dmar_domain *domain)
2169{
2170        struct dmar_domain *found = NULL;
2171        struct device_domain_info *info;
2172        unsigned long flags;
2173
2174        info = alloc_devinfo_mem();
2175        if (!info)
2176                return NULL;
2177
2178        info->bus = bus;
2179        info->devfn = devfn;
2180        info->dev = dev;
2181        info->domain = domain;
2182        info->iommu = iommu;
2183        if (!dev)
2184                domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2185
2186        spin_lock_irqsave(&device_domain_lock, flags);
2187        if (dev)
2188                found = find_domain(dev);
2189        else {
2190                struct device_domain_info *info2;
2191                info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2192                if (info2)
2193                        found = info2->domain;
2194        }
2195        if (found) {
2196                spin_unlock_irqrestore(&device_domain_lock, flags);
2197                free_devinfo_mem(info);
2198                /* Caller must free the original domain */
2199                return found;
2200        }
2201
2202        list_add(&info->link, &domain->devices);
2203        list_add(&info->global, &device_domain_list);
2204        if (dev)
2205                dev->archdata.iommu = info;
2206        spin_unlock_irqrestore(&device_domain_lock, flags);
2207
2208        return domain;
2209}
2210
2211/* domain is initialized */
2212static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2213{
2214        struct dmar_domain *domain, *free = NULL;
2215        struct intel_iommu *iommu = NULL;
2216        struct device_domain_info *info;
2217        struct pci_dev *dev_tmp = NULL;
2218        unsigned long flags;
2219        u8 bus, devfn, bridge_bus, bridge_devfn;
2220
2221        domain = find_domain(dev);
2222        if (domain)
2223                return domain;
2224
2225        if (dev_is_pci(dev)) {
2226                struct pci_dev *pdev = to_pci_dev(dev);
2227                u16 segment;
2228
2229                segment = pci_domain_nr(pdev->bus);
2230                dev_tmp = pci_find_upstream_pcie_bridge(pdev);
2231                if (dev_tmp) {
2232                        if (pci_is_pcie(dev_tmp)) {
2233                                bridge_bus = dev_tmp->subordinate->number;
2234                                bridge_devfn = 0;
2235                        } else {
2236                                bridge_bus = dev_tmp->bus->number;
2237                                bridge_devfn = dev_tmp->devfn;
2238                        }
2239                        spin_lock_irqsave(&device_domain_lock, flags);
2240                        info = dmar_search_domain_by_dev_info(segment,
2241                                                              bridge_bus,
2242                                                              bridge_devfn);
2243                        if (info) {
2244                                iommu = info->iommu;
2245                                domain = info->domain;
2246                        }
2247                        spin_unlock_irqrestore(&device_domain_lock, flags);
2248                        /* pcie-pci bridge already has a domain, uses it */
2249                        if (info)
2250                                goto found_domain;
2251                }
2252        }
2253
2254        iommu = device_to_iommu(dev, &bus, &devfn);
2255        if (!iommu)
2256                goto error;
2257
2258        /* Allocate and initialize new domain for the device */
2259        domain = alloc_domain(false);
2260        if (!domain)
2261                goto error;
2262        if (iommu_attach_domain(domain, iommu)) {
2263                free_domain_mem(domain);
2264                domain = NULL;
2265                goto error;
2266        }
2267        free = domain;
2268        if (domain_init(domain, gaw))
2269                goto error;
2270
2271        /* register pcie-to-pci device */
2272        if (dev_tmp) {
2273                domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn,
2274                                              NULL, domain);
2275                if (!domain)
2276                        goto error;
2277        }
2278
2279found_domain:
2280        domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2281error:
2282        if (free != domain)
2283                domain_exit(free);
2284
2285        return domain;
2286}
2287
2288static int iommu_identity_mapping;
2289#define IDENTMAP_ALL            1
2290#define IDENTMAP_GFX            2
2291#define IDENTMAP_AZALIA         4
2292
2293static int iommu_domain_identity_map(struct dmar_domain *domain,
2294                                     unsigned long long start,
2295                                     unsigned long long end)
2296{
2297        unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2298        unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2299
2300        if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2301                          dma_to_mm_pfn(last_vpfn))) {
2302                printk(KERN_ERR "IOMMU: reserve iova failed\n");
2303                return -ENOMEM;
2304        }
2305
2306        pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2307                 start, end, domain->id);
2308        /*
2309         * RMRR range might have overlap with physical memory range,
2310         * clear it first
2311         */
2312        dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2313
2314        return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2315                                  last_vpfn - first_vpfn + 1,
2316                                  DMA_PTE_READ|DMA_PTE_WRITE);
2317}
2318
2319static int iommu_prepare_identity_map(struct device *dev,
2320                                      unsigned long long start,
2321                                      unsigned long long end)
2322{
2323        struct dmar_domain *domain;
2324        int ret;
2325
2326        domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2327        if (!domain)
2328                return -ENOMEM;
2329
2330        /* For _hardware_ passthrough, don't bother. But for software
2331           passthrough, we do it anyway -- it may indicate a memory
2332           range which is reserved in E820, so which didn't get set
2333           up to start with in si_domain */
2334        if (domain == si_domain && hw_pass_through) {
2335                printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2336                       dev_name(dev), start, end);
2337                return 0;
2338        }
2339
2340        printk(KERN_INFO
2341               "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2342               dev_name(dev), start, end);
2343        
2344        if (end < start) {
2345                WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2346                        "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2347                        dmi_get_system_info(DMI_BIOS_VENDOR),
2348                        dmi_get_system_info(DMI_BIOS_VERSION),
2349                     dmi_get_system_info(DMI_PRODUCT_VERSION));
2350                ret = -EIO;
2351                goto error;
2352        }
2353
2354        if (end >> agaw_to_width(domain->agaw)) {
2355                WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2356                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2357                     agaw_to_width(domain->agaw),
2358                     dmi_get_system_info(DMI_BIOS_VENDOR),
2359                     dmi_get_system_info(DMI_BIOS_VERSION),
2360                     dmi_get_system_info(DMI_PRODUCT_VERSION));
2361                ret = -EIO;
2362                goto error;
2363        }
2364
2365        ret = iommu_domain_identity_map(domain, start, end);
2366        if (ret)
2367                goto error;
2368
2369        /* context entry init */
2370        ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2371        if (ret)
2372                goto error;
2373
2374        return 0;
2375
2376 error:
2377        domain_exit(domain);
2378        return ret;
2379}
2380
2381static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2382                                         struct device *dev)
2383{
2384        if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2385                return 0;
2386        return iommu_prepare_identity_map(dev, rmrr->base_address,
2387                                          rmrr->end_address);
2388}
2389
2390#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2391static inline void iommu_prepare_isa(void)
2392{
2393        struct pci_dev *pdev;
2394        int ret;
2395
2396        pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2397        if (!pdev)
2398                return;
2399
2400        printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2401        ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2402
2403        if (ret)
2404                printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2405                       "floppy might not work\n");
2406
2407}
2408#else
2409static inline void iommu_prepare_isa(void)
2410{
2411        return;
2412}
2413#endif /* !CONFIG_INTEL_IOMMU_FLPY_WA */
2414
2415static int md_domain_init(struct dmar_domain *domain, int guest_width);
2416
2417static int __init si_domain_init(int hw)
2418{
2419        struct dmar_drhd_unit *drhd;
2420        struct intel_iommu *iommu;
2421        int nid, ret = 0;
2422
2423        si_domain = alloc_domain(false);
2424        if (!si_domain)
2425                return -EFAULT;
2426
2427        si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2428
2429        for_each_active_iommu(iommu, drhd) {
2430                ret = iommu_attach_domain(si_domain, iommu);
2431                if (ret) {
2432                        domain_exit(si_domain);
2433                        return -EFAULT;
2434                }
2435        }
2436
2437        if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2438                domain_exit(si_domain);
2439                return -EFAULT;
2440        }
2441
2442        pr_debug("IOMMU: identity mapping domain is domain %d\n",
2443                 si_domain->id);
2444
2445        if (hw)
2446                return 0;
2447
2448        for_each_online_node(nid) {
2449                unsigned long start_pfn, end_pfn;
2450                int i;
2451
2452                for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2453                        ret = iommu_domain_identity_map(si_domain,
2454                                        PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2455                        if (ret)
2456                                return ret;
2457                }
2458        }
2459
2460        return 0;
2461}
2462
2463static int identity_mapping(struct device *dev)
2464{
2465        struct device_domain_info *info;
2466
2467        if (likely(!iommu_identity_mapping))
2468                return 0;
2469
2470        info = dev->archdata.iommu;
2471        if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2472                return (info->domain == si_domain);
2473
2474        return 0;
2475}
2476
2477static int domain_add_dev_info(struct dmar_domain *domain,
2478                               struct device *dev, int translation)
2479{
2480        struct dmar_domain *ndomain;
2481        struct intel_iommu *iommu;
2482        u8 bus, devfn;
2483        int ret;
2484
2485        iommu = device_to_iommu(dev, &bus, &devfn);
2486        if (!iommu)
2487                return -ENODEV;
2488
2489        ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2490        if (ndomain != domain)
2491                return -EBUSY;
2492
2493        ret = domain_context_mapping(domain, dev, translation);
2494        if (ret) {
2495                domain_remove_one_dev_info(domain, dev);
2496                return ret;
2497        }
2498
2499        return 0;
2500}
2501
2502static bool device_has_rmrr(struct device *dev)
2503{
2504        struct dmar_rmrr_unit *rmrr;
2505        struct device *tmp;
2506        int i;
2507
2508        rcu_read_lock();
2509        for_each_rmrr_units(rmrr) {
2510                /*
2511                 * Return TRUE if this RMRR contains the device that
2512                 * is passed in.
2513                 */
2514                for_each_active_dev_scope(rmrr->devices,
2515                                          rmrr->devices_cnt, i, tmp)
2516                        if (tmp == dev) {
2517                                rcu_read_unlock();
2518                                return true;
2519                        }
2520        }
2521        rcu_read_unlock();
2522        return false;
2523}
2524
2525static int iommu_should_identity_map(struct device *dev, int startup)
2526{
2527
2528        if (dev_is_pci(dev)) {
2529                struct pci_dev *pdev = to_pci_dev(dev);
2530
2531                /*
2532                 * We want to prevent any device associated with an RMRR from
2533                 * getting placed into the SI Domain. This is done because
2534                 * problems exist when devices are moved in and out of domains
2535                 * and their respective RMRR info is lost. We exempt USB devices
2536                 * from this process due to their usage of RMRRs that are known
2537                 * to not be needed after BIOS hand-off to OS.
2538                 */
2539                if (device_has_rmrr(dev) &&
2540                    (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2541                        return 0;
2542
2543                if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2544                        return 1;
2545
2546                if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2547                        return 1;
2548
2549                if (!(iommu_identity_mapping & IDENTMAP_ALL))
2550                        return 0;
2551
2552                /*
2553                 * We want to start off with all devices in the 1:1 domain, and
2554                 * take them out later if we find they can't access all of memory.
2555                 *
2556                 * However, we can't do this for PCI devices behind bridges,
2557                 * because all PCI devices behind the same bridge will end up
2558                 * with the same source-id on their transactions.
2559                 *
2560                 * Practically speaking, we can't change things around for these
2561                 * devices at run-time, because we can't be sure there'll be no
2562                 * DMA transactions in flight for any of their siblings.
2563                 *
2564                 * So PCI devices (unless they're on the root bus) as well as
2565                 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2566                 * the 1:1 domain, just in _case_ one of their siblings turns out
2567                 * not to be able to map all of memory.
2568                 */
2569                if (!pci_is_pcie(pdev)) {
2570                        if (!pci_is_root_bus(pdev->bus))
2571                                return 0;
2572                        if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2573                                return 0;
2574                } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2575                        return 0;
2576        } else {
2577                if (device_has_rmrr(dev))
2578                        return 0;
2579        }
2580
2581        /*
2582         * At boot time, we don't yet know if devices will be 64-bit capable.
2583         * Assume that they will — if they turn out not to be, then we can
2584         * take them out of the 1:1 domain later.
2585         */
2586        if (!startup) {
2587                /*
2588                 * If the device's dma_mask is less than the system's memory
2589                 * size then this is not a candidate for identity mapping.
2590                 */
2591                u64 dma_mask = *dev->dma_mask;
2592
2593                if (dev->coherent_dma_mask &&
2594                    dev->coherent_dma_mask < dma_mask)
2595                        dma_mask = dev->coherent_dma_mask;
2596
2597                return dma_mask >= dma_get_required_mask(dev);
2598        }
2599
2600        return 1;
2601}
2602
2603static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2604{
2605        int ret;
2606
2607        if (!iommu_should_identity_map(dev, 1))
2608                return 0;
2609
2610        ret = domain_add_dev_info(si_domain, dev,
2611                                  hw ? CONTEXT_TT_PASS_THROUGH :
2612                                       CONTEXT_TT_MULTI_LEVEL);
2613        if (!ret)
2614                pr_info("IOMMU: %s identity mapping for device %s\n",
2615                        hw ? "hardware" : "software", dev_name(dev));
2616        else if (ret == -ENODEV)
2617                /* device not associated with an iommu */
2618                ret = 0;
2619
2620        return ret;
2621}
2622
2623
2624static int __init iommu_prepare_static_identity_mapping(int hw)
2625{
2626        struct pci_dev *pdev = NULL;
2627        struct dmar_drhd_unit *drhd;
2628        struct intel_iommu *iommu;
2629        struct device *dev;
2630        int i;
2631        int ret = 0;
2632
2633        ret = si_domain_init(hw);
2634        if (ret)
2635                return -EFAULT;
2636
2637        for_each_pci_dev(pdev) {
2638                ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2639                if (ret)
2640                        return ret;
2641        }
2642
2643        for_each_active_iommu(iommu, drhd)
2644                for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2645                        struct acpi_device_physical_node *pn;
2646                        struct acpi_device *adev;
2647
2648                        if (dev->bus != &acpi_bus_type)
2649                                continue;
2650                                
2651                        adev= to_acpi_device(dev);
2652                        mutex_lock(&adev->physical_node_lock);
2653                        list_for_each_entry(pn, &adev->physical_node_list, node) {
2654                                ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2655                                if (ret)
2656                                        break;
2657                        }
2658                        mutex_unlock(&adev->physical_node_lock);
2659                        if (ret)
2660                                return ret;
2661                }
2662
2663        return 0;
2664}
2665
2666static int __init init_dmars(void)
2667{
2668        struct dmar_drhd_unit *drhd;
2669        struct dmar_rmrr_unit *rmrr;
2670        struct device *dev;
2671        struct intel_iommu *iommu;
2672        int i, ret;
2673
2674        /*
2675         * for each drhd
2676         *    allocate root
2677         *    initialize and program root entry to not present
2678         * endfor
2679         */
2680        for_each_drhd_unit(drhd) {
2681                /*
2682                 * lock not needed as this is only incremented in the single
2683                 * threaded kernel __init code path all other access are read
2684                 * only
2685                 */
2686                if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2687                        g_num_of_iommus++;
2688                        continue;
2689                }
2690                printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2691                          IOMMU_UNITS_SUPPORTED);
2692        }
2693
2694        g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2695                        GFP_KERNEL);
2696        if (!g_iommus) {
2697                printk(KERN_ERR "Allocating global iommu array failed\n");
2698                ret = -ENOMEM;
2699                goto error;
2700        }
2701
2702        deferred_flush = kzalloc(g_num_of_iommus *
2703                sizeof(struct deferred_flush_tables), GFP_KERNEL);
2704        if (!deferred_flush) {
2705                ret = -ENOMEM;
2706                goto free_g_iommus;
2707        }
2708
2709        for_each_active_iommu(iommu, drhd) {
2710                g_iommus[iommu->seq_id] = iommu;
2711
2712                ret = iommu_init_domains(iommu);
2713                if (ret)
2714                        goto free_iommu;
2715
2716                /*
2717                 * TBD:
2718                 * we could share the same root & context tables
2719                 * among all IOMMU's. Need to Split it later.
2720                 */
2721                ret = iommu_alloc_root_entry(iommu);
2722                if (ret) {
2723                        printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2724                        goto free_iommu;
2725                }
2726                if (!ecap_pass_through(iommu->ecap))
2727                        hw_pass_through = 0;
2728        }
2729
2730        /*
2731         * Start from the sane iommu hardware state.
2732         */
2733        for_each_active_iommu(iommu, drhd) {
2734                /*
2735                 * If the queued invalidation is already initialized by us
2736                 * (for example, while enabling interrupt-remapping) then
2737                 * we got the things already rolling from a sane state.
2738                 */
2739                if (iommu->qi)
2740                        continue;
2741
2742                /*
2743                 * Clear any previous faults.
2744                 */
2745                dmar_fault(-1, iommu);
2746                /*
2747                 * Disable queued invalidation if supported and already enabled
2748                 * before OS handover.
2749                 */
2750                dmar_disable_qi(iommu);
2751        }
2752
2753        for_each_active_iommu(iommu, drhd) {
2754                if (dmar_enable_qi(iommu)) {
2755                        /*
2756                         * Queued Invalidate not enabled, use Register Based
2757                         * Invalidate
2758                         */
2759                        iommu->flush.flush_context = __iommu_flush_context;
2760                        iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2761                        printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2762                               "invalidation\n",
2763                                iommu->seq_id,
2764                               (unsigned long long)drhd->reg_base_addr);
2765                } else {
2766                        iommu->flush.flush_context = qi_flush_context;
2767                        iommu->flush.flush_iotlb = qi_flush_iotlb;
2768                        printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2769                               "invalidation\n",
2770                                iommu->seq_id,
2771                               (unsigned long long)drhd->reg_base_addr);
2772                }
2773        }
2774
2775        if (iommu_pass_through)
2776                iommu_identity_mapping |= IDENTMAP_ALL;
2777
2778#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2779        iommu_identity_mapping |= IDENTMAP_GFX;
2780#endif
2781
2782        check_tylersburg_isoch();
2783
2784        /*
2785         * If pass through is not set or not enabled, setup context entries for
2786         * identity mappings for rmrr, gfx, and isa and may fall back to static
2787         * identity mapping if iommu_identity_mapping is set.
2788         */
2789        if (iommu_identity_mapping) {
2790                ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2791                if (ret) {
2792                        printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2793                        goto free_iommu;
2794                }
2795        }
2796        /*
2797         * For each rmrr
2798         *   for each dev attached to rmrr
2799         *   do
2800         *     locate drhd for dev, alloc domain for dev
2801         *     allocate free domain
2802         *     allocate page table entries for rmrr
2803         *     if context not allocated for bus
2804         *           allocate and init context
2805         *           set present in root table for this bus
2806         *     init context with domain, translation etc
2807         *    endfor
2808         * endfor
2809         */
2810        printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2811        for_each_rmrr_units(rmrr) {
2812                /* some BIOS lists non-exist devices in DMAR table. */
2813                for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2814                                          i, dev) {
2815                        ret = iommu_prepare_rmrr_dev(rmrr, dev);
2816                        if (ret)
2817                                printk(KERN_ERR
2818                                       "IOMMU: mapping reserved region failed\n");
2819                }
2820        }
2821
2822        iommu_prepare_isa();
2823
2824        /*
2825         * for each drhd
2826         *   enable fault log
2827         *   global invalidate context cache
2828         *   global invalidate iotlb
2829         *   enable translation
2830         */
2831        for_each_iommu(iommu, drhd) {
2832                if (drhd->ignored) {
2833                        /*
2834                         * we always have to disable PMRs or DMA may fail on
2835                         * this device
2836                         */
2837                        if (force_on)
2838                                iommu_disable_protect_mem_regions(iommu);
2839                        continue;
2840                }
2841
2842                iommu_flush_write_buffer(iommu);
2843
2844                ret = dmar_set_interrupt(iommu);
2845                if (ret)
2846                        goto free_iommu;
2847
2848                iommu_set_root_entry(iommu);
2849
2850                iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2851                iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2852
2853                ret = iommu_enable_translation(iommu);
2854                if (ret)
2855                        goto free_iommu;
2856
2857                iommu_disable_protect_mem_regions(iommu);
2858        }
2859
2860        return 0;
2861
2862free_iommu:
2863        for_each_active_iommu(iommu, drhd)
2864                free_dmar_iommu(iommu);
2865        kfree(deferred_flush);
2866free_g_iommus:
2867        kfree(g_iommus);
2868error:
2869        return ret;
2870}
2871
2872/* This takes a number of _MM_ pages, not VTD pages */
2873static struct iova *intel_alloc_iova(struct device *dev,
2874                                     struct dmar_domain *domain,
2875                                     unsigned long nrpages, uint64_t dma_mask)
2876{
2877        struct iova *iova = NULL;
2878
2879        /* Restrict dma_mask to the width that the iommu can handle */
2880        dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2881
2882        if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2883                /*
2884                 * First try to allocate an io virtual address in
2885                 * DMA_BIT_MASK(32) and if that fails then try allocating
2886                 * from higher range
2887                 */
2888                iova = alloc_iova(&domain->iovad, nrpages,
2889                                  IOVA_PFN(DMA_BIT_MASK(32)), 1);
2890                if (iova)
2891                        return iova;
2892        }
2893        iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2894        if (unlikely(!iova)) {
2895                printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2896                       nrpages, dev_name(dev));
2897                return NULL;
2898        }
2899
2900        return iova;
2901}
2902
2903static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2904{
2905        struct dmar_domain *domain;
2906        int ret;
2907
2908        domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2909        if (!domain) {
2910                printk(KERN_ERR "Allocating domain for %s failed",
2911                       dev_name(dev));
2912                return NULL;
2913        }
2914
2915        /* make sure context mapping is ok */
2916        if (unlikely(!domain_context_mapped(dev))) {
2917                ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2918                if (ret) {
2919                        printk(KERN_ERR "Domain context map for %s failed",
2920                               dev_name(dev));
2921                        return NULL;
2922                }
2923        }
2924
2925        return domain;
2926}
2927
2928static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2929{
2930        struct device_domain_info *info;
2931
2932        /* No lock here, assumes no domain exit in normal case */
2933        info = dev->archdata.iommu;
2934        if (likely(info))
2935                return info->domain;
2936
2937        return __get_valid_domain_for_dev(dev);
2938}
2939
2940static int iommu_dummy(struct device *dev)
2941{
2942        return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2943}
2944
2945/* Check if the dev needs to go through non-identity map and unmap process.*/
2946static int iommu_no_mapping(struct device *dev)
2947{
2948        int found;
2949
2950        if (iommu_dummy(dev))
2951                return 1;
2952
2953        if (!iommu_identity_mapping)
2954                return 0;
2955
2956        found = identity_mapping(dev);
2957        if (found) {
2958                if (iommu_should_identity_map(dev, 0))
2959                        return 1;
2960                else {
2961                        /*
2962                         * 32 bit DMA is removed from si_domain and fall back
2963                         * to non-identity mapping.
2964                         */
2965                        domain_remove_one_dev_info(si_domain, dev);
2966                        printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2967                               dev_name(dev));
2968                        return 0;
2969                }
2970        } else {
2971                /*
2972                 * In case of a detached 64 bit DMA device from vm, the device
2973                 * is put into si_domain for identity mapping.
2974                 */
2975                if (iommu_should_identity_map(dev, 0)) {
2976                        int ret;
2977                        ret = domain_add_dev_info(si_domain, dev,
2978                                                  hw_pass_through ?
2979                                                  CONTEXT_TT_PASS_THROUGH :
2980                                                  CONTEXT_TT_MULTI_LEVEL);
2981                        if (!ret) {
2982                                printk(KERN_INFO "64bit %s uses identity mapping\n",
2983                                       dev_name(dev));
2984                                return 1;
2985                        }
2986                }
2987        }
2988
2989        return 0;
2990}
2991
2992static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
2993                                     size_t size, int dir, u64 dma_mask)
2994{
2995        struct dmar_domain *domain;
2996        phys_addr_t start_paddr;
2997        struct iova *iova;
2998        int prot = 0;
2999        int ret;
3000        struct intel_iommu *iommu;
3001        unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3002
3003        BUG_ON(dir == DMA_NONE);
3004
3005        if (iommu_no_mapping(dev))
3006                return paddr;
3007
3008        domain = get_valid_domain_for_dev(dev);
3009        if (!domain)
3010                return 0;
3011
3012        iommu = domain_get_iommu(domain);
3013        size = aligned_nrpages(paddr, size);
3014
3015        iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3016        if (!iova)
3017                goto error;
3018
3019        /*
3020         * Check if DMAR supports zero-length reads on write only
3021         * mappings..
3022         */
3023        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3024                        !cap_zlr(iommu->cap))
3025                prot |= DMA_PTE_READ;
3026        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3027                prot |= DMA_PTE_WRITE;
3028        /*
3029         * paddr - (paddr + size) might be partial page, we should map the whole
3030         * page.  Note: if two part of one page are separately mapped, we
3031         * might have two guest_addr mapping to the same host paddr, but this
3032         * is not a big problem
3033         */
3034        ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3035                                 mm_to_dma_pfn(paddr_pfn), size, prot);
3036        if (ret)
3037                goto error;
3038
3039        /* it's a non-present to present mapping. Only flush if caching mode */
3040        if (cap_caching_mode(iommu->cap))
3041                iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3042        else
3043                iommu_flush_write_buffer(iommu);
3044
3045        start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3046        start_paddr += paddr & ~PAGE_MASK;
3047        return start_paddr;
3048
3049error:
3050        if (iova)
3051                __free_iova(&domain->iovad, iova);
3052        printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
3053                dev_name(dev), size, (unsigned long long)paddr, dir);
3054        return 0;
3055}
3056
3057static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3058                                 unsigned long offset, size_t size,
3059                                 enum dma_data_direction dir,
3060                                 struct dma_attrs *attrs)
3061{
3062        return __intel_map_single(dev, page_to_phys(page) + offset, size,
3063                                  dir, *dev->dma_mask);
3064}
3065
3066static void flush_unmaps(void)
3067{
3068        int i, j;
3069
3070        timer_on = 0;
3071
3072        /* just flush them all */
3073        for (i = 0; i < g_num_of_iommus; i++) {
3074                struct intel_iommu *iommu = g_iommus[i];
3075                if (!iommu)
3076                        continue;
3077
3078                if (!deferred_flush[i].next)
3079                        continue;
3080
3081                /* In caching mode, global flushes turn emulation expensive */
3082                if (!cap_caching_mode(iommu->cap))
3083                        iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3084                                         DMA_TLB_GLOBAL_FLUSH);
3085                for (j = 0; j < deferred_flush[i].next; j++) {
3086                        unsigned long mask;
3087                        struct iova *iova = deferred_flush[i].iova[j];
3088                        struct dmar_domain *domain = deferred_flush[i].domain[j];
3089
3090                        /* On real hardware multiple invalidations are expensive */
3091                        if (cap_caching_mode(iommu->cap))
3092                                iommu_flush_iotlb_psi(iommu, domain->id,
3093                                        iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3094                                        !deferred_flush[i].freelist[j], 0);
3095                        else {
3096                                mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3097                                iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3098                                                (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3099                        }
3100                        __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3101                        if (deferred_flush[i].freelist[j])
3102                                dma_free_pagelist(deferred_flush[i].freelist[j]);
3103                }
3104                deferred_flush[i].next = 0;
3105        }
3106
3107        list_size = 0;
3108}
3109
3110static void flush_unmaps_timeout(unsigned long data)
3111{
3112        unsigned long flags;
3113
3114        spin_lock_irqsave(&async_umap_flush_lock, flags);
3115        flush_unmaps();
3116        spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3117}
3118
3119static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3120{
3121        unsigned long flags;
3122        int next, iommu_id;
3123        struct intel_iommu *iommu;
3124
3125        spin_lock_irqsave(&async_umap_flush_lock, flags);
3126        if (list_size == HIGH_WATER_MARK)
3127                flush_unmaps();
3128
3129        iommu = domain_get_iommu(dom);
3130        iommu_id = iommu->seq_id;
3131
3132        next = deferred_flush[iommu_id].next;
3133        deferred_flush[iommu_id].domain[next] = dom;
3134        deferred_flush[iommu_id].iova[next] = iova;
3135        deferred_flush[iommu_id].freelist[next] = freelist;
3136        deferred_flush[iommu_id].next++;
3137
3138        if (!timer_on) {
3139                mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3140                timer_on = 1;
3141        }
3142        list_size++;
3143        spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3144}
3145
3146static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3147                             size_t size, enum dma_data_direction dir,
3148                             struct dma_attrs *attrs)
3149{
3150        struct dmar_domain *domain;
3151        unsigned long start_pfn, last_pfn;
3152        struct iova *iova;
3153        struct intel_iommu *iommu;
3154        struct page *freelist;
3155
3156        if (iommu_no_mapping(dev))
3157                return;
3158
3159        domain = find_domain(dev);
3160        BUG_ON(!domain);
3161
3162        iommu = domain_get_iommu(domain);
3163
3164        iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3165        if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3166                      (unsigned long long)dev_addr))
3167                return;
3168
3169        start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3170        last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3171
3172        pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3173                 dev_name(dev), start_pfn, last_pfn);
3174
3175        freelist = domain_unmap(domain, start_pfn, last_pfn);
3176
3177        if (intel_iommu_strict) {
3178                iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3179                                      last_pfn - start_pfn + 1, !freelist, 0);
3180                /* free iova */
3181                __free_iova(&domain->iovad, iova);
3182                dma_free_pagelist(freelist);
3183        } else {
3184                add_unmap(domain, iova, freelist);
3185                /*
3186                 * queue up the release of the unmap to save the 1/6th of the
3187                 * cpu used up by the iotlb flush operation...
3188                 */
3189        }
3190}
3191
3192static void *intel_alloc_coherent(struct device *dev, size_t size,
3193                                  dma_addr_t *dma_handle, gfp_t flags,
3194                                  struct dma_attrs *attrs)
3195{
3196        void *vaddr;
3197        int order;
3198
3199        size = PAGE_ALIGN(size);
3200        order = get_order(size);
3201
3202        if (!iommu_no_mapping(dev))
3203                flags &= ~(GFP_DMA | GFP_DMA32);
3204        else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3205                if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3206                        flags |= GFP_DMA;
3207                else
3208                        flags |= GFP_DMA32;
3209        }
3210
3211        vaddr = (void *)__get_free_pages(flags, order);
3212        if (!vaddr)
3213                return NULL;
3214        memset(vaddr, 0, size);
3215
3216        *dma_handle = __intel_map_single(dev, virt_to_bus(vaddr), size,
3217                                         DMA_BIDIRECTIONAL,
3218                                         dev->coherent_dma_mask);
3219        if (*dma_handle)
3220                return vaddr;
3221        free_pages((unsigned long)vaddr, order);
3222        return NULL;
3223}
3224
3225static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3226                                dma_addr_t dma_handle, struct dma_attrs *attrs)
3227{
3228        int order;
3229
3230        size = PAGE_ALIGN(size);
3231        order = get_order(size);
3232
3233        intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3234        free_pages((unsigned long)vaddr, order);
3235}
3236
3237static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3238                           int nelems, enum dma_data_direction dir,
3239                           struct dma_attrs *attrs)
3240{
3241        struct dmar_domain *domain;
3242        unsigned long start_pfn, last_pfn;
3243        struct iova *iova;
3244        struct intel_iommu *iommu;
3245        struct page *freelist;
3246
3247        if (iommu_no_mapping(dev))
3248                return;
3249
3250        domain = find_domain(dev);
3251        BUG_ON(!domain);
3252
3253        iommu = domain_get_iommu(domain);
3254
3255        iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3256        if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3257                      (unsigned long long)sglist[0].dma_address))
3258                return;
3259
3260        start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3261        last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3262
3263        freelist = domain_unmap(domain, start_pfn, last_pfn);
3264
3265        if (intel_iommu_strict) {
3266                iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3267                                      last_pfn - start_pfn + 1, !freelist, 0);
3268                /* free iova */
3269                __free_iova(&domain->iovad, iova);
3270                dma_free_pagelist(freelist);
3271        } else {
3272                add_unmap(domain, iova, freelist);
3273                /*
3274                 * queue up the release of the unmap to save the 1/6th of the
3275                 * cpu used up by the iotlb flush operation...
3276                 */
3277        }
3278}
3279
3280static int intel_nontranslate_map_sg(struct device *hddev,
3281        struct scatterlist *sglist, int nelems, int dir)
3282{
3283        int i;
3284        struct scatterlist *sg;
3285
3286        for_each_sg(sglist, sg, nelems, i) {
3287                BUG_ON(!sg_page(sg));
3288                sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3289                sg->dma_length = sg->length;
3290        }
3291        return nelems;
3292}
3293
3294static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3295                        enum dma_data_direction dir, struct dma_attrs *attrs)
3296{
3297        int i;
3298        struct dmar_domain *domain;
3299        size_t size = 0;
3300        int prot = 0;
3301        struct iova *iova = NULL;
3302        int ret;
3303        struct scatterlist *sg;
3304        unsigned long start_vpfn;
3305        struct intel_iommu *iommu;
3306
3307        BUG_ON(dir == DMA_NONE);
3308        if (iommu_no_mapping(dev))
3309                return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3310
3311        domain = get_valid_domain_for_dev(dev);
3312        if (!domain)
3313                return 0;
3314
3315        iommu = domain_get_iommu(domain);
3316
3317        for_each_sg(sglist, sg, nelems, i)
3318                size += aligned_nrpages(sg->offset, sg->length);
3319
3320        iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3321                                *dev->dma_mask);
3322        if (!iova) {
3323                sglist->dma_length = 0;
3324                return 0;
3325        }
3326
3327        /*
3328         * Check if DMAR supports zero-length reads on write only
3329         * mappings..
3330         */
3331        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3332                        !cap_zlr(iommu->cap))
3333                prot |= DMA_PTE_READ;
3334        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3335                prot |= DMA_PTE_WRITE;
3336
3337        start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3338
3339        ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3340        if (unlikely(ret)) {
3341                /*  clear the page */
3342                dma_pte_clear_range(domain, start_vpfn,
3343                                    start_vpfn + size - 1);
3344                /* free page tables */
3345                dma_pte_free_pagetable(domain, start_vpfn,
3346                                       start_vpfn + size - 1);
3347                /* free iova */
3348                __free_iova(&domain->iovad, iova);
3349                return 0;
3350        }
3351
3352        /* it's a non-present to present mapping. Only flush if caching mode */
3353        if (cap_caching_mode(iommu->cap))
3354                iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3355        else
3356                iommu_flush_write_buffer(iommu);
3357
3358        return nelems;
3359}
3360
3361static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3362{
3363        return !dma_addr;
3364}
3365
3366struct dma_map_ops intel_dma_ops = {
3367        .alloc = intel_alloc_coherent,
3368        .free = intel_free_coherent,
3369        .map_sg = intel_map_sg,
3370        .unmap_sg = intel_unmap_sg,
3371        .map_page = intel_map_page,
3372        .unmap_page = intel_unmap_page,
3373        .mapping_error = intel_mapping_error,
3374};
3375
3376static inline int iommu_domain_cache_init(void)
3377{
3378        int ret = 0;
3379
3380        iommu_domain_cache = kmem_cache_create("iommu_domain",
3381                                         sizeof(struct dmar_domain),
3382                                         0,
3383                                         SLAB_HWCACHE_ALIGN,
3384
3385                                         NULL);
3386        if (!iommu_domain_cache) {
3387                printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3388                ret = -ENOMEM;
3389        }
3390
3391        return ret;
3392}
3393
3394static inline int iommu_devinfo_cache_init(void)
3395{
3396        int ret = 0;
3397
3398        iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3399                                         sizeof(struct device_domain_info),
3400                                         0,
3401                                         SLAB_HWCACHE_ALIGN,
3402                                         NULL);
3403        if (!iommu_devinfo_cache) {
3404                printk(KERN_ERR "Couldn't create devinfo cache\n");
3405                ret = -ENOMEM;
3406        }
3407
3408        return ret;
3409}
3410
3411static inline int iommu_iova_cache_init(void)
3412{
3413        int ret = 0;
3414
3415        iommu_iova_cache = kmem_cache_create("iommu_iova",
3416                                         sizeof(struct iova),
3417                                         0,
3418                                         SLAB_HWCACHE_ALIGN,
3419                                         NULL);
3420        if (!iommu_iova_cache) {
3421                printk(KERN_ERR "Couldn't create iova cache\n");
3422                ret = -ENOMEM;
3423        }
3424
3425        return ret;
3426}
3427
3428static int __init iommu_init_mempool(void)
3429{
3430        int ret;
3431        ret = iommu_iova_cache_init();
3432        if (ret)
3433                return ret;
3434
3435        ret = iommu_domain_cache_init();
3436        if (ret)
3437                goto domain_error;
3438
3439        ret = iommu_devinfo_cache_init();
3440        if (!ret)
3441                return ret;
3442
3443        kmem_cache_destroy(iommu_domain_cache);
3444domain_error:
3445        kmem_cache_destroy(iommu_iova_cache);
3446
3447        return -ENOMEM;
3448}
3449
3450static void __init iommu_exit_mempool(void)
3451{
3452        kmem_cache_destroy(iommu_devinfo_cache);
3453        kmem_cache_destroy(iommu_domain_cache);
3454        kmem_cache_destroy(iommu_iova_cache);
3455
3456}
3457
3458static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3459{
3460        struct dmar_drhd_unit *drhd;
3461        u32 vtbar;
3462        int rc;
3463
3464        /* We know that this device on this chipset has its own IOMMU.
3465         * If we find it under a different IOMMU, then the BIOS is lying
3466         * to us. Hope that the IOMMU for this device is actually
3467         * disabled, and it needs no translation...
3468         */
3469        rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3470        if (rc) {
3471                /* "can't" happen */
3472                dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3473                return;
3474        }
3475        vtbar &= 0xffff0000;
3476
3477        /* we know that the this iommu should be at offset 0xa000 from vtbar */
3478        drhd = dmar_find_matched_drhd_unit(pdev);
3479        if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3480                            TAINT_FIRMWARE_WORKAROUND,
3481                            "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3482                pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3483}
3484DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3485
3486static void __init init_no_remapping_devices(void)
3487{
3488        struct dmar_drhd_unit *drhd;
3489        struct device *dev;
3490        int i;
3491
3492        for_each_drhd_unit(drhd) {
3493                if (!drhd->include_all) {
3494                        for_each_active_dev_scope(drhd->devices,
3495                                                  drhd->devices_cnt, i, dev)
3496                                break;
3497                        /* ignore DMAR unit if no devices exist */
3498                        if (i == drhd->devices_cnt)
3499                                drhd->ignored = 1;
3500                }
3501        }
3502
3503        for_each_active_drhd_unit(drhd) {
3504                if (drhd->include_all)
3505                        continue;
3506
3507                for_each_active_dev_scope(drhd->devices,
3508                                          drhd->devices_cnt, i, dev)
3509                        if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3510                                break;
3511                if (i < drhd->devices_cnt)
3512                        continue;
3513
3514                /* This IOMMU has *only* gfx devices. Either bypass it or
3515                   set the gfx_mapped flag, as appropriate */
3516                if (dmar_map_gfx) {
3517                        intel_iommu_gfx_mapped = 1;
3518                } else {
3519                        drhd->ignored = 1;
3520                        for_each_active_dev_scope(drhd->devices,
3521                                                  drhd->devices_cnt, i, dev)
3522                                dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3523                }
3524        }
3525}
3526
3527#ifdef CONFIG_SUSPEND
3528static int init_iommu_hw(void)
3529{
3530        struct dmar_drhd_unit *drhd;
3531        struct intel_iommu *iommu = NULL;
3532
3533        for_each_active_iommu(iommu, drhd)
3534                if (iommu->qi)
3535                        dmar_reenable_qi(iommu);
3536
3537        for_each_iommu(iommu, drhd) {
3538                if (drhd->ignored) {
3539                        /*
3540                         * we always have to disable PMRs or DMA may fail on
3541                         * this device
3542                         */
3543                        if (force_on)
3544                                iommu_disable_protect_mem_regions(iommu);
3545                        continue;
3546                }
3547        
3548                iommu_flush_write_buffer(iommu);
3549
3550                iommu_set_root_entry(iommu);
3551
3552                iommu->flush.flush_context(iommu, 0, 0, 0,
3553                                           DMA_CCMD_GLOBAL_INVL);
3554                iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3555                                         DMA_TLB_GLOBAL_FLUSH);
3556                if (iommu_enable_translation(iommu))
3557                        return 1;
3558                iommu_disable_protect_mem_regions(iommu);
3559        }
3560
3561        return 0;
3562}
3563
3564static void iommu_flush_all(void)
3565{
3566        struct dmar_drhd_unit *drhd;
3567        struct intel_iommu *iommu;
3568
3569        for_each_active_iommu(iommu, drhd) {
3570                iommu->flush.flush_context(iommu, 0, 0, 0,
3571                                           DMA_CCMD_GLOBAL_INVL);
3572                iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3573                                         DMA_TLB_GLOBAL_FLUSH);
3574        }
3575}
3576
3577static int iommu_suspend(void)
3578{
3579        struct dmar_drhd_unit *drhd;
3580        struct intel_iommu *iommu = NULL;
3581        unsigned long flag;
3582
3583        for_each_active_iommu(iommu, drhd) {
3584                iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3585                                                 GFP_ATOMIC);
3586                if (!iommu->iommu_state)
3587                        goto nomem;
3588        }
3589
3590        iommu_flush_all();
3591
3592        for_each_active_iommu(iommu, drhd) {
3593                iommu_disable_translation(iommu);
3594
3595                raw_spin_lock_irqsave(&iommu->register_lock, flag);
3596
3597                iommu->iommu_state[SR_DMAR_FECTL_REG] =
3598                        readl(iommu->reg + DMAR_FECTL_REG);
3599                iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3600                        readl(iommu->reg + DMAR_FEDATA_REG);
3601                iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3602                        readl(iommu->reg + DMAR_FEADDR_REG);
3603                iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3604                        readl(iommu->reg + DMAR_FEUADDR_REG);
3605
3606                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3607        }
3608        return 0;
3609
3610nomem:
3611        for_each_active_iommu(iommu, drhd)
3612                kfree(iommu->iommu_state);
3613
3614        return -ENOMEM;
3615}
3616
3617static void iommu_resume(void)
3618{
3619        struct dmar_drhd_unit *drhd;
3620        struct intel_iommu *iommu = NULL;
3621        unsigned long flag;
3622
3623        if (init_iommu_hw()) {
3624                if (force_on)
3625                        panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3626                else
3627                        WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3628                return;
3629        }
3630
3631        for_each_active_iommu(iommu, drhd) {
3632
3633                raw_spin_lock_irqsave(&iommu->register_lock, flag);
3634
3635                writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3636                        iommu->reg + DMAR_FECTL_REG);
3637                writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3638                        iommu->reg + DMAR_FEDATA_REG);
3639                writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3640                        iommu->reg + DMAR_FEADDR_REG);
3641                writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3642                        iommu->reg + DMAR_FEUADDR_REG);
3643
3644                raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3645        }
3646
3647        for_each_active_iommu(iommu, drhd)
3648                kfree(iommu->iommu_state);
3649}
3650
3651static struct syscore_ops iommu_syscore_ops = {
3652        .resume         = iommu_resume,
3653        .suspend        = iommu_suspend,
3654};
3655
3656static void __init init_iommu_pm_ops(void)
3657{
3658        register_syscore_ops(&iommu_syscore_ops);
3659}
3660
3661#else
3662static inline void init_iommu_pm_ops(void) {}
3663#endif  /* CONFIG_PM */
3664
3665
3666int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3667{
3668        struct acpi_dmar_reserved_memory *rmrr;
3669        struct dmar_rmrr_unit *rmrru;
3670
3671        rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3672        if (!rmrru)
3673                return -ENOMEM;
3674
3675        rmrru->hdr = header;
3676        rmrr = (struct acpi_dmar_reserved_memory *)header;
3677        rmrru->base_address = rmrr->base_address;
3678        rmrru->end_address = rmrr->end_address;
3679        rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3680                                ((void *)rmrr) + rmrr->header.length,
3681                                &rmrru->devices_cnt);
3682        if (rmrru->devices_cnt && rmrru->devices == NULL) {
3683                kfree(rmrru);
3684                return -ENOMEM;
3685        }
3686
3687        list_add(&rmrru->list, &dmar_rmrr_units);
3688
3689        return 0;
3690}
3691
3692int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3693{
3694        struct acpi_dmar_atsr *atsr;
3695        struct dmar_atsr_unit *atsru;
3696
3697        atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3698        atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3699        if (!atsru)
3700                return -ENOMEM;
3701
3702        atsru->hdr = hdr;
3703        atsru->include_all = atsr->flags & 0x1;
3704        if (!atsru->include_all) {
3705                atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3706                                (void *)atsr + atsr->header.length,
3707                                &atsru->devices_cnt);
3708                if (atsru->devices_cnt && atsru->devices == NULL) {
3709                        kfree(atsru);
3710                        return -ENOMEM;
3711                }
3712        }
3713
3714        list_add_rcu(&atsru->list, &dmar_atsr_units);
3715
3716        return 0;
3717}
3718
3719static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3720{
3721        dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3722        kfree(atsru);
3723}
3724
3725static void intel_iommu_free_dmars(void)
3726{
3727        struct dmar_rmrr_unit *rmrru, *rmrr_n;
3728        struct dmar_atsr_unit *atsru, *atsr_n;
3729
3730        list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3731                list_del(&rmrru->list);
3732                dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3733                kfree(rmrru);
3734        }
3735
3736        list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3737                list_del(&atsru->list);
3738                intel_iommu_free_atsr(atsru);
3739        }
3740}
3741
3742int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3743{
3744        int i, ret = 1;
3745        struct pci_bus *bus;
3746        struct pci_dev *bridge = NULL;
3747        struct device *tmp;
3748        struct acpi_dmar_atsr *atsr;
3749        struct dmar_atsr_unit *atsru;
3750
3751        dev = pci_physfn(dev);
3752        for (bus = dev->bus; bus; bus = bus->parent) {
3753                bridge = bus->self;
3754                if (!bridge || !pci_is_pcie(bridge) ||
3755                    pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3756                        return 0;
3757                if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3758                        break;
3759        }
3760        if (!bridge)
3761                return 0;
3762
3763        rcu_read_lock();
3764        list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3765                atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3766                if (atsr->segment != pci_domain_nr(dev->bus))
3767                        continue;
3768
3769                for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3770                        if (tmp == &bridge->dev)
3771                                goto out;
3772
3773                if (atsru->include_all)
3774                        goto out;
3775        }
3776        ret = 0;
3777out:
3778        rcu_read_unlock();
3779
3780        return ret;
3781}
3782
3783int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3784{
3785        int ret = 0;
3786        struct dmar_rmrr_unit *rmrru;
3787        struct dmar_atsr_unit *atsru;
3788        struct acpi_dmar_atsr *atsr;
3789        struct acpi_dmar_reserved_memory *rmrr;
3790
3791        if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3792                return 0;
3793
3794        list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3795                rmrr = container_of(rmrru->hdr,
3796                                    struct acpi_dmar_reserved_memory, header);
3797                if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3798                        ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3799                                ((void *)rmrr) + rmrr->header.length,
3800                                rmrr->segment, rmrru->devices,
3801                                rmrru->devices_cnt);
3802                        if (ret > 0)
3803                                break;
3804                        else if(ret < 0)
3805                                return ret;
3806                } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3807                        if (dmar_remove_dev_scope(info, rmrr->segment,
3808                                rmrru->devices, rmrru->devices_cnt))
3809                                break;
3810                }
3811        }
3812
3813        list_for_each_entry(atsru, &dmar_atsr_units, list) {
3814                if (atsru->include_all)
3815                        continue;
3816
3817                atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3818                if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3819                        ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3820                                        (void *)atsr + atsr->header.length,
3821                                        atsr->segment, atsru->devices,
3822                                        atsru->devices_cnt);
3823                        if (ret > 0)
3824                                break;
3825                        else if(ret < 0)
3826                                return ret;
3827                } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3828                        if (dmar_remove_dev_scope(info, atsr->segment,
3829                                        atsru->devices, atsru->devices_cnt))
3830                                break;
3831                }
3832        }
3833
3834        return 0;
3835}
3836
3837/*
3838 * Here we only respond to action of unbound device from driver.
3839 *
3840 * Added device is not attached to its DMAR domain here yet. That will happen
3841 * when mapping the device to iova.
3842 */
3843static int device_notifier(struct notifier_block *nb,
3844                                  unsigned long action, void *data)
3845{
3846        struct device *dev = data;
3847        struct dmar_domain *domain;
3848
3849        if (iommu_dummy(dev))
3850                return 0;
3851
3852        if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3853            action != BUS_NOTIFY_DEL_DEVICE)
3854                return 0;
3855
3856        domain = find_domain(dev);
3857        if (!domain)
3858                return 0;
3859
3860        down_read(&dmar_global_lock);
3861        domain_remove_one_dev_info(domain, dev);
3862        if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3863            !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3864            list_empty(&domain->devices))
3865                domain_exit(domain);
3866        up_read(&dmar_global_lock);
3867
3868        return 0;
3869}
3870
3871static struct notifier_block device_nb = {
3872        .notifier_call = device_notifier,
3873};
3874
3875static int intel_iommu_memory_notifier(struct notifier_block *nb,
3876                                       unsigned long val, void *v)
3877{
3878        struct memory_notify *mhp = v;
3879        unsigned long long start, end;
3880        unsigned long start_vpfn, last_vpfn;
3881
3882        switch (val) {
3883        case MEM_GOING_ONLINE:
3884                start = mhp->start_pfn << PAGE_SHIFT;
3885                end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3886                if (iommu_domain_identity_map(si_domain, start, end)) {
3887                        pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3888                                start, end);
3889                        return NOTIFY_BAD;
3890                }
3891                break;
3892
3893        case MEM_OFFLINE:
3894        case MEM_CANCEL_ONLINE:
3895                start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3896                last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3897                while (start_vpfn <= last_vpfn) {
3898                        struct iova *iova;
3899                        struct dmar_drhd_unit *drhd;
3900                        struct intel_iommu *iommu;
3901                        struct page *freelist;
3902
3903                        iova = find_iova(&si_domain->iovad, start_vpfn);
3904                        if (iova == NULL) {
3905                                pr_debug("dmar: failed get IOVA for PFN %lx\n",
3906                                         start_vpfn);
3907                                break;
3908                        }
3909
3910                        iova = split_and_remove_iova(&si_domain->iovad, iova,
3911                                                     start_vpfn, last_vpfn);
3912                        if (iova == NULL) {
3913                                pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3914                                        start_vpfn, last_vpfn);
3915                                return NOTIFY_BAD;
3916                        }
3917
3918                        freelist = domain_unmap(si_domain, iova->pfn_lo,
3919                                               iova->pfn_hi);
3920
3921                        rcu_read_lock();
3922                        for_each_active_iommu(iommu, drhd)
3923                                iommu_flush_iotlb_psi(iommu, si_domain->id,
3924                                        iova->pfn_lo,
3925                                        iova->pfn_hi - iova->pfn_lo + 1,
3926                                        !freelist, 0);
3927                        rcu_read_unlock();
3928                        dma_free_pagelist(freelist);
3929
3930                        start_vpfn = iova->pfn_hi + 1;
3931                        free_iova_mem(iova);
3932                }
3933                break;
3934        }
3935
3936        return NOTIFY_OK;
3937}
3938
3939static struct notifier_block intel_iommu_memory_nb = {
3940        .notifier_call = intel_iommu_memory_notifier,
3941        .priority = 0
3942};
3943
3944int __init intel_iommu_init(void)
3945{
3946        int ret = -ENODEV;
3947        struct dmar_drhd_unit *drhd;
3948        struct intel_iommu *iommu;
3949
3950        /* VT-d is required for a TXT/tboot launch, so enforce that */
3951        force_on = tboot_force_iommu();
3952
3953        if (iommu_init_mempool()) {
3954                if (force_on)
3955                        panic("tboot: Failed to initialize iommu memory\n");
3956                return -ENOMEM;
3957        }
3958
3959        down_write(&dmar_global_lock);
3960        if (dmar_table_init()) {
3961                if (force_on)
3962                        panic("tboot: Failed to initialize DMAR table\n");
3963                goto out_free_dmar;
3964        }
3965
3966        /*
3967         * Disable translation if already enabled prior to OS handover.
3968         */
3969        for_each_active_iommu(iommu, drhd)
3970                if (iommu->gcmd & DMA_GCMD_TE)
3971                        iommu_disable_translation(iommu);
3972
3973        if (dmar_dev_scope_init() < 0) {
3974                if (force_on)
3975                        panic("tboot: Failed to initialize DMAR device scope\n");
3976                goto out_free_dmar;
3977        }
3978
3979        if (no_iommu || dmar_disabled)
3980                goto out_free_dmar;
3981
3982        if (list_empty(&dmar_rmrr_units))
3983                printk(KERN_INFO "DMAR: No RMRR found\n");
3984
3985        if (list_empty(&dmar_atsr_units))
3986                printk(KERN_INFO "DMAR: No ATSR found\n");
3987
3988        if (dmar_init_reserved_ranges()) {
3989                if (force_on)
3990                        panic("tboot: Failed to reserve iommu ranges\n");
3991                goto out_free_reserved_range;
3992        }
3993
3994        init_no_remapping_devices();
3995
3996        ret = init_dmars();
3997        if (ret) {
3998                if (force_on)
3999                        panic("tboot: Failed to initialize DMARs\n");
4000                printk(KERN_ERR "IOMMU: dmar init failed\n");
4001                goto out_free_reserved_range;
4002        }
4003        up_write(&dmar_global_lock);
4004        printk(KERN_INFO
4005        "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4006
4007        init_timer(&unmap_timer);
4008#ifdef CONFIG_SWIOTLB
4009        swiotlb = 0;
4010#endif
4011        dma_ops = &intel_dma_ops;
4012
4013        init_iommu_pm_ops();
4014
4015        bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4016        bus_register_notifier(&pci_bus_type, &device_nb);
4017        if (si_domain && !hw_pass_through)
4018                register_memory_notifier(&intel_iommu_memory_nb);
4019
4020        intel_iommu_enabled = 1;
4021
4022        return 0;
4023
4024out_free_reserved_range:
4025        put_iova_domain(&reserved_iova_list);
4026out_free_dmar:
4027        intel_iommu_free_dmars();
4028        up_write(&dmar_global_lock);
4029        iommu_exit_mempool();
4030        return ret;
4031}
4032
4033static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4034                                           struct device *dev)
4035{
4036        struct pci_dev *tmp, *parent, *pdev;
4037
4038        if (!iommu || !dev || !dev_is_pci(dev))
4039                return;
4040
4041        pdev = to_pci_dev(dev);
4042
4043        /* dependent device detach */
4044        tmp = pci_find_upstream_pcie_bridge(pdev);
4045        /* Secondary interface's bus number and devfn 0 */
4046        if (tmp) {
4047                parent = pdev->bus->self;
4048                while (parent != tmp) {
4049                        iommu_detach_dev(iommu, parent->bus->number,
4050                                         parent->devfn);
4051                        parent = parent->bus->self;
4052                }
4053                if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
4054                        iommu_detach_dev(iommu,
4055                                tmp->subordinate->number, 0);
4056                else /* this is a legacy PCI bridge */
4057                        iommu_detach_dev(iommu, tmp->bus->number,
4058                                         tmp->devfn);
4059        }
4060}
4061
4062static void domain_remove_one_dev_info(struct dmar_domain *domain,
4063                                       struct device *dev)
4064{
4065        struct device_domain_info *info, *tmp;
4066        struct intel_iommu *iommu;
4067        unsigned long flags;
4068        int found = 0;
4069        u8 bus, devfn;
4070
4071        iommu = device_to_iommu(dev, &bus, &devfn);
4072        if (!iommu)
4073                return;
4074
4075        spin_lock_irqsave(&device_domain_lock, flags);
4076        list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4077                if (info->iommu == iommu && info->bus == bus &&
4078                    info->devfn == devfn) {
4079                        unlink_domain_info(info);
4080                        spin_unlock_irqrestore(&device_domain_lock, flags);
4081
4082                        iommu_disable_dev_iotlb(info);
4083                        iommu_detach_dev(iommu, info->bus, info->devfn);
4084                        iommu_detach_dependent_devices(iommu, dev);
4085                        free_devinfo_mem(info);
4086
4087                        spin_lock_irqsave(&device_domain_lock, flags);
4088
4089                        if (found)
4090                                break;
4091                        else
4092                                continue;
4093                }
4094
4095                /* if there is no other devices under the same iommu
4096                 * owned by this domain, clear this iommu in iommu_bmp
4097                 * update iommu count and coherency
4098                 */
4099                if (info->iommu == iommu)
4100                        found = 1;
4101        }
4102
4103        spin_unlock_irqrestore(&device_domain_lock, flags);
4104
4105        if (found == 0) {
4106                unsigned long tmp_flags;
4107                spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
4108                clear_bit(iommu->seq_id, domain->iommu_bmp);
4109                domain->iommu_count--;
4110                domain_update_iommu_cap(domain);
4111                spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
4112
4113                if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
4114                    !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
4115                        spin_lock_irqsave(&iommu->lock, tmp_flags);
4116                        clear_bit(domain->id, iommu->domain_ids);
4117                        iommu->domains[domain->id] = NULL;
4118                        spin_unlock_irqrestore(&iommu->lock, tmp_flags);
4119                }
4120        }
4121}
4122
4123static int md_domain_init(struct dmar_domain *domain, int guest_width)
4124{
4125        int adjust_width;
4126
4127        init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
4128        domain_reserve_special_ranges(domain);
4129
4130        /* calculate AGAW */
4131        domain->gaw = guest_width;
4132        adjust_width = guestwidth_to_adjustwidth(guest_width);
4133        domain->agaw = width_to_agaw(adjust_width);
4134
4135        domain->iommu_coherency = 0;
4136        domain->iommu_snooping = 0;
4137        domain->iommu_superpage = 0;
4138        domain->max_addr = 0;
4139        domain->nid = -1;
4140
4141        /* always allocate the top pgd */
4142        domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4143        if (!domain->pgd)
4144                return -ENOMEM;
4145        domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4146        return 0;
4147}
4148
4149static int intel_iommu_domain_init(struct iommu_domain *domain)
4150{
4151        struct dmar_domain *dmar_domain;
4152
4153        dmar_domain = alloc_domain(true);
4154        if (!dmar_domain) {
4155                printk(KERN_ERR
4156                        "intel_iommu_domain_init: dmar_domain == NULL\n");
4157                return -ENOMEM;
4158        }
4159        if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4160                printk(KERN_ERR
4161                        "intel_iommu_domain_init() failed\n");
4162                domain_exit(dmar_domain);
4163                return -ENOMEM;
4164        }
4165        domain_update_iommu_cap(dmar_domain);
4166        domain->priv = dmar_domain;
4167
4168        domain->geometry.aperture_start = 0;
4169        domain->geometry.aperture_end   = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4170        domain->geometry.force_aperture = true;
4171
4172        return 0;
4173}
4174
4175static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4176{
4177        struct dmar_domain *dmar_domain = domain->priv;
4178
4179        domain->priv = NULL;
4180        domain_exit(dmar_domain);
4181}
4182
4183static int intel_iommu_attach_device(struct iommu_domain *domain,
4184                                     struct device *dev)
4185{
4186        struct dmar_domain *dmar_domain = domain->priv;
4187        struct intel_iommu *iommu;
4188        int addr_width;
4189        u8 bus, devfn;
4190
4191        /* normally dev is not mapped */
4192        if (unlikely(domain_context_mapped(dev))) {
4193                struct dmar_domain *old_domain;
4194
4195                old_domain = find_domain(dev);
4196                if (old_domain) {
4197                        if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4198                            dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4199                                domain_remove_one_dev_info(old_domain, dev);
4200                        else
4201                                domain_remove_dev_info(old_domain);
4202                }
4203        }
4204
4205        iommu = device_to_iommu(dev, &bus, &devfn);
4206        if (!iommu)
4207                return -ENODEV;
4208
4209        /* check if this iommu agaw is sufficient for max mapped address */
4210        addr_width = agaw_to_width(iommu->agaw);
4211        if (addr_width > cap_mgaw(iommu->cap))
4212                addr_width = cap_mgaw(iommu->cap);
4213
4214        if (dmar_domain->max_addr > (1LL << addr_width)) {
4215                printk(KERN_ERR "%s: iommu width (%d) is not "
4216                       "sufficient for the mapped address (%llx)\n",
4217                       __func__, addr_width, dmar_domain->max_addr);
4218                return -EFAULT;
4219        }
4220        dmar_domain->gaw = addr_width;
4221
4222        /*
4223         * Knock out extra levels of page tables if necessary
4224         */
4225        while (iommu->agaw < dmar_domain->agaw) {
4226                struct dma_pte *pte;
4227
4228                pte = dmar_domain->pgd;
4229                if (dma_pte_present(pte)) {
4230                        dmar_domain->pgd = (struct dma_pte *)
4231                                phys_to_virt(dma_pte_addr(pte));
4232                        free_pgtable_page(pte);
4233                }
4234                dmar_domain->agaw--;
4235        }
4236
4237        return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4238}
4239
4240static void intel_iommu_detach_device(struct iommu_domain *domain,
4241                                      struct device *dev)
4242{
4243        struct dmar_domain *dmar_domain = domain->priv;
4244
4245        domain_remove_one_dev_info(dmar_domain, dev);
4246}
4247
4248static int intel_iommu_map(struct iommu_domain *domain,
4249                           unsigned long iova, phys_addr_t hpa,
4250                           size_t size, int iommu_prot)
4251{
4252        struct dmar_domain *dmar_domain = domain->priv;
4253        u64 max_addr;
4254        int prot = 0;
4255        int ret;
4256
4257        if (iommu_prot & IOMMU_READ)
4258                prot |= DMA_PTE_READ;
4259        if (iommu_prot & IOMMU_WRITE)
4260                prot |= DMA_PTE_WRITE;
4261        if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4262                prot |= DMA_PTE_SNP;
4263
4264        max_addr = iova + size;
4265        if (dmar_domain->max_addr < max_addr) {
4266                u64 end;
4267
4268                /* check if minimum agaw is sufficient for mapped address */
4269                end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4270                if (end < max_addr) {
4271                        printk(KERN_ERR "%s: iommu width (%d) is not "
4272                               "sufficient for the mapped address (%llx)\n",
4273                               __func__, dmar_domain->gaw, max_addr);
4274                        return -EFAULT;
4275                }
4276                dmar_domain->max_addr = max_addr;
4277        }
4278        /* Round up size to next multiple of PAGE_SIZE, if it and
4279           the low bits of hpa would take us onto the next page */
4280        size = aligned_nrpages(hpa, size);
4281        ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4282                                 hpa >> VTD_PAGE_SHIFT, size, prot);
4283        return ret;
4284}
4285
4286static size_t intel_iommu_unmap(struct iommu_domain *domain,
4287                                unsigned long iova, size_t size)
4288{
4289        struct dmar_domain *dmar_domain = domain->priv;
4290        struct page *freelist = NULL;
4291        struct intel_iommu *iommu;
4292        unsigned long start_pfn, last_pfn;
4293        unsigned int npages;
4294        int iommu_id, num, ndomains, level = 0;
4295
4296        /* Cope with horrid API which requires us to unmap more than the
4297           size argument if it happens to be a large-page mapping. */
4298        if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4299                BUG();
4300
4301        if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4302                size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4303
4304        start_pfn = iova >> VTD_PAGE_SHIFT;
4305        last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4306
4307        freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4308
4309        npages = last_pfn - start_pfn + 1;
4310
4311        for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4312               iommu = g_iommus[iommu_id];
4313
4314               /*
4315                * find bit position of dmar_domain
4316                */
4317               ndomains = cap_ndoms(iommu->cap);
4318               for_each_set_bit(num, iommu->domain_ids, ndomains) {
4319                       if (iommu->domains[num] == dmar_domain)
4320                               iommu_flush_iotlb_psi(iommu, num, start_pfn,
4321                                                     npages, !freelist, 0);
4322               }
4323
4324        }
4325
4326        dma_free_pagelist(freelist);
4327
4328        if (dmar_domain->max_addr == iova + size)
4329                dmar_domain->max_addr = iova;
4330
4331        return size;
4332}
4333
4334static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4335                                            dma_addr_t iova)
4336{
4337        struct dmar_domain *dmar_domain = domain->priv;
4338        struct dma_pte *pte;
4339        int level = 0;
4340        u64 phys = 0;
4341
4342        pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4343        if (pte)
4344                phys = dma_pte_addr(pte);
4345
4346        return phys;
4347}
4348
4349static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4350                                      unsigned long cap)
4351{
4352        struct dmar_domain *dmar_domain = domain->priv;
4353
4354        if (cap == IOMMU_CAP_CACHE_COHERENCY)
4355                return dmar_domain->iommu_snooping;
4356        if (cap == IOMMU_CAP_INTR_REMAP)
4357                return irq_remapping_enabled;
4358
4359        return 0;
4360}
4361
4362#define REQ_ACS_FLAGS   (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4363
4364static int intel_iommu_add_device(struct device *dev)
4365{
4366        struct pci_dev *pdev = to_pci_dev(dev);
4367        struct pci_dev *bridge, *dma_pdev = NULL;
4368        struct iommu_group *group;
4369        int ret;
4370        u8 bus, devfn;
4371
4372        if (!device_to_iommu(dev, &bus, &devfn))
4373                return -ENODEV;
4374
4375        bridge = pci_find_upstream_pcie_bridge(pdev);
4376        if (bridge) {
4377                if (pci_is_pcie(bridge))
4378                        dma_pdev = pci_get_domain_bus_and_slot(
4379                                                pci_domain_nr(pdev->bus),
4380                                                bridge->subordinate->number, 0);
4381                if (!dma_pdev)
4382                        dma_pdev = pci_dev_get(bridge);
4383        } else
4384                dma_pdev = pci_dev_get(pdev);
4385
4386        /* Account for quirked devices */
4387        swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4388
4389        /*
4390         * If it's a multifunction device that does not support our
4391         * required ACS flags, add to the same group as lowest numbered
4392         * function that also does not suport the required ACS flags.
4393         */
4394        if (dma_pdev->multifunction &&
4395            !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4396                u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4397
4398                for (i = 0; i < 8; i++) {
4399                        struct pci_dev *tmp;
4400
4401                        tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4402                        if (!tmp)
4403                                continue;
4404
4405                        if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4406                                swap_pci_ref(&dma_pdev, tmp);
4407                                break;
4408                        }
4409                        pci_dev_put(tmp);
4410                }
4411        }
4412
4413        /*
4414         * Devices on the root bus go through the iommu.  If that's not us,
4415         * find the next upstream device and test ACS up to the root bus.
4416         * Finding the next device may require skipping virtual buses.
4417         */
4418        while (!pci_is_root_bus(dma_pdev->bus)) {
4419                struct pci_bus *bus = dma_pdev->bus;
4420
4421                while (!bus->self) {
4422                        if (!pci_is_root_bus(bus))
4423                                bus = bus->parent;
4424                        else
4425                                goto root_bus;
4426                }
4427
4428                if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4429                        break;
4430
4431                swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4432        }
4433
4434root_bus:
4435        group = iommu_group_get(&dma_pdev->dev);
4436        pci_dev_put(dma_pdev);
4437        if (!group) {
4438                group = iommu_group_alloc();
4439                if (IS_ERR(group))
4440                        return PTR_ERR(group);
4441        }
4442
4443        ret = iommu_group_add_device(group, dev);
4444
4445        iommu_group_put(group);
4446        return ret;
4447}
4448
4449static void intel_iommu_remove_device(struct device *dev)
4450{
4451        iommu_group_remove_device(dev);
4452}
4453
4454static struct iommu_ops intel_iommu_ops = {
4455        .domain_init    = intel_iommu_domain_init,
4456        .domain_destroy = intel_iommu_domain_destroy,
4457        .attach_dev     = intel_iommu_attach_device,
4458        .detach_dev     = intel_iommu_detach_device,
4459        .map            = intel_iommu_map,
4460        .unmap          = intel_iommu_unmap,
4461        .iova_to_phys   = intel_iommu_iova_to_phys,
4462        .domain_has_cap = intel_iommu_domain_has_cap,
4463        .add_device     = intel_iommu_add_device,
4464        .remove_device  = intel_iommu_remove_device,
4465        .pgsize_bitmap  = INTEL_IOMMU_PGSIZES,
4466};
4467
4468static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4469{
4470        /* G4x/GM45 integrated gfx dmar support is totally busted. */
4471        printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4472        dmar_map_gfx = 0;
4473}
4474
4475DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4476DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4477DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4478DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4479DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4480DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4481DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4482
4483static void quirk_iommu_rwbf(struct pci_dev *dev)
4484{
4485        /*
4486         * Mobile 4 Series Chipset neglects to set RWBF capability,
4487         * but needs it. Same seems to hold for the desktop versions.
4488         */
4489        printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4490        rwbf_quirk = 1;
4491}
4492
4493DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4494DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4495DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4496DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4497DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4498DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4499DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4500
4501#define GGC 0x52
4502#define GGC_MEMORY_SIZE_MASK    (0xf << 8)
4503#define GGC_MEMORY_SIZE_NONE    (0x0 << 8)
4504#define GGC_MEMORY_SIZE_1M      (0x1 << 8)
4505#define GGC_MEMORY_SIZE_2M      (0x3 << 8)
4506#define GGC_MEMORY_VT_ENABLED   (0x8 << 8)
4507#define GGC_MEMORY_SIZE_2M_VT   (0x9 << 8)
4508#define GGC_MEMORY_SIZE_3M_VT   (0xa << 8)
4509#define GGC_MEMORY_SIZE_4M_VT   (0xb << 8)
4510
4511static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4512{
4513        unsigned short ggc;
4514
4515        if (pci_read_config_word(dev, GGC, &ggc))
4516                return;
4517
4518        if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4519                printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4520                dmar_map_gfx = 0;
4521        } else if (dmar_map_gfx) {
4522                /* we have to ensure the gfx device is idle before we flush */
4523                printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4524                intel_iommu_strict = 1;
4525       }
4526}
4527DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4528DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4529DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4530DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4531
4532/* On Tylersburg chipsets, some BIOSes have been known to enable the
4533   ISOCH DMAR unit for the Azalia sound device, but not give it any
4534   TLB entries, which causes it to deadlock. Check for that.  We do
4535   this in a function called from init_dmars(), instead of in a PCI
4536   quirk, because we don't want to print the obnoxious "BIOS broken"
4537   message if VT-d is actually disabled.
4538*/
4539static void __init check_tylersburg_isoch(void)
4540{
4541        struct pci_dev *pdev;
4542        uint32_t vtisochctrl;
4543
4544        /* If there's no Azalia in the system anyway, forget it. */
4545        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4546        if (!pdev)
4547                return;
4548        pci_dev_put(pdev);
4549
4550        /* System Management Registers. Might be hidden, in which case
4551           we can't do the sanity check. But that's OK, because the
4552           known-broken BIOSes _don't_ actually hide it, so far. */
4553        pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4554        if (!pdev)
4555                return;
4556
4557        if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4558                pci_dev_put(pdev);
4559                return;
4560        }
4561
4562        pci_dev_put(pdev);
4563
4564        /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4565        if (vtisochctrl & 1)
4566                return;
4567
4568        /* Drop all bits other than the number of TLB entries */
4569        vtisochctrl &= 0x1c;
4570
4571        /* If we have the recommended number of TLB entries (16), fine. */
4572        if (vtisochctrl == 0x10)
4573                return;
4574
4575        /* Zero TLB entries? You get to ride the short bus to school. */
4576        if (!vtisochctrl) {
4577                WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4578                     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4579                     dmi_get_system_info(DMI_BIOS_VENDOR),
4580                     dmi_get_system_info(DMI_BIOS_VERSION),
4581                     dmi_get_system_info(DMI_PRODUCT_VERSION));
4582                iommu_identity_mapping |= IDENTMAP_AZALIA;
4583                return;
4584        }
4585        
4586        printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4587               vtisochctrl);
4588}
4589