#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* Clamp the domain max PFN so that it still fits in an unsigned long. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* Each page-table level decodes LEVEL_STRIDE bits of the DMA pfn. */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/* Page sizes advertised to the IOMMU core: all power-of-two sizes >= 4KiB. */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}
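/*
 * Example: the default 48-bit domain address width gives
 * width_to_agaw(48) = (48 - 30) / 9 = 2, and agaw_to_level(2) = 4,
 * i.e. a four-level page table.
 */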

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}

/*
 * Convert between page frame numbers in units of the host PAGE_SIZE and
 * DMA page frame numbers in units of VTD_PAGE_SIZE (the 4KiB VT-d page).
 */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* global iommu list; entries are NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * Set when VT-d is required and must not be silently left disabled,
 * e.g. when the kernel was launched with tboot/TXT.
 */
static int force_on = 0;

/*
 * Root table entry, indexed by PCI bus number.  Bit 0 of 'val' is the
 * present bit; the address bits of 'val' point to the context table
 * for that bus.
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

/* Context table entry, indexed by device/function number within a bus. */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
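/*
 * As encoded by the helpers above: context.lo bit 0 is the present bit,
 * bit 1 is fault-processing disable (cleared by context_set_fault_enable),
 * bits 3:2 select the translation type, and the page-aligned address bits
 * hold the second-level page-table root.  context.hi bits 2:0 hold the
 * address width (AGAW) and bits 23:8 the domain identifier.
 */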

struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* On 32-bit, read the 64-bit PTE atomically via cmpxchg64. */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & (1 << 7));
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
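/*
 * DMA page-table entry layout, as used by the helpers above: bits 1:0 are
 * the read/write permission bits (a PTE counts as present when either is
 * set), DMA_PTE_SNP requests snooped accesses, bit 7 marks a superpage,
 * and the page frame address lives in the bits covered by VTD_PAGE_MASK.
 */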

static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned by one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/*
 * domain represents a virtual machine: more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)

/* limit of IOMMU units tracked in each domain's iommu_bmp */
#ifdef CONFIG_X86
# define IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
#else
# define IOMMU_UNITS_SUPPORTED	64
#endif

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address of page-table root */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping;	/* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommus */
	int		iommu_superpage;/* level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, and so on */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global;/* link to global list */
	int segment;		/* PCI segment (domain) number */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* NULL for a PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static struct iommu_ops intel_iommu_ops;

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable supported super page\n");
			intel_iommu_superpage = 0;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
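/*
 * The options above are comma-separated on the kernel command line, e.g.
 * "intel_iommu=on,strict,sp_off" enables translation, disables batched
 * IOTLB flushing, and disables superpage use.
 */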

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}

static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}
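/*
 * __iommu_calculate_agaw() walks the SAGAW capability bitmap downwards from
 * the requested guest address width and returns the widest supported AGAW
 * that does not exceed it, or -1 if the hardware supports none of them.
 */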

/*
 * Calculate the max SAGAW this iommu supports.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate the agaw for this iommu, starting from the default domain
 * address width; SAGAW may differ across iommus.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns the single iommu of a non-vm, non-si domain. */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domains should never get here. */
	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);

	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);

	domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
}

static void domain_update_iommu_snooping(struct dmar_domain *domain)
{
	int i;

	domain->iommu_snooping = 1;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_sc_support(g_iommus[i]->ecap)) {
			domain->iommu_snooping = 0;
			break;
		}
	}
}

static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		domain->iommu_superpage = 0;
		return;
	}

	/* set iommu_superpage to the smallest common denominator */
	for_each_active_iommu(iommu, drhd) {
		mask &= cap_super_page_val(iommu->cap);
		if (!mask) {
			break;
		}
	}
	domain->iommu_superpage = fls(mask);
}
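/*
 * iommu_superpage thus ends up as the number of superpage levels every
 * active iommu supports: 0 means 4KiB pages only, 1 adds 2MiB pages,
 * 2 adds 1GiB pages, and so on.
 */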

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}

static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		if (segment != drhd->segment)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] &&
			    drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;
			if (drhd->devices[i] &&
			    drhd->devices[i]->subordinate &&
			    drhd->devices[i]->subordinate->number <= bus &&
			    drhd->devices[i]->subordinate->busn_res.end >= bus)
				return drhd->iommu;
		}

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

/* Gets the context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)
				alloc_pgtable_page(iommu->node);
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}
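/*
 * Note that the context table for a bus is allocated lazily here on first
 * use: a fresh page is installed in the root entry, and both the new table
 * and the root entry are flushed for non-coherent hardware.
 */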

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
				    sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int target_level)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);
	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
	parent = domain->pgd;

	while (level > 0) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			} else {
				dma_pte_addr(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	return pte;
}
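/*
 * pfn_to_dma_pte() walks the page table top-down to the requested level
 * (or, when target_level is 0, to the first superpage or non-present
 * entry), allocating missing intermediate tables on demand; the cmpxchg64
 * above keeps concurrent walkers from installing two tables for one slot.
 */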

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (pte->val & DMA_PTE_LARGE_PAGE) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static int dma_pte_clear_range(struct dmar_domain *domain,
			       unsigned long start_pfn,
			       unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;
	int order;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);

	order = (large_page - 1) * 9;
	return order;
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* if we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}
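/*
 * dma_pte_clear_range() only clears leaf PTEs; dma_pte_free_pagetable()
 * then releases the intermediate page-table pages whose range is entirely
 * covered, and frees the pgd itself once the whole domain range is freed.
 */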

static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 sts;
	unsigned long flag;

	addr = iommu->root_entry;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* Invalidate context-cache entries: globally, per-domain, or per-device. */
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* Invalidate IOTLB entries: globally, per-domain, or page-selective. */
static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* a global flush does not use the address register */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}

#if 0
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: only the first IOTLB register set is used here */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check the actual IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}
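/*
 * The two helpers above implement register-based invalidation: the caller
 * picks a granularity (global, domain-selective, or page-selective with a
 * size order), the request is written to the command/IOTLB registers, and
 * the code spins until the hardware clears the invalidation-pending bit.
 * The IAIG/IIRG comparison reports when the hardware fell back to a
 * coarser granularity than requested.
 */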
1094
1095static struct device_domain_info *iommu_support_dev_iotlb(
1096 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1097{
1098 int found = 0;
1099 unsigned long flags;
1100 struct device_domain_info *info;
1101 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1102
1103 if (!ecap_dev_iotlb_support(iommu->ecap))
1104 return NULL;
1105
1106 if (!iommu->qi)
1107 return NULL;
1108
1109 spin_lock_irqsave(&device_domain_lock, flags);
1110 list_for_each_entry(info, &domain->devices, link)
1111 if (info->bus == bus && info->devfn == devfn) {
1112 found = 1;
1113 break;
1114 }
1115 spin_unlock_irqrestore(&device_domain_lock, flags);
1116
1117 if (!found || !info->dev)
1118 return NULL;
1119
1120 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1121 return NULL;
1122
1123 if (!dmar_find_matched_atsr_unit(info->dev))
1124 return NULL;
1125
1126 info->iommu = iommu;
1127
1128 return info;
1129}
1130
1131static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1132{
1133 if (!info)
1134 return;
1135
1136 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1137}
1138
1139static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1140{
1141 if (!info->dev || !pci_ats_enabled(info->dev))
1142 return;
1143
1144 pci_disable_ats(info->dev);
1145}
1146
1147static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1148 u64 addr, unsigned mask)
1149{
1150 u16 sid, qdep;
1151 unsigned long flags;
1152 struct device_domain_info *info;
1153
1154 spin_lock_irqsave(&device_domain_lock, flags);
1155 list_for_each_entry(info, &domain->devices, link) {
1156 if (!info->dev || !pci_ats_enabled(info->dev))
1157 continue;
1158
1159 sid = info->bus << 8 | info->devfn;
1160 qdep = pci_ats_queue_depth(info->dev);
1161 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1162 }
1163 spin_unlock_irqrestore(&device_domain_lock, flags);
1164}
1165
1166static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1167 unsigned long pfn, unsigned int pages, int map)
1168{
1169 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1170 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1171
1172 BUG_ON(pages == 0);
1173
1174
1175
1176
1177
1178
1179
1180 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1181 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1182 DMA_TLB_DSI_FLUSH);
1183 else
1184 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1185 DMA_TLB_PSI_FLUSH);
1186
1187
1188
1189
1190
1191 if (!cap_caching_mode(iommu->cap) || !map)
1192 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1193}
1194
1195static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1196{
1197 u32 pmen;
1198 unsigned long flags;
1199
1200 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1201 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1202 pmen &= ~DMA_PMEN_EPM;
1203 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1204
1205
1206 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1207 readl, !(pmen & DMA_PMEN_PRS), pmen);
1208
1209 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1210}
1211
1212static int iommu_enable_translation(struct intel_iommu *iommu)
1213{
1214 u32 sts;
1215 unsigned long flags;
1216
1217 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1218 iommu->gcmd |= DMA_GCMD_TE;
1219 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1220
1221
1222 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1223 readl, (sts & DMA_GSTS_TES), sts);
1224
1225 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1226 return 0;
1227}
1228
1229static int iommu_disable_translation(struct intel_iommu *iommu)
1230{
1231 u32 sts;
1232 unsigned long flag;
1233
1234 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1235 iommu->gcmd &= ~DMA_GCMD_TE;
1236 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1237
1238
1239 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1240 readl, (!(sts & DMA_GSTS_TES)), sts);
1241
1242 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1243 return 0;
1244}
1245
1246
1247static int iommu_init_domains(struct intel_iommu *iommu)
1248{
1249 unsigned long ndomains;
1250 unsigned long nlongs;
1251
1252 ndomains = cap_ndoms(iommu->cap);
1253 pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
1254 ndomains);
1255 nlongs = BITS_TO_LONGS(ndomains);
1256
1257 spin_lock_init(&iommu->lock);
1258
1259
1260
1261
1262 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1263 if (!iommu->domain_ids) {
1264 printk(KERN_ERR "Allocating domain id array failed\n");
1265 return -ENOMEM;
1266 }
1267 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1268 GFP_KERNEL);
1269 if (!iommu->domains) {
1270 printk(KERN_ERR "Allocating domain array failed\n");
1271 return -ENOMEM;
1272 }
1273
1274
1275
1276
1277
1278 if (cap_caching_mode(iommu->cap))
1279 set_bit(0, iommu->domain_ids);
1280 return 0;
1281}
1282
1283
1284static void domain_exit(struct dmar_domain *domain);
1285static void vm_domain_exit(struct dmar_domain *domain);
1286
1287void free_dmar_iommu(struct intel_iommu *iommu)
1288{
1289 struct dmar_domain *domain;
1290 int i;
1291 unsigned long flags;
1292
1293 if ((iommu->domains) && (iommu->domain_ids)) {
1294 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1295 domain = iommu->domains[i];
1296 clear_bit(i, iommu->domain_ids);
1297
1298 spin_lock_irqsave(&domain->iommu_lock, flags);
1299 if (--domain->iommu_count == 0) {
1300 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1301 vm_domain_exit(domain);
1302 else
1303 domain_exit(domain);
1304 }
1305 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1306 }
1307 }
1308
1309 if (iommu->gcmd & DMA_GCMD_TE)
1310 iommu_disable_translation(iommu);
1311
1312 if (iommu->irq) {
1313 irq_set_handler_data(iommu->irq, NULL);
1314
1315 free_irq(iommu->irq, iommu);
1316 destroy_irq(iommu->irq);
1317 }
1318
1319 kfree(iommu->domains);
1320 kfree(iommu->domain_ids);
1321
1322 g_iommus[iommu->seq_id] = NULL;
1323
1324
1325 for (i = 0; i < g_num_of_iommus; i++) {
1326 if (g_iommus[i])
1327 break;
1328 }
1329
1330 if (i == g_num_of_iommus)
1331 kfree(g_iommus);
1332
1333
1334 free_context_table(iommu);
1335}
1336
1337static struct dmar_domain *alloc_domain(void)
1338{
1339 struct dmar_domain *domain;
1340
1341 domain = alloc_domain_mem();
1342 if (!domain)
1343 return NULL;
1344
1345 domain->nid = -1;
1346 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1347 domain->flags = 0;
1348
1349 return domain;
1350}
1351
1352static int iommu_attach_domain(struct dmar_domain *domain,
1353 struct intel_iommu *iommu)
1354{
1355 int num;
1356 unsigned long ndomains;
1357 unsigned long flags;
1358
1359 ndomains = cap_ndoms(iommu->cap);
1360
1361 spin_lock_irqsave(&iommu->lock, flags);
1362
1363 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1364 if (num >= ndomains) {
1365 spin_unlock_irqrestore(&iommu->lock, flags);
1366 printk(KERN_ERR "IOMMU: no free domain ids\n");
1367 return -ENOMEM;
1368 }
1369
1370 domain->id = num;
1371 set_bit(num, iommu->domain_ids);
1372 set_bit(iommu->seq_id, domain->iommu_bmp);
1373 iommu->domains[num] = domain;
1374 spin_unlock_irqrestore(&iommu->lock, flags);
1375
1376 return 0;
1377}
1378
1379static void iommu_detach_domain(struct dmar_domain *domain,
1380 struct intel_iommu *iommu)
1381{
1382 unsigned long flags;
1383 int num, ndomains;
1384 int found = 0;
1385
1386 spin_lock_irqsave(&iommu->lock, flags);
1387 ndomains = cap_ndoms(iommu->cap);
1388 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1389 if (iommu->domains[num] == domain) {
1390 found = 1;
1391 break;
1392 }
1393 }
1394
1395 if (found) {
1396 clear_bit(num, iommu->domain_ids);
1397 clear_bit(iommu->seq_id, domain->iommu_bmp);
1398 iommu->domains[num] = NULL;
1399 }
1400 spin_unlock_irqrestore(&iommu->lock, flags);
1401}
1402
1403static struct iova_domain reserved_iova_list;
1404static struct lock_class_key reserved_rbtree_key;
1405
1406static int dmar_init_reserved_ranges(void)
1407{
1408 struct pci_dev *pdev = NULL;
1409 struct iova *iova;
1410 int i;
1411
1412 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1413
1414 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1415 &reserved_rbtree_key);
1416
1417
1418 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1419 IOVA_PFN(IOAPIC_RANGE_END));
1420 if (!iova) {
1421 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1422 return -ENODEV;
1423 }
1424
1425
1426 for_each_pci_dev(pdev) {
1427 struct resource *r;
1428
1429 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1430 r = &pdev->resource[i];
1431 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1432 continue;
1433 iova = reserve_iova(&reserved_iova_list,
1434 IOVA_PFN(r->start),
1435 IOVA_PFN(r->end));
1436 if (!iova) {
1437 printk(KERN_ERR "Reserve iova failed\n");
1438 return -ENODEV;
1439 }
1440 }
1441 }
1442 return 0;
1443}
1444
1445static void domain_reserve_special_ranges(struct dmar_domain *domain)
1446{
1447 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1448}
1449
1450static inline int guestwidth_to_adjustwidth(int gaw)
1451{
1452 int agaw;
1453 int r = (gaw - 12) % 9;
1454
1455 if (r == 0)
1456 agaw = gaw;
1457 else
1458 agaw = gaw + 9 - r;
1459 if (agaw > 64)
1460 agaw = 64;
1461 return agaw;
1462}
1463
1464static int domain_init(struct dmar_domain *domain, int guest_width)
1465{
1466 struct intel_iommu *iommu;
1467 int adjust_width, agaw;
1468 unsigned long sagaw;
1469
1470 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1471 spin_lock_init(&domain->iommu_lock);
1472
1473 domain_reserve_special_ranges(domain);
1474
1475
1476 iommu = domain_get_iommu(domain);
1477 if (guest_width > cap_mgaw(iommu->cap))
1478 guest_width = cap_mgaw(iommu->cap);
1479 domain->gaw = guest_width;
1480 adjust_width = guestwidth_to_adjustwidth(guest_width);
1481 agaw = width_to_agaw(adjust_width);
1482 sagaw = cap_sagaw(iommu->cap);
1483 if (!test_bit(agaw, &sagaw)) {
1484
1485 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1486 agaw = find_next_bit(&sagaw, 5, agaw);
1487 if (agaw >= 5)
1488 return -ENODEV;
1489 }
1490 domain->agaw = agaw;
1491 INIT_LIST_HEAD(&domain->devices);
1492
1493 if (ecap_coherent(iommu->ecap))
1494 domain->iommu_coherency = 1;
1495 else
1496 domain->iommu_coherency = 0;
1497
1498 if (ecap_sc_support(iommu->ecap))
1499 domain->iommu_snooping = 1;
1500 else
1501 domain->iommu_snooping = 0;
1502
1503 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1504 domain->iommu_count = 1;
1505 domain->nid = iommu->node;
1506
1507
1508 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1509 if (!domain->pgd)
1510 return -ENOMEM;
1511 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1512 return 0;
1513}
1514
1515static void domain_exit(struct dmar_domain *domain)
1516{
1517 struct dmar_drhd_unit *drhd;
1518 struct intel_iommu *iommu;
1519
1520
1521 if (!domain)
1522 return;
1523
1524
1525 if (!intel_iommu_strict)
1526 flush_unmaps_timeout(0);
1527
1528 domain_remove_dev_info(domain);
1529
1530 put_iova_domain(&domain->iovad);
1531
1532
1533 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1534
1535
1536 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1537
1538 for_each_active_iommu(iommu, drhd)
1539 if (test_bit(iommu->seq_id, domain->iommu_bmp))
1540 iommu_detach_domain(domain, iommu);
1541
1542 free_domain_mem(domain);
1543}
1544
1545static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1546 u8 bus, u8 devfn, int translation)
1547{
1548 struct context_entry *context;
1549 unsigned long flags;
1550 struct intel_iommu *iommu;
1551 struct dma_pte *pgd;
1552 unsigned long num;
1553 unsigned long ndomains;
1554 int id;
1555 int agaw;
1556 struct device_domain_info *info = NULL;
1557
1558 pr_debug("Set context mapping for %02x:%02x.%d\n",
1559 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1560
1561 BUG_ON(!domain->pgd);
1562 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1563 translation != CONTEXT_TT_MULTI_LEVEL);
1564
1565 iommu = device_to_iommu(segment, bus, devfn);
1566 if (!iommu)
1567 return -ENODEV;
1568
1569 context = device_to_context_entry(iommu, bus, devfn);
1570 if (!context)
1571 return -ENOMEM;
1572 spin_lock_irqsave(&iommu->lock, flags);
1573 if (context_present(context)) {
1574 spin_unlock_irqrestore(&iommu->lock, flags);
1575 return 0;
1576 }
1577
1578 id = domain->id;
1579 pgd = domain->pgd;
1580
1581 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1582 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1583 int found = 0;
1584
1585
1586 ndomains = cap_ndoms(iommu->cap);
1587 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1588 if (iommu->domains[num] == domain) {
1589 id = num;
1590 found = 1;
1591 break;
1592 }
1593 }
1594
1595 if (found == 0) {
1596 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1597 if (num >= ndomains) {
1598 spin_unlock_irqrestore(&iommu->lock, flags);
1599 printk(KERN_ERR "IOMMU: no free domain ids\n");
1600 return -EFAULT;
1601 }
1602
1603 set_bit(num, iommu->domain_ids);
1604 iommu->domains[num] = domain;
1605 id = num;
1606 }
1607
1608
1609
1610
1611
1612 if (translation != CONTEXT_TT_PASS_THROUGH) {
1613 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1614 pgd = phys_to_virt(dma_pte_addr(pgd));
1615 if (!dma_pte_present(pgd)) {
1616 spin_unlock_irqrestore(&iommu->lock, flags);
1617 return -ENOMEM;
1618 }
1619 }
1620 }
1621 }
1622
1623 context_set_domain_id(context, id);
1624
1625 if (translation != CONTEXT_TT_PASS_THROUGH) {
1626 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1627 translation = info ? CONTEXT_TT_DEV_IOTLB :
1628 CONTEXT_TT_MULTI_LEVEL;
1629 }
1630
1631
1632
1633
1634 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1635 context_set_address_width(context, iommu->msagaw);
1636 else {
1637 context_set_address_root(context, virt_to_phys(pgd));
1638 context_set_address_width(context, iommu->agaw);
1639 }
1640
1641 context_set_translation_type(context, translation);
1642 context_set_fault_enable(context);
1643 context_set_present(context);
1644 domain_flush_cache(domain, context, sizeof(*context));
1645
1646
1647
1648
1649
1650
1651
1652 if (cap_caching_mode(iommu->cap)) {
1653 iommu->flush.flush_context(iommu, 0,
1654 (((u16)bus) << 8) | devfn,
1655 DMA_CCMD_MASK_NOBIT,
1656 DMA_CCMD_DEVICE_INVL);
1657 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1658 } else {
1659 iommu_flush_write_buffer(iommu);
1660 }
1661 iommu_enable_dev_iotlb(info);
1662 spin_unlock_irqrestore(&iommu->lock, flags);
1663
1664 spin_lock_irqsave(&domain->iommu_lock, flags);
1665 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1666 domain->iommu_count++;
1667 if (domain->iommu_count == 1)
1668 domain->nid = iommu->node;
1669 domain_update_iommu_cap(domain);
1670 }
1671 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1672 return 0;
1673}
1674
1675static int
1676domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1677 int translation)
1678{
1679 int ret;
1680 struct pci_dev *tmp, *parent;
1681
1682 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1683 pdev->bus->number, pdev->devfn,
1684 translation);
1685 if (ret)
1686 return ret;
1687
1688
1689 tmp = pci_find_upstream_pcie_bridge(pdev);
1690 if (!tmp)
1691 return 0;
1692
1693 parent = pdev->bus->self;
1694 while (parent != tmp) {
1695 ret = domain_context_mapping_one(domain,
1696 pci_domain_nr(parent->bus),
1697 parent->bus->number,
1698 parent->devfn, translation);
1699 if (ret)
1700 return ret;
1701 parent = parent->bus->self;
1702 }
1703 if (pci_is_pcie(tmp))
1704 return domain_context_mapping_one(domain,
1705 pci_domain_nr(tmp->subordinate),
1706 tmp->subordinate->number, 0,
1707 translation);
1708 else
1709 return domain_context_mapping_one(domain,
1710 pci_domain_nr(tmp->bus),
1711 tmp->bus->number,
1712 tmp->devfn,
1713 translation);
1714}
1715
1716static int domain_context_mapped(struct pci_dev *pdev)
1717{
1718 int ret;
1719 struct pci_dev *tmp, *parent;
1720 struct intel_iommu *iommu;
1721
1722 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1723 pdev->devfn);
1724 if (!iommu)
1725 return -ENODEV;
1726
1727 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1728 if (!ret)
1729 return ret;
1730
1731 tmp = pci_find_upstream_pcie_bridge(pdev);
1732 if (!tmp)
1733 return ret;
1734
1735 parent = pdev->bus->self;
1736 while (parent != tmp) {
1737 ret = device_context_mapped(iommu, parent->bus->number,
1738 parent->devfn);
1739 if (!ret)
1740 return ret;
1741 parent = parent->bus->self;
1742 }
1743 if (pci_is_pcie(tmp))
1744 return device_context_mapped(iommu, tmp->subordinate->number,
1745 0);
1746 else
1747 return device_context_mapped(iommu, tmp->bus->number,
1748 tmp->devfn);
1749}
1750
1751
1752static inline unsigned long aligned_nrpages(unsigned long host_addr,
1753 size_t size)
1754{
1755 host_addr &= ~PAGE_MASK;
1756 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1757}
1758
1759
1760static inline int hardware_largepage_caps(struct dmar_domain *domain,
1761 unsigned long iov_pfn,
1762 unsigned long phy_pfn,
1763 unsigned long pages)
1764{
1765 int support, level = 1;
1766 unsigned long pfnmerge;
1767
1768 support = domain->iommu_superpage;
1769
1770
1771
1772
1773
1774 pfnmerge = iov_pfn | phy_pfn;
1775
1776 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1777 pages >>= VTD_STRIDE_SHIFT;
1778 if (!pages)
1779 break;
1780 pfnmerge >>= VTD_STRIDE_SHIFT;
1781 level++;
1782 support--;
1783 }
1784 return level;
1785}
1786
1787static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1788 struct scatterlist *sg, unsigned long phys_pfn,
1789 unsigned long nr_pages, int prot)
1790{
1791 struct dma_pte *first_pte = NULL, *pte = NULL;
1792 phys_addr_t uninitialized_var(pteval);
1793 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1794 unsigned long sg_res;
1795 unsigned int largepage_lvl = 0;
1796 unsigned long lvl_pages = 0;
1797
1798 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1799
1800 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1801 return -EINVAL;
1802
1803 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1804
1805 if (sg)
1806 sg_res = 0;
1807 else {
1808 sg_res = nr_pages + 1;
1809 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1810 }
1811
1812 while (nr_pages > 0) {
1813 uint64_t tmp;
1814
1815 if (!sg_res) {
1816 sg_res = aligned_nrpages(sg->offset, sg->length);
1817 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1818 sg->dma_length = sg->length;
1819 pteval = page_to_phys(sg_page(sg)) | prot;
1820 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1821 }
1822
1823 if (!pte) {
1824 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1825
1826 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1827 if (!pte)
1828 return -ENOMEM;
1829
1830 if (largepage_lvl > 1)
1831 pteval |= DMA_PTE_LARGE_PAGE;
1832 else
1833 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1834
1835 }
1836
1837
1838
1839 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1840 if (tmp) {
1841 static int dumps = 5;
1842 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1843 iov_pfn, tmp, (unsigned long long)pteval);
1844 if (dumps) {
1845 dumps--;
1846 debug_dma_dump_mappings(NULL);
1847 }
1848 WARN_ON(1);
1849 }
1850
1851 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1852
1853 BUG_ON(nr_pages < lvl_pages);
1854 BUG_ON(sg_res < lvl_pages);
1855
1856 nr_pages -= lvl_pages;
1857 iov_pfn += lvl_pages;
1858 phys_pfn += lvl_pages;
1859 pteval += lvl_pages * VTD_PAGE_SIZE;
1860 sg_res -= lvl_pages;
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873 pte++;
1874 if (!nr_pages || first_pte_in_page(pte) ||
1875 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1876 domain_flush_cache(domain, first_pte,
1877 (void *)pte - (void *)first_pte);
1878 pte = NULL;
1879 }
1880
1881 if (!sg_res && nr_pages)
1882 sg = sg_next(sg);
1883 }
1884 return 0;
1885}
1886
1887static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1888 struct scatterlist *sg, unsigned long nr_pages,
1889 int prot)
1890{
1891 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1892}
1893
1894static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1895 unsigned long phys_pfn, unsigned long nr_pages,
1896 int prot)
1897{
1898 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1899}
1900
1901static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1902{
1903 if (!iommu)
1904 return;
1905
1906 clear_context_table(iommu, bus, devfn);
1907 iommu->flush.flush_context(iommu, 0, 0, 0,
1908 DMA_CCMD_GLOBAL_INVL);
1909 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1910}
1911
1912static inline void unlink_domain_info(struct device_domain_info *info)
1913{
1914 assert_spin_locked(&device_domain_lock);
1915 list_del(&info->link);
1916 list_del(&info->global);
1917 if (info->dev)
1918 info->dev->dev.archdata.iommu = NULL;
1919}
1920
1921static void domain_remove_dev_info(struct dmar_domain *domain)
1922{
1923 struct device_domain_info *info;
1924 unsigned long flags;
1925 struct intel_iommu *iommu;
1926
1927 spin_lock_irqsave(&device_domain_lock, flags);
1928 while (!list_empty(&domain->devices)) {
1929 info = list_entry(domain->devices.next,
1930 struct device_domain_info, link);
1931 unlink_domain_info(info);
1932 spin_unlock_irqrestore(&device_domain_lock, flags);
1933
1934 iommu_disable_dev_iotlb(info);
1935 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1936 iommu_detach_dev(iommu, info->bus, info->devfn);
1937 free_devinfo_mem(info);
1938
1939 spin_lock_irqsave(&device_domain_lock, flags);
1940 }
1941 spin_unlock_irqrestore(&device_domain_lock, flags);
1942}
1943
1944
1945
1946
1947
1948static struct dmar_domain *
1949find_domain(struct pci_dev *pdev)
1950{
1951 struct device_domain_info *info;
1952
1953
1954 info = pdev->dev.archdata.iommu;
1955 if (info)
1956 return info->domain;
1957 return NULL;
1958}
1959
1960
1961static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1962{
1963 struct dmar_domain *domain, *found = NULL;
1964 struct intel_iommu *iommu;
1965 struct dmar_drhd_unit *drhd;
1966 struct device_domain_info *info, *tmp;
1967 struct pci_dev *dev_tmp;
1968 unsigned long flags;
1969 int bus = 0, devfn = 0;
1970 int segment;
1971 int ret;
1972
1973 domain = find_domain(pdev);
1974 if (domain)
1975 return domain;
1976
1977 segment = pci_domain_nr(pdev->bus);
1978
1979 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1980 if (dev_tmp) {
1981 if (pci_is_pcie(dev_tmp)) {
1982 bus = dev_tmp->subordinate->number;
1983 devfn = 0;
1984 } else {
1985 bus = dev_tmp->bus->number;
1986 devfn = dev_tmp->devfn;
1987 }
1988 spin_lock_irqsave(&device_domain_lock, flags);
1989 list_for_each_entry(info, &device_domain_list, global) {
1990 if (info->segment == segment &&
1991 info->bus == bus && info->devfn == devfn) {
1992 found = info->domain;
1993 break;
1994 }
1995 }
1996 spin_unlock_irqrestore(&device_domain_lock, flags);
1997
1998 if (found) {
1999 domain = found;
2000 goto found_domain;
2001 }
2002 }
2003
2004 domain = alloc_domain();
2005 if (!domain)
2006 goto error;
2007
2008
2009 drhd = dmar_find_matched_drhd_unit(pdev);
2010 if (!drhd) {
2011 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2012 pci_name(pdev));
2013 free_domain_mem(domain);
2014 return NULL;
2015 }
2016 iommu = drhd->iommu;
2017
2018 ret = iommu_attach_domain(domain, iommu);
2019 if (ret) {
2020 free_domain_mem(domain);
2021 goto error;
2022 }
2023
2024 if (domain_init(domain, gaw)) {
2025 domain_exit(domain);
2026 goto error;
2027 }
2028
2029
2030 if (dev_tmp) {
2031 info = alloc_devinfo_mem();
2032 if (!info) {
2033 domain_exit(domain);
2034 goto error;
2035 }
2036 info->segment = segment;
2037 info->bus = bus;
2038 info->devfn = devfn;
2039 info->dev = NULL;
2040 info->domain = domain;
2041
2042 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2043
2044
2045 found = NULL;
2046 spin_lock_irqsave(&device_domain_lock, flags);
2047 list_for_each_entry(tmp, &device_domain_list, global) {
2048 if (tmp->segment == segment &&
2049 tmp->bus == bus && tmp->devfn == devfn) {
2050 found = tmp->domain;
2051 break;
2052 }
2053 }
2054 if (found) {
2055 spin_unlock_irqrestore(&device_domain_lock, flags);
2056 free_devinfo_mem(info);
2057 domain_exit(domain);
2058 domain = found;
2059 } else {
2060 list_add(&info->link, &domain->devices);
2061 list_add(&info->global, &device_domain_list);
2062 spin_unlock_irqrestore(&device_domain_lock, flags);
2063 }
2064 }
2065
2066found_domain:
2067 info = alloc_devinfo_mem();
2068 if (!info)
2069 goto error;
2070 info->segment = segment;
2071 info->bus = pdev->bus->number;
2072 info->devfn = pdev->devfn;
2073 info->dev = pdev;
2074 info->domain = domain;
2075 spin_lock_irqsave(&device_domain_lock, flags);
2076
2077 found = find_domain(pdev);
2078 if (found != NULL) {
2079 spin_unlock_irqrestore(&device_domain_lock, flags);
2080 if (found != domain) {
2081 domain_exit(domain);
2082 domain = found;
2083 }
2084 free_devinfo_mem(info);
2085 return domain;
2086 }
2087 list_add(&info->link, &domain->devices);
2088 list_add(&info->global, &device_domain_list);
2089 pdev->dev.archdata.iommu = info;
2090 spin_unlock_irqrestore(&device_domain_lock, flags);
2091 return domain;
2092error:
2093
2094 return find_domain(pdev);
2095}
2096
2097static int iommu_identity_mapping;
2098#define IDENTMAP_ALL 1
2099#define IDENTMAP_GFX 2
2100#define IDENTMAP_AZALIA 4
2101
2102static int iommu_domain_identity_map(struct dmar_domain *domain,
2103 unsigned long long start,
2104 unsigned long long end)
2105{
2106 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2107 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2108
2109 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2110 dma_to_mm_pfn(last_vpfn))) {
2111 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2112 return -ENOMEM;
2113 }
2114
2115 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2116 start, end, domain->id);
2117
2118
2119
2120
2121 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2122
2123 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2124 last_vpfn - first_vpfn + 1,
2125 DMA_PTE_READ|DMA_PTE_WRITE);
2126}
2127
2128static int iommu_prepare_identity_map(struct pci_dev *pdev,
2129 unsigned long long start,
2130 unsigned long long end)
2131{
2132 struct dmar_domain *domain;
2133 int ret;
2134
2135 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2136 if (!domain)
2137 return -ENOMEM;
2138
2139
2140
2141
2142
2143 if (domain == si_domain && hw_pass_through) {
2144 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2145 pci_name(pdev), start, end);
2146 return 0;
2147 }
2148
2149 printk(KERN_INFO
2150 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2151 pci_name(pdev), start, end);
2152
2153 if (end < start) {
2154 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2155 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2156 dmi_get_system_info(DMI_BIOS_VENDOR),
2157 dmi_get_system_info(DMI_BIOS_VERSION),
2158 dmi_get_system_info(DMI_PRODUCT_VERSION));
2159 ret = -EIO;
2160 goto error;
2161 }
2162
2163 if (end >> agaw_to_width(domain->agaw)) {
2164 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2165 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2166 agaw_to_width(domain->agaw),
2167 dmi_get_system_info(DMI_BIOS_VENDOR),
2168 dmi_get_system_info(DMI_BIOS_VERSION),
2169 dmi_get_system_info(DMI_PRODUCT_VERSION));
2170 ret = -EIO;
2171 goto error;
2172 }
2173
2174 ret = iommu_domain_identity_map(domain, start, end);
2175 if (ret)
2176 goto error;
2177
2178
2179 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2180 if (ret)
2181 goto error;
2182
2183 return 0;
2184
2185 error:
2186 domain_exit(domain);
2187 return ret;
2188}
2189
2190static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2191 struct pci_dev *pdev)
2192{
2193 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2194 return 0;
2195 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2196 rmrr->end_address);
2197}
2198
2199#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2200static inline void iommu_prepare_isa(void)
2201{
2202 struct pci_dev *pdev;
2203 int ret;
2204
2205 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2206 if (!pdev)
2207 return;
2208
2209 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2210 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2211
2212 if (ret)
2213 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2214 "floppy might not work\n");
2215
2216}
2217#else
2218static inline void iommu_prepare_isa(void)
2219{
2220 return;
2221}
2222#endif
2223
2224static int md_domain_init(struct dmar_domain *domain, int guest_width);
2225
2226static int __init si_domain_init(int hw)
2227{
2228 struct dmar_drhd_unit *drhd;
2229 struct intel_iommu *iommu;
2230 int nid, ret = 0;
2231
2232 si_domain = alloc_domain();
2233 if (!si_domain)
2234 return -EFAULT;
2235
2236 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2237
2238 for_each_active_iommu(iommu, drhd) {
2239 ret = iommu_attach_domain(si_domain, iommu);
2240 if (ret) {
2241 domain_exit(si_domain);
2242 return -EFAULT;
2243 }
2244 }
2245
2246 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2247 domain_exit(si_domain);
2248 return -EFAULT;
2249 }
2250
2251 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2252
2253 if (hw)
2254 return 0;
2255
2256 for_each_online_node(nid) {
2257 unsigned long start_pfn, end_pfn;
2258 int i;
2259
2260 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2261 ret = iommu_domain_identity_map(si_domain,
2262 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2263 if (ret)
2264 return ret;
2265 }
2266 }
2267
2268 return 0;
2269}
2270
2271static void domain_remove_one_dev_info(struct dmar_domain *domain,
2272 struct pci_dev *pdev);
2273static int identity_mapping(struct pci_dev *pdev)
2274{
2275 struct device_domain_info *info;
2276
2277 if (likely(!iommu_identity_mapping))
2278 return 0;
2279
2280 info = pdev->dev.archdata.iommu;
2281 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2282 return (info->domain == si_domain);
2283
2284 return 0;
2285}
2286
2287static int domain_add_dev_info(struct dmar_domain *domain,
2288 struct pci_dev *pdev,
2289 int translation)
2290{
2291 struct device_domain_info *info;
2292 unsigned long flags;
2293 int ret;
2294
2295 info = alloc_devinfo_mem();
2296 if (!info)
2297 return -ENOMEM;
2298
2299 info->segment = pci_domain_nr(pdev->bus);
2300 info->bus = pdev->bus->number;
2301 info->devfn = pdev->devfn;
2302 info->dev = pdev;
2303 info->domain = domain;
2304
2305 spin_lock_irqsave(&device_domain_lock, flags);
2306 list_add(&info->link, &domain->devices);
2307 list_add(&info->global, &device_domain_list);
2308 pdev->dev.archdata.iommu = info;
2309 spin_unlock_irqrestore(&device_domain_lock, flags);
2310
2311 ret = domain_context_mapping(domain, pdev, translation);
2312 if (ret) {
2313 spin_lock_irqsave(&device_domain_lock, flags);
2314 unlink_domain_info(info);
2315 spin_unlock_irqrestore(&device_domain_lock, flags);
2316 free_devinfo_mem(info);
2317 return ret;
2318 }
2319
2320 return 0;
2321}
2322
2323static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2324{
2325 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2326 return 1;
2327
2328 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2329 return 1;
2330
2331 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2332 return 0;
2333
 /*
 * We want to start off with all devices in the 1:1 domain, and take
 * them out later if we find they can't access all of memory.
 *
 * However, we can't do this for PCI devices behind bridges, because
 * all PCI devices behind the same bridge end up with the same
 * source-id on their transactions and cannot be told apart.
 *
 * Practically speaking we also can't change things at run time for
 * such devices, because we can't be sure there are no DMA
 * transactions in flight for any of their siblings.
 *
 * So exclude conventional PCI devices that are not on the root bus,
 * as well as PCI and PCIe-to-PCI bridges themselves.
 */
2351 if (!pci_is_pcie(pdev)) {
2352 if (!pci_is_root_bus(pdev->bus))
2353 return 0;
2354 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2355 return 0;
2356 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2357 return 0;
2358
 /*
 * At boot time we don't yet know whether the device is 64-bit capable,
 * so assume it is; it can be taken out of the 1:1 domain later if that
 * turns out to be wrong.
 */
2364 if (!startup) {
 /*
 * If the device's DMA mask is smaller than what the system requires,
 * a 1:1 mapping cannot cover everything it may need to reach, so
 * don't identity-map it.
 */
2369 u64 dma_mask = pdev->dma_mask;
2370
2371 if (pdev->dev.coherent_dma_mask &&
2372 pdev->dev.coherent_dma_mask < dma_mask)
2373 dma_mask = pdev->dev.coherent_dma_mask;
2374
2375 return dma_mask >= dma_get_required_mask(&pdev->dev);
2376 }
2377
2378 return 1;
2379}
2380
2381static int __init iommu_prepare_static_identity_mapping(int hw)
2382{
2383 struct pci_dev *pdev = NULL;
2384 int ret;
2385
2386 ret = si_domain_init(hw);
2387 if (ret)
2388 return -EFAULT;
2389
2390 for_each_pci_dev(pdev) {
2391 if (iommu_should_identity_map(pdev, 1)) {
2392 ret = domain_add_dev_info(si_domain, pdev,
2393 hw ? CONTEXT_TT_PASS_THROUGH :
2394 CONTEXT_TT_MULTI_LEVEL);
2395 if (ret) {
 /* device not associated with an iommu */
2397 if (ret == -ENODEV)
2398 continue;
2399 return ret;
2400 }
2401 pr_info("IOMMU: %s identity mapping for device %s\n",
2402 hw ? "hardware" : "software", pci_name(pdev));
2403 }
2404 }
2405
2406 return 0;
2407}
2408
2409static int __init init_dmars(void)
2410{
2411 struct dmar_drhd_unit *drhd;
2412 struct dmar_rmrr_unit *rmrr;
2413 struct pci_dev *pdev;
2414 struct intel_iommu *iommu;
2415 int i, ret;
2416
 /*
 * For each DRHD unit: allocate the per-IOMMU data structures and a
 * root entry table; translation itself is enabled further down.
 */
2423 for_each_drhd_unit(drhd) {
 /*
 * No lock needed: this is only incremented in the single-threaded
 * kernel __init code path; all other accesses are read only.
 */
2429 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2430 g_num_of_iommus++;
2431 continue;
2432 }
2433 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2434 IOMMU_UNITS_SUPPORTED);
2435 }
2436
2437 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2438 GFP_KERNEL);
2439 if (!g_iommus) {
2440 printk(KERN_ERR "Allocating global iommu array failed\n");
2441 ret = -ENOMEM;
2442 goto error;
2443 }
2444
2445 deferred_flush = kzalloc(g_num_of_iommus *
2446 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2447 if (!deferred_flush) {
2448 ret = -ENOMEM;
2449 goto error;
2450 }
2451
2452 for_each_drhd_unit(drhd) {
2453 if (drhd->ignored)
2454 continue;
2455
2456 iommu = drhd->iommu;
2457 g_iommus[iommu->seq_id] = iommu;
2458
2459 ret = iommu_init_domains(iommu);
2460 if (ret)
2461 goto error;
2462
 /*
 * Allocate the root entry table from which this IOMMU's context
 * tables are referenced.
 */
2468 ret = iommu_alloc_root_entry(iommu);
2469 if (ret) {
2470 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2471 goto error;
2472 }
2473 if (!ecap_pass_through(iommu->ecap))
2474 hw_pass_through = 0;
2475 }
2476
 /*
 * Start from a known-sane invalidation state on each IOMMU before
 * (re)enabling queued invalidation below.
 */
2480 for_each_drhd_unit(drhd) {
2481 if (drhd->ignored)
2482 continue;
2483
2484 iommu = drhd->iommu;
2485
 /*
 * If queued invalidation was already initialized by us (for example
 * while enabling interrupt remapping), things are already in a sane
 * state: leave it alone.
 */
2491 if (iommu->qi)
2492 continue;
2493
 /*
 * Clear any previous faults.
 */
2497 dmar_fault(-1, iommu);
2498
 /*
 * Disable queued invalidation if it was left enabled before the OS
 * took over.
 */
2502 dmar_disable_qi(iommu);
2503 }
2504
2505 for_each_drhd_unit(drhd) {
2506 if (drhd->ignored)
2507 continue;
2508
2509 iommu = drhd->iommu;
2510
2511 if (dmar_enable_qi(iommu)) {
 /*
 * Queued invalidation could not be enabled; fall back to
 * register-based invalidation.
 */
2516 iommu->flush.flush_context = __iommu_flush_context;
2517 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2518 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2519 "invalidation\n",
2520 iommu->seq_id,
2521 (unsigned long long)drhd->reg_base_addr);
2522 } else {
2523 iommu->flush.flush_context = qi_flush_context;
2524 iommu->flush.flush_iotlb = qi_flush_iotlb;
2525 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2526 "invalidation\n",
2527 iommu->seq_id,
2528 (unsigned long long)drhd->reg_base_addr);
2529 }
2530 }
2531
2532 if (iommu_pass_through)
2533 iommu_identity_mapping |= IDENTMAP_ALL;
2534
2535#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2536 iommu_identity_mapping |= IDENTMAP_GFX;
2537#endif
2538
2539 check_tylersburg_isoch();
2540
 /*
 * If requested, set up static identity (1:1) mappings now, using
 * hardware pass-through where the IOMMUs support it.
 */
2546 if (iommu_identity_mapping) {
2547 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2548 if (ret) {
2549 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2550 goto error;
2551 }
2552 }
2553
 /*
 * For each RMRR reported by the BIOS, install a unity mapping for
 * every device attached to it, so that DMA already targeting those
 * regions keeps working once translation is enabled.
 */
2567 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2568 for_each_rmrr_units(rmrr) {
2569 for (i = 0; i < rmrr->devices_cnt; i++) {
2570 pdev = rmrr->devices[i];
 /* some BIOSes list non-existent devices in the DMAR table */
2575 if (!pdev)
2576 continue;
2577 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2578 if (ret)
2579 printk(KERN_ERR
2580 "IOMMU: mapping reserved region failed\n");
2581 }
2582 }
2583
2584 iommu_prepare_isa();
2585
 /*
 * For each DRHD unit: enable fault reporting, program the root
 * entry, globally invalidate the context cache and IOTLB, and
 * finally enable translation.
 */
2593 for_each_drhd_unit(drhd) {
2594 if (drhd->ignored) {
 /*
 * Even on ignored units the protected memory regions must be
 * disabled, or DMA to those regions may fail.
 */
2599 if (force_on)
2600 iommu_disable_protect_mem_regions(drhd->iommu);
2601 continue;
2602 }
2603 iommu = drhd->iommu;
2604
2605 iommu_flush_write_buffer(iommu);
2606
2607 ret = dmar_set_interrupt(iommu);
2608 if (ret)
2609 goto error;
2610
2611 iommu_set_root_entry(iommu);
2612
2613 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2614 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2615
2616 ret = iommu_enable_translation(iommu);
2617 if (ret)
2618 goto error;
2619
2620 iommu_disable_protect_mem_regions(iommu);
2621 }
2622
2623 return 0;
2624error:
2625 for_each_drhd_unit(drhd) {
2626 if (drhd->ignored)
2627 continue;
2628 iommu = drhd->iommu;
2629 free_iommu(iommu);
2630 }
2631 kfree(g_iommus);
2632 return ret;
2633}
2634
2635
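/*
 * Allocate an IOVA range of @nrpages (in CPU pages).  Unless forcedac is
 * set, addresses below 4GiB are tried first, falling back to the range
 * allowed by the device's DMA mask.
 */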
2636static struct iova *intel_alloc_iova(struct device *dev,
2637 struct dmar_domain *domain,
2638 unsigned long nrpages, uint64_t dma_mask)
2639{
2640 struct pci_dev *pdev = to_pci_dev(dev);
2641 struct iova *iova = NULL;
2642
 /* Restrict dma_mask to the width the domain can address */
2644 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2645
2646 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
 /*
 * First try to allocate an IOVA within DMA_BIT_MASK(32); if that
 * fails, fall back to allocating from the full range.
 */
2652 iova = alloc_iova(&domain->iovad, nrpages,
2653 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2654 if (iova)
2655 return iova;
2656 }
2657 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2658 if (unlikely(!iova)) {
2659 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2660 nrpages, pci_name(pdev));
2661 return NULL;
2662 }
2663
2664 return iova;
2665}
2666
2667static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2668{
2669 struct dmar_domain *domain;
2670 int ret;
2671
2672 domain = get_domain_for_dev(pdev,
2673 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2674 if (!domain) {
2675 printk(KERN_ERR
2676 "Allocating domain for %s failed", pci_name(pdev));
2677 return NULL;
2678 }
2679
 /* make sure the device's context entry is mapped to this domain */
2681 if (unlikely(!domain_context_mapped(pdev))) {
2682 ret = domain_context_mapping(domain, pdev,
2683 CONTEXT_TT_MULTI_LEVEL);
2684 if (ret) {
2685 printk(KERN_ERR
2686 "Domain context map for %s failed",
2687 pci_name(pdev));
2688 return NULL;
2689 }
2690 }
2691
2692 return domain;
2693}
2694
2695static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2696{
2697 struct device_domain_info *info;
2698
 /* No locking: assumes the domain is not torn down concurrently in the normal case */
2700 info = dev->dev.archdata.iommu;
2701 if (likely(info))
2702 return info->domain;
2703
2704 return __get_valid_domain_for_dev(dev);
2705}
2706
2707static int iommu_dummy(struct pci_dev *pdev)
2708{
2709 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2710}
2711
/* Returns non-zero if DMA from this device bypasses IOMMU translation */
2713static int iommu_no_mapping(struct device *dev)
2714{
2715 struct pci_dev *pdev;
2716 int found;
2717
2718 if (unlikely(dev->bus != &pci_bus_type))
2719 return 1;
2720
2721 pdev = to_pci_dev(dev);
2722 if (iommu_dummy(pdev))
2723 return 1;
2724
2725 if (!iommu_identity_mapping)
2726 return 0;
2727
2728 found = identity_mapping(pdev);
2729 if (found) {
2730 if (iommu_should_identity_map(pdev, 0))
2731 return 1;
2732 else {
 /*
 * The (32-bit) device no longer qualifies for identity mapping:
 * take it out of si_domain and fall back to a private domain.
 */
2737 domain_remove_one_dev_info(si_domain, pdev);
2738 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2739 pci_name(pdev));
2740 return 0;
2741 }
2742 } else {
 /*
 * The device is not in si_domain but now qualifies for identity
 * mapping (e.g. a 64-bit device detached from a VM): add it.
 */
2747 if (iommu_should_identity_map(pdev, 0)) {
2748 int ret;
2749 ret = domain_add_dev_info(si_domain, pdev,
2750 hw_pass_through ?
2751 CONTEXT_TT_PASS_THROUGH :
2752 CONTEXT_TT_MULTI_LEVEL);
2753 if (!ret) {
2754 printk(KERN_INFO "64bit %s uses identity mapping\n",
2755 pci_name(pdev));
2756 return 1;
2757 }
2758 }
2759 }
2760
2761 return 0;
2762}
2763
2764static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2765 size_t size, int dir, u64 dma_mask)
2766{
2767 struct pci_dev *pdev = to_pci_dev(hwdev);
2768 struct dmar_domain *domain;
2769 phys_addr_t start_paddr;
2770 struct iova *iova;
2771 int prot = 0;
2772 int ret;
2773 struct intel_iommu *iommu;
2774 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2775
2776 BUG_ON(dir == DMA_NONE);
2777
2778 if (iommu_no_mapping(hwdev))
2779 return paddr;
2780
2781 domain = get_valid_domain_for_dev(pdev);
2782 if (!domain)
2783 return 0;
2784
2785 iommu = domain_get_iommu(domain);
2786 size = aligned_nrpages(paddr, size);
2787
2788 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2789 if (!iova)
2790 goto error;
2791
 /*
 * Check if the DMAR supports zero-length reads on write-only
 * mappings.
 */
2796 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2797 !cap_zlr(iommu->cap))
2798 prot |= DMA_PTE_READ;
2799 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2800 prot |= DMA_PTE_WRITE;
2801
 /*
 * paddr to (paddr + size) might span a partial page, so map whole
 * pages.  If two parts of one page are mapped separately, two guest
 * addresses may map to the same host paddr, which is harmless here.
 */
2807 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2808 mm_to_dma_pfn(paddr_pfn), size, prot);
2809 if (ret)
2810 goto error;
2811
 /* non-present to present mapping: only flush the IOTLB in caching mode */
2813 if (cap_caching_mode(iommu->cap))
2814 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2815 else
2816 iommu_flush_write_buffer(iommu);
2817
2818 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2819 start_paddr += paddr & ~PAGE_MASK;
2820 return start_paddr;
2821
2822error:
2823 if (iova)
2824 __free_iova(&domain->iovad, iova);
 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
2826 pci_name(pdev), size, (unsigned long long)paddr, dir);
2827 return 0;
2828}
2829
2830static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2831 unsigned long offset, size_t size,
2832 enum dma_data_direction dir,
2833 struct dma_attrs *attrs)
2834{
2835 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2836 dir, to_pci_dev(dev)->dma_mask);
2837}
2838
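/*
 * Drain the deferred-unmap queue: issue one IOTLB flush per IOMMU (global
 * on real hardware, page-selective in caching mode) and then free the
 * queued IOVAs.  Runs with async_umap_flush_lock held, either from the
 * unmap timer or when the queue hits HIGH_WATER_MARK.
 */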
2839static void flush_unmaps(void)
2840{
2841 int i, j;
2842
2843 timer_on = 0;
2844
2845
2846 for (i = 0; i < g_num_of_iommus; i++) {
2847 struct intel_iommu *iommu = g_iommus[i];
2848 if (!iommu)
2849 continue;
2850
2851 if (!deferred_flush[i].next)
2852 continue;
 /* In caching mode a global flush is expensive for the emulator, so page-selective flushes are done per entry below */
2855 if (!cap_caching_mode(iommu->cap))
2856 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2857 DMA_TLB_GLOBAL_FLUSH);
2858 for (j = 0; j < deferred_flush[i].next; j++) {
2859 unsigned long mask;
2860 struct iova *iova = deferred_flush[i].iova[j];
2861 struct dmar_domain *domain = deferred_flush[i].domain[j];
 /* On real hardware multiple invalidations are expensive; the global flush above already covered this range */
2864 if (cap_caching_mode(iommu->cap))
2865 iommu_flush_iotlb_psi(iommu, domain->id,
2866 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2867 else {
2868 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2869 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2870 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2871 }
2872 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2873 }
2874 deferred_flush[i].next = 0;
2875 }
2876
2877 list_size = 0;
2878}
2879
2880static void flush_unmaps_timeout(unsigned long data)
2881{
2882 unsigned long flags;
2883
2884 spin_lock_irqsave(&async_umap_flush_lock, flags);
2885 flush_unmaps();
2886 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2887}
2888
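/*
 * Queue an IOVA for deferred freeing rather than flushing the IOTLB
 * synchronously; flush_unmaps() drains the batch from a timer roughly
 * every 10ms or when the queue fills up.
 */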
2889static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2890{
2891 unsigned long flags;
2892 int next, iommu_id;
2893 struct intel_iommu *iommu;
2894
2895 spin_lock_irqsave(&async_umap_flush_lock, flags);
2896 if (list_size == HIGH_WATER_MARK)
2897 flush_unmaps();
2898
2899 iommu = domain_get_iommu(dom);
2900 iommu_id = iommu->seq_id;
2901
2902 next = deferred_flush[iommu_id].next;
2903 deferred_flush[iommu_id].domain[next] = dom;
2904 deferred_flush[iommu_id].iova[next] = iova;
2905 deferred_flush[iommu_id].next++;
2906
2907 if (!timer_on) {
2908 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2909 timer_on = 1;
2910 }
2911 list_size++;
2912 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2913}
2914
2915static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2916 size_t size, enum dma_data_direction dir,
2917 struct dma_attrs *attrs)
2918{
2919 struct pci_dev *pdev = to_pci_dev(dev);
2920 struct dmar_domain *domain;
2921 unsigned long start_pfn, last_pfn;
2922 struct iova *iova;
2923 struct intel_iommu *iommu;
2924
2925 if (iommu_no_mapping(dev))
2926 return;
2927
2928 domain = find_domain(pdev);
2929 BUG_ON(!domain);
2930
2931 iommu = domain_get_iommu(domain);
2932
2933 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2934 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2935 (unsigned long long)dev_addr))
2936 return;
2937
2938 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2939 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2940
2941 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2942 pci_name(pdev), start_pfn, last_pfn);
2943
2944
2945 dma_pte_clear_range(domain, start_pfn, last_pfn);
2946
2947
2948 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2949
2950 if (intel_iommu_strict) {
2951 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2952 last_pfn - start_pfn + 1, 0);
2953
2954 __free_iova(&domain->iovad, iova);
2955 } else {
2956 add_unmap(domain, iova);
 /*
 * Queue up the release: batching the IOTLB flushes saves a
 * significant amount of CPU compared with flushing on every unmap.
 */
2961 }
2962}
2963
2964static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2965 dma_addr_t *dma_handle, gfp_t flags,
2966 struct dma_attrs *attrs)
2967{
2968 void *vaddr;
2969 int order;
2970
2971 size = PAGE_ALIGN(size);
2972 order = get_order(size);
2973
2974 if (!iommu_no_mapping(hwdev))
2975 flags &= ~(GFP_DMA | GFP_DMA32);
2976 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2977 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2978 flags |= GFP_DMA;
2979 else
2980 flags |= GFP_DMA32;
2981 }
2982
2983 vaddr = (void *)__get_free_pages(flags, order);
2984 if (!vaddr)
2985 return NULL;
2986 memset(vaddr, 0, size);
2987
2988 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2989 DMA_BIDIRECTIONAL,
2990 hwdev->coherent_dma_mask);
2991 if (*dma_handle)
2992 return vaddr;
2993 free_pages((unsigned long)vaddr, order);
2994 return NULL;
2995}
2996
2997static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2998 dma_addr_t dma_handle, struct dma_attrs *attrs)
2999{
3000 int order;
3001
3002 size = PAGE_ALIGN(size);
3003 order = get_order(size);
3004
3005 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3006 free_pages((unsigned long)vaddr, order);
3007}
3008
3009static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3010 int nelems, enum dma_data_direction dir,
3011 struct dma_attrs *attrs)
3012{
3013 struct pci_dev *pdev = to_pci_dev(hwdev);
3014 struct dmar_domain *domain;
3015 unsigned long start_pfn, last_pfn;
3016 struct iova *iova;
3017 struct intel_iommu *iommu;
3018
3019 if (iommu_no_mapping(hwdev))
3020 return;
3021
3022 domain = find_domain(pdev);
3023 BUG_ON(!domain);
3024
3025 iommu = domain_get_iommu(domain);
3026
3027 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3028 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3029 (unsigned long long)sglist[0].dma_address))
3030 return;
3031
3032 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3033 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3034
3035
3036 dma_pte_clear_range(domain, start_pfn, last_pfn);
3037
3038
3039 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3040
3041 if (intel_iommu_strict) {
3042 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3043 last_pfn - start_pfn + 1, 0);
3044
3045 __free_iova(&domain->iovad, iova);
3046 } else {
3047 add_unmap(domain, iova);
 /*
 * Queue up the release: batching the IOTLB flushes saves a
 * significant amount of CPU compared with flushing on every unmap.
 */
3052 }
3053}
3054
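/*
 * Scatterlist "mapping" for devices that bypass translation: the DMA
 * address of each segment is simply its physical address.
 */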
static int intel_nontranslate_map_sg(struct device *hwdev,
3056 struct scatterlist *sglist, int nelems, int dir)
3057{
3058 int i;
3059 struct scatterlist *sg;
3060
3061 for_each_sg(sglist, sg, nelems, i) {
3062 BUG_ON(!sg_page(sg));
3063 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3064 sg->dma_length = sg->length;
3065 }
3066 return nelems;
3067}
3068
3069static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3070 enum dma_data_direction dir, struct dma_attrs *attrs)
3071{
3072 int i;
3073 struct pci_dev *pdev = to_pci_dev(hwdev);
3074 struct dmar_domain *domain;
3075 size_t size = 0;
3076 int prot = 0;
3077 struct iova *iova = NULL;
3078 int ret;
3079 struct scatterlist *sg;
3080 unsigned long start_vpfn;
3081 struct intel_iommu *iommu;
3082
3083 BUG_ON(dir == DMA_NONE);
3084 if (iommu_no_mapping(hwdev))
3085 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3086
3087 domain = get_valid_domain_for_dev(pdev);
3088 if (!domain)
3089 return 0;
3090
3091 iommu = domain_get_iommu(domain);
3092
3093 for_each_sg(sglist, sg, nelems, i)
3094 size += aligned_nrpages(sg->offset, sg->length);
3095
3096 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3097 pdev->dma_mask);
3098 if (!iova) {
3099 sglist->dma_length = 0;
3100 return 0;
3101 }
3102
 /*
 * Check if the DMAR supports zero-length reads on write-only
 * mappings.
 */
3107 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3108 !cap_zlr(iommu->cap))
3109 prot |= DMA_PTE_READ;
3110 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3111 prot |= DMA_PTE_WRITE;
3112
3113 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3114
3115 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3116 if (unlikely(ret)) {
3117
3118 dma_pte_clear_range(domain, start_vpfn,
3119 start_vpfn + size - 1);
3120
3121 dma_pte_free_pagetable(domain, start_vpfn,
3122 start_vpfn + size - 1);
3123
3124 __free_iova(&domain->iovad, iova);
3125 return 0;
3126 }
3127
 /* non-present to present mapping: only flush the IOTLB in caching mode */
3129 if (cap_caching_mode(iommu->cap))
3130 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3131 else
3132 iommu_flush_write_buffer(iommu);
3133
3134 return nelems;
3135}
3136
3137static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3138{
3139 return !dma_addr;
3140}
3141
3142struct dma_map_ops intel_dma_ops = {
3143 .alloc = intel_alloc_coherent,
3144 .free = intel_free_coherent,
3145 .map_sg = intel_map_sg,
3146 .unmap_sg = intel_unmap_sg,
3147 .map_page = intel_map_page,
3148 .unmap_page = intel_unmap_page,
3149 .mapping_error = intel_mapping_error,
3150};
3151
3152static inline int iommu_domain_cache_init(void)
3153{
3154 int ret = 0;
3155
3156 iommu_domain_cache = kmem_cache_create("iommu_domain",
3157 sizeof(struct dmar_domain),
3158 0,
3159 SLAB_HWCACHE_ALIGN,
3161 NULL);
3162 if (!iommu_domain_cache) {
3163 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3164 ret = -ENOMEM;
3165 }
3166
3167 return ret;
3168}
3169
3170static inline int iommu_devinfo_cache_init(void)
3171{
3172 int ret = 0;
3173
3174 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3175 sizeof(struct device_domain_info),
3176 0,
3177 SLAB_HWCACHE_ALIGN,
3178 NULL);
3179 if (!iommu_devinfo_cache) {
3180 printk(KERN_ERR "Couldn't create devinfo cache\n");
3181 ret = -ENOMEM;
3182 }
3183
3184 return ret;
3185}
3186
3187static inline int iommu_iova_cache_init(void)
3188{
3189 int ret = 0;
3190
3191 iommu_iova_cache = kmem_cache_create("iommu_iova",
3192 sizeof(struct iova),
3193 0,
3194 SLAB_HWCACHE_ALIGN,
3195 NULL);
3196 if (!iommu_iova_cache) {
3197 printk(KERN_ERR "Couldn't create iova cache\n");
3198 ret = -ENOMEM;
3199 }
3200
3201 return ret;
3202}
3203
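/*
 * Create the slab caches used by the driver (iova, dmar_domain and
 * device_domain_info objects), unwinding on partial failure.
 */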
3204static int __init iommu_init_mempool(void)
3205{
3206 int ret;
3207 ret = iommu_iova_cache_init();
3208 if (ret)
3209 return ret;
3210
3211 ret = iommu_domain_cache_init();
3212 if (ret)
3213 goto domain_error;
3214
3215 ret = iommu_devinfo_cache_init();
3216 if (!ret)
3217 return ret;
3218
3219 kmem_cache_destroy(iommu_domain_cache);
3220domain_error:
3221 kmem_cache_destroy(iommu_iova_cache);
3222
3223 return -ENOMEM;
3224}
3225
3226static void __init iommu_exit_mempool(void)
3227{
3228 kmem_cache_destroy(iommu_devinfo_cache);
3229 kmem_cache_destroy(iommu_domain_cache);
3230 kmem_cache_destroy(iommu_iova_cache);
3231
3232}
3233
3234static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3235{
3236 struct dmar_drhd_unit *drhd;
3237 u32 vtbar;
3238 int rc;
3239
 /*
 * This device is known to have its own IOMMU on this chipset.  If
 * the DMAR tables claim it is covered by a different unit at another
 * address, the BIOS is lying; in that case skip translation for it
 * entirely.
 */
3245 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3246 if (rc) {
3247
3248 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3249 return;
3250 }
3251 vtbar &= 0xffff0000;
3252
3253
3254 drhd = dmar_find_matched_drhd_unit(pdev);
3255 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3256 TAINT_FIRMWARE_WORKAROUND,
3257 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3258 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3259}
3260DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3261
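/*
 * Mark DRHD units that cover no devices at all, or only graphics devices
 * when gfx mapping is disabled, as ignored so no translation is set up
 * behind them.
 */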
3262static void __init init_no_remapping_devices(void)
3263{
3264 struct dmar_drhd_unit *drhd;
3265
3266 for_each_drhd_unit(drhd) {
3267 if (!drhd->include_all) {
3268 int i;
3269 for (i = 0; i < drhd->devices_cnt; i++)
3270 if (drhd->devices[i] != NULL)
3271 break;
3272
3273 if (i == drhd->devices_cnt)
3274 drhd->ignored = 1;
3275 }
3276 }
3277
3278 for_each_drhd_unit(drhd) {
3279 int i;
3280 if (drhd->ignored || drhd->include_all)
3281 continue;
3282
3283 for (i = 0; i < drhd->devices_cnt; i++)
3284 if (drhd->devices[i] &&
3285 !IS_GFX_DEVICE(drhd->devices[i]))
3286 break;
3287
3288 if (i < drhd->devices_cnt)
3289 continue;
3290
 /*
 * This IOMMU covers *only* graphics devices: either keep it for gfx
 * mapping, or mark it ignored and bypass those devices entirely.
 */
3293 if (dmar_map_gfx) {
3294 intel_iommu_gfx_mapped = 1;
3295 } else {
3296 drhd->ignored = 1;
3297 for (i = 0; i < drhd->devices_cnt; i++) {
3298 if (!drhd->devices[i])
3299 continue;
3300 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3301 }
3302 }
3303 }
3304}
3305
3306#ifdef CONFIG_SUSPEND
3307static int init_iommu_hw(void)
3308{
3309 struct dmar_drhd_unit *drhd;
3310 struct intel_iommu *iommu = NULL;
3311
3312 for_each_active_iommu(iommu, drhd)
3313 if (iommu->qi)
3314 dmar_reenable_qi(iommu);
3315
3316 for_each_iommu(iommu, drhd) {
3317 if (drhd->ignored) {
 /*
 * Even on ignored units the protected memory regions must be
 * disabled, or DMA to those regions may fail.
 */
3322 if (force_on)
3323 iommu_disable_protect_mem_regions(iommu);
3324 continue;
3325 }
3326
3327 iommu_flush_write_buffer(iommu);
3328
3329 iommu_set_root_entry(iommu);
3330
3331 iommu->flush.flush_context(iommu, 0, 0, 0,
3332 DMA_CCMD_GLOBAL_INVL);
3333 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3334 DMA_TLB_GLOBAL_FLUSH);
3335 if (iommu_enable_translation(iommu))
3336 return 1;
3337 iommu_disable_protect_mem_regions(iommu);
3338 }
3339
3340 return 0;
3341}
3342
3343static void iommu_flush_all(void)
3344{
3345 struct dmar_drhd_unit *drhd;
3346 struct intel_iommu *iommu;
3347
3348 for_each_active_iommu(iommu, drhd) {
3349 iommu->flush.flush_context(iommu, 0, 0, 0,
3350 DMA_CCMD_GLOBAL_INVL);
3351 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3352 DMA_TLB_GLOBAL_FLUSH);
3353 }
3354}
3355
3356static int iommu_suspend(void)
3357{
3358 struct dmar_drhd_unit *drhd;
3359 struct intel_iommu *iommu = NULL;
3360 unsigned long flag;
3361
3362 for_each_active_iommu(iommu, drhd) {
3363 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3364 GFP_ATOMIC);
3365 if (!iommu->iommu_state)
3366 goto nomem;
3367 }
3368
3369 iommu_flush_all();
3370
3371 for_each_active_iommu(iommu, drhd) {
3372 iommu_disable_translation(iommu);
3373
3374 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3375
3376 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3377 readl(iommu->reg + DMAR_FECTL_REG);
3378 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3379 readl(iommu->reg + DMAR_FEDATA_REG);
3380 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3381 readl(iommu->reg + DMAR_FEADDR_REG);
3382 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3383 readl(iommu->reg + DMAR_FEUADDR_REG);
3384
3385 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3386 }
3387 return 0;
3388
3389nomem:
3390 for_each_active_iommu(iommu, drhd)
3391 kfree(iommu->iommu_state);
3392
3393 return -ENOMEM;
3394}
3395
3396static void iommu_resume(void)
3397{
3398 struct dmar_drhd_unit *drhd;
3399 struct intel_iommu *iommu = NULL;
3400 unsigned long flag;
3401
3402 if (init_iommu_hw()) {
3403 if (force_on)
3404 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3405 else
3406 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3407 return;
3408 }
3409
3410 for_each_active_iommu(iommu, drhd) {
3411
3412 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3413
3414 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3415 iommu->reg + DMAR_FECTL_REG);
3416 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3417 iommu->reg + DMAR_FEDATA_REG);
3418 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3419 iommu->reg + DMAR_FEADDR_REG);
3420 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3421 iommu->reg + DMAR_FEUADDR_REG);
3422
3423 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3424 }
3425
3426 for_each_active_iommu(iommu, drhd)
3427 kfree(iommu->iommu_state);
3428}
3429
3430static struct syscore_ops iommu_syscore_ops = {
3431 .resume = iommu_resume,
3432 .suspend = iommu_suspend,
3433};
3434
3435static void __init init_iommu_pm_ops(void)
3436{
3437 register_syscore_ops(&iommu_syscore_ops);
3438}
3439
3440#else
3441static inline void init_iommu_pm_ops(void) {}
3442#endif
3443
3444LIST_HEAD(dmar_rmrr_units);
3445
3446static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3447{
3448 list_add(&rmrr->list, &dmar_rmrr_units);
3449}
3450
3451
3452int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3453{
3454 struct acpi_dmar_reserved_memory *rmrr;
3455 struct dmar_rmrr_unit *rmrru;
3456
3457 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3458 if (!rmrru)
3459 return -ENOMEM;
3460
3461 rmrru->hdr = header;
3462 rmrr = (struct acpi_dmar_reserved_memory *)header;
3463 rmrru->base_address = rmrr->base_address;
3464 rmrru->end_address = rmrr->end_address;
3465
3466 dmar_register_rmrr_unit(rmrru);
3467 return 0;
3468}
3469
3470static int __init
3471rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3472{
3473 struct acpi_dmar_reserved_memory *rmrr;
3474 int ret;
3475
3476 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3477 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3478 ((void *)rmrr) + rmrr->header.length,
3479 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3480
3481 if (ret || (rmrru->devices_cnt == 0)) {
3482 list_del(&rmrru->list);
3483 kfree(rmrru);
3484 }
3485 return ret;
3486}
3487
3488static LIST_HEAD(dmar_atsr_units);
3489
3490int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3491{
3492 struct acpi_dmar_atsr *atsr;
3493 struct dmar_atsr_unit *atsru;
3494
3495 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3496 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3497 if (!atsru)
3498 return -ENOMEM;
3499
3500 atsru->hdr = hdr;
3501 atsru->include_all = atsr->flags & 0x1;
3502
3503 list_add(&atsru->list, &dmar_atsr_units);
3504
3505 return 0;
3506}
3507
3508static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3509{
3510 int rc;
3511 struct acpi_dmar_atsr *atsr;
3512
3513 if (atsru->include_all)
3514 return 0;
3515
3516 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3517 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3518 (void *)atsr + atsr->header.length,
3519 &atsru->devices_cnt, &atsru->devices,
3520 atsr->segment);
3521 if (rc || !atsru->devices_cnt) {
3522 list_del(&atsru->list);
3523 kfree(atsru);
3524 }
3525
3526 return rc;
3527}
3528
3529int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3530{
3531 int i;
3532 struct pci_bus *bus;
3533 struct acpi_dmar_atsr *atsr;
3534 struct dmar_atsr_unit *atsru;
3535
3536 dev = pci_physfn(dev);
3537
3538 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3539 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3540 if (atsr->segment == pci_domain_nr(dev->bus))
3541 goto found;
3542 }
3543
3544 return 0;
3545
3546found:
3547 for (bus = dev->bus; bus; bus = bus->parent) {
3548 struct pci_dev *bridge = bus->self;
3549
3550 if (!bridge || !pci_is_pcie(bridge) ||
3551 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3552 return 0;
3553
3554 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
3555 for (i = 0; i < atsru->devices_cnt; i++)
3556 if (atsru->devices[i] == bridge)
3557 return 1;
3558 break;
3559 }
3560 }
3561
3562 if (atsru->include_all)
3563 return 1;
3564
3565 return 0;
3566}
3567
3568int __init dmar_parse_rmrr_atsr_dev(void)
3569{
3570 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3571 struct dmar_atsr_unit *atsr, *atsr_n;
3572 int ret = 0;
3573
3574 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3575 ret = rmrr_parse_dev(rmrr);
3576 if (ret)
3577 return ret;
3578 }
3579
3580 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3581 ret = atsr_parse_dev(atsr);
3582 if (ret)
3583 return ret;
3584 }
3585
3586 return ret;
3587}
3588
/*
 * Driver-core notifier: when a driver is unbound from a device, tear down
 * the device's DMAR domain so the next driver starts from a clean slate.
 * Newly added devices are attached to a domain lazily, on first DMA.
 */
3595static int device_notifier(struct notifier_block *nb,
3596 unsigned long action, void *data)
3597{
3598 struct device *dev = data;
3599 struct pci_dev *pdev = to_pci_dev(dev);
3600 struct dmar_domain *domain;
3601
3602 if (iommu_no_mapping(dev))
3603 return 0;
3604
3605 domain = find_domain(pdev);
3606 if (!domain)
3607 return 0;
3608
3609 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3610 domain_remove_one_dev_info(domain, pdev);
3611
3612 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3613 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3614 list_empty(&domain->devices))
3615 domain_exit(domain);
3616 }
3617
3618 return 0;
3619}
3620
3621static struct notifier_block device_nb = {
3622 .notifier_call = device_notifier,
3623};
3624
3625int __init intel_iommu_init(void)
3626{
3627 int ret = 0;
3628
 /* VT-d is required for a TXT/tboot launch, so enforce that */
3630 force_on = tboot_force_iommu();
3631
3632 if (dmar_table_init()) {
3633 if (force_on)
3634 panic("tboot: Failed to initialize DMAR table\n");
3635 return -ENODEV;
3636 }
3637
3638 if (dmar_dev_scope_init() < 0) {
3639 if (force_on)
3640 panic("tboot: Failed to initialize DMAR device scope\n");
3641 return -ENODEV;
3642 }
3643
3644 if (no_iommu || dmar_disabled)
3645 return -ENODEV;
3646
3647 if (iommu_init_mempool()) {
3648 if (force_on)
3649 panic("tboot: Failed to initialize iommu memory\n");
3650 return -ENODEV;
3651 }
3652
3653 if (list_empty(&dmar_rmrr_units))
3654 printk(KERN_INFO "DMAR: No RMRR found\n");
3655
3656 if (list_empty(&dmar_atsr_units))
3657 printk(KERN_INFO "DMAR: No ATSR found\n");
3658
3659 if (dmar_init_reserved_ranges()) {
3660 if (force_on)
3661 panic("tboot: Failed to reserve iommu ranges\n");
3662 return -ENODEV;
3663 }
3664
3665 init_no_remapping_devices();
3666
3667 ret = init_dmars();
3668 if (ret) {
3669 if (force_on)
3670 panic("tboot: Failed to initialize DMARs\n");
3671 printk(KERN_ERR "IOMMU: dmar init failed\n");
3672 put_iova_domain(&reserved_iova_list);
3673 iommu_exit_mempool();
3674 return ret;
3675 }
3676 printk(KERN_INFO
3677 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3678
3679 init_timer(&unmap_timer);
3680#ifdef CONFIG_SWIOTLB
3681 swiotlb = 0;
3682#endif
3683 dma_ops = &intel_dma_ops;
3684
3685 init_iommu_pm_ops();
3686
3687 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3688
3689 bus_register_notifier(&pci_bus_type, &device_nb);
3690
3691 intel_iommu_enabled = 1;
3692
3693 return 0;
3694}
3695
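/*
 * Tear down the context entries of any PCIe-to-PCI bridges between the
 * device and the root bus, since requests from the device may carry the
 * bridge's source-id.
 */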
3696static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3697 struct pci_dev *pdev)
3698{
3699 struct pci_dev *tmp, *parent;
3700
3701 if (!iommu || !pdev)
3702 return;
3703
3704
3705 tmp = pci_find_upstream_pcie_bridge(pdev);
3706
3707 if (tmp) {
3708 parent = pdev->bus->self;
3709 while (parent != tmp) {
3710 iommu_detach_dev(iommu, parent->bus->number,
3711 parent->devfn);
3712 parent = parent->bus->self;
3713 }
3714 if (pci_is_pcie(tmp))
3715 iommu_detach_dev(iommu,
3716 tmp->subordinate->number, 0);
3717 else
3718 iommu_detach_dev(iommu, tmp->bus->number,
3719 tmp->devfn);
3720 }
3721}
3722
3723static void domain_remove_one_dev_info(struct dmar_domain *domain,
3724 struct pci_dev *pdev)
3725{
3726 struct device_domain_info *info;
3727 struct intel_iommu *iommu;
3728 unsigned long flags;
3729 int found = 0;
3730 struct list_head *entry, *tmp;
3731
3732 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3733 pdev->devfn);
3734 if (!iommu)
3735 return;
3736
3737 spin_lock_irqsave(&device_domain_lock, flags);
3738 list_for_each_safe(entry, tmp, &domain->devices) {
3739 info = list_entry(entry, struct device_domain_info, link);
3740 if (info->segment == pci_domain_nr(pdev->bus) &&
3741 info->bus == pdev->bus->number &&
3742 info->devfn == pdev->devfn) {
3743 unlink_domain_info(info);
3744 spin_unlock_irqrestore(&device_domain_lock, flags);
3745
3746 iommu_disable_dev_iotlb(info);
3747 iommu_detach_dev(iommu, info->bus, info->devfn);
3748 iommu_detach_dependent_devices(iommu, pdev);
3749 free_devinfo_mem(info);
3750
3751 spin_lock_irqsave(&device_domain_lock, flags);
3752
3753 if (found)
3754 break;
3755 else
3756 continue;
3757 }
3758
 /*
 * Note whether another device in this domain sits behind the same
 * iommu; if none does, the iommu is removed from the domain below.
 */
3763 if (iommu == device_to_iommu(info->segment, info->bus,
3764 info->devfn))
3765 found = 1;
3766 }
3767
3768 spin_unlock_irqrestore(&device_domain_lock, flags);
3769
3770 if (found == 0) {
3771 unsigned long tmp_flags;
3772 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3773 clear_bit(iommu->seq_id, domain->iommu_bmp);
3774 domain->iommu_count--;
3775 domain_update_iommu_cap(domain);
3776 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3777
3778 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3779 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3780 spin_lock_irqsave(&iommu->lock, tmp_flags);
3781 clear_bit(domain->id, iommu->domain_ids);
3782 iommu->domains[domain->id] = NULL;
3783 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3784 }
3785 }
3786}
3787
3788static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3789{
3790 struct device_domain_info *info;
3791 struct intel_iommu *iommu;
3792 unsigned long flags1, flags2;
3793
3794 spin_lock_irqsave(&device_domain_lock, flags1);
3795 while (!list_empty(&domain->devices)) {
3796 info = list_entry(domain->devices.next,
3797 struct device_domain_info, link);
3798 unlink_domain_info(info);
3799 spin_unlock_irqrestore(&device_domain_lock, flags1);
3800
3801 iommu_disable_dev_iotlb(info);
3802 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3803 iommu_detach_dev(iommu, info->bus, info->devfn);
3804 iommu_detach_dependent_devices(iommu, info->dev);
3805
 /*
 * Clear this iommu from the domain's bitmap and update the iommu
 * count and capabilities.
 */
3809 spin_lock_irqsave(&domain->iommu_lock, flags2);
3810 if (test_and_clear_bit(iommu->seq_id,
3811 domain->iommu_bmp)) {
3812 domain->iommu_count--;
3813 domain_update_iommu_cap(domain);
3814 }
3815 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3816
3817 free_devinfo_mem(info);
3818 spin_lock_irqsave(&device_domain_lock, flags1);
3819 }
3820 spin_unlock_irqrestore(&device_domain_lock, flags1);
3821}
3822
3823
3824static unsigned long vm_domid;
3825
3826static struct dmar_domain *iommu_alloc_vm_domain(void)
3827{
3828 struct dmar_domain *domain;
3829
3830 domain = alloc_domain_mem();
3831 if (!domain)
3832 return NULL;
3833
3834 domain->id = vm_domid++;
3835 domain->nid = -1;
3836 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3837 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3838
3839 return domain;
3840}
3841
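/*
 * Initialise a machine-created (VM or static identity) domain that is not
 * tied to one IOMMU's address width: reserve the special IOVA ranges,
 * size the page table for @guest_width and allocate the top-level page
 * directory.
 */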
3842static int md_domain_init(struct dmar_domain *domain, int guest_width)
3843{
3844 int adjust_width;
3845
3846 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3847 spin_lock_init(&domain->iommu_lock);
3848
3849 domain_reserve_special_ranges(domain);
3850
3851
3852 domain->gaw = guest_width;
3853 adjust_width = guestwidth_to_adjustwidth(guest_width);
3854 domain->agaw = width_to_agaw(adjust_width);
3855
3856 INIT_LIST_HEAD(&domain->devices);
3857
3858 domain->iommu_count = 0;
3859 domain->iommu_coherency = 0;
3860 domain->iommu_snooping = 0;
3861 domain->iommu_superpage = 0;
3862 domain->max_addr = 0;
3863 domain->nid = -1;
3864
3865
3866 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3867 if (!domain->pgd)
3868 return -ENOMEM;
3869 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3870 return 0;
3871}
3872
3873static void iommu_free_vm_domain(struct dmar_domain *domain)
3874{
3875 unsigned long flags;
3876 struct dmar_drhd_unit *drhd;
3877 struct intel_iommu *iommu;
3878 unsigned long i;
3879 unsigned long ndomains;
3880
3881 for_each_drhd_unit(drhd) {
3882 if (drhd->ignored)
3883 continue;
3884 iommu = drhd->iommu;
3885
3886 ndomains = cap_ndoms(iommu->cap);
3887 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3888 if (iommu->domains[i] == domain) {
3889 spin_lock_irqsave(&iommu->lock, flags);
3890 clear_bit(i, iommu->domain_ids);
3891 iommu->domains[i] = NULL;
3892 spin_unlock_irqrestore(&iommu->lock, flags);
3893 break;
3894 }
3895 }
3896 }
3897}
3898
3899static void vm_domain_exit(struct dmar_domain *domain)
3900{
3901
3902 if (!domain)
3903 return;
3904
3905 vm_domain_remove_all_dev_info(domain);
3906
3907 put_iova_domain(&domain->iovad);
3908
3909
3910 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3911
3912
3913 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3914
3915 iommu_free_vm_domain(domain);
3916 free_domain_mem(domain);
3917}
3918
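/*
 * Generic IOMMU API (iommu_ops) callbacks, used for explicit domain
 * management such as device assignment.
 */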
3919static int intel_iommu_domain_init(struct iommu_domain *domain)
3920{
3921 struct dmar_domain *dmar_domain;
3922
3923 dmar_domain = iommu_alloc_vm_domain();
3924 if (!dmar_domain) {
3925 printk(KERN_ERR
3926 "intel_iommu_domain_init: dmar_domain == NULL\n");
3927 return -ENOMEM;
3928 }
3929 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3930 printk(KERN_ERR
3931 "intel_iommu_domain_init() failed\n");
3932 vm_domain_exit(dmar_domain);
3933 return -ENOMEM;
3934 }
3935 domain_update_iommu_cap(dmar_domain);
3936 domain->priv = dmar_domain;
3937
3938 domain->geometry.aperture_start = 0;
3939 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3940 domain->geometry.force_aperture = true;
3941
3942 return 0;
3943}
3944
3945static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3946{
3947 struct dmar_domain *dmar_domain = domain->priv;
3948
3949 domain->priv = NULL;
3950 vm_domain_exit(dmar_domain);
3951}
3952
3953static int intel_iommu_attach_device(struct iommu_domain *domain,
3954 struct device *dev)
3955{
3956 struct dmar_domain *dmar_domain = domain->priv;
3957 struct pci_dev *pdev = to_pci_dev(dev);
3958 struct intel_iommu *iommu;
3959 int addr_width;
3960
 /* normally pdev is not yet mapped; if it is, detach it from its old domain first */
3962 if (unlikely(domain_context_mapped(pdev))) {
3963 struct dmar_domain *old_domain;
3964
3965 old_domain = find_domain(pdev);
3966 if (old_domain) {
3967 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3968 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3969 domain_remove_one_dev_info(old_domain, pdev);
3970 else
3971 domain_remove_dev_info(old_domain);
3972 }
3973 }
3974
3975 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3976 pdev->devfn);
3977 if (!iommu)
3978 return -ENODEV;
3979
 /* check whether this iommu's address width covers the domain's mapped addresses */
3981 addr_width = agaw_to_width(iommu->agaw);
3982 if (addr_width > cap_mgaw(iommu->cap))
3983 addr_width = cap_mgaw(iommu->cap);
3984
3985 if (dmar_domain->max_addr > (1LL << addr_width)) {
3986 printk(KERN_ERR "%s: iommu width (%d) is not "
3987 "sufficient for the mapped address (%llx)\n",
3988 __func__, addr_width, dmar_domain->max_addr);
3989 return -EFAULT;
3990 }
3991 dmar_domain->gaw = addr_width;
3992
 /*
 * Knock out extra levels of page tables if the IOMMU's AGAW is
 * smaller than the domain's.
 */
3996 while (iommu->agaw < dmar_domain->agaw) {
3997 struct dma_pte *pte;
3998
3999 pte = dmar_domain->pgd;
4000 if (dma_pte_present(pte)) {
4001 dmar_domain->pgd = (struct dma_pte *)
4002 phys_to_virt(dma_pte_addr(pte));
4003 free_pgtable_page(pte);
4004 }
4005 dmar_domain->agaw--;
4006 }
4007
4008 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4009}
4010
4011static void intel_iommu_detach_device(struct iommu_domain *domain,
4012 struct device *dev)
4013{
4014 struct dmar_domain *dmar_domain = domain->priv;
4015 struct pci_dev *pdev = to_pci_dev(dev);
4016
4017 domain_remove_one_dev_info(dmar_domain, pdev);
4018}
4019
4020static int intel_iommu_map(struct iommu_domain *domain,
4021 unsigned long iova, phys_addr_t hpa,
4022 size_t size, int iommu_prot)
4023{
4024 struct dmar_domain *dmar_domain = domain->priv;
4025 u64 max_addr;
4026 int prot = 0;
4027 int ret;
4028
4029 if (iommu_prot & IOMMU_READ)
4030 prot |= DMA_PTE_READ;
4031 if (iommu_prot & IOMMU_WRITE)
4032 prot |= DMA_PTE_WRITE;
4033 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4034 prot |= DMA_PTE_SNP;
4035
4036 max_addr = iova + size;
4037 if (dmar_domain->max_addr < max_addr) {
4038 u64 end;
4039
 /* check that the domain's address width can hold the new mapping */
4041 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4042 if (end < max_addr) {
4043 printk(KERN_ERR "%s: iommu width (%d) is not "
4044 "sufficient for the mapped address (%llx)\n",
4045 __func__, dmar_domain->gaw, max_addr);
4046 return -EFAULT;
4047 }
4048 dmar_domain->max_addr = max_addr;
4049 }
4050
 /* Round up to whole VT-d pages, accounting for hpa's offset within its page */
4052 size = aligned_nrpages(hpa, size);
4053 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4054 hpa >> VTD_PAGE_SHIFT, size, prot);
4055 return ret;
4056}
4057
4058static size_t intel_iommu_unmap(struct iommu_domain *domain,
4059 unsigned long iova, size_t size)
4060{
4061 struct dmar_domain *dmar_domain = domain->priv;
4062 int order;
4063
4064 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4065 (iova + size - 1) >> VTD_PAGE_SHIFT);
4066
4067 if (dmar_domain->max_addr == iova + size)
4068 dmar_domain->max_addr = iova;
4069
4070 return PAGE_SIZE << order;
4071}
4072
4073static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4074 unsigned long iova)
4075{
4076 struct dmar_domain *dmar_domain = domain->priv;
4077 struct dma_pte *pte;
4078 u64 phys = 0;
4079
4080 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4081 if (pte)
4082 phys = dma_pte_addr(pte);
4083
4084 return phys;
4085}
4086
4087static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4088 unsigned long cap)
4089{
4090 struct dmar_domain *dmar_domain = domain->priv;
4091
4092 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4093 return dmar_domain->iommu_snooping;
4094 if (cap == IOMMU_CAP_INTR_REMAP)
4095 return irq_remapping_enabled;
4096
4097 return 0;
4098}
4099
4100static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
4101{
4102 pci_dev_put(*from);
4103 *from = to;
4104}
4105
4106#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4107
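/*
 * Add the device to an IOMMU group.  Devices that cannot be isolated from
 * each other (non-ACS paths, multifunction packages without ACS, devices
 * behind conventional PCI bridges) must share a group with the device
 * that actually sources their DMA.
 */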
4108static int intel_iommu_add_device(struct device *dev)
4109{
4110 struct pci_dev *pdev = to_pci_dev(dev);
4111 struct pci_dev *bridge, *dma_pdev = NULL;
4112 struct iommu_group *group;
4113 int ret;
4114
4115 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4116 pdev->bus->number, pdev->devfn))
4117 return -ENODEV;
4118
4119 bridge = pci_find_upstream_pcie_bridge(pdev);
4120 if (bridge) {
4121 if (pci_is_pcie(bridge))
4122 dma_pdev = pci_get_domain_bus_and_slot(
4123 pci_domain_nr(pdev->bus),
4124 bridge->subordinate->number, 0);
4125 if (!dma_pdev)
4126 dma_pdev = pci_dev_get(bridge);
4127 } else
4128 dma_pdev = pci_dev_get(pdev);
4129
 /* Account for devices with DMA-source quirks */
4131 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4132
 /*
 * If it's a multifunction device that does not support the required
 * ACS flags, group it with function 0 of the slot.
 */
4137 if (dma_pdev->multifunction &&
4138 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
4139 swap_pci_ref(&dma_pdev,
4140 pci_get_slot(dma_pdev->bus,
4141 PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
4142 0)));
4143
 /*
 * Devices on the root bus go through the iommu directly; for
 * anything else, walk up the bus hierarchy until the remaining path
 * to the root has ACS enabled, and group with that ancestor.
 */
4149 while (!pci_is_root_bus(dma_pdev->bus)) {
4150 struct pci_bus *bus = dma_pdev->bus;
4151
4152 while (!bus->self) {
4153 if (!pci_is_root_bus(bus))
4154 bus = bus->parent;
4155 else
4156 goto root_bus;
4157 }
4158
4159 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4160 break;
4161
4162 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4163 }
4164
4165root_bus:
4166 group = iommu_group_get(&dma_pdev->dev);
4167 pci_dev_put(dma_pdev);
4168 if (!group) {
4169 group = iommu_group_alloc();
4170 if (IS_ERR(group))
4171 return PTR_ERR(group);
4172 }
4173
4174 ret = iommu_group_add_device(group, dev);
4175
4176 iommu_group_put(group);
4177 return ret;
4178}
4179
4180static void intel_iommu_remove_device(struct device *dev)
4181{
4182 iommu_group_remove_device(dev);
4183}
4184
4185static struct iommu_ops intel_iommu_ops = {
4186 .domain_init = intel_iommu_domain_init,
4187 .domain_destroy = intel_iommu_domain_destroy,
4188 .attach_dev = intel_iommu_attach_device,
4189 .detach_dev = intel_iommu_detach_device,
4190 .map = intel_iommu_map,
4191 .unmap = intel_iommu_unmap,
4192 .iova_to_phys = intel_iommu_iova_to_phys,
4193 .domain_has_cap = intel_iommu_domain_has_cap,
4194 .add_device = intel_iommu_add_device,
4195 .remove_device = intel_iommu_remove_device,
4196 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4197};
4198
4199static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
4200{
 /*
 * The Mobile 4 Series Chipset neglects to set the RWBF capability
 * bit but still needs write-buffer flushing.
 */
4205 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4206 rwbf_quirk = 1;
4207
4208
4209 if (dev->revision == 0x07) {
4210 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4211 dmar_map_gfx = 0;
4212 }
4213}
4214
4215DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4216
4217#define GGC 0x52
4218#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4219#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4220#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4221#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4222#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4223#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4224#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4225#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4226
4227static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4228{
4229 unsigned short ggc;
4230
4231 if (pci_read_config_word(dev, GGC, &ggc))
4232 return;
4233
4234 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4235 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4236 dmar_map_gfx = 0;
4237 } else if (dmar_map_gfx) {
 /* the gfx device has to be idle before we flush, so batched IOTLB flushing is unsafe here */
4239 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4240 intel_iommu_strict = 1;
4241 }
4242}
4243DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4244DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4245DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4246DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4247
/*
 * On some Tylersburg chipsets the BIOS routes the Azalia audio device's
 * DMA to the isochronous DMAR unit but gives that unit no TLB entries,
 * which makes it deadlock.  Detect that here, from init_dmars(), rather
 * than in a PCI quirk, so the warning is only printed when VT-d is
 * actually being enabled.
 */
4255static void __init check_tylersburg_isoch(void)
4256{
4257 struct pci_dev *pdev;
4258 uint32_t vtisochctrl;
4259
 /* If there's no Azalia in the system anyway, forget it */
4261 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4262 if (!pdev)
4263 return;
4264 pci_dev_put(pdev);
4265
 /*
 * Look for the chipset device carrying the isochronous DMAR control
 * register; if it's hidden we can't perform the check.
 */
4269 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4270 if (!pdev)
4271 return;
4272
4273 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4274 pci_dev_put(pdev);
4275 return;
4276 }
4277
4278 pci_dev_put(pdev);
4279
 /* If Azalia DMA is routed to the non-isochronous DMAR unit, we're fine */
4281 if (vtisochctrl & 1)
4282 return;
4283
 /* Drop all bits other than the number of TLB entries */
4285 vtisochctrl &= 0x1c;
4286
 /* The recommended 16 TLB entries are programmed: nothing to do */
4288 if (vtisochctrl == 0x10)
4289 return;
4290
 /* No TLB entries at all: isochronous DMA would deadlock, so identity-map Azalia instead */
4292 if (!vtisochctrl) {
4293 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4294 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4295 dmi_get_system_info(DMI_BIOS_VENDOR),
4296 dmi_get_system_info(DMI_BIOS_VERSION),
4297 dmi_get_system_info(DMI_PRODUCT_VERSION));
4298 iommu_identity_mapping |= IDENTMAP_AZALIA;
4299 return;
4300 }
4301
4302 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4303 vtisochctrl);
4304}
4305