/*
 * Intel VT-d (DMA remapping) IOMMU driver.
 */
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
36#include <linux/timer.h>
37#include <linux/iova.h>
38#include <linux/iommu.h>
39#include <linux/intel-iommu.h>
40#include <linux/syscore_ops.h>
41#include <linux/tboot.h>
42#include <linux/dmi.h>
43#include <linux/pci-ats.h>
44#include <linux/memblock.h>
45#include <asm/irq_remapping.h>
46#include <asm/cacheflush.h>
47#include <asm/iommu.h>
48
49#include "irq_remapping.h"
50#include "pci.h"
51
52#define ROOT_SIZE VTD_PAGE_SIZE
53#define CONTEXT_SIZE VTD_PAGE_SIZE
54
55#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64
65#define MAX_AGAW_WIDTH 64
66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
67
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/*
 * We limit DOMAIN_MAX_PFN so that it fits in an unsigned long, and
 * DOMAIN_MAX_ADDR to match, so PFNs can safely be kept in unsigned long.
 */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
76
77#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
78#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
79#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
80
81
82#define LEVEL_STRIDE (9)
83#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap advertises the supported page sizes to the IOMMU core:
 * 4KiB and every larger power-of-two size (superpages are handled by
 * this driver itself).
 */
101#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
102
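/*
 * AGAW (adjusted guest address width) helpers: AGAW n corresponds to an
 * (n + 2)-level page table covering a (30 + 9*n)-bit address space, e.g.
 * AGAW 2 is a 4-level table covering 48 bits.
 */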
103static inline int agaw_to_level(int agaw)
104{
105 return agaw + 2;
106}
107
108static inline int agaw_to_width(int agaw)
109{
110 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111}
112
113static inline int width_to_agaw(int width)
114{
115 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116}
117
118static inline unsigned int level_to_offset_bits(int level)
119{
120 return (level - 1) * LEVEL_STRIDE;
121}
122
123static inline int pfn_level_offset(unsigned long pfn, int level)
124{
125 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126}
127
128static inline unsigned long level_mask(int level)
129{
130 return -1UL << level_to_offset_bits(level);
131}
132
133static inline unsigned long level_size(int level)
134{
135 return 1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long align_to_level(unsigned long pfn, int level)
139{
140 return (pfn + level_size(level) - 1) & level_mask(level);
141}
142
143static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144{
145 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146}

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
150static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151{
152 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153}
154
155static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156{
157 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159static inline unsigned long page_to_dma_pfn(struct page *pg)
160{
161 return mm_to_dma_pfn(page_to_pfn(pg));
162}
163static inline unsigned long virt_to_dma_pfn(void *p)
164{
165 return page_to_dma_pfn(virt_to_page(p));
166}

/* global iommu list, set NULL for ignored DMAR units */
169static struct intel_iommu **g_iommus;
170
171static void __init check_tylersburg_isoch(void);
172static int rwbf_quirk;

/*
 * Set to 1 if the kernel should panic when VT-d cannot be enabled
 * (used when the kernel is launched with TXT/tboot).
 */
static int force_on = 0;

/*
 * Root-table entry layout:
 *  bit 0:       present
 *  bits 1-11:   reserved
 *  bits 12-63:  context-table pointer
 *  bits 64-127: reserved (rsvd1)
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191static inline bool root_present(struct root_entry *root)
192{
193 return (root->val & 1);
194}
195static inline void set_root_present(struct root_entry *root)
196{
197 root->val |= 1;
198}
199static inline void set_root_value(struct root_entry *root, unsigned long value)
200{
201 root->val |= value & VTD_PAGE_MASK;
202}
203
204static inline struct context_entry *
205get_context_addr_from_root(struct root_entry *root)
206{
207 return (struct context_entry *)
208 (root_present(root)?phys_to_virt(
209 root->val & VTD_PAGE_MASK) :
210 NULL);
211}

/*
 * Context-table entry layout.
 * low 64 bits:
 *  0:     present
 *  1:     fault processing disable
 *  2-3:   translation type
 *  12-63: address-space root (page-table pointer)
 * high 64 bits:
 *  0-2:   address width (AGAW)
 *  3-6:   available
 *  8-23:  domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};
228
229static inline bool context_present(struct context_entry *context)
230{
231 return (context->lo & 1);
232}
233static inline void context_set_present(struct context_entry *context)
234{
235 context->lo |= 1;
236}
237
238static inline void context_set_fault_enable(struct context_entry *context)
239{
240 context->lo &= (((u64)-1) << 2) | 1;
241}
242
243static inline void context_set_translation_type(struct context_entry *context,
244 unsigned long value)
245{
246 context->lo &= (((u64)-1) << 4) | 3;
247 context->lo |= (value & 3) << 2;
248}
249
250static inline void context_set_address_root(struct context_entry *context,
251 unsigned long value)
252{
253 context->lo |= value & VTD_PAGE_MASK;
254}
255
256static inline void context_set_address_width(struct context_entry *context,
257 unsigned long value)
258{
259 context->hi |= value & 7;
260}
261
262static inline void context_set_domain_id(struct context_entry *context,
263 unsigned long value)
264{
265 context->hi |= (value & ((1 << 16) - 1)) << 8;
266}
267
268static inline void context_clear_entry(struct context_entry *context)
269{
270 context->lo = 0;
271 context->hi = 0;
272}
273
/*
 * Page-table entry (dma_pte) layout:
 *  0:     readable
 *  1:     writable
 *  2-6:   reserved
 *  7:     super page
 *  8-10:  available
 *  11:    snoop behaviour
 *  12-63: host physical address
 */
struct dma_pte {
	u64 val;
};
286
287static inline void dma_clear_pte(struct dma_pte *pte)
288{
289 pte->val = 0;
290}
291
292static inline u64 dma_pte_addr(struct dma_pte *pte)
293{
294#ifdef CONFIG_64BIT
295 return pte->val & VTD_PAGE_MASK;
#else
	/* Must perform a full atomic 64-bit read of pte->val */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
299#endif
300}
301
302static inline bool dma_pte_present(struct dma_pte *pte)
303{
304 return (pte->val & 3) != 0;
305}
306
307static inline bool dma_pte_superpage(struct dma_pte *pte)
308{
309 return (pte->val & (1 << 7));
310}
311
312static inline int first_pte_in_page(struct dma_pte *pte)
313{
314 return !((unsigned long)pte & ~VTD_PAGE_MASK);
315}

/*
 * si_domain is the static identity-mapping domain:
 *  1. it creates a static 1:1 mapping of all usable memory;
 *  2. it is attached to each IOMMU in the system.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/*
 * This domain represents a virtual machine: more than one device across
 * iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)

/* define the limit of IOMMUs supported in each domain */
338#ifdef CONFIG_X86
339# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
340#else
341# define IOMMU_UNITS_SUPPORTED 64
342#endif
343
struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address of the top page table */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* level of superpages supported:
					   0 == 4KiB (none), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};
370
/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to the global device list */
	int segment;		/* PCI segment (domain) number */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* NULL for a PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to the domain */
};
382
383static void flush_unmaps_timeout(unsigned long data);
384
385static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
386
387#define HIGH_WATER_MARK 250
388struct deferred_flush_tables {
389 int next;
390 struct iova *iova[HIGH_WATER_MARK];
391 struct dmar_domain *domain[HIGH_WATER_MARK];
392};
393
394static struct deferred_flush_tables *deferred_flush;
395
396
397static int g_num_of_iommus;
398
399static DEFINE_SPINLOCK(async_umap_flush_lock);
400static LIST_HEAD(unmaps_to_do);
401
402static int timer_on;
403static long list_size;
404
405static void domain_remove_dev_info(struct dmar_domain *domain);
406
407#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
408int dmar_disabled = 0;
409#else
410int dmar_disabled = 1;
411#endif
412
413int intel_iommu_enabled = 0;
414EXPORT_SYMBOL_GPL(intel_iommu_enabled);
415
416static int dmar_map_gfx = 1;
417static int dmar_forcedac;
418static int intel_iommu_strict;
419static int intel_iommu_superpage = 1;
420
421int intel_iommu_gfx_mapped;
422EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
423
424#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
425static DEFINE_SPINLOCK(device_domain_lock);
426static LIST_HEAD(device_domain_list);
427
428static struct iommu_ops intel_iommu_ops;
429
430static int __init intel_iommu_setup(char *str)
431{
432 if (!str)
433 return -EINVAL;
434 while (*str) {
435 if (!strncmp(str, "on", 2)) {
436 dmar_disabled = 0;
437 printk(KERN_INFO "Intel-IOMMU: enabled\n");
438 } else if (!strncmp(str, "off", 3)) {
439 dmar_disabled = 1;
440 printk(KERN_INFO "Intel-IOMMU: disabled\n");
441 } else if (!strncmp(str, "igfx_off", 8)) {
442 dmar_map_gfx = 0;
443 printk(KERN_INFO
444 "Intel-IOMMU: disable GFX device mapping\n");
445 } else if (!strncmp(str, "forcedac", 8)) {
446 printk(KERN_INFO
447 "Intel-IOMMU: Forcing DAC for PCI devices\n");
448 dmar_forcedac = 1;
449 } else if (!strncmp(str, "strict", 6)) {
450 printk(KERN_INFO
451 "Intel-IOMMU: disable batched IOTLB flush\n");
452 intel_iommu_strict = 1;
453 } else if (!strncmp(str, "sp_off", 6)) {
454 printk(KERN_INFO
455 "Intel-IOMMU: disable supported super page\n");
456 intel_iommu_superpage = 0;
457 }
458
459 str += strcspn(str, ",");
460 while (*str == ',')
461 str++;
462 }
463 return 0;
464}
465__setup("intel_iommu=", intel_iommu_setup);
466
467static struct kmem_cache *iommu_domain_cache;
468static struct kmem_cache *iommu_devinfo_cache;
469static struct kmem_cache *iommu_iova_cache;
470
471static inline void *alloc_pgtable_page(int node)
472{
473 struct page *page;
474 void *vaddr = NULL;
475
476 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
477 if (page)
478 vaddr = page_address(page);
479 return vaddr;
480}
481
482static inline void free_pgtable_page(void *vaddr)
483{
484 free_page((unsigned long)vaddr);
485}
486
487static inline void *alloc_domain_mem(void)
488{
489 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
490}
491
492static void free_domain_mem(void *vaddr)
493{
494 kmem_cache_free(iommu_domain_cache, vaddr);
495}
496
497static inline void * alloc_devinfo_mem(void)
498{
499 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
500}
501
502static inline void free_devinfo_mem(void *vaddr)
503{
504 kmem_cache_free(iommu_devinfo_cache, vaddr);
505}
506
507struct iova *alloc_iova_mem(void)
508{
509 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
510}
511
512void free_iova_mem(struct iova *iova)
513{
514 kmem_cache_free(iommu_iova_cache, iova);
515}
516
517
518static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
519{
520 unsigned long sagaw;
521 int agaw = -1;
522
523 sagaw = cap_sagaw(iommu->cap);
524 for (agaw = width_to_agaw(max_gaw);
525 agaw >= 0; agaw--) {
526 if (test_bit(agaw, &sagaw))
527 break;
528 }
529
530 return agaw;
531}

/*
 * Calculate the maximum SAGAW for each iommu.
 */
536int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
537{
538 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
539}

/*
 * Calculate the AGAW to use for each iommu.
 * SAGAW may differ across iommus, so use a default AGAW and fall back to
 * a smaller supported one on iommus that don't support the default.
 */
546int iommu_calculate_agaw(struct intel_iommu *iommu)
547{
548 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
549}

/* This function returns the single iommu of a (non-VM) domain */
552static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
553{
554 int iommu_id;
555
	/* si_domain and vm domains should not get here. */
557 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
558 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
559
560 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
561 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
562 return NULL;
563
564 return g_iommus[iommu_id];
565}
566
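/*
 * A domain is only "coherent" if every IOMMU attached to it reports
 * coherent page-walks (ECAP.C); otherwise page-table updates must be
 * flushed with clflush (see domain_flush_cache()).
 */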
567static void domain_update_iommu_coherency(struct dmar_domain *domain)
568{
569 int i;
570
571 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
572
573 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
574
575 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
576 if (!ecap_coherent(g_iommus[i]->ecap)) {
577 domain->iommu_coherency = 0;
578 break;
579 }
580 }
581}
582
583static void domain_update_iommu_snooping(struct dmar_domain *domain)
584{
585 int i;
586
587 domain->iommu_snooping = 1;
588
589 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
590 if (!ecap_sc_support(g_iommus[i]->ecap)) {
591 domain->iommu_snooping = 0;
592 break;
593 }
594 }
595}
596
597static void domain_update_iommu_superpage(struct dmar_domain *domain)
598{
599 struct dmar_drhd_unit *drhd;
600 struct intel_iommu *iommu = NULL;
601 int mask = 0xf;
602
603 if (!intel_iommu_superpage) {
604 domain->iommu_superpage = 0;
605 return;
606 }
607
	/* set iommu_superpage to the smallest common denominator */
609 for_each_active_iommu(iommu, drhd) {
610 mask &= cap_super_page_val(iommu->cap);
611 if (!mask) {
612 break;
613 }
614 }
615 domain->iommu_superpage = fls(mask);
616}
617
/* Some capabilities may be different across iommus */
619static void domain_update_iommu_cap(struct dmar_domain *domain)
620{
621 domain_update_iommu_coherency(domain);
622 domain_update_iommu_snooping(domain);
623 domain_update_iommu_superpage(domain);
624}
625
626static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
627{
628 struct dmar_drhd_unit *drhd = NULL;
629 int i;
630
631 for_each_active_drhd_unit(drhd) {
632 if (segment != drhd->segment)
633 continue;
634
635 for (i = 0; i < drhd->devices_cnt; i++) {
636 if (drhd->devices[i] &&
637 drhd->devices[i]->bus->number == bus &&
638 drhd->devices[i]->devfn == devfn)
639 return drhd->iommu;
640 if (drhd->devices[i] &&
641 drhd->devices[i]->subordinate &&
642 drhd->devices[i]->subordinate->number <= bus &&
643 drhd->devices[i]->subordinate->busn_res.end >= bus)
644 return drhd->iommu;
645 }
646
647 if (drhd->include_all)
648 return drhd->iommu;
649 }
650
651 return NULL;
652}
653
654static void domain_flush_cache(struct dmar_domain *domain,
655 void *addr, int size)
656{
657 if (!domain->iommu_coherency)
658 clflush_cache_range(addr, size);
659}
660
/* Get the context entry for a given bus/devfn, allocating the context table if needed */
662static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
663 u8 bus, u8 devfn)
664{
665 struct root_entry *root;
666 struct context_entry *context;
667 unsigned long phy_addr;
668 unsigned long flags;
669
670 spin_lock_irqsave(&iommu->lock, flags);
671 root = &iommu->root_entry[bus];
672 context = get_context_addr_from_root(root);
673 if (!context) {
674 context = (struct context_entry *)
675 alloc_pgtable_page(iommu->node);
676 if (!context) {
677 spin_unlock_irqrestore(&iommu->lock, flags);
678 return NULL;
679 }
680 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
681 phy_addr = virt_to_phys((void *)context);
682 set_root_value(root, phy_addr);
683 set_root_present(root);
684 __iommu_flush_cache(iommu, root, sizeof(*root));
685 }
686 spin_unlock_irqrestore(&iommu->lock, flags);
687 return &context[devfn];
688}
689
690static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
691{
692 struct root_entry *root;
693 struct context_entry *context;
694 int ret;
695 unsigned long flags;
696
697 spin_lock_irqsave(&iommu->lock, flags);
698 root = &iommu->root_entry[bus];
699 context = get_context_addr_from_root(root);
700 if (!context) {
701 ret = 0;
702 goto out;
703 }
704 ret = context_present(&context[devfn]);
705out:
706 spin_unlock_irqrestore(&iommu->lock, flags);
707 return ret;
708}
709
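/*
 * Clear the context entry for (bus, devfn) and write the change back to
 * memory so the IOMMU sees it.
 */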
710static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
711{
712 struct root_entry *root;
713 struct context_entry *context;
714 unsigned long flags;
715
716 spin_lock_irqsave(&iommu->lock, flags);
717 root = &iommu->root_entry[bus];
718 context = get_context_addr_from_root(root);
719 if (context) {
720 context_clear_entry(&context[devfn]);
721 __iommu_flush_cache(iommu, &context[devfn], \
722 sizeof(*context));
723 }
724 spin_unlock_irqrestore(&iommu->lock, flags);
725}
726
727static void free_context_table(struct intel_iommu *iommu)
728{
729 struct root_entry *root;
730 int i;
731 unsigned long flags;
732 struct context_entry *context;
733
734 spin_lock_irqsave(&iommu->lock, flags);
735 if (!iommu->root_entry) {
736 goto out;
737 }
738 for (i = 0; i < ROOT_ENTRY_NR; i++) {
739 root = &iommu->root_entry[i];
740 context = get_context_addr_from_root(root);
741 if (context)
742 free_pgtable_page(context);
743 }
744 free_pgtable_page(iommu->root_entry);
745 iommu->root_entry = NULL;
746out:
747 spin_unlock_irqrestore(&iommu->lock, flags);
748}
749
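/*
 * Walk the page table for @pfn down to @target_level, allocating any
 * missing intermediate tables on the way. A @target_level of 0 means
 * "whatever level the walk stops at" (a superpage or the lowest level).
 */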
750static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
751 unsigned long pfn, int target_level)
752{
753 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
754 struct dma_pte *parent, *pte = NULL;
755 int level = agaw_to_level(domain->agaw);
756 int offset;
757
758 BUG_ON(!domain->pgd);
759
	if (addr_width < BITS_PER_LONG && pfn >> addr_width)
		/* address beyond the IOMMU's addressing capabilities */
		return NULL;
763
764 parent = domain->pgd;
765
766 while (level > 0) {
767 void *tmp_page;
768
769 offset = pfn_level_offset(pfn, level);
770 pte = &parent[offset];
771 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
772 break;
773 if (level == target_level)
774 break;
775
776 if (!dma_pte_present(pte)) {
777 uint64_t pteval;
778
779 tmp_page = alloc_pgtable_page(domain->nid);
780
781 if (!tmp_page)
782 return NULL;
783
784 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
785 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
786 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
788 free_pgtable_page(tmp_page);
789 } else {
790 dma_pte_addr(pte);
791 domain_flush_cache(domain, pte, sizeof(*pte));
792 }
793 }
794 parent = phys_to_virt(dma_pte_addr(pte));
795 level--;
796 }
797
798 return pte;
799}
800
/* return the pte for an address (pfn) at a specific level */
803static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
804 unsigned long pfn,
805 int level, int *large_page)
806{
807 struct dma_pte *parent, *pte = NULL;
808 int total = agaw_to_level(domain->agaw);
809 int offset;
810
811 parent = domain->pgd;
812 while (level <= total) {
813 offset = pfn_level_offset(pfn, total);
814 pte = &parent[offset];
815 if (level == total)
816 return pte;
817
818 if (!dma_pte_present(pte)) {
819 *large_page = total;
820 break;
821 }
822
823 if (pte->val & DMA_PTE_LARGE_PAGE) {
824 *large_page = total;
825 return pte;
826 }
827
828 parent = phys_to_virt(dma_pte_addr(pte));
829 total--;
830 }
831 return NULL;
832}
833
/* clear last-level ptes; a TLB flush should follow */
835static int dma_pte_clear_range(struct dmar_domain *domain,
836 unsigned long start_pfn,
837 unsigned long last_pfn)
838{
839 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
840 unsigned int large_page = 1;
841 struct dma_pte *first_pte, *pte;
842
843 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
844 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
845 BUG_ON(start_pfn > last_pfn);
846
847
848 do {
849 large_page = 1;
850 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
851 if (!pte) {
852 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
853 continue;
854 }
855 do {
856 dma_clear_pte(pte);
857 start_pfn += lvl_to_nr_pages(large_page);
858 pte++;
859 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
860
861 domain_flush_cache(domain, first_pte,
862 (void *)pte - (void *)first_pte);
863
864 } while (start_pfn && start_pfn <= last_pfn);
865
866 return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
867}
868
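/*
 * Recursively free page-table pages whose address range lies entirely
 * within [start_pfn, last_pfn]. Leaf PTEs must already be cleared.
 */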
869static void dma_pte_free_level(struct dmar_domain *domain, int level,
870 struct dma_pte *pte, unsigned long pfn,
871 unsigned long start_pfn, unsigned long last_pfn)
872{
873 pfn = max(start_pfn, pfn);
874 pte = &pte[pfn_level_offset(pfn, level)];
875
876 do {
877 unsigned long level_pfn;
878 struct dma_pte *level_pte;
879
880 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
881 goto next;
882
883 level_pfn = pfn & level_mask(level - 1);
884 level_pte = phys_to_virt(dma_pte_addr(pte));
885
886 if (level > 2)
887 dma_pte_free_level(domain, level - 1, level_pte,
888 level_pfn, start_pfn, last_pfn);
889
890
891 if (!(start_pfn > level_pfn ||
892 last_pfn < level_pfn + level_size(level) - 1)) {
893 dma_clear_pte(pte);
894 domain_flush_cache(domain, pte, sizeof(*pte));
895 free_pgtable_page(level_pte);
896 }
897next:
898 pfn += level_size(level);
899 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
900}
901
/* free page-table pages; last-level ptes should already be cleared */
903static void dma_pte_free_pagetable(struct dmar_domain *domain,
904 unsigned long start_pfn,
905 unsigned long last_pfn)
906{
907 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
908
909 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
910 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
911 BUG_ON(start_pfn > last_pfn);
912
913
914 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
915 domain->pgd, 0, start_pfn, last_pfn);
916
	/* free pgd */
918 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
919 free_pgtable_page(domain->pgd);
920 domain->pgd = NULL;
921 }
922}
923
924
925static int iommu_alloc_root_entry(struct intel_iommu *iommu)
926{
927 struct root_entry *root;
928 unsigned long flags;
929
930 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
931 if (!root)
932 return -ENOMEM;
933
934 __iommu_flush_cache(iommu, root, ROOT_SIZE);
935
936 spin_lock_irqsave(&iommu->lock, flags);
937 iommu->root_entry = root;
938 spin_unlock_irqrestore(&iommu->lock, flags);
939
940 return 0;
941}
942
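/*
 * Program the root-table address into DMAR_RTADDR_REG and issue the
 * "Set Root Table Pointer" command, waiting for the hardware to ack it.
 */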
943static void iommu_set_root_entry(struct intel_iommu *iommu)
944{
945 void *addr;
946 u32 sts;
947 unsigned long flag;
948
949 addr = iommu->root_entry;
950
951 raw_spin_lock_irqsave(&iommu->register_lock, flag);
952 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
953
954 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
955
956
957 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
958 readl, (sts & DMA_GSTS_RTPS), sts);
959
960 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
961}
962
963static void iommu_flush_write_buffer(struct intel_iommu *iommu)
964{
965 u32 val;
966 unsigned long flag;
967
968 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
969 return;
970
971 raw_spin_lock_irqsave(&iommu->register_lock, flag);
972 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
973
974
975 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
976 readl, (!(val & DMA_GSTS_WBFS)), val);
977
978 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
979}
980
981
982static void __iommu_flush_context(struct intel_iommu *iommu,
983 u16 did, u16 source_id, u8 function_mask,
984 u64 type)
985{
986 u64 val = 0;
987 unsigned long flag;
988
989 switch (type) {
990 case DMA_CCMD_GLOBAL_INVL:
991 val = DMA_CCMD_GLOBAL_INVL;
992 break;
993 case DMA_CCMD_DOMAIN_INVL:
994 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
995 break;
996 case DMA_CCMD_DEVICE_INVL:
997 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
998 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
999 break;
1000 default:
1001 BUG();
1002 }
1003 val |= DMA_CCMD_ICC;
1004
1005 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1006 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1007
1008
1009 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1010 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1011
1012 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1013}
1014
1015
1016static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1017 u64 addr, unsigned int size_order, u64 type)
1018{
1019 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1020 u64 val = 0, val_iva = 0;
1021 unsigned long flag;
1022
1023 switch (type) {
1024 case DMA_TLB_GLOBAL_FLUSH:
1025
1026 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1027 break;
1028 case DMA_TLB_DSI_FLUSH:
1029 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1030 break;
1031 case DMA_TLB_PSI_FLUSH:
1032 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1033
1034 val_iva = size_order | addr;
1035 break;
1036 default:
1037 BUG();
1038 }
1039
1040#if 0
	/*
	 * Read drain looks unnecessary here, so this block is
	 * intentionally compiled out.
	 */
1045 if (cap_read_drain(iommu->cap))
1046 val |= DMA_TLB_READ_DRAIN;
1047#endif
1048 if (cap_write_drain(iommu->cap))
1049 val |= DMA_TLB_WRITE_DRAIN;
1050
1051 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1052
1053 if (val_iva)
1054 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1055 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1056
1057
1058 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1059 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1060
1061 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1062
1063
1064 if (DMA_TLB_IAIG(val) == 0)
1065 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1066 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1067 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1068 (unsigned long long)DMA_TLB_IIRG(type),
1069 (unsigned long long)DMA_TLB_IAIG(val));
1070}
1071
1072static struct device_domain_info *iommu_support_dev_iotlb(
1073 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1074{
1075 int found = 0;
1076 unsigned long flags;
1077 struct device_domain_info *info;
1078 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1079
1080 if (!ecap_dev_iotlb_support(iommu->ecap))
1081 return NULL;
1082
1083 if (!iommu->qi)
1084 return NULL;
1085
1086 spin_lock_irqsave(&device_domain_lock, flags);
1087 list_for_each_entry(info, &domain->devices, link)
1088 if (info->bus == bus && info->devfn == devfn) {
1089 found = 1;
1090 break;
1091 }
1092 spin_unlock_irqrestore(&device_domain_lock, flags);
1093
1094 if (!found || !info->dev)
1095 return NULL;
1096
1097 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1098 return NULL;
1099
1100 if (!dmar_find_matched_atsr_unit(info->dev))
1101 return NULL;
1102
1103 info->iommu = iommu;
1104
1105 return info;
1106}
1107
1108static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1109{
1110 if (!info)
1111 return;
1112
1113 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1114}
1115
1116static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1117{
1118 if (!info->dev || !pci_ats_enabled(info->dev))
1119 return;
1120
1121 pci_disable_ats(info->dev);
1122}
1123
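/*
 * Send an ATS (device-IOTLB) invalidation for the given address/mask to
 * every ATS-enabled device attached to the domain.
 */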
1124static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1125 u64 addr, unsigned mask)
1126{
1127 u16 sid, qdep;
1128 unsigned long flags;
1129 struct device_domain_info *info;
1130
1131 spin_lock_irqsave(&device_domain_lock, flags);
1132 list_for_each_entry(info, &domain->devices, link) {
1133 if (!info->dev || !pci_ats_enabled(info->dev))
1134 continue;
1135
1136 sid = info->bus << 8 | info->devfn;
1137 qdep = pci_ats_queue_depth(info->dev);
1138 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1139 }
1140 spin_unlock_irqrestore(&device_domain_lock, flags);
1141}
1142
1143static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1144 unsigned long pfn, unsigned int pages, int map)
1145{
1146 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1147 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1148
1149 BUG_ON(pages == 0);

	/*
	 * Fall back to a domain-selective flush if page-selective
	 * invalidation is not supported, or if the region is larger than
	 * the biggest address mask the hardware accepts.
	 */
1157 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1158 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1159 DMA_TLB_DSI_FLUSH);
1160 else
1161 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1162 DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, a change from non-present to present requires a
	 * flush; the device IOTLB, however, does not need to be flushed in
	 * that case.
	 */
1168 if (!cap_caching_mode(iommu->cap) || !map)
1169 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1170}
1171
1172static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1173{
1174 u32 pmen;
1175 unsigned long flags;
1176
1177 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1178 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1179 pmen &= ~DMA_PMEN_EPM;
1180 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1181
1182
1183 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1184 readl, !(pmen & DMA_PMEN_PRS), pmen);
1185
1186 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1187}
1188
1189static int iommu_enable_translation(struct intel_iommu *iommu)
1190{
1191 u32 sts;
1192 unsigned long flags;
1193
1194 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1195 iommu->gcmd |= DMA_GCMD_TE;
1196 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1197
1198
1199 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1200 readl, (sts & DMA_GSTS_TES), sts);
1201
1202 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1203 return 0;
1204}
1205
1206static int iommu_disable_translation(struct intel_iommu *iommu)
1207{
1208 u32 sts;
1209 unsigned long flag;
1210
1211 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1212 iommu->gcmd &= ~DMA_GCMD_TE;
1213 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1214
1215
1216 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1217 readl, (!(sts & DMA_GSTS_TES)), sts);
1218
1219 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1220 return 0;
1221}
1222
1223
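/*
 * Allocate the per-IOMMU domain-id bitmap and the array of domain
 * pointers, sized from the hardware's number-of-domains capability.
 */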
1224static int iommu_init_domains(struct intel_iommu *iommu)
1225{
1226 unsigned long ndomains;
1227 unsigned long nlongs;
1228
1229 ndomains = cap_ndoms(iommu->cap);
1230 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1231 iommu->seq_id, ndomains);
1232 nlongs = BITS_TO_LONGS(ndomains);
1233
1234 spin_lock_init(&iommu->lock);
1235
1236
1237
1238
1239 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1240 if (!iommu->domain_ids) {
1241 pr_err("IOMMU%d: allocating domain id array failed\n",
1242 iommu->seq_id);
1243 return -ENOMEM;
1244 }
1245 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1246 GFP_KERNEL);
1247 if (!iommu->domains) {
1248 pr_err("IOMMU%d: allocating domain array failed\n",
1249 iommu->seq_id);
1250 kfree(iommu->domain_ids);
1251 iommu->domain_ids = NULL;
1252 return -ENOMEM;
1253 }

	/*
	 * If caching mode is set, invalid translations are tagged with
	 * domain id 0, so reserve that id here.
	 */
1259 if (cap_caching_mode(iommu->cap))
1260 set_bit(0, iommu->domain_ids);
1261 return 0;
1262}
1263
1264
1265static void domain_exit(struct dmar_domain *domain);
1266static void vm_domain_exit(struct dmar_domain *domain);
1267
1268static void free_dmar_iommu(struct intel_iommu *iommu)
1269{
1270 struct dmar_domain *domain;
1271 int i, count;
1272 unsigned long flags;
1273
1274 if ((iommu->domains) && (iommu->domain_ids)) {
1275 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1276 domain = iommu->domains[i];
1277 clear_bit(i, iommu->domain_ids);
1278
1279 spin_lock_irqsave(&domain->iommu_lock, flags);
1280 count = --domain->iommu_count;
1281 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1282 if (count == 0) {
1283 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1284 vm_domain_exit(domain);
1285 else
1286 domain_exit(domain);
1287 }
1288 }
1289 }
1290
1291 if (iommu->gcmd & DMA_GCMD_TE)
1292 iommu_disable_translation(iommu);
1293
1294 kfree(iommu->domains);
1295 kfree(iommu->domain_ids);
1296 iommu->domains = NULL;
1297 iommu->domain_ids = NULL;
1298
1299 g_iommus[iommu->seq_id] = NULL;
1300
1301
1302 for (i = 0; i < g_num_of_iommus; i++) {
1303 if (g_iommus[i])
1304 break;
1305 }
1306
1307 if (i == g_num_of_iommus)
1308 kfree(g_iommus);
1309
1310
1311 free_context_table(iommu);
1312}
1313
1314static struct dmar_domain *alloc_domain(void)
1315{
1316 struct dmar_domain *domain;
1317
1318 domain = alloc_domain_mem();
1319 if (!domain)
1320 return NULL;
1321
1322 domain->nid = -1;
1323 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1324 domain->flags = 0;
1325
1326 return domain;
1327}
1328
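/*
 * Bind @domain to @iommu: allocate a free hardware domain id and mark
 * this IOMMU in the domain's bitmap.
 */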
1329static int iommu_attach_domain(struct dmar_domain *domain,
1330 struct intel_iommu *iommu)
1331{
1332 int num;
1333 unsigned long ndomains;
1334 unsigned long flags;
1335
1336 ndomains = cap_ndoms(iommu->cap);
1337
1338 spin_lock_irqsave(&iommu->lock, flags);
1339
1340 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1341 if (num >= ndomains) {
1342 spin_unlock_irqrestore(&iommu->lock, flags);
1343 printk(KERN_ERR "IOMMU: no free domain ids\n");
1344 return -ENOMEM;
1345 }
1346
1347 domain->id = num;
1348 set_bit(num, iommu->domain_ids);
1349 set_bit(iommu->seq_id, domain->iommu_bmp);
1350 iommu->domains[num] = domain;
1351 spin_unlock_irqrestore(&iommu->lock, flags);
1352
1353 return 0;
1354}
1355
1356static void iommu_detach_domain(struct dmar_domain *domain,
1357 struct intel_iommu *iommu)
1358{
1359 unsigned long flags;
1360 int num, ndomains;
1361 int found = 0;
1362
1363 spin_lock_irqsave(&iommu->lock, flags);
1364 ndomains = cap_ndoms(iommu->cap);
1365 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1366 if (iommu->domains[num] == domain) {
1367 found = 1;
1368 break;
1369 }
1370 }
1371
1372 if (found) {
1373 clear_bit(num, iommu->domain_ids);
1374 clear_bit(iommu->seq_id, domain->iommu_bmp);
1375 iommu->domains[num] = NULL;
1376 }
1377 spin_unlock_irqrestore(&iommu->lock, flags);
1378}
1379
1380static struct iova_domain reserved_iova_list;
1381static struct lock_class_key reserved_rbtree_key;
1382
1383static int dmar_init_reserved_ranges(void)
1384{
1385 struct pci_dev *pdev = NULL;
1386 struct iova *iova;
1387 int i;
1388
1389 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1390
1391 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1392 &reserved_rbtree_key);
1393
	/* IOAPIC ranges shouldn't be accessed by DMA */
1395 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1396 IOVA_PFN(IOAPIC_RANGE_END));
1397 if (!iova) {
1398 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1399 return -ENODEV;
1400 }
1401
	/* Reserve all PCI MMIO to avoid peer-to-peer accesses */
1403 for_each_pci_dev(pdev) {
1404 struct resource *r;
1405
1406 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1407 r = &pdev->resource[i];
1408 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1409 continue;
1410 iova = reserve_iova(&reserved_iova_list,
1411 IOVA_PFN(r->start),
1412 IOVA_PFN(r->end));
1413 if (!iova) {
1414 printk(KERN_ERR "Reserve iova failed\n");
1415 return -ENODEV;
1416 }
1417 }
1418 }
1419 return 0;
1420}
1421
1422static void domain_reserve_special_ranges(struct dmar_domain *domain)
1423{
1424 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1425}
1426
1427static inline int guestwidth_to_adjustwidth(int gaw)
1428{
1429 int agaw;
1430 int r = (gaw - 12) % 9;
1431
1432 if (r == 0)
1433 agaw = gaw;
1434 else
1435 agaw = gaw + 9 - r;
1436 if (agaw > 64)
1437 agaw = 64;
1438 return agaw;
1439}
1440
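/*
 * First-time setup of a domain: initialise its IOVA allocator, pick an
 * address width supported by the IOMMU, record coherency/snooping/
 * superpage capabilities and allocate the top-level page table.
 */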
1441static int domain_init(struct dmar_domain *domain, int guest_width)
1442{
1443 struct intel_iommu *iommu;
1444 int adjust_width, agaw;
1445 unsigned long sagaw;
1446
1447 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1448 spin_lock_init(&domain->iommu_lock);
1449
1450 domain_reserve_special_ranges(domain);
1451
1452
1453 iommu = domain_get_iommu(domain);
1454 if (guest_width > cap_mgaw(iommu->cap))
1455 guest_width = cap_mgaw(iommu->cap);
1456 domain->gaw = guest_width;
1457 adjust_width = guestwidth_to_adjustwidth(guest_width);
1458 agaw = width_to_agaw(adjust_width);
1459 sagaw = cap_sagaw(iommu->cap);
1460 if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support this agaw, choose a bigger one */
1462 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1463 agaw = find_next_bit(&sagaw, 5, agaw);
1464 if (agaw >= 5)
1465 return -ENODEV;
1466 }
1467 domain->agaw = agaw;
1468 INIT_LIST_HEAD(&domain->devices);
1469
1470 if (ecap_coherent(iommu->ecap))
1471 domain->iommu_coherency = 1;
1472 else
1473 domain->iommu_coherency = 0;
1474
1475 if (ecap_sc_support(iommu->ecap))
1476 domain->iommu_snooping = 1;
1477 else
1478 domain->iommu_snooping = 0;
1479
1480 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1481 domain->iommu_count = 1;
1482 domain->nid = iommu->node;
1483
1484
1485 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1486 if (!domain->pgd)
1487 return -ENOMEM;
1488 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1489 return 0;
1490}
1491
1492static void domain_exit(struct dmar_domain *domain)
1493{
1494 struct dmar_drhd_unit *drhd;
1495 struct intel_iommu *iommu;
1496
1497
1498 if (!domain)
1499 return;
1500
1501
1502 if (!intel_iommu_strict)
1503 flush_unmaps_timeout(0);
1504
1505 domain_remove_dev_info(domain);
1506
1507 put_iova_domain(&domain->iovad);
1508
1509
1510 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1511
1512
1513 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1514
1515 for_each_active_iommu(iommu, drhd)
1516 if (test_bit(iommu->seq_id, domain->iommu_bmp))
1517 iommu_detach_domain(domain, iommu);
1518
1519 free_domain_mem(domain);
1520}
1521
1522static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1523 u8 bus, u8 devfn, int translation)
1524{
1525 struct context_entry *context;
1526 unsigned long flags;
1527 struct intel_iommu *iommu;
1528 struct dma_pte *pgd;
1529 unsigned long num;
1530 unsigned long ndomains;
1531 int id;
1532 int agaw;
1533 struct device_domain_info *info = NULL;
1534
1535 pr_debug("Set context mapping for %02x:%02x.%d\n",
1536 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1537
1538 BUG_ON(!domain->pgd);
1539 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1540 translation != CONTEXT_TT_MULTI_LEVEL);
1541
1542 iommu = device_to_iommu(segment, bus, devfn);
1543 if (!iommu)
1544 return -ENODEV;
1545
1546 context = device_to_context_entry(iommu, bus, devfn);
1547 if (!context)
1548 return -ENOMEM;
1549 spin_lock_irqsave(&iommu->lock, flags);
1550 if (context_present(context)) {
1551 spin_unlock_irqrestore(&iommu->lock, flags);
1552 return 0;
1553 }
1554
1555 id = domain->id;
1556 pgd = domain->pgd;
1557
1558 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1559 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1560 int found = 0;
1561
		/* find an available domain id for this device in iommu */
1563 ndomains = cap_ndoms(iommu->cap);
1564 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1565 if (iommu->domains[num] == domain) {
1566 id = num;
1567 found = 1;
1568 break;
1569 }
1570 }
1571
1572 if (found == 0) {
1573 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1574 if (num >= ndomains) {
1575 spin_unlock_irqrestore(&iommu->lock, flags);
1576 printk(KERN_ERR "IOMMU: no free domain ids\n");
1577 return -EFAULT;
1578 }
1579
1580 set_bit(num, iommu->domain_ids);
1581 iommu->domains[num] = domain;
1582 id = num;
1583 }
1584
		/*
		 * Skip the top levels of the page table for an iommu whose
		 * agaw is smaller than the default; unnecessary for
		 * pass-through mode.
		 */
1589 if (translation != CONTEXT_TT_PASS_THROUGH) {
1590 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1591 pgd = phys_to_virt(dma_pte_addr(pgd));
1592 if (!dma_pte_present(pgd)) {
1593 spin_unlock_irqrestore(&iommu->lock, flags);
1594 return -ENOMEM;
1595 }
1596 }
1597 }
1598 }
1599
1600 context_set_domain_id(context, id);
1601
1602 if (translation != CONTEXT_TT_PASS_THROUGH) {
1603 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1604 translation = info ? CONTEXT_TT_DEV_IOTLB :
1605 CONTEXT_TT_MULTI_LEVEL;
1606 }

	/*
	 * In pass-through mode, AW must be programmed with the largest AGAW
	 * value supported by the hardware; ASR is ignored by the hardware.
	 */
1611 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1612 context_set_address_width(context, iommu->msagaw);
1613 else {
1614 context_set_address_root(context, virt_to_phys(pgd));
1615 context_set_address_width(context, iommu->agaw);
1616 }
1617
1618 context_set_translation_type(context, translation);
1619 context_set_fault_enable(context);
1620 context_set_present(context);
1621 domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * This is a non-present to present mapping. If the hardware doesn't
	 * cache non-present entries, we only need to flush the write buffer.
	 * If it does cache them, it does so in the special domain #0, which
	 * we have to flush:
	 */
1629 if (cap_caching_mode(iommu->cap)) {
1630 iommu->flush.flush_context(iommu, 0,
1631 (((u16)bus) << 8) | devfn,
1632 DMA_CCMD_MASK_NOBIT,
1633 DMA_CCMD_DEVICE_INVL);
1634 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
1635 } else {
1636 iommu_flush_write_buffer(iommu);
1637 }
1638 iommu_enable_dev_iotlb(info);
1639 spin_unlock_irqrestore(&iommu->lock, flags);
1640
1641 spin_lock_irqsave(&domain->iommu_lock, flags);
1642 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1643 domain->iommu_count++;
1644 if (domain->iommu_count == 1)
1645 domain->nid = iommu->node;
1646 domain_update_iommu_cap(domain);
1647 }
1648 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1649 return 0;
1650}
1651
1652static int
1653domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1654 int translation)
1655{
1656 int ret;
1657 struct pci_dev *tmp, *parent;
1658
1659 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1660 pdev->bus->number, pdev->devfn,
1661 translation);
1662 if (ret)
1663 return ret;
1664
1665
1666 tmp = pci_find_upstream_pcie_bridge(pdev);
1667 if (!tmp)
1668 return 0;
1669
1670 parent = pdev->bus->self;
1671 while (parent != tmp) {
1672 ret = domain_context_mapping_one(domain,
1673 pci_domain_nr(parent->bus),
1674 parent->bus->number,
1675 parent->devfn, translation);
1676 if (ret)
1677 return ret;
1678 parent = parent->bus->self;
1679 }
1680 if (pci_is_pcie(tmp))
1681 return domain_context_mapping_one(domain,
1682 pci_domain_nr(tmp->subordinate),
1683 tmp->subordinate->number, 0,
1684 translation);
1685 else
1686 return domain_context_mapping_one(domain,
1687 pci_domain_nr(tmp->bus),
1688 tmp->bus->number,
1689 tmp->devfn,
1690 translation);
1691}
1692
1693static int domain_context_mapped(struct pci_dev *pdev)
1694{
1695 int ret;
1696 struct pci_dev *tmp, *parent;
1697 struct intel_iommu *iommu;
1698
1699 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1700 pdev->devfn);
1701 if (!iommu)
1702 return -ENODEV;
1703
1704 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1705 if (!ret)
1706 return ret;
1707
1708 tmp = pci_find_upstream_pcie_bridge(pdev);
1709 if (!tmp)
1710 return ret;
1711
1712 parent = pdev->bus->self;
1713 while (parent != tmp) {
1714 ret = device_context_mapped(iommu, parent->bus->number,
1715 parent->devfn);
1716 if (!ret)
1717 return ret;
1718 parent = parent->bus->self;
1719 }
1720 if (pci_is_pcie(tmp))
1721 return device_context_mapped(iommu, tmp->subordinate->number,
1722 0);
1723 else
1724 return device_context_mapped(iommu, tmp->bus->number,
1725 tmp->devfn);
1726}
1727
/* Returns a number of VTD pages, but aligned to MM page size */
1729static inline unsigned long aligned_nrpages(unsigned long host_addr,
1730 size_t size)
1731{
1732 host_addr &= ~PAGE_MASK;
1733 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1734}
1735
/* Return the largest possible superpage level for a given mapping */
1737static inline int hardware_largepage_caps(struct dmar_domain *domain,
1738 unsigned long iov_pfn,
1739 unsigned long phy_pfn,
1740 unsigned long pages)
1741{
1742 int support, level = 1;
1743 unsigned long pfnmerge;
1744
1745 support = domain->iommu_superpage;

	/*
	 * To use a large page, the virtual *and* physical addresses must be
	 * aligned to 2MiB/1GiB/etc. Any low bits set in either address mean
	 * we have to fall back to small-page mappings here.
	 */
1751 pfnmerge = iov_pfn | phy_pfn;
1752
1753 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1754 pages >>= VTD_STRIDE_SHIFT;
1755 if (!pages)
1756 break;
1757 pfnmerge >>= VTD_STRIDE_SHIFT;
1758 level++;
1759 support--;
1760 }
1761 return level;
1762}
1763
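/*
 * Map @nr_pages VT-d pages starting at @iov_pfn, taking the physical
 * pages either from the scatterlist @sg or from the contiguous range
 * starting at @phys_pfn. Superpage PTEs are used when alignment and
 * size allow, and written PTEs are flushed on non-coherent hardware.
 */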
1764static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1765 struct scatterlist *sg, unsigned long phys_pfn,
1766 unsigned long nr_pages, int prot)
1767{
1768 struct dma_pte *first_pte = NULL, *pte = NULL;
1769 phys_addr_t uninitialized_var(pteval);
1770 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1771 unsigned long sg_res;
1772 unsigned int largepage_lvl = 0;
1773 unsigned long lvl_pages = 0;
1774
1775 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1776
1777 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1778 return -EINVAL;
1779
1780 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1781
1782 if (sg)
1783 sg_res = 0;
1784 else {
1785 sg_res = nr_pages + 1;
1786 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1787 }
1788
1789 while (nr_pages > 0) {
1790 uint64_t tmp;
1791
1792 if (!sg_res) {
1793 sg_res = aligned_nrpages(sg->offset, sg->length);
1794 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1795 sg->dma_length = sg->length;
1796 pteval = page_to_phys(sg_page(sg)) | prot;
1797 phys_pfn = pteval >> VTD_PAGE_SHIFT;
1798 }
1799
1800 if (!pte) {
1801 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1802
1803 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
1804 if (!pte)
1805 return -ENOMEM;
1806
1807 if (largepage_lvl > 1) {
1808 pteval |= DMA_PTE_LARGE_PAGE;
				/* Ensure that old small page tables are removed
				   to make room for the superpage, if present. */
1811 dma_pte_clear_range(domain, iov_pfn,
1812 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1813 dma_pte_free_pagetable(domain, iov_pfn,
1814 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1815 } else {
1816 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
1817 }
1818
1819 }
		/*
		 * We don't need a lock here; nobody else touches this
		 * iova range.
		 */
1823 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1824 if (tmp) {
1825 static int dumps = 5;
1826 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1827 iov_pfn, tmp, (unsigned long long)pteval);
1828 if (dumps) {
1829 dumps--;
1830 debug_dma_dump_mappings(NULL);
1831 }
1832 WARN_ON(1);
1833 }
1834
1835 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1836
1837 BUG_ON(nr_pages < lvl_pages);
1838 BUG_ON(sg_res < lvl_pages);
1839
1840 nr_pages -= lvl_pages;
1841 iov_pfn += lvl_pages;
1842 phys_pfn += lvl_pages;
1843 pteval += lvl_pages * VTD_PAGE_SIZE;
1844 sg_res -= lvl_pages;

		/*
		 * If the next PTE would be the first in a new page, flush the
		 * cache on the entries we've just written, then clear 'pte' so
		 * it gets looked up again in the "if (!pte)" block above.
		 *
		 * We also need to flush when we're done (!nr_pages), and when
		 * we've been writing superpage PTEs but the remaining length
		 * is too small for another superpage (sg_res < lvl_pages), in
		 * which case we must switch back to small pages.
		 */
1857 pte++;
1858 if (!nr_pages || first_pte_in_page(pte) ||
1859 (largepage_lvl > 1 && sg_res < lvl_pages)) {
1860 domain_flush_cache(domain, first_pte,
1861 (void *)pte - (void *)first_pte);
1862 pte = NULL;
1863 }
1864
1865 if (!sg_res && nr_pages)
1866 sg = sg_next(sg);
1867 }
1868 return 0;
1869}
1870
1871static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1872 struct scatterlist *sg, unsigned long nr_pages,
1873 int prot)
1874{
1875 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1876}
1877
1878static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1879 unsigned long phys_pfn, unsigned long nr_pages,
1880 int prot)
1881{
1882 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1883}
1884
1885static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1886{
1887 if (!iommu)
1888 return;
1889
1890 clear_context_table(iommu, bus, devfn);
1891 iommu->flush.flush_context(iommu, 0, 0, 0,
1892 DMA_CCMD_GLOBAL_INVL);
1893 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1894}
1895
1896static inline void unlink_domain_info(struct device_domain_info *info)
1897{
1898 assert_spin_locked(&device_domain_lock);
1899 list_del(&info->link);
1900 list_del(&info->global);
1901 if (info->dev)
1902 info->dev->dev.archdata.iommu = NULL;
1903}
1904
1905static void domain_remove_dev_info(struct dmar_domain *domain)
1906{
1907 struct device_domain_info *info;
1908 unsigned long flags;
1909 struct intel_iommu *iommu;
1910
1911 spin_lock_irqsave(&device_domain_lock, flags);
1912 while (!list_empty(&domain->devices)) {
1913 info = list_entry(domain->devices.next,
1914 struct device_domain_info, link);
1915 unlink_domain_info(info);
1916 spin_unlock_irqrestore(&device_domain_lock, flags);
1917
1918 iommu_disable_dev_iotlb(info);
1919 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1920 iommu_detach_dev(iommu, info->bus, info->devfn);
1921 free_devinfo_mem(info);
1922
1923 spin_lock_irqsave(&device_domain_lock, flags);
1924 }
1925 spin_unlock_irqrestore(&device_domain_lock, flags);
1926}
1927
/*
 * find_domain
 * Note: struct pci_dev->dev.archdata.iommu stores the domain info.
 */
1932static struct dmar_domain *
1933find_domain(struct pci_dev *pdev)
1934{
1935 struct device_domain_info *info;
1936
1937
1938 info = pdev->dev.archdata.iommu;
1939 if (info)
1940 return info->domain;
1941 return NULL;
1942}
1943
/* Find an existing domain for the device, or allocate and initialize one. */
1945static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1946{
1947 struct dmar_domain *domain, *found = NULL;
1948 struct intel_iommu *iommu;
1949 struct dmar_drhd_unit *drhd;
1950 struct device_domain_info *info, *tmp;
1951 struct pci_dev *dev_tmp;
1952 unsigned long flags;
1953 int bus = 0, devfn = 0;
1954 int segment;
1955 int ret;
1956
1957 domain = find_domain(pdev);
1958 if (domain)
1959 return domain;
1960
1961 segment = pci_domain_nr(pdev->bus);
1962
1963 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1964 if (dev_tmp) {
1965 if (pci_is_pcie(dev_tmp)) {
1966 bus = dev_tmp->subordinate->number;
1967 devfn = 0;
1968 } else {
1969 bus = dev_tmp->bus->number;
1970 devfn = dev_tmp->devfn;
1971 }
1972 spin_lock_irqsave(&device_domain_lock, flags);
1973 list_for_each_entry(info, &device_domain_list, global) {
1974 if (info->segment == segment &&
1975 info->bus == bus && info->devfn == devfn) {
1976 found = info->domain;
1977 break;
1978 }
1979 }
1980 spin_unlock_irqrestore(&device_domain_lock, flags);
1981
1982 if (found) {
1983 domain = found;
1984 goto found_domain;
1985 }
1986 }
1987
1988 domain = alloc_domain();
1989 if (!domain)
1990 goto error;
1991
1992
1993 drhd = dmar_find_matched_drhd_unit(pdev);
1994 if (!drhd) {
1995 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1996 pci_name(pdev));
1997 free_domain_mem(domain);
1998 return NULL;
1999 }
2000 iommu = drhd->iommu;
2001
2002 ret = iommu_attach_domain(domain, iommu);
2003 if (ret) {
2004 free_domain_mem(domain);
2005 goto error;
2006 }
2007
2008 if (domain_init(domain, gaw)) {
2009 domain_exit(domain);
2010 goto error;
2011 }
2012
2013
2014 if (dev_tmp) {
2015 info = alloc_devinfo_mem();
2016 if (!info) {
2017 domain_exit(domain);
2018 goto error;
2019 }
2020 info->segment = segment;
2021 info->bus = bus;
2022 info->devfn = devfn;
2023 info->dev = NULL;
2024 info->domain = domain;
2025
2026 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
2027
2028
2029 found = NULL;
2030 spin_lock_irqsave(&device_domain_lock, flags);
2031 list_for_each_entry(tmp, &device_domain_list, global) {
2032 if (tmp->segment == segment &&
2033 tmp->bus == bus && tmp->devfn == devfn) {
2034 found = tmp->domain;
2035 break;
2036 }
2037 }
2038 if (found) {
2039 spin_unlock_irqrestore(&device_domain_lock, flags);
2040 free_devinfo_mem(info);
2041 domain_exit(domain);
2042 domain = found;
2043 } else {
2044 list_add(&info->link, &domain->devices);
2045 list_add(&info->global, &device_domain_list);
2046 spin_unlock_irqrestore(&device_domain_lock, flags);
2047 }
2048 }
2049
2050found_domain:
2051 info = alloc_devinfo_mem();
2052 if (!info)
2053 goto error;
2054 info->segment = segment;
2055 info->bus = pdev->bus->number;
2056 info->devfn = pdev->devfn;
2057 info->dev = pdev;
2058 info->domain = domain;
2059 spin_lock_irqsave(&device_domain_lock, flags);
2060
2061 found = find_domain(pdev);
2062 if (found != NULL) {
2063 spin_unlock_irqrestore(&device_domain_lock, flags);
2064 if (found != domain) {
2065 domain_exit(domain);
2066 domain = found;
2067 }
2068 free_devinfo_mem(info);
2069 return domain;
2070 }
2071 list_add(&info->link, &domain->devices);
2072 list_add(&info->global, &device_domain_list);
2073 pdev->dev.archdata.iommu = info;
2074 spin_unlock_irqrestore(&device_domain_lock, flags);
2075 return domain;
2076error:
2077
2078 return find_domain(pdev);
2079}
2080
2081static int iommu_identity_mapping;
2082#define IDENTMAP_ALL 1
2083#define IDENTMAP_GFX 2
2084#define IDENTMAP_AZALIA 4
2085
2086static int iommu_domain_identity_map(struct dmar_domain *domain,
2087 unsigned long long start,
2088 unsigned long long end)
2089{
2090 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2091 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2092
2093 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2094 dma_to_mm_pfn(last_vpfn))) {
2095 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2096 return -ENOMEM;
2097 }
2098
2099 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2100 start, end, domain->id);
	/*
	 * The RMRR range might overlap a physical memory range that is
	 * already mapped, so clear it first.
	 */
2105 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2106
2107 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2108 last_vpfn - first_vpfn + 1,
2109 DMA_PTE_READ|DMA_PTE_WRITE);
2110}
2111
2112static int iommu_prepare_identity_map(struct pci_dev *pdev,
2113 unsigned long long start,
2114 unsigned long long end)
2115{
2116 struct dmar_domain *domain;
2117 int ret;
2118
2119 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2120 if (!domain)
2121 return -ENOMEM;

	/*
	 * For _hardware_ passthrough, don't bother. But for software
	 * passthrough, we do it anyway: the RMRR may describe a memory
	 * range which is reserved in E820 and so didn't get set up in
	 * si_domain to start with.
	 */
2127 if (domain == si_domain && hw_pass_through) {
2128 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2129 pci_name(pdev), start, end);
2130 return 0;
2131 }
2132
2133 printk(KERN_INFO
2134 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2135 pci_name(pdev), start, end);
2136
2137 if (end < start) {
2138 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2139 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2140 dmi_get_system_info(DMI_BIOS_VENDOR),
2141 dmi_get_system_info(DMI_BIOS_VERSION),
2142 dmi_get_system_info(DMI_PRODUCT_VERSION));
2143 ret = -EIO;
2144 goto error;
2145 }
2146
2147 if (end >> agaw_to_width(domain->agaw)) {
2148 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2149 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2150 agaw_to_width(domain->agaw),
2151 dmi_get_system_info(DMI_BIOS_VENDOR),
2152 dmi_get_system_info(DMI_BIOS_VERSION),
2153 dmi_get_system_info(DMI_PRODUCT_VERSION));
2154 ret = -EIO;
2155 goto error;
2156 }
2157
2158 ret = iommu_domain_identity_map(domain, start, end);
2159 if (ret)
2160 goto error;
2161
2162
2163 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2164 if (ret)
2165 goto error;
2166
2167 return 0;
2168
2169 error:
2170 domain_exit(domain);
2171 return ret;
2172}
2173
2174static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2175 struct pci_dev *pdev)
2176{
2177 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2178 return 0;
2179 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2180 rmrr->end_address);
2181}
2182
2183#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2184static inline void iommu_prepare_isa(void)
2185{
2186 struct pci_dev *pdev;
2187 int ret;
2188
2189 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2190 if (!pdev)
2191 return;
2192
2193 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2194 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
2195
2196 if (ret)
2197 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2198 "floppy might not work\n");
2199
2200}
2201#else
2202static inline void iommu_prepare_isa(void)
2203{
2204 return;
2205}
2206#endif
2207
2208static int md_domain_init(struct dmar_domain *domain, int guest_width);
2209
2210static int __init si_domain_init(int hw)
2211{
2212 struct dmar_drhd_unit *drhd;
2213 struct intel_iommu *iommu;
2214 int nid, ret = 0;
2215
2216 si_domain = alloc_domain();
2217 if (!si_domain)
2218 return -EFAULT;
2219
2220 for_each_active_iommu(iommu, drhd) {
2221 ret = iommu_attach_domain(si_domain, iommu);
2222 if (ret) {
2223 domain_exit(si_domain);
2224 return -EFAULT;
2225 }
2226 }
2227
2228 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2229 domain_exit(si_domain);
2230 return -EFAULT;
2231 }
2232
2233 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2234 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2235 si_domain->id);
2236
2237 if (hw)
2238 return 0;
2239
2240 for_each_online_node(nid) {
2241 unsigned long start_pfn, end_pfn;
2242 int i;
2243
2244 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2245 ret = iommu_domain_identity_map(si_domain,
2246 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2247 if (ret)
2248 return ret;
2249 }
2250 }
2251
2252 return 0;
2253}
2254
2255static void domain_remove_one_dev_info(struct dmar_domain *domain,
2256 struct pci_dev *pdev);
2257static int identity_mapping(struct pci_dev *pdev)
2258{
2259 struct device_domain_info *info;
2260
2261 if (likely(!iommu_identity_mapping))
2262 return 0;
2263
2264 info = pdev->dev.archdata.iommu;
2265 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2266 return (info->domain == si_domain);
2267
2268 return 0;
2269}
2270
2271static int domain_add_dev_info(struct dmar_domain *domain,
2272 struct pci_dev *pdev,
2273 int translation)
2274{
2275 struct device_domain_info *info;
2276 unsigned long flags;
2277 int ret;
2278
2279 info = alloc_devinfo_mem();
2280 if (!info)
2281 return -ENOMEM;
2282
2283 info->segment = pci_domain_nr(pdev->bus);
2284 info->bus = pdev->bus->number;
2285 info->devfn = pdev->devfn;
2286 info->dev = pdev;
2287 info->domain = domain;
2288
2289 spin_lock_irqsave(&device_domain_lock, flags);
2290 list_add(&info->link, &domain->devices);
2291 list_add(&info->global, &device_domain_list);
2292 pdev->dev.archdata.iommu = info;
2293 spin_unlock_irqrestore(&device_domain_lock, flags);
2294
2295 ret = domain_context_mapping(domain, pdev, translation);
2296 if (ret) {
2297 spin_lock_irqsave(&device_domain_lock, flags);
2298 unlink_domain_info(info);
2299 spin_unlock_irqrestore(&device_domain_lock, flags);
2300 free_devinfo_mem(info);
2301 return ret;
2302 }
2303
2304 return 0;
2305}
2306
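/* Return true if any RMRR (reserved memory region) references this device. */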
2307static bool device_has_rmrr(struct pci_dev *dev)
2308{
2309 struct dmar_rmrr_unit *rmrr;
2310 int i;
2311
2312 for_each_rmrr_units(rmrr) {
2313 for (i = 0; i < rmrr->devices_cnt; i++) {
			/*
			 * Return true if this RMRR contains the device we
			 * are looking for.
			 */
2318 if (rmrr->devices[i] == dev)
2319 return true;
2320 }
2321 }
2322 return false;
2323}
2324
2325static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2326{
	/*
	 * We want to prevent any device associated with an RMRR from being
	 * placed into the static identity domain, because moving such a
	 * device in and out of domains loses its RMRR mapping while it may
	 * still be doing DMA to that reserved region.  USB devices are
	 * exempted, since their RMRRs are not expected to be used once the
	 * driver has initialized.
	 */
2336 if (device_has_rmrr(pdev) &&
2337 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2338 return 0;
2339
2340 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2341 return 1;
2342
2343 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2344 return 1;
2345
2346 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2347 return 0;
2348
	/*
	 * We want to start off with all devices in the 1:1 domain, and take
	 * them out later if we find they can't access all of memory.
	 *
	 * We can't do this for legacy PCI devices behind a bridge, though:
	 * every device behind the same bridge presents the same source-id,
	 * so they cannot be attached to different domains.  Hence only
	 * legacy devices sitting directly on a root bus qualify, and
	 * PCI-to-PCI / PCIe-to-PCI bridges themselves are excluded since
	 * they forward transactions on behalf of the devices behind them.
	 */
2366 if (!pci_is_pcie(pdev)) {
2367 if (!pci_is_root_bus(pdev->bus))
2368 return 0;
2369 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2370 return 0;
2371 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2372 return 0;
2373
	/*
	 * At boot time we don't yet know whether devices will be 64-bit
	 * capable.  Assume that they will be and give them an identity
	 * mapping; if that turns out to be wrong, they are taken back out
	 * of the 1:1 domain later (see iommu_no_mapping()).
	 */
2379 if (!startup) {
		/*
		 * If the device's DMA mask cannot cover all of the memory
		 * the system may ask it to address, it is not a candidate
		 * for identity mapping.
		 */
2384 u64 dma_mask = pdev->dma_mask;
2385
2386 if (pdev->dev.coherent_dma_mask &&
2387 pdev->dev.coherent_dma_mask < dma_mask)
2388 dma_mask = pdev->dev.coherent_dma_mask;
2389
2390 return dma_mask >= dma_get_required_mask(&pdev->dev);
2391 }
2392
2393 return 1;
2394}
2395
2396static int __init iommu_prepare_static_identity_mapping(int hw)
2397{
2398 struct pci_dev *pdev = NULL;
2399 int ret;
2400
2401 ret = si_domain_init(hw);
2402 if (ret)
2403 return -EFAULT;
2404
2405 for_each_pci_dev(pdev) {
2406 if (iommu_should_identity_map(pdev, 1)) {
2407 ret = domain_add_dev_info(si_domain, pdev,
2408 hw ? CONTEXT_TT_PASS_THROUGH :
2409 CONTEXT_TT_MULTI_LEVEL);
2410 if (ret) {
				/* device not associated with an iommu */
2412 if (ret == -ENODEV)
2413 continue;
2414 return ret;
2415 }
2416 pr_info("IOMMU: %s identity mapping for device %s\n",
2417 hw ? "hardware" : "software", pci_name(pdev));
2418 }
2419 }
2420
2421 return 0;
2422}
2423
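/*
 * init_dmars() brings the DMA remapping hardware into service:
 *   - allocate the per-IOMMU root entry tables and domain bookkeeping
 *   - pick queued or register-based invalidation for each unit
 *   - set up identity maps (pass-through, gfx/Azalia workarounds), the
 *     BIOS-reported RMRRs and the optional ISA/floppy unity map
 *   - program the root tables, flush caches and enable translation
 */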
2424static int __init init_dmars(void)
2425{
2426 struct dmar_drhd_unit *drhd;
2427 struct dmar_rmrr_unit *rmrr;
2428 struct pci_dev *pdev;
2429 struct intel_iommu *iommu;
2430 int i, ret;
2431
	/*
	 * For each DRHD unit:
	 *    allocate a root entry table
	 *    initialize it with every entry not present
	 * First, though, count the units so the global arrays can be sized.
	 */
2438 for_each_drhd_unit(drhd) {
		/*
		 * No locking needed: g_num_of_iommus is only written here,
		 * in the single-threaded __init path; everything else only
		 * reads it.
		 */
2444 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2445 g_num_of_iommus++;
2446 continue;
2447 }
2448 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2449 IOMMU_UNITS_SUPPORTED);
2450 }
2451
2452 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2453 GFP_KERNEL);
2454 if (!g_iommus) {
2455 printk(KERN_ERR "Allocating global iommu array failed\n");
2456 ret = -ENOMEM;
2457 goto error;
2458 }
2459
2460 deferred_flush = kzalloc(g_num_of_iommus *
2461 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2462 if (!deferred_flush) {
2463 ret = -ENOMEM;
2464 goto error;
2465 }
2466
2467 for_each_active_iommu(iommu, drhd) {
2468 g_iommus[iommu->seq_id] = iommu;
2469
2470 ret = iommu_init_domains(iommu);
2471 if (ret)
2472 goto error;
2473
		/*
		 * TBD: the root and context tables could in principle be
		 * shared between all IOMMUs; for now each unit allocates
		 * its own.
		 */
2479 ret = iommu_alloc_root_entry(iommu);
2480 if (ret) {
2481 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2482 goto error;
2483 }
2484 if (!ecap_pass_through(iommu->ecap))
2485 hw_pass_through = 0;
2486 }
2487
	/*
	 * Start from a sane, known IOMMU hardware state.
	 */
2491 for_each_active_iommu(iommu, drhd) {
		/*
		 * If queued invalidation was already set up by us (for
		 * example while enabling interrupt remapping), things are
		 * already rolling from a sane state; leave it alone.
		 */
2497 if (iommu->qi)
2498 continue;
2499
		/*
		 * Clear any previous faults.
		 */
2503 dmar_fault(-1, iommu);
2504
		/*
		 * Disable queued invalidation if it was enabled prior to OS
		 * handover; dmar_enable_qi() below sets it up cleanly.
		 */
2508 dmar_disable_qi(iommu);
2509 }
2510
2511 for_each_active_iommu(iommu, drhd) {
2512 if (dmar_enable_qi(iommu)) {
			/*
			 * Queued invalidation is not available; fall back
			 * to register-based invalidation.
			 */
2517 iommu->flush.flush_context = __iommu_flush_context;
2518 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2519 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2520 "invalidation\n",
2521 iommu->seq_id,
2522 (unsigned long long)drhd->reg_base_addr);
2523 } else {
2524 iommu->flush.flush_context = qi_flush_context;
2525 iommu->flush.flush_iotlb = qi_flush_iotlb;
2526 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2527 "invalidation\n",
2528 iommu->seq_id,
2529 (unsigned long long)drhd->reg_base_addr);
2530 }
2531 }
2532
2533 if (iommu_pass_through)
2534 iommu_identity_mapping |= IDENTMAP_ALL;
2535
2536#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2537 iommu_identity_mapping |= IDENTMAP_GFX;
2538#endif
2539
2540 check_tylersburg_isoch();
2541
	/*
	 * If any form of identity mapping was requested (pass-through,
	 * graphics or Azalia workarounds), set up the static identity
	 * domain now.  Hardware pass-through is used only when every IOMMU
	 * supports it (hw_pass_through).
	 */
2547 if (iommu_identity_mapping) {
2548 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2549 if (ret) {
2550 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2551 goto error;
2552 }
2553 }
2554
	/*
	 * For each RMRR reported by the BIOS:
	 *   for each device attached to that RMRR:
	 *     locate the DRHD for the device and allocate a domain for it
	 *     allocate page table entries covering the reserved range
	 *     if no context has been allocated for the bus yet,
	 *       allocate and initialize one and mark it present in the
	 *       root table
	 *     initialize the context with the domain and translation type
	 * This keeps devices such as USB controllers able to reach the
	 * memory regions the BIOS told us they are already using.
	 */
2568 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2569 for_each_rmrr_units(rmrr) {
2570 for (i = 0; i < rmrr->devices_cnt; i++) {
2571 pdev = rmrr->devices[i];
			/*
			 * Some BIOSes list devices in the DMAR table that
			 * do not actually exist; just skip them.
			 */
2576 if (!pdev)
2577 continue;
2578 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2579 if (ret)
2580 printk(KERN_ERR
2581 "IOMMU: mapping reserved region failed\n");
2582 }
2583 }
2584
2585 iommu_prepare_isa();
2586
	/*
	 * For each DRHD unit:
	 *   enable fault reporting
	 *   program the root entry table
	 *   globally invalidate the context cache and IOTLB
	 *   enable translation
	 */
2594 for_each_iommu(iommu, drhd) {
2595 if (drhd->ignored) {
			/*
			 * We always have to disable PMRs (protected memory
			 * regions) or DMA may fail on this device.
			 */
2600 if (force_on)
2601 iommu_disable_protect_mem_regions(iommu);
2602 continue;
2603 }
2604
2605 iommu_flush_write_buffer(iommu);
2606
2607 ret = dmar_set_interrupt(iommu);
2608 if (ret)
2609 goto error;
2610
2611 iommu_set_root_entry(iommu);
2612
2613 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2614 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2615
2616 ret = iommu_enable_translation(iommu);
2617 if (ret)
2618 goto error;
2619
2620 iommu_disable_protect_mem_regions(iommu);
2621 }
2622
2623 return 0;
2624error:
2625 for_each_active_iommu(iommu, drhd)
2626 free_dmar_iommu(iommu);
2627 kfree(deferred_flush);
2628 kfree(g_iommus);
2629 return ret;
2630}
2631
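/*
 * Everything from here down to intel_dma_ops implements the kernel DMA API
 * for devices behind the IOMMU: allocate an IOVA range in the device's
 * domain, install the mapping in the IOMMU page tables, and flush the IOTLB
 * or write buffers as the hardware requires.
 */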
/* This takes a number of _MM_ pages, not VTD pages */
2633static struct iova *intel_alloc_iova(struct device *dev,
2634 struct dmar_domain *domain,
2635 unsigned long nrpages, uint64_t dma_mask)
2636{
2637 struct pci_dev *pdev = to_pci_dev(dev);
2638 struct iova *iova = NULL;
2639
	/* Restrict dma_mask to the width that the iommu can handle. */
2641 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2642
2643 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
		/*
		 * First try to allocate an io virtual address in
		 * DMA_BIT_MASK(32), and if that fails then try allocating
		 * from the higher range.
		 */
2649 iova = alloc_iova(&domain->iovad, nrpages,
2650 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2651 if (iova)
2652 return iova;
2653 }
2654 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2655 if (unlikely(!iova)) {
2656 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2657 nrpages, pci_name(pdev));
2658 return NULL;
2659 }
2660
2661 return iova;
2662}
2663
2664static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2665{
2666 struct dmar_domain *domain;
2667 int ret;
2668
2669 domain = get_domain_for_dev(pdev,
2670 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2671 if (!domain) {
2672 printk(KERN_ERR
2673 "Allocating domain for %s failed", pci_name(pdev));
2674 return NULL;
2675 }
2676
2677
2678 if (unlikely(!domain_context_mapped(pdev))) {
2679 ret = domain_context_mapping(domain, pdev,
2680 CONTEXT_TT_MULTI_LEVEL);
2681 if (ret) {
2682 printk(KERN_ERR
2683 "Domain context map for %s failed",
2684 pci_name(pdev));
2685 return NULL;
2686 }
2687 }
2688
2689 return domain;
2690}
2691
2692static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2693{
2694 struct device_domain_info *info;
2695
2696
2697 info = dev->dev.archdata.iommu;
2698 if (likely(info))
2699 return info->domain;
2700
2701 return __get_valid_domain_for_dev(dev);
2702}
2703
2704static int iommu_dummy(struct pci_dev *pdev)
2705{
2706 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2707}
2708
/*
 * Return 1 if DMA for this device should bypass the IOMMU page tables:
 * non-PCI devices, "dummy" devices, and devices that are (or can now be)
 * identity mapped in si_domain.
 */
2710static int iommu_no_mapping(struct device *dev)
2711{
2712 struct pci_dev *pdev;
2713 int found;
2714
2715 if (unlikely(!dev_is_pci(dev)))
2716 return 1;
2717
2718 pdev = to_pci_dev(dev);
2719 if (iommu_dummy(pdev))
2720 return 1;
2721
2722 if (!iommu_identity_mapping)
2723 return 0;
2724
2725 found = identity_mapping(pdev);
2726 if (found) {
2727 if (iommu_should_identity_map(pdev, 0))
2728 return 1;
2729 else {
			/*
			 * The device can no longer address all of memory,
			 * so drop it from si_domain and fall back to a
			 * private (non-identity) mapping.
			 */
2734 domain_remove_one_dev_info(si_domain, pdev);
2735 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2736 pci_name(pdev));
2737 return 0;
2738 }
2739 } else {
		/*
		 * The device is not identity mapped at the moment (e.g. it
		 * was previously detached).  If it qualifies now, put it
		 * back into si_domain.
		 */
2744 if (iommu_should_identity_map(pdev, 0)) {
2745 int ret;
2746 ret = domain_add_dev_info(si_domain, pdev,
2747 hw_pass_through ?
2748 CONTEXT_TT_PASS_THROUGH :
2749 CONTEXT_TT_MULTI_LEVEL);
2750 if (!ret) {
2751 printk(KERN_INFO "64bit %s uses identity mapping\n",
2752 pci_name(pdev));
2753 return 1;
2754 }
2755 }
2756 }
2757
2758 return 0;
2759}
2760
2761static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2762 size_t size, int dir, u64 dma_mask)
2763{
2764 struct pci_dev *pdev = to_pci_dev(hwdev);
2765 struct dmar_domain *domain;
2766 phys_addr_t start_paddr;
2767 struct iova *iova;
2768 int prot = 0;
2769 int ret;
2770 struct intel_iommu *iommu;
2771 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2772
2773 BUG_ON(dir == DMA_NONE);
2774
2775 if (iommu_no_mapping(hwdev))
2776 return paddr;
2777
2778 domain = get_valid_domain_for_dev(pdev);
2779 if (!domain)
2780 return 0;
2781
2782 iommu = domain_get_iommu(domain);
2783 size = aligned_nrpages(paddr, size);
2784
2785 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2786 if (!iova)
2787 goto error;
2788
	/*
	 * Check if the IOMMU supports zero-length reads on write-only
	 * mappings; if it does not, read permission must always be granted.
	 */
2793 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2794 !cap_zlr(iommu->cap))
2795 prot |= DMA_PTE_READ;
2796 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2797 prot |= DMA_PTE_WRITE;
2798
	/*
	 * paddr..paddr+size may cover partial pages, so map whole pages.
	 * Note: if two parts of one page are mapped separately, we can end
	 * up with two IOVAs pointing at the same host page, but that is
	 * not a problem.
	 */
2804 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2805 mm_to_dma_pfn(paddr_pfn), size, prot);
2806 if (ret)
2807 goto error;
2808
	/* It's a non-present to present mapping.  Only flush if caching mode. */
2810 if (cap_caching_mode(iommu->cap))
2811 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2812 else
2813 iommu_flush_write_buffer(iommu);
2814
2815 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2816 start_paddr += paddr & ~PAGE_MASK;
2817 return start_paddr;
2818
2819error:
2820 if (iova)
2821 __free_iova(&domain->iovad, iova);
2822 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2823 pci_name(pdev), size, (unsigned long long)paddr, dir);
2824 return 0;
2825}
2826
2827static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2828 unsigned long offset, size_t size,
2829 enum dma_data_direction dir,
2830 struct dma_attrs *attrs)
2831{
2832 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2833 dir, to_pci_dev(dev)->dma_mask);
2834}
2835
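/*
 * Deferred unmap handling: instead of flushing the IOTLB on every
 * intel_unmap_page()/intel_unmap_sg() call, freed IOVAs are queued per
 * IOMMU in deferred_flush[] and released in batches, either from a 10ms
 * timer (flush_unmaps_timeout) or once HIGH_WATER_MARK entries have
 * accumulated in add_unmap().  intel_iommu_strict disables the batching
 * and flushes synchronously instead.
 */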
2836static void flush_unmaps(void)
2837{
2838 int i, j;
2839
2840 timer_on = 0;
2841
2842
2843 for (i = 0; i < g_num_of_iommus; i++) {
2844 struct intel_iommu *iommu = g_iommus[i];
2845 if (!iommu)
2846 continue;
2847
2848 if (!deferred_flush[i].next)
2849 continue;
2850
2851
2852 if (!cap_caching_mode(iommu->cap))
2853 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2854 DMA_TLB_GLOBAL_FLUSH);
2855 for (j = 0; j < deferred_flush[i].next; j++) {
2856 unsigned long mask;
2857 struct iova *iova = deferred_flush[i].iova[j];
2858 struct dmar_domain *domain = deferred_flush[i].domain[j];
2859
2860
2861 if (cap_caching_mode(iommu->cap))
2862 iommu_flush_iotlb_psi(iommu, domain->id,
2863 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2864 else {
2865 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2866 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2867 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2868 }
2869 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2870 }
2871 deferred_flush[i].next = 0;
2872 }
2873
2874 list_size = 0;
2875}
2876
2877static void flush_unmaps_timeout(unsigned long data)
2878{
2879 unsigned long flags;
2880
2881 spin_lock_irqsave(&async_umap_flush_lock, flags);
2882 flush_unmaps();
2883 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2884}
2885
2886static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2887{
2888 unsigned long flags;
2889 int next, iommu_id;
2890 struct intel_iommu *iommu;
2891
2892 spin_lock_irqsave(&async_umap_flush_lock, flags);
2893 if (list_size == HIGH_WATER_MARK)
2894 flush_unmaps();
2895
2896 iommu = domain_get_iommu(dom);
2897 iommu_id = iommu->seq_id;
2898
2899 next = deferred_flush[iommu_id].next;
2900 deferred_flush[iommu_id].domain[next] = dom;
2901 deferred_flush[iommu_id].iova[next] = iova;
2902 deferred_flush[iommu_id].next++;
2903
2904 if (!timer_on) {
2905 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2906 timer_on = 1;
2907 }
2908 list_size++;
2909 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2910}
2911
2912static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2913 size_t size, enum dma_data_direction dir,
2914 struct dma_attrs *attrs)
2915{
2916 struct pci_dev *pdev = to_pci_dev(dev);
2917 struct dmar_domain *domain;
2918 unsigned long start_pfn, last_pfn;
2919 struct iova *iova;
2920 struct intel_iommu *iommu;
2921
2922 if (iommu_no_mapping(dev))
2923 return;
2924
2925 domain = find_domain(pdev);
2926 BUG_ON(!domain);
2927
2928 iommu = domain_get_iommu(domain);
2929
2930 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2931 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2932 (unsigned long long)dev_addr))
2933 return;
2934
2935 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2936 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2937
2938 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2939 pci_name(pdev), start_pfn, last_pfn);
2940
2941
2942 dma_pte_clear_range(domain, start_pfn, last_pfn);
2943
2944
2945 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2946
2947 if (intel_iommu_strict) {
2948 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2949 last_pfn - start_pfn + 1, 0);
2950
2951 __free_iova(&domain->iovad, iova);
2952 } else {
2953 add_unmap(domain, iova);
		/*
		 * Queue up the release instead; batching the IOTLB flushes
		 * in flush_unmaps() saves a significant amount of CPU
		 * compared with flushing on every unmap.
		 */
2958 }
2959}
2960
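/*
 * Coherent allocations simply grab pages and run them through
 * __intel_map_single() with DMA_BIDIRECTIONAL.  GFP_DMA/GFP_DMA32 are only
 * needed when the device bypasses the IOMMU (identity mapped or dummy),
 * since otherwise the IOMMU can remap any page below the device's dma_mask.
 */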
2961static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2962 dma_addr_t *dma_handle, gfp_t flags,
2963 struct dma_attrs *attrs)
2964{
2965 void *vaddr;
2966 int order;
2967
2968 size = PAGE_ALIGN(size);
2969 order = get_order(size);
2970
2971 if (!iommu_no_mapping(hwdev))
2972 flags &= ~(GFP_DMA | GFP_DMA32);
2973 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2974 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2975 flags |= GFP_DMA;
2976 else
2977 flags |= GFP_DMA32;
2978 }
2979
2980 vaddr = (void *)__get_free_pages(flags, order);
2981 if (!vaddr)
2982 return NULL;
2983 memset(vaddr, 0, size);
2984
2985 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2986 DMA_BIDIRECTIONAL,
2987 hwdev->coherent_dma_mask);
2988 if (*dma_handle)
2989 return vaddr;
2990 free_pages((unsigned long)vaddr, order);
2991 return NULL;
2992}
2993
2994static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2995 dma_addr_t dma_handle, struct dma_attrs *attrs)
2996{
2997 int order;
2998
2999 size = PAGE_ALIGN(size);
3000 order = get_order(size);
3001
3002 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3003 free_pages((unsigned long)vaddr, order);
3004}
3005
3006static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3007 int nelems, enum dma_data_direction dir,
3008 struct dma_attrs *attrs)
3009{
3010 struct pci_dev *pdev = to_pci_dev(hwdev);
3011 struct dmar_domain *domain;
3012 unsigned long start_pfn, last_pfn;
3013 struct iova *iova;
3014 struct intel_iommu *iommu;
3015
3016 if (iommu_no_mapping(hwdev))
3017 return;
3018
3019 domain = find_domain(pdev);
3020 BUG_ON(!domain);
3021
3022 iommu = domain_get_iommu(domain);
3023
3024 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3025 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3026 (unsigned long long)sglist[0].dma_address))
3027 return;
3028
3029 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3030 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3031
3032
3033 dma_pte_clear_range(domain, start_pfn, last_pfn);
3034
3035
3036 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3037
3038 if (intel_iommu_strict) {
3039 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3040 last_pfn - start_pfn + 1, 0);
3041
3042 __free_iova(&domain->iovad, iova);
3043 } else {
3044 add_unmap(domain, iova);
		/*
		 * Queue up the release instead; batching the IOTLB flushes
		 * in flush_unmaps() saves a significant amount of CPU
		 * compared with flushing on every unmap.
		 */
3049 }
3050}
3051
3052static int intel_nontranslate_map_sg(struct device *hddev,
3053 struct scatterlist *sglist, int nelems, int dir)
3054{
3055 int i;
3056 struct scatterlist *sg;
3057
3058 for_each_sg(sglist, sg, nelems, i) {
3059 BUG_ON(!sg_page(sg));
3060 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3061 sg->dma_length = sg->length;
3062 }
3063 return nelems;
3064}
3065
3066static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3067 enum dma_data_direction dir, struct dma_attrs *attrs)
3068{
3069 int i;
3070 struct pci_dev *pdev = to_pci_dev(hwdev);
3071 struct dmar_domain *domain;
3072 size_t size = 0;
3073 int prot = 0;
3074 struct iova *iova = NULL;
3075 int ret;
3076 struct scatterlist *sg;
3077 unsigned long start_vpfn;
3078 struct intel_iommu *iommu;
3079
3080 BUG_ON(dir == DMA_NONE);
3081 if (iommu_no_mapping(hwdev))
3082 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3083
3084 domain = get_valid_domain_for_dev(pdev);
3085 if (!domain)
3086 return 0;
3087
3088 iommu = domain_get_iommu(domain);
3089
3090 for_each_sg(sglist, sg, nelems, i)
3091 size += aligned_nrpages(sg->offset, sg->length);
3092
3093 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3094 pdev->dma_mask);
3095 if (!iova) {
3096 sglist->dma_length = 0;
3097 return 0;
3098 }
3099
	/*
	 * Check if the IOMMU supports zero-length reads on write-only
	 * mappings; if it does not, read permission must always be granted.
	 */
3104 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3105 !cap_zlr(iommu->cap))
3106 prot |= DMA_PTE_READ;
3107 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3108 prot |= DMA_PTE_WRITE;
3109
3110 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3111
3112 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3113 if (unlikely(ret)) {
3114
3115 dma_pte_clear_range(domain, start_vpfn,
3116 start_vpfn + size - 1);
3117
3118 dma_pte_free_pagetable(domain, start_vpfn,
3119 start_vpfn + size - 1);
3120
3121 __free_iova(&domain->iovad, iova);
3122 return 0;
3123 }
3124
3125
3126 if (cap_caching_mode(iommu->cap))
3127 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3128 else
3129 iommu_flush_write_buffer(iommu);
3130
3131 return nelems;
3132}
3133
3134static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3135{
3136 return !dma_addr;
3137}
3138
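/*
 * These ops are installed as the global dma_ops from intel_iommu_init(),
 * so all PCI DMA mapping requests are routed through the IOMMU page tables
 * set up above.
 */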
3139struct dma_map_ops intel_dma_ops = {
3140 .alloc = intel_alloc_coherent,
3141 .free = intel_free_coherent,
3142 .map_sg = intel_map_sg,
3143 .unmap_sg = intel_unmap_sg,
3144 .map_page = intel_map_page,
3145 .unmap_page = intel_unmap_page,
3146 .mapping_error = intel_mapping_error,
3147};
3148
3149static inline int iommu_domain_cache_init(void)
3150{
3151 int ret = 0;
3152
3153 iommu_domain_cache = kmem_cache_create("iommu_domain",
3154 sizeof(struct dmar_domain),
3155 0,
3156 SLAB_HWCACHE_ALIGN,
3157
3158 NULL);
3159 if (!iommu_domain_cache) {
3160 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3161 ret = -ENOMEM;
3162 }
3163
3164 return ret;
3165}
3166
3167static inline int iommu_devinfo_cache_init(void)
3168{
3169 int ret = 0;
3170
3171 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3172 sizeof(struct device_domain_info),
3173 0,
3174 SLAB_HWCACHE_ALIGN,
3175 NULL);
3176 if (!iommu_devinfo_cache) {
3177 printk(KERN_ERR "Couldn't create devinfo cache\n");
3178 ret = -ENOMEM;
3179 }
3180
3181 return ret;
3182}
3183
3184static inline int iommu_iova_cache_init(void)
3185{
3186 int ret = 0;
3187
3188 iommu_iova_cache = kmem_cache_create("iommu_iova",
3189 sizeof(struct iova),
3190 0,
3191 SLAB_HWCACHE_ALIGN,
3192 NULL);
3193 if (!iommu_iova_cache) {
3194 printk(KERN_ERR "Couldn't create iova cache\n");
3195 ret = -ENOMEM;
3196 }
3197
3198 return ret;
3199}
3200
3201static int __init iommu_init_mempool(void)
3202{
3203 int ret;
3204 ret = iommu_iova_cache_init();
3205 if (ret)
3206 return ret;
3207
3208 ret = iommu_domain_cache_init();
3209 if (ret)
3210 goto domain_error;
3211
3212 ret = iommu_devinfo_cache_init();
3213 if (!ret)
3214 return ret;
3215
3216 kmem_cache_destroy(iommu_domain_cache);
3217domain_error:
3218 kmem_cache_destroy(iommu_iova_cache);
3219
3220 return -ENOMEM;
3221}
3222
3223static void __init iommu_exit_mempool(void)
3224{
3225 kmem_cache_destroy(iommu_devinfo_cache);
3226 kmem_cache_destroy(iommu_domain_cache);
3227 kmem_cache_destroy(iommu_iova_cache);
3228
3229}
3230
3231static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3232{
3233 struct dmar_drhd_unit *drhd;
3234 u32 vtbar;
3235 int rc;
3236
	/*
	 * We know this device on this chipset has its own IOMMU.  If we
	 * find it under a different IOMMU, the BIOS is lying to us; hope
	 * that the IOMMU for this device is actually disabled, and that it
	 * needs no translation.
	 */
3242 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3243 if (rc) {
3244
3245 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3246 return;
3247 }
3248 vtbar &= 0xffff0000;
	/* We know that this device's IOMMU should be at offset 0xa000 from
	   vtbar. */
3251 drhd = dmar_find_matched_drhd_unit(pdev);
3252 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3253 TAINT_FIRMWARE_WORKAROUND,
3254 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3255 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3256}
3257DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3258
3259static void __init init_no_remapping_devices(void)
3260{
3261 struct dmar_drhd_unit *drhd;
3262
3263 for_each_drhd_unit(drhd) {
3264 if (!drhd->include_all) {
3265 int i;
3266 for (i = 0; i < drhd->devices_cnt; i++)
3267 if (drhd->devices[i] != NULL)
3268 break;
3269
3270 if (i == drhd->devices_cnt)
3271 drhd->ignored = 1;
3272 }
3273 }
3274
3275 for_each_active_drhd_unit(drhd) {
3276 int i;
3277 if (drhd->include_all)
3278 continue;
3279
3280 for (i = 0; i < drhd->devices_cnt; i++)
3281 if (drhd->devices[i] &&
3282 !IS_GFX_DEVICE(drhd->devices[i]))
3283 break;
3284
3285 if (i < drhd->devices_cnt)
3286 continue;
3287
		/* This IOMMU has *only* gfx devices.  Either bypass it or
		   set the gfx_mapped flag, as appropriate. */
3290 if (dmar_map_gfx) {
3291 intel_iommu_gfx_mapped = 1;
3292 } else {
3293 drhd->ignored = 1;
3294 for (i = 0; i < drhd->devices_cnt; i++) {
3295 if (!drhd->devices[i])
3296 continue;
3297 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3298 }
3299 }
3300 }
3301}
3302
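/*
 * Suspend/resume support: iommu_suspend() saves each unit's fault-event
 * registers and disables translation; iommu_resume() re-programs the root
 * tables via init_iommu_hw() and restores the saved registers.
 */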
3303#ifdef CONFIG_SUSPEND
3304static int init_iommu_hw(void)
3305{
3306 struct dmar_drhd_unit *drhd;
3307 struct intel_iommu *iommu = NULL;
3308
3309 for_each_active_iommu(iommu, drhd)
3310 if (iommu->qi)
3311 dmar_reenable_qi(iommu);
3312
3313 for_each_iommu(iommu, drhd) {
3314 if (drhd->ignored) {
			/*
			 * We always have to disable PMRs (protected memory
			 * regions) or DMA may fail on this device.
			 */
3319 if (force_on)
3320 iommu_disable_protect_mem_regions(iommu);
3321 continue;
3322 }
3323
3324 iommu_flush_write_buffer(iommu);
3325
3326 iommu_set_root_entry(iommu);
3327
3328 iommu->flush.flush_context(iommu, 0, 0, 0,
3329 DMA_CCMD_GLOBAL_INVL);
3330 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3331 DMA_TLB_GLOBAL_FLUSH);
3332 if (iommu_enable_translation(iommu))
3333 return 1;
3334 iommu_disable_protect_mem_regions(iommu);
3335 }
3336
3337 return 0;
3338}
3339
3340static void iommu_flush_all(void)
3341{
3342 struct dmar_drhd_unit *drhd;
3343 struct intel_iommu *iommu;
3344
3345 for_each_active_iommu(iommu, drhd) {
3346 iommu->flush.flush_context(iommu, 0, 0, 0,
3347 DMA_CCMD_GLOBAL_INVL);
3348 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3349 DMA_TLB_GLOBAL_FLUSH);
3350 }
3351}
3352
3353static int iommu_suspend(void)
3354{
3355 struct dmar_drhd_unit *drhd;
3356 struct intel_iommu *iommu = NULL;
3357 unsigned long flag;
3358
3359 for_each_active_iommu(iommu, drhd) {
3360 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3361 GFP_ATOMIC);
3362 if (!iommu->iommu_state)
3363 goto nomem;
3364 }
3365
3366 iommu_flush_all();
3367
3368 for_each_active_iommu(iommu, drhd) {
3369 iommu_disable_translation(iommu);
3370
3371 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3372
3373 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3374 readl(iommu->reg + DMAR_FECTL_REG);
3375 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3376 readl(iommu->reg + DMAR_FEDATA_REG);
3377 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3378 readl(iommu->reg + DMAR_FEADDR_REG);
3379 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3380 readl(iommu->reg + DMAR_FEUADDR_REG);
3381
3382 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3383 }
3384 return 0;
3385
3386nomem:
3387 for_each_active_iommu(iommu, drhd)
3388 kfree(iommu->iommu_state);
3389
3390 return -ENOMEM;
3391}
3392
3393static void iommu_resume(void)
3394{
3395 struct dmar_drhd_unit *drhd;
3396 struct intel_iommu *iommu = NULL;
3397 unsigned long flag;
3398
3399 if (init_iommu_hw()) {
3400 if (force_on)
3401 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3402 else
3403 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3404 return;
3405 }
3406
3407 for_each_active_iommu(iommu, drhd) {
3408
3409 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3410
3411 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3412 iommu->reg + DMAR_FECTL_REG);
3413 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3414 iommu->reg + DMAR_FEDATA_REG);
3415 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3416 iommu->reg + DMAR_FEADDR_REG);
3417 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3418 iommu->reg + DMAR_FEUADDR_REG);
3419
3420 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3421 }
3422
3423 for_each_active_iommu(iommu, drhd)
3424 kfree(iommu->iommu_state);
3425}
3426
3427static struct syscore_ops iommu_syscore_ops = {
3428 .resume = iommu_resume,
3429 .suspend = iommu_suspend,
3430};
3431
3432static void __init init_iommu_pm_ops(void)
3433{
3434 register_syscore_ops(&iommu_syscore_ops);
3435}
3436
3437#else
3438static inline void init_iommu_pm_ops(void) {}
3439#endif
3440
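/*
 * ACPI DMAR parsing for RMRR (reserved memory region reporting) and ATSR
 * (root ports that may use Address Translation Services) structures.  The
 * device scopes are resolved later, from dmar_parse_rmrr_atsr_dev().
 */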
3441LIST_HEAD(dmar_rmrr_units);
3442
3443static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3444{
3445 list_add(&rmrr->list, &dmar_rmrr_units);
3446}
3447
3448
3449int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3450{
3451 struct acpi_dmar_reserved_memory *rmrr;
3452 struct dmar_rmrr_unit *rmrru;
3453
3454 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3455 if (!rmrru)
3456 return -ENOMEM;
3457
3458 rmrru->hdr = header;
3459 rmrr = (struct acpi_dmar_reserved_memory *)header;
3460 rmrru->base_address = rmrr->base_address;
3461 rmrru->end_address = rmrr->end_address;
3462
3463 dmar_register_rmrr_unit(rmrru);
3464 return 0;
3465}
3466
3467static int __init
3468rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3469{
3470 struct acpi_dmar_reserved_memory *rmrr;
3471
3472 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3473 return dmar_parse_dev_scope((void *)(rmrr + 1),
3474 ((void *)rmrr) + rmrr->header.length,
3475 &rmrru->devices_cnt, &rmrru->devices,
3476 rmrr->segment);
3477}
3478
3479static LIST_HEAD(dmar_atsr_units);
3480
3481int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3482{
3483 struct acpi_dmar_atsr *atsr;
3484 struct dmar_atsr_unit *atsru;
3485
3486 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3487 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3488 if (!atsru)
3489 return -ENOMEM;
3490
3491 atsru->hdr = hdr;
3492 atsru->include_all = atsr->flags & 0x1;
3493
3494 list_add(&atsru->list, &dmar_atsr_units);
3495
3496 return 0;
3497}
3498
3499static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3500{
3501 struct acpi_dmar_atsr *atsr;
3502
3503 if (atsru->include_all)
3504 return 0;
3505
3506 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3507 return dmar_parse_dev_scope((void *)(atsr + 1),
3508 (void *)atsr + atsr->header.length,
3509 &atsru->devices_cnt, &atsru->devices,
3510 atsr->segment);
3511}
3512
3513static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3514{
3515 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3516 kfree(atsru);
3517}
3518
3519static void intel_iommu_free_dmars(void)
3520{
3521 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3522 struct dmar_atsr_unit *atsru, *atsr_n;
3523
3524 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3525 list_del(&rmrru->list);
3526 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3527 kfree(rmrru);
3528 }
3529
3530 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3531 list_del(&atsru->list);
3532 intel_iommu_free_atsr(atsru);
3533 }
3534}
3535
3536int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3537{
3538 int i;
3539 struct pci_bus *bus;
3540 struct acpi_dmar_atsr *atsr;
3541 struct dmar_atsr_unit *atsru;
3542
3543 dev = pci_physfn(dev);
3544
3545 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3546 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3547 if (atsr->segment == pci_domain_nr(dev->bus))
3548 goto found;
3549 }
3550
3551 return 0;
3552
3553found:
3554 for (bus = dev->bus; bus; bus = bus->parent) {
3555 struct pci_dev *bridge = bus->self;
3556
3557 if (!bridge || !pci_is_pcie(bridge) ||
3558 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3559 return 0;
3560
3561 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
3562 for (i = 0; i < atsru->devices_cnt; i++)
3563 if (atsru->devices[i] == bridge)
3564 return 1;
3565 break;
3566 }
3567 }
3568
3569 if (atsru->include_all)
3570 return 1;
3571
3572 return 0;
3573}
3574
3575int __init dmar_parse_rmrr_atsr_dev(void)
3576{
3577 struct dmar_rmrr_unit *rmrr;
3578 struct dmar_atsr_unit *atsr;
3579 int ret = 0;
3580
3581 list_for_each_entry(rmrr, &dmar_rmrr_units, list) {
3582 ret = rmrr_parse_dev(rmrr);
3583 if (ret)
3584 return ret;
3585 }
3586
3587 list_for_each_entry(atsr, &dmar_atsr_units, list) {
3588 ret = atsr_parse_dev(atsr);
3589 if (ret)
3590 return ret;
3591 }
3592
3593 return ret;
3594}
3595
/*
 * Bus notifier: when a driver is unbound from a device (and we are not in
 * pass-through mode), detach the device from its DMAR domain; the domain
 * itself is freed once it has no devices left and is neither a VM domain
 * nor the static identity domain.
 */
3602static int device_notifier(struct notifier_block *nb,
3603 unsigned long action, void *data)
3604{
3605 struct device *dev = data;
3606 struct pci_dev *pdev = to_pci_dev(dev);
3607 struct dmar_domain *domain;
3608
3609 if (iommu_no_mapping(dev))
3610 return 0;
3611
3612 domain = find_domain(pdev);
3613 if (!domain)
3614 return 0;
3615
3616 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3617 domain_remove_one_dev_info(domain, pdev);
3618
3619 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3620 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3621 list_empty(&domain->devices))
3622 domain_exit(domain);
3623 }
3624
3625 return 0;
3626}
3627
3628static struct notifier_block device_nb = {
3629 .notifier_call = device_notifier,
3630};
3631
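/*
 * Top-level init for VT-d DMA remapping: parse the DMAR table and device
 * scopes, run init_dmars(), switch dma_ops over to intel_dma_ops, and
 * register the IOMMU-API ops plus the driver-unbind notifier.
 */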
3632int __init intel_iommu_init(void)
3633{
3634 int ret = -ENODEV;
3635 struct dmar_drhd_unit *drhd;
3636 struct intel_iommu *iommu;
3637
	/* VT-d is required for a TXT/tboot launch, so enforce that. */
3639 force_on = tboot_force_iommu();
3640
3641 if (dmar_table_init()) {
3642 if (force_on)
3643 panic("tboot: Failed to initialize DMAR table\n");
3644 goto out_free_dmar;
3645 }
3646
	/*
	 * Disable translation if it was already enabled prior to OS handover.
	 */
3650 for_each_active_iommu(iommu, drhd)
3651 if (iommu->gcmd & DMA_GCMD_TE)
3652 iommu_disable_translation(iommu);
3653
3654 if (dmar_dev_scope_init() < 0) {
3655 if (force_on)
3656 panic("tboot: Failed to initialize DMAR device scope\n");
3657 goto out_free_dmar;
3658 }
3659
3660 if (no_iommu || dmar_disabled)
3661 goto out_free_dmar;
3662
3663 if (iommu_init_mempool()) {
3664 if (force_on)
3665 panic("tboot: Failed to initialize iommu memory\n");
3666 goto out_free_dmar;
3667 }
3668
3669 if (list_empty(&dmar_rmrr_units))
3670 printk(KERN_INFO "DMAR: No RMRR found\n");
3671
3672 if (list_empty(&dmar_atsr_units))
3673 printk(KERN_INFO "DMAR: No ATSR found\n");
3674
3675 if (dmar_init_reserved_ranges()) {
3676 if (force_on)
3677 panic("tboot: Failed to reserve iommu ranges\n");
3678 goto out_free_mempool;
3679 }
3680
3681 init_no_remapping_devices();
3682
3683 ret = init_dmars();
3684 if (ret) {
3685 if (force_on)
3686 panic("tboot: Failed to initialize DMARs\n");
3687 printk(KERN_ERR "IOMMU: dmar init failed\n");
3688 goto out_free_reserved_range;
3689 }
3690 printk(KERN_INFO
3691 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3692
3693 init_timer(&unmap_timer);
3694#ifdef CONFIG_SWIOTLB
3695 swiotlb = 0;
3696#endif
3697 dma_ops = &intel_dma_ops;
3698
3699 init_iommu_pm_ops();
3700
3701 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3702
3703 bus_register_notifier(&pci_bus_type, &device_nb);
3704
3705 intel_iommu_enabled = 1;
3706
3707 return 0;
3708
3709out_free_reserved_range:
3710 put_iova_domain(&reserved_iova_list);
3711out_free_mempool:
3712 iommu_exit_mempool();
3713out_free_dmar:
3714 intel_iommu_free_dmars();
3715 return ret;
3716}
3717
3718static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3719 struct pci_dev *pdev)
3720{
3721 struct pci_dev *tmp, *parent;
3722
3723 if (!iommu || !pdev)
3724 return;
3725
3726
3727 tmp = pci_find_upstream_pcie_bridge(pdev);
3728
3729 if (tmp) {
3730 parent = pdev->bus->self;
3731 while (parent != tmp) {
3732 iommu_detach_dev(iommu, parent->bus->number,
3733 parent->devfn);
3734 parent = parent->bus->self;
3735 }
3736 if (pci_is_pcie(tmp))
3737 iommu_detach_dev(iommu,
3738 tmp->subordinate->number, 0);
3739 else
3740 iommu_detach_dev(iommu, tmp->bus->number,
3741 tmp->devfn);
3742 }
3743}
3744
3745static void domain_remove_one_dev_info(struct dmar_domain *domain,
3746 struct pci_dev *pdev)
3747{
3748 struct device_domain_info *info, *tmp;
3749 struct intel_iommu *iommu;
3750 unsigned long flags;
3751 int found = 0;
3752
3753 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3754 pdev->devfn);
3755 if (!iommu)
3756 return;
3757
3758 spin_lock_irqsave(&device_domain_lock, flags);
3759 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
3760 if (info->segment == pci_domain_nr(pdev->bus) &&
3761 info->bus == pdev->bus->number &&
3762 info->devfn == pdev->devfn) {
3763 unlink_domain_info(info);
3764 spin_unlock_irqrestore(&device_domain_lock, flags);
3765
3766 iommu_disable_dev_iotlb(info);
3767 iommu_detach_dev(iommu, info->bus, info->devfn);
3768 iommu_detach_dependent_devices(iommu, pdev);
3769 free_devinfo_mem(info);
3770
3771 spin_lock_irqsave(&device_domain_lock, flags);
3772
3773 if (found)
3774 break;
3775 else
3776 continue;
3777 }
3778
		/*
		 * Note whether some other device in this domain sits behind
		 * the same IOMMU; if none does, the IOMMU can be released
		 * from the domain below.
		 */
3783 if (iommu == device_to_iommu(info->segment, info->bus,
3784 info->devfn))
3785 found = 1;
3786 }
3787
3788 spin_unlock_irqrestore(&device_domain_lock, flags);
3789
3790 if (found == 0) {
3791 unsigned long tmp_flags;
3792 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3793 clear_bit(iommu->seq_id, domain->iommu_bmp);
3794 domain->iommu_count--;
3795 domain_update_iommu_cap(domain);
3796 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3797
3798 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3799 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3800 spin_lock_irqsave(&iommu->lock, tmp_flags);
3801 clear_bit(domain->id, iommu->domain_ids);
3802 iommu->domains[domain->id] = NULL;
3803 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3804 }
3805 }
3806}
3807
3808static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3809{
3810 struct device_domain_info *info;
3811 struct intel_iommu *iommu;
3812 unsigned long flags1, flags2;
3813
3814 spin_lock_irqsave(&device_domain_lock, flags1);
3815 while (!list_empty(&domain->devices)) {
3816 info = list_entry(domain->devices.next,
3817 struct device_domain_info, link);
3818 unlink_domain_info(info);
3819 spin_unlock_irqrestore(&device_domain_lock, flags1);
3820
3821 iommu_disable_dev_iotlb(info);
3822 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3823 iommu_detach_dev(iommu, info->bus, info->devfn);
3824 iommu_detach_dependent_devices(iommu, info->dev);
3825
		/* Clear this iommu in iommu_bmp, update the iommu count and
		 * the domain capabilities.
		 */
3829 spin_lock_irqsave(&domain->iommu_lock, flags2);
3830 if (test_and_clear_bit(iommu->seq_id,
3831 domain->iommu_bmp)) {
3832 domain->iommu_count--;
3833 domain_update_iommu_cap(domain);
3834 }
3835 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3836
3837 free_devinfo_mem(info);
3838 spin_lock_irqsave(&device_domain_lock, flags1);
3839 }
3840 spin_unlock_irqrestore(&device_domain_lock, flags1);
3841}
3842
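/*
 * The code below backs the generic IOMMU API (e.g. VFIO/KVM device
 * assignment).  "VM" domains are allocated by intel_iommu_domain_init(),
 * take their ids from vm_domid rather than a specific unit's domain_ids,
 * and may span several IOMMUs as devices are attached to them.
 */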
/* Domain id source for virtual machine domains; it is not written into the
   context entry directly. */
3844static atomic_t vm_domid = ATOMIC_INIT(0);
3845
3846static struct dmar_domain *iommu_alloc_vm_domain(void)
3847{
3848 struct dmar_domain *domain;
3849
3850 domain = alloc_domain_mem();
3851 if (!domain)
3852 return NULL;
3853
3854 domain->id = atomic_inc_return(&vm_domid);
3855 domain->nid = -1;
3856 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3857 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3858
3859 return domain;
3860}
3861
3862static int md_domain_init(struct dmar_domain *domain, int guest_width)
3863{
3864 int adjust_width;
3865
3866 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3867 spin_lock_init(&domain->iommu_lock);
3868
3869 domain_reserve_special_ranges(domain);
3870
3871
3872 domain->gaw = guest_width;
3873 adjust_width = guestwidth_to_adjustwidth(guest_width);
3874 domain->agaw = width_to_agaw(adjust_width);
3875
3876 INIT_LIST_HEAD(&domain->devices);
3877
3878 domain->iommu_count = 0;
3879 domain->iommu_coherency = 0;
3880 domain->iommu_snooping = 0;
3881 domain->iommu_superpage = 0;
3882 domain->max_addr = 0;
3883 domain->nid = -1;
3884
3885
3886 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3887 if (!domain->pgd)
3888 return -ENOMEM;
3889 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3890 return 0;
3891}
3892
3893static void iommu_free_vm_domain(struct dmar_domain *domain)
3894{
3895 unsigned long flags;
3896 struct dmar_drhd_unit *drhd;
3897 struct intel_iommu *iommu;
3898 unsigned long i;
3899 unsigned long ndomains;
3900
3901 for_each_active_iommu(iommu, drhd) {
3902 ndomains = cap_ndoms(iommu->cap);
3903 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3904 if (iommu->domains[i] == domain) {
3905 spin_lock_irqsave(&iommu->lock, flags);
3906 clear_bit(i, iommu->domain_ids);
3907 iommu->domains[i] = NULL;
3908 spin_unlock_irqrestore(&iommu->lock, flags);
3909 break;
3910 }
3911 }
3912 }
3913}
3914
3915static void vm_domain_exit(struct dmar_domain *domain)
3916{
3917
3918 if (!domain)
3919 return;
3920
3921 vm_domain_remove_all_dev_info(domain);
3922
3923 put_iova_domain(&domain->iovad);
3924
3925
3926 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3927
3928
3929 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3930
3931 iommu_free_vm_domain(domain);
3932 free_domain_mem(domain);
3933}
3934
3935static int intel_iommu_domain_init(struct iommu_domain *domain)
3936{
3937 struct dmar_domain *dmar_domain;
3938
3939 dmar_domain = iommu_alloc_vm_domain();
3940 if (!dmar_domain) {
3941 printk(KERN_ERR
3942 "intel_iommu_domain_init: dmar_domain == NULL\n");
3943 return -ENOMEM;
3944 }
3945 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3946 printk(KERN_ERR
3947 "intel_iommu_domain_init() failed\n");
3948 vm_domain_exit(dmar_domain);
3949 return -ENOMEM;
3950 }
3951 domain_update_iommu_cap(dmar_domain);
3952 domain->priv = dmar_domain;
3953
3954 domain->geometry.aperture_start = 0;
3955 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3956 domain->geometry.force_aperture = true;
3957
3958 return 0;
3959}
3960
3961static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3962{
3963 struct dmar_domain *dmar_domain = domain->priv;
3964
3965 domain->priv = NULL;
3966 vm_domain_exit(dmar_domain);
3967}
3968
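/*
 * Attaching a device may shrink the domain: the guest address width is
 * clamped to what this IOMMU supports, and surplus top-level page-table
 * levels are stripped before the device is added to the domain.
 */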
3969static int intel_iommu_attach_device(struct iommu_domain *domain,
3970 struct device *dev)
3971{
3972 struct dmar_domain *dmar_domain = domain->priv;
3973 struct pci_dev *pdev = to_pci_dev(dev);
3974 struct intel_iommu *iommu;
3975 int addr_width;
3976
3977
3978 if (unlikely(domain_context_mapped(pdev))) {
3979 struct dmar_domain *old_domain;
3980
3981 old_domain = find_domain(pdev);
3982 if (old_domain) {
3983 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3984 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3985 domain_remove_one_dev_info(old_domain, pdev);
3986 else
3987 domain_remove_dev_info(old_domain);
3988 }
3989 }
3990
3991 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3992 pdev->devfn);
3993 if (!iommu)
3994 return -ENODEV;
3995
3996
3997 addr_width = agaw_to_width(iommu->agaw);
3998 if (addr_width > cap_mgaw(iommu->cap))
3999 addr_width = cap_mgaw(iommu->cap);
4000
4001 if (dmar_domain->max_addr > (1LL << addr_width)) {
4002 printk(KERN_ERR "%s: iommu width (%d) is not "
4003 "sufficient for the mapped address (%llx)\n",
4004 __func__, addr_width, dmar_domain->max_addr);
4005 return -EFAULT;
4006 }
4007 dmar_domain->gaw = addr_width;
4008
	/*
	 * Knock out extra levels of page tables if they are not needed.
	 */
4012 while (iommu->agaw < dmar_domain->agaw) {
4013 struct dma_pte *pte;
4014
4015 pte = dmar_domain->pgd;
4016 if (dma_pte_present(pte)) {
4017 dmar_domain->pgd = (struct dma_pte *)
4018 phys_to_virt(dma_pte_addr(pte));
4019 free_pgtable_page(pte);
4020 }
4021 dmar_domain->agaw--;
4022 }
4023
4024 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4025}
4026
4027static void intel_iommu_detach_device(struct iommu_domain *domain,
4028 struct device *dev)
4029{
4030 struct dmar_domain *dmar_domain = domain->priv;
4031 struct pci_dev *pdev = to_pci_dev(dev);
4032
4033 domain_remove_one_dev_info(dmar_domain, pdev);
4034}
4035
4036static int intel_iommu_map(struct iommu_domain *domain,
4037 unsigned long iova, phys_addr_t hpa,
4038 size_t size, int iommu_prot)
4039{
4040 struct dmar_domain *dmar_domain = domain->priv;
4041 u64 max_addr;
4042 int prot = 0;
4043 int ret;
4044
4045 if (iommu_prot & IOMMU_READ)
4046 prot |= DMA_PTE_READ;
4047 if (iommu_prot & IOMMU_WRITE)
4048 prot |= DMA_PTE_WRITE;
4049 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4050 prot |= DMA_PTE_SNP;
4051
4052 max_addr = iova + size;
4053 if (dmar_domain->max_addr < max_addr) {
4054 u64 end;
4055
4056
4057 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4058 if (end < max_addr) {
4059 printk(KERN_ERR "%s: iommu width (%d) is not "
4060 "sufficient for the mapped address (%llx)\n",
4061 __func__, dmar_domain->gaw, max_addr);
4062 return -EFAULT;
4063 }
4064 dmar_domain->max_addr = max_addr;
4065 }
	/* Round size up to the next multiple of PAGE_SIZE, if it and the low
	   bits of hpa would take us onto the next page. */
4068 size = aligned_nrpages(hpa, size);
4069 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4070 hpa >> VTD_PAGE_SHIFT, size, prot);
4071 return ret;
4072}
4073
4074static size_t intel_iommu_unmap(struct iommu_domain *domain,
4075 unsigned long iova, size_t size)
4076{
4077 struct dmar_domain *dmar_domain = domain->priv;
4078 int order;
4079
4080 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4081 (iova + size - 1) >> VTD_PAGE_SHIFT);
4082
4083 if (dmar_domain->max_addr == iova + size)
4084 dmar_domain->max_addr = iova;
4085
4086 return PAGE_SIZE << order;
4087}
4088
4089static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4090 dma_addr_t iova)
4091{
4092 struct dmar_domain *dmar_domain = domain->priv;
4093 struct dma_pte *pte;
4094 u64 phys = 0;
4095
4096 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4097 if (pte)
4098 phys = dma_pte_addr(pte);
4099
4100 return phys;
4101}
4102
4103static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4104 unsigned long cap)
4105{
4106 struct dmar_domain *dmar_domain = domain->priv;
4107
4108 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4109 return dmar_domain->iommu_snooping;
4110 if (cap == IOMMU_CAP_INTR_REMAP)
4111 return irq_remapping_enabled;
4112
4113 return 0;
4114}
4115
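/*
 * IOMMU group construction.  Devices that the IOMMU cannot tell apart or
 * cannot isolate from each other (legacy PCI behind the same bridge,
 * multifunction devices or bridges without the required ACS features) are
 * placed in the same iommu_group, so they can only be assigned together.
 */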
4116#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4117
4118static int intel_iommu_add_device(struct device *dev)
4119{
4120 struct pci_dev *pdev = to_pci_dev(dev);
4121 struct pci_dev *bridge, *dma_pdev = NULL;
4122 struct iommu_group *group;
4123 int ret;
4124
4125 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4126 pdev->bus->number, pdev->devfn))
4127 return -ENODEV;
4128
4129 bridge = pci_find_upstream_pcie_bridge(pdev);
4130 if (bridge) {
4131 if (pci_is_pcie(bridge))
4132 dma_pdev = pci_get_domain_bus_and_slot(
4133 pci_domain_nr(pdev->bus),
4134 bridge->subordinate->number, 0);
4135 if (!dma_pdev)
4136 dma_pdev = pci_dev_get(bridge);
4137 } else
4138 dma_pdev = pci_dev_get(pdev);
4139
	/* Account for quirked devices. */
4141 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4142
	/*
	 * If it's a multifunction device that does not support our required
	 * ACS flags, add it to the same group as the lowest-numbered
	 * function that also does not support the required ACS flags.
	 */
4148 if (dma_pdev->multifunction &&
4149 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4150 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4151
4152 for (i = 0; i < 8; i++) {
4153 struct pci_dev *tmp;
4154
4155 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4156 if (!tmp)
4157 continue;
4158
4159 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4160 swap_pci_ref(&dma_pdev, tmp);
4161 break;
4162 }
4163 pci_dev_put(tmp);
4164 }
4165 }
4166
	/*
	 * Devices on the root bus go through the IOMMU.  If that's not us,
	 * find the next upstream device and test ACS up to the root bus.
	 * Finding the next device may require skipping virtual buses.
	 */
4172 while (!pci_is_root_bus(dma_pdev->bus)) {
4173 struct pci_bus *bus = dma_pdev->bus;
4174
4175 while (!bus->self) {
4176 if (!pci_is_root_bus(bus))
4177 bus = bus->parent;
4178 else
4179 goto root_bus;
4180 }
4181
4182 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4183 break;
4184
4185 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4186 }
4187
4188root_bus:
4189 group = iommu_group_get(&dma_pdev->dev);
4190 pci_dev_put(dma_pdev);
4191 if (!group) {
4192 group = iommu_group_alloc();
4193 if (IS_ERR(group))
4194 return PTR_ERR(group);
4195 }
4196
4197 ret = iommu_group_add_device(group, dev);
4198
4199 iommu_group_put(group);
4200 return ret;
4201}
4202
4203static void intel_iommu_remove_device(struct device *dev)
4204{
4205 iommu_group_remove_device(dev);
4206}
4207
4208static struct iommu_ops intel_iommu_ops = {
4209 .domain_init = intel_iommu_domain_init,
4210 .domain_destroy = intel_iommu_domain_destroy,
4211 .attach_dev = intel_iommu_attach_device,
4212 .detach_dev = intel_iommu_detach_device,
4213 .map = intel_iommu_map,
4214 .unmap = intel_iommu_unmap,
4215 .iova_to_phys = intel_iommu_iova_to_phys,
4216 .domain_has_cap = intel_iommu_domain_has_cap,
4217 .add_device = intel_iommu_add_device,
4218 .remove_device = intel_iommu_remove_device,
4219 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4220};
4221
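/*
 * PCI quirks for chipsets with broken or incomplete VT-d implementations:
 * disable the graphics IOMMU where it is unusable, force write-buffer
 * flushing where the capability bit is missing, and work around the
 * Calpella/Ironlake shadow-GTT and Tylersburg isoch issues below.
 */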
4222static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4223{
	/* G4x/GM45 integrated graphics DMAR support is completely broken. */
4225 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4226 dmar_map_gfx = 0;
4227}
4228
4229DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4230DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4231DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4232DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4233DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4234DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4235DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4236
4237static void quirk_iommu_rwbf(struct pci_dev *dev)
4238{
	/*
	 * Mobile 4 Series Chipset neglects to set the RWBF capability bit
	 * but still needs write-buffer flushing.  The same seems to hold
	 * for the desktop versions.
	 */
4243 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4244 rwbf_quirk = 1;
4245}
4246
4247DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4248DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4249DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4250DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4251DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4252DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4253DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4254
4255#define GGC 0x52
4256#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4257#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4258#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4259#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4260#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4261#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4262#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4263#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4264
4265static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4266{
4267 unsigned short ggc;
4268
4269 if (pci_read_config_word(dev, GGC, &ggc))
4270 return;
4271
4272 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4273 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4274 dmar_map_gfx = 0;
4275 } else if (dmar_map_gfx) {
		/* We have to make sure the gfx device is idle before we flush. */
4277 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4278 intel_iommu_strict = 1;
4279 }
4280}
4281DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4282DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4283DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4284DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4285
/*
 * On Tylersburg chipsets, some BIOSes have been known to enable the ISOCH
 * DMAR unit for the Azalia sound device, but not give it any TLB entries,
 * which causes it to deadlock.  Check for that.  We do this in a function
 * called from init_dmars(), instead of in a PCI quirk, because we don't
 * want to print the obnoxious "BIOS broken" message if VT-d is disabled.
 */
4293static void __init check_tylersburg_isoch(void)
4294{
4295 struct pci_dev *pdev;
4296 uint32_t vtisochctrl;
4297
	/* If we have no Azalia in the system anyway, ignore. */
4299 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4300 if (!pdev)
4301 return;
4302 pci_dev_put(pdev);
4303
	/* System Management Registers.  Might be hidden, in which case we
	   can't do the sanity check.  But that's OK, because the
	   known-broken BIOSes _don't_ actually hide it, so far. */
4307 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4308 if (!pdev)
4309 return;
4310
4311 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4312 pci_dev_put(pdev);
4313 return;
4314 }
4315
4316 pci_dev_put(pdev);
4317
	/* If Azalia DMA is routed to the non-isoch DMAR unit, we're fine. */
4319 if (vtisochctrl & 1)
4320 return;
4321
	/* Drop all bits other than the number of TLB entries. */
4323 vtisochctrl &= 0x1c;
4324
	/* If we have the recommended number of TLB entries (16), we're fine. */
4326 if (vtisochctrl == 0x10)
4327 return;
4328
	/* Zero TLB entries?  That is the broken BIOS case: warn and force an
	   identity mapping for Azalia (IDENTMAP_AZALIA) to work around it. */
4330 if (!vtisochctrl) {
4331 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4332 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4333 dmi_get_system_info(DMI_BIOS_VENDOR),
4334 dmi_get_system_info(DMI_BIOS_VERSION),
4335 dmi_get_system_info(DMI_PRODUCT_VERSION));
4336 iommu_identity_mapping |= IDENTMAP_AZALIA;
4337 return;
4338 }
4339
4340 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4341 vtisochctrl);
4342}
4343