/*
 * Intel VT-d (DMA remapping) IOMMU driver.
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
   to match. That way, we can use 'unsigned long' for PFNs with impunity. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/*
 * This bitmap is used to advertise the page sizes our hardware supports
 * to the IOMMU core, which will then use this information to split
 * physical pages into virtual pages.
 *
 * Traditionally the IOMMU core just handed us the mappings directly,
 * after making sure the size was an order of a 4KiB page and that the
 * mapping had natural alignment. Since this driver walks and populates
 * the page tables itself, splitting larger regions into 4KiB or
 * superpage PTEs as appropriate, it can advertise support for every
 * page size above 4KiB.
 */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << ((lvl - 1) * LEVEL_STRIDE);
}
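
/*
 * Worked example (illustrative, not part of the driver): for the
 * default 48-bit address width, width_to_agaw(48) = (48 - 30) / 9 = 2,
 * and agaw_to_level(2) = 4, i.e. a four-level page table. Level 1 PTEs
 * map 4KiB pages; each level above covers LEVEL_STRIDE (9) more bits,
 * so level_size(2) = 512 pages = 2MiB, level_size(3) = 1GiB, and so on.
 */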

/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
   are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}
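
/*
 * Example (illustrative): on x86 with 4KiB pages, PAGE_SHIFT equals
 * VTD_PAGE_SHIFT (both 12), so the shifts above are by zero and mm and
 * DMA PFNs coincide. On a configuration with larger CPU pages, one mm
 * PFN corresponds to several consecutive VT-d PFNs.
 */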

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/*
 * set to 1 to panic kernel if can't successfully enable VT-d
 * (used when kernel is launched w/ TXT)
 */
static int force_on = 0;

/*
 * Root entry layout:
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root) ? phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

/*
 * Context entry layout.
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
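
/*
 * Illustrative sketch (values are examples only) of how the helpers
 * above cooperate when a context entry is programmed; see
 * domain_context_mapping_one() further down for the real sequence:
 *
 *	context_set_domain_id(context, 42);
 *	context_set_address_root(context, virt_to_phys(domain->pgd));
 *	context_set_address_width(context, iommu->agaw);
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 */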

/*
 * DMA PTE layout:
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-10: available
 * 11: snoop behavior
 * 12-63: host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & (1 << 7));
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

/*
 * This domain is a statically identity mapping domain.
 *	1. This domain creates a static 1:1 mapping to all usable memory.
 *	2. It maps to each iommu if successful.
 *	3. Each iommu maps to this domain if successful.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine; more than one device
 * across iommus may be owned in one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

/* si_domain contains multiple devices */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 2)

/* define the limit of IOMMUs supported in each domain */
#ifdef CONFIG_X86
# define IOMMU_UNITS_SUPPORTED	MAX_IO_APICS
#else
# define IOMMU_UNITS_SUPPORTED	64
#endif

struct dmar_domain {
	int	id;			/* domain id */
	int	nid;			/* node id */
	DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
					/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* level of superpages supported:
					   0 == 4KiB (no superpages), 1 == 2MiB,
					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
	u64		max_addr;	/* maximum mapped address */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	int segment;		/* PCI segment (domain) number */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* NULL for a PCIe-to-PCI bridge */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static struct iommu_ops intel_iommu_ops;
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable supported super page\n");
			intel_iommu_superpage = 0;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
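
/*
 * Example usage (illustrative) on the kernel command line; options may
 * be combined with commas:
 *
 *	intel_iommu=on
 *	intel_iommu=on,strict
 *	intel_iommu=on,igfx_off,sp_off
 */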

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}


static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use a default agaw, and
 * get a supported, smaller agaw for iommus that don't support the
 * default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns the single iommu in a domain */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domain should not get here. */
	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
	BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);

	iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);

	domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
}

static void domain_update_iommu_snooping(struct dmar_domain *domain)
{
	int i;

	domain->iommu_snooping = 1;

	for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
		if (!ecap_sc_support(g_iommus[i]->ecap)) {
			domain->iommu_snooping = 0;
			break;
		}
	}
}

static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu = NULL;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		domain->iommu_superpage = 0;
		return;
	}

	/* set iommu_superpage to the smallest common denominator */
	for_each_active_iommu(iommu, drhd) {
		mask &= cap_super_page_val(iommu->cap);
		if (!mask) {
			break;
		}
	}
	domain->iommu_superpage = fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain_update_iommu_snooping(domain);
	domain_update_iommu_superpage(domain);
}

static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;
		if (segment != drhd->segment)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++) {
			if (drhd->devices[i] &&
			    drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;
			if (drhd->devices[i] &&
			    drhd->devices[i]->subordinate &&
			    drhd->devices[i]->subordinate->number <= bus &&
			    drhd->devices[i]->subordinate->busn_res.end >= bus)
				return drhd->iommu;
		}

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
						     u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)
				alloc_pgtable_page(iommu->node);
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
				    sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int target_level)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);
	BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
	parent = domain->pgd;

	while (level > 0) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval)) {
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			} else {
				dma_pte_addr(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	return pte;
}
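
/*
 * Example (illustrative): with a four-level table (agaw 2) and
 * target_level 1, pfn_to_dma_pte() walks levels 4, 3 and 2, allocating
 * any missing directory pages on the way down, and returns the level-1
 * PTE at index pfn_level_offset(pfn, 1) == (pfn & LEVEL_MASK) in the
 * last table.
 */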

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (pte->val & DMA_PTE_LARGE_PAGE) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte, a tlb flush should be followed */
static int dma_pte_clear_range(struct dmar_domain *domain,
			       unsigned long start_pfn,
			       unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;
	int order;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);

	order = (large_page - 1) * 9;
	return order;
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	struct dma_pte *first_pte, *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	unsigned long tmp;
	int large_page = 2;

	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
	BUG_ON(start_pfn > last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start_pfn, level);

		/* If we can't even clear one PTE at this level, we're done */
		if (tmp + level_size(level) - 1 > last_pfn)
			return;

		do {
			large_page = level;
			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
			if (large_page > level)
				level = large_page + 1;
			if (!pte) {
				tmp = align_to_level(tmp + 1, level + 1);
				continue;
			}
			do {
				if (dma_pte_present(pte)) {
					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
					dma_clear_pte(pte);
				}
				pte++;
				tmp += level_size(level);
			} while (!first_pte_in_page(pte) &&
				 tmp + level_size(level) - 1 <= last_pfn);

			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);

		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
		level++;
	}
	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 sts;
	unsigned long flag;

	addr = iommu->root_entry;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		      dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}

#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: only uses the first TLB register currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		      dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			 (unsigned long long)DMA_TLB_IIRG(type),
			 (unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *iommu_support_dev_iotlb(
	struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
{
	int found = 0;
	unsigned long flags;
	struct device_domain_info *info;
	struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);

	if (!ecap_dev_iotlb_support(iommu->ecap))
		return NULL;

	if (!iommu->qi)
		return NULL;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link)
		if (info->bus == bus && info->devfn == devfn) {
			found = 1;
			break;
		}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (!found || !info->dev)
		return NULL;

	if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
		return NULL;

	if (!dmar_find_matched_atsr_unit(info->dev))
		return NULL;

	info->iommu = iommu;

	return info;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	if (!info)
		return;

	pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	if (!info->dev || !pci_ats_enabled(info->dev))
		return;

	pci_disable_ats(info->dev);
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->dev || !pci_ats_enabled(info->dev))
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = pci_ats_queue_depth(info->dev);
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
				  unsigned long pfn, unsigned int pages, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;

	BUG_ON(pages == 0);

	/*
	 * Fall back to domain-selective flush if there is no PSI support
	 * or the size is too big.
	 * PSI requires the page count to be 2 ^ x, with the base address
	 * naturally aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
					 DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr, mask,
					 DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present
	 * require a flush. However, the device IOTLB doesn't need to be
	 * flushed in this case.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
}
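
/*
 * Example (illustrative): flushing pages = 8 at pfn 0x1000 gives
 * mask = ilog2(__roundup_pow_of_two(8)) = 3, i.e. a naturally aligned
 * 2^3-page (32KiB) page-selective invalidation at address 0x1000000,
 * provided the hardware supports PSI and an address mask of at least 3;
 * otherwise the code above falls back to a domain-selective flush.
 */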

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		      readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}


static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("IOMMU %d: Number of Domains supported <%ld>\n", iommu->seq_id,
		 ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
				 GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain id 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}

static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;
	unsigned long flags;

	if ((iommu->domains) && (iommu->domain_ids)) {
		for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
			domain = iommu->domains[i];
			clear_bit(i, iommu->domain_ids);

			spin_lock_irqsave(&domain->iommu_lock, flags);
			if (--domain->iommu_count == 0) {
				if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
					vm_domain_exit(domain);
				else
					domain_exit(domain);
			}
			spin_unlock_irqrestore(&domain->iommu_lock, flags);
		}
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		irq_set_handler_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (g_iommus[i])
			break;
	}

	if (i == g_num_of_iommus)
		kfree(g_iommus);

	/* free context mapping */
	free_context_table(iommu);
}

static struct dmar_domain *alloc_domain(void)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	domain->nid = -1;
	memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
	domain->flags = 0;

	return domain;
}

static int iommu_attach_domain(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num;
	unsigned long ndomains;
	unsigned long flags;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);

	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return -ENOMEM;
	}

	domain->id = num;
	set_bit(num, iommu->domain_ids);
	set_bit(iommu->seq_id, domain->iommu_bmp);
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_detach_domain(struct dmar_domain *domain,
				struct intel_iommu *iommu)
{
	unsigned long flags;
	int num, ndomains;
	int found = 0;

	spin_lock_irqsave(&iommu->lock, flags);
	ndomains = cap_ndoms(iommu->cap);
	for_each_set_bit(num, iommu->domain_ids, ndomains) {
		if (iommu->domains[num] == domain) {
			found = 1;
			break;
		}
	}

	if (found) {
		clear_bit(num, iommu->domain_ids);
		clear_bit(iommu->seq_id, domain->iommu_bmp);
		iommu->domains[num] = NULL;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
			  &reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
			    IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		printk(KERN_ERR "Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				printk(KERN_ERR "Reserve iova failed\n");
				return -ENODEV;
			}
		}
	}
	return 0;
}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
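
/*
 * Example (illustrative): gaw = 48 gives r = (48 - 12) % 9 = 0, so 48
 * is returned unchanged; gaw = 32 gives r = 2, so the width is rounded
 * up to the next page-table boundary: 32 + 9 - 2 = 39.
 */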

static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	domain->iommu_count = 1;
	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict)
		flush_unmaps_timeout(0);

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	for_each_active_iommu(iommu, drhd)
		if (test_bit(iommu->seq_id, domain->iommu_bmp))
			iommu_detach_domain(domain, iommu);

	free_domain_mem(domain);
}

static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
				      u8 bus, u8 devfn, int translation)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;
	struct dma_pte *pgd;
	unsigned long num;
	unsigned long ndomains;
	int id;
	int agaw;
	struct device_domain_info *info = NULL;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);
	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
	       translation != CONTEXT_TT_MULTI_LEVEL);

	iommu = device_to_iommu(segment, bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	id = domain->id;
	pgd = domain->pgd;

	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
	    domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
		int found = 0;

		/* find an available domain id for this device in iommu */
		ndomains = cap_ndoms(iommu->cap);
		for_each_set_bit(num, iommu->domain_ids, ndomains) {
			if (iommu->domains[num] == domain) {
				id = num;
				found = 1;
				break;
			}
		}

		if (found == 0) {
			num = find_first_zero_bit(iommu->domain_ids, ndomains);
			if (num >= ndomains) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				printk(KERN_ERR "IOMMU: no free domain ids\n");
				return -EFAULT;
			}

			set_bit(num, iommu->domain_ids);
			iommu->domains[num] = domain;
			id = num;
		}

		/* Skip top levels of page tables for
		 * iommu which has less agaw than default.
		 * Unnecessary for PT mode.
		 */
		if (translation != CONTEXT_TT_PASS_THROUGH) {
			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
				pgd = phys_to_virt(dma_pte_addr(pgd));
				if (!dma_pte_present(pgd)) {
					spin_unlock_irqrestore(&iommu->lock, flags);
					return -ENOMEM;
				}
			}
		}
	}

	context_set_domain_id(context, id);

	if (translation != CONTEXT_TT_PASS_THROUGH) {
		info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
		translation = info ? CONTEXT_TT_DEV_IOTLB :
				     CONTEXT_TT_MULTI_LEVEL;
	}
	/*
	 * In pass through mode, AW must be programmed to indicate the largest
	 * AGAW value supported by hardware. And ASR is ignored by hardware.
	 */
	if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
		context_set_address_width(context, iommu->msagaw);
	else {
		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);
	}

	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping. If hardware doesn't cache
	 * non-present entries, we only need to flush the write-buffer. If
	 * it _does_ cache non-present entries, then it does so in the
	 * special domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);
	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
		domain->iommu_count++;
		if (domain->iommu_count == 1)
			domain->nid = iommu->node;
		domain_update_iommu_cap(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}

static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
		       int translation)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
					 pdev->bus->number, pdev->devfn,
					 translation);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain,
						 pci_domain_nr(parent->bus),
						 parent->bus->number,
						 parent->devfn, translation);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp)) /* a PCIe-to-PCI bridge */
		return domain_context_mapping_one(domain,
					pci_domain_nr(tmp->subordinate),
					tmp->subordinate->number, 0,
					translation);
	else /* a legacy PCI bridge */
		return domain_context_mapping_one(domain,
						  pci_domain_nr(tmp->bus),
						  tmp->bus->number,
						  tmp->devfn,
						  translation);
}

static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
				pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
					    parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (pci_is_pcie(tmp))
		return device_context_mapped(iommu, tmp->subordinate->number,
					     0);
	else
		return device_context_mapped(iommu, tmp->bus->number,
					     tmp->devfn);
}

/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
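
/*
 * Example (illustrative): with 4KiB pages, host_addr = 0x1234 and
 * size = 0x2000 leave an in-page offset of 0x234, and
 * PAGE_ALIGN(0x234 + 0x2000) >> VTD_PAGE_SHIFT = 3, since the buffer
 * straddles three pages.
 */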

/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc. Lower bits set in either
	   of them will mean we have to use smaller pages. So just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}
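
/*
 * Example (illustrative): iov_pfn = phy_pfn = 0x200 (both 2MiB aligned,
 * low 9 bits clear) with pages = 1024 and superpage support >= 1
 * returns level 2, so a single 2MiB PTE can be used; a low bit set in
 * either pfn, or fewer than 512 pages, forces level 1 (4KiB PTEs).
 */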

static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
	unsigned long sg_res;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (sg)
		sg_res = 0;
	else {
		sg_res = nr_pages + 1;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
			if (!pte)
				return -ENOMEM;
			/* It is large page */
			if (largepage_lvl > 1) {
				pteval |= DMA_PTE_LARGE_PAGE;
				/* Ensure that old small page tables are
				   removed to make room for the superpage,
				   if they exist. */
				dma_pte_clear_range(domain, iov_pfn,
						    iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
				dma_pte_free_pagetable(domain, iov_pfn,
						       iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			static int dumps = 5;
			printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
			       iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		   need to flush the cache on the entries we've just written.
		   And then we'll need to recalculate 'pte', so clear it and
		   let it get set again in the if (!pte) block above.

		   If we're done (!nr_pages) we need to flush the cache too.

		   Also if we've been setting superpages, we may need to
		   recalculate 'pte' and switch back to smaller pages for the
		   end of the mapping, if the trailing size is not enough to
		   use another superpage (i.e. sg_res < lvl_pages). */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}

static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}

static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
				   DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

static inline void unlink_domain_info(struct device_domain_info *info)
{
	assert_spin_locked(&device_domain_lock);
	list_del(&info->link);
	list_del(&info->global);
	if (info->dev)
		info->dev->dev.archdata.iommu = NULL;
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
				  struct device_domain_info, link);
		unlink_domain_info(info);
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu_disable_dev_iotlb(info);
		iommu = device_to_iommu(info->segment, info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

/*
 * find_domain
 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}

/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;
	int segment;
	int ret;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	segment = pci_domain_nr(pdev->bus);

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (pci_is_pcie(dev_tmp)) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->segment == segment &&
			    info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	domain = alloc_domain();
	if (!domain)
		goto error;

	/* Allocate a new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
		       pci_name(pdev));
		free_domain_mem(domain);
		return NULL;
	}
	iommu = drhd->iommu;

	ret = iommu_attach_domain(domain, iommu);
	if (ret) {
		free_domain_mem(domain);
		goto error;
	}

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->segment = segment;
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->segment == segment &&
			    tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			spin_unlock_irqrestore(&device_domain_lock, flags);
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
			spin_unlock_irqrestore(&device_domain_lock, flags);
		}
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->segment = segment;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}

static int iommu_identity_mapping;
#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

static int iommu_domain_identity_map(struct dmar_domain *domain,
				     unsigned long long start,
				     unsigned long long end)
{
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;

	if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
			  dma_to_mm_pfn(last_vpfn))) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		return -ENOMEM;
	}

	pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
		 start, end, domain->id);
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, first_vpfn, last_vpfn);

	return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
				  last_vpfn - first_vpfn + 1,
				  DMA_PTE_READ|DMA_PTE_WRITE);
}

static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	int ret;

	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* For _hardware_ passthrough, don't bother. But for software
	   passthrough, we do it anyway -- it may indicate a memory
	   range which is reserved in E820, so which didn't need to be
	   mapped with the physically contiguous mapping scheme */
	if (domain == si_domain && hw_pass_through) {
		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
		       pci_name(pdev), start, end);
		return 0;
	}

	printk(KERN_INFO
	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
	       pci_name(pdev), start, end);

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	if (end >> agaw_to_width(domain->agaw)) {
		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		     agaw_to_width(domain->agaw),
		     dmi_get_system_info(DMI_BIOS_VENDOR),
		     dmi_get_system_info(DMI_BIOS_VERSION),
		     dmi_get_system_info(DMI_PRODUCT_VERSION));
		ret = -EIO;
		goto error;
	}

	ret = iommu_domain_identity_map(domain, start, end);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
	if (ret)
		goto error;

	return 0;

 error:
	domain_exit(domain);
	return ret;
}

static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
					 struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
					  rmrr->end_address);
}

#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
		       "floppy might not work\n");

}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif
2233
2234static int md_domain_init(struct dmar_domain *domain, int guest_width);
2235
2236static int __init si_domain_init(int hw)
2237{
2238 struct dmar_drhd_unit *drhd;
2239 struct intel_iommu *iommu;
2240 int nid, ret = 0;
2241
2242 si_domain = alloc_domain();
2243 if (!si_domain)
2244 return -EFAULT;
2245
2246 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2247
2248 for_each_active_iommu(iommu, drhd) {
2249 ret = iommu_attach_domain(si_domain, iommu);
2250 if (ret) {
2251 domain_exit(si_domain);
2252 return -EFAULT;
2253 }
2254 }
2255
2256 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2257 domain_exit(si_domain);
2258 return -EFAULT;
2259 }
2260
2261 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2262
2263 if (hw)
2264 return 0;
2265
2266 for_each_online_node(nid) {
2267 unsigned long start_pfn, end_pfn;
2268 int i;
2269
2270 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2271 ret = iommu_domain_identity_map(si_domain,
2272 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2273 if (ret)
2274 return ret;
2275 }
2276 }
2277
2278 return 0;
2279}
2280
2281static void domain_remove_one_dev_info(struct dmar_domain *domain,
2282 struct pci_dev *pdev);
2283static int identity_mapping(struct pci_dev *pdev)
2284{
2285 struct device_domain_info *info;
2286
2287 if (likely(!iommu_identity_mapping))
2288 return 0;
2289
2290 info = pdev->dev.archdata.iommu;
2291 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2292 return (info->domain == si_domain);
2293
2294 return 0;
2295}
2296
2297static int domain_add_dev_info(struct dmar_domain *domain,
2298 struct pci_dev *pdev,
2299 int translation)
2300{
2301 struct device_domain_info *info;
2302 unsigned long flags;
2303 int ret;
2304
2305 info = alloc_devinfo_mem();
2306 if (!info)
2307 return -ENOMEM;
2308
2309 info->segment = pci_domain_nr(pdev->bus);
2310 info->bus = pdev->bus->number;
2311 info->devfn = pdev->devfn;
2312 info->dev = pdev;
2313 info->domain = domain;
2314
2315 spin_lock_irqsave(&device_domain_lock, flags);
2316 list_add(&info->link, &domain->devices);
2317 list_add(&info->global, &device_domain_list);
2318 pdev->dev.archdata.iommu = info;
2319 spin_unlock_irqrestore(&device_domain_lock, flags);
2320
2321 ret = domain_context_mapping(domain, pdev, translation);
2322 if (ret) {
2323 spin_lock_irqsave(&device_domain_lock, flags);
2324 unlink_domain_info(info);
2325 spin_unlock_irqrestore(&device_domain_lock, flags);
2326 free_devinfo_mem(info);
2327 return ret;
2328 }
2329
2330 return 0;
2331}
2332
2333static bool device_has_rmrr(struct pci_dev *dev)
2334{
2335 struct dmar_rmrr_unit *rmrr;
2336 int i;
2337
2338 for_each_rmrr_units(rmrr) {
2339 for (i = 0; i < rmrr->devices_cnt; i++) {
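 /*
  * Return true if this RMRR claims the device that was
  * passed in.
  */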
2344 if (rmrr->devices[i] == dev)
2345 return true;
2346 }
2347 }
2348 return false;
2349}
2350
2351static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2352{
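 /*
  * Keep devices with RMRRs out of the static identity domain: their
  * RMRR mappings are lost when they move between domains. USB devices
  * are exempted, as their RMRRs are not needed once the OS takes over.
  */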
2362 if (device_has_rmrr(pdev) &&
2363 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2364 return 0;
2365
2366 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2367 return 1;
2368
2369 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2370 return 1;
2371
2372 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2373 return 0;
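
 /*
  * Start with every device in the 1:1 domain and pull devices out
  * later if they turn out unable to address all of memory.
  *
  * This cannot be done for conventional PCI devices behind a bridge:
  * all devices behind one bridge share a source-id, so they would be
  * forced in and out of the 1:1 domain together. PCIe-to-PCI bridges
  * are excluded for the same reason.
  */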
2392 if (!pci_is_pcie(pdev)) {
2393 if (!pci_is_root_bus(pdev->bus))
2394 return 0;
2395 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2396 return 0;
2397 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2398 return 0;
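
 /*
  * At boot time we do not yet know whether a device is 64-bit capable,
  * so assume it is; the run-time check below can pull it back out of
  * the 1:1 domain later.
  */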
2405 if (!startup) {
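 /*
  * If the device's dma_mask is less than the system's memory
  * size then it is not a candidate for identity mapping.
  */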
2410 u64 dma_mask = pdev->dma_mask;
2411
2412 if (pdev->dev.coherent_dma_mask &&
2413 pdev->dev.coherent_dma_mask < dma_mask)
2414 dma_mask = pdev->dev.coherent_dma_mask;
2415
2416 return dma_mask >= dma_get_required_mask(&pdev->dev);
2417 }
2418
2419 return 1;
2420}
2421
2422static int __init iommu_prepare_static_identity_mapping(int hw)
2423{
2424 struct pci_dev *pdev = NULL;
2425 int ret;
2426
2427 ret = si_domain_init(hw);
2428 if (ret)
2429 return -EFAULT;
2430
2431 for_each_pci_dev(pdev) {
2432 if (iommu_should_identity_map(pdev, 1)) {
2433 ret = domain_add_dev_info(si_domain, pdev,
2434 hw ? CONTEXT_TT_PASS_THROUGH :
2435 CONTEXT_TT_MULTI_LEVEL);
2436 if (ret) {
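 /* device not associated with an iommu */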
2438 if (ret == -ENODEV)
2439 continue;
2440 return ret;
2441 }
2442 pr_info("IOMMU: %s identity mapping for device %s\n",
2443 hw ? "hardware" : "software", pci_name(pdev));
2444 }
2445 }
2446
2447 return 0;
2448}
2449
2450static int __init init_dmars(void)
2451{
2452 struct dmar_drhd_unit *drhd;
2453 struct dmar_rmrr_unit *rmrr;
2454 struct pci_dev *pdev;
2455 struct intel_iommu *iommu;
2456 int i, ret;
2457
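 /*
  * Count the DMAR units so the global iommu and deferred-flush arrays
  * can be sized; translation itself is enabled at the end.
  */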
2464 for_each_drhd_unit(drhd) {
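 /*
  * No lock needed: the counter is only incremented on the single
  * threaded __init path; all other access is read only.
  */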
2470 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2471 g_num_of_iommus++;
2472 continue;
2473 }
2474 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2475 IOMMU_UNITS_SUPPORTED);
2476 }
2477
2478 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2479 GFP_KERNEL);
2480 if (!g_iommus) {
2481 printk(KERN_ERR "Allocating global iommu array failed\n");
2482 ret = -ENOMEM;
2483 goto error;
2484 }
2485
2486 deferred_flush = kzalloc(g_num_of_iommus *
2487 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2488 if (!deferred_flush) {
2489 ret = -ENOMEM;
2490 goto error;
2491 }
2492
2493 for_each_drhd_unit(drhd) {
2494 if (drhd->ignored)
2495 continue;
2496
2497 iommu = drhd->iommu;
2498 g_iommus[iommu->seq_id] = iommu;
2499
2500 ret = iommu_init_domains(iommu);
2501 if (ret)
2502 goto error;
2503
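 /*
  * TBD: the root and context tables could conceivably be shared
  * among all IOMMUs; for now each unit allocates its own.
  */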
2509 ret = iommu_alloc_root_entry(iommu);
2510 if (ret) {
2511 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2512 goto error;
2513 }
2514 if (!ecap_pass_through(iommu->ecap))
2515 hw_pass_through = 0;
2516 }
2517
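 /*
  * Start from a known-sane hardware state on every unit.
  */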
2521 for_each_drhd_unit(drhd) {
2522 if (drhd->ignored)
2523 continue;
2524
2525 iommu = drhd->iommu;
2526
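 /*
  * If queued invalidation was already initialized by us (e.g. while
  * enabling interrupt remapping), the unit is already in a sane
  * state; skip the reset below.
  */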
2532 if (iommu->qi)
2533 continue;
2534
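 /*
  * Clear any previous faults.
  */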
2538 dmar_fault(-1, iommu);
2539
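 /*
  * Disable queued invalidation if it was enabled before OS handover.
  */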
2543 dmar_disable_qi(iommu);
2544 }
2545
2546 for_each_drhd_unit(drhd) {
2547 if (drhd->ignored)
2548 continue;
2549
2550 iommu = drhd->iommu;
2551
2552 if (dmar_enable_qi(iommu)) {
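 /*
  * Queued invalidation could not be enabled; fall back to
  * register-based invalidation.
  */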
2557 iommu->flush.flush_context = __iommu_flush_context;
2558 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2559 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
2560 "invalidation\n",
2561 iommu->seq_id,
2562 (unsigned long long)drhd->reg_base_addr);
2563 } else {
2564 iommu->flush.flush_context = qi_flush_context;
2565 iommu->flush.flush_iotlb = qi_flush_iotlb;
2566 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
2567 "invalidation\n",
2568 iommu->seq_id,
2569 (unsigned long long)drhd->reg_base_addr);
2570 }
2571 }
2572
2573 if (iommu_pass_through)
2574 iommu_identity_mapping |= IDENTMAP_ALL;
2575
2576#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2577 iommu_identity_mapping |= IDENTMAP_GFX;
2578#endif
2579
2580 check_tylersburg_isoch();
2581
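 /*
  * If any identity mapping was requested (pass-through, gfx or
  * azalia), set up the static identity domain now.
  */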
2587 if (iommu_identity_mapping) {
2588 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2589 if (ret) {
2590 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2591 goto error;
2592 }
2593 }
2594
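 /*
  * For each RMRR reported by the BIOS, set up an identity mapping for
  * every device it names, so the reserved region stays reachable once
  * translation is enabled.
  */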
2608 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2609 for_each_rmrr_units(rmrr) {
2610 for (i = 0; i < rmrr->devices_cnt; i++) {
2611 pdev = rmrr->devices[i];
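 /*
  * Some BIOSes list non-existent devices in the DMAR table;
  * skip them.
  */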
2616 if (!pdev)
2617 continue;
2618 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2619 if (ret)
2620 printk(KERN_ERR
2621 "IOMMU: mapping reserved region failed\n");
2622 }
2623 }
2624
2625 iommu_prepare_isa();
2626
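 /*
  * Final per-unit bring-up: flush write buffers, register the fault
  * handler, install the root entry, globally invalidate the context
  * and IOTLB caches, then enable translation.
  */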
2634 for_each_drhd_unit(drhd) {
2635 if (drhd->ignored) {
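 /*
  * We always have to disable PMRs or DMA may fail on
  * this device.
  */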
2640 if (force_on)
2641 iommu_disable_protect_mem_regions(drhd->iommu);
2642 continue;
2643 }
2644 iommu = drhd->iommu;
2645
2646 iommu_flush_write_buffer(iommu);
2647
2648 ret = dmar_set_interrupt(iommu);
2649 if (ret)
2650 goto error;
2651
2652 iommu_set_root_entry(iommu);
2653
2654 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2655 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2656
2657 ret = iommu_enable_translation(iommu);
2658 if (ret)
2659 goto error;
2660
2661 iommu_disable_protect_mem_regions(iommu);
2662 }
2663
2664 return 0;
2665error:
2666 for_each_drhd_unit(drhd) {
2667 if (drhd->ignored)
2668 continue;
2669 iommu = drhd->iommu;
2670 free_iommu(iommu);
2671 }
2672 kfree(g_iommus);
2673 return ret;
2674}
2675
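/* This takes a number of _MM_ pages, not VTD pages */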
2677static struct iova *intel_alloc_iova(struct device *dev,
2678 struct dmar_domain *domain,
2679 unsigned long nrpages, uint64_t dma_mask)
2680{
2681 struct pci_dev *pdev = to_pci_dev(dev);
2682 struct iova *iova = NULL;
2683
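 /* Restrict dma_mask to the width that the iommu can handle */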
2685 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2686
2687 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
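 /*
  * First try to allocate an io virtual address in
  * DMA_BIT_MASK(32), and fall back to the full range
  * only if that fails.
  */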
2693 iova = alloc_iova(&domain->iovad, nrpages,
2694 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2695 if (iova)
2696 return iova;
2697 }
2698 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2699 if (unlikely(!iova)) {
2700 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2701 nrpages, pci_name(pdev));
2702 return NULL;
2703 }
2704
2705 return iova;
2706}
2707
2708static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2709{
2710 struct dmar_domain *domain;
2711 int ret;
2712
2713 domain = get_domain_for_dev(pdev,
2714 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2715 if (!domain) {
2716 printk(KERN_ERR
2717 "Allocating domain for %s failed", pci_name(pdev));
2718 return NULL;
2719 }
2720
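 /* make sure context mapping is ok */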
2722 if (unlikely(!domain_context_mapped(pdev))) {
2723 ret = domain_context_mapping(domain, pdev,
2724 CONTEXT_TT_MULTI_LEVEL);
2725 if (ret) {
2726 printk(KERN_ERR
2727 "Domain context map for %s failed",
2728 pci_name(pdev));
2729 return NULL;
2730 }
2731 }
2732
2733 return domain;
2734}
2735
2736static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2737{
2738 struct device_domain_info *info;
2739
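 /* No lock here, assumes no domain exit in normal case */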
2741 info = dev->dev.archdata.iommu;
2742 if (likely(info))
2743 return info->domain;
2744
2745 return __get_valid_domain_for_dev(dev);
2746}
2747
2748static int iommu_dummy(struct pci_dev *pdev)
2749{
2750 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2751}
2752
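/* Check whether the device must go through the non-identity map/unmap path. */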
2754static int iommu_no_mapping(struct device *dev)
2755{
2756 struct pci_dev *pdev;
2757 int found;
2758
2759 if (unlikely(dev->bus != &pci_bus_type))
2760 return 1;
2761
2762 pdev = to_pci_dev(dev);
2763 if (iommu_dummy(pdev))
2764 return 1;
2765
2766 if (!iommu_identity_mapping)
2767 return 0;
2768
2769 found = identity_mapping(pdev);
2770 if (found) {
2771 if (iommu_should_identity_map(pdev, 0))
2772 return 1;
2773 else {
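 /*
  * The 32-bit device is removed from si_domain and falls back
  * to non-identity mapping.
  */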
2778 domain_remove_one_dev_info(si_domain, pdev);
2779 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2780 pci_name(pdev));
2781 return 0;
2782 }
2783 } else {
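 /*
  * A 64-bit capable device that is not yet mapped is put into
  * si_domain for identity mapping.
  */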
2788 if (iommu_should_identity_map(pdev, 0)) {
2789 int ret;
2790 ret = domain_add_dev_info(si_domain, pdev,
2791 hw_pass_through ?
2792 CONTEXT_TT_PASS_THROUGH :
2793 CONTEXT_TT_MULTI_LEVEL);
2794 if (!ret) {
2795 printk(KERN_INFO "64bit %s uses identity mapping\n",
2796 pci_name(pdev));
2797 return 1;
2798 }
2799 }
2800 }
2801
2802 return 0;
2803}
2804
2805static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2806 size_t size, int dir, u64 dma_mask)
2807{
2808 struct pci_dev *pdev = to_pci_dev(hwdev);
2809 struct dmar_domain *domain;
2810 phys_addr_t start_paddr;
2811 struct iova *iova;
2812 int prot = 0;
2813 int ret;
2814 struct intel_iommu *iommu;
2815 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2816
2817 BUG_ON(dir == DMA_NONE);
2818
2819 if (iommu_no_mapping(hwdev))
2820 return paddr;
2821
2822 domain = get_valid_domain_for_dev(pdev);
2823 if (!domain)
2824 return 0;
2825
2826 iommu = domain_get_iommu(domain);
2827 size = aligned_nrpages(paddr, size);
2828
2829 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
2830 if (!iova)
2831 goto error;
2832
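 /*
  * Check if DMAR supports zero-length reads on write-only
  * mappings.
  */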
2837 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2838 !cap_zlr(iommu->cap))
2839 prot |= DMA_PTE_READ;
2840 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2841 prot |= DMA_PTE_WRITE;
2842
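 /*
  * paddr .. paddr + size may span partial pages, so map the whole
  * page. Note: if two parts of one page are separately mapped, we
  * may have two guest addresses mapping to the same host paddr,
  * but this is not a big problem.
  */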
2848 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2849 mm_to_dma_pfn(paddr_pfn), size, prot);
2850 if (ret)
2851 goto error;
2852
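 /* it's a non-present to present mapping. Only flush if caching mode */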
2854 if (cap_caching_mode(iommu->cap))
2855 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
2856 else
2857 iommu_flush_write_buffer(iommu);
2858
2859 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2860 start_paddr += paddr & ~PAGE_MASK;
2861 return start_paddr;
2862
2863error:
2864 if (iova)
2865 __free_iova(&domain->iovad, iova);
2866 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2867 pci_name(pdev), size, (unsigned long long)paddr, dir);
2868 return 0;
2869}
2870
2871static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2872 unsigned long offset, size_t size,
2873 enum dma_data_direction dir,
2874 struct dma_attrs *attrs)
2875{
2876 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2877 dir, to_pci_dev(dev)->dma_mask);
2878}
2879
2880static void flush_unmaps(void)
2881{
2882 int i, j;
2883
2884 timer_on = 0;
2885
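 /* just flush them all */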
2887 for (i = 0; i < g_num_of_iommus; i++) {
2888 struct intel_iommu *iommu = g_iommus[i];
2889 if (!iommu)
2890 continue;
2891
2892 if (!deferred_flush[i].next)
2893 continue;
2894
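 /* In caching mode, global flushes turn emulation expensive */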
2896 if (!cap_caching_mode(iommu->cap))
2897 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2898 DMA_TLB_GLOBAL_FLUSH);
2899 for (j = 0; j < deferred_flush[i].next; j++) {
2900 unsigned long mask;
2901 struct iova *iova = deferred_flush[i].iova[j];
2902 struct dmar_domain *domain = deferred_flush[i].domain[j];
2903
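 /* On real hardware multiple invalidations are expensive */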
2905 if (cap_caching_mode(iommu->cap))
2906 iommu_flush_iotlb_psi(iommu, domain->id,
2907 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2908 else {
2909 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2910 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2911 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2912 }
2913 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2914 }
2915 deferred_flush[i].next = 0;
2916 }
2917
2918 list_size = 0;
2919}
2920
2921static void flush_unmaps_timeout(unsigned long data)
2922{
2923 unsigned long flags;
2924
2925 spin_lock_irqsave(&async_umap_flush_lock, flags);
2926 flush_unmaps();
2927 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2928}
2929
2930static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2931{
2932 unsigned long flags;
2933 int next, iommu_id;
2934 struct intel_iommu *iommu;
2935
2936 spin_lock_irqsave(&async_umap_flush_lock, flags);
2937 if (list_size == HIGH_WATER_MARK)
2938 flush_unmaps();
2939
2940 iommu = domain_get_iommu(dom);
2941 iommu_id = iommu->seq_id;
2942
2943 next = deferred_flush[iommu_id].next;
2944 deferred_flush[iommu_id].domain[next] = dom;
2945 deferred_flush[iommu_id].iova[next] = iova;
2946 deferred_flush[iommu_id].next++;
2947
2948 if (!timer_on) {
2949 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2950 timer_on = 1;
2951 }
2952 list_size++;
2953 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2954}
2955
2956static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2957 size_t size, enum dma_data_direction dir,
2958 struct dma_attrs *attrs)
2959{
2960 struct pci_dev *pdev = to_pci_dev(dev);
2961 struct dmar_domain *domain;
2962 unsigned long start_pfn, last_pfn;
2963 struct iova *iova;
2964 struct intel_iommu *iommu;
2965
2966 if (iommu_no_mapping(dev))
2967 return;
2968
2969 domain = find_domain(pdev);
2970 BUG_ON(!domain);
2971
2972 iommu = domain_get_iommu(domain);
2973
2974 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2975 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2976 (unsigned long long)dev_addr))
2977 return;
2978
2979 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2980 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2981
2982 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2983 pci_name(pdev), start_pfn, last_pfn);
2984
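 /* clear the whole page */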
2986 dma_pte_clear_range(domain, start_pfn, last_pfn);
2987
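 /* free page tables */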
2989 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2990
2991 if (intel_iommu_strict) {
2992 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2993 last_pfn - start_pfn + 1, 0);
2994
2995 __free_iova(&domain->iovad, iova);
2996 } else {
2997 add_unmap(domain, iova);
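 /*
  * Defer the iova release so the IOTLB flush can be batched,
  * rather than paying for a flush on every unmap.
  */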
3002 }
3003}
3004
3005static void *intel_alloc_coherent(struct device *hwdev, size_t size,
3006 dma_addr_t *dma_handle, gfp_t flags,
3007 struct dma_attrs *attrs)
3008{
3009 void *vaddr;
3010 int order;
3011
3012 size = PAGE_ALIGN(size);
3013 order = get_order(size);
3014
3015 if (!iommu_no_mapping(hwdev))
3016 flags &= ~(GFP_DMA | GFP_DMA32);
3017 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
3018 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
3019 flags |= GFP_DMA;
3020 else
3021 flags |= GFP_DMA32;
3022 }
3023
3024 vaddr = (void *)__get_free_pages(flags, order);
3025 if (!vaddr)
3026 return NULL;
3027 memset(vaddr, 0, size);
3028
3029 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
3030 DMA_BIDIRECTIONAL,
3031 hwdev->coherent_dma_mask);
3032 if (*dma_handle)
3033 return vaddr;
3034 free_pages((unsigned long)vaddr, order);
3035 return NULL;
3036}
3037
3038static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
3039 dma_addr_t dma_handle, struct dma_attrs *attrs)
3040{
3041 int order;
3042
3043 size = PAGE_ALIGN(size);
3044 order = get_order(size);
3045
3046 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
3047 free_pages((unsigned long)vaddr, order);
3048}
3049
3050static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3051 int nelems, enum dma_data_direction dir,
3052 struct dma_attrs *attrs)
3053{
3054 struct pci_dev *pdev = to_pci_dev(hwdev);
3055 struct dmar_domain *domain;
3056 unsigned long start_pfn, last_pfn;
3057 struct iova *iova;
3058 struct intel_iommu *iommu;
3059
3060 if (iommu_no_mapping(hwdev))
3061 return;
3062
3063 domain = find_domain(pdev);
3064 BUG_ON(!domain);
3065
3066 iommu = domain_get_iommu(domain);
3067
3068 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
3069 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3070 (unsigned long long)sglist[0].dma_address))
3071 return;
3072
3073 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3074 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3075
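 /* clear the whole page */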
3077 dma_pte_clear_range(domain, start_pfn, last_pfn);
3078
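 /* free page tables */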
3080 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
3081
3082 if (intel_iommu_strict) {
3083 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3084 last_pfn - start_pfn + 1, 0);
3085
3086 __free_iova(&domain->iovad, iova);
3087 } else {
3088 add_unmap(domain, iova);
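 /*
  * Defer the iova release so the IOTLB flush can be batched,
  * rather than paying for a flush on every unmap.
  */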
3093 }
3094}
3095
3096static int intel_nontranslate_map_sg(struct device *hddev,
3097 struct scatterlist *sglist, int nelems, int dir)
3098{
3099 int i;
3100 struct scatterlist *sg;
3101
3102 for_each_sg(sglist, sg, nelems, i) {
3103 BUG_ON(!sg_page(sg));
3104 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3105 sg->dma_length = sg->length;
3106 }
3107 return nelems;
3108}
3109
3110static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3111 enum dma_data_direction dir, struct dma_attrs *attrs)
3112{
3113 int i;
3114 struct pci_dev *pdev = to_pci_dev(hwdev);
3115 struct dmar_domain *domain;
3116 size_t size = 0;
3117 int prot = 0;
3118 struct iova *iova = NULL;
3119 int ret;
3120 struct scatterlist *sg;
3121 unsigned long start_vpfn;
3122 struct intel_iommu *iommu;
3123
3124 BUG_ON(dir == DMA_NONE);
3125 if (iommu_no_mapping(hwdev))
3126 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
3127
3128 domain = get_valid_domain_for_dev(pdev);
3129 if (!domain)
3130 return 0;
3131
3132 iommu = domain_get_iommu(domain);
3133
3134 for_each_sg(sglist, sg, nelems, i)
3135 size += aligned_nrpages(sg->offset, sg->length);
3136
3137 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3138 pdev->dma_mask);
3139 if (!iova) {
3140 sglist->dma_length = 0;
3141 return 0;
3142 }
3143
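 /*
  * Check if DMAR supports zero-length reads on write-only
  * mappings.
  */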
3148 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3149 !cap_zlr(iommu->cap))
3150 prot |= DMA_PTE_READ;
3151 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3152 prot |= DMA_PTE_WRITE;
3153
3154 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3155
3156 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3157 if (unlikely(ret)) {
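 /* clear the page */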
3159 dma_pte_clear_range(domain, start_vpfn,
3160 start_vpfn + size - 1);
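 /* free page tables */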
3162 dma_pte_free_pagetable(domain, start_vpfn,
3163 start_vpfn + size - 1);
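 /* free the iova */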
3165 __free_iova(&domain->iovad, iova);
3166 return 0;
3167 }
3168
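 /* it's a non-present to present mapping. Only flush if caching mode */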
3170 if (cap_caching_mode(iommu->cap))
3171 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
3172 else
3173 iommu_flush_write_buffer(iommu);
3174
3175 return nelems;
3176}
3177
3178static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3179{
3180 return !dma_addr;
3181}
3182
3183struct dma_map_ops intel_dma_ops = {
3184 .alloc = intel_alloc_coherent,
3185 .free = intel_free_coherent,
3186 .map_sg = intel_map_sg,
3187 .unmap_sg = intel_unmap_sg,
3188 .map_page = intel_map_page,
3189 .unmap_page = intel_unmap_page,
3190 .mapping_error = intel_mapping_error,
3191};
3192
3193static inline int iommu_domain_cache_init(void)
3194{
3195 int ret = 0;
3196
3197 iommu_domain_cache = kmem_cache_create("iommu_domain",
3198 sizeof(struct dmar_domain),
3199 0,
3200 SLAB_HWCACHE_ALIGN,
3202 NULL);
3203 if (!iommu_domain_cache) {
3204 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3205 ret = -ENOMEM;
3206 }
3207
3208 return ret;
3209}
3210
3211static inline int iommu_devinfo_cache_init(void)
3212{
3213 int ret = 0;
3214
3215 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3216 sizeof(struct device_domain_info),
3217 0,
3218 SLAB_HWCACHE_ALIGN,
3219 NULL);
3220 if (!iommu_devinfo_cache) {
3221 printk(KERN_ERR "Couldn't create devinfo cache\n");
3222 ret = -ENOMEM;
3223 }
3224
3225 return ret;
3226}
3227
3228static inline int iommu_iova_cache_init(void)
3229{
3230 int ret = 0;
3231
3232 iommu_iova_cache = kmem_cache_create("iommu_iova",
3233 sizeof(struct iova),
3234 0,
3235 SLAB_HWCACHE_ALIGN,
3236 NULL);
3237 if (!iommu_iova_cache) {
3238 printk(KERN_ERR "Couldn't create iova cache\n");
3239 ret = -ENOMEM;
3240 }
3241
3242 return ret;
3243}
3244
3245static int __init iommu_init_mempool(void)
3246{
3247 int ret;
3248 ret = iommu_iova_cache_init();
3249 if (ret)
3250 return ret;
3251
3252 ret = iommu_domain_cache_init();
3253 if (ret)
3254 goto domain_error;
3255
3256 ret = iommu_devinfo_cache_init();
3257 if (!ret)
3258 return ret;
3259
3260 kmem_cache_destroy(iommu_domain_cache);
3261domain_error:
3262 kmem_cache_destroy(iommu_iova_cache);
3263
3264 return -ENOMEM;
3265}
3266
3267static void __init iommu_exit_mempool(void)
3268{
3269 kmem_cache_destroy(iommu_devinfo_cache);
3270 kmem_cache_destroy(iommu_domain_cache);
3271 kmem_cache_destroy(iommu_iova_cache);
3272
3273}
3274
3275static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3276{
3277 struct dmar_drhd_unit *drhd;
3278 u32 vtbar;
3279 int rc;
3280
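 /*
  * We know this device has its own IOMMU on this chipset. If the
  * BIOS reports it under a different DMAR unit, it is lying to us;
  * mark the device as bypassed so it gets no translation.
  */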
3286 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3287 if (rc) {
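 /* "can't" happen */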
3289 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3290 return;
3291 }
3292 vtbar &= 0xffff0000;
3293
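 /* This unit's IOMMU is expected at offset 0xa000 from vtbar. */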
3295 drhd = dmar_find_matched_drhd_unit(pdev);
3296 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3297 TAINT_FIRMWARE_WORKAROUND,
3298 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3299 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3300}
3301DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3302
3303static void __init init_no_remapping_devices(void)
3304{
3305 struct dmar_drhd_unit *drhd;
3306
3307 for_each_drhd_unit(drhd) {
3308 if (!drhd->include_all) {
3309 int i;
3310 for (i = 0; i < drhd->devices_cnt; i++)
3311 if (drhd->devices[i] != NULL)
3312 break;
3313
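 /* ignore DMAR unit if no pci devices exist */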
3314 if (i == drhd->devices_cnt)
3315 drhd->ignored = 1;
3316 }
3317 }
3318
3319 for_each_drhd_unit(drhd) {
3320 int i;
3321 if (drhd->ignored || drhd->include_all)
3322 continue;
3323
3324 for (i = 0; i < drhd->devices_cnt; i++)
3325 if (drhd->devices[i] &&
3326 !IS_GFX_DEVICE(drhd->devices[i]))
3327 break;
3328
3329 if (i < drhd->devices_cnt)
3330 continue;
3331
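 /* This IOMMU has *only* gfx devices. Either bypass it or
    set the gfx_mapped flag, as appropriate. */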
3334 if (dmar_map_gfx) {
3335 intel_iommu_gfx_mapped = 1;
3336 } else {
3337 drhd->ignored = 1;
3338 for (i = 0; i < drhd->devices_cnt; i++) {
3339 if (!drhd->devices[i])
3340 continue;
3341 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3342 }
3343 }
3344 }
3345}
3346
3347#ifdef CONFIG_SUSPEND
3348static int init_iommu_hw(void)
3349{
3350 struct dmar_drhd_unit *drhd;
3351 struct intel_iommu *iommu = NULL;
3352
3353 for_each_active_iommu(iommu, drhd)
3354 if (iommu->qi)
3355 dmar_reenable_qi(iommu);
3356
3357 for_each_iommu(iommu, drhd) {
3358 if (drhd->ignored) {
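 /*
  * We always have to disable PMRs or DMA may fail on
  * this device.
  */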
3363 if (force_on)
3364 iommu_disable_protect_mem_regions(iommu);
3365 continue;
3366 }
3367
3368 iommu_flush_write_buffer(iommu);
3369
3370 iommu_set_root_entry(iommu);
3371
3372 iommu->flush.flush_context(iommu, 0, 0, 0,
3373 DMA_CCMD_GLOBAL_INVL);
3374 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3375 DMA_TLB_GLOBAL_FLUSH);
3376 if (iommu_enable_translation(iommu))
3377 return 1;
3378 iommu_disable_protect_mem_regions(iommu);
3379 }
3380
3381 return 0;
3382}
3383
3384static void iommu_flush_all(void)
3385{
3386 struct dmar_drhd_unit *drhd;
3387 struct intel_iommu *iommu;
3388
3389 for_each_active_iommu(iommu, drhd) {
3390 iommu->flush.flush_context(iommu, 0, 0, 0,
3391 DMA_CCMD_GLOBAL_INVL);
3392 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3393 DMA_TLB_GLOBAL_FLUSH);
3394 }
3395}
3396
3397static int iommu_suspend(void)
3398{
3399 struct dmar_drhd_unit *drhd;
3400 struct intel_iommu *iommu = NULL;
3401 unsigned long flag;
3402
3403 for_each_active_iommu(iommu, drhd) {
3404 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3405 GFP_ATOMIC);
3406 if (!iommu->iommu_state)
3407 goto nomem;
3408 }
3409
3410 iommu_flush_all();
3411
3412 for_each_active_iommu(iommu, drhd) {
3413 iommu_disable_translation(iommu);
3414
3415 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3416
3417 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3418 readl(iommu->reg + DMAR_FECTL_REG);
3419 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3420 readl(iommu->reg + DMAR_FEDATA_REG);
3421 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3422 readl(iommu->reg + DMAR_FEADDR_REG);
3423 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3424 readl(iommu->reg + DMAR_FEUADDR_REG);
3425
3426 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3427 }
3428 return 0;
3429
3430nomem:
3431 for_each_active_iommu(iommu, drhd)
3432 kfree(iommu->iommu_state);
3433
3434 return -ENOMEM;
3435}
3436
3437static void iommu_resume(void)
3438{
3439 struct dmar_drhd_unit *drhd;
3440 struct intel_iommu *iommu = NULL;
3441 unsigned long flag;
3442
3443 if (init_iommu_hw()) {
3444 if (force_on)
3445 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3446 else
3447 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3448 return;
3449 }
3450
3451 for_each_active_iommu(iommu, drhd) {
3452
3453 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3454
3455 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3456 iommu->reg + DMAR_FECTL_REG);
3457 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3458 iommu->reg + DMAR_FEDATA_REG);
3459 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3460 iommu->reg + DMAR_FEADDR_REG);
3461 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3462 iommu->reg + DMAR_FEUADDR_REG);
3463
3464 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3465 }
3466
3467 for_each_active_iommu(iommu, drhd)
3468 kfree(iommu->iommu_state);
3469}
3470
3471static struct syscore_ops iommu_syscore_ops = {
3472 .resume = iommu_resume,
3473 .suspend = iommu_suspend,
3474};
3475
3476static void __init init_iommu_pm_ops(void)
3477{
3478 register_syscore_ops(&iommu_syscore_ops);
3479}
3480
3481#else
3482static inline void init_iommu_pm_ops(void) {}
3483#endif
3484
3485LIST_HEAD(dmar_rmrr_units);
3486
3487static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3488{
3489 list_add(&rmrr->list, &dmar_rmrr_units);
3490}
3491
3492
3493int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3494{
3495 struct acpi_dmar_reserved_memory *rmrr;
3496 struct dmar_rmrr_unit *rmrru;
3497
3498 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3499 if (!rmrru)
3500 return -ENOMEM;
3501
3502 rmrru->hdr = header;
3503 rmrr = (struct acpi_dmar_reserved_memory *)header;
3504 rmrru->base_address = rmrr->base_address;
3505 rmrru->end_address = rmrr->end_address;
3506
3507 dmar_register_rmrr_unit(rmrru);
3508 return 0;
3509}
3510
3511static int __init
3512rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3513{
3514 struct acpi_dmar_reserved_memory *rmrr;
3515 int ret;
3516
3517 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3518 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3519 ((void *)rmrr) + rmrr->header.length,
3520 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3521
3522 if (ret || (rmrru->devices_cnt == 0)) {
3523 list_del(&rmrru->list);
3524 kfree(rmrru);
3525 }
3526 return ret;
3527}
3528
3529static LIST_HEAD(dmar_atsr_units);
3530
3531int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3532{
3533 struct acpi_dmar_atsr *atsr;
3534 struct dmar_atsr_unit *atsru;
3535
3536 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3537 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3538 if (!atsru)
3539 return -ENOMEM;
3540
3541 atsru->hdr = hdr;
3542 atsru->include_all = atsr->flags & 0x1;
3543
3544 list_add(&atsru->list, &dmar_atsr_units);
3545
3546 return 0;
3547}
3548
3549static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3550{
3551 int rc;
3552 struct acpi_dmar_atsr *atsr;
3553
3554 if (atsru->include_all)
3555 return 0;
3556
3557 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3558 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3559 (void *)atsr + atsr->header.length,
3560 &atsru->devices_cnt, &atsru->devices,
3561 atsr->segment);
3562 if (rc || !atsru->devices_cnt) {
3563 list_del(&atsru->list);
3564 kfree(atsru);
3565 }
3566
3567 return rc;
3568}
3569
3570int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3571{
3572 int i;
3573 struct pci_bus *bus;
3574 struct acpi_dmar_atsr *atsr;
3575 struct dmar_atsr_unit *atsru;
3576
3577 dev = pci_physfn(dev);
3578
3579 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3580 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3581 if (atsr->segment == pci_domain_nr(dev->bus))
3582 goto found;
3583 }
3584
3585 return 0;
3586
3587found:
3588 for (bus = dev->bus; bus; bus = bus->parent) {
3589 struct pci_dev *bridge = bus->self;
3590
3591 if (!bridge || !pci_is_pcie(bridge) ||
3592 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3593 return 0;
3594
3595 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
3596 for (i = 0; i < atsru->devices_cnt; i++)
3597 if (atsru->devices[i] == bridge)
3598 return 1;
3599 break;
3600 }
3601 }
3602
3603 if (atsru->include_all)
3604 return 1;
3605
3606 return 0;
3607}
3608
3609int __init dmar_parse_rmrr_atsr_dev(void)
3610{
3611 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3612 struct dmar_atsr_unit *atsr, *atsr_n;
3613 int ret = 0;
3614
3615 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3616 ret = rmrr_parse_dev(rmrr);
3617 if (ret)
3618 return ret;
3619 }
3620
3621 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3622 ret = atsr_parse_dev(atsr);
3623 if (ret)
3624 return ret;
3625 }
3626
3627 return ret;
3628}
3629
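/*
 * Bus notifier: when a driver is unbound from a PCI device, detach the
 * device from its DMAR domain and free the domain once it is empty.
 */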
3636static int device_notifier(struct notifier_block *nb,
3637 unsigned long action, void *data)
3638{
3639 struct device *dev = data;
3640 struct pci_dev *pdev = to_pci_dev(dev);
3641 struct dmar_domain *domain;
3642
3643 if (iommu_no_mapping(dev))
3644 return 0;
3645
3646 domain = find_domain(pdev);
3647 if (!domain)
3648 return 0;
3649
3650 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
3651 domain_remove_one_dev_info(domain, pdev);
3652
3653 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3654 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3655 list_empty(&domain->devices))
3656 domain_exit(domain);
3657 }
3658
3659 return 0;
3660}
3661
3662static struct notifier_block device_nb = {
3663 .notifier_call = device_notifier,
3664};
3665
3666int __init intel_iommu_init(void)
3667{
3668 int ret = 0;
3669 struct dmar_drhd_unit *drhd;
3670
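 /* VT-d is required for a TXT/tboot launch, so enforce that */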
3672 force_on = tboot_force_iommu();
3673
3674 if (dmar_table_init()) {
3675 if (force_on)
3676 panic("tboot: Failed to initialize DMAR table\n");
3677 return -ENODEV;
3678 }
3679
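 /*
  * Disable translation if it was already enabled prior to OS handover.
  */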
3683 for_each_drhd_unit(drhd) {
3684 struct intel_iommu *iommu;
3685
3686 if (drhd->ignored)
3687 continue;
3688
3689 iommu = drhd->iommu;
3690 if (iommu->gcmd & DMA_GCMD_TE)
3691 iommu_disable_translation(iommu);
3692 }
3693
3694 if (dmar_dev_scope_init() < 0) {
3695 if (force_on)
3696 panic("tboot: Failed to initialize DMAR device scope\n");
3697 return -ENODEV;
3698 }
3699
3700 if (no_iommu || dmar_disabled)
3701 return -ENODEV;
3702
3703 if (iommu_init_mempool()) {
3704 if (force_on)
3705 panic("tboot: Failed to initialize iommu memory\n");
3706 return -ENODEV;
3707 }
3708
3709 if (list_empty(&dmar_rmrr_units))
3710 printk(KERN_INFO "DMAR: No RMRR found\n");
3711
3712 if (list_empty(&dmar_atsr_units))
3713 printk(KERN_INFO "DMAR: No ATSR found\n");
3714
3715 if (dmar_init_reserved_ranges()) {
3716 if (force_on)
3717 panic("tboot: Failed to reserve iommu ranges\n");
3718 return -ENODEV;
3719 }
3720
3721 init_no_remapping_devices();
3722
3723 ret = init_dmars();
3724 if (ret) {
3725 if (force_on)
3726 panic("tboot: Failed to initialize DMARs\n");
3727 printk(KERN_ERR "IOMMU: dmar init failed\n");
3728 put_iova_domain(&reserved_iova_list);
3729 iommu_exit_mempool();
3730 return ret;
3731 }
3732 printk(KERN_INFO
3733 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3734
3735 init_timer(&unmap_timer);
3736#ifdef CONFIG_SWIOTLB
3737 swiotlb = 0;
3738#endif
3739 dma_ops = &intel_dma_ops;
3740
3741 init_iommu_pm_ops();
3742
3743 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
3744
3745 bus_register_notifier(&pci_bus_type, &device_nb);
3746
3747 intel_iommu_enabled = 1;
3748
3749 return 0;
3750}
3751
3752static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3753 struct pci_dev *pdev)
3754{
3755 struct pci_dev *tmp, *parent;
3756
3757 if (!iommu || !pdev)
3758 return;
3759
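 /* dependent device detach */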
3761 tmp = pci_find_upstream_pcie_bridge(pdev);
3762
3763 if (tmp) {
3764 parent = pdev->bus->self;
3765 while (parent != tmp) {
3766 iommu_detach_dev(iommu, parent->bus->number,
3767 parent->devfn);
3768 parent = parent->bus->self;
3769 }
3770 if (pci_is_pcie(tmp))
3771 iommu_detach_dev(iommu,
3772 tmp->subordinate->number, 0);
3773 else
3774 iommu_detach_dev(iommu, tmp->bus->number,
3775 tmp->devfn);
3776 }
3777}
3778
3779static void domain_remove_one_dev_info(struct dmar_domain *domain,
3780 struct pci_dev *pdev)
3781{
3782 struct device_domain_info *info;
3783 struct intel_iommu *iommu;
3784 unsigned long flags;
3785 int found = 0;
3786 struct list_head *entry, *tmp;
3787
3788 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3789 pdev->devfn);
3790 if (!iommu)
3791 return;
3792
3793 spin_lock_irqsave(&device_domain_lock, flags);
3794 list_for_each_safe(entry, tmp, &domain->devices) {
3795 info = list_entry(entry, struct device_domain_info, link);
3796 if (info->segment == pci_domain_nr(pdev->bus) &&
3797 info->bus == pdev->bus->number &&
3798 info->devfn == pdev->devfn) {
3799 unlink_domain_info(info);
3800 spin_unlock_irqrestore(&device_domain_lock, flags);
3801
3802 iommu_disable_dev_iotlb(info);
3803 iommu_detach_dev(iommu, info->bus, info->devfn);
3804 iommu_detach_dependent_devices(iommu, pdev);
3805 free_devinfo_mem(info);
3806
3807 spin_lock_irqsave(&device_domain_lock, flags);
3808
3809 if (found)
3810 break;
3811 else
3812 continue;
3813 }
3814
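 /*
  * If no other device behind this iommu is owned by the domain,
  * clear the iommu's bit in iommu_bmp and update the iommu count
  * and capabilities.
  */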
3819 if (iommu == device_to_iommu(info->segment, info->bus,
3820 info->devfn))
3821 found = 1;
3822 }
3823
3824 spin_unlock_irqrestore(&device_domain_lock, flags);
3825
3826 if (found == 0) {
3827 unsigned long tmp_flags;
3828 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3829 clear_bit(iommu->seq_id, domain->iommu_bmp);
3830 domain->iommu_count--;
3831 domain_update_iommu_cap(domain);
3832 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3833
3834 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3835 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3836 spin_lock_irqsave(&iommu->lock, tmp_flags);
3837 clear_bit(domain->id, iommu->domain_ids);
3838 iommu->domains[domain->id] = NULL;
3839 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3840 }
3841 }
3842}
3843
3844static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3845{
3846 struct device_domain_info *info;
3847 struct intel_iommu *iommu;
3848 unsigned long flags1, flags2;
3849
3850 spin_lock_irqsave(&device_domain_lock, flags1);
3851 while (!list_empty(&domain->devices)) {
3852 info = list_entry(domain->devices.next,
3853 struct device_domain_info, link);
3854 unlink_domain_info(info);
3855 spin_unlock_irqrestore(&device_domain_lock, flags1);
3856
3857 iommu_disable_dev_iotlb(info);
3858 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3859 iommu_detach_dev(iommu, info->bus, info->devfn);
3860 iommu_detach_dependent_devices(iommu, info->dev);
3861
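 /*
  * Clear this iommu in iommu_bmp and update the iommu count and
  * capabilities.
  */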
3865 spin_lock_irqsave(&domain->iommu_lock, flags2);
3866 if (test_and_clear_bit(iommu->seq_id,
3867 domain->iommu_bmp)) {
3868 domain->iommu_count--;
3869 domain_update_iommu_cap(domain);
3870 }
3871 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3872
3873 free_devinfo_mem(info);
3874 spin_lock_irqsave(&device_domain_lock, flags1);
3875 }
3876 spin_unlock_irqrestore(&device_domain_lock, flags1);
3877}
3878
3879
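/* domain id for virtual machine, it won't be set in context */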
3880static unsigned long vm_domid;
3881
3882static struct dmar_domain *iommu_alloc_vm_domain(void)
3883{
3884 struct dmar_domain *domain;
3885
3886 domain = alloc_domain_mem();
3887 if (!domain)
3888 return NULL;
3889
3890 domain->id = vm_domid++;
3891 domain->nid = -1;
3892 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
3893 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3894
3895 return domain;
3896}
3897
3898static int md_domain_init(struct dmar_domain *domain, int guest_width)
3899{
3900 int adjust_width;
3901
3902 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3903 spin_lock_init(&domain->iommu_lock);
3904
3905 domain_reserve_special_ranges(domain);
3906
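 /* calculate AGAW */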
3908 domain->gaw = guest_width;
3909 adjust_width = guestwidth_to_adjustwidth(guest_width);
3910 domain->agaw = width_to_agaw(adjust_width);
3911
3912 INIT_LIST_HEAD(&domain->devices);
3913
3914 domain->iommu_count = 0;
3915 domain->iommu_coherency = 0;
3916 domain->iommu_snooping = 0;
3917 domain->iommu_superpage = 0;
3918 domain->max_addr = 0;
3919 domain->nid = -1;
3920
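 /* always allocate the top pgd */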
3922 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
3923 if (!domain->pgd)
3924 return -ENOMEM;
3925 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3926 return 0;
3927}
3928
3929static void iommu_free_vm_domain(struct dmar_domain *domain)
3930{
3931 unsigned long flags;
3932 struct dmar_drhd_unit *drhd;
3933 struct intel_iommu *iommu;
3934 unsigned long i;
3935 unsigned long ndomains;
3936
3937 for_each_drhd_unit(drhd) {
3938 if (drhd->ignored)
3939 continue;
3940 iommu = drhd->iommu;
3941
3942 ndomains = cap_ndoms(iommu->cap);
3943 for_each_set_bit(i, iommu->domain_ids, ndomains) {
3944 if (iommu->domains[i] == domain) {
3945 spin_lock_irqsave(&iommu->lock, flags);
3946 clear_bit(i, iommu->domain_ids);
3947 iommu->domains[i] = NULL;
3948 spin_unlock_irqrestore(&iommu->lock, flags);
3949 break;
3950 }
3951 }
3952 }
3953}
3954
3955static void vm_domain_exit(struct dmar_domain *domain)
3956{
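 /* Domain 0 is reserved, so don't process it */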
3958 if (!domain)
3959 return;
3960
3961 vm_domain_remove_all_dev_info(domain);
3962
3963 put_iova_domain(&domain->iovad);
3964
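 /* clear ptes */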
3966 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3967
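 /* free page tables */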
3969 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3970
3971 iommu_free_vm_domain(domain);
3972 free_domain_mem(domain);
3973}
3974
3975static int intel_iommu_domain_init(struct iommu_domain *domain)
3976{
3977 struct dmar_domain *dmar_domain;
3978
3979 dmar_domain = iommu_alloc_vm_domain();
3980 if (!dmar_domain) {
3981 printk(KERN_ERR
3982 "intel_iommu_domain_init: dmar_domain == NULL\n");
3983 return -ENOMEM;
3984 }
3985 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3986 printk(KERN_ERR
3987 "intel_iommu_domain_init() failed\n");
3988 vm_domain_exit(dmar_domain);
3989 return -ENOMEM;
3990 }
3991 domain_update_iommu_cap(dmar_domain);
3992 domain->priv = dmar_domain;
3993
3994 domain->geometry.aperture_start = 0;
3995 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3996 domain->geometry.force_aperture = true;
3997
3998 return 0;
3999}
4000
4001static void intel_iommu_domain_destroy(struct iommu_domain *domain)
4002{
4003 struct dmar_domain *dmar_domain = domain->priv;
4004
4005 domain->priv = NULL;
4006 vm_domain_exit(dmar_domain);
4007}
4008
4009static int intel_iommu_attach_device(struct iommu_domain *domain,
4010 struct device *dev)
4011{
4012 struct dmar_domain *dmar_domain = domain->priv;
4013 struct pci_dev *pdev = to_pci_dev(dev);
4014 struct intel_iommu *iommu;
4015 int addr_width;
4016
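 /* normally pdev is not mapped */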
4018 if (unlikely(domain_context_mapped(pdev))) {
4019 struct dmar_domain *old_domain;
4020
4021 old_domain = find_domain(pdev);
4022 if (old_domain) {
4023 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
4024 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
4025 domain_remove_one_dev_info(old_domain, pdev);
4026 else
4027 domain_remove_dev_info(old_domain);
4028 }
4029 }
4030
4031 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
4032 pdev->devfn);
4033 if (!iommu)
4034 return -ENODEV;
4035
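 /* check if this iommu agaw is sufficient for max mapped address */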
4037 addr_width = agaw_to_width(iommu->agaw);
4038 if (addr_width > cap_mgaw(iommu->cap))
4039 addr_width = cap_mgaw(iommu->cap);
4040
4041 if (dmar_domain->max_addr > (1LL << addr_width)) {
4042 printk(KERN_ERR "%s: iommu width (%d) is not "
4043 "sufficient for the mapped address (%llx)\n",
4044 __func__, addr_width, dmar_domain->max_addr);
4045 return -EFAULT;
4046 }
4047 dmar_domain->gaw = addr_width;
4048
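 /*
  * Knock out extra levels of page tables if necessary.
  */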
4052 while (iommu->agaw < dmar_domain->agaw) {
4053 struct dma_pte *pte;
4054
4055 pte = dmar_domain->pgd;
4056 if (dma_pte_present(pte)) {
4057 dmar_domain->pgd = (struct dma_pte *)
4058 phys_to_virt(dma_pte_addr(pte));
4059 free_pgtable_page(pte);
4060 }
4061 dmar_domain->agaw--;
4062 }
4063
4064 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
4065}
4066
4067static void intel_iommu_detach_device(struct iommu_domain *domain,
4068 struct device *dev)
4069{
4070 struct dmar_domain *dmar_domain = domain->priv;
4071 struct pci_dev *pdev = to_pci_dev(dev);
4072
4073 domain_remove_one_dev_info(dmar_domain, pdev);
4074}
4075
4076static int intel_iommu_map(struct iommu_domain *domain,
4077 unsigned long iova, phys_addr_t hpa,
4078 size_t size, int iommu_prot)
4079{
4080 struct dmar_domain *dmar_domain = domain->priv;
4081 u64 max_addr;
4082 int prot = 0;
4083 int ret;
4084
4085 if (iommu_prot & IOMMU_READ)
4086 prot |= DMA_PTE_READ;
4087 if (iommu_prot & IOMMU_WRITE)
4088 prot |= DMA_PTE_WRITE;
4089 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4090 prot |= DMA_PTE_SNP;
4091
4092 max_addr = iova + size;
4093 if (dmar_domain->max_addr < max_addr) {
4094 u64 end;
4095
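 /* check if minimum agaw is sufficient for mapped address */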
4097 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4098 if (end < max_addr) {
4099 printk(KERN_ERR "%s: iommu width (%d) is not "
4100 "sufficient for the mapped address (%llx)\n",
4101 __func__, dmar_domain->gaw, max_addr);
4102 return -EFAULT;
4103 }
4104 dmar_domain->max_addr = max_addr;
4105 }
4106
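 /* Round up size to the next multiple of PAGE_SIZE, if it and
    the low bits of hpa would take us onto the next page. */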
4108 size = aligned_nrpages(hpa, size);
4109 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4110 hpa >> VTD_PAGE_SHIFT, size, prot);
4111 return ret;
4112}
4113
4114static size_t intel_iommu_unmap(struct iommu_domain *domain,
4115 unsigned long iova, size_t size)
4116{
4117 struct dmar_domain *dmar_domain = domain->priv;
4118 int order;
4119
4120 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
4121 (iova + size - 1) >> VTD_PAGE_SHIFT);
4122
4123 if (dmar_domain->max_addr == iova + size)
4124 dmar_domain->max_addr = iova;
4125
4126 return PAGE_SIZE << order;
4127}
4128
4129static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4130 dma_addr_t iova)
4131{
4132 struct dmar_domain *dmar_domain = domain->priv;
4133 struct dma_pte *pte;
4134 u64 phys = 0;
4135
4136 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
4137 if (pte)
4138 phys = dma_pte_addr(pte);
4139
4140 return phys;
4141}
4142
4143static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4144 unsigned long cap)
4145{
4146 struct dmar_domain *dmar_domain = domain->priv;
4147
4148 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4149 return dmar_domain->iommu_snooping;
4150 if (cap == IOMMU_CAP_INTR_REMAP)
4151 return irq_remapping_enabled;
4152
4153 return 0;
4154}
4155
4156#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
4157
4158static int intel_iommu_add_device(struct device *dev)
4159{
4160 struct pci_dev *pdev = to_pci_dev(dev);
4161 struct pci_dev *bridge, *dma_pdev = NULL;
4162 struct iommu_group *group;
4163 int ret;
4164
4165 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4166 pdev->bus->number, pdev->devfn))
4167 return -ENODEV;
4168
4169 bridge = pci_find_upstream_pcie_bridge(pdev);
4170 if (bridge) {
4171 if (pci_is_pcie(bridge))
4172 dma_pdev = pci_get_domain_bus_and_slot(
4173 pci_domain_nr(pdev->bus),
4174 bridge->subordinate->number, 0);
4175 if (!dma_pdev)
4176 dma_pdev = pci_dev_get(bridge);
4177 } else
4178 dma_pdev = pci_dev_get(pdev);
4179
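 /* account for quirked devices */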
4181 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4182
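 /*
  * If it's a multifunction device that does not support our
  * required ACS flags, add it to the same group as the lowest
  * numbered function that also lacks the required ACS flags.
  */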
4188 if (dma_pdev->multifunction &&
4189 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4190 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4191
4192 for (i = 0; i < 8; i++) {
4193 struct pci_dev *tmp;
4194
4195 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4196 if (!tmp)
4197 continue;
4198
4199 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4200 swap_pci_ref(&dma_pdev, tmp);
4201 break;
4202 }
4203 pci_dev_put(tmp);
4204 }
4205 }
4206
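 /*
  * Devices on the root bus go through the iommu. If that's not us,
  * find the next upstream device and test ACS up to the root bus.
  * Finding the next device may require skipping virtual buses.
  */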
4212 while (!pci_is_root_bus(dma_pdev->bus)) {
4213 struct pci_bus *bus = dma_pdev->bus;
4214
4215 while (!bus->self) {
4216 if (!pci_is_root_bus(bus))
4217 bus = bus->parent;
4218 else
4219 goto root_bus;
4220 }
4221
4222 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
4223 break;
4224
4225 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
4226 }
4227
4228root_bus:
4229 group = iommu_group_get(&dma_pdev->dev);
4230 pci_dev_put(dma_pdev);
4231 if (!group) {
4232 group = iommu_group_alloc();
4233 if (IS_ERR(group))
4234 return PTR_ERR(group);
4235 }
4236
4237 ret = iommu_group_add_device(group, dev);
4238
4239 iommu_group_put(group);
4240 return ret;
4241}
4242
4243static void intel_iommu_remove_device(struct device *dev)
4244{
4245 iommu_group_remove_device(dev);
4246}
4247
4248static struct iommu_ops intel_iommu_ops = {
4249 .domain_init = intel_iommu_domain_init,
4250 .domain_destroy = intel_iommu_domain_destroy,
4251 .attach_dev = intel_iommu_attach_device,
4252 .detach_dev = intel_iommu_detach_device,
4253 .map = intel_iommu_map,
4254 .unmap = intel_iommu_unmap,
4255 .iova_to_phys = intel_iommu_iova_to_phys,
4256 .domain_has_cap = intel_iommu_domain_has_cap,
4257 .add_device = intel_iommu_add_device,
4258 .remove_device = intel_iommu_remove_device,
4259 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4260};
4261
4262static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4263{
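 /* Integrated graphics DMAR support on these chipsets is broken; bypass it. */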
4265 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4266 dmar_map_gfx = 0;
4267}
4268
4269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4270DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4272DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4273DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4274DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4275DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4276
4277static void quirk_iommu_rwbf(struct pci_dev *dev)
4278{
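 /*
  * These chipsets neglect to set the RWBF capability
  * even though they need write-buffer flushing.
  */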
4283 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4284 rwbf_quirk = 1;
4285}
4286
4287DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4288DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4289DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4290DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4291DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4292DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4293DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4294
4295#define GGC 0x52
4296#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4297#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4298#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4299#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4300#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4301#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4302#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4303#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4304
4305static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4306{
4307 unsigned short ggc;
4308
4309 if (pci_read_config_word(dev, GGC, &ggc))
4310 return;
4311
4312 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4313 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4314 dmar_map_gfx = 0;
4315 } else if (dmar_map_gfx) {
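 /* we have to ensure the gfx device is idle before we flush */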
4317 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4318 intel_iommu_strict = 1;
4319 }
4320}
4321DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4322DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4323DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4324DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4325
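/*
 * On Tylersburg chipsets, some BIOSes have been known to enable the
 * ISOCH DMAR unit for the Azalia sound device, but not give it any
 * TLB entries, which causes it to deadlock. Check for that. This is
 * done from init_dmars() rather than a PCI quirk so the "BIOS broken"
 * warning is not printed when VT-d is disabled anyway.
 */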
4333static void __init check_tylersburg_isoch(void)
4334{
4335 struct pci_dev *pdev;
4336 uint32_t vtisochctrl;
4337
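 /* If Azalia (0:1b.0) is disabled, no need to check anything */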
4339 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4340 if (!pdev)
4341 return;
4342 pci_dev_put(pdev);
4343
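 /* System Management Registers. These might be hidden, in which case
    the sanity check can't be done; the known-broken BIOSes don't hide
    them, so far. */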
4347 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4348 if (!pdev)
4349 return;
4350
4351 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4352 pci_dev_put(pdev);
4353 return;
4354 }
4355
4356 pci_dev_put(pdev);
4357
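 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */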
4359 if (vtisochctrl & 1)
4360 return;
4361
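 /* Drop all bits other than the number of TLB entries */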
4363 vtisochctrl &= 0x1c;
4364
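 /* If we have the recommended number of TLB entries (16), fine. */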
4366 if (vtisochctrl == 0x10)
4367 return;
4368
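 /* Zero TLB entries would deadlock the unit, so identity-map Azalia instead. */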
4370 if (!vtisochctrl) {
4371 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4372 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4373 dmi_get_system_info(DMI_BIOS_VENDOR),
4374 dmi_get_system_info(DMI_BIOS_VERSION),
4375 dmi_get_system_info(DMI_PRODUCT_VERSION));
4376 iommu_identity_mapping |= IDENTMAP_AZALIA;
4377 return;
4378 }
4379
4380 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4381 vtisochctrl);
4382}
4383