/*
 * Intel VT-d (DMA Remapping) IOMMU driver: discovers the DMAR units
 * described by the ACPI DMAR table, manages their context and
 * second-level page tables, and implements the DMA mapping API on
 * top of them.
 */
#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/sysdev.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH 64

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

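/*
 * VT-d hardware uses 4KiB pages (VTD_PAGE_SHIFT) regardless of the CPU
 * page size, so "dma" pfns below are counted in VT-d page units; these
 * helpers convert between mm pfns/pages/virtual addresses and dma pfns.
 */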
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}

static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
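/*
 * Root-table entry, one per bus number.  Bit 0 is the present bit and
 * bits 12-63 hold the physical address of that bus's context-entry
 * table; the upper 64 bits are reserved.
 */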
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}
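/*
 * Context-table entry, one per devfn.  The low quadword carries the
 * present bit, the fault-processing disable bit, the translation type
 * and the address of the second-level page-table root; the high
 * quadword carries the address width (AGAW) and the 16-bit domain id.
 */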
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}
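/*
 * Second-level (DMA remapping) page-table entry.  DMA_PTE_READ and
 * DMA_PTE_WRITE are the permission bits, DMA_PTE_SNP marks the mapping
 * as snooped, and the page-frame address sits in the VTD_PAGE_MASK bits.
 */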
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_snp(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_SNP;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read on 32-bit kernels */
	return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
{
	pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}
257
258
259
260
261
262
263
264static struct dmar_domain *si_domain;
265static int hw_pass_through = 1;
266
267
268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
269
270
271
272
273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
274
275
276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
277
278struct dmar_domain {
279 int id;
280 unsigned long iommu_bmp;
281
282 struct list_head devices;
283 struct iova_domain iovad;
284
285 struct dma_pte *pgd;
286 int gaw;
287
288
289 int agaw;
290
291 int flags;
292
293 int iommu_coherency;
294 int iommu_snooping;
295 int iommu_count;
296 spinlock_t iommu_lock;
297 u64 max_addr;
298};
299
300
301struct device_domain_info {
302 struct list_head link;
303 struct list_head global;
304 int segment;
305 u8 bus;
306 u8 devfn;
307 struct pci_dev *dev;
308 struct intel_iommu *iommu;
309 struct dmar_domain *domain;
310};
311
312static void flush_unmaps_timeout(unsigned long data);
313
314DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
315
316#define HIGH_WATER_MARK 250
317struct deferred_flush_tables {
318 int next;
319 struct iova *iova[HIGH_WATER_MARK];
320 struct dmar_domain *domain[HIGH_WATER_MARK];
321};
322
323static struct deferred_flush_tables *deferred_flush;
324
325
326static int g_num_of_iommus;
327
328static DEFINE_SPINLOCK(async_umap_flush_lock);
329static LIST_HEAD(unmaps_to_do);
330
331static int timer_on;
332static long list_size;
333
334static void domain_remove_dev_info(struct dmar_domain *domain);
335
336#ifdef CONFIG_DMAR_DEFAULT_ON
337int dmar_disabled = 0;
338#else
339int dmar_disabled = 1;
340#endif
341
342static int __initdata dmar_map_gfx = 1;
343static int dmar_forcedac;
344static int intel_iommu_strict;
345
346#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
347static DEFINE_SPINLOCK(device_domain_lock);
348static LIST_HEAD(device_domain_list);
349
350static struct iommu_ops intel_iommu_ops;
351
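/*
 * Handler for the "intel_iommu=" kernel command-line option: a comma
 * separated list of "on", "off", "igfx_off", "forcedac" and "strict",
 * e.g. "intel_iommu=on,igfx_off,strict".
 */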
static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			printk(KERN_INFO "Intel-IOMMU: enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);
384
385static struct kmem_cache *iommu_domain_cache;
386static struct kmem_cache *iommu_devinfo_cache;
387static struct kmem_cache *iommu_iova_cache;
388
389static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
390{
391 unsigned int flags;
392 void *vaddr;
393
394
395 flags = current->flags & PF_MEMALLOC;
396 current->flags |= PF_MEMALLOC;
397 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
398 current->flags &= (~PF_MEMALLOC | flags);
399 return vaddr;
400}
401
402
403static inline void *alloc_pgtable_page(void)
404{
405 unsigned int flags;
406 void *vaddr;
407
408
409 flags = current->flags & PF_MEMALLOC;
410 current->flags |= PF_MEMALLOC;
411 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
412 current->flags &= (~PF_MEMALLOC | flags);
413 return vaddr;
414}
415
416static inline void free_pgtable_page(void *vaddr)
417{
418 free_page((unsigned long)vaddr);
419}
420
421static inline void *alloc_domain_mem(void)
422{
423 return iommu_kmem_cache_alloc(iommu_domain_cache);
424}
425
426static void free_domain_mem(void *vaddr)
427{
428 kmem_cache_free(iommu_domain_cache, vaddr);
429}
430
431static inline void * alloc_devinfo_mem(void)
432{
433 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
434}
435
436static inline void free_devinfo_mem(void *vaddr)
437{
438 kmem_cache_free(iommu_devinfo_cache, vaddr);
439}
440
441struct iova *alloc_iova_mem(void)
442{
443 return iommu_kmem_cache_alloc(iommu_iova_cache);
444}
445
446void free_iova_mem(struct iova *iova)
447{
448 kmem_cache_free(iommu_iova_cache, iova);
449}
450
451
452static inline int width_to_agaw(int width);
453
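/*
 * Pick the adjusted guest address width: starting from the requested
 * width, walk downwards until a width is found that the IOMMU reports
 * as supported in its SAGAW capability field.
 */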
454static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
455{
456 unsigned long sagaw;
457 int agaw = -1;
458
459 sagaw = cap_sagaw(iommu->cap);
460 for (agaw = width_to_agaw(max_gaw);
461 agaw >= 0; agaw--) {
462 if (test_bit(agaw, &sagaw))
463 break;
464 }
465
466 return agaw;
467}
468
469
470
471
472int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
473{
474 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
475}
476
477
478
479
480
481
482int iommu_calculate_agaw(struct intel_iommu *iommu)
483{
484 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
485}
486
487
488static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
489{
490 int iommu_id;
491
492
493 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
494 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
495
496 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
497 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
498 return NULL;
499
500 return g_iommus[iommu_id];
501}
502
503static void domain_update_iommu_coherency(struct dmar_domain *domain)
504{
505 int i;
506
507 domain->iommu_coherency = 1;
508
509 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
510 for (; i < g_num_of_iommus; ) {
511 if (!ecap_coherent(g_iommus[i]->ecap)) {
512 domain->iommu_coherency = 0;
513 break;
514 }
515 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
516 }
517}
518
519static void domain_update_iommu_snooping(struct dmar_domain *domain)
520{
521 int i;
522
523 domain->iommu_snooping = 1;
524
525 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
526 for (; i < g_num_of_iommus; ) {
527 if (!ecap_sc_support(g_iommus[i]->ecap)) {
528 domain->iommu_snooping = 0;
529 break;
530 }
531 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
532 }
533}
534
535
536static void domain_update_iommu_cap(struct dmar_domain *domain)
537{
538 domain_update_iommu_coherency(domain);
539 domain_update_iommu_snooping(domain);
540}
541
542static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
543{
544 struct dmar_drhd_unit *drhd = NULL;
545 int i;
546
547 for_each_drhd_unit(drhd) {
548 if (drhd->ignored)
549 continue;
550 if (segment != drhd->segment)
551 continue;
552
553 for (i = 0; i < drhd->devices_cnt; i++) {
554 if (drhd->devices[i] &&
555 drhd->devices[i]->bus->number == bus &&
556 drhd->devices[i]->devfn == devfn)
557 return drhd->iommu;
558 if (drhd->devices[i] &&
559 drhd->devices[i]->subordinate &&
560 drhd->devices[i]->subordinate->number <= bus &&
561 drhd->devices[i]->subordinate->subordinate >= bus)
562 return drhd->iommu;
563 }
564
565 if (drhd->include_all)
566 return drhd->iommu;
567 }
568
569 return NULL;
570}
571
572static void domain_flush_cache(struct dmar_domain *domain,
573 void *addr, int size)
574{
575 if (!domain->iommu_coherency)
576 clflush_cache_range(addr, size);
577}
578
579
580static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
581 u8 bus, u8 devfn)
582{
583 struct root_entry *root;
584 struct context_entry *context;
585 unsigned long phy_addr;
586 unsigned long flags;
587
588 spin_lock_irqsave(&iommu->lock, flags);
589 root = &iommu->root_entry[bus];
590 context = get_context_addr_from_root(root);
591 if (!context) {
592 context = (struct context_entry *)alloc_pgtable_page();
593 if (!context) {
594 spin_unlock_irqrestore(&iommu->lock, flags);
595 return NULL;
596 }
597 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
598 phy_addr = virt_to_phys((void *)context);
599 set_root_value(root, phy_addr);
600 set_root_present(root);
601 __iommu_flush_cache(iommu, root, sizeof(*root));
602 }
603 spin_unlock_irqrestore(&iommu->lock, flags);
604 return &context[devfn];
605}
606
607static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
608{
609 struct root_entry *root;
610 struct context_entry *context;
611 int ret;
612 unsigned long flags;
613
614 spin_lock_irqsave(&iommu->lock, flags);
615 root = &iommu->root_entry[bus];
616 context = get_context_addr_from_root(root);
617 if (!context) {
618 ret = 0;
619 goto out;
620 }
621 ret = context_present(&context[devfn]);
622out:
623 spin_unlock_irqrestore(&iommu->lock, flags);
624 return ret;
625}
626
627static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
628{
629 struct root_entry *root;
630 struct context_entry *context;
631 unsigned long flags;
632
633 spin_lock_irqsave(&iommu->lock, flags);
634 root = &iommu->root_entry[bus];
635 context = get_context_addr_from_root(root);
636 if (context) {
637 context_clear_entry(&context[devfn]);
638 __iommu_flush_cache(iommu, &context[devfn], \
639 sizeof(*context));
640 }
641 spin_unlock_irqrestore(&iommu->lock, flags);
642}
643
644static void free_context_table(struct intel_iommu *iommu)
645{
646 struct root_entry *root;
647 int i;
648 unsigned long flags;
649 struct context_entry *context;
650
651 spin_lock_irqsave(&iommu->lock, flags);
652 if (!iommu->root_entry) {
653 goto out;
654 }
655 for (i = 0; i < ROOT_ENTRY_NR; i++) {
656 root = &iommu->root_entry[i];
657 context = get_context_addr_from_root(root);
658 if (context)
659 free_pgtable_page(context);
660 }
661 free_pgtable_page(iommu->root_entry);
662 iommu->root_entry = NULL;
663out:
664 spin_unlock_irqrestore(&iommu->lock, flags);
665}
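/*
 * Page-table geometry: every level decodes LEVEL_STRIDE (9) bits of the
 * pfn, so an adjusted guest address width (agaw) of N means an
 * (N + 2)-level table covering 30 + 9*N bits of DMA address.
 * For example, agaw 2 gives a 4-level table covering 48 bits.
 */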
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}
711
712static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
713 unsigned long pfn)
714{
715 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
716 struct dma_pte *parent, *pte = NULL;
717 int level = agaw_to_level(domain->agaw);
718 int offset;
719
720 BUG_ON(!domain->pgd);
721 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
722 parent = domain->pgd;
723
724 while (level > 0) {
725 void *tmp_page;
726
727 offset = pfn_level_offset(pfn, level);
728 pte = &parent[offset];
729 if (level == 1)
730 break;
731
732 if (!dma_pte_present(pte)) {
733 uint64_t pteval;
734
735 tmp_page = alloc_pgtable_page();
736
737 if (!tmp_page)
738 return NULL;
739
740 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
741 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
742 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
743
744 free_pgtable_page(tmp_page);
745 } else {
746 dma_pte_addr(pte);
747 domain_flush_cache(domain, pte, sizeof(*pte));
748 }
749 }
750 parent = phys_to_virt(dma_pte_addr(pte));
751 level--;
752 }
753
754 return pte;
755}
756
757
758static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
759 unsigned long pfn,
760 int level)
761{
762 struct dma_pte *parent, *pte = NULL;
763 int total = agaw_to_level(domain->agaw);
764 int offset;
765
766 parent = domain->pgd;
767 while (level <= total) {
768 offset = pfn_level_offset(pfn, total);
769 pte = &parent[offset];
770 if (level == total)
771 return pte;
772
773 if (!dma_pte_present(pte))
774 break;
775 parent = phys_to_virt(dma_pte_addr(pte));
776 total--;
777 }
778 return NULL;
779}
780
781
782static void dma_pte_clear_range(struct dmar_domain *domain,
783 unsigned long start_pfn,
784 unsigned long last_pfn)
785{
786 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
787 struct dma_pte *first_pte, *pte;
788
789 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
790 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
791 BUG_ON(start_pfn > last_pfn);
792
793
794 do {
795 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
796 if (!pte) {
797 start_pfn = align_to_level(start_pfn + 1, 2);
798 continue;
799 }
800 do {
801 dma_clear_pte(pte);
802 start_pfn++;
803 pte++;
804 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
805
806 domain_flush_cache(domain, first_pte,
807 (void *)pte - (void *)first_pte);
808
809 } while (start_pfn && start_pfn <= last_pfn);
810}
811
812
813static void dma_pte_free_pagetable(struct dmar_domain *domain,
814 unsigned long start_pfn,
815 unsigned long last_pfn)
816{
817 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
818 struct dma_pte *first_pte, *pte;
819 int total = agaw_to_level(domain->agaw);
820 int level;
821 unsigned long tmp;
822
823 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
824 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
825 BUG_ON(start_pfn > last_pfn);
826
827
828 level = 2;
829 while (level <= total) {
830 tmp = align_to_level(start_pfn, level);
831
832
833 if (tmp + level_size(level) - 1 > last_pfn)
834 return;
835
836 do {
837 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
838 if (!pte) {
839 tmp = align_to_level(tmp + 1, level + 1);
840 continue;
841 }
842 do {
843 if (dma_pte_present(pte)) {
844 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
845 dma_clear_pte(pte);
846 }
847 pte++;
848 tmp += level_size(level);
849 } while (!first_pte_in_page(pte) &&
850 tmp + level_size(level) - 1 <= last_pfn);
851
852 domain_flush_cache(domain, first_pte,
853 (void *)pte - (void *)first_pte);
854
855 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
856 level++;
857 }
858
859 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
860 free_pgtable_page(domain->pgd);
861 domain->pgd = NULL;
862 }
863}
864
865
866static int iommu_alloc_root_entry(struct intel_iommu *iommu)
867{
868 struct root_entry *root;
869 unsigned long flags;
870
871 root = (struct root_entry *)alloc_pgtable_page();
872 if (!root)
873 return -ENOMEM;
874
875 __iommu_flush_cache(iommu, root, ROOT_SIZE);
876
877 spin_lock_irqsave(&iommu->lock, flags);
878 iommu->root_entry = root;
879 spin_unlock_irqrestore(&iommu->lock, flags);
880
881 return 0;
882}
883
884static void iommu_set_root_entry(struct intel_iommu *iommu)
885{
886 void *addr;
887 u32 sts;
888 unsigned long flag;
889
890 addr = iommu->root_entry;
891
892 spin_lock_irqsave(&iommu->register_lock, flag);
893 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
894
895 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
896
897
898 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
899 readl, (sts & DMA_GSTS_RTPS), sts);
900
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
902}
903
904static void iommu_flush_write_buffer(struct intel_iommu *iommu)
905{
906 u32 val;
907 unsigned long flag;
908
909 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
910 return;
911
912 spin_lock_irqsave(&iommu->register_lock, flag);
913 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
914
915
916 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
917 readl, (!(val & DMA_GSTS_WBFS)), val);
918
919 spin_unlock_irqrestore(&iommu->register_lock, flag);
920}
921
922
923static void __iommu_flush_context(struct intel_iommu *iommu,
924 u16 did, u16 source_id, u8 function_mask,
925 u64 type)
926{
927 u64 val = 0;
928 unsigned long flag;
929
930 switch (type) {
931 case DMA_CCMD_GLOBAL_INVL:
932 val = DMA_CCMD_GLOBAL_INVL;
933 break;
934 case DMA_CCMD_DOMAIN_INVL:
935 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
936 break;
937 case DMA_CCMD_DEVICE_INVL:
938 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
939 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
940 break;
941 default:
942 BUG();
943 }
944 val |= DMA_CCMD_ICC;
945
946 spin_lock_irqsave(&iommu->register_lock, flag);
947 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
948
949
950 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
951 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
952
953 spin_unlock_irqrestore(&iommu->register_lock, flag);
954}
955
956
957static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
958 u64 addr, unsigned int size_order, u64 type)
959{
960 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
961 u64 val = 0, val_iva = 0;
962 unsigned long flag;
963
964 switch (type) {
965 case DMA_TLB_GLOBAL_FLUSH:
966
967 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
968 break;
969 case DMA_TLB_DSI_FLUSH:
970 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
971 break;
972 case DMA_TLB_PSI_FLUSH:
973 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
974
975 val_iva = size_order | addr;
976 break;
977 default:
978 BUG();
979 }
980
981#if 0
982
983
984
985
986 if (cap_read_drain(iommu->cap))
987 val |= DMA_TLB_READ_DRAIN;
988#endif
989 if (cap_write_drain(iommu->cap))
990 val |= DMA_TLB_WRITE_DRAIN;
991
992 spin_lock_irqsave(&iommu->register_lock, flag);
993
994 if (val_iva)
995 dmar_writeq(iommu->reg + tlb_offset, val_iva);
996 dmar_writeq(iommu->reg + tlb_offset + 8, val);
997
998
999 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1000 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1001
1002 spin_unlock_irqrestore(&iommu->register_lock, flag);
1003
1004
1005 if (DMA_TLB_IAIG(val) == 0)
1006 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1007 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1008 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1009 (unsigned long long)DMA_TLB_IIRG(type),
1010 (unsigned long long)DMA_TLB_IAIG(val));
1011}
1012
1013static struct device_domain_info *iommu_support_dev_iotlb(
1014 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1015{
1016 int found = 0;
1017 unsigned long flags;
1018 struct device_domain_info *info;
1019 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1020
1021 if (!ecap_dev_iotlb_support(iommu->ecap))
1022 return NULL;
1023
1024 if (!iommu->qi)
1025 return NULL;
1026
1027 spin_lock_irqsave(&device_domain_lock, flags);
1028 list_for_each_entry(info, &domain->devices, link)
1029 if (info->bus == bus && info->devfn == devfn) {
1030 found = 1;
1031 break;
1032 }
1033 spin_unlock_irqrestore(&device_domain_lock, flags);
1034
1035 if (!found || !info->dev)
1036 return NULL;
1037
1038 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1039 return NULL;
1040
1041 if (!dmar_find_matched_atsr_unit(info->dev))
1042 return NULL;
1043
1044 info->iommu = iommu;
1045
1046 return info;
1047}
1048
1049static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1050{
1051 if (!info)
1052 return;
1053
1054 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1055}
1056
1057static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1058{
1059 if (!info->dev || !pci_ats_enabled(info->dev))
1060 return;
1061
1062 pci_disable_ats(info->dev);
1063}
1064
1065static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1066 u64 addr, unsigned mask)
1067{
1068 u16 sid, qdep;
1069 unsigned long flags;
1070 struct device_domain_info *info;
1071
1072 spin_lock_irqsave(&device_domain_lock, flags);
1073 list_for_each_entry(info, &domain->devices, link) {
1074 if (!info->dev || !pci_ats_enabled(info->dev))
1075 continue;
1076
1077 sid = info->bus << 8 | info->devfn;
1078 qdep = pci_ats_queue_depth(info->dev);
1079 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1080 }
1081 spin_unlock_irqrestore(&device_domain_lock, flags);
1082}
1083
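/*
 * Flush the IOTLB entries for a run of pages in one domain.  Falls back
 * to a domain-selective flush when page-selective invalidation is not
 * supported or the (power-of-two rounded) range exceeds the hardware's
 * maximum address mask, and also flushes the device IOTLBs via ATS
 * except in the caching-mode domain-0 case.
 */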
1084static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1085 unsigned long pfn, unsigned int pages)
1086{
1087 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1088 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1089
1090 BUG_ON(pages == 0);
1091
1092
1093
1094
1095
1096
1097
1098 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1099 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1100 DMA_TLB_DSI_FLUSH);
1101 else
1102 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1103 DMA_TLB_PSI_FLUSH);
1104
1105
1106
1107
1108
1109 if (!cap_caching_mode(iommu->cap) || did)
1110 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1111}
1112
1113static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1114{
1115 u32 pmen;
1116 unsigned long flags;
1117
1118 spin_lock_irqsave(&iommu->register_lock, flags);
1119 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1120 pmen &= ~DMA_PMEN_EPM;
1121 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1122
1123
1124 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1125 readl, !(pmen & DMA_PMEN_PRS), pmen);
1126
1127 spin_unlock_irqrestore(&iommu->register_lock, flags);
1128}
1129
1130static int iommu_enable_translation(struct intel_iommu *iommu)
1131{
1132 u32 sts;
1133 unsigned long flags;
1134
1135 spin_lock_irqsave(&iommu->register_lock, flags);
1136 iommu->gcmd |= DMA_GCMD_TE;
1137 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1138
1139
1140 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1141 readl, (sts & DMA_GSTS_TES), sts);
1142
1143 spin_unlock_irqrestore(&iommu->register_lock, flags);
1144 return 0;
1145}
1146
1147static int iommu_disable_translation(struct intel_iommu *iommu)
1148{
1149 u32 sts;
1150 unsigned long flag;
1151
1152 spin_lock_irqsave(&iommu->register_lock, flag);
1153 iommu->gcmd &= ~DMA_GCMD_TE;
1154 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1155
1156
1157 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1158 readl, (!(sts & DMA_GSTS_TES)), sts);
1159
1160 spin_unlock_irqrestore(&iommu->register_lock, flag);
1161 return 0;
1162}
1163
1164
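/*
 * Allocate this IOMMU's domain-id bitmap and domain pointer array,
 * sized from the hardware's number-of-domains capability field.
 */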
static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	/* domain_ids is a bitmap of allocated domain numbers; domains[]
	 * maps each allocated id back to its dmar_domain. */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		return -ENOMEM;
	}

	/* In caching mode the hardware tags non-present translations with
	 * domain id 0, so reserve it and never hand it out. */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}
1199
1200
1201static void domain_exit(struct dmar_domain *domain);
1202static void vm_domain_exit(struct dmar_domain *domain);
1203
1204void free_dmar_iommu(struct intel_iommu *iommu)
1205{
1206 struct dmar_domain *domain;
1207 int i;
1208 unsigned long flags;
1209
1210 if ((iommu->domains) && (iommu->domain_ids)) {
1211 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1212 for (; i < cap_ndoms(iommu->cap); ) {
1213 domain = iommu->domains[i];
1214 clear_bit(i, iommu->domain_ids);
1215
1216 spin_lock_irqsave(&domain->iommu_lock, flags);
1217 if (--domain->iommu_count == 0) {
1218 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1219 vm_domain_exit(domain);
1220 else
1221 domain_exit(domain);
1222 }
1223 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1224
1225 i = find_next_bit(iommu->domain_ids,
1226 cap_ndoms(iommu->cap), i+1);
1227 }
1228 }
1229
1230 if (iommu->gcmd & DMA_GCMD_TE)
1231 iommu_disable_translation(iommu);
1232
1233 if (iommu->irq) {
1234 set_irq_data(iommu->irq, NULL);
1235
1236 free_irq(iommu->irq, iommu);
1237 destroy_irq(iommu->irq);
1238 }
1239
1240 kfree(iommu->domains);
1241 kfree(iommu->domain_ids);
1242
1243 g_iommus[iommu->seq_id] = NULL;
1244
1245
1246 for (i = 0; i < g_num_of_iommus; i++) {
1247 if (g_iommus[i])
1248 break;
1249 }
1250
1251 if (i == g_num_of_iommus)
1252 kfree(g_iommus);
1253
1254
1255 free_context_table(iommu);
1256}
1257
1258static struct dmar_domain *alloc_domain(void)
1259{
1260 struct dmar_domain *domain;
1261
1262 domain = alloc_domain_mem();
1263 if (!domain)
1264 return NULL;
1265
1266 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1267 domain->flags = 0;
1268
1269 return domain;
1270}
1271
1272static int iommu_attach_domain(struct dmar_domain *domain,
1273 struct intel_iommu *iommu)
1274{
1275 int num;
1276 unsigned long ndomains;
1277 unsigned long flags;
1278
1279 ndomains = cap_ndoms(iommu->cap);
1280
1281 spin_lock_irqsave(&iommu->lock, flags);
1282
1283 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1284 if (num >= ndomains) {
1285 spin_unlock_irqrestore(&iommu->lock, flags);
1286 printk(KERN_ERR "IOMMU: no free domain ids\n");
1287 return -ENOMEM;
1288 }
1289
1290 domain->id = num;
1291 set_bit(num, iommu->domain_ids);
1292 set_bit(iommu->seq_id, &domain->iommu_bmp);
1293 iommu->domains[num] = domain;
1294 spin_unlock_irqrestore(&iommu->lock, flags);
1295
1296 return 0;
1297}
1298
1299static void iommu_detach_domain(struct dmar_domain *domain,
1300 struct intel_iommu *iommu)
1301{
1302 unsigned long flags;
1303 int num, ndomains;
1304 int found = 0;
1305
1306 spin_lock_irqsave(&iommu->lock, flags);
1307 ndomains = cap_ndoms(iommu->cap);
1308 num = find_first_bit(iommu->domain_ids, ndomains);
1309 for (; num < ndomains; ) {
1310 if (iommu->domains[num] == domain) {
1311 found = 1;
1312 break;
1313 }
1314 num = find_next_bit(iommu->domain_ids,
1315 cap_ndoms(iommu->cap), num+1);
1316 }
1317
1318 if (found) {
1319 clear_bit(num, iommu->domain_ids);
1320 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1321 iommu->domains[num] = NULL;
1322 }
1323 spin_unlock_irqrestore(&iommu->lock, flags);
1324}
1325
1326static struct iova_domain reserved_iova_list;
1327static struct lock_class_key reserved_rbtree_key;
1328
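/*
 * Reserve IOVA ranges that must never be allocated for DMA: the IOAPIC
 * MMIO window and every PCI device's memory-mapped resources, so that
 * remapped DMA addresses cannot collide with those MMIO ranges.
 */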
1329static void dmar_init_reserved_ranges(void)
1330{
1331 struct pci_dev *pdev = NULL;
1332 struct iova *iova;
1333 int i;
1334
1335 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1336
1337 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1338 &reserved_rbtree_key);
1339
1340
1341 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1342 IOVA_PFN(IOAPIC_RANGE_END));
1343 if (!iova)
1344 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1345
1346
1347 for_each_pci_dev(pdev) {
1348 struct resource *r;
1349
1350 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1351 r = &pdev->resource[i];
1352 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1353 continue;
1354 iova = reserve_iova(&reserved_iova_list,
1355 IOVA_PFN(r->start),
1356 IOVA_PFN(r->end));
1357 if (!iova)
1358 printk(KERN_ERR "Reserve iova failed\n");
1359 }
1360 }
1361
1362}
1363
1364static void domain_reserve_special_ranges(struct dmar_domain *domain)
1365{
1366 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1367}
1368
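/*
 * Round a guest address width up to the next width a VT-d page-table
 * hierarchy can actually cover (12 bits plus a multiple of 9), capped
 * at 64 bits.
 */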
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}
1382
1383static int domain_init(struct dmar_domain *domain, int guest_width)
1384{
1385 struct intel_iommu *iommu;
1386 int adjust_width, agaw;
1387 unsigned long sagaw;
1388
1389 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1390 spin_lock_init(&domain->iommu_lock);
1391
1392 domain_reserve_special_ranges(domain);
1393
1394
1395 iommu = domain_get_iommu(domain);
1396 if (guest_width > cap_mgaw(iommu->cap))
1397 guest_width = cap_mgaw(iommu->cap);
1398 domain->gaw = guest_width;
1399 adjust_width = guestwidth_to_adjustwidth(guest_width);
1400 agaw = width_to_agaw(adjust_width);
1401 sagaw = cap_sagaw(iommu->cap);
1402 if (!test_bit(agaw, &sagaw)) {
1403
1404 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1405 agaw = find_next_bit(&sagaw, 5, agaw);
1406 if (agaw >= 5)
1407 return -ENODEV;
1408 }
1409 domain->agaw = agaw;
1410 INIT_LIST_HEAD(&domain->devices);
1411
1412 if (ecap_coherent(iommu->ecap))
1413 domain->iommu_coherency = 1;
1414 else
1415 domain->iommu_coherency = 0;
1416
1417 if (ecap_sc_support(iommu->ecap))
1418 domain->iommu_snooping = 1;
1419 else
1420 domain->iommu_snooping = 0;
1421
1422 domain->iommu_count = 1;
1423
1424
1425 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1426 if (!domain->pgd)
1427 return -ENOMEM;
1428 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1429 return 0;
1430}
1431
1432static void domain_exit(struct dmar_domain *domain)
1433{
1434 struct dmar_drhd_unit *drhd;
1435 struct intel_iommu *iommu;
1436
1437
1438 if (!domain)
1439 return;
1440
1441 domain_remove_dev_info(domain);
1442
1443 put_iova_domain(&domain->iovad);
1444
1445
1446 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1447
1448
1449 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1450
1451 for_each_active_iommu(iommu, drhd)
1452 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1453 iommu_detach_domain(domain, iommu);
1454
1455 free_domain_mem(domain);
1456}
1457
1458static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1459 u8 bus, u8 devfn, int translation)
1460{
1461 struct context_entry *context;
1462 unsigned long flags;
1463 struct intel_iommu *iommu;
1464 struct dma_pte *pgd;
1465 unsigned long num;
1466 unsigned long ndomains;
1467 int id;
1468 int agaw;
1469 struct device_domain_info *info = NULL;
1470
1471 pr_debug("Set context mapping for %02x:%02x.%d\n",
1472 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1473
1474 BUG_ON(!domain->pgd);
1475 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1476 translation != CONTEXT_TT_MULTI_LEVEL);
1477
1478 iommu = device_to_iommu(segment, bus, devfn);
1479 if (!iommu)
1480 return -ENODEV;
1481
1482 context = device_to_context_entry(iommu, bus, devfn);
1483 if (!context)
1484 return -ENOMEM;
1485 spin_lock_irqsave(&iommu->lock, flags);
1486 if (context_present(context)) {
1487 spin_unlock_irqrestore(&iommu->lock, flags);
1488 return 0;
1489 }
1490
1491 id = domain->id;
1492 pgd = domain->pgd;
1493
1494 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1495 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1496 int found = 0;
1497
1498
1499 ndomains = cap_ndoms(iommu->cap);
1500 num = find_first_bit(iommu->domain_ids, ndomains);
1501 for (; num < ndomains; ) {
1502 if (iommu->domains[num] == domain) {
1503 id = num;
1504 found = 1;
1505 break;
1506 }
1507 num = find_next_bit(iommu->domain_ids,
1508 cap_ndoms(iommu->cap), num+1);
1509 }
1510
1511 if (found == 0) {
1512 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1513 if (num >= ndomains) {
1514 spin_unlock_irqrestore(&iommu->lock, flags);
1515 printk(KERN_ERR "IOMMU: no free domain ids\n");
1516 return -EFAULT;
1517 }
1518
1519 set_bit(num, iommu->domain_ids);
1520 iommu->domains[num] = domain;
1521 id = num;
1522 }
1523
1524
1525
1526
1527 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1528 pgd = phys_to_virt(dma_pte_addr(pgd));
1529 if (!dma_pte_present(pgd)) {
1530 spin_unlock_irqrestore(&iommu->lock, flags);
1531 return -ENOMEM;
1532 }
1533 }
1534 }
1535
1536 context_set_domain_id(context, id);
1537
1538 if (translation != CONTEXT_TT_PASS_THROUGH) {
1539 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1540 translation = info ? CONTEXT_TT_DEV_IOTLB :
1541 CONTEXT_TT_MULTI_LEVEL;
1542 }
1543
1544
1545
1546
1547 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1548 context_set_address_width(context, iommu->msagaw);
1549 else {
1550 context_set_address_root(context, virt_to_phys(pgd));
1551 context_set_address_width(context, iommu->agaw);
1552 }
1553
1554 context_set_translation_type(context, translation);
1555 context_set_fault_enable(context);
1556 context_set_present(context);
1557 domain_flush_cache(domain, context, sizeof(*context));
1558
1559
1560
1561
1562
1563
1564
1565 if (cap_caching_mode(iommu->cap)) {
1566 iommu->flush.flush_context(iommu, 0,
1567 (((u16)bus) << 8) | devfn,
1568 DMA_CCMD_MASK_NOBIT,
1569 DMA_CCMD_DEVICE_INVL);
1570 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
1571 } else {
1572 iommu_flush_write_buffer(iommu);
1573 }
1574 iommu_enable_dev_iotlb(info);
1575 spin_unlock_irqrestore(&iommu->lock, flags);
1576
1577 spin_lock_irqsave(&domain->iommu_lock, flags);
1578 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1579 domain->iommu_count++;
1580 domain_update_iommu_cap(domain);
1581 }
1582 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1583 return 0;
1584}
1585
1586static int
1587domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1588 int translation)
1589{
1590 int ret;
1591 struct pci_dev *tmp, *parent;
1592
1593 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1594 pdev->bus->number, pdev->devfn,
1595 translation);
1596 if (ret)
1597 return ret;
1598
1599
1600 tmp = pci_find_upstream_pcie_bridge(pdev);
1601 if (!tmp)
1602 return 0;
1603
1604 parent = pdev->bus->self;
1605 while (parent != tmp) {
1606 ret = domain_context_mapping_one(domain,
1607 pci_domain_nr(parent->bus),
1608 parent->bus->number,
1609 parent->devfn, translation);
1610 if (ret)
1611 return ret;
1612 parent = parent->bus->self;
1613 }
1614 if (tmp->is_pcie)
1615 return domain_context_mapping_one(domain,
1616 pci_domain_nr(tmp->subordinate),
1617 tmp->subordinate->number, 0,
1618 translation);
1619 else
1620 return domain_context_mapping_one(domain,
1621 pci_domain_nr(tmp->bus),
1622 tmp->bus->number,
1623 tmp->devfn,
1624 translation);
1625}
1626
1627static int domain_context_mapped(struct pci_dev *pdev)
1628{
1629 int ret;
1630 struct pci_dev *tmp, *parent;
1631 struct intel_iommu *iommu;
1632
1633 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1634 pdev->devfn);
1635 if (!iommu)
1636 return -ENODEV;
1637
1638 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1639 if (!ret)
1640 return ret;
1641
1642 tmp = pci_find_upstream_pcie_bridge(pdev);
1643 if (!tmp)
1644 return ret;
1645
1646 parent = pdev->bus->self;
1647 while (parent != tmp) {
1648 ret = device_context_mapped(iommu, parent->bus->number,
1649 parent->devfn);
1650 if (!ret)
1651 return ret;
1652 parent = parent->bus->self;
1653 }
1654 if (tmp->is_pcie)
1655 return device_context_mapped(iommu, tmp->subordinate->number,
1656 0);
1657 else
1658 return device_context_mapped(iommu, tmp->bus->number,
1659 tmp->devfn);
1660}
1661
1662
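/* Number of VT-d (4KiB) pages needed to map "size" bytes at "host_addr". */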
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
1669
1670static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1671 struct scatterlist *sg, unsigned long phys_pfn,
1672 unsigned long nr_pages, int prot)
1673{
1674 struct dma_pte *first_pte = NULL, *pte = NULL;
1675 phys_addr_t uninitialized_var(pteval);
1676 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1677 unsigned long sg_res;
1678
1679 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1680
1681 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1682 return -EINVAL;
1683
1684 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1685
1686 if (sg)
1687 sg_res = 0;
1688 else {
1689 sg_res = nr_pages + 1;
1690 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1691 }
1692
1693 while (nr_pages--) {
1694 uint64_t tmp;
1695
1696 if (!sg_res) {
1697 sg_res = aligned_nrpages(sg->offset, sg->length);
1698 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1699 sg->dma_length = sg->length;
1700 pteval = page_to_phys(sg_page(sg)) | prot;
1701 }
1702 if (!pte) {
1703 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1704 if (!pte)
1705 return -ENOMEM;
1706 }
1707
1708
1709
1710 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
1711 if (tmp) {
1712 static int dumps = 5;
1713 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1714 iov_pfn, tmp, (unsigned long long)pteval);
1715 if (dumps) {
1716 dumps--;
1717 debug_dma_dump_mappings(NULL);
1718 }
1719 WARN_ON(1);
1720 }
1721 pte++;
1722 if (!nr_pages || first_pte_in_page(pte)) {
1723 domain_flush_cache(domain, first_pte,
1724 (void *)pte - (void *)first_pte);
1725 pte = NULL;
1726 }
1727 iov_pfn++;
1728 pteval += VTD_PAGE_SIZE;
1729 sg_res--;
1730 if (!sg_res)
1731 sg = sg_next(sg);
1732 }
1733 return 0;
1734}
1735
1736static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1737 struct scatterlist *sg, unsigned long nr_pages,
1738 int prot)
1739{
1740 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1741}
1742
1743static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1744 unsigned long phys_pfn, unsigned long nr_pages,
1745 int prot)
1746{
1747 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
1748}
1749
1750static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1751{
1752 if (!iommu)
1753 return;
1754
1755 clear_context_table(iommu, bus, devfn);
1756 iommu->flush.flush_context(iommu, 0, 0, 0,
1757 DMA_CCMD_GLOBAL_INVL);
1758 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
1759}
1760
1761static void domain_remove_dev_info(struct dmar_domain *domain)
1762{
1763 struct device_domain_info *info;
1764 unsigned long flags;
1765 struct intel_iommu *iommu;
1766
1767 spin_lock_irqsave(&device_domain_lock, flags);
1768 while (!list_empty(&domain->devices)) {
1769 info = list_entry(domain->devices.next,
1770 struct device_domain_info, link);
1771 list_del(&info->link);
1772 list_del(&info->global);
1773 if (info->dev)
1774 info->dev->dev.archdata.iommu = NULL;
1775 spin_unlock_irqrestore(&device_domain_lock, flags);
1776
1777 iommu_disable_dev_iotlb(info);
1778 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
1779 iommu_detach_dev(iommu, info->bus, info->devfn);
1780 free_devinfo_mem(info);
1781
1782 spin_lock_irqsave(&device_domain_lock, flags);
1783 }
1784 spin_unlock_irqrestore(&device_domain_lock, flags);
1785}
1786
1787
1788
1789
1790
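/* find_domain: the domain a device was last attached to is cached in
   pdev->dev.archdata.iommu by get_domain_for_dev()/domain_add_dev_info(). */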
1791static struct dmar_domain *
1792find_domain(struct pci_dev *pdev)
1793{
1794 struct device_domain_info *info;
1795
1796
1797 info = pdev->dev.archdata.iommu;
1798 if (info)
1799 return info->domain;
1800 return NULL;
1801}
1802
1803
1804static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1805{
1806 struct dmar_domain *domain, *found = NULL;
1807 struct intel_iommu *iommu;
1808 struct dmar_drhd_unit *drhd;
1809 struct device_domain_info *info, *tmp;
1810 struct pci_dev *dev_tmp;
1811 unsigned long flags;
1812 int bus = 0, devfn = 0;
1813 int segment;
1814 int ret;
1815
1816 domain = find_domain(pdev);
1817 if (domain)
1818 return domain;
1819
1820 segment = pci_domain_nr(pdev->bus);
1821
1822 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1823 if (dev_tmp) {
1824 if (dev_tmp->is_pcie) {
1825 bus = dev_tmp->subordinate->number;
1826 devfn = 0;
1827 } else {
1828 bus = dev_tmp->bus->number;
1829 devfn = dev_tmp->devfn;
1830 }
1831 spin_lock_irqsave(&device_domain_lock, flags);
1832 list_for_each_entry(info, &device_domain_list, global) {
1833 if (info->segment == segment &&
1834 info->bus == bus && info->devfn == devfn) {
1835 found = info->domain;
1836 break;
1837 }
1838 }
1839 spin_unlock_irqrestore(&device_domain_lock, flags);
1840
1841 if (found) {
1842 domain = found;
1843 goto found_domain;
1844 }
1845 }
1846
1847 domain = alloc_domain();
1848 if (!domain)
1849 goto error;
1850
1851
1852 drhd = dmar_find_matched_drhd_unit(pdev);
1853 if (!drhd) {
1854 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1855 pci_name(pdev));
1856 return NULL;
1857 }
1858 iommu = drhd->iommu;
1859
1860 ret = iommu_attach_domain(domain, iommu);
1861 if (ret) {
1862 domain_exit(domain);
1863 goto error;
1864 }
1865
1866 if (domain_init(domain, gaw)) {
1867 domain_exit(domain);
1868 goto error;
1869 }
1870
1871
1872 if (dev_tmp) {
1873 info = alloc_devinfo_mem();
1874 if (!info) {
1875 domain_exit(domain);
1876 goto error;
1877 }
1878 info->segment = segment;
1879 info->bus = bus;
1880 info->devfn = devfn;
1881 info->dev = NULL;
1882 info->domain = domain;
1883
1884 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1885
1886
1887 found = NULL;
1888 spin_lock_irqsave(&device_domain_lock, flags);
1889 list_for_each_entry(tmp, &device_domain_list, global) {
1890 if (tmp->segment == segment &&
1891 tmp->bus == bus && tmp->devfn == devfn) {
1892 found = tmp->domain;
1893 break;
1894 }
1895 }
1896 if (found) {
1897 free_devinfo_mem(info);
1898 domain_exit(domain);
1899 domain = found;
1900 } else {
1901 list_add(&info->link, &domain->devices);
1902 list_add(&info->global, &device_domain_list);
1903 }
1904 spin_unlock_irqrestore(&device_domain_lock, flags);
1905 }
1906
1907found_domain:
1908 info = alloc_devinfo_mem();
1909 if (!info)
1910 goto error;
1911 info->segment = segment;
1912 info->bus = pdev->bus->number;
1913 info->devfn = pdev->devfn;
1914 info->dev = pdev;
1915 info->domain = domain;
1916 spin_lock_irqsave(&device_domain_lock, flags);
1917
1918 found = find_domain(pdev);
1919 if (found != NULL) {
1920 spin_unlock_irqrestore(&device_domain_lock, flags);
1921 if (found != domain) {
1922 domain_exit(domain);
1923 domain = found;
1924 }
1925 free_devinfo_mem(info);
1926 return domain;
1927 }
1928 list_add(&info->link, &domain->devices);
1929 list_add(&info->global, &device_domain_list);
1930 pdev->dev.archdata.iommu = info;
1931 spin_unlock_irqrestore(&device_domain_lock, flags);
1932 return domain;
1933error:
1934
1935 return find_domain(pdev);
1936}
1937
1938static int iommu_identity_mapping;
1939#define IDENTMAP_ALL 1
1940#define IDENTMAP_GFX 2
1941#define IDENTMAP_AZALIA 4
1942
1943static int iommu_domain_identity_map(struct dmar_domain *domain,
1944 unsigned long long start,
1945 unsigned long long end)
1946{
1947 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1948 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1949
1950 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1951 dma_to_mm_pfn(last_vpfn))) {
1952 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1953 return -ENOMEM;
1954 }
1955
1956 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1957 start, end, domain->id);
1958
1959
1960
1961
1962 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
1963
1964 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1965 last_vpfn - first_vpfn + 1,
1966 DMA_PTE_READ|DMA_PTE_WRITE);
1967}
1968
1969static int iommu_prepare_identity_map(struct pci_dev *pdev,
1970 unsigned long long start,
1971 unsigned long long end)
1972{
1973 struct dmar_domain *domain;
1974 int ret;
1975
1976 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1977 if (!domain)
1978 return -ENOMEM;
1979
1980
1981
1982
1983
1984 if (domain == si_domain && hw_pass_through) {
1985 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1986 pci_name(pdev), start, end);
1987 return 0;
1988 }
1989
1990 printk(KERN_INFO
1991 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1992 pci_name(pdev), start, end);
1993
1994 if (end >> agaw_to_width(domain->agaw)) {
1995 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1996 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1997 agaw_to_width(domain->agaw),
1998 dmi_get_system_info(DMI_BIOS_VENDOR),
1999 dmi_get_system_info(DMI_BIOS_VERSION),
2000 dmi_get_system_info(DMI_PRODUCT_VERSION));
2001 ret = -EIO;
2002 goto error;
2003 }
2004
2005 ret = iommu_domain_identity_map(domain, start, end);
2006 if (ret)
2007 goto error;
2008
2009
2010 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
2011 if (ret)
2012 goto error;
2013
2014 return 0;
2015
2016 error:
2017 domain_exit(domain);
2018 return ret;
2019}
2020
2021static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2022 struct pci_dev *pdev)
2023{
2024 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2025 return 0;
2026 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2027 rmrr->end_address + 1);
2028}
2029
2030#ifdef CONFIG_DMAR_FLOPPY_WA
2031static inline void iommu_prepare_isa(void)
2032{
2033 struct pci_dev *pdev;
2034 int ret;
2035
2036 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2037 if (!pdev)
2038 return;
2039
2040 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2041 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2042
2043 if (ret)
2044 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2045 "floppy might not work\n");
2046
2047}
2048#else
2049static inline void iommu_prepare_isa(void)
2050{
2051 return;
2052}
2053#endif
2054
2055static int md_domain_init(struct dmar_domain *domain, int guest_width);
2056
2057static int __init si_domain_work_fn(unsigned long start_pfn,
2058 unsigned long end_pfn, void *datax)
2059{
2060 int *ret = datax;
2061
2062 *ret = iommu_domain_identity_map(si_domain,
2063 (uint64_t)start_pfn << PAGE_SHIFT,
2064 (uint64_t)end_pfn << PAGE_SHIFT);
2065 return *ret;
2066
2067}
2068
2069static int __init si_domain_init(int hw)
2070{
2071 struct dmar_drhd_unit *drhd;
2072 struct intel_iommu *iommu;
2073 int nid, ret = 0;
2074
2075 si_domain = alloc_domain();
2076 if (!si_domain)
2077 return -EFAULT;
2078
2079 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2080
2081 for_each_active_iommu(iommu, drhd) {
2082 ret = iommu_attach_domain(si_domain, iommu);
2083 if (ret) {
2084 domain_exit(si_domain);
2085 return -EFAULT;
2086 }
2087 }
2088
2089 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2090 domain_exit(si_domain);
2091 return -EFAULT;
2092 }
2093
2094 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2095
2096 if (hw)
2097 return 0;
2098
2099 for_each_online_node(nid) {
2100 work_with_active_regions(nid, si_domain_work_fn, &ret);
2101 if (ret)
2102 return ret;
2103 }
2104
2105 return 0;
2106}
2107
2108static void domain_remove_one_dev_info(struct dmar_domain *domain,
2109 struct pci_dev *pdev);
2110static int identity_mapping(struct pci_dev *pdev)
2111{
2112 struct device_domain_info *info;
2113
2114 if (likely(!iommu_identity_mapping))
2115 return 0;
2116
2117
2118 list_for_each_entry(info, &si_domain->devices, link)
2119 if (info->dev == pdev)
2120 return 1;
2121 return 0;
2122}
2123
2124static int domain_add_dev_info(struct dmar_domain *domain,
2125 struct pci_dev *pdev,
2126 int translation)
2127{
2128 struct device_domain_info *info;
2129 unsigned long flags;
2130 int ret;
2131
2132 info = alloc_devinfo_mem();
2133 if (!info)
2134 return -ENOMEM;
2135
2136 ret = domain_context_mapping(domain, pdev, translation);
2137 if (ret) {
2138 free_devinfo_mem(info);
2139 return ret;
2140 }
2141
2142 info->segment = pci_domain_nr(pdev->bus);
2143 info->bus = pdev->bus->number;
2144 info->devfn = pdev->devfn;
2145 info->dev = pdev;
2146 info->domain = domain;
2147
2148 spin_lock_irqsave(&device_domain_lock, flags);
2149 list_add(&info->link, &domain->devices);
2150 list_add(&info->global, &device_domain_list);
2151 pdev->dev.archdata.iommu = info;
2152 spin_unlock_irqrestore(&device_domain_lock, flags);
2153
2154 return 0;
2155}
2156
static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
{
	if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
		return 1;

	if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
		return 1;

	if (!(iommu_identity_mapping & IDENTMAP_ALL))
		return 0;
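	/*
	 * Under IDENTMAP_ALL, give conventional PCI devices an identity
	 * mapping only if they sit directly on a root bus and are not
	 * PCI-to-PCI bridges; devices behind a conventional PCI bridge can
	 * share a requester id, and PCIe-to-PCI bridge ports are likewise
	 * excluded.
	 */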
	if (!pdev->is_pcie) {
		if (!pci_is_root_bus(pdev->bus))
			return 0;
		if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
			return 0;
	} else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
		return 0;

	/*
	 * At boot time we do not yet know whether a device is 64-bit
	 * capable, so assume it is; after boot, map it 1:1 only if its
	 * DMA mask really exceeds 32 bits.
	 */
	if (!startup)
		return pdev->dma_mask > DMA_BIT_MASK(32);

	return 1;
}
2203
2204static int __init iommu_prepare_static_identity_mapping(int hw)
2205{
2206 struct pci_dev *pdev = NULL;
2207 int ret;
2208
2209 ret = si_domain_init(hw);
2210 if (ret)
2211 return -EFAULT;
2212
2213 for_each_pci_dev(pdev) {
2214 if (iommu_should_identity_map(pdev, 1)) {
2215 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2216 hw ? "hardware" : "software", pci_name(pdev));
2217
2218 ret = domain_add_dev_info(si_domain, pdev,
2219 hw ? CONTEXT_TT_PASS_THROUGH :
2220 CONTEXT_TT_MULTI_LEVEL);
2221 if (ret)
2222 return ret;
2223 }
2224 }
2225
2226 return 0;
2227}
2228
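/*
 * init_dmars: allocate the global IOMMU array, set up each active DMAR
 * unit (domain ids, root entry, queued or register-based invalidation),
 * create the static identity, RMRR and ISA mappings, then program the
 * root tables and enable translation.
 */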
2229int __init init_dmars(void)
2230{
2231 struct dmar_drhd_unit *drhd;
2232 struct dmar_rmrr_unit *rmrr;
2233 struct pci_dev *pdev;
2234 struct intel_iommu *iommu;
2235 int i, ret;
2236
2237
2238
2239
2240
2241
2242
2243 for_each_drhd_unit(drhd) {
2244 g_num_of_iommus++;
2245
2246
2247
2248
2249
2250 }
2251
2252 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2253 GFP_KERNEL);
2254 if (!g_iommus) {
2255 printk(KERN_ERR "Allocating global iommu array failed\n");
2256 ret = -ENOMEM;
2257 goto error;
2258 }
2259
2260 deferred_flush = kzalloc(g_num_of_iommus *
2261 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2262 if (!deferred_flush) {
2263 ret = -ENOMEM;
2264 goto error;
2265 }
2266
2267 for_each_drhd_unit(drhd) {
2268 if (drhd->ignored)
2269 continue;
2270
2271 iommu = drhd->iommu;
2272 g_iommus[iommu->seq_id] = iommu;
2273
2274 ret = iommu_init_domains(iommu);
2275 if (ret)
2276 goto error;
2277
2278
2279
2280
2281
2282
2283 ret = iommu_alloc_root_entry(iommu);
2284 if (ret) {
2285 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2286 goto error;
2287 }
2288 if (!ecap_pass_through(iommu->ecap))
2289 hw_pass_through = 0;
2290 }
2291
2292
2293
2294
2295 for_each_drhd_unit(drhd) {
2296 if (drhd->ignored)
2297 continue;
2298
2299 iommu = drhd->iommu;
2300
2301
2302
2303
2304
2305
2306 if (iommu->qi)
2307 continue;
2308
2309
2310
2311
2312 dmar_fault(-1, iommu);
2313
2314
2315
2316
2317 dmar_disable_qi(iommu);
2318 }
2319
2320 for_each_drhd_unit(drhd) {
2321 if (drhd->ignored)
2322 continue;
2323
2324 iommu = drhd->iommu;
2325
2326 if (dmar_enable_qi(iommu)) {
2327
2328
2329
2330
2331 iommu->flush.flush_context = __iommu_flush_context;
2332 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2333 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
2334 "invalidation\n",
2335 (unsigned long long)drhd->reg_base_addr);
2336 } else {
2337 iommu->flush.flush_context = qi_flush_context;
2338 iommu->flush.flush_iotlb = qi_flush_iotlb;
2339 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
2340 "invalidation\n",
2341 (unsigned long long)drhd->reg_base_addr);
2342 }
2343 }
2344
2345 if (iommu_pass_through)
2346 iommu_identity_mapping |= IDENTMAP_ALL;
2347
2348#ifdef CONFIG_DMAR_BROKEN_GFX_WA
2349 iommu_identity_mapping |= IDENTMAP_GFX;
2350#endif
2351
2352 check_tylersburg_isoch();
2353
2354 /*
2355  * If any identity-map policy is in effect (pass-through, gfx or
2356  * Azalia work-arounds), set up the static identity domain now,
2357  * using hardware pass-through contexts only if all IOMMUs support them.
2358  */
2359 if (iommu_identity_mapping) {
2360 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2361 if (ret) {
2362 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2363 goto error;
2364 }
2365 }
2366
2367 /*
2368  * Map the reserved memory regions (RMRRs) that the BIOS reports:
2369  * for each rmrr
2370  *   for each dev attached to rmrr
2371  *   do
2372  *     locate drhd for dev, alloc domain for dev
2373  *     allocate page table entries for rmrr
2374  *     if context not allocated for bus
2375  *           allocate and init context
2376  *           set present in root table for this bus
2377  *     init context with domain, translation etc.
2378  *   endfor
2379  */
2380 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2381 for_each_rmrr_units(rmrr) {
2382 for (i = 0; i < rmrr->devices_cnt; i++) {
2383 pdev = rmrr->devices[i];
2384
2385    /* Some BIOSes list non-existent devices in the DMAR
2386     * table; just skip those entries.
2387     */
2388 if (!pdev)
2389 continue;
2390 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2391 if (ret)
2392 printk(KERN_ERR
2393 "IOMMU: mapping reserved region failed\n");
2394 }
2395 }
2396
2397 iommu_prepare_isa();
2398
2399 /*
2400  * Finally, for each remapping unit:
2401  *   enable fault logging
2402  *   globally invalidate the context cache
2403  *   globally invalidate the IOTLB
2404  *   enable translation
2405  */
2406 for_each_drhd_unit(drhd) {
2407 if (drhd->ignored)
2408 continue;
2409 iommu = drhd->iommu;
2410
2411 iommu_flush_write_buffer(iommu);
2412
2413 ret = dmar_set_interrupt(iommu);
2414 if (ret)
2415 goto error;
2416
2417 iommu_set_root_entry(iommu);
2418
2419 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2420 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2421
2422 ret = iommu_enable_translation(iommu);
2423 if (ret)
2424 goto error;
2425
2426 iommu_disable_protect_mem_regions(iommu);
2427 }
2428
2429 return 0;
2430error:
2431 for_each_drhd_unit(drhd) {
2432 if (drhd->ignored)
2433 continue;
2434 iommu = drhd->iommu;
2435 free_iommu(iommu);
2436 }
2437 kfree(g_iommus);
2438 return ret;
2439}
2440
2441/* Allocate an IOVA range of nrpages (MM-sized pages) for @dev, below its dma_mask */
2442static struct iova *intel_alloc_iova(struct device *dev,
2443 struct dmar_domain *domain,
2444 unsigned long nrpages, uint64_t dma_mask)
2445{
2446 struct pci_dev *pdev = to_pci_dev(dev);
2447 struct iova *iova = NULL;
2448
2449 /* Restrict dma_mask to the width that the IOMMU domain can address */
2450 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2451
2452 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2453  /*
2454   * First try to allocate an IO virtual address below
2455   * DMA_BIT_MASK(32); if that fails, fall back to allocating
2456   * from the full range allowed by the device's DMA mask.
2457   */
2458 iova = alloc_iova(&domain->iovad, nrpages,
2459 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2460 if (iova)
2461 return iova;
2462 }
2463 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2464 if (unlikely(!iova)) {
2465 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2466 nrpages, pci_name(pdev));
2467 return NULL;
2468 }
2469
2470 return iova;
2471}
2472
2473static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
2474{
2475 struct dmar_domain *domain;
2476 int ret;
2477
2478 domain = get_domain_for_dev(pdev,
2479 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2480 if (!domain) {
2481 printk(KERN_ERR
2482 "Allocating domain for %s failed", pci_name(pdev));
2483 return NULL;
2484 }
2485
2486 /* Make sure the context is actually mapped for this device */
2487 if (unlikely(!domain_context_mapped(pdev))) {
2488 ret = domain_context_mapping(domain, pdev,
2489 CONTEXT_TT_MULTI_LEVEL);
2490 if (ret) {
2491 printk(KERN_ERR
2492 "Domain context map for %s failed",
2493 pci_name(pdev));
2494 return NULL;
2495 }
2496 }
2497
2498 return domain;
2499}
2500
2501static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2502{
2503 struct device_domain_info *info;
2504
2505 /* No lock here; assumes no domain exit in the normal case */
2506 info = dev->dev.archdata.iommu;
2507 if (likely(info))
2508 return info->domain;
2509
2510 return __get_valid_domain_for_dev(dev);
2511}
2512
2513static int iommu_dummy(struct pci_dev *pdev)
2514{
2515 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2516}
2517
2518/* Decide whether DMA from this device should bypass IOMMU translation entirely. */
2519static int iommu_no_mapping(struct device *dev)
2520{
2521 struct pci_dev *pdev;
2522 int found;
2523
2524 if (unlikely(dev->bus != &pci_bus_type))
2525 return 1;
2526
2527 pdev = to_pci_dev(dev);
2528 if (iommu_dummy(pdev))
2529 return 1;
2530
2531 if (!iommu_identity_mapping)
2532 return 0;
2533
2534 found = identity_mapping(pdev);
2535 if (found) {
2536 if (iommu_should_identity_map(pdev, 0))
2537 return 1;
2538 else {
2539   /* A device that turns out to be only 32-bit capable is
2540    * removed from si_domain and falls back to non-identity
2541    * (translated) mapping.
2542    */
2543 domain_remove_one_dev_info(si_domain, pdev);
2544 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2545 pci_name(pdev));
2546 return 0;
2547 }
2548 } else {
2549  /* A 64-bit capable device that is not currently in si_domain
2550   * (for example one just detached from a VM domain) is added
2551   * back so it keeps using identity mapping.
2552   */
2553 if (iommu_should_identity_map(pdev, 0)) {
2554 int ret;
2555 ret = domain_add_dev_info(si_domain, pdev,
2556 hw_pass_through ?
2557 CONTEXT_TT_PASS_THROUGH :
2558 CONTEXT_TT_MULTI_LEVEL);
2559 if (!ret) {
2560 printk(KERN_INFO "64bit %s uses identity mapping\n",
2561 pci_name(pdev));
2562 return 1;
2563 }
2564 }
2565 }
2566
2567 return 0;
2568}
2569
2570static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2571 size_t size, int dir, u64 dma_mask)
2572{
2573 struct pci_dev *pdev = to_pci_dev(hwdev);
2574 struct dmar_domain *domain;
2575 phys_addr_t start_paddr;
2576 struct iova *iova;
2577 int prot = 0;
2578 int ret;
2579 struct intel_iommu *iommu;
2580 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
2581
2582 BUG_ON(dir == DMA_NONE);
2583
2584 if (iommu_no_mapping(hwdev))
2585 return paddr;
2586
2587 domain = get_valid_domain_for_dev(pdev);
2588 if (!domain)
2589 return 0;
2590
2591 iommu = domain_get_iommu(domain);
2592 size = aligned_nrpages(paddr, size);
2593
2594 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2595 pdev->dma_mask);
2596 if (!iova)
2597 goto error;
2598
2599 /*
2600  * Check if the DMAR unit supports zero-length reads on
2601  * write-only mappings; if not, force readability.
2602  */
2603 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2604 !cap_zlr(iommu->cap))
2605 prot |= DMA_PTE_READ;
2606 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2607 prot |= DMA_PTE_WRITE;
2608
2609 /*
2610  * paddr..paddr+size may cover partial pages, so map whole pages.
2611  * If two parts of one page get mapped separately, two IOVAs will
2612  * refer to the same host page, which is harmless here.
2613  */
2614 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
2615 mm_to_dma_pfn(paddr_pfn), size, prot);
2616 if (ret)
2617 goto error;
2618
2619 /* This is a non-present to present mapping; only flush if in caching mode */
2620 if (cap_caching_mode(iommu->cap))
2621 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
2622 else
2623 iommu_flush_write_buffer(iommu);
2624
2625 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2626 start_paddr += paddr & ~PAGE_MASK;
2627 return start_paddr;
2628
2629error:
2630 if (iova)
2631 __free_iova(&domain->iovad, iova);
2632 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
2633 pci_name(pdev), size, (unsigned long long)paddr, dir);
2634 return 0;
2635}
2636
2637static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2638 unsigned long offset, size_t size,
2639 enum dma_data_direction dir,
2640 struct dma_attrs *attrs)
2641{
2642 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2643 dir, to_pci_dev(dev)->dma_mask);
2644}
2645
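/*
 * Deferred unmap handling: in non-strict mode, intel_unmap_page() and
 * intel_unmap_sg() do not flush the IOTLB on every call.  Instead,
 * add_unmap() batches the freed IOVAs per IOMMU in deferred_flush[],
 * and the batch is drained either when HIGH_WATER_MARK entries have
 * accumulated or when the 10ms unmap_timer fires (flush_unmaps_timeout()),
 * at which point a single global IOTLB flush per IOMMU covers them all.
 */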
2646static void flush_unmaps(void)
2647{
2648 int i, j;
2649
2650 timer_on = 0;
2651
2652 /* Just flush the IOTLB of every IOMMU with pending deferred entries */
2653 for (i = 0; i < g_num_of_iommus; i++) {
2654 struct intel_iommu *iommu = g_iommus[i];
2655 if (!iommu)
2656 continue;
2657
2658 if (!deferred_flush[i].next)
2659 continue;
2660
2661 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2662 DMA_TLB_GLOBAL_FLUSH);
2663 for (j = 0; j < deferred_flush[i].next; j++) {
2664 unsigned long mask;
2665 struct iova *iova = deferred_flush[i].iova[j];
2666
2667 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2668 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2669 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2670 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
2671 }
2672 deferred_flush[i].next = 0;
2673 }
2674
2675 list_size = 0;
2676}
2677
2678static void flush_unmaps_timeout(unsigned long data)
2679{
2680 unsigned long flags;
2681
2682 spin_lock_irqsave(&async_umap_flush_lock, flags);
2683 flush_unmaps();
2684 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2685}
2686
2687static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2688{
2689 unsigned long flags;
2690 int next, iommu_id;
2691 struct intel_iommu *iommu;
2692
2693 spin_lock_irqsave(&async_umap_flush_lock, flags);
2694 if (list_size == HIGH_WATER_MARK)
2695 flush_unmaps();
2696
2697 iommu = domain_get_iommu(dom);
2698 iommu_id = iommu->seq_id;
2699
2700 next = deferred_flush[iommu_id].next;
2701 deferred_flush[iommu_id].domain[next] = dom;
2702 deferred_flush[iommu_id].iova[next] = iova;
2703 deferred_flush[iommu_id].next++;
2704
2705 if (!timer_on) {
2706 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2707 timer_on = 1;
2708 }
2709 list_size++;
2710 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2711}
2712
2713static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2714 size_t size, enum dma_data_direction dir,
2715 struct dma_attrs *attrs)
2716{
2717 struct pci_dev *pdev = to_pci_dev(dev);
2718 struct dmar_domain *domain;
2719 unsigned long start_pfn, last_pfn;
2720 struct iova *iova;
2721 struct intel_iommu *iommu;
2722
2723 if (iommu_no_mapping(dev))
2724 return;
2725
2726 domain = find_domain(pdev);
2727 BUG_ON(!domain);
2728
2729 iommu = domain_get_iommu(domain);
2730
2731 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2732 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2733 (unsigned long long)dev_addr))
2734 return;
2735
2736 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2737 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2738
2739 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2740 pci_name(pdev), start_pfn, last_pfn);
2741
2742
2743 dma_pte_clear_range(domain, start_pfn, last_pfn);
2744
2745
2746 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2747
2748 if (intel_iommu_strict) {
2749 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2750 last_pfn - start_pfn + 1);
2751
2752 __free_iova(&domain->iovad, iova);
2753 } else {
2754 add_unmap(domain, iova);
2755  /* Defer the IOTLB flush and IOVA release: add_unmap() batches
2756   * them per IOMMU so that one global flush (see flush_unmaps())
2757   * covers many unmaps instead of flushing on every call.
2758   */
2759 }
2760}
2761
2762static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2763 dma_addr_t *dma_handle, gfp_t flags)
2764{
2765 void *vaddr;
2766 int order;
2767
2768 size = PAGE_ALIGN(size);
2769 order = get_order(size);
2770
2771 if (!iommu_no_mapping(hwdev))
2772 flags &= ~(GFP_DMA | GFP_DMA32);
2773 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2774 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2775 flags |= GFP_DMA;
2776 else
2777 flags |= GFP_DMA32;
2778 }
2779
2780 vaddr = (void *)__get_free_pages(flags, order);
2781 if (!vaddr)
2782 return NULL;
2783 memset(vaddr, 0, size);
2784
2785 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2786 DMA_BIDIRECTIONAL,
2787 hwdev->coherent_dma_mask);
2788 if (*dma_handle)
2789 return vaddr;
2790 free_pages((unsigned long)vaddr, order);
2791 return NULL;
2792}
2793
2794static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2795 dma_addr_t dma_handle)
2796{
2797 int order;
2798
2799 size = PAGE_ALIGN(size);
2800 order = get_order(size);
2801
2802 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
2803 free_pages((unsigned long)vaddr, order);
2804}
2805
2806static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2807 int nelems, enum dma_data_direction dir,
2808 struct dma_attrs *attrs)
2809{
2810 struct pci_dev *pdev = to_pci_dev(hwdev);
2811 struct dmar_domain *domain;
2812 unsigned long start_pfn, last_pfn;
2813 struct iova *iova;
2814 struct intel_iommu *iommu;
2815
2816 if (iommu_no_mapping(hwdev))
2817 return;
2818
2819 domain = find_domain(pdev);
2820 BUG_ON(!domain);
2821
2822 iommu = domain_get_iommu(domain);
2823
2824 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2825 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2826 (unsigned long long)sglist[0].dma_address))
2827 return;
2828
2829 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2830 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
2831
2832
2833 dma_pte_clear_range(domain, start_pfn, last_pfn);
2834
2835
2836 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2837
2838 if (intel_iommu_strict) {
2839 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2840 last_pfn - start_pfn + 1);
2841
2842 __free_iova(&domain->iovad, iova);
2843 } else {
2844 add_unmap(domain, iova);
2845  /* Defer the IOTLB flush and IOVA release: add_unmap() batches
2846   * them per IOMMU so that one global flush (see flush_unmaps())
2847   * covers many unmaps instead of flushing on every call.
2848   */
2849 }
2850}
2851
2852static int intel_nontranslate_map_sg(struct device *hddev,
2853 struct scatterlist *sglist, int nelems, int dir)
2854{
2855 int i;
2856 struct scatterlist *sg;
2857
2858 for_each_sg(sglist, sg, nelems, i) {
2859 BUG_ON(!sg_page(sg));
2860 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
2861 sg->dma_length = sg->length;
2862 }
2863 return nelems;
2864}
2865
2866static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2867 enum dma_data_direction dir, struct dma_attrs *attrs)
2868{
2869 int i;
2870 struct pci_dev *pdev = to_pci_dev(hwdev);
2871 struct dmar_domain *domain;
2872 size_t size = 0;
2873 int prot = 0;
2874 size_t offset_pfn = 0;
2875 struct iova *iova = NULL;
2876 int ret;
2877 struct scatterlist *sg;
2878 unsigned long start_vpfn;
2879 struct intel_iommu *iommu;
2880
2881 BUG_ON(dir == DMA_NONE);
2882 if (iommu_no_mapping(hwdev))
2883 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2884
2885 domain = get_valid_domain_for_dev(pdev);
2886 if (!domain)
2887 return 0;
2888
2889 iommu = domain_get_iommu(domain);
2890
2891 for_each_sg(sglist, sg, nelems, i)
2892 size += aligned_nrpages(sg->offset, sg->length);
2893
2894 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2895 pdev->dma_mask);
2896 if (!iova) {
2897 sglist->dma_length = 0;
2898 return 0;
2899 }
2900
2901 /*
2902  * Check if the DMAR unit supports zero-length reads on
2903  * write-only mappings; if not, force readability.
2904  */
2905 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2906 !cap_zlr(iommu->cap))
2907 prot |= DMA_PTE_READ;
2908 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2909 prot |= DMA_PTE_WRITE;
2910
2911 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
2912
2913 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
2914 if (unlikely(ret)) {
2915
2916 dma_pte_clear_range(domain, start_vpfn,
2917 start_vpfn + size - 1);
2918
2919 dma_pte_free_pagetable(domain, start_vpfn,
2920 start_vpfn + size - 1);
2921
2922 __free_iova(&domain->iovad, iova);
2923 return 0;
2924 }
2925
2926 /* This is a non-present to present mapping; only flush if in caching mode */
2927 if (cap_caching_mode(iommu->cap))
2928 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
2929 else
2930 iommu_flush_write_buffer(iommu);
2931
2932 return nelems;
2933}
2934
2935static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2936{
2937 return !dma_addr;
2938}
2939
2940struct dma_map_ops intel_dma_ops = {
2941 .alloc_coherent = intel_alloc_coherent,
2942 .free_coherent = intel_free_coherent,
2943 .map_sg = intel_map_sg,
2944 .unmap_sg = intel_unmap_sg,
2945 .map_page = intel_map_page,
2946 .unmap_page = intel_unmap_page,
2947 .mapping_error = intel_mapping_error,
2948};
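
/*
 * intel_dma_ops is installed as the global dma_ops in intel_iommu_init()
 * below, so ordinary driver calls such as dma_map_page(), dma_unmap_page()
 * and dma_alloc_coherent() are routed to the intel_* implementations above.
 * For example (illustrative only), a driver doing
 *
 *	dma_addr_t handle = dma_map_page(&pdev->dev, page, 0, len,
 *					 DMA_TO_DEVICE);
 *
 * ends up in intel_map_page(), which allocates an IOVA below the device's
 * DMA mask and installs IOMMU page-table entries covering the buffer.
 */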
2949
2950static inline int iommu_domain_cache_init(void)
2951{
2952 int ret = 0;
2953
2954 iommu_domain_cache = kmem_cache_create("iommu_domain",
2955 sizeof(struct dmar_domain),
2956 0,
2957 SLAB_HWCACHE_ALIGN,
2958
2959 NULL);
2960 if (!iommu_domain_cache) {
2961 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2962 ret = -ENOMEM;
2963 }
2964
2965 return ret;
2966}
2967
2968static inline int iommu_devinfo_cache_init(void)
2969{
2970 int ret = 0;
2971
2972 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2973 sizeof(struct device_domain_info),
2974 0,
2975 SLAB_HWCACHE_ALIGN,
2976 NULL);
2977 if (!iommu_devinfo_cache) {
2978 printk(KERN_ERR "Couldn't create devinfo cache\n");
2979 ret = -ENOMEM;
2980 }
2981
2982 return ret;
2983}
2984
2985static inline int iommu_iova_cache_init(void)
2986{
2987 int ret = 0;
2988
2989 iommu_iova_cache = kmem_cache_create("iommu_iova",
2990 sizeof(struct iova),
2991 0,
2992 SLAB_HWCACHE_ALIGN,
2993 NULL);
2994 if (!iommu_iova_cache) {
2995 printk(KERN_ERR "Couldn't create iova cache\n");
2996 ret = -ENOMEM;
2997 }
2998
2999 return ret;
3000}
3001
3002static int __init iommu_init_mempool(void)
3003{
3004 int ret;
3005 ret = iommu_iova_cache_init();
3006 if (ret)
3007 return ret;
3008
3009 ret = iommu_domain_cache_init();
3010 if (ret)
3011 goto domain_error;
3012
3013 ret = iommu_devinfo_cache_init();
3014 if (!ret)
3015 return ret;
3016
3017 kmem_cache_destroy(iommu_domain_cache);
3018domain_error:
3019 kmem_cache_destroy(iommu_iova_cache);
3020
3021 return -ENOMEM;
3022}
3023
3024static void __init iommu_exit_mempool(void)
3025{
3026 kmem_cache_destroy(iommu_devinfo_cache);
3027 kmem_cache_destroy(iommu_domain_cache);
3028 kmem_cache_destroy(iommu_iova_cache);
3029
3030}
3031
3032static void __init init_no_remapping_devices(void)
3033{
3034 struct dmar_drhd_unit *drhd;
3035
3036 for_each_drhd_unit(drhd) {
3037 if (!drhd->include_all) {
3038 int i;
3039 for (i = 0; i < drhd->devices_cnt; i++)
3040 if (drhd->devices[i] != NULL)
3041 break;
3042
3043 if (i == drhd->devices_cnt)
3044 drhd->ignored = 1;
3045 }
3046 }
3047
3048 if (dmar_map_gfx)
3049 return;
3050
3051 for_each_drhd_unit(drhd) {
3052 int i;
3053 if (drhd->ignored || drhd->include_all)
3054 continue;
3055
3056 for (i = 0; i < drhd->devices_cnt; i++)
3057 if (drhd->devices[i] &&
3058 !IS_GFX_DEVICE(drhd->devices[i]))
3059 break;
3060
3061 if (i < drhd->devices_cnt)
3062 continue;
3063
3064  /* This unit covers only graphics devices; bypass the IOMMU for it */
3065 drhd->ignored = 1;
3066 for (i = 0; i < drhd->devices_cnt; i++) {
3067 if (!drhd->devices[i])
3068 continue;
3069 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3070 }
3071 }
3072}
3073
3074#ifdef CONFIG_SUSPEND
3075static int init_iommu_hw(void)
3076{
3077 struct dmar_drhd_unit *drhd;
3078 struct intel_iommu *iommu = NULL;
3079
3080 for_each_active_iommu(iommu, drhd)
3081 if (iommu->qi)
3082 dmar_reenable_qi(iommu);
3083
3084 for_each_active_iommu(iommu, drhd) {
3085 iommu_flush_write_buffer(iommu);
3086
3087 iommu_set_root_entry(iommu);
3088
3089 iommu->flush.flush_context(iommu, 0, 0, 0,
3090 DMA_CCMD_GLOBAL_INVL);
3091 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3092 DMA_TLB_GLOBAL_FLUSH);
3093 iommu_enable_translation(iommu);
3094 iommu_disable_protect_mem_regions(iommu);
3095 }
3096
3097 return 0;
3098}
3099
3100static void iommu_flush_all(void)
3101{
3102 struct dmar_drhd_unit *drhd;
3103 struct intel_iommu *iommu;
3104
3105 for_each_active_iommu(iommu, drhd) {
3106 iommu->flush.flush_context(iommu, 0, 0, 0,
3107 DMA_CCMD_GLOBAL_INVL);
3108 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3109 DMA_TLB_GLOBAL_FLUSH);
3110 }
3111}
3112
3113static int iommu_suspend(struct sys_device *dev, pm_message_t state)
3114{
3115 struct dmar_drhd_unit *drhd;
3116 struct intel_iommu *iommu = NULL;
3117 unsigned long flag;
3118
3119 for_each_active_iommu(iommu, drhd) {
3120 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3121 GFP_ATOMIC);
3122 if (!iommu->iommu_state)
3123 goto nomem;
3124 }
3125
3126 iommu_flush_all();
3127
3128 for_each_active_iommu(iommu, drhd) {
3129 iommu_disable_translation(iommu);
3130
3131 spin_lock_irqsave(&iommu->register_lock, flag);
3132
3133 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3134 readl(iommu->reg + DMAR_FECTL_REG);
3135 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3136 readl(iommu->reg + DMAR_FEDATA_REG);
3137 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3138 readl(iommu->reg + DMAR_FEADDR_REG);
3139 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3140 readl(iommu->reg + DMAR_FEUADDR_REG);
3141
3142 spin_unlock_irqrestore(&iommu->register_lock, flag);
3143 }
3144 return 0;
3145
3146nomem:
3147 for_each_active_iommu(iommu, drhd)
3148 kfree(iommu->iommu_state);
3149
3150 return -ENOMEM;
3151}
3152
3153static int iommu_resume(struct sys_device *dev)
3154{
3155 struct dmar_drhd_unit *drhd;
3156 struct intel_iommu *iommu = NULL;
3157 unsigned long flag;
3158
3159 if (init_iommu_hw()) {
3160 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3161 return -EIO;
3162 }
3163
3164 for_each_active_iommu(iommu, drhd) {
3165
3166 spin_lock_irqsave(&iommu->register_lock, flag);
3167
3168 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3169 iommu->reg + DMAR_FECTL_REG);
3170 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3171 iommu->reg + DMAR_FEDATA_REG);
3172 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3173 iommu->reg + DMAR_FEADDR_REG);
3174 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3175 iommu->reg + DMAR_FEUADDR_REG);
3176
3177 spin_unlock_irqrestore(&iommu->register_lock, flag);
3178 }
3179
3180 for_each_active_iommu(iommu, drhd)
3181 kfree(iommu->iommu_state);
3182
3183 return 0;
3184}
3185
3186static struct sysdev_class iommu_sysclass = {
3187 .name = "iommu",
3188 .resume = iommu_resume,
3189 .suspend = iommu_suspend,
3190};
3191
3192static struct sys_device device_iommu = {
3193 .cls = &iommu_sysclass,
3194};
3195
3196static int __init init_iommu_sysfs(void)
3197{
3198 int error;
3199
3200 error = sysdev_class_register(&iommu_sysclass);
3201 if (error)
3202 return error;
3203
3204 error = sysdev_register(&device_iommu);
3205 if (error)
3206 sysdev_class_unregister(&iommu_sysclass);
3207
3208 return error;
3209}
3210
3211#else
3212static int __init init_iommu_sysfs(void)
3213{
3214 return 0;
3215}
3216#endif
3217
3218
3219/*
3220 * Bus notifier: when a driver is unbound from a PCI device, remove the
3221 * device from its remapping domain so stale mappings don't linger
3222 * (unless the global pass-through policy is in effect).
3223 */
3224static int device_notifier(struct notifier_block *nb,
3225 unsigned long action, void *data)
3226{
3227 struct device *dev = data;
3228 struct pci_dev *pdev = to_pci_dev(dev);
3229 struct dmar_domain *domain;
3230
3231 domain = find_domain(pdev);
3232 if (!domain)
3233 return 0;
3234
3235 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
3236 domain_remove_one_dev_info(domain, pdev);
3237
3238 return 0;
3239}
3240
3241static struct notifier_block device_nb = {
3242 .notifier_call = device_notifier,
3243};
3244
3245int __init intel_iommu_init(void)
3246{
3247 int ret = 0;
3248 int force_on = 0;
3249
3250 /* VT-d is required for a TXT/tboot launch, so enforce that */
3251 force_on = tboot_force_iommu();
3252
3253 if (dmar_table_init()) {
3254 if (force_on)
3255 panic("tboot: Failed to initialize DMAR table\n");
3256 return -ENODEV;
3257 }
3258
3259 if (dmar_dev_scope_init()) {
3260 if (force_on)
3261 panic("tboot: Failed to initialize DMAR device scope\n");
3262 return -ENODEV;
3263 }
3264
3265 /*
3266  * Check the need for DMA-remapping initialization now; the table
3267  * and device-scope parsing above is also used by interrupt remapping.
3268  */
3269 if (no_iommu || swiotlb || dmar_disabled)
3270 return -ENODEV;
3271
3272 iommu_init_mempool();
3273 dmar_init_reserved_ranges();
3274
3275 init_no_remapping_devices();
3276
3277 ret = init_dmars();
3278 if (ret) {
3279 if (force_on)
3280 panic("tboot: Failed to initialize DMARs\n");
3281 printk(KERN_ERR "IOMMU: dmar init failed\n");
3282 put_iova_domain(&reserved_iova_list);
3283 iommu_exit_mempool();
3284 return ret;
3285 }
3286 printk(KERN_INFO
3287 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3288
3289 init_timer(&unmap_timer);
3290 force_iommu = 1;
3291 dma_ops = &intel_dma_ops;
3292
3293 init_iommu_sysfs();
3294
3295 register_iommu(&intel_iommu_ops);
3296
3297 bus_register_notifier(&pci_bus_type, &device_nb);
3298
3299 return 0;
3300}
3301
3302static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3303 struct pci_dev *pdev)
3304{
3305 struct pci_dev *tmp, *parent;
3306
3307 if (!iommu || !pdev)
3308 return;
3309
3310 /* Detach the device's upstream bridges as well, if any */
3311 tmp = pci_find_upstream_pcie_bridge(pdev);
3312
3313 if (tmp) {
3314 parent = pdev->bus->self;
3315 while (parent != tmp) {
3316 iommu_detach_dev(iommu, parent->bus->number,
3317 parent->devfn);
3318 parent = parent->bus->self;
3319 }
3320 if (tmp->is_pcie)
3321 iommu_detach_dev(iommu,
3322 tmp->subordinate->number, 0);
3323 else
3324 iommu_detach_dev(iommu, tmp->bus->number,
3325 tmp->devfn);
3326 }
3327}
3328
3329static void domain_remove_one_dev_info(struct dmar_domain *domain,
3330 struct pci_dev *pdev)
3331{
3332 struct device_domain_info *info;
3333 struct intel_iommu *iommu;
3334 unsigned long flags;
3335 int found = 0;
3336 struct list_head *entry, *tmp;
3337
3338 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3339 pdev->devfn);
3340 if (!iommu)
3341 return;
3342
3343 spin_lock_irqsave(&device_domain_lock, flags);
3344 list_for_each_safe(entry, tmp, &domain->devices) {
3345 info = list_entry(entry, struct device_domain_info, link);
3346
3347 if (info->bus == pdev->bus->number &&
3348 info->devfn == pdev->devfn) {
3349 list_del(&info->link);
3350 list_del(&info->global);
3351 if (info->dev)
3352 info->dev->dev.archdata.iommu = NULL;
3353 spin_unlock_irqrestore(&device_domain_lock, flags);
3354
3355 iommu_disable_dev_iotlb(info);
3356 iommu_detach_dev(iommu, info->bus, info->devfn);
3357 iommu_detach_dependent_devices(iommu, pdev);
3358 free_devinfo_mem(info);
3359
3360 spin_lock_irqsave(&device_domain_lock, flags);
3361
3362 if (found)
3363 break;
3364 else
3365 continue;
3366 }
3367
3368  /* If another device behind the same IOMMU still belongs to
3369   * this domain, remember it so we don't clear the IOMMU from
3370   * the domain's bitmap below.
3371   */
3372 if (iommu == device_to_iommu(info->segment, info->bus,
3373 info->devfn))
3374 found = 1;
3375 }
3376
3377 if (found == 0) {
3378 unsigned long tmp_flags;
3379 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3380 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3381 domain->iommu_count--;
3382 domain_update_iommu_cap(domain);
3383 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3384 }
3385
3386 spin_unlock_irqrestore(&device_domain_lock, flags);
3387}
3388
3389static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3390{
3391 struct device_domain_info *info;
3392 struct intel_iommu *iommu;
3393 unsigned long flags1, flags2;
3394
3395 spin_lock_irqsave(&device_domain_lock, flags1);
3396 while (!list_empty(&domain->devices)) {
3397 info = list_entry(domain->devices.next,
3398 struct device_domain_info, link);
3399 list_del(&info->link);
3400 list_del(&info->global);
3401 if (info->dev)
3402 info->dev->dev.archdata.iommu = NULL;
3403
3404 spin_unlock_irqrestore(&device_domain_lock, flags1);
3405
3406 iommu_disable_dev_iotlb(info);
3407 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
3408 iommu_detach_dev(iommu, info->bus, info->devfn);
3409 iommu_detach_dependent_devices(iommu, info->dev);
3410
3411  /* All devices are being removed, so drop this IOMMU from the
3412   * domain's bitmap and update the count and capabilities.
3413   */
3414 spin_lock_irqsave(&domain->iommu_lock, flags2);
3415 if (test_and_clear_bit(iommu->seq_id,
3416 &domain->iommu_bmp)) {
3417 domain->iommu_count--;
3418 domain_update_iommu_cap(domain);
3419 }
3420 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3421
3422 free_devinfo_mem(info);
3423 spin_lock_irqsave(&device_domain_lock, flags1);
3424 }
3425 spin_unlock_irqrestore(&device_domain_lock, flags1);
3426}
3427
3428
3429static unsigned long vm_domid;
3430
3431static int vm_domain_min_agaw(struct dmar_domain *domain)
3432{
3433 int i;
3434 int min_agaw = domain->agaw;
3435
3436 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
3437 for (; i < g_num_of_iommus; ) {
3438 if (min_agaw > g_iommus[i]->agaw)
3439 min_agaw = g_iommus[i]->agaw;
3440
3441 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
3442 }
3443
3444 return min_agaw;
3445}
3446
3447static struct dmar_domain *iommu_alloc_vm_domain(void)
3448{
3449 struct dmar_domain *domain;
3450
3451 domain = alloc_domain_mem();
3452 if (!domain)
3453 return NULL;
3454
3455 domain->id = vm_domid++;
3456 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3457 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3458
3459 return domain;
3460}
3461
3462static int md_domain_init(struct dmar_domain *domain, int guest_width)
3463{
3464 int adjust_width;
3465
3466 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
3467 spin_lock_init(&domain->iommu_lock);
3468
3469 domain_reserve_special_ranges(domain);
3470
3471
3472 domain->gaw = guest_width;
3473 adjust_width = guestwidth_to_adjustwidth(guest_width);
3474 domain->agaw = width_to_agaw(adjust_width);
3475
3476 INIT_LIST_HEAD(&domain->devices);
3477
3478 domain->iommu_count = 0;
3479 domain->iommu_coherency = 0;
3480 domain->iommu_snooping = 0;
3481 domain->max_addr = 0;
3482
3483
3484 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
3485 if (!domain->pgd)
3486 return -ENOMEM;
3487 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3488 return 0;
3489}
3490
3491static void iommu_free_vm_domain(struct dmar_domain *domain)
3492{
3493 unsigned long flags;
3494 struct dmar_drhd_unit *drhd;
3495 struct intel_iommu *iommu;
3496 unsigned long i;
3497 unsigned long ndomains;
3498
3499 for_each_drhd_unit(drhd) {
3500 if (drhd->ignored)
3501 continue;
3502 iommu = drhd->iommu;
3503
3504 ndomains = cap_ndoms(iommu->cap);
3505 i = find_first_bit(iommu->domain_ids, ndomains);
3506 for (; i < ndomains; ) {
3507 if (iommu->domains[i] == domain) {
3508 spin_lock_irqsave(&iommu->lock, flags);
3509 clear_bit(i, iommu->domain_ids);
3510 iommu->domains[i] = NULL;
3511 spin_unlock_irqrestore(&iommu->lock, flags);
3512 break;
3513 }
3514 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
3515 }
3516 }
3517}
3518
3519static void vm_domain_exit(struct dmar_domain *domain)
3520{
3521
3522 if (!domain)
3523 return;
3524
3525 vm_domain_remove_all_dev_info(domain);
3526
3527 put_iova_domain(&domain->iovad);
3528
3529
3530 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3531
3532
3533 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
3534
3535 iommu_free_vm_domain(domain);
3536 free_domain_mem(domain);
3537}
3538
3539static int intel_iommu_domain_init(struct iommu_domain *domain)
3540{
3541 struct dmar_domain *dmar_domain;
3542
3543 dmar_domain = iommu_alloc_vm_domain();
3544 if (!dmar_domain) {
3545 printk(KERN_ERR
3546 "intel_iommu_domain_init: dmar_domain == NULL\n");
3547 return -ENOMEM;
3548 }
3549 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
3550 printk(KERN_ERR
3551 "intel_iommu_domain_init() failed\n");
3552 vm_domain_exit(dmar_domain);
3553 return -ENOMEM;
3554 }
3555 domain->priv = dmar_domain;
3556
3557 return 0;
3558}
3559
3560static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3561{
3562 struct dmar_domain *dmar_domain = domain->priv;
3563
3564 domain->priv = NULL;
3565 vm_domain_exit(dmar_domain);
3566}
3567
3568static int intel_iommu_attach_device(struct iommu_domain *domain,
3569 struct device *dev)
3570{
3571 struct dmar_domain *dmar_domain = domain->priv;
3572 struct pci_dev *pdev = to_pci_dev(dev);
3573 struct intel_iommu *iommu;
3574 int addr_width;
3575 u64 end;
3576
3577
3578 if (unlikely(domain_context_mapped(pdev))) {
3579 struct dmar_domain *old_domain;
3580
3581 old_domain = find_domain(pdev);
3582 if (old_domain) {
3583 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3584 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3585 domain_remove_one_dev_info(old_domain, pdev);
3586 else
3587 domain_remove_dev_info(old_domain);
3588 }
3589 }
3590
3591 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3592 pdev->devfn);
3593 if (!iommu)
3594 return -ENODEV;
3595
3596 /* Check whether this IOMMU's address width covers the domain's mapped range */
3597 addr_width = agaw_to_width(iommu->agaw);
3598 end = DOMAIN_MAX_ADDR(addr_width);
3599 end = end & VTD_PAGE_MASK;
3600 if (end < dmar_domain->max_addr) {
3601 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3602 "sufficient for the mapped address (%llx)\n",
3603 __func__, iommu->agaw, dmar_domain->max_addr);
3604 return -EFAULT;
3605 }
3606
3607 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
3608}
3609
3610static void intel_iommu_detach_device(struct iommu_domain *domain,
3611 struct device *dev)
3612{
3613 struct dmar_domain *dmar_domain = domain->priv;
3614 struct pci_dev *pdev = to_pci_dev(dev);
3615
3616 domain_remove_one_dev_info(dmar_domain, pdev);
3617}
3618
3619static int intel_iommu_map_range(struct iommu_domain *domain,
3620 unsigned long iova, phys_addr_t hpa,
3621 size_t size, int iommu_prot)
3622{
3623 struct dmar_domain *dmar_domain = domain->priv;
3624 u64 max_addr;
3625 int addr_width;
3626 int prot = 0;
3627 int ret;
3628
3629 if (iommu_prot & IOMMU_READ)
3630 prot |= DMA_PTE_READ;
3631 if (iommu_prot & IOMMU_WRITE)
3632 prot |= DMA_PTE_WRITE;
3633 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3634 prot |= DMA_PTE_SNP;
3635
3636 max_addr = iova + size;
3637 if (dmar_domain->max_addr < max_addr) {
3638 int min_agaw;
3639 u64 end;
3640
3641  /* Check whether the minimum agaw among the domain's IOMMUs covers max_addr */
3642 min_agaw = vm_domain_min_agaw(dmar_domain);
3643 addr_width = agaw_to_width(min_agaw);
3644 end = DOMAIN_MAX_ADDR(addr_width);
3645 end = end & VTD_PAGE_MASK;
3646 if (end < max_addr) {
3647 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3648 "sufficient for the mapped address (%llx)\n",
3649 __func__, min_agaw, max_addr);
3650 return -EFAULT;
3651 }
3652 dmar_domain->max_addr = max_addr;
3653 }
3654
3655 /* Round the size up to a whole number of VT-d pages covering hpa..hpa+size */
3656 size = aligned_nrpages(hpa, size);
3657 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3658 hpa >> VTD_PAGE_SHIFT, size, prot);
3659 return ret;
3660}
3661
3662static void intel_iommu_unmap_range(struct iommu_domain *domain,
3663 unsigned long iova, size_t size)
3664{
3665 struct dmar_domain *dmar_domain = domain->priv;
3666
3667 if (!size)
3668 return;
3669
3670 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3671 (iova + size - 1) >> VTD_PAGE_SHIFT);
3672
3673 if (dmar_domain->max_addr == iova + size)
3674 dmar_domain->max_addr = iova;
3675}
3676
3677static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3678 unsigned long iova)
3679{
3680 struct dmar_domain *dmar_domain = domain->priv;
3681 struct dma_pte *pte;
3682 u64 phys = 0;
3683
3684 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
3685 if (pte)
3686 phys = dma_pte_addr(pte);
3687
3688 return phys;
3689}
3690
3691static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3692 unsigned long cap)
3693{
3694 struct dmar_domain *dmar_domain = domain->priv;
3695
3696 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3697 return dmar_domain->iommu_snooping;
3698
3699 return 0;
3700}
3701
3702static struct iommu_ops intel_iommu_ops = {
3703 .domain_init = intel_iommu_domain_init,
3704 .domain_destroy = intel_iommu_domain_destroy,
3705 .attach_dev = intel_iommu_attach_device,
3706 .detach_dev = intel_iommu_detach_device,
3707 .map = intel_iommu_map_range,
3708 .unmap = intel_iommu_unmap_range,
3709 .iova_to_phys = intel_iommu_iova_to_phys,
3710 .domain_has_cap = intel_iommu_domain_has_cap,
3711};
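
/*
 * These callbacks back the generic IOMMU API: intel_iommu_ops is handed to
 * register_iommu() in intel_iommu_init(), so users of iommu_domain_alloc(),
 * iommu_attach_device() and the map/unmap calls (for example KVM device
 * assignment) end up in the intel_iommu_* functions above, operating on
 * virtual-machine domains created by intel_iommu_domain_init().
 */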
3712
3713static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3714{
3715 /*
3716  * Mobile 4 Series Chipset neglects to set the RWBF capability,
3717  * but still needs write-buffer flushing; force the quirk on.
3718  */
3719 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3720 rwbf_quirk = 1;
3721}
3722
3723DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
3724
3725/*
3726 * On Tylersburg chipsets, some BIOSes have been known to enable the
3727 * ISOCH DMAR unit for the Azalia sound device but not give it any
3728 * TLB entries, which causes it to deadlock.  Check for that, and if
3729 * the unit has no TLB entries at all, force identity mapping for the
3730 * Azalia device so it never hits the broken unit.
3731 */
3732static void __init check_tylersburg_isoch(void)
3733{
3734 struct pci_dev *pdev;
3735 uint32_t vtisochctrl;
3736
3737 /* No Azalia (integrated HD audio) device present: nothing to check */
3738 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3739 if (!pdev)
3740 return;
3741 pci_dev_put(pdev);
3742
3743 /* The check needs the System Management Registers device
3744  * (0x342e); if it isn't visible we simply can't perform the
3745  * sanity check, so bail out quietly. */
3746 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3747 if (!pdev)
3748 return;
3749
3750 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3751 pci_dev_put(pdev);
3752 return;
3753 }
3754
3755 pci_dev_put(pdev);
3756
3757 /* Azalia DMA is routed to the non-isoch DMAR unit: nothing to worry about */
3758 if (vtisochctrl & 1)
3759 return;
3760
3761 /* Keep only the bits giving the number of TLB entries for the isoch unit */
3762 vtisochctrl &= 0x1c;
3763
3764 /* 16 TLB entries is the recommended allocation; that's fine */
3765 if (vtisochctrl == 0x10)
3766 return;
3767
3768 /* Zero TLB entries: the BIOS left the isoch unit unusable; work around it */
3769 if (!vtisochctrl) {
3770 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3771 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3772 dmi_get_system_info(DMI_BIOS_VENDOR),
3773 dmi_get_system_info(DMI_BIOS_VERSION),
3774 dmi_get_system_info(DMI_PRODUCT_VERSION));
3775 iommu_identity_mapping |= IDENTMAP_AZALIA;
3776 return;
3777 }
3778
3779 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3780 vtisochctrl);
3781}
3782