#define pr_fmt(fmt)     "DMAR: " fmt

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/memory.h>
#include <linux/timer.h>
#include <linux/io.h>
#include <linux/iova.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
#include <linux/syscore_ops.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/pci-ats.h>
#include <linux/memblock.h>
#include <linux/dma-contiguous.h>
#include <linux/crash_dump.h>
#include <asm/irq_remapping.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>

#include "irq_remapping.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define MAX_AGAW_WIDTH		64
#define MAX_AGAW_PFN_WIDTH	(MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)

#define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)

/* Clamp the domain maximum pfn to what fits in an unsigned long. */
#define DOMAIN_MAX_PFN(gaw)	((unsigned long) min_t(uint64_t, \
				__DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
#define DOMAIN_MAX_ADDR(gaw)	(((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)

#define IOVA_START_PFN		(1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_BIT_MASK(32))
#define DMA_64BIT_PFN		IOVA_PFN(DMA_BIT_MASK(64))

/* Each page-table level decodes LEVEL_STRIDE (9) bits of the DMA pfn. */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

/* Page sizes advertised to the IOMMU core: 4KiB and all larger powers of two. */
#define INTEL_IOMMU_PGSIZES	(~0xFFFUL)

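/*
 * Helpers that convert between the adjusted guest address width (AGAW),
 * the number of page-table levels, and the per-level offset of a DMA pfn.
 * Level 1 indexes pfn bits 0-8 (address bits 12-20); each further level
 * adds another LEVEL_STRIDE bits.
 */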
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
}

static inline int width_to_agaw(int width)
{
	return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (level - 1) * LEVEL_STRIDE;
}

static inline int pfn_level_offset(unsigned long pfn, int level)
{
	return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
}

static inline unsigned long level_mask(int level)
{
	return -1UL << level_to_offset_bits(level);
}

static inline unsigned long level_size(int level)
{
	return 1UL << level_to_offset_bits(level);
}

static inline unsigned long align_to_level(unsigned long pfn, int level)
{
	return (pfn + level_size(level) - 1) & level_mask(level);
}

static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
	return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
}

/*
 * VT-d works on 4KiB "DMA pfns" even when the host page size is larger;
 * these helpers convert between host mm pfns and DMA pfns.
 */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
{
	return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
}

static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
{
	return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
}
static inline unsigned long page_to_dma_pfn(struct page *pg)
{
	return mm_to_dma_pfn(page_to_pfn(pg));
}
static inline unsigned long virt_to_dma_pfn(void *p)
{
	return page_to_dma_pfn(virt_to_page(p));
}

/* Global array of IOMMUs, indexed by sequence id. */
static struct intel_iommu **g_iommus;

static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;

/* Non-zero when VT-d is forced on (e.g. when the kernel was launched via tboot). */
static int force_on = 0;

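/*
 * One root table holds 256 of these entries, one per bus number.  In legacy
 * mode only the low word is used and covers every devfn on the bus; with
 * extended context tables the low word covers devfns 0x00-0x7f and the high
 * word covers 0x80-0xff.
 */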
struct root_entry {
	u64	lo;
	u64	hi;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))

/*
 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
 * if it is present.
 */
static phys_addr_t root_entry_lctp(struct root_entry *re)
{
	if (!(re->lo & 1))
		return 0;

	return re->lo & VTD_PAGE_MASK;
}

/*
 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
 * if it is present.
 */
static phys_addr_t root_entry_uctp(struct root_entry *re)
{
	if (!(re->hi & 1))
		return 0;

	return re->hi & VTD_PAGE_MASK;
}

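/*
 * A context entry maps one requester (bus/devfn) to a domain.  As the
 * accessors below show, the low 64 bits carry the present bit, fault
 * processing enable, translation type and the page-table root address;
 * the high 64 bits carry the address width and the domain identifier.
 */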
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline void context_clear_pasid_enable(struct context_entry *context)
{
	context->lo &= ~(1ULL << 11);
}

static inline bool context_pasid_enabled(struct context_entry *context)
{
	return !!(context->lo & (1ULL << 11));
}

static inline void context_set_copied(struct context_entry *context)
{
	context->hi |= (1ull << 3);
}

static inline bool context_copied(struct context_entry *context)
{
	return !!(context->hi & (1ULL << 3));
}

static inline bool __context_present(struct context_entry *context)
{
	return (context->lo & 1);
}

static inline bool context_present(struct context_entry *context)
{
	return context_pasid_enabled(context) ?
	     __context_present(context) :
	     __context_present(context) && !context_copied(context);
}

static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo &= ~VTD_PAGE_MASK;
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline int context_domain_id(struct context_entry *c)
{
	return((c->hi >> 8) & 0xffff);
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

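/*
 * One 64-bit entry of the second-level page tables.  The low bits hold the
 * read/write/snoop/large-page attribute flags and the upper bits the target
 * page-frame address; the accessors below pick them apart.
 */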
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
#ifdef CONFIG_64BIT
	return pte->val & VTD_PAGE_MASK;
#else
	/* Must have a full atomic 64-bit read on 32-bit kernels */
	return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
#endif
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

static inline bool dma_pte_superpage(struct dma_pte *pte)
{
	return (pte->val & DMA_PTE_LARGE_PAGE);
}

static inline int first_pte_in_page(struct dma_pte *pte)
{
	return !((unsigned long)pte & ~VTD_PAGE_MASK);
}

/*
 * si_domain is the static identity-map domain: devices placed in it get a
 * 1:1 mapping of usable memory when hardware pass-through is not available.
 */
static struct dmar_domain *si_domain;
static int hw_pass_through = 1;

/* domain represents a virtual machine; more than one device may share it */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 0)

/* the static identity-map (si) domain */
#define DOMAIN_FLAG_STATIC_IDENTITY	(1 << 1)

#define for_each_domain_iommu(idx, domain)			\
	for (idx = 0; idx < g_num_of_iommus; idx++)		\
		if (domain->iommu_refcnt[idx])

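/*
 * A dmar_domain is one DMA remapping domain: it tracks how many devices on
 * each IOMMU reference it, the per-IOMMU domain ids it was assigned, the
 * devices attached to it, its IOVA allocator and the root of its
 * second-level page table.
 */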
struct dmar_domain {
	int	nid;			/* node id */

	unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
					/* refcount of devices per iommu */

	u16	iommu_did[DMAR_UNITS_SUPPORTED];
					/* domain ids per IOMMU; domain ids
					 * are 16 bits wide per the VT-d spec */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iovas that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address of page-table root */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_snooping; /* indicate snooping control feature */
	int		iommu_count;	/* reference count of iommu */
	int		iommu_superpage;/* level of superpages supported */

	u64		max_addr;	/* maximum mapped address */

	struct iommu_domain domain;	/* generic domain data structure for
					   iommu core */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	u8 pasid_supported:3;
	u8 pasid_enabled:1;
	u8 pri_supported:1;
	u8 pri_enabled:1;
	u8 ats_supported:1;
	u8 ats_enabled:1;
	u8 ats_qdep;
	struct device *dev;	/* it's NULL for PCIe-to-PCI bridge aliases */
	struct intel_iommu *iommu; /* IOMMU used by this device */
	struct dmar_domain *domain; /* pointer to domain */
};

struct dmar_rmrr_unit {
	struct list_head list;		/* list of rmrr units	*/
	struct acpi_dmar_header *hdr;	/* ACPI header		*/
	u64	base_address;		/* reserved base address */
	u64	end_address;		/* reserved end address */
	struct dmar_dev_scope *devices;	/* target devices */
	int	devices_cnt;		/* target device count */
};

struct dmar_atsr_unit {
	struct list_head list;		/* list of ATSR units */
	struct acpi_dmar_header *hdr;	/* ACPI header */
	struct dmar_dev_scope *devices;	/* target devices */
	int devices_cnt;		/* target device count */
	u8 include_all:1;		/* include all ports */
};

static LIST_HEAD(dmar_atsr_units);
static LIST_HEAD(dmar_rmrr_units);

#define for_each_rmrr_units(rmrr) \
	list_for_each_entry(rmrr, &dmar_rmrr_units, list)

static void flush_unmaps_timeout(unsigned long data);

static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

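/*
 * In the default (non-strict) mode, unmapped IOVAs are not flushed from the
 * IOTLB immediately; they are queued in deferred_flush_tables and released
 * in batches by flush_unmaps_timeout(), either when the unmap_timer fires or
 * once enough entries have accumulated.
 */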
#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
	struct page *freelist[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_exit(struct dmar_domain *domain);
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct dmar_domain *domain,
				     struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
static void domain_context_clear(struct intel_iommu *iommu,
				 struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu);

#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
int dmar_disabled = 0;
#else
int dmar_disabled = 1;
#endif

int intel_iommu_enabled = 0;
EXPORT_SYMBOL_GPL(intel_iommu_enabled);

static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
static int intel_iommu_ecs = 1;
static int intel_iommu_pasid28;
static int iommu_identity_mapping;

#define IDENTMAP_ALL		1
#define IDENTMAP_GFX		2
#define IDENTMAP_AZALIA		4

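/*
 * Extended context support ("ECS") is used only when the hardware advertises
 * it and, unless intel_iommu=pasid28 asks for the pre-production PASID
 * workaround, when the broken-PASID errata bit is clear.  pasid_enabled()
 * additionally requires a PASID capability on top of that.
 */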
#define ecs_enabled(iommu)	(intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
				 (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap)))
#define pasid_enabled(iommu)	(ecs_enabled(iommu) &&			\
				 (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap)))

int intel_iommu_gfx_mapped;
EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static const struct iommu_ops intel_iommu_ops;

static bool translation_pre_enabled(struct intel_iommu *iommu)
{
	return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
}

static void clear_translation_pre_enabled(struct intel_iommu *iommu)
{
	iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
}

static void init_translation_status(struct intel_iommu *iommu)
{
	u32 gsts;

	gsts = readl(iommu->reg + DMAR_GSTS_REG);
	if (gsts & DMA_GSTS_TES)
		iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
}

/* Convert a generic struct iommu_domain to the driver's private dmar_domain. */
static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct dmar_domain, domain);
}

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "on", 2)) {
			dmar_disabled = 0;
			pr_info("IOMMU enabled\n");
		} else if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			pr_info("IOMMU disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			pr_info("Disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			pr_info("Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			pr_info("Disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		} else if (!strncmp(str, "sp_off", 6)) {
			pr_info("Disable supported super page\n");
			intel_iommu_superpage = 0;
		} else if (!strncmp(str, "ecs_off", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable extended context table support\n");
			intel_iommu_ecs = 0;
		} else if (!strncmp(str, "pasid28", 7)) {
			printk(KERN_INFO
				"Intel-IOMMU: enable pre-production PASID support\n");
			intel_iommu_pasid28 = 1;
			iommu_identity_mapping |= IDENTMAP_GFX;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

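/*
 * Example: the options above combine on the kernel command line, e.g.
 * "intel_iommu=on,strict,sp_off" enables the IOMMU, disables batched IOTLB
 * flushing and disables superpage use.
 */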
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;

static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	domains = iommu->domains[idx];
	if (!domains)
		return NULL;

	return domains[did & 0xff];
}

static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
			     struct dmar_domain *domain)
{
	struct dmar_domain **domains;
	int idx = did >> 8;

	if (!iommu->domains[idx]) {
		size_t size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
	}

	domains = iommu->domains[idx];
	if (WARN_ON(!domains))
		return;
	else
		domains[did & 0xff] = domain;
}

static inline void *alloc_pgtable_page(int node)
{
	struct page *page;
	void *vaddr = NULL;

	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (page)
		vaddr = page_address(page);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

static inline int domain_type_is_vm(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
}

static inline int domain_type_is_si(struct dmar_domain *domain)
{
	return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
}

static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
{
	return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
				DOMAIN_FLAG_STATIC_IDENTITY);
}

static inline int domain_pfn_supported(struct dmar_domain *domain,
					unsigned long pfn)
{
	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;

	return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
}

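/*
 * Pick the highest adjusted guest address width (AGAW) the IOMMU supports
 * that does not exceed max_gaw, by scanning the SAGAW capability bits from
 * the widest candidate downwards.
 */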
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(max_gaw);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/*
 * Calculate max SAGAW for each iommu.
 */
int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
}

/*
 * Calculate agaw for each iommu.  "SAGAW" may be different across iommus;
 * use a default agaw, and fall back to a smaller supported agaw for iommus
 * that don't support the default.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
}

/* This function only returns the single iommu that owns a non-VM domain. */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	/* si_domain and vm domains should not get here. */
	BUG_ON(domain_type_is_vm_or_si(domain));
	for_each_domain_iommu(iommu_id, domain)
		break;

	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	bool found = false;
	int i;

	domain->iommu_coherency = 1;

	for_each_domain_iommu(i, domain) {
		found = true;
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	if (found)
		return;

	/* No hardware attached; use lowest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (!ecap_coherent(iommu->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
	}
	rcu_read_unlock();
}

static int domain_update_iommu_snooping(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int ret = 1;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			if (!ecap_sc_support(iommu->ecap)) {
				ret = 0;
				break;
			}
		}
	}
	rcu_read_unlock();

	return ret;
}

static int domain_update_iommu_superpage(struct intel_iommu *skip)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	int mask = 0xf;

	if (!intel_iommu_superpage) {
		return 0;
	}

	/* set iommu_superpage to the smallest common denominator */
	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (iommu != skip) {
			mask &= cap_super_page_val(iommu->cap);
			if (!mask)
				break;
		}
	}
	rcu_read_unlock();

	return fls(mask);
}

/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
	domain_update_iommu_coherency(domain);
	domain->iommu_snooping = domain_update_iommu_snooping(NULL);
	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}

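/*
 * Return the context entry for source-id (bus, devfn), allocating the
 * context table on demand when @alloc is set.  With extended context
 * support each entry is twice as large, so the root entry's high half
 * covers devfns 0x80-0xff and the devfn is doubled to index the table.
 */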
static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
						       u8 bus, u8 devfn, int alloc)
{
	struct root_entry *root = &iommu->root_entry[bus];
	struct context_entry *context;
	u64 *entry;

	entry = &root->lo;
	if (ecs_enabled(iommu)) {
		if (devfn >= 0x80) {
			devfn -= 0x80;
			entry = &root->hi;
		}
		devfn *= 2;
	}
	if (*entry & 1)
		context = phys_to_virt(*entry & VTD_PAGE_MASK);
	else {
		unsigned long phy_addr;
		if (!alloc)
			return NULL;

		context = alloc_pgtable_page(iommu->node);
		if (!context)
			return NULL;

		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		*entry = phy_addr | 1;
		__iommu_flush_cache(iommu, entry, sizeof(*entry));
	}
	return &context[devfn];
}

static int iommu_dummy(struct device *dev)
{
	return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
}

static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	struct intel_iommu *iommu;
	struct device *tmp;
	struct pci_dev *ptmp, *pdev = NULL;
	u16 segment = 0;
	int i;

	if (iommu_dummy(dev))
		return NULL;

	if (dev_is_pci(dev)) {
		pdev = to_pci_dev(dev);
		segment = pci_domain_nr(pdev->bus);
	} else if (has_acpi_companion(dev))
		dev = &ACPI_COMPANION(dev)->dev;

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (pdev && segment != drhd->segment)
			continue;

		for_each_active_dev_scope(drhd->devices,
					  drhd->devices_cnt, i, tmp) {
			if (tmp == dev) {
				*bus = drhd->devices[i].bus;
				*devfn = drhd->devices[i].devfn;
				goto out;
			}

			if (!pdev || !dev_is_pci(tmp))
				continue;

			ptmp = to_pci_dev(tmp);
			if (ptmp->subordinate &&
			    ptmp->subordinate->number <= pdev->bus->number &&
			    ptmp->subordinate->busn_res.end >= pdev->bus->number)
				goto got_pdev;
		}

		if (pdev && drhd->include_all) {
		got_pdev:
			*bus = pdev->bus->number;
			*devfn = pdev->devfn;
			goto out;
		}
	}
	iommu = NULL;
 out:
	rcu_read_unlock();

	return iommu;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context)
		ret = context_present(context);
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	context = iommu_context_addr(iommu, bus, devfn, 0);
	if (context) {
		context_clear_entry(context);
		__iommu_flush_cache(iommu, context, sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		context = iommu_context_addr(iommu, i, 0, 0);
		if (context)
			free_pgtable_page(context);

		if (!ecs_enabled(iommu))
			continue;

		context = iommu_context_addr(iommu, i, 0x80, 0);
		if (context)
			free_pgtable_page(context);

	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

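/*
 * Walk the domain's page table down to the entry covering @pfn, allocating
 * intermediate levels on the way when *target_level permits it.  On return
 * *target_level is set to the level at which the walk stopped (for example
 * when a superpage PTE was hit).
 */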
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
				      unsigned long pfn, int *target_level)
{
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;

	BUG_ON(!domain->pgd);

	if (!domain_pfn_supported(domain, pfn))
		/* Address beyond IOMMU's addressing capabilities. */
		return NULL;

	parent = domain->pgd;

	while (1) {
		void *tmp_page;

		offset = pfn_level_offset(pfn, level);
		pte = &parent[offset];
		if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
			break;
		if (level == *target_level)
			break;

		if (!dma_pte_present(pte)) {
			uint64_t pteval;

			tmp_page = alloc_pgtable_page(domain->nid);

			if (!tmp_page)
				return NULL;

			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
			if (cmpxchg64(&pte->val, 0ULL, pteval))
				/* Someone else set it while we were thinking; use theirs. */
				free_pgtable_page(tmp_page);
			else
				domain_flush_cache(domain, pte, sizeof(*pte));
		}
		if (level == 1)
			break;

		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	if (!*target_level)
		*target_level = level;

	return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
					 unsigned long pfn,
					 int level, int *large_page)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = pfn_level_offset(pfn, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte)) {
			*large_page = total;
			break;
		}

		if (dma_pte_superpage(pte)) {
			*large_page = total;
			return pte;
		}

		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear last level pte; a tlb flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain,
				unsigned long start_pfn,
				unsigned long last_pfn)
{
	unsigned int large_page = 1;
	struct dma_pte *first_pte, *pte;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	do {
		large_page = 1;
		first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
		if (!pte) {
			start_pfn = align_to_level(start_pfn + 1, large_page + 1);
			continue;
		}
		do {
			dma_clear_pte(pte);
			start_pfn += lvl_to_nr_pages(large_page);
			pte++;
		} while (start_pfn <= last_pfn && !first_pte_in_page(pte));

		domain_flush_cache(domain, first_pte,
				   (void *)pte - (void *)first_pte);

	} while (start_pfn && start_pfn <= last_pfn);
}

static void dma_pte_free_level(struct dmar_domain *domain, int level,
			       struct dma_pte *pte, unsigned long pfn,
			       unsigned long start_pfn, unsigned long last_pfn)
{
	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;
		struct dma_pte *level_pte;

		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
			goto next;

		level_pfn = pfn & level_mask(level - 1);
		level_pte = phys_to_virt(dma_pte_addr(pte));

		if (level > 2)
			dma_pte_free_level(domain, level - 1, level_pte,
					   level_pfn, start_pfn, last_pfn);

		/* If range covers entire pagetable, free it */
		if (!(start_pfn > level_pfn ||
		      last_pfn < level_pfn + level_size(level) - 1)) {
			dma_clear_pte(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
			free_pgtable_page(level_pte);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
}

/* clear last level (leaf) ptes and free page-table pages. */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
				   unsigned long start_pfn,
				   unsigned long last_pfn)
{
	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	dma_pte_clear_range(domain, start_pfn, last_pfn);

	/* We don't need lock here; nobody else touches the iova range */
	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
			   domain->pgd, 0, start_pfn, last_pfn);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

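/*
 * When a whole sub-tree is being unmapped, the freed page-table pages are
 * not modified; they are simply chained together through page->freelist so
 * the caller can return them to the allocator once the IOTLB has been
 * flushed.
 */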
static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
					    int level, struct dma_pte *pte,
					    struct page *freelist)
{
	struct page *pg;

	pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
	pg->freelist = freelist;
	freelist = pg;

	if (level == 1)
		return freelist;

	pte = page_address(pg);
	do {
		if (dma_pte_present(pte) && !dma_pte_superpage(pte))
			freelist = dma_pte_list_pagetables(domain, level - 1,
							   pte, freelist);
		pte++;
	} while (!first_pte_in_page(pte));

	return freelist;
}

static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
					struct dma_pte *pte, unsigned long pfn,
					unsigned long start_pfn,
					unsigned long last_pfn,
					struct page *freelist)
{
	struct dma_pte *first_pte = NULL, *last_pte = NULL;

	pfn = max(start_pfn, pfn);
	pte = &pte[pfn_level_offset(pfn, level)];

	do {
		unsigned long level_pfn;

		if (!dma_pte_present(pte))
			goto next;

		level_pfn = pfn & level_mask(level);

		/* If range covers entire pagetable, free it */
		if (start_pfn <= level_pfn &&
		    last_pfn >= level_pfn + level_size(level) - 1) {
			/* These subordinate page tables are going away
			   entirely; don't bother clearing them, just free them. */
			if (level > 1 && !dma_pte_superpage(pte))
				freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);

			dma_clear_pte(pte);
			if (!first_pte)
				first_pte = pte;
			last_pte = pte;
		} else if (level > 1) {
			/* Recurse down into a level that isn't *entirely* obsolete */
			freelist = dma_pte_clear_level(domain, level - 1,
						       phys_to_virt(dma_pte_addr(pte)),
						       level_pfn, start_pfn, last_pfn,
						       freelist);
		}
next:
		pfn += level_size(level);
	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);

	if (first_pte)
		domain_flush_cache(domain, first_pte,
				   (void *)++last_pte - (void *)first_pte);

	return freelist;
}

/*
 * Clear the leaf PTEs covering [start_pfn, last_pfn] and return the page
 * table pages that became unused.  They cannot be freed yet, because the
 * IOMMU may still hold cached intermediate entries; the caller frees them
 * with dma_free_pagelist() after the IOTLB has been flushed.
 */
static struct page *domain_unmap(struct dmar_domain *domain,
				 unsigned long start_pfn,
				 unsigned long last_pfn)
{
	struct page *freelist = NULL;

	BUG_ON(!domain_pfn_supported(domain, start_pfn));
	BUG_ON(!domain_pfn_supported(domain, last_pfn));
	BUG_ON(start_pfn > last_pfn);

	/* we don't need lock here; nobody else touches the iova range */
	freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
				       domain->pgd, 0, start_pfn, last_pfn, NULL);

	/* free pgd */
	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
		struct page *pgd_page = virt_to_page(domain->pgd);
		pgd_page->freelist = freelist;
		freelist = pgd_page;

		domain->pgd = NULL;
	}

	return freelist;
}

static void dma_free_pagelist(struct page *freelist)
{
	struct page *pg;

	while ((pg = freelist)) {
		freelist = pg->freelist;
		free_pgtable_page(page_address(pg));
	}
}

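/* iommu handling: root table setup, register programming and invalidation. */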
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
	if (!root) {
		pr_err("Allocating root entry for %s failed\n",
			iommu->name);
		return -ENOMEM;
	}

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	u64 addr;
	u32 sts;
	unsigned long flag;

	addr = virt_to_phys(iommu->root_entry);
	if (ecs_enabled(iommu))
		addr |= DMA_RTADDR_RTT;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);

	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_RTPS), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!rwbf_quirk && !cap_rwbf(iommu->cap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(val & DMA_GSTS_WBFS)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

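/*
 * Issue a context-cache invalidation through the CCMD register (global,
 * domain-selective or device-selective, as requested) and spin until the
 * hardware clears the ICC bit to signal completion.
 */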
static void __iommu_flush_context(struct intel_iommu *iommu,
				  u16 did, u16 source_id, u8 function_mask,
				  u64 type)
{
	u64 val = 0;
	unsigned long flag;

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
				u64 addr, unsigned int size_order, u64 type)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* IH bit is passed in as part of address */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}

#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: only uses the first TLB register currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		pr_err("Flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("TLB flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
}

static struct device_domain_info *
iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
			u8 bus, u8 devfn)
{
	struct device_domain_info *info;

	assert_spin_locked(&device_domain_lock);

	if (!iommu->qi)
		return NULL;

	list_for_each_entry(info, &domain->devices, link)
		if (info->iommu == iommu && info->bus == bus &&
		    info->devfn == devfn) {
			if (info->ats_supported && info->dev)
				return info;
			break;
		}

	return NULL;
}

static void iommu_enable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!info || !dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

#ifdef CONFIG_INTEL_IOMMU_SVM
	/*
	 * The PCIe spec declares that the behaviour of the device is
	 * undefined if you enable PASID support after ATS support, so
	 * always enable PASID support on devices which have it, even if
	 * we can't yet know whether we're ever going to use it.
	 */
	if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
		info->pasid_enabled = 1;

	if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
		info->pri_enabled = 1;
#endif
	if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
		info->ats_enabled = 1;
		info->ats_qdep = pci_ats_queue_depth(pdev);
	}
}

static void iommu_disable_dev_iotlb(struct device_domain_info *info)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(info->dev))
		return;

	pdev = to_pci_dev(info->dev);

	if (info->ats_enabled) {
		pci_disable_ats(pdev);
		info->ats_enabled = 0;
	}
#ifdef CONFIG_INTEL_IOMMU_SVM
	if (info->pri_enabled) {
		pci_disable_pri(pdev);
		info->pri_enabled = 0;
	}
	if (info->pasid_enabled) {
		pci_disable_pasid(pdev);
		info->pasid_enabled = 0;
	}
#endif
}

static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
				  u64 addr, unsigned mask)
{
	u16 sid, qdep;
	unsigned long flags;
	struct device_domain_info *info;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry(info, &domain->devices, link) {
		if (!info->ats_enabled)
			continue;

		sid = info->bus << 8 | info->devfn;
		qdep = info->ats_qdep;
		qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
				  struct dmar_domain *domain,
				  unsigned long pfn, unsigned int pages,
				  int ih, int map)
{
	unsigned int mask = ilog2(__roundup_pow_of_two(pages));
	uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
	u16 did = domain->iommu_did[iommu->seq_id];

	BUG_ON(pages == 0);

	if (ih)
		ih = 1 << 6;
	/*
	 * Fall back to a domain-selective flush if there is no PSI support
	 * or the size is too big.  PSI requires the page count to be a power
	 * of two and the base address to be naturally aligned to the size.
	 */
	if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
		iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH);
	else
		iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
						DMA_TLB_PSI_FLUSH);

	/*
	 * In caching mode, changes of pages from non-present to present
	 * require a flush.  The device IOTLB does not need to be flushed in
	 * this case, though.
	 */
	if (!cap_caching_mode(iommu->cap) || !map)
		iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
				      addr, mask);
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	iommu->gcmd |= DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (sts & DMA_GSTS_TES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static void iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		      readl, (!(sts & DMA_GSTS_TES)), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

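/*
 * Per-IOMMU domain-id management: domain_ids is a bitmap of ids in use and
 * iommu->domains is a two-level array (256 pointers per chunk, indexed by
 * did >> 8 and did & 0xff) so that only the chunks actually needed get
 * allocated.
 */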
static int iommu_init_domains(struct intel_iommu *iommu)
{
	u32 ndomains, nlongs;
	size_t size;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("%s: Number of Domains supported <%d>\n",
		 iommu->name, ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	spin_lock_init(&iommu->lock);

	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		pr_err("%s: Allocating domain id array failed\n",
		       iommu->name);
		return -ENOMEM;
	}

	size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
	iommu->domains = kzalloc(size, GFP_KERNEL);

	if (iommu->domains) {
		size = 256 * sizeof(struct dmar_domain *);
		iommu->domains[0] = kzalloc(size, GFP_KERNEL);
	}

	if (!iommu->domains || !iommu->domains[0]) {
		pr_err("%s: Allocating domain array failed\n",
		       iommu->name);
		kfree(iommu->domain_ids);
		kfree(iommu->domains);
		iommu->domain_ids = NULL;
		iommu->domains    = NULL;
		return -ENOMEM;
	}

	/*
	 * If Caching mode is set, then invalid translations are tagged
	 * with domain-id 0, hence we need to pre-allocate it.  We also use
	 * domain-id 0 as a marker for non-allocated domain-ids, so make
	 * sure it is never used for a real domain.
	 */
	set_bit(0, iommu->domain_ids);

	return 0;
}

static void disable_dmar_iommu(struct intel_iommu *iommu)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	if (!iommu->domains || !iommu->domain_ids)
		return;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
		struct dmar_domain *domain;

		if (info->iommu != iommu)
			continue;

		if (!info->dev || !info->domain)
			continue;

		domain = info->domain;

		dmar_remove_one_dev_info(domain, info->dev);

		if (!domain_type_is_vm_or_si(domain))
			domain_exit(domain);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);
}

static void free_dmar_iommu(struct intel_iommu *iommu)
{
	if ((iommu->domains) && (iommu->domain_ids)) {
		int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
		int i;

		for (i = 0; i < elems; i++)
			kfree(iommu->domains[i]);
		kfree(iommu->domains);
		kfree(iommu->domain_ids);
		iommu->domains = NULL;
		iommu->domain_ids = NULL;
	}

	g_iommus[iommu->seq_id] = NULL;

	/* free context mapping */
	free_context_table(iommu);

#ifdef CONFIG_INTEL_IOMMU_SVM
	if (pasid_enabled(iommu)) {
		if (ecap_prs(iommu->ecap))
			intel_svm_finish_prq(iommu);
		intel_svm_free_pasid_tables(iommu);
	}
#endif
}

static struct dmar_domain *alloc_domain(int flags)
{
	struct dmar_domain *domain;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	memset(domain, 0, sizeof(*domain));
	domain->nid = -1;
	domain->flags = flags;
	INIT_LIST_HEAD(&domain->devices);

	return domain;
}

/* Must be called with iommu->lock and device_domain_lock held */
static int domain_attach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	unsigned long ndomains;
	int num;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] += 1;
	domain->iommu_count += 1;
	if (domain->iommu_refcnt[iommu->seq_id] == 1) {
		ndomains = cap_ndoms(iommu->cap);
		num      = find_first_zero_bit(iommu->domain_ids, ndomains);

		if (num >= ndomains) {
			pr_err("%s: No free domain ids\n", iommu->name);
			domain->iommu_refcnt[iommu->seq_id] -= 1;
			domain->iommu_count -= 1;
			return -ENOSPC;
		}

		set_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, domain);

		domain->iommu_did[iommu->seq_id] = num;
		domain->nid			 = iommu->node;

		domain_update_iommu_cap(domain);
	}

	return 0;
}

static int domain_detach_iommu(struct dmar_domain *domain,
			       struct intel_iommu *iommu)
{
	int num, count = INT_MAX;

	assert_spin_locked(&device_domain_lock);
	assert_spin_locked(&iommu->lock);

	domain->iommu_refcnt[iommu->seq_id] -= 1;
	count = --domain->iommu_count;
	if (domain->iommu_refcnt[iommu->seq_id] == 0) {
		num = domain->iommu_did[iommu->seq_id];
		clear_bit(num, iommu->domain_ids);
		set_iommu_domain(iommu, num, NULL);

		domain_update_iommu_cap(domain);
		domain->iommu_did[iommu->seq_id] = 0;
	}

	return count;
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_rbtree_key;

static int dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;

	init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
			DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova) {
		pr_err("Reserve IOAPIC range failed\n");
		return -ENODEV;
	}

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			iova = reserve_iova(&reserved_iova_list,
					    IOVA_PFN(r->start),
					    IOVA_PFN(r->end));
			if (!iova) {
				pr_err("Reserve iova failed\n");
				return -ENODEV;
			}
		}
	}
	return 0;
}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}

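/*
 * Example: guestwidth_to_adjustwidth() rounds the guest address width up to
 * the next value expressible as 12 + n*9, so a 40-bit guest width becomes
 * 48 while 39 or 48 are returned unchanged; domain_init() below then turns
 * that into the AGAW programmed into the context entry.
 */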
static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
		       int guest_width)
{
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
			DMA_32BIT_PFN);
	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("Hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	if (ecap_sc_support(iommu->ecap))
		domain->iommu_snooping = 1;
	else
		domain->iommu_snooping = 0;

	if (intel_iommu_superpage)
		domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
	else
		domain->iommu_superpage = 0;

	domain->nid = iommu->node;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	struct page *freelist = NULL;

	if (!domain)
		return;

	/* Flush any lazy unmaps that may reference this domain */
	if (!intel_iommu_strict)
		flush_unmaps_timeout(0);

	/* Remove associated devices and clear attached or cached domains */
	rcu_read_lock();
	domain_remove_dev_info(domain);
	rcu_read_unlock();

	/* destroy iovas */
	put_iova_domain(&domain->iovad);

	freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));

	dma_free_pagelist(freelist);

	free_domain_mem(domain);
}

static int domain_context_mapping_one(struct dmar_domain *domain,
				      struct intel_iommu *iommu,
				      u8 bus, u8 devfn)
{
	u16 did = domain->iommu_did[iommu->seq_id];
	int translation = CONTEXT_TT_MULTI_LEVEL;
	struct device_domain_info *info = NULL;
	struct context_entry *context;
	unsigned long flags;
	struct dma_pte *pgd;
	int ret, agaw;

	WARN_ON(did == 0);

	if (hw_pass_through && domain_type_is_si(domain))
		translation = CONTEXT_TT_PASS_THROUGH;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));

	BUG_ON(!domain->pgd);

	spin_lock_irqsave(&device_domain_lock, flags);
	spin_lock(&iommu->lock);

	ret = -ENOMEM;
	context = iommu_context_addr(iommu, bus, devfn, 1);
	if (!context)
		goto out_unlock;

	ret = 0;
	if (context_present(context))
		goto out_unlock;

	pgd = domain->pgd;

	context_clear_entry(context);
	context_set_domain_id(context, did);

	/*
	 * Skip top levels of page tables for iommu which has less agaw
	 * than default.  Unnecessary for PT mode.
	 */
	if (translation != CONTEXT_TT_PASS_THROUGH) {
		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
			ret = -ENOMEM;
			pgd = phys_to_virt(dma_pte_addr(pgd));
			if (!dma_pte_present(pgd))
				goto out_unlock;
		}

		info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
		if (info && info->ats_supported)
			translation = CONTEXT_TT_DEV_IOTLB;
		else
			translation = CONTEXT_TT_MULTI_LEVEL;

		context_set_address_root(context, virt_to_phys(pgd));
		context_set_address_width(context, iommu->agaw);
	} else {
		/*
		 * In pass-through mode, AW must be programmed to
		 * indicate the largest AGAW value supported by
		 * hardware.  And ASR is ignored by hardware.
		 */
		context_set_address_width(context, iommu->msagaw);
	}

	context_set_translation_type(context, translation);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/*
	 * It's a non-present to present mapping.  If hardware doesn't cache
	 * non-present entries we only need to flush the write buffer.  If it
	 * _does_ cache non-present entries, then it does so in the special
	 * domain #0, which we have to flush:
	 */
	if (cap_caching_mode(iommu->cap)) {
		iommu->flush.flush_context(iommu, 0,
					   (((u16)bus) << 8) | devfn,
					   DMA_CCMD_MASK_NOBIT,
					   DMA_CCMD_DEVICE_INVL);
		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
	} else {
		iommu_flush_write_buffer(iommu);
	}
	iommu_enable_dev_iotlb(info);

	ret = 0;

out_unlock:
	spin_unlock(&iommu->lock);
	spin_unlock_irqrestore(&device_domain_lock, flags);

	return ret;
}

struct domain_context_mapping_data {
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
};

static int domain_context_mapping_cb(struct pci_dev *pdev,
				      u16 alias, void *opaque)
{
	struct domain_context_mapping_data *data = opaque;

	return domain_context_mapping_one(data->domain, data->iommu,
					  PCI_BUS_NUM(alias), alias & 0xff);
}

static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;
	struct domain_context_mapping_data data;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!dev_is_pci(dev))
		return domain_context_mapping_one(domain, iommu, bus, devfn);

	data.domain = domain;
	data.iommu = iommu;

	return pci_for_each_dma_alias(to_pci_dev(dev),
				      &domain_context_mapping_cb, &data);
}

static int domain_context_mapped_cb(struct pci_dev *pdev,
				    u16 alias, void *opaque)
{
	struct intel_iommu *iommu = opaque;

	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
}

static int domain_context_mapped(struct device *dev)
{
	struct intel_iommu *iommu;
	u8 bus, devfn;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!dev_is_pci(dev))
		return device_context_mapped(iommu, bus, devfn);

	return !pci_for_each_dma_alias(to_pci_dev(dev),
				       domain_context_mapped_cb, iommu);
}

/* Returns a number of VTD pages, but aligned to MM page size */
static inline unsigned long aligned_nrpages(unsigned long host_addr,
					    size_t size)
{
	host_addr &= ~PAGE_MASK;
	return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}

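/*
 * Return the largest superpage level usable for mapping @pages pages at
 * iov_pfn -> phy_pfn, limited by what the attached IOMMUs support.
 */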
static inline int hardware_largepage_caps(struct dmar_domain *domain,
					  unsigned long iov_pfn,
					  unsigned long phy_pfn,
					  unsigned long pages)
{
	int support, level = 1;
	unsigned long pfnmerge;

	support = domain->iommu_superpage;

	/* To use a large page, the virtual *and* physical addresses
	   must be aligned to 2MiB/1GiB/etc.  Lower bits set in either
	   of them will mean we have to use smaller pages, so just
	   merge them and check both at once. */
	pfnmerge = iov_pfn | phy_pfn;

	while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
		pages >>= VTD_STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= VTD_STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

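/*
 * Install PTEs for nr_pages starting at iov_pfn, taking the physical pages
 * either from @sg or from the contiguous range starting at @phys_pfn.
 * Superpage PTEs are used whenever hardware_largepage_caps() says the
 * alignment and remaining length allow it.
 */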
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
			    struct scatterlist *sg, unsigned long phys_pfn,
			    unsigned long nr_pages, int prot)
{
	struct dma_pte *first_pte = NULL, *pte = NULL;
	phys_addr_t uninitialized_var(pteval);
	unsigned long sg_res = 0;
	unsigned int largepage_lvl = 0;
	unsigned long lvl_pages = 0;

	BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;

	prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;

	if (!sg) {
		sg_res = nr_pages;
		pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
	}

	while (nr_pages > 0) {
		uint64_t tmp;

		if (!sg_res) {
			sg_res = aligned_nrpages(sg->offset, sg->length);
			sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
			sg->dma_length = sg->length;
			pteval = page_to_phys(sg_page(sg)) | prot;
			phys_pfn = pteval >> VTD_PAGE_SHIFT;
		}

		if (!pte) {
			largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);

			first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
			if (!pte)
				return -ENOMEM;

			if (largepage_lvl > 1) {
				unsigned long nr_superpages, end_pfn;

				pteval |= DMA_PTE_LARGE_PAGE;
				lvl_pages = lvl_to_nr_pages(largepage_lvl);

				nr_superpages = sg_res / lvl_pages;
				end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;

				/*
				 * Ensure that old small page tables are
				 * removed to make room for superpage(s).
				 */
				dma_pte_free_pagetable(domain, iov_pfn, end_pfn);
			} else {
				pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
			}

		}
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
		if (tmp) {
			static int dumps = 5;
			pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
				iov_pfn, tmp, (unsigned long long)pteval);
			if (dumps) {
				dumps--;
				debug_dma_dump_mappings(NULL);
			}
			WARN_ON(1);
		}

		lvl_pages = lvl_to_nr_pages(largepage_lvl);

		BUG_ON(nr_pages < lvl_pages);
		BUG_ON(sg_res < lvl_pages);

		nr_pages -= lvl_pages;
		iov_pfn += lvl_pages;
		phys_pfn += lvl_pages;
		pteval += lvl_pages * VTD_PAGE_SIZE;
		sg_res -= lvl_pages;

		/* If the next PTE would be the first in a new page, then we
		 * need to flush the cache on the entries we've just written,
		 * and then 'pte' must be recalculated, so clear it and let it
		 * get set again in the block above.
		 *
		 * If we're done (!nr_pages) we need to flush the cache too.
		 *
		 * Also if we've been setting superpages, we may need to
		 * recalculate 'pte' and switch back to smaller pages for the
		 * end of the mapping, if the trailing size is not enough to
		 * use another superpage (i.e. sg_res < lvl_pages).
		 */
		pte++;
		if (!nr_pages || first_pte_in_page(pte) ||
		    (largepage_lvl > 1 && sg_res < lvl_pages)) {
			domain_flush_cache(domain, first_pte,
					   (void *)pte - (void *)first_pte);
			pte = NULL;
		}

		if (!sg_res && nr_pages)
			sg = sg_next(sg);
	}
	return 0;
}

static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				    struct scatterlist *sg, unsigned long nr_pages,
				    int prot)
{
	return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
}

static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
				     unsigned long phys_pfn, unsigned long nr_pages,
				     int prot)
{
	return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
}

static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
}

static inline void unlink_domain_info(struct device_domain_info *info)
{
	assert_spin_locked(&device_domain_lock);
	list_del(&info->link);
	list_del(&info->global);
	if (info->dev)
		info->dev->archdata.iommu = NULL;
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&device_domain_lock, flags);
	list_for_each_entry_safe(info, tmp, &domain->devices, link)
		__dmar_remove_one_dev_info(info);
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

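/*
 * Look up the domain already attached to @dev; the driver caches the
 * device_domain_info pointer in dev->archdata.iommu.
 */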
static struct dmar_domain *find_domain(struct device *dev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = dev->archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}

static inline struct device_domain_info *
dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
{
	struct device_domain_info *info;

	list_for_each_entry(info, &device_domain_list, global)
		if (info->iommu->segment == segment && info->bus == bus &&
		    info->devfn == devfn)
			return info;

	return NULL;
}

2316static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2317 int bus, int devfn,
2318 struct device *dev,
2319 struct dmar_domain *domain)
2320{
2321 struct dmar_domain *found = NULL;
2322 struct device_domain_info *info;
2323 unsigned long flags;
2324 int ret;
2325
2326 info = alloc_devinfo_mem();
2327 if (!info)
2328 return NULL;
2329
2330 info->bus = bus;
2331 info->devfn = devfn;
2332 info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2333 info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2334 info->ats_qdep = 0;
2335 info->dev = dev;
2336 info->domain = domain;
2337 info->iommu = iommu;
2338
2339 if (dev && dev_is_pci(dev)) {
2340 struct pci_dev *pdev = to_pci_dev(info->dev);
2341
2342 if (ecap_dev_iotlb_support(iommu->ecap) &&
2343 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2344 dmar_find_matched_atsr_unit(pdev))
2345 info->ats_supported = 1;
2346
2347 if (ecs_enabled(iommu)) {
2348 if (pasid_enabled(iommu)) {
2349 int features = pci_pasid_features(pdev);
2350 if (features >= 0)
2351 info->pasid_supported = features | 1;
2352 }
2353
2354 if (info->ats_supported && ecap_prs(iommu->ecap) &&
2355 pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2356 info->pri_supported = 1;
2357 }
2358 }
2359
2360 spin_lock_irqsave(&device_domain_lock, flags);
2361 if (dev)
2362 found = find_domain(dev);
2363
2364 if (!found) {
2365 struct device_domain_info *info2;
2366 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2367 if (info2) {
2368 found = info2->domain;
2369 info2->dev = dev;
2370 }
2371 }
2372
2373 if (found) {
2374 spin_unlock_irqrestore(&device_domain_lock, flags);
2375 free_devinfo_mem(info);
2376
2377 return found;
2378 }
2379
2380 spin_lock(&iommu->lock);
2381 ret = domain_attach_iommu(domain, iommu);
2382 spin_unlock(&iommu->lock);
2383
2384 if (ret) {
2385 spin_unlock_irqrestore(&device_domain_lock, flags);
2386 free_devinfo_mem(info);
2387 return NULL;
2388 }
2389
2390 list_add(&info->link, &domain->devices);
2391 list_add(&info->global, &device_domain_list);
2392 if (dev)
2393 dev->archdata.iommu = info;
2394 spin_unlock_irqrestore(&device_domain_lock, flags);
2395
2396 if (dev && domain_context_mapping(domain, dev)) {
2397 pr_err("Domain context map for %s failed\n", dev_name(dev));
2398 dmar_remove_one_dev_info(domain, dev);
2399 return NULL;
2400 }
2401
2402 return domain;
2403}
2404
2405static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2406{
2407 *(u16 *)opaque = alias;
2408 return 0;
2409}
2410
2411
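/*
 * Find or allocate a domain for @dev.  For PCI devices the last DMA alias
 * (e.g. the requester ID of a PCIe-to-PCI bridge) is looked up first so
 * that all functions sharing that alias end up in the same domain.
 */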
2412static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2413{
2414 struct device_domain_info *info = NULL;
2415 struct dmar_domain *domain, *tmp;
2416 struct intel_iommu *iommu;
2417 u16 req_id, dma_alias;
2418 unsigned long flags;
2419 u8 bus, devfn;
2420
2421 domain = find_domain(dev);
2422 if (domain)
2423 return domain;
2424
2425 iommu = device_to_iommu(dev, &bus, &devfn);
2426 if (!iommu)
2427 return NULL;
2428
2429 req_id = ((u16)bus << 8) | devfn;
2430
2431 if (dev_is_pci(dev)) {
2432 struct pci_dev *pdev = to_pci_dev(dev);
2433
2434 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2435
2436 spin_lock_irqsave(&device_domain_lock, flags);
2437 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2438 PCI_BUS_NUM(dma_alias),
2439 dma_alias & 0xff);
2440 if (info) {
2441 iommu = info->iommu;
2442 domain = info->domain;
2443 }
2444 spin_unlock_irqrestore(&device_domain_lock, flags);
2445
2446
2447 if (info)
2448 goto found_domain;
2449 }
2450
2451
2452 domain = alloc_domain(0);
2453 if (!domain)
2454 return NULL;
2455 if (domain_init(domain, iommu, gaw)) {
2456 domain_exit(domain);
2457 return NULL;
2458 }
2459
2460
2461 if (dev_is_pci(dev) && req_id != dma_alias) {
2462 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2463 dma_alias & 0xff, NULL, domain);
2464
2465 if (!tmp || tmp != domain) {
2466 domain_exit(domain);
2467 domain = tmp;
2468 }
2469
2470 if (!domain)
2471 return NULL;
2472 }
2473
2474found_domain:
2475 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2476
2477 if (!tmp || tmp != domain) {
2478 domain_exit(domain);
2479 domain = tmp;
2480 }
2481
2482 return domain;
2483}
2484
2485static int iommu_domain_identity_map(struct dmar_domain *domain,
2486 unsigned long long start,
2487 unsigned long long end)
2488{
2489 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2490 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2491
2492 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2493 dma_to_mm_pfn(last_vpfn))) {
2494 pr_err("Reserving iova failed\n");
2495 return -ENOMEM;
2496 }
2497
2498 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2499
2500
2501
2502
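	/*
	 * An RMRR range may overlap ordinary physical memory that already has
	 * mappings, so clear any existing PTEs before installing the 1:1 map.
	 */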
2503 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2504
2505 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2506 last_vpfn - first_vpfn + 1,
2507 DMA_PTE_READ|DMA_PTE_WRITE);
2508}
2509
2510static int domain_prepare_identity_map(struct device *dev,
2511 struct dmar_domain *domain,
2512 unsigned long long start,
2513 unsigned long long end)
2514{
2515
2516
2517
2518
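	/*
	 * With hardware pass-through the device already sees physical
	 * addresses directly, so there is nothing to map into si_domain;
	 * only software identity domains need the RMRR range mapped
	 * explicitly.
	 */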
2519 if (domain == si_domain && hw_pass_through) {
2520 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2521 dev_name(dev), start, end);
2522 return 0;
2523 }
2524
2525 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2526 dev_name(dev), start, end);
2527
2528 if (end < start) {
2529 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2530 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2531 dmi_get_system_info(DMI_BIOS_VENDOR),
2532 dmi_get_system_info(DMI_BIOS_VERSION),
2533 dmi_get_system_info(DMI_PRODUCT_VERSION));
2534 return -EIO;
2535 }
2536
2537 if (end >> agaw_to_width(domain->agaw)) {
2538 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2539 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2540 agaw_to_width(domain->agaw),
2541 dmi_get_system_info(DMI_BIOS_VENDOR),
2542 dmi_get_system_info(DMI_BIOS_VERSION),
2543 dmi_get_system_info(DMI_PRODUCT_VERSION));
2544 return -EIO;
2545 }
2546
2547 return iommu_domain_identity_map(domain, start, end);
2548}
2549
2550static int iommu_prepare_identity_map(struct device *dev,
2551 unsigned long long start,
2552 unsigned long long end)
2553{
2554 struct dmar_domain *domain;
2555 int ret;
2556
2557 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2558 if (!domain)
2559 return -ENOMEM;
2560
2561 ret = domain_prepare_identity_map(dev, domain, start, end);
2562 if (ret)
2563 domain_exit(domain);
2564
2565 return ret;
2566}
2567
2568static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2569 struct device *dev)
2570{
2571 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2572 return 0;
2573 return iommu_prepare_identity_map(dev, rmrr->base_address,
2574 rmrr->end_address);
2575}
2576
2577#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2578static inline void iommu_prepare_isa(void)
2579{
2580 struct pci_dev *pdev;
2581 int ret;
2582
2583 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2584 if (!pdev)
2585 return;
2586
2587 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2588 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2589
2590 if (ret)
2591 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2592
2593 pci_dev_put(pdev);
2594}
2595#else
2596static inline void iommu_prepare_isa(void)
2597{
2598 return;
2599}
2600#endif
2601
2602static int md_domain_init(struct dmar_domain *domain, int guest_width);
2603
2604static int __init si_domain_init(int hw)
2605{
2606 int nid, ret = 0;
2607
2608 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2609 if (!si_domain)
2610 return -EFAULT;
2611
2612 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2613 domain_exit(si_domain);
2614 return -EFAULT;
2615 }
2616
2617 pr_debug("Identity mapping domain allocated\n");
2618
2619 if (hw)
2620 return 0;
2621
2622 for_each_online_node(nid) {
2623 unsigned long start_pfn, end_pfn;
2624 int i;
2625
2626 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2627 ret = iommu_domain_identity_map(si_domain,
2628 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2629 if (ret)
2630 return ret;
2631 }
2632 }
2633
2634 return 0;
2635}
2636
2637static int identity_mapping(struct device *dev)
2638{
2639 struct device_domain_info *info;
2640
2641 if (likely(!iommu_identity_mapping))
2642 return 0;
2643
2644 info = dev->archdata.iommu;
2645 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2646 return (info->domain == si_domain);
2647
2648 return 0;
2649}
2650
2651static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2652{
2653 struct dmar_domain *ndomain;
2654 struct intel_iommu *iommu;
2655 u8 bus, devfn;
2656
2657 iommu = device_to_iommu(dev, &bus, &devfn);
2658 if (!iommu)
2659 return -ENODEV;
2660
2661 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662 if (ndomain != domain)
2663 return -EBUSY;
2664
2665 return 0;
2666}
2667
2668static bool device_has_rmrr(struct device *dev)
2669{
2670 struct dmar_rmrr_unit *rmrr;
2671 struct device *tmp;
2672 int i;
2673
2674 rcu_read_lock();
2675 for_each_rmrr_units(rmrr) {
2676
2677
2678
2679
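		/*
		 * Check whether this RMRR's device scope lists the device we
		 * were asked about.
		 */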
2680 for_each_active_dev_scope(rmrr->devices,
2681 rmrr->devices_cnt, i, tmp)
2682 if (tmp == dev) {
2683 rcu_read_unlock();
2684 return true;
2685 }
2686 }
2687 rcu_read_unlock();
2688 return false;
2689}
2690
/*
 * A device with an RMRR normally has to keep that range mapped for its whole
 * lifetime, so such devices are refused attachment to arbitrary IOMMU API
 * domains (see intel_iommu_attach_device()).  USB and graphics devices are
 * exempted: their RMRRs are only relied upon around boot (e.g. legacy
 * keyboard emulation, firmware framebuffer access) and can safely be dropped
 * once a native driver owns the device.
 */
2713static bool device_is_rmrr_locked(struct device *dev)
2714{
2715 if (!device_has_rmrr(dev))
2716 return false;
2717
2718 if (dev_is_pci(dev)) {
2719 struct pci_dev *pdev = to_pci_dev(dev);
2720
2721 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2722 return false;
2723 }
2724
2725 return true;
2726}
2727
2728static int iommu_should_identity_map(struct device *dev, int startup)
2729{
2730
2731 if (dev_is_pci(dev)) {
2732 struct pci_dev *pdev = to_pci_dev(dev);
2733
2734 if (device_is_rmrr_locked(dev))
2735 return 0;
2736
2737 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2738 return 1;
2739
2740 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2741 return 1;
2742
2743 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2744 return 0;
2745
		/*
		 * Identity-mapping everything behind a conventional PCI bus is
		 * risky: all devices behind a PCIe-to-PCI bridge share one
		 * requester ID, so a device hot-added or bound to a driver
		 * later would silently inherit the 1:1 mapping of its
		 * siblings.  Only identity-map legacy PCI devices that sit
		 * directly on the root bus and are not themselves bridges,
		 * and never identity-map PCIe-to-PCI bridges.
		 */
2763 if (!pci_is_pcie(pdev)) {
2764 if (!pci_is_root_bus(pdev->bus))
2765 return 0;
2766 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2767 return 0;
2768 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2769 return 0;
2770 } else {
2771 if (device_has_rmrr(dev))
2772 return 0;
2773 }
2774
2775
2776
2777
2778
2779
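	/*
	 * At run time (startup == 0) only keep the device identity mapped if
	 * its DMA mask covers all memory the kernel may hand it; otherwise a
	 * translated (dynamic) mapping is required.
	 */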
2780 if (!startup) {
2781
2782
2783
2784
2785 u64 dma_mask = *dev->dma_mask;
2786
2787 if (dev->coherent_dma_mask &&
2788 dev->coherent_dma_mask < dma_mask)
2789 dma_mask = dev->coherent_dma_mask;
2790
2791 return dma_mask >= dma_get_required_mask(dev);
2792 }
2793
2794 return 1;
2795}
2796
2797static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2798{
2799 int ret;
2800
2801 if (!iommu_should_identity_map(dev, 1))
2802 return 0;
2803
2804 ret = domain_add_dev_info(si_domain, dev);
2805 if (!ret)
2806 pr_info("%s identity mapping for device %s\n",
2807 hw ? "Hardware" : "Software", dev_name(dev));
2808 else if (ret == -ENODEV)
2809
2810 ret = 0;
2811
2812 return ret;
2813}
2814
2815
2816static int __init iommu_prepare_static_identity_mapping(int hw)
2817{
2818 struct pci_dev *pdev = NULL;
2819 struct dmar_drhd_unit *drhd;
2820 struct intel_iommu *iommu;
2821 struct device *dev;
2822 int i;
2823 int ret = 0;
2824
2825 for_each_pci_dev(pdev) {
2826 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2827 if (ret)
2828 return ret;
2829 }
2830
2831 for_each_active_iommu(iommu, drhd)
2832 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2833 struct acpi_device_physical_node *pn;
2834 struct acpi_device *adev;
2835
2836 if (dev->bus != &acpi_bus_type)
2837 continue;
2838
2839			adev = to_acpi_device(dev);
2840 mutex_lock(&adev->physical_node_lock);
2841 list_for_each_entry(pn, &adev->physical_node_list, node) {
2842 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2843 if (ret)
2844 break;
2845 }
2846 mutex_unlock(&adev->physical_node_lock);
2847 if (ret)
2848 return ret;
2849 }
2850
2851 return 0;
2852}
2853
2854static void intel_iommu_init_qi(struct intel_iommu *iommu)
2855{
2856
2857
2858
2859
2860
2861
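	/*
	 * Start from a sane hardware state: if queued invalidation was not
	 * set up by us (e.g. it was left enabled by the previous kernel or
	 * firmware), clear stale faults and disable it before re-enabling.
	 */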
2862 if (!iommu->qi) {
2863
2864
2865
2866 dmar_fault(-1, iommu);
2867
2868
2869
2870
2871 dmar_disable_qi(iommu);
2872 }
2873
2874 if (dmar_enable_qi(iommu)) {
2875
2876
2877
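		/*
		 * Queued invalidation could not be enabled; fall back to
		 * register-based invalidation.
		 */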
2878 iommu->flush.flush_context = __iommu_flush_context;
2879 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2880 pr_info("%s: Using Register based invalidation\n",
2881 iommu->name);
2882 } else {
2883 iommu->flush.flush_context = qi_flush_context;
2884 iommu->flush.flush_iotlb = qi_flush_iotlb;
2885 pr_info("%s: Using Queued invalidation\n", iommu->name);
2886 }
2887}
2888
2889static int copy_context_table(struct intel_iommu *iommu,
2890 struct root_entry *old_re,
2891 struct context_entry **tbl,
2892 int bus, bool ext)
2893{
2894 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
2895 struct context_entry *new_ce = NULL, ce;
2896 struct context_entry *old_ce = NULL;
2897 struct root_entry re;
2898 phys_addr_t old_ce_phys;
2899
2900 tbl_idx = ext ? bus * 2 : bus;
2901 memcpy(&re, old_re, sizeof(re));
2902
2903 for (devfn = 0; devfn < 256; devfn++) {
2904
2905 idx = (ext ? devfn * 2 : devfn) % 256;
2906
2907 if (idx == 0) {
2908
2909 if (new_ce) {
2910 tbl[tbl_idx] = new_ce;
2911 __iommu_flush_cache(iommu, new_ce,
2912 VTD_PAGE_SIZE);
2913 pos = 1;
2914 }
2915
2916 if (old_ce)
2917 iounmap(old_ce);
2918
2919 ret = 0;
2920 if (devfn < 0x80)
2921 old_ce_phys = root_entry_lctp(&re);
2922 else
2923 old_ce_phys = root_entry_uctp(&re);
2924
2925 if (!old_ce_phys) {
2926 if (ext && devfn == 0) {
2927
2928 devfn = 0x7f;
2929 continue;
2930 } else {
2931 goto out;
2932 }
2933 }
2934
2935 ret = -ENOMEM;
2936 old_ce = memremap(old_ce_phys, PAGE_SIZE,
2937 MEMREMAP_WB);
2938 if (!old_ce)
2939 goto out;
2940
2941 new_ce = alloc_pgtable_page(iommu->node);
2942 if (!new_ce)
2943 goto out_unmap;
2944
2945 ret = 0;
2946 }
2947
2948
2949 memcpy(&ce, old_ce + idx, sizeof(ce));
2950
2951 if (!__context_present(&ce))
2952 continue;
2953
2954 did = context_domain_id(&ce);
2955 if (did >= 0 && did < cap_ndoms(iommu->cap))
2956 set_bit(did, iommu->domain_ids);

		/*
		 * Mark the entry as copied from the previous kernel.  The
		 * PASID-enable bit is cleared first so that the bit used as
		 * the "copied" marker is available in both the legacy and the
		 * extended context-entry format; PASID translations from the
		 * old kernel are intentionally not carried over.
		 */
2974 context_clear_pasid_enable(&ce);
2975 context_set_copied(&ce);
2976
2977 new_ce[idx] = ce;
2978 }
2979
2980 tbl[tbl_idx + pos] = new_ce;
2981
2982 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2983
2984out_unmap:
2985 memunmap(old_ce);
2986
2987out:
2988 return ret;
2989}
2990
2991static int copy_translation_tables(struct intel_iommu *iommu)
2992{
2993 struct context_entry **ctxt_tbls;
2994 struct root_entry *old_rt;
2995 phys_addr_t old_rt_phys;
2996 int ctxt_table_entries;
2997 unsigned long flags;
2998 u64 rtaddr_reg;
2999 int bus, ret;
3000 bool new_ext, ext;
3001
3002 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3003 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
3004 new_ext = !!ecap_ecs(iommu->ecap);
3005
3006
3007
3008
3009
3010
3011
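	/*
	 * The RTT bit can only be changed while translation is disabled, and
	 * disabling translation opens a window for DMA corruption, so refuse
	 * to copy if the old and new kernels disagree on extended mode.
	 */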
3012 if (new_ext != ext)
3013 return -EINVAL;
3014
3015 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3016 if (!old_rt_phys)
3017 return -EINVAL;
3018
3019 old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3020 if (!old_rt)
3021 return -ENOMEM;
3022
3023
3024 ctxt_table_entries = ext ? 512 : 256;
3025 ret = -ENOMEM;
3026 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
3027 if (!ctxt_tbls)
3028 goto out_unmap;
3029
3030 for (bus = 0; bus < 256; bus++) {
3031 ret = copy_context_table(iommu, &old_rt[bus],
3032 ctxt_tbls, bus, ext);
3033 if (ret) {
3034 pr_err("%s: Failed to copy context table for bus %d\n",
3035 iommu->name, bus);
3036 continue;
3037 }
3038 }
3039
3040 spin_lock_irqsave(&iommu->lock, flags);
3041
3042
3043 for (bus = 0; bus < 256; bus++) {
3044 int idx = ext ? bus * 2 : bus;
3045 u64 val;
3046
3047 if (ctxt_tbls[idx]) {
3048 val = virt_to_phys(ctxt_tbls[idx]) | 1;
3049 iommu->root_entry[bus].lo = val;
3050 }
3051
3052 if (!ext || !ctxt_tbls[idx + 1])
3053 continue;
3054
3055 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3056 iommu->root_entry[bus].hi = val;
3057 }
3058
3059 spin_unlock_irqrestore(&iommu->lock, flags);
3060
3061 kfree(ctxt_tbls);
3062
3063 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3064
3065 ret = 0;
3066
3067out_unmap:
3068 memunmap(old_rt);
3069
3070 return ret;
3071}
3072
3073static int __init init_dmars(void)
3074{
3075 struct dmar_drhd_unit *drhd;
3076 struct dmar_rmrr_unit *rmrr;
3077 bool copied_tables = false;
3078 struct device *dev;
3079 struct intel_iommu *iommu;
3080 int i, ret;
3081
3082
3083
3084
3085
3086
3087
3088 for_each_drhd_unit(drhd) {
3089
3090
3091
3092
3093
3094 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3095 g_num_of_iommus++;
3096 continue;
3097 }
3098 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3099 }
3100
3101
3102 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3103 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3104
3105 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3106 GFP_KERNEL);
3107 if (!g_iommus) {
3108 pr_err("Allocating global iommu array failed\n");
3109 ret = -ENOMEM;
3110 goto error;
3111 }
3112
3113 deferred_flush = kzalloc(g_num_of_iommus *
3114 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3115 if (!deferred_flush) {
3116 ret = -ENOMEM;
3117 goto free_g_iommus;
3118 }
3119
3120 for_each_active_iommu(iommu, drhd) {
3121 g_iommus[iommu->seq_id] = iommu;
3122
3123 intel_iommu_init_qi(iommu);
3124
3125 ret = iommu_init_domains(iommu);
3126 if (ret)
3127 goto free_iommu;
3128
3129 init_translation_status(iommu);
3130
3131 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3132 iommu_disable_translation(iommu);
3133 clear_translation_pre_enabled(iommu);
3134 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3135 iommu->name);
3136 }
3137
3138
3139
3140
3141
3142
3143 ret = iommu_alloc_root_entry(iommu);
3144 if (ret)
3145 goto free_iommu;
3146
3147 if (translation_pre_enabled(iommu)) {
3148 pr_info("Translation already enabled - trying to copy translation structures\n");
3149
3150 ret = copy_translation_tables(iommu);
3151 if (ret) {
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3162 iommu->name);
3163 iommu_disable_translation(iommu);
3164 clear_translation_pre_enabled(iommu);
3165 } else {
3166 pr_info("Copied translation tables from previous kernel for %s\n",
3167 iommu->name);
3168 copied_tables = true;
3169 }
3170 }
3171
3172 iommu_flush_write_buffer(iommu);
3173 iommu_set_root_entry(iommu);
3174 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3175 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3176
3177 if (!ecap_pass_through(iommu->ecap))
3178 hw_pass_through = 0;
3179#ifdef CONFIG_INTEL_IOMMU_SVM
3180 if (pasid_enabled(iommu))
3181 intel_svm_alloc_pasid_tables(iommu);
3182#endif
3183 }
3184
3185 if (iommu_pass_through)
3186 iommu_identity_mapping |= IDENTMAP_ALL;
3187
3188#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3189 iommu_identity_mapping |= IDENTMAP_GFX;
3190#endif
3191
3192 if (iommu_identity_mapping) {
3193 ret = si_domain_init(hw_pass_through);
3194 if (ret)
3195 goto free_iommu;
3196 }
3197
3198 check_tylersburg_isoch();
3199
3200
3201
3202
3203
3204
3205
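	/*
	 * If translation tables were copied from the previous kernel (kdump),
	 * do not assign devices to domains now; that would destroy the old
	 * mappings.  Defer it to device driver initialisation instead.
	 */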
3206 if (copied_tables)
3207 goto domains_done;
3208
3209
3210
3211
3212
3213
3214 if (iommu_identity_mapping) {
3215 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
3216 if (ret) {
3217 pr_crit("Failed to setup IOMMU pass-through\n");
3218 goto free_iommu;
3219 }
3220 }
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235 pr_info("Setting RMRR:\n");
3236 for_each_rmrr_units(rmrr) {
3237
3238 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3239 i, dev) {
3240 ret = iommu_prepare_rmrr_dev(rmrr, dev);
3241 if (ret)
3242 pr_err("Mapping reserved region failed\n");
3243 }
3244 }
3245
3246 iommu_prepare_isa();
3247
3248domains_done:
3249
3250
3251
3252
3253
3254
3255
3256
3257 for_each_iommu(iommu, drhd) {
3258 if (drhd->ignored) {
3259
3260
3261
3262
3263 if (force_on)
3264 iommu_disable_protect_mem_regions(iommu);
3265 continue;
3266 }
3267
3268 iommu_flush_write_buffer(iommu);
3269
3270#ifdef CONFIG_INTEL_IOMMU_SVM
3271 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
3272 ret = intel_svm_enable_prq(iommu);
3273 if (ret)
3274 goto free_iommu;
3275 }
3276#endif
3277 ret = dmar_set_interrupt(iommu);
3278 if (ret)
3279 goto free_iommu;
3280
3281 if (!translation_pre_enabled(iommu))
3282 iommu_enable_translation(iommu);
3283
3284 iommu_disable_protect_mem_regions(iommu);
3285 }
3286
3287 return 0;
3288
3289free_iommu:
3290 for_each_active_iommu(iommu, drhd) {
3291 disable_dmar_iommu(iommu);
3292 free_dmar_iommu(iommu);
3293 }
3294 kfree(deferred_flush);
3295free_g_iommus:
3296 kfree(g_iommus);
3297error:
3298 return ret;
3299}
3300
3301
3302static struct iova *intel_alloc_iova(struct device *dev,
3303 struct dmar_domain *domain,
3304 unsigned long nrpages, uint64_t dma_mask)
3305{
3306 struct iova *iova = NULL;
3307
3308
3309 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3310
3311 nrpages = __roundup_pow_of_two(nrpages);
3312
3313 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
3314
3315
3316
3317
3318
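		/*
		 * First try to allocate an IOVA below 4GiB; fall back to the
		 * full DMA mask only if that fails.
		 */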
3319 iova = alloc_iova(&domain->iovad, nrpages,
3320 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3321 if (iova)
3322 return iova;
3323 }
3324 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3325 if (unlikely(!iova)) {
3326		pr_err("Allocating %ld-page iova for %s failed\n",
3327 nrpages, dev_name(dev));
3328 return NULL;
3329 }
3330
3331 return iova;
3332}
3333
3334static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
3335{
3336 struct dmar_rmrr_unit *rmrr;
3337 struct dmar_domain *domain;
3338 struct device *i_dev;
3339 int i, ret;
3340
3341 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
3342 if (!domain) {
3343 pr_err("Allocating domain for %s failed\n",
3344 dev_name(dev));
3345 return NULL;
3346 }
3347
3348
3349 rcu_read_lock();
3350 for_each_rmrr_units(rmrr) {
3351 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
3352 i, i_dev) {
3353 if (i_dev != dev)
3354 continue;
3355
3356 ret = domain_prepare_identity_map(dev, domain,
3357 rmrr->base_address,
3358 rmrr->end_address);
3359 if (ret)
3360 dev_err(dev, "Mapping reserved region failed\n");
3361 }
3362 }
3363 rcu_read_unlock();
3364
3365 return domain;
3366}
3367
3368static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
3369{
3370 struct device_domain_info *info;
3371
3372
3373 info = dev->archdata.iommu;
3374 if (likely(info))
3375 return info->domain;
3376
3377 return __get_valid_domain_for_dev(dev);
3378}
3379
3380
3381static int iommu_no_mapping(struct device *dev)
3382{
3383 int found;
3384
3385 if (iommu_dummy(dev))
3386 return 1;
3387
3388 if (!iommu_identity_mapping)
3389 return 0;
3390
3391 found = identity_mapping(dev);
3392 if (found) {
3393 if (iommu_should_identity_map(dev, 0))
3394 return 1;
3395 else {
3396
3397
3398
3399
3400 dmar_remove_one_dev_info(si_domain, dev);
3401 pr_info("32bit %s uses non-identity mapping\n",
3402 dev_name(dev));
3403 return 0;
3404 }
3405 } else {
3406
3407
3408
3409
3410 if (iommu_should_identity_map(dev, 0)) {
3411 int ret;
3412 ret = domain_add_dev_info(si_domain, dev);
3413 if (!ret) {
3414 pr_info("64bit %s uses identity mapping\n",
3415 dev_name(dev));
3416 return 1;
3417 }
3418 }
3419 }
3420
3421 return 0;
3422}
3423
3424static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3425 size_t size, int dir, u64 dma_mask)
3426{
3427 struct dmar_domain *domain;
3428 phys_addr_t start_paddr;
3429 struct iova *iova;
3430 int prot = 0;
3431 int ret;
3432 struct intel_iommu *iommu;
3433 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3434
3435 BUG_ON(dir == DMA_NONE);
3436
3437 if (iommu_no_mapping(dev))
3438 return paddr;
3439
3440 domain = get_valid_domain_for_dev(dev);
3441 if (!domain)
3442 return 0;
3443
3444 iommu = domain_get_iommu(domain);
3445 size = aligned_nrpages(paddr, size);
3446
3447 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3448 if (!iova)
3449 goto error;
3450
3451
3452
3453
3454
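	/*
	 * DMA_FROM_DEVICE (write-only) mappings still get read permission
	 * unless the IOMMU supports zero-length reads on write-only mappings
	 * (cap_zlr).
	 */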
3455	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3456 !cap_zlr(iommu->cap))
3457 prot |= DMA_PTE_READ;
3458 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3459 prot |= DMA_PTE_WRITE;
3460
3461
3462
3463
3464
3465
3466 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3467 mm_to_dma_pfn(paddr_pfn), size, prot);
3468 if (ret)
3469 goto error;
3470
3471
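	/*
	 * This is a not-present to present mapping, so only the IOTLB needs
	 * flushing when caching mode is set; otherwise flushing the write
	 * buffer is sufficient.
	 */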
3472 if (cap_caching_mode(iommu->cap))
3473 iommu_flush_iotlb_psi(iommu, domain,
3474 mm_to_dma_pfn(iova->pfn_lo),
3475 size, 0, 1);
3476 else
3477 iommu_flush_write_buffer(iommu);
3478
3479 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3480 start_paddr += paddr & ~PAGE_MASK;
3481 return start_paddr;
3482
3483error:
3484 if (iova)
3485 __free_iova(&domain->iovad, iova);
3486 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
3487 dev_name(dev), size, (unsigned long long)paddr, dir);
3488 return 0;
3489}
3490
3491static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3492 unsigned long offset, size_t size,
3493 enum dma_data_direction dir,
3494 struct dma_attrs *attrs)
3495{
3496 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3497 dir, *dev->dma_mask);
3498}
3499
3500static void flush_unmaps(void)
3501{
3502 int i, j;
3503
3504 timer_on = 0;
3505
3506
3507 for (i = 0; i < g_num_of_iommus; i++) {
3508 struct intel_iommu *iommu = g_iommus[i];
3509 if (!iommu)
3510 continue;
3511
3512 if (!deferred_flush[i].next)
3513 continue;
3514
3515
3516 if (!cap_caching_mode(iommu->cap))
3517 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3518 DMA_TLB_GLOBAL_FLUSH);
3519 for (j = 0; j < deferred_flush[i].next; j++) {
3520 unsigned long mask;
3521 struct iova *iova = deferred_flush[i].iova[j];
3522 struct dmar_domain *domain = deferred_flush[i].domain[j];
3523
3524
3525 if (cap_caching_mode(iommu->cap))
3526 iommu_flush_iotlb_psi(iommu, domain,
3527 iova->pfn_lo, iova_size(iova),
3528 !deferred_flush[i].freelist[j], 0);
3529 else {
3530 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3531 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3532 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3533 }
3534 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3535 if (deferred_flush[i].freelist[j])
3536 dma_free_pagelist(deferred_flush[i].freelist[j]);
3537 }
3538 deferred_flush[i].next = 0;
3539 }
3540
3541 list_size = 0;
3542}
3543
3544static void flush_unmaps_timeout(unsigned long data)
3545{
3546 unsigned long flags;
3547
3548 spin_lock_irqsave(&async_umap_flush_lock, flags);
3549 flush_unmaps();
3550 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3551}
3552
3553static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3554{
3555 unsigned long flags;
3556 int next, iommu_id;
3557 struct intel_iommu *iommu;
3558
3559 spin_lock_irqsave(&async_umap_flush_lock, flags);
3560 if (list_size == HIGH_WATER_MARK)
3561 flush_unmaps();
3562
3563 iommu = domain_get_iommu(dom);
3564 iommu_id = iommu->seq_id;
3565
3566 next = deferred_flush[iommu_id].next;
3567 deferred_flush[iommu_id].domain[next] = dom;
3568 deferred_flush[iommu_id].iova[next] = iova;
3569 deferred_flush[iommu_id].freelist[next] = freelist;
3570 deferred_flush[iommu_id].next++;
3571
3572 if (!timer_on) {
3573 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3574 timer_on = 1;
3575 }
3576 list_size++;
3577 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3578}
3579
3580static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3581{
3582 struct dmar_domain *domain;
3583 unsigned long start_pfn, last_pfn;
3584 struct iova *iova;
3585 struct intel_iommu *iommu;
3586 struct page *freelist;
3587
3588 if (iommu_no_mapping(dev))
3589 return;
3590
3591 domain = find_domain(dev);
3592 BUG_ON(!domain);
3593
3594 iommu = domain_get_iommu(domain);
3595
3596 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3597 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3598 (unsigned long long)dev_addr))
3599 return;
3600
3601 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3602 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3603
3604 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3605 dev_name(dev), start_pfn, last_pfn);
3606
3607 freelist = domain_unmap(domain, start_pfn, last_pfn);
3608
3609 if (intel_iommu_strict) {
3610 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
3611 last_pfn - start_pfn + 1, !freelist, 0);
3612
3613 __free_iova(&domain->iovad, iova);
3614 dma_free_pagelist(freelist);
3615 } else {
3616 add_unmap(domain, iova, freelist);
3617
3618
3619
3620
3621 }
3622}
3623
3624static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3625 size_t size, enum dma_data_direction dir,
3626 struct dma_attrs *attrs)
3627{
3628 intel_unmap(dev, dev_addr);
3629}
3630
3631static void *intel_alloc_coherent(struct device *dev, size_t size,
3632 dma_addr_t *dma_handle, gfp_t flags,
3633 struct dma_attrs *attrs)
3634{
3635 struct page *page = NULL;
3636 int order;
3637
3638 size = PAGE_ALIGN(size);
3639 order = get_order(size);
3640
3641 if (!iommu_no_mapping(dev))
3642 flags &= ~(GFP_DMA | GFP_DMA32);
3643 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3644 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3645 flags |= GFP_DMA;
3646 else
3647 flags |= GFP_DMA32;
3648 }
3649
3650 if (gfpflags_allow_blocking(flags)) {
3651 unsigned int count = size >> PAGE_SHIFT;
3652
3653 page = dma_alloc_from_contiguous(dev, count, order);
3654 if (page && iommu_no_mapping(dev) &&
3655 page_to_phys(page) + size > dev->coherent_dma_mask) {
3656 dma_release_from_contiguous(dev, page, count);
3657 page = NULL;
3658 }
3659 }
3660
3661 if (!page)
3662 page = alloc_pages(flags, order);
3663 if (!page)
3664 return NULL;
3665 memset(page_address(page), 0, size);
3666
3667 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3668 DMA_BIDIRECTIONAL,
3669 dev->coherent_dma_mask);
3670 if (*dma_handle)
3671 return page_address(page);
3672 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3673 __free_pages(page, order);
3674
3675 return NULL;
3676}
3677
3678static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3679 dma_addr_t dma_handle, struct dma_attrs *attrs)
3680{
3681 int order;
3682 struct page *page = virt_to_page(vaddr);
3683
3684 size = PAGE_ALIGN(size);
3685 order = get_order(size);
3686
3687 intel_unmap(dev, dma_handle);
3688 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3689 __free_pages(page, order);
3690}
3691
3692static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3693 int nelems, enum dma_data_direction dir,
3694 struct dma_attrs *attrs)
3695{
3696 intel_unmap(dev, sglist[0].dma_address);
3697}
3698
3699static int intel_nontranslate_map_sg(struct device *hddev,
3700 struct scatterlist *sglist, int nelems, int dir)
3701{
3702 int i;
3703 struct scatterlist *sg;
3704
3705 for_each_sg(sglist, sg, nelems, i) {
3706 BUG_ON(!sg_page(sg));
3707 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3708 sg->dma_length = sg->length;
3709 }
3710 return nelems;
3711}
3712
3713static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3714 enum dma_data_direction dir, struct dma_attrs *attrs)
3715{
3716 int i;
3717 struct dmar_domain *domain;
3718 size_t size = 0;
3719 int prot = 0;
3720 struct iova *iova = NULL;
3721 int ret;
3722 struct scatterlist *sg;
3723 unsigned long start_vpfn;
3724 struct intel_iommu *iommu;
3725
3726 BUG_ON(dir == DMA_NONE);
3727 if (iommu_no_mapping(dev))
3728 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3729
3730 domain = get_valid_domain_for_dev(dev);
3731 if (!domain)
3732 return 0;
3733
3734 iommu = domain_get_iommu(domain);
3735
3736 for_each_sg(sglist, sg, nelems, i)
3737 size += aligned_nrpages(sg->offset, sg->length);
3738
3739 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3740 *dev->dma_mask);
3741 if (!iova) {
3742 sglist->dma_length = 0;
3743 return 0;
3744 }
3745
3746
3747
3748
3749
3750	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
3751 !cap_zlr(iommu->cap))
3752 prot |= DMA_PTE_READ;
3753 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3754 prot |= DMA_PTE_WRITE;
3755
3756 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3757
3758 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3759 if (unlikely(ret)) {
3760 dma_pte_free_pagetable(domain, start_vpfn,
3761 start_vpfn + size - 1);
3762 __free_iova(&domain->iovad, iova);
3763 return 0;
3764 }
3765
3766
3767 if (cap_caching_mode(iommu->cap))
3768 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
3769 else
3770 iommu_flush_write_buffer(iommu);
3771
3772 return nelems;
3773}
3774
3775static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3776{
3777 return !dma_addr;
3778}
3779
3780struct dma_map_ops intel_dma_ops = {
3781 .alloc = intel_alloc_coherent,
3782 .free = intel_free_coherent,
3783 .map_sg = intel_map_sg,
3784 .unmap_sg = intel_unmap_sg,
3785 .map_page = intel_map_page,
3786 .unmap_page = intel_unmap_page,
3787 .mapping_error = intel_mapping_error,
3788};
3789
3790static inline int iommu_domain_cache_init(void)
3791{
3792 int ret = 0;
3793
3794 iommu_domain_cache = kmem_cache_create("iommu_domain",
3795 sizeof(struct dmar_domain),
3796 0,
3797 SLAB_HWCACHE_ALIGN,
3799 NULL);
3800 if (!iommu_domain_cache) {
3801 pr_err("Couldn't create iommu_domain cache\n");
3802 ret = -ENOMEM;
3803 }
3804
3805 return ret;
3806}
3807
3808static inline int iommu_devinfo_cache_init(void)
3809{
3810 int ret = 0;
3811
3812 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3813 sizeof(struct device_domain_info),
3814 0,
3815 SLAB_HWCACHE_ALIGN,
3816 NULL);
3817 if (!iommu_devinfo_cache) {
3818 pr_err("Couldn't create devinfo cache\n");
3819 ret = -ENOMEM;
3820 }
3821
3822 return ret;
3823}
3824
3825static int __init iommu_init_mempool(void)
3826{
3827 int ret;
3828 ret = iova_cache_get();
3829 if (ret)
3830 return ret;
3831
3832 ret = iommu_domain_cache_init();
3833 if (ret)
3834 goto domain_error;
3835
3836 ret = iommu_devinfo_cache_init();
3837 if (!ret)
3838 return ret;
3839
3840 kmem_cache_destroy(iommu_domain_cache);
3841domain_error:
3842 iova_cache_put();
3843
3844 return -ENOMEM;
3845}
3846
3847static void __init iommu_exit_mempool(void)
3848{
3849 kmem_cache_destroy(iommu_devinfo_cache);
3850 kmem_cache_destroy(iommu_domain_cache);
3851 iova_cache_put();
3852}
3853
3854static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3855{
3856 struct dmar_drhd_unit *drhd;
3857 u32 vtbar;
3858 int rc;
3859
3860
3861
3862
3863
3864
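	/*
	 * Read the VT-d base address that the BIOS programmed for device
	 * 00.0 on this bus (config offset 0xb0) so it can be checked against
	 * the DRHD unit that DMAR claims covers this QuickData device.
	 */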
3865 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3866 if (rc) {
3867
3868 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3869 return;
3870 }
3871 vtbar &= 0xffff0000;
3872
3873
3874 drhd = dmar_find_matched_drhd_unit(pdev);
3875 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3876 TAINT_FIRMWARE_WORKAROUND,
3877 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3878 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3879}
3880DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3881
3882static void __init init_no_remapping_devices(void)
3883{
3884 struct dmar_drhd_unit *drhd;
3885 struct device *dev;
3886 int i;
3887
3888 for_each_drhd_unit(drhd) {
3889 if (!drhd->include_all) {
3890 for_each_active_dev_scope(drhd->devices,
3891 drhd->devices_cnt, i, dev)
3892 break;
3893
3894 if (i == drhd->devices_cnt)
3895 drhd->ignored = 1;
3896 }
3897 }
3898
3899 for_each_active_drhd_unit(drhd) {
3900 if (drhd->include_all)
3901 continue;
3902
3903 for_each_active_dev_scope(drhd->devices,
3904 drhd->devices_cnt, i, dev)
3905 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3906 break;
3907 if (i < drhd->devices_cnt)
3908 continue;
3909
3910
3911
3912 if (dmar_map_gfx) {
3913 intel_iommu_gfx_mapped = 1;
3914 } else {
3915 drhd->ignored = 1;
3916 for_each_active_dev_scope(drhd->devices,
3917 drhd->devices_cnt, i, dev)
3918 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3919 }
3920 }
3921}
3922
3923#ifdef CONFIG_SUSPEND
3924static int init_iommu_hw(void)
3925{
3926 struct dmar_drhd_unit *drhd;
3927 struct intel_iommu *iommu = NULL;
3928
3929 for_each_active_iommu(iommu, drhd)
3930 if (iommu->qi)
3931 dmar_reenable_qi(iommu);
3932
3933 for_each_iommu(iommu, drhd) {
3934 if (drhd->ignored) {
3935
3936
3937
3938
3939 if (force_on)
3940 iommu_disable_protect_mem_regions(iommu);
3941 continue;
3942 }
3943
3944 iommu_flush_write_buffer(iommu);
3945
3946 iommu_set_root_entry(iommu);
3947
3948 iommu->flush.flush_context(iommu, 0, 0, 0,
3949 DMA_CCMD_GLOBAL_INVL);
3950 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3951 iommu_enable_translation(iommu);
3952 iommu_disable_protect_mem_regions(iommu);
3953 }
3954
3955 return 0;
3956}
3957
3958static void iommu_flush_all(void)
3959{
3960 struct dmar_drhd_unit *drhd;
3961 struct intel_iommu *iommu;
3962
3963 for_each_active_iommu(iommu, drhd) {
3964 iommu->flush.flush_context(iommu, 0, 0, 0,
3965 DMA_CCMD_GLOBAL_INVL);
3966 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3967 DMA_TLB_GLOBAL_FLUSH);
3968 }
3969}
3970
3971static int iommu_suspend(void)
3972{
3973 struct dmar_drhd_unit *drhd;
3974 struct intel_iommu *iommu = NULL;
3975 unsigned long flag;
3976
3977 for_each_active_iommu(iommu, drhd) {
3978 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3979 GFP_ATOMIC);
3980 if (!iommu->iommu_state)
3981 goto nomem;
3982 }
3983
3984 iommu_flush_all();
3985
3986 for_each_active_iommu(iommu, drhd) {
3987 iommu_disable_translation(iommu);
3988
3989 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3990
3991 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3992 readl(iommu->reg + DMAR_FECTL_REG);
3993 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3994 readl(iommu->reg + DMAR_FEDATA_REG);
3995 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3996 readl(iommu->reg + DMAR_FEADDR_REG);
3997 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3998 readl(iommu->reg + DMAR_FEUADDR_REG);
3999
4000 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4001 }
4002 return 0;
4003
4004nomem:
4005 for_each_active_iommu(iommu, drhd)
4006 kfree(iommu->iommu_state);
4007
4008 return -ENOMEM;
4009}
4010
4011static void iommu_resume(void)
4012{
4013 struct dmar_drhd_unit *drhd;
4014 struct intel_iommu *iommu = NULL;
4015 unsigned long flag;
4016
4017 if (init_iommu_hw()) {
4018 if (force_on)
4019 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
4020 else
4021 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
4022 return;
4023 }
4024
4025 for_each_active_iommu(iommu, drhd) {
4026
4027 raw_spin_lock_irqsave(&iommu->register_lock, flag);
4028
4029 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
4030 iommu->reg + DMAR_FECTL_REG);
4031 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
4032 iommu->reg + DMAR_FEDATA_REG);
4033 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
4034 iommu->reg + DMAR_FEADDR_REG);
4035 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
4036 iommu->reg + DMAR_FEUADDR_REG);
4037
4038 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
4039 }
4040
4041 for_each_active_iommu(iommu, drhd)
4042 kfree(iommu->iommu_state);
4043}
4044
4045static struct syscore_ops iommu_syscore_ops = {
4046 .resume = iommu_resume,
4047 .suspend = iommu_suspend,
4048};
4049
4050static void __init init_iommu_pm_ops(void)
4051{
4052 register_syscore_ops(&iommu_syscore_ops);
4053}
4054
4055#else
4056static inline void init_iommu_pm_ops(void) {}
4057#endif
4058
4059
4060int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
4061{
4062 struct acpi_dmar_reserved_memory *rmrr;
4063 struct dmar_rmrr_unit *rmrru;
4064
4065 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
4066 if (!rmrru)
4067 return -ENOMEM;
4068
4069 rmrru->hdr = header;
4070 rmrr = (struct acpi_dmar_reserved_memory *)header;
4071 rmrru->base_address = rmrr->base_address;
4072 rmrru->end_address = rmrr->end_address;
4073 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
4074 ((void *)rmrr) + rmrr->header.length,
4075 &rmrru->devices_cnt);
4076 if (rmrru->devices_cnt && rmrru->devices == NULL) {
4077 kfree(rmrru);
4078 return -ENOMEM;
4079 }
4080
4081 list_add(&rmrru->list, &dmar_rmrr_units);
4082
4083 return 0;
4084}
4085
4086static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
4087{
4088 struct dmar_atsr_unit *atsru;
4089 struct acpi_dmar_atsr *tmp;
4090
4091 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4092 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
4093 if (atsr->segment != tmp->segment)
4094 continue;
4095 if (atsr->header.length != tmp->header.length)
4096 continue;
4097 if (memcmp(atsr, tmp, atsr->header.length) == 0)
4098 return atsru;
4099 }
4100
4101 return NULL;
4102}
4103
4104int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4105{
4106 struct acpi_dmar_atsr *atsr;
4107 struct dmar_atsr_unit *atsru;
4108
4109 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
4110 return 0;
4111
4112 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4113 atsru = dmar_find_atsr(atsr);
4114 if (atsru)
4115 return 0;
4116
4117 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
4118 if (!atsru)
4119 return -ENOMEM;
4120
4121
4122
4123
4124
4125
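	/*
	 * The ACPI header may live in memory that is freed once this callback
	 * returns (e.g. a buffer allocated for a _DSM evaluation), so keep a
	 * private copy after the atsru structure.
	 */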
4126 atsru->hdr = (void *)(atsru + 1);
4127 memcpy(atsru->hdr, hdr, hdr->length);
4128 atsru->include_all = atsr->flags & 0x1;
4129 if (!atsru->include_all) {
4130 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4131 (void *)atsr + atsr->header.length,
4132 &atsru->devices_cnt);
4133 if (atsru->devices_cnt && atsru->devices == NULL) {
4134 kfree(atsru);
4135 return -ENOMEM;
4136 }
4137 }
4138
4139 list_add_rcu(&atsru->list, &dmar_atsr_units);
4140
4141 return 0;
4142}
4143
4144static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4145{
4146 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4147 kfree(atsru);
4148}
4149
4150int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4151{
4152 struct acpi_dmar_atsr *atsr;
4153 struct dmar_atsr_unit *atsru;
4154
4155 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4156 atsru = dmar_find_atsr(atsr);
4157 if (atsru) {
4158 list_del_rcu(&atsru->list);
4159 synchronize_rcu();
4160 intel_iommu_free_atsr(atsru);
4161 }
4162
4163 return 0;
4164}
4165
4166int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4167{
4168 int i;
4169 struct device *dev;
4170 struct acpi_dmar_atsr *atsr;
4171 struct dmar_atsr_unit *atsru;
4172
4173 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4174 atsru = dmar_find_atsr(atsr);
4175 if (!atsru)
4176 return 0;
4177
4178 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4179 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4180 i, dev)
4181 return -EBUSY;
4182
4183 return 0;
4184}
4185
4186static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4187{
4188 int sp, ret = 0;
4189 struct intel_iommu *iommu = dmaru->iommu;
4190
4191 if (g_iommus[iommu->seq_id])
4192 return 0;
4193
4194 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
4195 pr_warn("%s: Doesn't support hardware pass through.\n",
4196 iommu->name);
4197 return -ENXIO;
4198 }
4199 if (!ecap_sc_support(iommu->ecap) &&
4200 domain_update_iommu_snooping(iommu)) {
4201 pr_warn("%s: Doesn't support snooping.\n",
4202 iommu->name);
4203 return -ENXIO;
4204 }
4205 sp = domain_update_iommu_superpage(iommu) - 1;
4206 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
4207 pr_warn("%s: Doesn't support large page.\n",
4208 iommu->name);
4209 return -ENXIO;
4210 }
4211
4212
4213
4214
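	/*
	 * Disable translation if it was left enabled prior to OS handover.
	 */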
4215 if (iommu->gcmd & DMA_GCMD_TE)
4216 iommu_disable_translation(iommu);
4217
4218 g_iommus[iommu->seq_id] = iommu;
4219 ret = iommu_init_domains(iommu);
4220 if (ret == 0)
4221 ret = iommu_alloc_root_entry(iommu);
4222 if (ret)
4223 goto out;
4224
4225#ifdef CONFIG_INTEL_IOMMU_SVM
4226 if (pasid_enabled(iommu))
4227 intel_svm_alloc_pasid_tables(iommu);
4228#endif
4229
4230 if (dmaru->ignored) {
4231
4232
4233
4234 if (force_on)
4235 iommu_disable_protect_mem_regions(iommu);
4236 return 0;
4237 }
4238
4239 intel_iommu_init_qi(iommu);
4240 iommu_flush_write_buffer(iommu);
4241
4242#ifdef CONFIG_INTEL_IOMMU_SVM
4243 if (pasid_enabled(iommu) && ecap_prs(iommu->ecap)) {
4244 ret = intel_svm_enable_prq(iommu);
4245 if (ret)
4246 goto disable_iommu;
4247 }
4248#endif
4249 ret = dmar_set_interrupt(iommu);
4250 if (ret)
4251 goto disable_iommu;
4252
4253 iommu_set_root_entry(iommu);
4254 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4255 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4256 iommu_enable_translation(iommu);
4257
4258 iommu_disable_protect_mem_regions(iommu);
4259 return 0;
4260
4261disable_iommu:
4262 disable_dmar_iommu(iommu);
4263out:
4264 free_dmar_iommu(iommu);
4265 return ret;
4266}
4267
4268int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4269{
4270 int ret = 0;
4271 struct intel_iommu *iommu = dmaru->iommu;
4272
4273 if (!intel_iommu_enabled)
4274 return 0;
4275 if (iommu == NULL)
4276 return -EINVAL;
4277
4278 if (insert) {
4279 ret = intel_iommu_add(dmaru);
4280 } else {
4281 disable_dmar_iommu(iommu);
4282 free_dmar_iommu(iommu);
4283 }
4284
4285 return ret;
4286}
4287
4288static void intel_iommu_free_dmars(void)
4289{
4290 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4291 struct dmar_atsr_unit *atsru, *atsr_n;
4292
4293 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4294 list_del(&rmrru->list);
4295 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4296 kfree(rmrru);
4297 }
4298
4299 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4300 list_del(&atsru->list);
4301 intel_iommu_free_atsr(atsru);
4302 }
4303}
4304
4305int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4306{
4307 int i, ret = 1;
4308 struct pci_bus *bus;
4309 struct pci_dev *bridge = NULL;
4310 struct device *tmp;
4311 struct acpi_dmar_atsr *atsr;
4312 struct dmar_atsr_unit *atsru;
4313
4314 dev = pci_physfn(dev);
4315 for (bus = dev->bus; bus; bus = bus->parent) {
4316 bridge = bus->self;
4317
4318 if (!bridge)
4319 return 1;
4320
4321 if (!pci_is_pcie(bridge) ||
4322 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
4323 return 0;
4324
4325 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
4326 break;
4327 }
4328
4329 rcu_read_lock();
4330 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4331 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4332 if (atsr->segment != pci_domain_nr(dev->bus))
4333 continue;
4334
4335 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
4336 if (tmp == &bridge->dev)
4337 goto out;
4338
4339 if (atsru->include_all)
4340 goto out;
4341 }
4342 ret = 0;
4343out:
4344 rcu_read_unlock();
4345
4346 return ret;
4347}
4348
4349int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4350{
4351 int ret = 0;
4352 struct dmar_rmrr_unit *rmrru;
4353 struct dmar_atsr_unit *atsru;
4354 struct acpi_dmar_atsr *atsr;
4355 struct acpi_dmar_reserved_memory *rmrr;
4356
4357 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4358 return 0;
4359
4360 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4361 rmrr = container_of(rmrru->hdr,
4362 struct acpi_dmar_reserved_memory, header);
4363 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4364 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4365 ((void *)rmrr) + rmrr->header.length,
4366 rmrr->segment, rmrru->devices,
4367 rmrru->devices_cnt);
4368			if (ret < 0)
4369 return ret;
4370 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4371 dmar_remove_dev_scope(info, rmrr->segment,
4372 rmrru->devices, rmrru->devices_cnt);
4373 }
4374 }
4375
4376 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4377 if (atsru->include_all)
4378 continue;
4379
4380 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4381 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4382 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4383 (void *)atsr + atsr->header.length,
4384 atsr->segment, atsru->devices,
4385 atsru->devices_cnt);
4386 if (ret > 0)
4387 break;
4388			else if (ret < 0)
4389 return ret;
4390 } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) {
4391 if (dmar_remove_dev_scope(info, atsr->segment,
4392 atsru->devices, atsru->devices_cnt))
4393 break;
4394 }
4395 }
4396
4397 return 0;
4398}
4399
4400
4401
4402
4403
4404
4405
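/*
 * Bus notifier: when a device is removed from its bus, tear down its
 * device_domain_info and free the domain if it was the last device in a
 * non-VM, non-identity domain.
 */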
4406static int device_notifier(struct notifier_block *nb,
4407 unsigned long action, void *data)
4408{
4409 struct device *dev = data;
4410 struct dmar_domain *domain;
4411
4412 if (iommu_dummy(dev))
4413 return 0;
4414
4415 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4416 return 0;
4417
4418 domain = find_domain(dev);
4419 if (!domain)
4420 return 0;
4421
4422 dmar_remove_one_dev_info(domain, dev);
4423 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4424 domain_exit(domain);
4425
4426 return 0;
4427}
4428
4429static struct notifier_block device_nb = {
4430 .notifier_call = device_notifier,
4431};
4432
4433static int intel_iommu_memory_notifier(struct notifier_block *nb,
4434 unsigned long val, void *v)
4435{
4436 struct memory_notify *mhp = v;
4437 unsigned long long start, end;
4438 unsigned long start_vpfn, last_vpfn;
4439
4440 switch (val) {
4441 case MEM_GOING_ONLINE:
4442 start = mhp->start_pfn << PAGE_SHIFT;
4443 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4444 if (iommu_domain_identity_map(si_domain, start, end)) {
4445 pr_warn("Failed to build identity map for [%llx-%llx]\n",
4446 start, end);
4447 return NOTIFY_BAD;
4448 }
4449 break;
4450
4451 case MEM_OFFLINE:
4452 case MEM_CANCEL_ONLINE:
4453 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4454 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4455 while (start_vpfn <= last_vpfn) {
4456 struct iova *iova;
4457 struct dmar_drhd_unit *drhd;
4458 struct intel_iommu *iommu;
4459 struct page *freelist;
4460
4461 iova = find_iova(&si_domain->iovad, start_vpfn);
4462 if (iova == NULL) {
4463				pr_debug("Failed to get IOVA for PFN %lx\n",
4464 start_vpfn);
4465 break;
4466 }
4467
4468 iova = split_and_remove_iova(&si_domain->iovad, iova,
4469 start_vpfn, last_vpfn);
4470 if (iova == NULL) {
4471 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
4472 start_vpfn, last_vpfn);
4473 return NOTIFY_BAD;
4474 }
4475
4476 freelist = domain_unmap(si_domain, iova->pfn_lo,
4477 iova->pfn_hi);
4478
4479 rcu_read_lock();
4480 for_each_active_iommu(iommu, drhd)
4481 iommu_flush_iotlb_psi(iommu, si_domain,
4482 iova->pfn_lo, iova_size(iova),
4483 !freelist, 0);
4484 rcu_read_unlock();
4485 dma_free_pagelist(freelist);
4486
4487 start_vpfn = iova->pfn_hi + 1;
4488 free_iova_mem(iova);
4489 }
4490 break;
4491 }
4492
4493 return NOTIFY_OK;
4494}
4495
4496static struct notifier_block intel_iommu_memory_nb = {
4497 .notifier_call = intel_iommu_memory_notifier,
4498 .priority = 0
4499};
4500
4501
4502static ssize_t intel_iommu_show_version(struct device *dev,
4503 struct device_attribute *attr,
4504 char *buf)
4505{
4506 struct intel_iommu *iommu = dev_get_drvdata(dev);
4507 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4508 return sprintf(buf, "%d:%d\n",
4509 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4510}
4511static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4512
4513static ssize_t intel_iommu_show_address(struct device *dev,
4514 struct device_attribute *attr,
4515 char *buf)
4516{
4517 struct intel_iommu *iommu = dev_get_drvdata(dev);
4518 return sprintf(buf, "%llx\n", iommu->reg_phys);
4519}
4520static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4521
4522static ssize_t intel_iommu_show_cap(struct device *dev,
4523 struct device_attribute *attr,
4524 char *buf)
4525{
4526 struct intel_iommu *iommu = dev_get_drvdata(dev);
4527 return sprintf(buf, "%llx\n", iommu->cap);
4528}
4529static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4530
4531static ssize_t intel_iommu_show_ecap(struct device *dev,
4532 struct device_attribute *attr,
4533 char *buf)
4534{
4535 struct intel_iommu *iommu = dev_get_drvdata(dev);
4536 return sprintf(buf, "%llx\n", iommu->ecap);
4537}
4538static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4539
4540static ssize_t intel_iommu_show_ndoms(struct device *dev,
4541 struct device_attribute *attr,
4542 char *buf)
4543{
4544 struct intel_iommu *iommu = dev_get_drvdata(dev);
4545 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4546}
4547static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4548
4549static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4550 struct device_attribute *attr,
4551 char *buf)
4552{
4553 struct intel_iommu *iommu = dev_get_drvdata(dev);
4554 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4555 cap_ndoms(iommu->cap)));
4556}
4557static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4558
4559static struct attribute *intel_iommu_attrs[] = {
4560 &dev_attr_version.attr,
4561 &dev_attr_address.attr,
4562 &dev_attr_cap.attr,
4563 &dev_attr_ecap.attr,
4564 &dev_attr_domains_supported.attr,
4565 &dev_attr_domains_used.attr,
4566 NULL,
4567};
4568
4569static struct attribute_group intel_iommu_group = {
4570 .name = "intel-iommu",
4571 .attrs = intel_iommu_attrs,
4572};
4573
4574const struct attribute_group *intel_iommu_groups[] = {
4575 &intel_iommu_group,
4576 NULL,
4577};
4578
4579int __init intel_iommu_init(void)
4580{
4581 int ret = -ENODEV;
4582 struct dmar_drhd_unit *drhd;
4583 struct intel_iommu *iommu;
4584
4585
4586 force_on = tboot_force_iommu();
4587
4588 if (iommu_init_mempool()) {
4589 if (force_on)
4590 panic("tboot: Failed to initialize iommu memory\n");
4591 return -ENOMEM;
4592 }
4593
4594 down_write(&dmar_global_lock);
4595 if (dmar_table_init()) {
4596 if (force_on)
4597 panic("tboot: Failed to initialize DMAR table\n");
4598 goto out_free_dmar;
4599 }
4600
4601 if (dmar_dev_scope_init() < 0) {
4602 if (force_on)
4603 panic("tboot: Failed to initialize DMAR device scope\n");
4604 goto out_free_dmar;
4605 }
4606
4607 if (no_iommu || dmar_disabled)
4608 goto out_free_dmar;
4609
4610 if (list_empty(&dmar_rmrr_units))
4611 pr_info("No RMRR found\n");
4612
4613 if (list_empty(&dmar_atsr_units))
4614 pr_info("No ATSR found\n");
4615
4616 if (dmar_init_reserved_ranges()) {
4617 if (force_on)
4618 panic("tboot: Failed to reserve iommu ranges\n");
4619 goto out_free_reserved_range;
4620 }
4621
4622 init_no_remapping_devices();
4623
4624 ret = init_dmars();
4625 if (ret) {
4626 if (force_on)
4627 panic("tboot: Failed to initialize DMARs\n");
4628 pr_err("Initialization failed\n");
4629 goto out_free_reserved_range;
4630 }
4631 up_write(&dmar_global_lock);
4632 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
4633
4634 init_timer(&unmap_timer);
4635#ifdef CONFIG_SWIOTLB
4636 swiotlb = 0;
4637#endif
4638 dma_ops = &intel_dma_ops;
4639
4640 init_iommu_pm_ops();
4641
4642 for_each_active_iommu(iommu, drhd)
4643 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4644 intel_iommu_groups,
4645 "%s", iommu->name);
4646
4647 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4648 bus_register_notifier(&pci_bus_type, &device_nb);
4649 if (si_domain && !hw_pass_through)
4650 register_memory_notifier(&intel_iommu_memory_nb);
4651
4652 intel_iommu_enabled = 1;
4653
4654 return 0;
4655
4656out_free_reserved_range:
4657 put_iova_domain(&reserved_iova_list);
4658out_free_dmar:
4659 intel_iommu_free_dmars();
4660 up_write(&dmar_global_lock);
4661 iommu_exit_mempool();
4662 return ret;
4663}
4664
4665static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4666{
4667 struct intel_iommu *iommu = opaque;
4668
4669 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4670 return 0;
4671}
4672
4673
4674
4675
4676
4677
4678
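/*
 * Clear the context entries for @dev and for every PCI DMA alias it may
 * use (e.g. entries created for a PCIe-to-PCI bridge requester ID).
 */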
4679static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
4680{
4681 if (!iommu || !dev || !dev_is_pci(dev))
4682 return;
4683
4684 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
4685}
4686
4687static void __dmar_remove_one_dev_info(struct device_domain_info *info)
4688{
4689 struct intel_iommu *iommu;
4690 unsigned long flags;
4691
4692 assert_spin_locked(&device_domain_lock);
4693
4694 if (WARN_ON(!info))
4695 return;
4696
4697 iommu = info->iommu;
4698
4699 if (info->dev) {
4700 iommu_disable_dev_iotlb(info);
4701 domain_context_clear(iommu, info->dev);
4702 }
4703
4704 unlink_domain_info(info);
4705
4706 spin_lock_irqsave(&iommu->lock, flags);
4707 domain_detach_iommu(info->domain, iommu);
4708 spin_unlock_irqrestore(&iommu->lock, flags);
4709
4710 free_devinfo_mem(info);
4711}
4712
4713static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4714 struct device *dev)
4715{
4716 struct device_domain_info *info;
4717 unsigned long flags;
4718
4719 spin_lock_irqsave(&device_domain_lock, flags);
4720 info = dev->archdata.iommu;
4721 __dmar_remove_one_dev_info(info);
4722 spin_unlock_irqrestore(&device_domain_lock, flags);
4723}
4724
4725static int md_domain_init(struct dmar_domain *domain, int guest_width)
4726{
4727 int adjust_width;
4728
4729 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4730 DMA_32BIT_PFN);
4731 domain_reserve_special_ranges(domain);
4732
4733
4734 domain->gaw = guest_width;
4735 adjust_width = guestwidth_to_adjustwidth(guest_width);
4736 domain->agaw = width_to_agaw(adjust_width);
4737
4738 domain->iommu_coherency = 0;
4739 domain->iommu_snooping = 0;
4740 domain->iommu_superpage = 0;
4741 domain->max_addr = 0;
4742
4743 /* always allocate the top pgd */
4744 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4745 if (!domain->pgd)
4746 return -ENOMEM;
4747 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4748 return 0;
4749}
4750
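/*
 * iommu_ops->domain_alloc callback. Only unmanaged (VM) domains are
 * supported; the returned domain advertises an aperture covering the
 * full guest address width.
 */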
4751static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4752{
4753 struct dmar_domain *dmar_domain;
4754 struct iommu_domain *domain;
4755
4756 if (type != IOMMU_DOMAIN_UNMANAGED)
4757 return NULL;
4758
4759 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4760 if (!dmar_domain) {
4761 pr_err("Can't allocate dmar_domain\n");
4762 return NULL;
4763 }
4764 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4765 pr_err("Domain initialization failed\n");
4766 domain_exit(dmar_domain);
4767 return NULL;
4768 }
4769 domain_update_iommu_cap(dmar_domain);
4770
4771 domain = &dmar_domain->domain;
4772 domain->geometry.aperture_start = 0;
4773 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4774 domain->geometry.force_aperture = true;
4775
4776 return domain;
4777}
4778
4779static void intel_iommu_domain_free(struct iommu_domain *domain)
4780{
4781 domain_exit(to_dmar_domain(domain));
4782}
4783
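/*
 * iommu_ops->attach_dev callback. Devices with mandatory RMRRs are
 * refused, any existing mapping for the device is torn down, and the
 * domain's address width is clamped to what this IOMMU can handle before
 * the device is added to the domain.
 */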
4784static int intel_iommu_attach_device(struct iommu_domain *domain,
4785 struct device *dev)
4786{
4787 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4788 struct intel_iommu *iommu;
4789 int addr_width;
4790 u8 bus, devfn;
4791
4792 if (device_is_rmrr_locked(dev)) {
4793 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4794 return -EPERM;
4795 }
4796
4797 /* normally dev is not mapped */
4798 if (unlikely(domain_context_mapped(dev))) {
4799 struct dmar_domain *old_domain;
4800
4801 old_domain = find_domain(dev);
4802 if (old_domain) {
4803 rcu_read_lock();
4804 dmar_remove_one_dev_info(old_domain, dev);
4805 rcu_read_unlock();
4806
4807 if (!domain_type_is_vm_or_si(old_domain) &&
4808 list_empty(&old_domain->devices))
4809 domain_exit(old_domain);
4810 }
4811 }
4812
4813 iommu = device_to_iommu(dev, &bus, &devfn);
4814 if (!iommu)
4815 return -ENODEV;
4816
4817 /* check if this iommu agaw is sufficient for max mapped address */
4818 addr_width = agaw_to_width(iommu->agaw);
4819 if (addr_width > cap_mgaw(iommu->cap))
4820 addr_width = cap_mgaw(iommu->cap);
4821
4822 if (dmar_domain->max_addr > (1LL << addr_width)) {
4823 pr_err("%s: iommu width (%d) is not "
4824 "sufficient for the mapped address (%llx)\n",
4825 __func__, addr_width, dmar_domain->max_addr);
4826 return -EFAULT;
4827 }
4828 dmar_domain->gaw = addr_width;
4829
4830 /*
4831  * Knock out extra levels of page tables if necessary
4832  */
4833 while (iommu->agaw < dmar_domain->agaw) {
4834 struct dma_pte *pte;
4835
4836 pte = dmar_domain->pgd;
4837 if (dma_pte_present(pte)) {
4838 dmar_domain->pgd = (struct dma_pte *)
4839 phys_to_virt(dma_pte_addr(pte));
4840 free_pgtable_page(pte);
4841 }
4842 dmar_domain->agaw--;
4843 }
4844
4845 return domain_add_dev_info(dmar_domain, dev);
4846}
4847
4848static void intel_iommu_detach_device(struct iommu_domain *domain,
4849 struct device *dev)
4850{
4851 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
4852}
4853
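/*
 * iommu_ops->map callback: convert IOMMU_READ/WRITE/CACHE into DMA PTE
 * bits, grow the domain's max_addr bookkeeping if needed and install the
 * mapping with domain_pfn_mapping().
 */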
4854static int intel_iommu_map(struct iommu_domain *domain,
4855 unsigned long iova, phys_addr_t hpa,
4856 size_t size, int iommu_prot)
4857{
4858 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4859 u64 max_addr;
4860 int prot = 0;
4861 int ret;
4862
4863 if (iommu_prot & IOMMU_READ)
4864 prot |= DMA_PTE_READ;
4865 if (iommu_prot & IOMMU_WRITE)
4866 prot |= DMA_PTE_WRITE;
4867 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4868 prot |= DMA_PTE_SNP;
4869
4870 max_addr = iova + size;
4871 if (dmar_domain->max_addr < max_addr) {
4872 u64 end;
4873
4874 /* check if minimum agaw is sufficient for mapped address */
4875 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4876 if (end < max_addr) {
4877 pr_err("%s: iommu width (%d) is not "
4878 "sufficient for the mapped address (%llx)\n",
4879 __func__, dmar_domain->gaw, max_addr);
4880 return -EFAULT;
4881 }
4882 dmar_domain->max_addr = max_addr;
4883 }
4884 /* Round up size to the next multiple of VTD_PAGE_SIZE, if it and
4885    the low bits of hpa would take us onto the next page */
4886 size = aligned_nrpages(hpa, size);
4887 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4888 hpa >> VTD_PAGE_SHIFT, size, prot);
4889 return ret;
4890}
4891
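/*
 * iommu_ops->unmap callback: unmap at least @size bytes at @iova (more if
 * the IOVA is covered by a large page), flush the IOTLB on every IOMMU
 * the domain is attached to, then free the page-table pages that were
 * unlinked.
 */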
4892static size_t intel_iommu_unmap(struct iommu_domain *domain,
4893 unsigned long iova, size_t size)
4894{
4895 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4896 struct page *freelist = NULL;
4897 struct intel_iommu *iommu;
4898 unsigned long start_pfn, last_pfn;
4899 unsigned int npages;
4900 int iommu_id, level = 0;
4901
4902 /* Cope with callers that pass a size smaller than the existing
4903    mapping; a large-page mapping must be unmapped as a whole. */
4904 BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
4905
4906 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4907 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4908
4909 start_pfn = iova >> VTD_PAGE_SHIFT;
4910 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4911
4912 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4913
4914 npages = last_pfn - start_pfn + 1;
4915
4916 for_each_domain_iommu(iommu_id, dmar_domain) {
4917 iommu = g_iommus[iommu_id];
4918
4919 iommu_flush_iotlb_psi(iommu, dmar_domain,
4920 start_pfn, npages, !freelist, 0);
4921 }
4922
4923 dma_free_pagelist(freelist);
4924
4925 if (dmar_domain->max_addr == iova + size)
4926 dmar_domain->max_addr = iova;
4927
4928 return size;
4929}
4930
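/*
 * iommu_ops->iova_to_phys callback: walk the page tables and return the
 * physical address backing @iova, or 0 if nothing is mapped there.
 */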
4931static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4932 dma_addr_t iova)
4933{
4934 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4935 struct dma_pte *pte;
4936 int level = 0;
4937 u64 phys = 0;
4938
4939 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4940 if (pte)
4941 phys = dma_pte_addr(pte);
4942
4943 return phys;
4944}
4945
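/* Report cache-coherency (snooping) and interrupt-remapping capability. */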
4946static bool intel_iommu_capable(enum iommu_cap cap)
4947{
4948 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4949 return domain_update_iommu_snooping(NULL) == 1;
4950 if (cap == IOMMU_CAP_INTR_REMAP)
4951 return irq_remapping_enabled == 1;
4952
4953 return false;
4954}
4955
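/*
 * iommu_ops->add_device callback: link the new device to its IOMMU in
 * sysfs and place it in an IOMMU group.
 */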
4956static int intel_iommu_add_device(struct device *dev)
4957{
4958 struct intel_iommu *iommu;
4959 struct iommu_group *group;
4960 u8 bus, devfn;
4961
4962 iommu = device_to_iommu(dev, &bus, &devfn);
4963 if (!iommu)
4964 return -ENODEV;
4965
4966 iommu_device_link(iommu->iommu_dev, dev);
4967
4968 group = iommu_group_get_for_dev(dev);
4969
4970 if (IS_ERR(group))
4971 return PTR_ERR(group);
4972
4973 iommu_group_put(group);
4974 return 0;
4975}
4976
4977static void intel_iommu_remove_device(struct device *dev)
4978{
4979 struct intel_iommu *iommu;
4980 u8 bus, devfn;
4981
4982 iommu = device_to_iommu(dev, &bus, &devfn);
4983 if (!iommu)
4984 return;
4985
4986 iommu_group_remove_device(dev);
4987
4988 iommu_device_unlink(iommu->iommu_dev, dev);
4989}
4990
4991#ifdef CONFIG_INTEL_IOMMU_SVM
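/*
 * Enable PASID (SVM) support for a device: point the extended context
 * entry at the PASID and PASID-state tables, switch pass-through entries
 * to a PASID-aware translation type and record the domain and request IDs
 * in the intel_svm_dev.
 */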
4992int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
4993{
4994 struct device_domain_info *info;
4995 struct context_entry *context;
4996 struct dmar_domain *domain;
4997 unsigned long flags;
4998 u64 ctx_lo;
4999 int ret;
5000
5001 domain = get_valid_domain_for_dev(sdev->dev);
5002 if (!domain)
5003 return -EINVAL;
5004
5005 spin_lock_irqsave(&device_domain_lock, flags);
5006 spin_lock(&iommu->lock);
5007
5008 ret = -EINVAL;
5009 info = sdev->dev->archdata.iommu;
5010 if (!info || !info->pasid_supported)
5011 goto out;
5012
5013 context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
5014 if (WARN_ON(!context))
5015 goto out;
5016
5017 ctx_lo = context[0].lo;
5018
5019 sdev->did = domain->iommu_did[iommu->seq_id];
5020 sdev->sid = PCI_DEVID(info->bus, info->devfn);
5021
5022 if (!(ctx_lo & CONTEXT_PASIDE)) {
5023 context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
5024 context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
5025 wmb();
5026
5027 /* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
5028  * extended to permit requests-with-PASID when the PASIDE bit
5029  * is set. For CONTEXT_TT_PASS_THROUGH, however, requests with
5030  * a PASID are treated as requests-without-PASID, i.e. invalid,
5031  * and are blocked. So convert a pass-through context entry to
5032  * the appropriate PASID-aware pass-through mode, with or without
5033  * device-IOTLB support depending on what the device advertises.
5034  */
5035 if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
5036 ctx_lo &= ~CONTEXT_TT_MASK;
5037 if (info->ats_supported)
5038 ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
5039 else
5040 ctx_lo |= CONTEXT_TT_PT_PASID << 2;
5041 }
5042 ctx_lo |= CONTEXT_PASIDE;
5043 if (iommu->pasid_state_table)
5044 ctx_lo |= CONTEXT_DINVE;
5045 if (info->pri_supported)
5046 ctx_lo |= CONTEXT_PRS;
5047 context[0].lo = ctx_lo;
5048 wmb();
5049 iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
5050 DMA_CCMD_MASK_NOBIT,
5051 DMA_CCMD_DEVICE_INVL);
5052 }
5053
5054 /* Enable PASID support in the device, if it wasn't already */
5055 if (!info->pasid_enabled)
5056 iommu_enable_dev_iotlb(info);
5057
5058 if (info->ats_enabled) {
5059 sdev->dev_iotlb = 1;
5060 sdev->qdep = info->ats_qdep;
5061 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
5062 sdev->qdep = 0;
5063 }
5064 ret = 0;
5065
5066 out:
5067 spin_unlock(&iommu->lock);
5068 spin_unlock_irqrestore(&device_domain_lock, flags);
5069
5070 return ret;
5071}
5072
5073struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
5074{
5075 struct intel_iommu *iommu;
5076 u8 bus, devfn;
5077
5078 if (iommu_dummy(dev)) {
5079 dev_warn(dev,
5080 "No IOMMU translation for device; cannot enable SVM\n");
5081 return NULL;
5082 }
5083
5084 iommu = device_to_iommu(dev, &bus, &devfn);
5085 if (!iommu) {
5086 dev_err(dev, "No IOMMU for device; cannot enable SVM\n");
5087 return NULL;
5088 }
5089
5090 if (!iommu->pasid_table) {
5091 dev_err(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
5092 return NULL;
5093 }
5094
5095 return iommu;
5096}
5097#endif
5098
5099static const struct iommu_ops intel_iommu_ops = {
5100 .capable = intel_iommu_capable,
5101 .domain_alloc = intel_iommu_domain_alloc,
5102 .domain_free = intel_iommu_domain_free,
5103 .attach_dev = intel_iommu_attach_device,
5104 .detach_dev = intel_iommu_detach_device,
5105 .map = intel_iommu_map,
5106 .unmap = intel_iommu_unmap,
5107 .map_sg = default_iommu_map_sg,
5108 .iova_to_phys = intel_iommu_iova_to_phys,
5109 .add_device = intel_iommu_add_device,
5110 .remove_device = intel_iommu_remove_device,
5111 .device_group = pci_device_group,
5112 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
5113};
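/*
 * The callbacks in intel_iommu_ops above are not called directly; the
 * generic IOMMU core dispatches to them. As a rough sketch (not code from
 * this file), a consumer such as VFIO drives them through the public API
 * along these lines:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map(dom, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, size);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 *
 * which ends up in intel_iommu_domain_alloc(), intel_iommu_attach_device()
 * and friends above.
 */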
5114
5115static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
5116{
5117 /* G4x/GM45 integrated gfx DMAR support is broken; don't translate graphics. */
5118 pr_info("Disabling IOMMU for graphics on this chipset\n");
5119 dmar_map_gfx = 0;
5120}
5121
5122DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
5123DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
5124DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
5125DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
5126DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
5127DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
5128DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
5129
5130static void quirk_iommu_rwbf(struct pci_dev *dev)
5131{
5132 /*
5133  * Mobile 4 Series Chipset neglects to set RWBF capability,
5134  * but needs it. Same seems to hold for the desktop versions.
5135  */
5136 pr_info("Forcing write-buffer flush capability\n");
5137 rwbf_quirk = 1;
5138}
5139
5140DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
5141DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
5142DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
5143DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
5144DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
5145DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
5146DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
5147
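/*
 * GGC is the graphics control register in the Ironlake-era host bridge
 * config space; the fields below describe how much (if any) GTT stolen
 * memory the BIOS set aside for VT-d. quirk_calpella_no_shadow_gtt()
 * below uses it to detect a missing shadow GTT.
 */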
5148#define GGC 0x52
5149#define GGC_MEMORY_SIZE_MASK (0xf << 8)
5150#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
5151#define GGC_MEMORY_SIZE_1M (0x1 << 8)
5152#define GGC_MEMORY_SIZE_2M (0x3 << 8)
5153#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
5154#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
5155#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
5156#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
5157
5158static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
5159{
5160 unsigned short ggc;
5161
5162 if (pci_read_config_word(dev, GGC, &ggc))
5163 return;
5164
5165 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
5166 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
5167 dmar_map_gfx = 0;
5168 } else if (dmar_map_gfx) {
5169 /* we have to ensure the gfx device is idle before we flush */
5170 pr_info("Disabling batched IOTLB flush on Ironlake\n");
5171 intel_iommu_strict = 1;
5172 }
5173}
5174DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
5175DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
5176DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
5177DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
5178
5179/* On Tylersburg chipsets, some BIOSes have been known to enable the
5180   ISOCH DMAR unit for the Azalia sound device, but not give it any
5181   TLB entries, which causes it to deadlock. Check for that. We do
5182   this in a function called from init_dmars(), rather than in a PCI
5183   quirk, so that we don't print the "BIOS broken" warning when VT-d
5184   is actually disabled.
5185*/
5186static void __init check_tylersburg_isoch(void)
5187{
5188 struct pci_dev *pdev;
5189 uint32_t vtisochctrl;
5190
5191 /* If there's no Azalia device in the system anyway, nothing to do. */
5192 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
5193 if (!pdev)
5194 return;
5195 pci_dev_put(pdev);
5196
5197 /* System Management Registers. Might be hidden, in which case
5198    we can't do the sanity check. That's OK, because the known-broken
5199    BIOSes don't actually hide it, so far. */
5200 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
5201 if (!pdev)
5202 return;
5203
5204 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
5205 pci_dev_put(pdev);
5206 return;
5207 }
5208
5209 pci_dev_put(pdev);
5210
5211 /* If Azalia DMA is routed to the non-isoch DMAR unit, we're fine. */
5212 if (vtisochctrl & 1)
5213 return;
5214
5215 /* Drop all bits other than the number of TLB entries */
5216 vtisochctrl &= 0x1c;
5217
5218 /* If we have the recommended number of TLB entries (16), fine. */
5219 if (vtisochctrl == 0x10)
5220 return;
5221
5222 /* No TLB entries at all: warn loudly and identity-map Azalia so it keeps working. */
5223 if (!vtisochctrl) {
5224 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
5225 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
5226 dmi_get_system_info(DMI_BIOS_VENDOR),
5227 dmi_get_system_info(DMI_BIOS_VERSION),
5228 dmi_get_system_info(DMI_PRODUCT_VERSION));
5229 iommu_identity_mapping |= IDENTMAP_AZALIA;
5230 return;
5231 }
5232
5233 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
5234 vtisochctrl);
5235}
5236