1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22#include "qemu/osdep.h"
23#include "qemu/error-report.h"
24#include "qapi/error.h"
25#include "hw/sysbus.h"
26#include "exec/address-spaces.h"
27#include "intel_iommu_internal.h"
28#include "hw/pci/pci.h"
29#include "hw/pci/pci_bus.h"
30#include "hw/i386/pc.h"
31#include "hw/i386/apic-msidef.h"
32#include "hw/boards.h"
33#include "hw/i386/x86-iommu.h"
34#include "hw/pci-host/q35.h"
35#include "sysemu/kvm.h"
36#include "hw/i386/apic_internal.h"
37#include "kvm_i386.h"
38#include "trace.h"
39
40static void vtd_address_space_refresh_all(IntelIOMMUState *s);
41
42static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
43 uint64_t wmask, uint64_t w1cmask)
44{
45 stq_le_p(&s->csr[addr], val);
46 stq_le_p(&s->wmask[addr], wmask);
47 stq_le_p(&s->w1cmask[addr], w1cmask);
48}
49
/* Mark bits of a 64-bit register as write-only (they will read as zero). */
static void vtd_define_quad_wo(IntelIOMMUState *s, hwaddr addr, uint64_t mask)
{
    stq_le_p(&s->womask[addr], mask);
}
54
55static void vtd_define_long(IntelIOMMUState *s, hwaddr addr, uint32_t val,
56 uint32_t wmask, uint32_t w1cmask)
57{
58 stl_le_p(&s->csr[addr], val);
59 stl_le_p(&s->wmask[addr], wmask);
60 stl_le_p(&s->w1cmask[addr], w1cmask);
61}
62
/* Mark bits of a 32-bit register as write-only (they will read as zero). */
static void vtd_define_long_wo(IntelIOMMUState *s, hwaddr addr, uint32_t mask)
{
    stl_le_p(&s->womask[addr], mask);
}
67
68
69static void vtd_set_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val)
70{
71 uint64_t oldval = ldq_le_p(&s->csr[addr]);
72 uint64_t wmask = ldq_le_p(&s->wmask[addr]);
73 uint64_t w1cmask = ldq_le_p(&s->w1cmask[addr]);
74 stq_le_p(&s->csr[addr],
75 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
76}
77
78static void vtd_set_long(IntelIOMMUState *s, hwaddr addr, uint32_t val)
79{
80 uint32_t oldval = ldl_le_p(&s->csr[addr]);
81 uint32_t wmask = ldl_le_p(&s->wmask[addr]);
82 uint32_t w1cmask = ldl_le_p(&s->w1cmask[addr]);
83 stl_le_p(&s->csr[addr],
84 ((oldval & ~wmask) | (val & wmask)) & ~(w1cmask & val));
85}
86
87static uint64_t vtd_get_quad(IntelIOMMUState *s, hwaddr addr)
88{
89 uint64_t val = ldq_le_p(&s->csr[addr]);
90 uint64_t womask = ldq_le_p(&s->womask[addr]);
91 return val & ~womask;
92}
93
94static uint32_t vtd_get_long(IntelIOMMUState *s, hwaddr addr)
95{
96 uint32_t val = ldl_le_p(&s->csr[addr]);
97 uint32_t womask = ldl_le_p(&s->womask[addr]);
98 return val & ~womask;
99}
100
101
102static uint64_t vtd_get_quad_raw(IntelIOMMUState *s, hwaddr addr)
103{
104 return ldq_le_p(&s->csr[addr]);
105}
106
107static uint32_t vtd_get_long_raw(IntelIOMMUState *s, hwaddr addr)
108{
109 return ldl_le_p(&s->csr[addr]);
110}
111
/* Internal 64-bit write: bypasses the RW/W1C masks entirely. */
static void vtd_set_quad_raw(IntelIOMMUState *s, hwaddr addr, uint64_t val)
{
    stq_le_p(&s->csr[addr], val);
}
116
117static uint32_t vtd_set_clear_mask_long(IntelIOMMUState *s, hwaddr addr,
118 uint32_t clear, uint32_t mask)
119{
120 uint32_t new_val = (ldl_le_p(&s->csr[addr]) & ~clear) | mask;
121 stl_le_p(&s->csr[addr], new_val);
122 return new_val;
123}
124
125static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr addr,
126 uint64_t clear, uint64_t mask)
127{
128 uint64_t new_val = (ldq_le_p(&s->csr[addr]) & ~clear) | mask;
129 stq_le_p(&s->csr[addr], new_val);
130 return new_val;
131}
132
/* Acquire the lock protecting the IOTLB and context caches. */
static inline void vtd_iommu_lock(IntelIOMMUState *s)
{
    qemu_mutex_lock(&s->iommu_lock);
}
137
/* Release the lock protecting the IOTLB and context caches. */
static inline void vtd_iommu_unlock(IntelIOMMUState *s)
{
    qemu_mutex_unlock(&s->iommu_lock);
}
142
143
/* Whether any notifier registered on this address space wants MAP events. */
static inline gboolean vtd_as_has_map_notifier(VTDAddressSpace *as)
{
    return as->notifier_flags & IOMMU_NOTIFIER_MAP;
}
148
149
150static gboolean vtd_uint64_equal(gconstpointer v1, gconstpointer v2)
151{
152 return *((const uint64_t *)v1) == *((const uint64_t *)v2);
153}
154
155static guint vtd_uint64_hash(gconstpointer v)
156{
157 return (guint)*(const uint64_t *)v;
158}
159
160static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value,
161 gpointer user_data)
162{
163 VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
164 uint16_t domain_id = *(uint16_t *)user_data;
165 return entry->domain_id == domain_id;
166}
167
168
169static inline uint32_t vtd_slpt_level_shift(uint32_t level)
170{
171 assert(level != 0);
172 return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS;
173}
174
/* Mask selecting the page-frame bits of an address at @level. */
static inline uint64_t vtd_slpt_level_page_mask(uint32_t level)
{
    uint64_t page_size = 1ULL << vtd_slpt_level_shift(level);

    return ~(page_size - 1);
}
179
/*
 * g_hash_table_foreach_remove() predicate: match IOTLB entries covered
 * by a page invalidation request (VTDIOTLBPageInvInfo in @user_data).
 * An entry matches when it belongs to the requested domain AND either
 * its gfn falls inside the invalidation range (info->mask applied to
 * the entry's gfn), or the invalidation address masked to the entry's
 * own (possibly large-page) granularity hits the entry's gfn.
 */
static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value,
                                        gpointer user_data)
{
    VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value;
    VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data;
    /* Requested gfn, clipped to the invalidation range granularity */
    uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask;
    /* Requested address expressed at this entry's page granularity */
    uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K;
    return (entry->domain_id == info->domain_id) &&
            (((entry->gfn & info->mask) == gfn) ||
             (entry->gfn == gfn_tlb));
}
191
192
193
194
195static void vtd_reset_context_cache_locked(IntelIOMMUState *s)
196{
197 VTDAddressSpace *vtd_as;
198 VTDBus *vtd_bus;
199 GHashTableIter bus_it;
200 uint32_t devfn_it;
201
202 trace_vtd_context_cache_reset();
203
204 g_hash_table_iter_init(&bus_it, s->vtd_as_by_busptr);
205
206 while (g_hash_table_iter_next (&bus_it, NULL, (void**)&vtd_bus)) {
207 for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
208 vtd_as = vtd_bus->dev_as[devfn_it];
209 if (!vtd_as) {
210 continue;
211 }
212 vtd_as->context_cache_entry.context_cache_gen = 0;
213 }
214 }
215 s->context_cache_gen = 1;
216}
217
218
/* Drop every IOTLB entry.  Caller must hold s->iommu_lock. */
static void vtd_reset_iotlb_locked(IntelIOMMUState *s)
{
    assert(s->iotlb);
    g_hash_table_remove_all(s->iotlb);
}
224
/* Drop every IOTLB entry, taking the IOMMU lock internally. */
static void vtd_reset_iotlb(IntelIOMMUState *s)
{
    vtd_iommu_lock(s);
    vtd_reset_iotlb_locked(s);
    vtd_iommu_unlock(s);
}
231
/* Reset both the IOTLB and the context cache under the IOMMU lock. */
static void vtd_reset_caches(IntelIOMMUState *s)
{
    vtd_iommu_lock(s);
    vtd_reset_iotlb_locked(s);
    vtd_reset_context_cache_locked(s);
    vtd_iommu_unlock(s);
}
239
240static uint64_t vtd_get_iotlb_key(uint64_t gfn, uint16_t source_id,
241 uint32_t level)
242{
243 return gfn | ((uint64_t)(source_id) << VTD_IOTLB_SID_SHIFT) |
244 ((uint64_t)(level) << VTD_IOTLB_LVL_SHIFT);
245}
246
247static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level)
248{
249 return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K;
250}
251
252
253static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id,
254 hwaddr addr)
255{
256 VTDIOTLBEntry *entry;
257 uint64_t key;
258 int level;
259
260 for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) {
261 key = vtd_get_iotlb_key(vtd_get_iotlb_gfn(addr, level),
262 source_id, level);
263 entry = g_hash_table_lookup(s->iotlb, &key);
264 if (entry) {
265 goto out;
266 }
267 }
268
269out:
270 return entry;
271}
272
273
274static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id,
275 uint16_t domain_id, hwaddr addr, uint64_t slpte,
276 uint8_t access_flags, uint32_t level)
277{
278 VTDIOTLBEntry *entry = g_malloc(sizeof(*entry));
279 uint64_t *key = g_malloc(sizeof(*key));
280 uint64_t gfn = vtd_get_iotlb_gfn(addr, level);
281
282 trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id);
283 if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) {
284 trace_vtd_iotlb_reset("iotlb exceeds size limit");
285 vtd_reset_iotlb_locked(s);
286 }
287
288 entry->gfn = gfn;
289 entry->domain_id = domain_id;
290 entry->slpte = slpte;
291 entry->access_flags = access_flags;
292 entry->mask = vtd_slpt_level_page_mask(level);
293 *key = vtd_get_iotlb_key(gfn, source_id, level);
294 g_hash_table_replace(s->iotlb, key, entry);
295}
296
297
298
299
300static void vtd_generate_interrupt(IntelIOMMUState *s, hwaddr mesg_addr_reg,
301 hwaddr mesg_data_reg)
302{
303 MSIMessage msi;
304
305 assert(mesg_data_reg < DMAR_REG_SIZE);
306 assert(mesg_addr_reg < DMAR_REG_SIZE);
307
308 msi.address = vtd_get_long_raw(s, mesg_addr_reg);
309 msi.data = vtd_get_long_raw(s, mesg_data_reg);
310
311 trace_vtd_irq_generate(msi.address, msi.data);
312
313 apic_get_class()->send_msi(&msi);
314}
315
316
317
318
319
320static void vtd_generate_fault_event(IntelIOMMUState *s, uint32_t pre_fsts)
321{
322 if (pre_fsts & VTD_FSTS_PPF || pre_fsts & VTD_FSTS_PFO ||
323 pre_fsts & VTD_FSTS_IQE) {
324 trace_vtd_err("There are previous interrupt conditions "
325 "to be serviced by software, fault event "
326 "is not generated.");
327 return;
328 }
329 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, 0, VTD_FECTL_IP);
330 if (vtd_get_long_raw(s, DMAR_FECTL_REG) & VTD_FECTL_IM) {
331 trace_vtd_err("Interrupt Mask set, irq is not generated.");
332 } else {
333 vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
334 vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
335 }
336}
337
338
339
340
341static bool vtd_is_frcd_set(IntelIOMMUState *s, uint16_t index)
342{
343
344 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
345 addr += 8;
346
347 assert(index < DMAR_FRCD_REG_NR);
348
349 return vtd_get_quad_raw(s, addr) & VTD_FRCD_F;
350}
351
352
353
354
355
356static void vtd_update_fsts_ppf(IntelIOMMUState *s)
357{
358 uint32_t i;
359 uint32_t ppf_mask = 0;
360
361 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
362 if (vtd_is_frcd_set(s, i)) {
363 ppf_mask = VTD_FSTS_PPF;
364 break;
365 }
366 }
367 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_PPF, ppf_mask);
368 trace_vtd_fsts_ppf(!!ppf_mask);
369}
370
371static void vtd_set_frcd_and_update_ppf(IntelIOMMUState *s, uint16_t index)
372{
373
374 hwaddr addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
375 addr += 8;
376
377 assert(index < DMAR_FRCD_REG_NR);
378
379 vtd_set_clear_mask_quad(s, addr, 0, VTD_FRCD_F);
380 vtd_update_fsts_ppf(s);
381}
382
383
384static void vtd_record_frcd(IntelIOMMUState *s, uint16_t index,
385 uint16_t source_id, hwaddr addr,
386 VTDFaultReason fault, bool is_write)
387{
388 uint64_t hi = 0, lo;
389 hwaddr frcd_reg_addr = DMAR_FRCD_REG_OFFSET + (((uint64_t)index) << 4);
390
391 assert(index < DMAR_FRCD_REG_NR);
392
393 lo = VTD_FRCD_FI(addr);
394 hi = VTD_FRCD_SID(source_id) | VTD_FRCD_FR(fault);
395 if (!is_write) {
396 hi |= VTD_FRCD_T;
397 }
398 vtd_set_quad_raw(s, frcd_reg_addr, lo);
399 vtd_set_quad_raw(s, frcd_reg_addr + 8, hi);
400
401 trace_vtd_frr_new(index, hi, lo);
402}
403
404
405static bool vtd_try_collapse_fault(IntelIOMMUState *s, uint16_t source_id)
406{
407 uint32_t i;
408 uint64_t frcd_reg;
409 hwaddr addr = DMAR_FRCD_REG_OFFSET + 8;
410
411 for (i = 0; i < DMAR_FRCD_REG_NR; i++) {
412 frcd_reg = vtd_get_quad_raw(s, addr);
413 if ((frcd_reg & VTD_FRCD_F) &&
414 ((frcd_reg & VTD_FRCD_SID_MASK) == source_id)) {
415 return true;
416 }
417 addr += 16;
418 }
419 return false;
420}
421
422
423static void vtd_report_dmar_fault(IntelIOMMUState *s, uint16_t source_id,
424 hwaddr addr, VTDFaultReason fault,
425 bool is_write)
426{
427 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
428
429 assert(fault < VTD_FR_MAX);
430
431 if (fault == VTD_FR_RESERVED_ERR) {
432
433 return;
434 }
435
436 trace_vtd_dmar_fault(source_id, fault, addr, is_write);
437
438 if (fsts_reg & VTD_FSTS_PFO) {
439 trace_vtd_err("New fault is not recorded due to "
440 "Primary Fault Overflow.");
441 return;
442 }
443
444 if (vtd_try_collapse_fault(s, source_id)) {
445 trace_vtd_err("New fault is not recorded due to "
446 "compression of faults.");
447 return;
448 }
449
450 if (vtd_is_frcd_set(s, s->next_frcd_reg)) {
451 trace_vtd_err("Next Fault Recording Reg is used, "
452 "new fault is not recorded, set PFO field.");
453 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_PFO);
454 return;
455 }
456
457 vtd_record_frcd(s, s->next_frcd_reg, source_id, addr, fault, is_write);
458
459 if (fsts_reg & VTD_FSTS_PPF) {
460 trace_vtd_err("There are pending faults already, "
461 "fault event is not generated.");
462 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
463 s->next_frcd_reg++;
464 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
465 s->next_frcd_reg = 0;
466 }
467 } else {
468 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, VTD_FSTS_FRI_MASK,
469 VTD_FSTS_FRI(s->next_frcd_reg));
470 vtd_set_frcd_and_update_ppf(s, s->next_frcd_reg);
471 s->next_frcd_reg++;
472 if (s->next_frcd_reg == DMAR_FRCD_REG_NR) {
473 s->next_frcd_reg = 0;
474 }
475
476
477
478 vtd_generate_fault_event(s, fsts_reg);
479 }
480}
481
482
483
484
485static void vtd_handle_inv_queue_error(IntelIOMMUState *s)
486{
487 uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
488
489 vtd_set_clear_mask_long(s, DMAR_FSTS_REG, 0, VTD_FSTS_IQE);
490 vtd_generate_fault_event(s, fsts_reg);
491}
492
493
494static void vtd_generate_completion_event(IntelIOMMUState *s)
495{
496 if (vtd_get_long_raw(s, DMAR_ICS_REG) & VTD_ICS_IWC) {
497 trace_vtd_inv_desc_wait_irq("One pending, skip current");
498 return;
499 }
500 vtd_set_clear_mask_long(s, DMAR_ICS_REG, 0, VTD_ICS_IWC);
501 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, 0, VTD_IECTL_IP);
502 if (vtd_get_long_raw(s, DMAR_IECTL_REG) & VTD_IECTL_IM) {
503 trace_vtd_inv_desc_wait_irq("IM in IECTL_REG is set, "
504 "new event not generated");
505 return;
506 } else {
507
508 trace_vtd_inv_desc_wait_irq("Generating complete event");
509 vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
510 vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
511 }
512}
513
/* Whether the root entry's Present bit is set. */
static inline bool vtd_root_entry_present(VTDRootEntry *root)
{
    return root->val & VTD_ROOT_ENTRY_P;
}
518
519static int vtd_get_root_entry(IntelIOMMUState *s, uint8_t index,
520 VTDRootEntry *re)
521{
522 dma_addr_t addr;
523
524 addr = s->root + index * sizeof(*re);
525 if (dma_memory_read(&address_space_memory, addr, re, sizeof(*re))) {
526 trace_vtd_re_invalid(re->rsvd, re->val);
527 re->val = 0;
528 return -VTD_FR_ROOT_TABLE_INV;
529 }
530 re->val = le64_to_cpu(re->val);
531 return 0;
532}
533
/* Whether the context entry's Present bit is set. */
static inline bool vtd_ce_present(VTDContextEntry *context)
{
    return context->lo & VTD_CONTEXT_ENTRY_P;
}
538
539static int vtd_get_context_entry_from_root(VTDRootEntry *root, uint8_t index,
540 VTDContextEntry *ce)
541{
542 dma_addr_t addr;
543
544
545 addr = (root->val & VTD_ROOT_ENTRY_CTP) + index * sizeof(*ce);
546 if (dma_memory_read(&address_space_memory, addr, ce, sizeof(*ce))) {
547 trace_vtd_re_invalid(root->rsvd, root->val);
548 return -VTD_FR_CONTEXT_TABLE_INV;
549 }
550 ce->lo = le64_to_cpu(ce->lo);
551 ce->hi = le64_to_cpu(ce->hi);
552 return 0;
553}
554
/* Second-level page-table base address from a context entry. */
static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR;
}
559
/* Physical address field of an SLPTE, masked to address width @aw. */
static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw)
{
    return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw);
}
564
565
/* Whether @slpte is a leaf: bottom level, or a large page (PS bit). */
static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level)
{
    return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK);
}
570
571
572static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index)
573{
574 uint64_t slpte;
575
576 assert(index < VTD_SL_PT_ENTRY_NR);
577
578 if (dma_memory_read(&address_space_memory,
579 base_addr + index * sizeof(slpte), &slpte,
580 sizeof(slpte))) {
581 slpte = (uint64_t)-1;
582 return slpte;
583 }
584 slpte = le64_to_cpu(slpte);
585 return slpte;
586}
587
588
589
590
/* Index into the page table at @level selected by @iova. */
static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level)
{
    return (iova >> vtd_slpt_level_shift(level)) &
            ((1ULL << VTD_SL_LEVEL_BITS) - 1);
}
596
597
598static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level)
599{
600 return VTD_CAP_SAGAW_MASK & s->cap &
601 (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT));
602}
603
604
605
606
/* Page-table level from the context entry's AW field (AW 0 -> 2 levels). */
static inline uint32_t vtd_ce_get_level(VTDContextEntry *ce)
{
    return 2 + (ce->hi & VTD_CONTEXT_ENTRY_AW);
}
611
/* Adjusted guest address width in bits: 30 + 9 per AW step (30/39/48/...). */
static inline uint32_t vtd_ce_get_agaw(VTDContextEntry *ce)
{
    return 30 + (ce->hi & VTD_CONTEXT_ENTRY_AW) * 9;
}
616
/* Translation type (TT field) of a context entry. */
static inline uint32_t vtd_ce_get_type(VTDContextEntry *ce)
{
    return ce->lo & VTD_CONTEXT_ENTRY_TT;
}
621
622
623static inline bool vtd_ce_type_check(X86IOMMUState *x86_iommu,
624 VTDContextEntry *ce)
625{
626 switch (vtd_ce_get_type(ce)) {
627 case VTD_CONTEXT_TT_MULTI_LEVEL:
628
629 break;
630 case VTD_CONTEXT_TT_DEV_IOTLB:
631 if (!x86_iommu->dt_supported) {
632 return false;
633 }
634 break;
635 case VTD_CONTEXT_TT_PASS_THROUGH:
636 if (!x86_iommu->pt_supported) {
637 return false;
638 }
639 break;
640 default:
641
642 return false;
643 }
644 return true;
645}
646
647static inline uint64_t vtd_iova_limit(VTDContextEntry *ce, uint8_t aw)
648{
649 uint32_t ce_agaw = vtd_ce_get_agaw(ce);
650 return 1ULL << MIN(ce_agaw, aw);
651}
652
653
654static inline bool vtd_iova_range_check(uint64_t iova, VTDContextEntry *ce,
655 uint8_t aw)
656{
657
658
659
660
661 return !(iova & ~(vtd_iova_limit(ce, aw) - 1));
662}
663
664
665
666
667
668
/*
 * Reserved-bit masks for second-level paging entries, indexed by level
 * for regular entries and by level + 4 for large-page (PS=1) entries
 * (see vtd_slpte_nonzero_rsvd()).  Presumably populated during device
 * init based on the configured address width -- not visible in this
 * chunk; confirm at the realize/init code.
 */
static uint64_t vtd_paging_entry_rsvd_field[9];
670
671static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level)
672{
673 if (slpte & VTD_SL_PT_PAGE_SIZE_MASK) {
674
675 return slpte & vtd_paging_entry_rsvd_field[level + 4];
676 } else {
677 return slpte & vtd_paging_entry_rsvd_field[level];
678 }
679}
680
681
682static VTDBus *vtd_find_as_from_bus_num(IntelIOMMUState *s, uint8_t bus_num)
683{
684 VTDBus *vtd_bus = s->vtd_as_by_bus_num[bus_num];
685 if (!vtd_bus) {
686
687
688
689
690
691 GHashTableIter iter;
692
693 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
694 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
695 if (pci_bus_num(vtd_bus->bus) == bus_num) {
696 s->vtd_as_by_bus_num[bus_num] = vtd_bus;
697 return vtd_bus;
698 }
699 }
700 }
701 return vtd_bus;
702}
703
704
705
706
/*
 * Walk the second-level page table of context entry @ce to translate
 * @iova into a leaf SLPTE.
 *
 * On success returns 0 and fills:
 *   @slptep       - the leaf second-level PTE
 *   @slpte_level  - the level at which the leaf was found
 *   @reads/@writes - ANDed with the R/W permission bits seen at every
 *                    level of the walk
 * On failure returns a negative VTD_FR_* fault reason.
 */
static int vtd_iova_to_slpte(VTDContextEntry *ce, uint64_t iova, bool is_write,
                             uint64_t *slptep, uint32_t *slpte_level,
                             bool *reads, bool *writes, uint8_t aw_bits)
{
    dma_addr_t addr = vtd_ce_get_slpt_base(ce);
    uint32_t level = vtd_ce_get_level(ce);
    uint32_t offset;
    uint64_t slpte;
    uint64_t access_right_check;

    if (!vtd_iova_range_check(iova, ce, aw_bits)) {
        trace_vtd_err_dmar_iova_overflow(iova);
        return -VTD_FR_ADDR_BEYOND_MGAW;
    }

    /* Only check the permission bit for the requested direction. */
    access_right_check = is_write ? VTD_SL_W : VTD_SL_R;

    while (true) {
        offset = vtd_iova_level_offset(iova, level);
        slpte = vtd_get_slpte(addr, offset);

        if (slpte == (uint64_t)-1) {
            trace_vtd_err_dmar_slpte_read_error(iova, level);
            if (level == vtd_ce_get_level(ce)) {
                /* Failure at the top level implicates the context entry. */
                return -VTD_FR_CONTEXT_ENTRY_INV;
            } else {
                return -VTD_FR_PAGING_ENTRY_INV;
            }
        }
        /* Accumulate permissions across all levels walked. */
        *reads = (*reads) && (slpte & VTD_SL_R);
        *writes = (*writes) && (slpte & VTD_SL_W);
        if (!(slpte & access_right_check)) {
            trace_vtd_err_dmar_slpte_perm_error(iova, level, slpte, is_write);
            return is_write ? -VTD_FR_WRITE : -VTD_FR_READ;
        }
        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
            trace_vtd_err_dmar_slpte_resv_error(iova, level, slpte);
            return -VTD_FR_PAGING_ENTRY_RSVD;
        }

        if (vtd_is_last_slpte(slpte, level)) {
            *slptep = slpte;
            *slpte_level = level;
            return 0;
        }
        /* Descend into the next-level table. */
        addr = vtd_get_slpte_addr(slpte, aw_bits);
        level--;
    }
}
758
/* Per-entry callback for page walks; a negative return aborts the walk. */
typedef int (*vtd_page_walk_hook)(IOMMUTLBEntry *entry, void *private);
760
761
762
763
764
765
766
767
768
769
770
/* Context threaded through a page-table walk (vtd_page_walk*()). */
typedef struct {
    VTDAddressSpace *as;        /* address space being walked */
    vtd_page_walk_hook hook_fn; /* invoked for each discovered entry */
    void *private;              /* opaque data passed to hook_fn */
    bool notify_unmap;          /* also report non-present ranges */
    uint8_t aw;                 /* address width for SLPTE addr masking */
    uint16_t domain_id;         /* owning domain (used for tracing) */
} vtd_page_walk_info;
779
/*
 * Deliver one walk result to the hook, keeping as->iova_tree (the
 * record of what has already been notified) in sync: MAP events are
 * only sent for new or changed translations, UNMAP events only for
 * ranges that were actually mapped.  Returns 0, or the hook's
 * non-zero return value.
 */
static int vtd_page_walk_one(IOMMUTLBEntry *entry, vtd_page_walk_info *info)
{
    VTDAddressSpace *as = info->as;
    vtd_page_walk_hook hook_fn = info->hook_fn;
    void *private = info->private;
    DMAMap target = {
        .iova = entry->iova,
        .size = entry->addr_mask,
        .translated_addr = entry->translated_addr,
        .perm = entry->perm,
    };
    DMAMap *mapped = iova_tree_find(as->iova_tree, &target);

    if (entry->perm == IOMMU_NONE && !info->notify_unmap) {
        trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
        return 0;
    }

    assert(hook_fn);

    /* Update local IOVA mapped ranges */
    if (entry->perm) {
        if (mapped) {
            /* Identical translation already notified: nothing to do. */
            if (!memcmp(mapped, &target, sizeof(target))) {
                trace_vtd_page_walk_one_skip_map(entry->iova, entry->addr_mask,
                                                 entry->translated_addr);
                return 0;
            } else {
                /*
                 * Translation changed for a range already notified as
                 * mapped: emit an UNMAP for the stale mapping first,
                 * then fall through to notify the new MAP below.
                 * NOTE(review): this assumes consumers cannot handle
                 * an in-place MAP replacement -- confirm against the
                 * IOMMU notifier contract.
                 */
                IOMMUAccessFlags cache_perm = entry->perm;
                int ret;

                /* Temporarily turn the entry into an UNMAP event. */
                entry->perm = IOMMU_NONE;
                trace_vtd_page_walk_one(info->domain_id,
                                        entry->iova,
                                        entry->translated_addr,
                                        entry->addr_mask,
                                        entry->perm);
                ret = hook_fn(entry, private);
                if (ret) {
                    return ret;
                }
                /* Drop the stale record before inserting the new one. */
                iova_tree_remove(as->iova_tree, &target);
                /* Restore the real permission for the MAP notification. */
                entry->perm = cache_perm;
            }
        }
        iova_tree_insert(as->iova_tree, &target);
    } else {
        if (!mapped) {
            /* Range was never notified as mapped: skip the UNMAP. */
            trace_vtd_page_walk_one_skip_unmap(entry->iova, entry->addr_mask);
            return 0;
        }
        iova_tree_remove(as->iova_tree, &target);
    }

    trace_vtd_page_walk_one(info->domain_id, entry->iova,
                            entry->translated_addr, entry->addr_mask,
                            entry->perm);
    return hook_fn(entry, private);
}
858
859
860
861
862
863
864
865
866
867
868
/*
 * Walk one level of the second-level page table over [start, end),
 * invoking vtd_page_walk_one() for each leaf range and recursing into
 * present non-leaf entries.
 *
 * @addr: guest-physical base of the table for this level
 * @read/@write: permissions inherited from the levels above
 * Returns 0, or the first negative error from the hook.
 */
static int vtd_page_walk_level(dma_addr_t addr, uint64_t start,
                               uint64_t end, uint32_t level, bool read,
                               bool write, vtd_page_walk_info *info)
{
    bool read_cur, write_cur, entry_valid;
    uint32_t offset;
    uint64_t slpte;
    uint64_t subpage_size, subpage_mask;
    IOMMUTLBEntry entry;
    uint64_t iova = start;
    uint64_t iova_next;
    int ret = 0;

    trace_vtd_page_walk_level(addr, level, start, end);

    subpage_size = 1ULL << vtd_slpt_level_shift(level);
    subpage_mask = vtd_slpt_level_page_mask(level);

    while (iova < end) {
        iova_next = (iova & subpage_mask) + subpage_size;

        offset = vtd_iova_level_offset(iova, level);
        slpte = vtd_get_slpte(addr, offset);

        if (slpte == (uint64_t)-1) {
            /* Could not read the entry from guest memory: skip range. */
            trace_vtd_page_walk_skip_read(iova, iova_next);
            goto next;
        }

        if (vtd_slpte_nonzero_rsvd(slpte, level)) {
            /* Reserved bits set: treat as invalid and skip range. */
            trace_vtd_page_walk_skip_reserve(iova, iova_next);
            goto next;
        }

        /* Permissions are stacked with parents' */
        read_cur = read && (slpte & VTD_SL_R);
        write_cur = write && (slpte & VTD_SL_W);

        /*
         * An entry with either read or write permission is considered
         * valid; with neither, it is reported (as an UNMAP) only when
         * notify_unmap is set.
         */
        entry_valid = read_cur | write_cur;

        if (!vtd_is_last_slpte(slpte, level) && entry_valid) {
            /* Present non-leaf entry: recurse, clamping the range. */
            ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw),
                                      iova, MIN(iova_next, end), level - 1,
                                      read_cur, write_cur, info);
        } else {
            /* Leaf (or invalid) entry: report this level's subpage. */
            entry.target_as = &address_space_memory;
            entry.iova = iova & subpage_mask;
            entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur);
            entry.addr_mask = ~subpage_mask;
            /* NOTE: this is only meaningful if entry_valid == true */
            entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw);
            ret = vtd_page_walk_one(&entry, info);
        }

        if (ret < 0) {
            return ret;
        }

next:
        iova = iova_next;
    }

    return 0;
}
950
951
952
953
954
955
956
957
958
959static int vtd_page_walk(VTDContextEntry *ce, uint64_t start, uint64_t end,
960 vtd_page_walk_info *info)
961{
962 dma_addr_t addr = vtd_ce_get_slpt_base(ce);
963 uint32_t level = vtd_ce_get_level(ce);
964
965 if (!vtd_iova_range_check(start, ce, info->aw)) {
966 return -VTD_FR_ADDR_BEYOND_MGAW;
967 }
968
969 if (!vtd_iova_range_check(end, ce, info->aw)) {
970
971 end = vtd_iova_limit(ce, info->aw);
972 }
973
974 return vtd_page_walk_level(addr, start, end, level, true, true, info);
975}
976
977
/*
 * Fetch and validate the context entry for device @bus_num:@devfn.
 * Fills @ce on success.  Returns 0, or a negative VTD_FR_* fault
 * reason identifying which check failed.
 */
static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num,
                                    uint8_t devfn, VTDContextEntry *ce)
{
    VTDRootEntry re;
    int ret_fr;
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    ret_fr = vtd_get_root_entry(s, bus_num, &re);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_root_entry_present(&re)) {
        /* Not an error: the guest may simply not use this bus yet. */
        trace_vtd_re_not_present(bus_num);
        return -VTD_FR_ROOT_ENTRY_P;
    }

    if (re.rsvd || (re.val & VTD_ROOT_ENTRY_RSVD(s->aw_bits))) {
        trace_vtd_re_invalid(re.rsvd, re.val);
        return -VTD_FR_ROOT_ENTRY_RSVD;
    }

    ret_fr = vtd_get_context_entry_from_root(&re, devfn, ce);
    if (ret_fr) {
        return ret_fr;
    }

    if (!vtd_ce_present(ce)) {
        /* Not an error: the device may simply not be mapped yet. */
        trace_vtd_ce_not_present(bus_num, devfn);
        return -VTD_FR_CONTEXT_ENTRY_P;
    }

    if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
        (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO(s->aw_bits))) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_RSVD;
    }

    /* The page-table level programmed must be one we support (SAGAW). */
    if (!vtd_is_level_supported(s, vtd_ce_get_level(ce))) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    }

    /* The translation type must match our configured features. */
    if (!vtd_ce_type_check(x86_iommu, ce)) {
        trace_vtd_ce_invalid(ce->hi, ce->lo);
        return -VTD_FR_CONTEXT_ENTRY_INV;
    }

    return 0;
}
1032
1033static int vtd_sync_shadow_page_hook(IOMMUTLBEntry *entry,
1034 void *private)
1035{
1036 memory_region_notify_iommu((IOMMUMemoryRegion *)private, 0, *entry);
1037 return 0;
1038}
1039
1040
/*
 * Re-sync the shadow page table of @vtd_as over [addr, addr + size)
 * by walking the guest page table and replaying map/unmap events to
 * the IOMMU notifiers.  When @ce is NULL the context entry is
 * (re)fetched from guest memory.  Returns 0 or a walk error.
 */
static int vtd_sync_shadow_page_table_range(VTDAddressSpace *vtd_as,
                                            VTDContextEntry *ce,
                                            hwaddr addr, hwaddr size)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    vtd_page_walk_info info = {
        .hook_fn = vtd_sync_shadow_page_hook,
        .private = (void *)&vtd_as->iommu,
        .notify_unmap = true,
        .aw = s->aw_bits,
        .as = vtd_as,
    };
    VTDContextEntry ce_cache;
    int ret;

    if (ce) {
        /* Caller already has the context entry in hand; use it. */
        ce_cache = *ce;
    } else {
        /* Fetch the context entry for this device from guest memory. */
        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                       vtd_as->devfn, &ce_cache);
        if (ret) {
            /*
             * Treated as success: the guest may simply not have a
             * valid context entry for this device yet, in which case
             * there is nothing to sync.
             */
            trace_vtd_err("Detected invalid context entry when "
                          "trying to sync shadow page table");
            return 0;
        }
    }

    info.domain_id = VTD_CONTEXT_ENTRY_DID(ce_cache.hi);

    return vtd_page_walk(&ce_cache, addr, addr + size, &info);
}
1079
/* Re-sync the whole IOVA range of @vtd_as's shadow page table. */
static int vtd_sync_shadow_page_table(VTDAddressSpace *vtd_as)
{
    return vtd_sync_shadow_page_table_range(vtd_as, NULL, 0, UINT64_MAX);
}
1084
1085
1086
1087
1088
1089
1090static int vtd_dev_get_trans_type(VTDAddressSpace *as)
1091{
1092 IntelIOMMUState *s;
1093 VTDContextEntry ce;
1094 int ret;
1095
1096 s = as->iommu_state;
1097
1098 ret = vtd_dev_to_context_entry(s, pci_bus_num(as->bus),
1099 as->devfn, &ce);
1100 if (ret) {
1101 return ret;
1102 }
1103
1104 return vtd_ce_get_type(&ce);
1105}
1106
1107static bool vtd_dev_pt_enabled(VTDAddressSpace *as)
1108{
1109 int ret;
1110
1111 assert(as);
1112
1113 ret = vtd_dev_get_trans_type(as);
1114 if (ret < 0) {
1115
1116
1117
1118
1119
1120
1121 return false;
1122 }
1123
1124 return ret == VTD_CONTEXT_TT_PASS_THROUGH;
1125}
1126
1127
1128static bool vtd_switch_address_space(VTDAddressSpace *as)
1129{
1130 bool use_iommu;
1131
1132 bool take_bql = !qemu_mutex_iothread_locked();
1133
1134 assert(as);
1135
1136 use_iommu = as->iommu_state->dmar_enabled & !vtd_dev_pt_enabled(as);
1137
1138 trace_vtd_switch_address_space(pci_bus_num(as->bus),
1139 VTD_PCI_SLOT(as->devfn),
1140 VTD_PCI_FUNC(as->devfn),
1141 use_iommu);
1142
1143
1144
1145
1146
1147
1148 if (take_bql) {
1149 qemu_mutex_lock_iothread();
1150 }
1151
1152
1153 if (use_iommu) {
1154 memory_region_set_enabled(&as->sys_alias, false);
1155 memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
1156 } else {
1157 memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
1158 memory_region_set_enabled(&as->sys_alias, true);
1159 }
1160
1161 if (take_bql) {
1162 qemu_mutex_unlock_iothread();
1163 }
1164
1165 return use_iommu;
1166}
1167
1168static void vtd_switch_address_space_all(IntelIOMMUState *s)
1169{
1170 GHashTableIter iter;
1171 VTDBus *vtd_bus;
1172 int i;
1173
1174 g_hash_table_iter_init(&iter, s->vtd_as_by_busptr);
1175 while (g_hash_table_iter_next(&iter, NULL, (void **)&vtd_bus)) {
1176 for (i = 0; i < PCI_DEVFN_MAX; i++) {
1177 if (!vtd_bus->dev_as[i]) {
1178 continue;
1179 }
1180 vtd_switch_address_space(vtd_bus->dev_as[i]);
1181 }
1182 }
1183}
1184
/* Compose a 16-bit PCI source-id: bus number in bits 15:8, devfn in 7:0. */
static inline uint16_t vtd_make_source_id(uint8_t bus_num, uint8_t devfn)
{
    uint16_t bus_part = (uint16_t)(bus_num & 0xffUL) << 8;
    uint16_t devfn_part = devfn & 0xffUL;

    return bus_part | devfn_part;
}
1189
/*
 * Whether each fault reason is a "qualified" fault, i.e. one that is
 * suppressed when the context entry's fault-processing-disable (FPD)
 * bit is set (see the is_fpd_set checks in the translate path).
 * Indexed by VTDFaultReason.
 */
static const bool vtd_qualified_faults[] = {
    [VTD_FR_RESERVED] = false,
    [VTD_FR_ROOT_ENTRY_P] = false,
    [VTD_FR_CONTEXT_ENTRY_P] = true,
    [VTD_FR_CONTEXT_ENTRY_INV] = true,
    [VTD_FR_ADDR_BEYOND_MGAW] = true,
    [VTD_FR_WRITE] = true,
    [VTD_FR_READ] = true,
    [VTD_FR_PAGING_ENTRY_INV] = true,
    [VTD_FR_ROOT_TABLE_INV] = false,
    [VTD_FR_CONTEXT_TABLE_INV] = false,
    [VTD_FR_ROOT_ENTRY_RSVD] = false,
    [VTD_FR_PAGING_ENTRY_RSVD] = true,
    [VTD_FR_CONTEXT_ENTRY_TT] = true,
    [VTD_FR_RESERVED_ERR] = false,
    [VTD_FR_MAX] = false,
};
1207
1208
1209
1210
1211
/* Whether @fault is suppressible via the context entry's FPD bit. */
static inline bool vtd_is_qualified_fault(VTDFaultReason fault)
{
    return vtd_qualified_faults[fault];
}
1216
/* Whether @addr falls in the x86 interrupt address window. */
static inline bool vtd_is_interrupt_addr(hwaddr addr)
{
    return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST;
}
1221
1222static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id)
1223{
1224 VTDBus *vtd_bus;
1225 VTDAddressSpace *vtd_as;
1226 bool success = false;
1227
1228 vtd_bus = vtd_find_as_from_bus_num(s, VTD_SID_TO_BUS(source_id));
1229 if (!vtd_bus) {
1230 goto out;
1231 }
1232
1233 vtd_as = vtd_bus->dev_as[VTD_SID_TO_DEVFN(source_id)];
1234 if (!vtd_as) {
1235 goto out;
1236 }
1237
1238 if (vtd_switch_address_space(vtd_as) == false) {
1239
1240 success = true;
1241 }
1242
1243out:
1244 trace_vtd_pt_enable_fast_path(source_id, success);
1245}
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
/*
 * Translate @addr for device @bus:@devfn, filling @entry with the
 * result.  Consults, in order: the IOTLB, the per-device context
 * cache, and finally a full guest page-table walk (whose result is
 * installed in the IOTLB).  Pass-through contexts return an identity
 * mapping.  Returns true on success, false when a fault was raised
 * (in which case @entry is zeroed with IOMMU_NONE permission).
 */
static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus,
                                   uint8_t devfn, hwaddr addr, bool is_write,
                                   IOMMUTLBEntry *entry)
{
    IntelIOMMUState *s = vtd_as->iommu_state;
    VTDContextEntry ce;
    uint8_t bus_num = pci_bus_num(bus);
    VTDContextCacheEntry *cc_entry;
    uint64_t slpte, page_mask;
    uint32_t level;
    uint16_t source_id = vtd_make_source_id(bus_num, devfn);
    int ret_fr;
    bool is_fpd_set = false;
    bool reads = true;
    bool writes = true;
    uint8_t access_flags;
    VTDIOTLBEntry *iotlb_entry;

    /* Interrupt-range addresses must never reach DMA translation. */
    assert(!vtd_is_interrupt_addr(addr));

    vtd_iommu_lock(s);

    cc_entry = &vtd_as->context_cache_entry;

    /* Fast path: IOTLB hit. */
    iotlb_entry = vtd_lookup_iotlb(s, source_id, addr);
    if (iotlb_entry) {
        trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte,
                                 iotlb_entry->domain_id);
        slpte = iotlb_entry->slpte;
        access_flags = iotlb_entry->access_flags;
        page_mask = iotlb_entry->mask;
        goto out;
    }

    /* Context cache hit when generations match; else refetch. */
    if (cc_entry->context_cache_gen == s->context_cache_gen) {
        trace_vtd_iotlb_cc_hit(bus_num, devfn, cc_entry->context_entry.hi,
                               cc_entry->context_entry.lo,
                               cc_entry->context_cache_gen);
        ce = cc_entry->context_entry;
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
    } else {
        ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
        is_fpd_set = ce.lo & VTD_CONTEXT_ENTRY_FPD;
        if (ret_fr) {
            ret_fr = -ret_fr;
            /* FPD suppresses reporting of qualified faults. */
            if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
                trace_vtd_fault_disabled();
            } else {
                vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
            }
            goto error;
        }
        /* Install the freshly fetched entry into the context cache. */
        trace_vtd_iotlb_cc_update(bus_num, devfn, ce.hi, ce.lo,
                                  cc_entry->context_cache_gen,
                                  s->context_cache_gen);
        cc_entry->context_entry = ce;
        cc_entry->context_cache_gen = s->context_cache_gen;
    }

    /*
     * Pass-through context: identity-map the 4K page and try to flip
     * this device onto the direct-mapped region for future accesses.
     */
    if (vtd_ce_get_type(&ce) == VTD_CONTEXT_TT_PASS_THROUGH) {
        entry->iova = addr & VTD_PAGE_MASK_4K;
        entry->translated_addr = entry->iova;
        entry->addr_mask = ~VTD_PAGE_MASK_4K;
        entry->perm = IOMMU_RW;
        trace_vtd_translate_pt(source_id, entry->iova);

        vtd_pt_enable_fast_path(s, source_id);
        vtd_iommu_unlock(s);
        return true;
    }

    ret_fr = vtd_iova_to_slpte(&ce, addr, is_write, &slpte, &level,
                               &reads, &writes, s->aw_bits);
    if (ret_fr) {
        ret_fr = -ret_fr;
        /* FPD suppresses reporting of qualified faults. */
        if (is_fpd_set && vtd_is_qualified_fault(ret_fr)) {
            trace_vtd_fault_disabled();
        } else {
            vtd_report_dmar_fault(s, source_id, addr, ret_fr, is_write);
        }
        goto error;
    }

    page_mask = vtd_slpt_level_page_mask(level);
    access_flags = IOMMU_ACCESS_FLAG(reads, writes);
    /* Cache the successful walk result for subsequent translations. */
    vtd_update_iotlb(s, source_id, VTD_CONTEXT_ENTRY_DID(ce.hi), addr, slpte,
                     access_flags, level);
out:
    vtd_iommu_unlock(s);
    entry->iova = addr & page_mask;
    entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask;
    entry->addr_mask = ~page_mask;
    entry->perm = access_flags;
    return true;

error:
    vtd_iommu_unlock(s);
    entry->iova = 0;
    entry->translated_addr = 0;
    entry->addr_mask = 0;
    entry->perm = IOMMU_NONE;
    return false;
}
1382
1383static void vtd_root_table_setup(IntelIOMMUState *s)
1384{
1385 s->root = vtd_get_quad_raw(s, DMAR_RTADDR_REG);
1386 s->root_extended = s->root & VTD_RTADDR_RTT;
1387 s->root &= VTD_RTADDR_ADDR_MASK(s->aw_bits);
1388
1389 trace_vtd_reg_dmar_root(s->root, s->root_extended);
1390}
1391
1392static void vtd_iec_notify_all(IntelIOMMUState *s, bool global,
1393 uint32_t index, uint32_t mask)
1394{
1395 x86_iommu_iec_notify_all(X86_IOMMU_DEVICE(s), global, index, mask);
1396}
1397
1398static void vtd_interrupt_remap_table_setup(IntelIOMMUState *s)
1399{
1400 uint64_t value = 0;
1401 value = vtd_get_quad_raw(s, DMAR_IRTA_REG);
1402 s->intr_size = 1UL << ((value & VTD_IRTA_SIZE_MASK) + 1);
1403 s->intr_root = value & VTD_IRTA_ADDR_MASK(s->aw_bits);
1404 s->intr_eime = value & VTD_IRTA_EIME;
1405
1406
1407 vtd_iec_notify_all(s, true, 0, 0);
1408
1409 trace_vtd_reg_ir_root(s->intr_root, s->intr_size);
1410}
1411
1412static void vtd_iommu_replay_all(IntelIOMMUState *s)
1413{
1414 VTDAddressSpace *vtd_as;
1415
1416 QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
1417 vtd_sync_shadow_page_table(vtd_as);
1418 }
1419}
1420
/* Invalidate all cached context entries.
 *
 * Instead of walking every per-device cache entry, bump the global
 * generation counter; entries whose recorded generation no longer
 * matches are treated as stale on lookup. When the counter wraps to
 * VTD_CONTEXT_CACHE_GEN_MAX, the per-device entries are reset
 * explicitly so old generation numbers cannot collide with new ones.
 */
static void vtd_context_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_inv_desc_cc_global();

    vtd_iommu_lock(s);
    s->context_cache_gen++;
    if (s->context_cache_gen == VTD_CONTEXT_CACHE_GEN_MAX) {
        /* Generation counter wrapped: wipe cached entries directly. */
        vtd_reset_context_cache_locked(s);
    }
    vtd_iommu_unlock(s);
    vtd_address_space_refresh_all(s);
    /*
     * Context entries may change, which in turn can change the
     * mappings devices see. Replay shadow page tables so any
     * registered notifier observes the updated mappings.
     */
    vtd_iommu_replay_all(s);
}
1441
1442
1443
1444
1445static void vtd_context_device_invalidate(IntelIOMMUState *s,
1446 uint16_t source_id,
1447 uint16_t func_mask)
1448{
1449 uint16_t mask;
1450 VTDBus *vtd_bus;
1451 VTDAddressSpace *vtd_as;
1452 uint8_t bus_n, devfn;
1453 uint16_t devfn_it;
1454
1455 trace_vtd_inv_desc_cc_devices(source_id, func_mask);
1456
1457 switch (func_mask & 3) {
1458 case 0:
1459 mask = 0;
1460 break;
1461 case 1:
1462 mask = 4;
1463 break;
1464 case 2:
1465 mask = 6;
1466 break;
1467 case 3:
1468 mask = 7;
1469 break;
1470 }
1471 mask = ~mask;
1472
1473 bus_n = VTD_SID_TO_BUS(source_id);
1474 vtd_bus = vtd_find_as_from_bus_num(s, bus_n);
1475 if (vtd_bus) {
1476 devfn = VTD_SID_TO_DEVFN(source_id);
1477 for (devfn_it = 0; devfn_it < PCI_DEVFN_MAX; ++devfn_it) {
1478 vtd_as = vtd_bus->dev_as[devfn_it];
1479 if (vtd_as && ((devfn_it & mask) == (devfn & mask))) {
1480 trace_vtd_inv_desc_cc_device(bus_n, VTD_PCI_SLOT(devfn_it),
1481 VTD_PCI_FUNC(devfn_it));
1482 vtd_iommu_lock(s);
1483 vtd_as->context_cache_entry.context_cache_gen = 0;
1484 vtd_iommu_unlock(s);
1485
1486
1487
1488
1489 vtd_switch_address_space(vtd_as);
1490
1491
1492
1493
1494
1495
1496
1497
1498 vtd_sync_shadow_page_table(vtd_as);
1499 }
1500 }
1501 }
1502}
1503
1504
1505
1506
1507
/* Handle a register-based context-cache invalidation request (CCMD).
 * Returns the CAIG value to report back through the CCMD register,
 * or 0 for an invalid granularity request.
 */
static uint64_t vtd_context_cache_invalidate(IntelIOMMUState *s, uint64_t val)
{
    uint64_t caig;
    uint64_t type = val & VTD_CCMD_CIRG_MASK;

    switch (type) {
    case VTD_CCMD_DOMAIN_INVL:
        /* Fall through: a domain-selective invalidation is satisfied
         * by the (stricter) global invalidation. */
    case VTD_CCMD_GLOBAL_INVL:
        caig = VTD_CCMD_GLOBAL_INVL_A;
        vtd_context_global_invalidate(s);
        break;

    case VTD_CCMD_DEVICE_INVL:
        caig = VTD_CCMD_DEVICE_INVL_A;
        vtd_context_device_invalidate(s, VTD_CCMD_SID(val), VTD_CCMD_FM(val));
        break;

    default:
        trace_vtd_err("Context cache invalidate type error.");
        caig = 0;
    }
    return caig;
}
1532
/* Drop the entire IOTLB cache and replay shadow page tables for all
 * address spaces with notifiers so they observe current mappings.
 */
static void vtd_iotlb_global_invalidate(IntelIOMMUState *s)
{
    trace_vtd_inv_desc_iotlb_global();
    vtd_reset_iotlb(s);
    vtd_iommu_replay_all(s);
}
1539
1540static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id)
1541{
1542 VTDContextEntry ce;
1543 VTDAddressSpace *vtd_as;
1544
1545 trace_vtd_inv_desc_iotlb_domain(domain_id);
1546
1547 vtd_iommu_lock(s);
1548 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_domain,
1549 &domain_id);
1550 vtd_iommu_unlock(s);
1551
1552 QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
1553 if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
1554 vtd_as->devfn, &ce) &&
1555 domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
1556 vtd_sync_shadow_page_table(vtd_as);
1557 }
1558 }
1559}
1560
/* Propagate a page-range invalidation to notifier-bearing address
 * spaces that belong to @domain_id.
 *
 * For MAP-capable notifiers the shadow page table is re-walked over
 * the range, which emits both map and unmap events as needed. For
 * UNMAP-only notifiers a single unmap event covering the whole range
 * is emitted instead.
 */
static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s,
                                           uint16_t domain_id, hwaddr addr,
                                           uint8_t am)
{
    VTDAddressSpace *vtd_as;
    VTDContextEntry ce;
    int ret;
    hwaddr size = (1 << am) * VTD_PAGE_SIZE;

    QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) {
        ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
                                       vtd_as->devfn, &ce);
        if (!ret && domain_id == VTD_CONTEXT_ENTRY_DID(ce.hi)) {
            if (vtd_as_has_map_notifier(vtd_as)) {
                /*
                 * MAP notifier (e.g. device assignment): replay the
                 * shadow table over the range so new mappings are
                 * installed and stale ones removed.
                 */
                vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size);
            } else {
                /*
                 * UNMAP-only notifier (e.g. vhost): an invalidation
                 * only needs to tear the range down; send one unmap
                 * event covering it.
                 */
                IOMMUTLBEntry entry = {
                    .target_as = &address_space_memory,
                    .iova = addr,
                    .translated_addr = 0,
                    .addr_mask = size - 1,
                    .perm = IOMMU_NONE,
                };
                memory_region_notify_iommu(&vtd_as->iommu, 0, entry);
            }
        }
    }
}
1599
1600static void vtd_iotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id,
1601 hwaddr addr, uint8_t am)
1602{
1603 VTDIOTLBPageInvInfo info;
1604
1605 trace_vtd_inv_desc_iotlb_pages(domain_id, addr, am);
1606
1607 assert(am <= VTD_MAMV);
1608 info.domain_id = domain_id;
1609 info.addr = addr;
1610 info.mask = ~((1 << am) - 1);
1611 vtd_iommu_lock(s);
1612 g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &info);
1613 vtd_iommu_unlock(s);
1614 vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am);
1615}
1616
1617
1618
1619
1620
/* Handle a register-based IOTLB invalidation request (IOTLB_REG).
 * Returns the IAIG code reflecting the granularity actually performed,
 * or 0 for an invalid request.
 */
static uint64_t vtd_iotlb_flush(IntelIOMMUState *s, uint64_t val)
{
    uint64_t iaig;
    uint64_t type = val & VTD_TLB_FLUSH_GRANU_MASK;
    uint16_t domain_id;
    hwaddr addr;
    uint8_t am;

    switch (type) {
    case VTD_TLB_GLOBAL_FLUSH:
        iaig = VTD_TLB_GLOBAL_FLUSH_A;
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_TLB_DSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        iaig = VTD_TLB_DSI_FLUSH_A;
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_TLB_PSI_FLUSH:
        domain_id = VTD_TLB_DID(val);
        /* The page address and mask come from the IVA register, which
         * the guest writes before issuing the flush command. */
        addr = vtd_get_quad_raw(s, DMAR_IVA_REG);
        am = VTD_IVA_AM(addr);
        addr = VTD_IVA_ADDR(addr);
        if (am > VTD_MAMV) {
            trace_vtd_err("IOTLB PSI flush: address mask overflow.");
            iaig = 0;
            break;
        }
        iaig = VTD_TLB_PSI_FLUSH_A;
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        trace_vtd_err("IOTLB flush: invalid granularity.");
        iaig = 0;
    }
    return iaig;
}
1661
1662static void vtd_fetch_inv_desc(IntelIOMMUState *s);
1663
1664static inline bool vtd_queued_inv_disable_check(IntelIOMMUState *s)
1665{
1666 return s->qi_enabled && (s->iq_tail == s->iq_head) &&
1667 (s->iq_last_desc_type == VTD_INV_DESC_WAIT);
1668}
1669
/* Handle the Queued Invalidation Enable bit of the Global Command
 * Register: latch the queue base/size from IQA on enable, or tear the
 * queue down on disable (only when the queue is quiescent).
 */
static void vtd_handle_gcmd_qie(IntelIOMMUState *s, bool en)
{
    uint64_t iqa_val = vtd_get_quad_raw(s, DMAR_IQA_REG);

    trace_vtd_inv_qi_enable(en);

    if (en) {
        s->iq = iqa_val & VTD_IQA_IQA_MASK(s->aw_bits);
        /* Queue size is 2^(QS + 8) descriptors */
        s->iq_size = 1UL << ((iqa_val & VTD_IQA_QS) + 8);
        s->qi_enabled = true;
        trace_vtd_inv_qi_setup(s->iq, s->iq_size);
        /* Ok - report back to driver */
        vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_QIES);

        if (s->iq_tail != 0) {
            /*
             * Some guests set the tail before enabling QI (spec-wise
             * questionable, but observed in practice). Process the
             * already-queued descriptors now, unless a previous queue
             * error (IQE) is still pending.
             */
            trace_vtd_warn_invalid_qi_tail(s->iq_tail);
            if (!(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
                vtd_fetch_inv_desc(s);
            }
        }
    } else {
        if (vtd_queued_inv_disable_check(s)) {
            /* Disable Queued Invalidation and reset the head pointer */
            vtd_set_quad_raw(s, DMAR_IQH_REG, 0);
            s->iq_head = 0;
            s->qi_enabled = false;
            /* Ok - report back to driver */
            vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_QIES, 0);
        } else {
            trace_vtd_err_qi_disable(s->iq_head, s->iq_tail, s->iq_last_desc_type);
        }
    }
}
1709
1710
/* Handle Set Root Table Pointer in the Global Command Register: latch
 * the new root table, ack via RTPS, and drop all cached translations
 * since they may derive from the old table.
 */
static void vtd_handle_gcmd_srtp(IntelIOMMUState *s)
{
    vtd_root_table_setup(s);
    /* Ok - report back to driver */
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_RTPS);
    vtd_reset_caches(s);
    vtd_address_space_refresh_all(s);
}
1719
1720
/* Handle Set Interrupt Remap Table Pointer in the Global Command
 * Register: latch the new IR table and ack via IRTPS.
 */
static void vtd_handle_gcmd_sirtp(IntelIOMMUState *s)
{
    vtd_interrupt_remap_table_setup(s);
    /* Ok - report back to driver */
    vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRTPS);
}
1727
1728
1729static void vtd_handle_gcmd_te(IntelIOMMUState *s, bool en)
1730{
1731 if (s->dmar_enabled == en) {
1732 return;
1733 }
1734
1735 trace_vtd_dmar_enable(en);
1736
1737 if (en) {
1738 s->dmar_enabled = true;
1739
1740 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_TES);
1741 } else {
1742 s->dmar_enabled = false;
1743
1744
1745 s->next_frcd_reg = 0;
1746
1747 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_TES, 0);
1748 }
1749
1750 vtd_reset_caches(s);
1751 vtd_address_space_refresh_all(s);
1752}
1753
1754
1755static void vtd_handle_gcmd_ire(IntelIOMMUState *s, bool en)
1756{
1757 trace_vtd_ir_enable(en);
1758
1759 if (en) {
1760 s->intr_enabled = true;
1761
1762 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, 0, VTD_GSTS_IRES);
1763 } else {
1764 s->intr_enabled = false;
1765
1766 vtd_set_clear_mask_long(s, DMAR_GSTS_REG, VTD_GSTS_IRES, 0);
1767 }
1768}
1769
1770
/* Handle a write to the Global Command Register.
 *
 * One-shot commands (SRTP, SIRTP) act whenever their bit is written
 * as 1; enable/disable bits (TE, QIE, IRE) act only when they differ
 * from the current Global Status Register value.
 */
static void vtd_handle_gcmd_write(IntelIOMMUState *s)
{
    uint32_t status = vtd_get_long_raw(s, DMAR_GSTS_REG);
    uint32_t val = vtd_get_long_raw(s, DMAR_GCMD_REG);
    uint32_t changed = status ^ val;

    trace_vtd_reg_write_gcmd(status, val);
    if (changed & VTD_GCMD_TE) {
        /* Translation enable/disable */
        vtd_handle_gcmd_te(s, val & VTD_GCMD_TE);
    }
    if (val & VTD_GCMD_SRTP) {
        /* Set/update the root-table pointer */
        vtd_handle_gcmd_srtp(s);
    }
    if (changed & VTD_GCMD_QIE) {
        /* Queued Invalidation enable/disable */
        vtd_handle_gcmd_qie(s, val & VTD_GCMD_QIE);
    }
    if (val & VTD_GCMD_SIRTP) {
        /* Set/update the interrupt remap table pointer */
        vtd_handle_gcmd_sirtp(s);
    }
    if (changed & VTD_GCMD_IRE) {
        /* Interrupt remap enable/disable */
        vtd_handle_gcmd_ire(s, val & VTD_GCMD_IRE);
    }
}
1799
1800
/* Handle a write to the Context Command Register: perform the
 * requested context-cache invalidation, then clear the in-progress
 * ICC bit and report the granularity used back through CAIG.
 */
static void vtd_handle_ccmd_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_CCMD_REG);

    /* Context-cache invalidation request */
    if (val & VTD_CCMD_ICC) {
        if (s->qi_enabled) {
            /* Register-based invalidation is not allowed while QI is on */
            trace_vtd_err("Queued Invalidation enabled, "
                          "should not use register-based invalidation");
            return;
        }
        ret = vtd_context_cache_invalidate(s, val);
        /* Invalidation completed: clear ICC, publish CAIG result */
        vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_ICC, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_CCMD_REG, VTD_CCMD_CAIG_MASK,
                                      ret);
    }
}
1820
1821
/* Handle a write to the IOTLB Invalidation Register: perform the
 * requested flush, then clear the in-progress IVT bit and report the
 * granularity used back through the IAIG field.
 */
static void vtd_handle_iotlb_write(IntelIOMMUState *s)
{
    uint64_t ret;
    uint64_t val = vtd_get_quad_raw(s, DMAR_IOTLB_REG);

    /* IOTLB invalidation request */
    if (val & VTD_TLB_IVT) {
        if (s->qi_enabled) {
            /* Register-based invalidation is not allowed while QI is on */
            trace_vtd_err("Queued Invalidation enabled, "
                          "should not use register-based invalidation.");
            return;
        }
        ret = vtd_iotlb_flush(s, val);
        /* Invalidation completed: clear IVT, publish IAIG result */
        vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG, VTD_TLB_IVT, 0ULL);
        ret = vtd_set_clear_mask_quad(s, DMAR_IOTLB_REG,
                                      VTD_TLB_FLUSH_GRANU_MASK_A, ret);
    }
}
1841
1842
/* Fetch an Invalidation Descriptor from the guest invalidation queue.
 *
 * @base_addr: guest physical base of the queue
 * @offset:    descriptor index within the queue
 * @inv_desc:  out parameter, converted to host endianness on success
 *
 * Returns false (with @inv_desc zeroed) if the DMA read fails.
 */
static bool vtd_get_inv_desc(dma_addr_t base_addr, uint32_t offset,
                             VTDInvDesc *inv_desc)
{
    dma_addr_t addr = base_addr + offset * sizeof(*inv_desc);
    if (dma_memory_read(&address_space_memory, addr, inv_desc,
                        sizeof(*inv_desc))) {
        trace_vtd_err("Read INV DESC failed.");
        inv_desc->lo = 0;
        inv_desc->hi = 0;
        return false;
    }
    /* Descriptors are little-endian in guest memory */
    inv_desc->lo = le64_to_cpu(inv_desc->lo);
    inv_desc->hi = le64_to_cpu(inv_desc->hi);
    return true;
}
1858
/* Process an Invalidation Wait Descriptor.
 *
 * Status Write (SW): writes the descriptor's 32-bit status data to the
 * guest address it carries. Interrupt Flag (IF): raises a completion
 * event interrupt instead. A descriptor with neither flag, or with
 * reserved bits set, is rejected.
 *
 * Returns true on success; false on a malformed descriptor or a
 * failed status write.
 */
static bool vtd_process_wait_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    if ((inv_desc->hi & VTD_INV_DESC_WAIT_RSVD_HI) ||
        (inv_desc->lo & VTD_INV_DESC_WAIT_RSVD_LO)) {
        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    if (inv_desc->lo & VTD_INV_DESC_WAIT_SW) {
        /* Status Write */
        uint32_t status_data = (uint32_t)(inv_desc->lo >>
                               VTD_INV_DESC_WAIT_DATA_SHIFT);

        /* SW together with IF is not supported by this implementation */
        assert(!(inv_desc->lo & VTD_INV_DESC_WAIT_IF));

        /* NOTE(review): status_addr is used unmasked — confirm whether it
         * should be limited to the host address width. */
        dma_addr_t status_addr = inv_desc->hi;
        trace_vtd_inv_desc_wait_sw(status_addr, status_data);
        status_data = cpu_to_le32(status_data);
        if (dma_memory_write(&address_space_memory, status_addr, &status_data,
                             sizeof(status_data))) {
            trace_vtd_inv_desc_wait_write_fail(inv_desc->hi, inv_desc->lo);
            return false;
        }
    } else if (inv_desc->lo & VTD_INV_DESC_WAIT_IF) {
        /* Interrupt flag: raise a completion event */
        vtd_generate_completion_event(s);
    } else {
        trace_vtd_inv_desc_wait_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
1891
/* Process a Context-cache Invalidation Descriptor from the queue.
 * Returns false if the descriptor has reserved bits set or an unknown
 * granularity.
 */
static bool vtd_process_context_cache_desc(IntelIOMMUState *s,
                                           VTDInvDesc *inv_desc)
{
    uint16_t sid, fmask;

    if ((inv_desc->lo & VTD_INV_DESC_CC_RSVD) || inv_desc->hi) {
        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    switch (inv_desc->lo & VTD_INV_DESC_CC_G) {
    case VTD_INV_DESC_CC_DOMAIN:
        trace_vtd_inv_desc_cc_domain(
            (uint16_t)VTD_INV_DESC_CC_DID(inv_desc->lo));
        /* Fall through: domain-selective invalidation is satisfied by
         * the (stricter) global invalidation. */
    case VTD_INV_DESC_CC_GLOBAL:
        vtd_context_global_invalidate(s);
        break;

    case VTD_INV_DESC_CC_DEVICE:
        sid = VTD_INV_DESC_CC_SID(inv_desc->lo);
        fmask = VTD_INV_DESC_CC_FM(inv_desc->lo);
        vtd_context_device_invalidate(s, sid, fmask);
        break;

    default:
        trace_vtd_inv_desc_cc_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
1922
/* Process an IOTLB Invalidation Descriptor from the queue.
 * Returns false if reserved bits are set, the address mask exceeds
 * the supported maximum, or the granularity is unknown.
 */
static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc)
{
    uint16_t domain_id;
    uint8_t am;
    hwaddr addr;

    if ((inv_desc->lo & VTD_INV_DESC_IOTLB_RSVD_LO) ||
        (inv_desc->hi & VTD_INV_DESC_IOTLB_RSVD_HI)) {
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }

    switch (inv_desc->lo & VTD_INV_DESC_IOTLB_G) {
    case VTD_INV_DESC_IOTLB_GLOBAL:
        vtd_iotlb_global_invalidate(s);
        break;

    case VTD_INV_DESC_IOTLB_DOMAIN:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        vtd_iotlb_domain_invalidate(s, domain_id);
        break;

    case VTD_INV_DESC_IOTLB_PAGE:
        domain_id = VTD_INV_DESC_IOTLB_DID(inv_desc->lo);
        addr = VTD_INV_DESC_IOTLB_ADDR(inv_desc->hi);
        am = VTD_INV_DESC_IOTLB_AM(inv_desc->hi);
        if (am > VTD_MAMV) {
            /* Address mask larger than the hardware supports */
            trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
            return false;
        }
        vtd_iotlb_page_invalidate(s, domain_id, addr, am);
        break;

    default:
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }
    return true;
}
1962
1963static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
1964 VTDInvDesc *inv_desc)
1965{
1966 trace_vtd_inv_desc_iec(inv_desc->iec.granularity,
1967 inv_desc->iec.index,
1968 inv_desc->iec.index_mask);
1969
1970 vtd_iec_notify_all(s, !inv_desc->iec.granularity,
1971 inv_desc->iec.index,
1972 inv_desc->iec.index_mask);
1973 return true;
1974}
1975
/* Process a Device-IOTLB Invalidation Descriptor: notify the target
 * device's IOMMU notifiers to unmap the described range. An unknown
 * bus/devfn is silently ignored (nothing to invalidate).
 */
static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s,
                                          VTDInvDesc *inv_desc)
{
    VTDAddressSpace *vtd_dev_as;
    IOMMUTLBEntry entry;
    struct VTDBus *vtd_bus;
    hwaddr addr;
    uint64_t sz;
    uint16_t sid;
    uint8_t devfn;
    bool size;
    uint8_t bus_num;

    addr = VTD_INV_DESC_DEVICE_IOTLB_ADDR(inv_desc->hi);
    sid = VTD_INV_DESC_DEVICE_IOTLB_SID(inv_desc->lo);
    devfn = sid & 0xff;
    bus_num = sid >> 8;
    size = VTD_INV_DESC_DEVICE_IOTLB_SIZE(inv_desc->hi);

    if ((inv_desc->lo & VTD_INV_DESC_DEVICE_IOTLB_RSVD_LO) ||
        (inv_desc->hi & VTD_INV_DESC_DEVICE_IOTLB_RSVD_HI)) {
        trace_vtd_inv_desc_iotlb_invalid(inv_desc->hi, inv_desc->lo);
        return false;
    }

    /* Unknown device: nothing to invalidate, but not an error */
    vtd_bus = vtd_find_as_from_bus_num(s, bus_num);
    if (!vtd_bus) {
        goto done;
    }

    vtd_dev_as = vtd_bus->dev_as[devfn];
    if (!vtd_dev_as) {
        goto done;
    }

    /*
     * When the S bit is set the range size is encoded in the address
     * itself: the number of trailing one bits in the page-frame number
     * determines the size (2 pages << trailing ones), and the address
     * is aligned down to that size. Otherwise a single 4K page is
     * invalidated.
     */
    if (size) {
        sz = (VTD_PAGE_SIZE * 2) << cto64(addr >> VTD_PAGE_SHIFT);
        addr &= ~(sz - 1);
    } else {
        sz = VTD_PAGE_SIZE;
    }

    entry.target_as = &vtd_dev_as->as;
    entry.addr_mask = sz - 1;
    entry.iova = addr;
    entry.perm = IOMMU_NONE;
    entry.translated_addr = 0;
    memory_region_notify_iommu(&vtd_dev_as->iommu, 0, entry);

done:
    return true;
}
2036
/* Fetch the descriptor at the current queue head, dispatch it by
 * type, and advance the head (with wraparound). Returns false on a
 * fetch failure or malformed descriptor, leaving the head in place so
 * the caller can report an invalidation queue error.
 */
static bool vtd_process_inv_desc(IntelIOMMUState *s)
{
    VTDInvDesc inv_desc;
    uint8_t desc_type;

    trace_vtd_inv_qi_head(s->iq_head);
    if (!vtd_get_inv_desc(s->iq, s->iq_head, &inv_desc)) {
        s->iq_last_desc_type = VTD_INV_DESC_NONE;
        return false;
    }
    desc_type = inv_desc.lo & VTD_INV_DESC_TYPE;
    /* Record the type before processing; QI-disable checks rely on it */
    s->iq_last_desc_type = desc_type;

    switch (desc_type) {
    case VTD_INV_DESC_CC:
        trace_vtd_inv_desc("context-cache", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_context_cache_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_IOTLB:
        trace_vtd_inv_desc("iotlb", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_iotlb_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_WAIT:
        trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_wait_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_IEC:
        trace_vtd_inv_desc("iec", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_inv_iec_desc(s, &inv_desc)) {
            return false;
        }
        break;

    case VTD_INV_DESC_DEVICE:
        trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo);
        if (!vtd_process_device_iotlb_desc(s, &inv_desc)) {
            return false;
        }
        break;

    default:
        trace_vtd_inv_desc_invalid(inv_desc.hi, inv_desc.lo);
        return false;
    }
    /* Advance the head with wraparound */
    s->iq_head++;
    if (s->iq_head == s->iq_size) {
        s->iq_head = 0;
    }
    return true;
}
2097
2098
/* Process all descriptors currently queued between head and tail,
 * updating the hardware head register after each one. Stops and
 * raises an invalidation queue error on the first bad descriptor or
 * on an out-of-range tail pointer.
 */
static void vtd_fetch_inv_desc(IntelIOMMUState *s)
{
    trace_vtd_inv_qi_fetch();

    if (s->iq_tail >= s->iq_size) {
        /* Detects an invalid Tail pointer */
        trace_vtd_err_qi_tail(s->iq_tail, s->iq_size);
        vtd_handle_inv_queue_error(s);
        return;
    }
    while (s->iq_head != s->iq_tail) {
        if (!vtd_process_inv_desc(s)) {
            /* Invalidation Queue Errors */
            vtd_handle_inv_queue_error(s);
            break;
        }
        /* Keep IQH_REG in sync so the guest sees processing progress */
        vtd_set_quad_raw(s, DMAR_IQH_REG,
                         (((uint64_t)(s->iq_head)) << VTD_IQH_QH_SHIFT) &
                         VTD_IQH_QH_MASK);
    }
}
2121
2122
/* Handle a write to the Invalidation Queue Tail Register: latch the
 * new tail and, when QI is enabled and no queue error is pending,
 * process the newly queued descriptors.
 */
static void vtd_handle_iqt_write(IntelIOMMUState *s)
{
    uint64_t val = vtd_get_quad_raw(s, DMAR_IQT_REG);

    s->iq_tail = VTD_IQT_QT(val);
    trace_vtd_inv_qi_tail(s->iq_tail);

    if (s->qi_enabled && !(vtd_get_long_raw(s, DMAR_FSTS_REG) & VTD_FSTS_IQE)) {
        /* Process Invalidation Queue here */
        vtd_fetch_inv_desc(s);
    }
}
2135
/* Handle a write to the Fault Status Register: once the guest has
 * cleared all of PFO/PPF/IQE (they are write-1-to-clear), drop the
 * pending-interrupt flag in the Fault Event Control Register.
 */
static void vtd_handle_fsts_write(IntelIOMMUState *s)
{
    uint32_t fsts_reg = vtd_get_long_raw(s, DMAR_FSTS_REG);
    uint32_t fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);
    uint32_t status_fields = VTD_FSTS_PFO | VTD_FSTS_PPF | VTD_FSTS_IQE;

    if ((fectl_reg & VTD_FECTL_IP) && !(fsts_reg & status_fields)) {
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
        trace_vtd_fsts_clear_ip();
    }
    /* NOTE(review): when the guest clears IQE, should pending queue
     * descriptors be re-fetched here? Current behavior relies on the
     * guest re-writing IQT — confirm against the spec. */
}
2150
/* Handle a write to the Fault Event Control Register: if the guest
 * unmasks fault interrupts (IM clear) while one is pending (IP set),
 * deliver the deferred fault event interrupt now and clear IP.
 */
static void vtd_handle_fectl_write(IntelIOMMUState *s)
{
    uint32_t fectl_reg;
    /* NOTE(review): this checks only the current IM value rather than
     * detecting a 1->0 transition; confirm that is sufficient. */
    fectl_reg = vtd_get_long_raw(s, DMAR_FECTL_REG);

    trace_vtd_reg_write_fectl(fectl_reg);

    if ((fectl_reg & VTD_FECTL_IP) && !(fectl_reg & VTD_FECTL_IM)) {
        vtd_generate_interrupt(s, DMAR_FEADDR_REG, DMAR_FEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_FECTL_REG, VTD_FECTL_IP, 0);
    }
}
2167
/* Handle a write to the Invalidation Completion Status Register: once
 * the guest clears the wait-descriptor-complete bit (IWC, W1C), drop
 * the pending-interrupt flag in the event control register.
 */
static void vtd_handle_ics_write(IntelIOMMUState *s)
{
    uint32_t ics_reg = vtd_get_long_raw(s, DMAR_ICS_REG);
    uint32_t iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);

    if ((iectl_reg & VTD_IECTL_IP) && !(ics_reg & VTD_ICS_IWC)) {
        trace_vtd_reg_ics_clear_ip();
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}
2178
/* Handle a write to the Invalidation Event Control Register: if the
 * guest unmasks invalidation-completion interrupts (IM clear) while
 * one is pending (IP set), deliver the deferred interrupt and clear IP.
 */
static void vtd_handle_iectl_write(IntelIOMMUState *s)
{
    uint32_t iectl_reg;
    /* NOTE(review): like the FECTL handler, this checks only the
     * current IM value rather than a 1->0 transition. */
    iectl_reg = vtd_get_long_raw(s, DMAR_IECTL_REG);

    trace_vtd_reg_write_iectl(iectl_reg);

    if ((iectl_reg & VTD_IECTL_IP) && !(iectl_reg & VTD_IECTL_IM)) {
        vtd_generate_interrupt(s, DMAR_IEADDR_REG, DMAR_IEDATA_REG);
        vtd_set_clear_mask_long(s, DMAR_IECTL_REG, VTD_IECTL_IP, 0);
    }
}
2195
/* Guest MMIO read from the DMAR register window.
 *
 * Registers whose effective value is cached in IntelIOMMUState
 * (RTADDR, IQA) are served from the cached copy; everything else comes
 * straight from the csr array. 4-byte reads of 64-bit registers return
 * the requested half.
 */
static uint64_t vtd_mem_read(void *opaque, hwaddr addr, unsigned size)
{
    IntelIOMMUState *s = opaque;
    uint64_t val;

    trace_vtd_reg_read(addr, size);

    if (addr + size > DMAR_REG_SIZE) {
        trace_vtd_err("Read MMIO over range.");
        return (uint64_t)-1;
    }

    switch (addr) {
    /* Root Table Address Register, 64-bit */
    case DMAR_RTADDR_REG:
        if (size == 4) {
            val = s->root & ((1ULL << 32) - 1);
        } else {
            val = s->root;
        }
        break;

    case DMAR_RTADDR_REG_HI:
        assert(size == 4);
        val = s->root >> 32;
        break;

    /* Invalidation Queue Address Register, 64-bit */
    case DMAR_IQA_REG:
        val = s->iq | (vtd_get_quad(s, DMAR_IQA_REG) & VTD_IQA_QS);
        if (size == 4) {
            val = val & ((1ULL << 32) - 1);
        }
        break;

    case DMAR_IQA_REG_HI:
        assert(size == 4);
        val = s->iq >> 32;
        break;

    default:
        if (size == 4) {
            val = vtd_get_long(s, addr);
        } else {
            val = vtd_get_quad(s, addr);
        }
    }

    return val;
}
2246
/* Guest MMIO write to the DMAR register window.
 *
 * 64-bit registers may be written as one 8-byte access or as two
 * 4-byte accesses (low half then high half). Command side effects are
 * triggered only once the register is fully written, which is why the
 * *_REG_HI cases repeat the corresponding handler call.
 */
static void vtd_mem_write(void *opaque, hwaddr addr,
                          uint64_t val, unsigned size)
{
    IntelIOMMUState *s = opaque;

    trace_vtd_reg_write(addr, size, val);

    if (addr + size > DMAR_REG_SIZE) {
        trace_vtd_err("Write MMIO over range.");
        return;
    }

    switch (addr) {
    /* Global Command Register, 32-bit */
    case DMAR_GCMD_REG:
        vtd_set_long(s, addr, val);
        vtd_handle_gcmd_write(s);
        break;

    /* Context Command Register, 64-bit */
    case DMAR_CCMD_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            vtd_handle_ccmd_write(s);
        }
        break;

    case DMAR_CCMD_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_ccmd_write(s);
        break;

    /* IOTLB Invalidation Register, 64-bit */
    case DMAR_IOTLB_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            vtd_handle_iotlb_write(s);
        }
        break;

    case DMAR_IOTLB_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_iotlb_write(s);
        break;

    /* Invalidate Address Register, 64-bit */
    case DMAR_IVA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IVA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Status Register, 32-bit */
    case DMAR_FSTS_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_fsts_write(s);
        break;

    /* Fault Event Control Register, 32-bit */
    case DMAR_FECTL_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_fectl_write(s);
        break;

    /* Fault Event Data Register, 32-bit */
    case DMAR_FEDATA_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Event Address Register, 32-bit */
    case DMAR_FEADDR_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            /*
             * The register is 32-bit, but some guests have been
             * observed writing 8 bytes here; accept the access
             * rather than faulting.
             */
            vtd_set_quad(s, addr, val);
        }
        break;

    /* Fault Event Upper Address Register, 32-bit */
    case DMAR_FEUADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Protected Memory Enable Register, 32-bit */
    case DMAR_PMEN_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Root Table Address Register, 64-bit */
    case DMAR_RTADDR_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_RTADDR_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Queue Tail Register, 64-bit */
    case DMAR_IQT_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        vtd_handle_iqt_write(s);
        break;

    case DMAR_IQT_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        /* High half carries no tail bits; nothing more to do */
        break;

    /* Invalidation Queue Address Register, 64-bit */
    case DMAR_IQA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IQA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Completion Status Register, 32-bit */
    case DMAR_ICS_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_ics_write(s);
        break;

    /* Invalidation Event Control Register, 32-bit */
    case DMAR_IECTL_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        vtd_handle_iectl_write(s);
        break;

    /* Invalidation Event Data Register, 32-bit */
    case DMAR_IEDATA_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Event Address Register, 32-bit */
    case DMAR_IEADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Invalidation Event Upper Address Register, 32-bit */
    case DMAR_IEUADDR_REG:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    /* Fault Recording Registers, 128-bit */
    case DMAR_FRCD_REG_0_0:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_FRCD_REG_0_1:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    case DMAR_FRCD_REG_0_2:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
            /* Writing the high quad may clear the Fault bit; recompute PPF */
            vtd_update_fsts_ppf(s);
        }
        break;

    case DMAR_FRCD_REG_0_3:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        /* Writing the top long may clear the Fault bit; recompute PPF */
        vtd_update_fsts_ppf(s);
        break;

    /* Interrupt Remap Table Address Register, 64-bit */
    case DMAR_IRTA_REG:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
        break;

    case DMAR_IRTA_REG_HI:
        assert(size == 4);
        vtd_set_long(s, addr, val);
        break;

    default:
        if (size == 4) {
            vtd_set_long(s, addr, val);
        } else {
            vtd_set_quad(s, addr, val);
        }
    }
}
2485
/* IOMMUMemoryRegion translate callback.
 *
 * Translates @addr in the device's address space into a
 * system-memory IOMMUTLBEntry. When DMAR is disabled the access is
 * passed through 1:1 with RW permission; otherwise it goes through
 * the full context lookup / page walk in vtd_do_iommu_translate().
 * On failure an all-zero entry with IOMMU_NONE permission is returned.
 */
static IOMMUTLBEntry vtd_iommu_translate(IOMMUMemoryRegion *iommu, hwaddr addr,
                                         IOMMUAccessFlags flag, int iommu_idx)
{
    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;
    IOMMUTLBEntry iotlb = {
        /* Remaining fields are filled by the translation below */
        .target_as = &address_space_memory,
    };
    bool success;

    if (likely(s->dmar_enabled)) {
        success = vtd_do_iommu_translate(vtd_as, vtd_as->bus, vtd_as->devfn,
                                         addr, flag & IOMMU_WO, &iotlb);
    } else {
        /* DMAR disabled: identity-map at 4K granularity */
        iotlb.iova = addr & VTD_PAGE_MASK_4K;
        iotlb.translated_addr = addr & VTD_PAGE_MASK_4K;
        iotlb.addr_mask = ~VTD_PAGE_MASK_4K;
        iotlb.perm = IOMMU_RW;
        success = true;
    }

    if (likely(success)) {
        trace_vtd_dmar_translate(pci_bus_num(vtd_as->bus),
                                 VTD_PCI_SLOT(vtd_as->devfn),
                                 VTD_PCI_FUNC(vtd_as->devfn),
                                 iotlb.iova, iotlb.translated_addr,
                                 iotlb.addr_mask);
    } else {
        trace_vtd_err_dmar_translate(pci_bus_num(vtd_as->bus),
                                     VTD_PCI_SLOT(vtd_as->devfn),
                                     VTD_PCI_FUNC(vtd_as->devfn),
                                     iotlb.iova);
    }

    return iotlb;
}
2524
/* IOMMUMemoryRegion notifier-flag-changed callback.
 *
 * MAP notifiers (used for device assignment) require caching-mode so
 * the guest sends explicit invalidations we can forward; refuse to
 * continue otherwise. Also keeps the vtd_as_with_notifiers list in
 * sync with which address spaces have any notifier registered.
 */
static void vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                                          IOMMUNotifierFlag old,
                                          IOMMUNotifierFlag new)
{
    VTDAddressSpace *vtd_as = container_of(iommu, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;

    if (!s->caching_mode && new & IOMMU_NOTIFIER_MAP) {
        error_report("We need to set caching-mode=1 for intel-iommu to enable "
                     "device assignment with IOMMU protection.");
        exit(1);
    }

    /* Update per-address-space notifier flags */
    vtd_as->notifier_flags = new;

    if (old == IOMMU_NOTIFIER_NONE) {
        QLIST_INSERT_HEAD(&s->vtd_as_with_notifiers, vtd_as, next);
    } else if (new == IOMMU_NOTIFIER_NONE) {
        QLIST_REMOVE(vtd_as, next);
    }
}
2547
/* Migration post-load hook.
 *
 * Whether each device's address space is backed by the IOMMU region
 * or the direct-map region depends on register state that has just
 * been restored, and is not itself migrated; recompute it here.
 */
static int vtd_post_load(void *opaque, int version_id)
{
    IntelIOMMUState *iommu = opaque;

    vtd_switch_address_space_all(iommu);

    return 0;
}
2561
/* Migration description: the raw register file (csr) plus the fields
 * that are derived from registers when commands execute (root pointer,
 * IR table configuration, invalidation queue state, enable flags).
 */
static const VMStateDescription vtd_vmstate = {
    .name = "iommu-intel",
    .version_id = 1,
    .minimum_version_id = 1,
    .priority = MIG_PRI_IOMMU,
    .post_load = vtd_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(root, IntelIOMMUState),
        VMSTATE_UINT64(intr_root, IntelIOMMUState),
        VMSTATE_UINT64(iq, IntelIOMMUState),
        VMSTATE_UINT32(intr_size, IntelIOMMUState),
        VMSTATE_UINT16(iq_head, IntelIOMMUState),
        VMSTATE_UINT16(iq_tail, IntelIOMMUState),
        VMSTATE_UINT16(iq_size, IntelIOMMUState),
        VMSTATE_UINT16(next_frcd_reg, IntelIOMMUState),
        VMSTATE_UINT8_ARRAY(csr, IntelIOMMUState, DMAR_REG_SIZE),
        VMSTATE_UINT8(iq_last_desc_type, IntelIOMMUState),
        VMSTATE_BOOL(root_extended, IntelIOMMUState),
        VMSTATE_BOOL(dmar_enabled, IntelIOMMUState),
        VMSTATE_BOOL(qi_enabled, IntelIOMMUState),
        VMSTATE_BOOL(intr_enabled, IntelIOMMUState),
        VMSTATE_BOOL(intr_eime, IntelIOMMUState),
        VMSTATE_END_OF_LIST()
    }
};
2587
/* MMIO ops for the DMAR register window. Accesses must be 4 or 8
 * bytes wide; both widths are handled natively by the read/write
 * callbacks above.
 */
static const MemoryRegionOps vtd_mem_ops = {
    .read = vtd_mem_read,
    .write = vtd_mem_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 8,
    },
};
2601
/* User-configurable properties of the intel-iommu device.
 * "caching-mode" must be enabled for device assignment with IOMMU
 * protection (see vtd_iommu_notify_flag_changed).
 */
static Property vtd_properties[] = {
    DEFINE_PROP_UINT32("version", IntelIOMMUState, version, 0),
    DEFINE_PROP_ON_OFF_AUTO("eim", IntelIOMMUState, intr_eim,
                            ON_OFF_AUTO_AUTO),
    DEFINE_PROP_BOOL("x-buggy-eim", IntelIOMMUState, buggy_eim, false),
    DEFINE_PROP_UINT8("x-aw-bits", IntelIOMMUState, aw_bits,
                      VTD_HOST_ADDRESS_WIDTH),
    DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE),
    DEFINE_PROP_END_OF_LIST(),
};
2612
2613
/*
 * Fetch the Interrupt Remapping Table Entry at @index from guest memory
 * and validate it (present bit, reserved fields, and optionally the
 * source-id of the requester).
 *
 * @iommu: the IOMMU state (intr_root holds the IR table base address)
 * @index: entry index into the remapping table
 * @entry: output buffer for the fetched IRTE
 * @sid:   requester source-id to verify, or X86_IOMMU_SID_INVALID to skip
 *         the source-id check
 *
 * Returns 0 on success, or a negative VTD_FR_IR_* fault reason.
 */
static int vtd_irte_get(IntelIOMMUState *iommu, uint16_t index,
                        VTD_IR_TableEntry *entry, uint16_t sid)
{
    /* Per-SQ masks for SVT_ALL source-id comparison: SQ selects how many
     * low function bits to ignore (VT-d spec, SVT/SQ fields). */
    static const uint16_t vtd_svt_mask[VTD_SQ_MAX] = \
        {0xffff, 0xfffb, 0xfff9, 0xfff8};
    dma_addr_t addr = 0x00;
    uint16_t mask, source_id;
    uint8_t bus, bus_max, bus_min;

    /* Physical address of entry @index in the guest's IR table */
    addr = iommu->intr_root + index * sizeof(*entry);
    if (dma_memory_read(&address_space_memory, addr, entry,
                        sizeof(*entry))) {
        trace_vtd_err("Memory read failed for IRTE.");
        return -VTD_FR_IR_ROOT_INVAL;
    }

    trace_vtd_ir_irte_get(index, le64_to_cpu(entry->data[1]),
                          le64_to_cpu(entry->data[0]));

    if (!entry->irte.present) {
        trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
                           le64_to_cpu(entry->data[0]));
        return -VTD_FR_IR_ENTRY_P;
    }

    /* All reserved fields must be zero */
    if (entry->irte.__reserved_0 || entry->irte.__reserved_1 ||
        entry->irte.__reserved_2) {
        trace_vtd_err_irte(index, le64_to_cpu(entry->data[1]),
                           le64_to_cpu(entry->data[0]));
        return -VTD_FR_IR_IRTE_RSVD;
    }

    if (sid != X86_IOMMU_SID_INVALID) {
        /* Validate requester id against the IRTE's SVT/SQ/SID policy */
        source_id = le32_to_cpu(entry->irte.source_id);
        switch (entry->irte.sid_vtype) {
        case VTD_SVT_NONE:
            /* No source-id verification requested */
            break;

        case VTD_SVT_ALL:
            /* Compare full SID under the SQ-selected mask */
            mask = vtd_svt_mask[entry->irte.sid_q];
            if ((source_id & mask) != (sid & mask)) {
                trace_vtd_err_irte_sid(index, sid, source_id);
                return -VTD_FR_IR_SID_ERR;
            }
            break;

        case VTD_SVT_BUS:
            /* source_id encodes an allowed [bus_min, bus_max] range */
            bus_max = source_id >> 8;
            bus_min = source_id & 0xff;
            bus = sid >> 8;
            if (bus > bus_max || bus < bus_min) {
                trace_vtd_err_irte_sid_bus(index, bus, bus_min, bus_max);
                return -VTD_FR_IR_SID_ERR;
            }
            break;

        default:
            trace_vtd_err_irte_svt(index, entry->irte.sid_vtype);
            /* Unknown SVT value: treat as a source-id fault */
            return -VTD_FR_IR_SID_ERR;
            break;
        }
    }

    return 0;
}
2681
2682
/*
 * Fetch and validate IRTE @index, then decode it into a VTDIrq.
 *
 * Returns 0 on success, or the negative fault reason from vtd_irte_get().
 */
static int vtd_remap_irq_get(IntelIOMMUState *iommu, uint16_t index,
                             VTDIrq *irq, uint16_t sid)
{
    VTD_IR_TableEntry irte = {};
    int ret = 0;

    ret = vtd_irte_get(iommu, index, &irte, sid);
    if (ret) {
        return ret;
    }

    irq->trigger_mode = irte.irte.trigger_mode;
    irq->vector = irte.irte.vector;
    irq->delivery_mode = irte.irte.delivery_mode;
    irq->dest = le32_to_cpu(irte.irte.dest_id);
    if (!iommu->intr_eime) {
#define  VTD_IR_APIC_DEST_MASK         (0xff00ULL)
#define  VTD_IR_APIC_DEST_SHIFT        (8)
        /* Without EIM, the 8-bit APIC ID lives in bits 15:8 of dest_id */
        irq->dest = (irq->dest & VTD_IR_APIC_DEST_MASK) >>
            VTD_IR_APIC_DEST_SHIFT;
    }
    irq->dest_mode = irte.irte.dest_mode;
    irq->redir_hint = irte.irte.redir_hint;

    trace_vtd_ir_remap(index, irq->trigger_mode, irq->vector,
                       irq->delivery_mode, irq->dest, irq->dest_mode);

    return 0;
}
2712
2713
/*
 * Assemble an MSI message (address + data) from a decoded VTDIrq, using
 * the VTD_MSIMessage bitfield layout.
 */
static void vtd_generate_msi_message(VTDIrq *irq, MSIMessage *msg_out)
{
    VTD_MSIMessage msg = {};

    /* Address bits */
    msg.dest_mode = irq->dest_mode;
    msg.redir_hint = irq->redir_hint;
    msg.dest = irq->dest;
    /* NOTE(review): high address bits carry the upper destination bits;
     * presumably only meaningful in EIM/x2APIC mode — confirm. */
    msg.__addr_hi = irq->dest & 0xffffff00;
    msg.__addr_head = cpu_to_le32(0xfee);
    /* Keep the low address bits the guest originally wrote */
    msg.__not_used = irq->msi_addr_last_bits;

    /* Data bits */
    msg.vector = irq->vector;
    msg.delivery_mode = irq->delivery_mode;
    msg.level = 1;
    msg.trigger_mode = irq->trigger_mode;

    msg_out->address = msg.msi_addr;
    msg_out->data = msg.msi_data;
}
2736
2737
/*
 * Remap one MSI message @origin into @translated using the interrupt
 * remapping table.  If remapping is disabled, or the message is in
 * compatibility format, the message passes through unmodified.
 *
 * @sid: requester source-id, or X86_IOMMU_SID_INVALID when unknown.
 *
 * Returns 0 on success (including pass-through), or a negative
 * VTD_FR_IR_* fault reason.
 */
static int vtd_interrupt_remap_msi(IntelIOMMUState *iommu,
                                   MSIMessage *origin,
                                   MSIMessage *translated,
                                   uint16_t sid)
{
    int ret = 0;
    VTD_IR_MSIAddress addr;
    uint16_t index;
    VTDIrq irq = {};

    assert(origin && translated);

    trace_vtd_ir_remap_msi_req(origin->address, origin->data);

    /* Remapping disabled: pass the message through untouched */
    if (!iommu || !iommu->intr_enabled) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    if (origin->address & VTD_MSI_ADDR_HI_MASK) {
        trace_vtd_err("MSI address high 32 bits non-zero when "
                      "Interrupt Remapping enabled.");
        return -VTD_FR_IR_REQ_RSVD;
    }

    addr.data = origin->address & VTD_MSI_ADDR_LO_MASK;
    if (addr.addr.__head != 0xfee) {
        /* MSI addresses must be in the 0xfeeXXXXX window */
        trace_vtd_err("MSI addr low 32 bit invalid.");
        return -VTD_FR_IR_REQ_RSVD;
    }

    /* Compatibility-format interrupts are not remapped */
    if (addr.addr.int_mode != VTD_IR_INT_FORMAT_REMAP) {
        memcpy(translated, origin, sizeof(*origin));
        goto out;
    }

    /* IRTE index is split across the address: high bit + low 15 bits */
    index = addr.addr.index_h << 15 | le16_to_cpu(addr.addr.index_l);

#define  VTD_IR_MSI_DATA_SUBHANDLE       (0x0000ffff)
#define  VTD_IR_MSI_DATA_RESERVED        (0xffff0000)

    if (addr.addr.sub_valid) {
        /* SHV: the data payload carries a subhandle added to the index */
        index += origin->data & VTD_IR_MSI_DATA_SUBHANDLE;
    }

    ret = vtd_remap_irq_get(iommu, index, &irq, sid);
    if (ret) {
        return ret;
    }

    if (addr.addr.sub_valid) {
        trace_vtd_ir_remap_type("MSI");
        if (origin->data & VTD_IR_MSI_DATA_RESERVED) {
            trace_vtd_err_ir_msi_invalid(sid, origin->address, origin->data);
            return -VTD_FR_IR_REQ_RSVD;
        }
    } else {
        uint8_t vector = origin->data & 0xff;
        uint8_t trigger_mode = (origin->data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;

        trace_vtd_ir_remap_type("IOAPIC");

        /* The guest-programmed vector/trigger should match the IRTE;
         * mismatches are only warned about, the IRTE values win. */
        if (vector != irq.vector) {
            trace_vtd_warn_ir_vector(sid, index, vector, irq.vector);
        }

        if (trigger_mode != irq.trigger_mode) {
            trace_vtd_warn_ir_trigger(sid, index, trigger_mode,
                                      irq.trigger_mode);
        }
    }

    /* Preserve the low address bits the guest wrote so the regenerated
     * message round-trips them. */
    irq.msi_addr_last_bits = addr.addr.__not_care;

    /* Translate the VTDIrq back into a concrete MSI message */
    vtd_generate_msi_message(&irq, translated);

out:
    trace_vtd_ir_remap_msi(origin->address, origin->data,
                           translated->address, translated->data);
    return 0;
}
2829
2830static int vtd_int_remap(X86IOMMUState *iommu, MSIMessage *src,
2831 MSIMessage *dst, uint16_t sid)
2832{
2833 return vtd_interrupt_remap_msi(INTEL_IOMMU_DEVICE(iommu),
2834 src, dst, sid);
2835}
2836
2837static MemTxResult vtd_mem_ir_read(void *opaque, hwaddr addr,
2838 uint64_t *data, unsigned size,
2839 MemTxAttrs attrs)
2840{
2841 return MEMTX_OK;
2842}
2843
/*
 * Write handler for the interrupt-remapping MMIO window: treat the write
 * as an MSI, remap it through the IR table, and deliver it to the APIC.
 */
static MemTxResult vtd_mem_ir_write(void *opaque, hwaddr addr,
                                    uint64_t value, unsigned size,
                                    MemTxAttrs attrs)
{
    int ret = 0;
    MSIMessage from = {}, to = {};
    uint16_t sid = X86_IOMMU_SID_INVALID;

    /* Rebuild the full MSI address from the window-relative offset */
    from.address = (uint64_t) addr + VTD_INTERRUPT_ADDR_FIRST;
    from.data = (uint32_t) value;

    if (!attrs.unspecified) {
        /* We have a valid requester id for source-id verification */
        sid = attrs.requester_id;
    }

    ret = vtd_interrupt_remap_msi(opaque, &from, &to, sid);
    if (ret) {
        /* Remapping faulted; drop the interrupt and report an error.
         * TODO: fault event injection is not implemented here. */
        return MEMTX_ERROR;
    }

    /* Inject the translated MSI into the guest's APIC */
    apic_get_class()->send_msi(&to);

    return MEMTX_OK;
}
2871
/*
 * MemoryRegionOps for the interrupt-remapping window (0xfeeXXXXX).
 * All accesses are exactly 4 bytes, matching MSI write semantics.
 */
static const MemoryRegionOps vtd_mem_ir_ops = {
    .read_with_attrs = vtd_mem_ir_read,
    .write_with_attrs = vtd_mem_ir_write,
    .endianness = DEVICE_LITTLE_ENDIAN,
    .impl = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
    .valid = {
        .min_access_size = 4,
        .max_access_size = 4,
    },
};
2885
/*
 * Look up — creating on first use — the VTDAddressSpace for the device at
 * @devfn on @bus.  A VTDBus record is allocated per PCI bus (keyed by the
 * bus pointer) and holds one VTDAddressSpace slot per devfn.
 */
VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int devfn)
{
    uintptr_t key = (uintptr_t)bus;
    VTDBus *vtd_bus = g_hash_table_lookup(s->vtd_as_by_busptr, &key);
    VTDAddressSpace *vtd_dev_as;
    char name[128];

    if (!vtd_bus) {
        /* First device seen on this bus: allocate the per-bus record.
         * The key is heap-allocated because the hash table owns it. */
        uintptr_t *new_key = g_malloc(sizeof(*new_key));
        *new_key = (uintptr_t)bus;
        /* Trailing array: one VTDAddressSpace pointer per possible devfn */
        vtd_bus = g_malloc0(sizeof(VTDBus) + sizeof(VTDAddressSpace *) * \
                            PCI_DEVFN_MAX);
        vtd_bus->bus = bus;
        g_hash_table_insert(s->vtd_as_by_busptr, new_key, vtd_bus);
    }

    vtd_dev_as = vtd_bus->dev_as[devfn];

    if (!vtd_dev_as) {
        snprintf(name, sizeof(name), "intel_iommu_devfn_%d", devfn);
        vtd_bus->dev_as[devfn] = vtd_dev_as = g_malloc0(sizeof(VTDAddressSpace));

        vtd_dev_as->bus = bus;
        vtd_dev_as->devfn = (uint8_t)devfn;
        vtd_dev_as->iommu_state = s;
        vtd_dev_as->context_cache_entry.context_cache_gen = 0;
        vtd_dev_as->iova_tree = iova_tree_new();

        /*
         * Build the per-device address space as a container ("root")
         * holding three overlapping subregions:
         *
         *  - sys_alias (priority 1): alias of system memory, used when
         *    DMA translation is off (passthrough);
         *  - iommu (priority 1): the IOMMU translation region, used when
         *    DMA translation is on;
         *  - iommu_ir (priority 64): the interrupt-remapping window at
         *    VTD_INTERRUPT_ADDR_FIRST, which must always win over the
         *    two regions above, hence its much higher priority.
         *
         * vtd_switch_address_space() below selects which of sys_alias /
         * iommu is active at any given time.
         */
        memory_region_init_iommu(&vtd_dev_as->iommu, sizeof(vtd_dev_as->iommu),
                                 TYPE_INTEL_IOMMU_MEMORY_REGION, OBJECT(s),
                                 "intel_iommu_dmar",
                                 UINT64_MAX);
        memory_region_init_alias(&vtd_dev_as->sys_alias, OBJECT(s),
                                 "vtd_sys_alias", get_system_memory(),
                                 0, memory_region_size(get_system_memory()));
        memory_region_init_io(&vtd_dev_as->iommu_ir, OBJECT(s),
                              &vtd_mem_ir_ops, s, "intel_iommu_ir",
                              VTD_INTERRUPT_ADDR_SIZE);
        memory_region_init(&vtd_dev_as->root, OBJECT(s),
                           "vtd_root", UINT64_MAX);
        memory_region_add_subregion_overlap(&vtd_dev_as->root,
                                            VTD_INTERRUPT_ADDR_FIRST,
                                            &vtd_dev_as->iommu_ir, 64);
        address_space_init(&vtd_dev_as->as, &vtd_dev_as->root, name);
        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
                                            &vtd_dev_as->sys_alias, 1);
        memory_region_add_subregion_overlap(&vtd_dev_as->root, 0,
                                            MEMORY_REGION(&vtd_dev_as->iommu),
                                            1);
        vtd_switch_address_space(vtd_dev_as);
    }
    return vtd_dev_as;
}
2957
2958
2959static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n)
2960{
2961 IOMMUTLBEntry entry;
2962 hwaddr size;
2963 hwaddr start = n->start;
2964 hwaddr end = n->end;
2965 IntelIOMMUState *s = as->iommu_state;
2966 DMAMap map;
2967
2968
2969
2970
2971
2972
2973
2974 if (end > VTD_ADDRESS_SIZE(s->aw_bits)) {
2975
2976
2977
2978
2979 end = VTD_ADDRESS_SIZE(s->aw_bits);
2980 }
2981
2982 assert(start <= end);
2983 size = end - start;
2984
2985 if (ctpop64(size) != 1) {
2986
2987
2988
2989
2990 int n = 64 - clz64(size);
2991 if (n > s->aw_bits) {
2992
2993 n = s->aw_bits;
2994 }
2995 size = 1ULL << n;
2996 }
2997
2998 entry.target_as = &address_space_memory;
2999
3000 entry.iova = n->start & ~(size - 1);
3001
3002 entry.translated_addr = 0;
3003 entry.perm = IOMMU_NONE;
3004 entry.addr_mask = size - 1;
3005
3006 trace_vtd_as_unmap_whole(pci_bus_num(as->bus),
3007 VTD_PCI_SLOT(as->devfn),
3008 VTD_PCI_FUNC(as->devfn),
3009 entry.iova, size);
3010
3011 map.iova = entry.iova;
3012 map.size = entry.addr_mask;
3013 iova_tree_remove(as->iova_tree, &map);
3014
3015 memory_region_notify_one(n, &entry);
3016}
3017
/*
 * Send an unmap notification for every notifier of every address space
 * that currently has notifiers registered.
 */
static void vtd_address_space_unmap_all(IntelIOMMUState *s)
{
    VTDAddressSpace *vtd_as;
    IOMMUNotifier *n;

    QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) {
        IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
            vtd_address_space_unmap(vtd_as, n);
        }
    }
}
3029
/*
 * Drop all existing shadow mappings, then re-evaluate which address
 * space (passthrough vs. IOMMU) each device should be using.
 */
static void vtd_address_space_refresh_all(IntelIOMMUState *s)
{
    vtd_address_space_unmap_all(s);
    vtd_switch_address_space_all(s);
}
3035
3036static int vtd_replay_hook(IOMMUTLBEntry *entry, void *private)
3037{
3038 memory_region_notify_one((IOMMUNotifier *)private, entry);
3039 return 0;
3040}
3041
/*
 * IOMMUMemoryRegionClass::replay hook: rebuild notifier @n's view of the
 * address space.  First unmap everything, then — if the device has a
 * valid context entry and a MAP notifier — walk the page table and
 * re-notify every present mapping.
 */
static void vtd_iommu_replay(IOMMUMemoryRegion *iommu_mr, IOMMUNotifier *n)
{
    VTDAddressSpace *vtd_as = container_of(iommu_mr, VTDAddressSpace, iommu);
    IntelIOMMUState *s = vtd_as->iommu_state;
    uint8_t bus_n = pci_bus_num(vtd_as->bus);
    VTDContextEntry ce;

    /*
     * Start from scratch: unmap the full range first so the walk below
     * only has to notify the mappings that currently exist.
     */
    vtd_address_space_unmap(vtd_as, n);

    if (vtd_dev_to_context_entry(s, bus_n, vtd_as->devfn, &ce) == 0) {
        trace_vtd_replay_ce_valid(bus_n, PCI_SLOT(vtd_as->devfn),
                                  PCI_FUNC(vtd_as->devfn),
                                  VTD_CONTEXT_ENTRY_DID(ce.hi),
                                  ce.hi, ce.lo);
        if (vtd_as_has_map_notifier(vtd_as)) {
            /* Walk the whole guest page table, notifying maps only */
            vtd_page_walk_info info = {
                .hook_fn = vtd_replay_hook,
                .private = (void *)n,
                .notify_unmap = false,
                .aw = s->aw_bits,
                .as = vtd_as,
                .domain_id = VTD_CONTEXT_ENTRY_DID(ce.hi),
            };

            vtd_page_walk(&ce, 0, ~0ULL, &info);
        }
    } else {
        trace_vtd_replay_ce_invalid(bus_n, PCI_SLOT(vtd_as->devfn),
                                    PCI_FUNC(vtd_as->devfn));
    }

    return;
}
3081
3082
3083
3084
/*
 * (Re)initialize the whole IOMMU state: software state, capability
 * registers, reserved-bit masks for page-table entries, and the CSR
 * register definitions (reset value / writable mask / write-1-to-clear
 * mask / write-only mask).  Called at realize and on device reset.
 */
static void vtd_init(IntelIOMMUState *s)
{
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);

    memset(s->csr, 0, DMAR_REG_SIZE);
    memset(s->wmask, 0, DMAR_REG_SIZE);
    memset(s->w1cmask, 0, DMAR_REG_SIZE);
    memset(s->womask, 0, DMAR_REG_SIZE);

    s->root = 0;
    s->root_extended = false;
    s->dmar_enabled = false;
    s->iq_head = 0;
    s->iq_tail = 0;
    s->iq = 0;
    s->iq_size = 0;
    s->qi_enabled = false;
    s->iq_last_desc_type = VTD_INV_DESC_NONE;
    s->next_frcd_reg = 0;
    /* Base capabilities: 39-bit SAGAW always advertised; MGAW follows the
     * configured address width. */
    s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND |
             VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS |
             VTD_CAP_SAGAW_39bit | VTD_CAP_MGAW(s->aw_bits);
    if (s->aw_bits == VTD_HOST_AW_48BIT) {
        s->cap |= VTD_CAP_SAGAW_48bit;
    }
    s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;

    /*
     * Reserved-bit masks for second-level paging entries, per level and
     * per page size; index 0 is a sentinel (all bits reserved).
     */
    vtd_paging_entry_rsvd_field[0] = ~0ULL;
    vtd_paging_entry_rsvd_field[1] = VTD_SPTE_PAGE_L1_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[2] = VTD_SPTE_PAGE_L2_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[3] = VTD_SPTE_PAGE_L3_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[4] = VTD_SPTE_PAGE_L4_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[5] = VTD_SPTE_LPAGE_L1_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[6] = VTD_SPTE_LPAGE_L2_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[7] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits);
    vtd_paging_entry_rsvd_field[8] = VTD_SPTE_LPAGE_L4_RSVD_MASK(s->aw_bits);

    if (x86_iommu->intr_supported) {
        s->ecap |= VTD_ECAP_IR | VTD_ECAP_MHMV;
        if (s->intr_eim == ON_OFF_AUTO_ON) {
            s->ecap |= VTD_ECAP_EIM;
        }
        /* AUTO must have been resolved by vtd_decide_config() already */
        assert(s->intr_eim != ON_OFF_AUTO_AUTO);
    }

    if (x86_iommu->dt_supported) {
        s->ecap |= VTD_ECAP_DT;
    }

    if (x86_iommu->pt_supported) {
        s->ecap |= VTD_ECAP_PT;
    }

    if (s->caching_mode) {
        s->cap |= VTD_CAP_CM;
    }

    vtd_reset_caches(s);

    /* Define registers with default values and bit semantics:
     * (reset value, writable mask, write-1-to-clear mask) */
    vtd_define_long(s, DMAR_VER_REG, 0x10UL, 0, 0);
    vtd_define_quad(s, DMAR_CAP_REG, s->cap, 0, 0);
    vtd_define_quad(s, DMAR_ECAP_REG, s->ecap, 0, 0);
    vtd_define_long(s, DMAR_GCMD_REG, 0, 0xff800000UL, 0);
    vtd_define_long_wo(s, DMAR_GCMD_REG, 0xff800000UL);
    vtd_define_long(s, DMAR_GSTS_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_RTADDR_REG, 0, 0xfffffffffffff000ULL, 0);
    vtd_define_quad(s, DMAR_CCMD_REG, 0, 0xe0000003ffffffffULL, 0);
    vtd_define_quad_wo(s, DMAR_CCMD_REG, 0x3ffff0000ULL);

    /* Fault recording and event registers */
    vtd_define_long(s, DMAR_FSTS_REG, 0, 0, 0x11UL);
    vtd_define_long(s, DMAR_FECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_FEDATA_REG, 0, 0x0000ffffUL, 0);
    vtd_define_long(s, DMAR_FEADDR_REG, 0, 0xfffffffcUL, 0);

    /* Upper address register is reserved when EIM is not in use;
     * hardcoded to 0 here (no writable bits). */
    vtd_define_long(s, DMAR_FEUADDR_REG, 0, 0, 0);

    /* Protected memory is not supported: PMEN is read-only zero */
    vtd_define_long(s, DMAR_PMEN_REG, 0, 0, 0);

    /* Invalidation queue registers */
    vtd_define_quad(s, DMAR_IQH_REG, 0, 0, 0);
    vtd_define_quad(s, DMAR_IQT_REG, 0, 0x7fff0ULL, 0);
    vtd_define_quad(s, DMAR_IQA_REG, 0, 0xfffffffffffff007ULL, 0);
    vtd_define_long(s, DMAR_ICS_REG, 0, 0, 0x1UL);
    vtd_define_long(s, DMAR_IECTL_REG, 0x80000000UL, 0x80000000UL, 0);
    vtd_define_long(s, DMAR_IEDATA_REG, 0, 0xffffffffUL, 0);
    vtd_define_long(s, DMAR_IEADDR_REG, 0, 0xfffffffcUL, 0);
    /* As with FEUADDR: no writable bits */
    vtd_define_long(s, DMAR_IEUADDR_REG, 0, 0, 0);

    /* IOTLB invalidation registers */
    vtd_define_quad(s, DMAR_IOTLB_REG, 0, 0Xb003ffff00000000ULL, 0);
    vtd_define_quad(s, DMAR_IVA_REG, 0, 0xfffffffffffff07fULL, 0);
    vtd_define_quad_wo(s, DMAR_IVA_REG, 0xfffffffffffff07fULL);

    /* Fault recording register 0 (two 64-bit halves; F bit is W1C) */
    vtd_define_quad(s, DMAR_FRCD_REG_0_0, 0, 0, 0);
    vtd_define_quad(s, DMAR_FRCD_REG_0_2, 0, 0, 0x8000000000000000ULL);

    /* Interrupt remapping table address register */
    vtd_define_quad(s, DMAR_IRTA_REG, 0, 0xfffffffffffff80fULL, 0);
}
3199
3200
3201
3202
3203static void vtd_reset(DeviceState *dev)
3204{
3205 IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
3206
3207 vtd_init(s);
3208 vtd_address_space_refresh_all(s);
3209}
3210
3211static AddressSpace *vtd_host_dma_iommu(PCIBus *bus, void *opaque, int devfn)
3212{
3213 IntelIOMMUState *s = opaque;
3214 VTDAddressSpace *vtd_as;
3215
3216 assert(0 <= devfn && devfn < PCI_DEVFN_MAX);
3217
3218 vtd_as = vtd_find_add_as(s, bus, devfn);
3219 return &vtd_as->as;
3220}
3221
3222static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
3223{
3224 X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s);
3225
3226
3227 if (x86_iommu->intr_supported && kvm_irqchip_in_kernel() &&
3228 !kvm_irqchip_is_split()) {
3229 error_setg(errp, "Intel Interrupt Remapping cannot work with "
3230 "kernel-irqchip=on, please use 'split|off'.");
3231 return false;
3232 }
3233 if (s->intr_eim == ON_OFF_AUTO_ON && !x86_iommu->intr_supported) {
3234 error_setg(errp, "eim=on cannot be selected without intremap=on");
3235 return false;
3236 }
3237
3238 if (s->intr_eim == ON_OFF_AUTO_AUTO) {
3239 s->intr_eim = (kvm_irqchip_in_kernel() || s->buggy_eim)
3240 && x86_iommu->intr_supported ?
3241 ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
3242 }
3243 if (s->intr_eim == ON_OFF_AUTO_ON && !s->buggy_eim) {
3244 if (!kvm_irqchip_in_kernel()) {
3245 error_setg(errp, "eim=on requires accel=kvm,kernel-irqchip=split");
3246 return false;
3247 }
3248 if (!kvm_enable_x2apic()) {
3249 error_setg(errp, "eim=on requires support on the KVM side"
3250 "(X2APIC_API, first shipped in v4.7)");
3251 return false;
3252 }
3253 }
3254
3255
3256 if ((s->aw_bits != VTD_HOST_AW_39BIT) &&
3257 (s->aw_bits != VTD_HOST_AW_48BIT)) {
3258 error_setg(errp, "Supported values for x-aw-bits are: %d, %d",
3259 VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT);
3260 return false;
3261 }
3262
3263 return true;
3264}
3265
/*
 * Realize the VT-d device: validate configuration, set up the CSR MMIO
 * region, caches, and per-bus address-space tracking, and register this
 * IOMMU with the PCI subsystem.
 */
static void vtd_realize(DeviceState *dev, Error **errp)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    PCMachineState *pcms = PC_MACHINE(ms);
    PCIBus *bus = pcms->bus;
    IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev);
    X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(dev);

    x86_iommu->type = TYPE_INTEL;

    if (!vtd_decide_config(s, errp)) {
        return;
    }

    QLIST_INIT(&s->vtd_as_with_notifiers);
    qemu_mutex_init(&s->iommu_lock);
    memset(s->vtd_as_by_bus_num, 0, sizeof(s->vtd_as_by_bus_num));
    memory_region_init_io(&s->csrmem, OBJECT(s), &vtd_mem_ops, s,
                          "intel_iommu", DMAR_REG_SIZE);
    sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->csrmem);
    /* IOTLB cache: keys and values are both heap-allocated */
    s->iotlb = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                     g_free, g_free);
    /* Per-bus VTDBus records, keyed by bus pointer (stored as uintptr_t) */
    s->vtd_as_by_busptr = g_hash_table_new_full(vtd_uint64_hash, vtd_uint64_equal,
                                              g_free, g_free);
    vtd_init(s);
    sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
    /* Pseudo address space for the IOAPIC's interrupt messages */
    pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
}
3297
3298static void vtd_class_init(ObjectClass *klass, void *data)
3299{
3300 DeviceClass *dc = DEVICE_CLASS(klass);
3301 X86IOMMUClass *x86_class = X86_IOMMU_CLASS(klass);
3302
3303 dc->reset = vtd_reset;
3304 dc->vmsd = &vtd_vmstate;
3305 dc->props = vtd_properties;
3306 dc->hotpluggable = false;
3307 x86_class->realize = vtd_realize;
3308 x86_class->int_remap = vtd_int_remap;
3309
3310 dc->user_creatable = true;
3311}
3312
/* QOM type registration info for the VT-d device. */
static const TypeInfo vtd_info = {
    .name          = TYPE_INTEL_IOMMU_DEVICE,
    .parent        = TYPE_X86_IOMMU_DEVICE,
    .instance_size = sizeof(IntelIOMMUState),
    .class_init    = vtd_class_init,
};
3319
/*
 * Class initializer for the VT-d IOMMU memory region: wire up the
 * translate / notify / replay callbacks used by the memory subsystem.
 */
static void vtd_iommu_memory_region_class_init(ObjectClass *klass,
                                               void *data)
{
    IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass);

    imrc->translate = vtd_iommu_translate;
    imrc->notify_flag_changed = vtd_iommu_notify_flag_changed;
    imrc->replay = vtd_iommu_replay;
}
3329
/* QOM type registration info for the VT-d IOMMU memory region class. */
static const TypeInfo vtd_iommu_memory_region_info = {
    .parent = TYPE_IOMMU_MEMORY_REGION,
    .name = TYPE_INTEL_IOMMU_MEMORY_REGION,
    .class_init = vtd_iommu_memory_region_class_init,
};
3335
/* Register both QOM types with the type system at module init time. */
static void vtd_register_types(void)
{
    type_register_static(&vtd_info);
    type_register_static(&vtd_iommu_memory_region_info);
}

type_init(vtd_register_types)
3343