1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20#include "qemu/osdep.h"
21#include "cpu.h"
22#include "tcg/helper-tcg.h"
23
24int get_pg_mode(CPUX86State *env)
25{
26 int pg_mode = 0;
27 if (env->cr[0] & CR0_WP_MASK) {
28 pg_mode |= PG_MODE_WP;
29 }
30 if (env->cr[4] & CR4_PAE_MASK) {
31 pg_mode |= PG_MODE_PAE;
32 }
33 if (env->cr[4] & CR4_PSE_MASK) {
34 pg_mode |= PG_MODE_PSE;
35 }
36 if (env->cr[4] & CR4_PKE_MASK) {
37 pg_mode |= PG_MODE_PKE;
38 }
39 if (env->cr[4] & CR4_PKS_MASK) {
40 pg_mode |= PG_MODE_PKS;
41 }
42 if (env->cr[4] & CR4_SMEP_MASK) {
43 pg_mode |= PG_MODE_SMEP;
44 }
45 if (env->cr[4] & CR4_LA57_MASK) {
46 pg_mode |= PG_MODE_LA57;
47 }
48 if (env->hflags & HF_LMA_MASK) {
49 pg_mode |= PG_MODE_LMA;
50 }
51 if (env->efer & MSR_EFER_NXE) {
52 pg_mode |= PG_MODE_NXE;
53 }
54 return pg_mode;
55}
56
/*
 * Value returned by mmu_translate() when the walk succeeds.  Any other
 * return value is a page-fault error code assembled from PG_ERROR_* bits.
 */
#define PG_ERROR_OK (-1)

/*
 * Callback that maps a guest-physical address to the address actually used
 * for memory access.  @prot may be NULL; get_hphys() below has this
 * signature and, when @prot is non-NULL, narrows *prot with the
 * permissions of the second-stage mapping.
 */
typedef hwaddr (*MMUTranslateFunc)(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
                                   int *prot);
61
/*
 * Run @gpa through the optional second-stage translator.
 *
 * NOTE: this macro deliberately captures a variable named `get_hphys_func`
 * from the scope where it is expanded.  When that pointer is NULL the
 * address is used unchanged; otherwise it is called with the given
 * arguments.  All macro arguments are parenthesized so that arbitrary
 * expressions may be passed; @gpa is evaluated exactly once on either path.
 */
#define GET_HPHYS(cs, gpa, access_type, prot)  \
    (get_hphys_func ? get_hphys_func((cs), (gpa), (access_type), (prot)) : (gpa))
64
65static int mmu_translate(CPUState *cs, hwaddr addr, MMUTranslateFunc get_hphys_func,
66 uint64_t cr3, int is_write1, int mmu_idx, int pg_mode,
67 hwaddr *xlat, int *page_size, int *prot)
68{
69 X86CPU *cpu = X86_CPU(cs);
70 CPUX86State *env = &cpu->env;
71 uint64_t ptep, pte;
72 int32_t a20_mask;
73 target_ulong pde_addr, pte_addr;
74 int error_code = 0;
75 int is_dirty, is_write, is_user;
76 uint64_t rsvd_mask = PG_ADDRESS_MASK & ~MAKE_64BIT_MASK(0, cpu->phys_bits);
77 uint32_t page_offset;
78 uint32_t pkr;
79
80 is_user = (mmu_idx == MMU_USER_IDX);
81 is_write = is_write1 & 1;
82 a20_mask = x86_get_a20_mask(env);
83
84 if (!(pg_mode & PG_MODE_NXE)) {
85 rsvd_mask |= PG_NX_MASK;
86 }
87
88 if (pg_mode & PG_MODE_PAE) {
89 uint64_t pde, pdpe;
90 target_ulong pdpe_addr;
91
92#ifdef TARGET_X86_64
93 if (pg_mode & PG_MODE_LMA) {
94 bool la57 = pg_mode & PG_MODE_LA57;
95 uint64_t pml5e_addr, pml5e;
96 uint64_t pml4e_addr, pml4e;
97
98 if (la57) {
99 pml5e_addr = ((cr3 & ~0xfff) +
100 (((addr >> 48) & 0x1ff) << 3)) & a20_mask;
101 pml5e_addr = GET_HPHYS(cs, pml5e_addr, MMU_DATA_STORE, NULL);
102 pml5e = x86_ldq_phys(cs, pml5e_addr);
103 if (!(pml5e & PG_PRESENT_MASK)) {
104 goto do_fault;
105 }
106 if (pml5e & (rsvd_mask | PG_PSE_MASK)) {
107 goto do_fault_rsvd;
108 }
109 if (!(pml5e & PG_ACCESSED_MASK)) {
110 pml5e |= PG_ACCESSED_MASK;
111 x86_stl_phys_notdirty(cs, pml5e_addr, pml5e);
112 }
113 ptep = pml5e ^ PG_NX_MASK;
114 } else {
115 pml5e = cr3;
116 ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
117 }
118
119 pml4e_addr = ((pml5e & PG_ADDRESS_MASK) +
120 (((addr >> 39) & 0x1ff) << 3)) & a20_mask;
121 pml4e_addr = GET_HPHYS(cs, pml4e_addr, MMU_DATA_STORE, NULL);
122 pml4e = x86_ldq_phys(cs, pml4e_addr);
123 if (!(pml4e & PG_PRESENT_MASK)) {
124 goto do_fault;
125 }
126 if (pml4e & (rsvd_mask | PG_PSE_MASK)) {
127 goto do_fault_rsvd;
128 }
129 if (!(pml4e & PG_ACCESSED_MASK)) {
130 pml4e |= PG_ACCESSED_MASK;
131 x86_stl_phys_notdirty(cs, pml4e_addr, pml4e);
132 }
133 ptep &= pml4e ^ PG_NX_MASK;
134 pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) &
135 a20_mask;
136 pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
137 pdpe = x86_ldq_phys(cs, pdpe_addr);
138 if (!(pdpe & PG_PRESENT_MASK)) {
139 goto do_fault;
140 }
141 if (pdpe & rsvd_mask) {
142 goto do_fault_rsvd;
143 }
144 ptep &= pdpe ^ PG_NX_MASK;
145 if (!(pdpe & PG_ACCESSED_MASK)) {
146 pdpe |= PG_ACCESSED_MASK;
147 x86_stl_phys_notdirty(cs, pdpe_addr, pdpe);
148 }
149 if (pdpe & PG_PSE_MASK) {
150
151 *page_size = 1024 * 1024 * 1024;
152 pte_addr = pdpe_addr;
153 pte = pdpe;
154 goto do_check_protect;
155 }
156 } else
157#endif
158 {
159
160 pdpe_addr = ((cr3 & ~0x1f) + ((addr >> 27) & 0x18)) &
161 a20_mask;
162 pdpe_addr = GET_HPHYS(cs, pdpe_addr, MMU_DATA_STORE, NULL);
163 pdpe = x86_ldq_phys(cs, pdpe_addr);
164 if (!(pdpe & PG_PRESENT_MASK)) {
165 goto do_fault;
166 }
167 rsvd_mask |= PG_HI_USER_MASK;
168 if (pdpe & (rsvd_mask | PG_NX_MASK)) {
169 goto do_fault_rsvd;
170 }
171 ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK;
172 }
173
174 pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) &
175 a20_mask;
176 pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
177 pde = x86_ldq_phys(cs, pde_addr);
178 if (!(pde & PG_PRESENT_MASK)) {
179 goto do_fault;
180 }
181 if (pde & rsvd_mask) {
182 goto do_fault_rsvd;
183 }
184 ptep &= pde ^ PG_NX_MASK;
185 if (pde & PG_PSE_MASK) {
186
187 *page_size = 2048 * 1024;
188 pte_addr = pde_addr;
189 pte = pde;
190 goto do_check_protect;
191 }
192
193 if (!(pde & PG_ACCESSED_MASK)) {
194 pde |= PG_ACCESSED_MASK;
195 x86_stl_phys_notdirty(cs, pde_addr, pde);
196 }
197 pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) &
198 a20_mask;
199 pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
200 pte = x86_ldq_phys(cs, pte_addr);
201 if (!(pte & PG_PRESENT_MASK)) {
202 goto do_fault;
203 }
204 if (pte & rsvd_mask) {
205 goto do_fault_rsvd;
206 }
207
208 ptep &= pte ^ PG_NX_MASK;
209 *page_size = 4096;
210 } else {
211 uint32_t pde;
212
213
214 pde_addr = ((cr3 & ~0xfff) + ((addr >> 20) & 0xffc)) &
215 a20_mask;
216 pde_addr = GET_HPHYS(cs, pde_addr, MMU_DATA_STORE, NULL);
217 pde = x86_ldl_phys(cs, pde_addr);
218 if (!(pde & PG_PRESENT_MASK)) {
219 goto do_fault;
220 }
221 ptep = pde | PG_NX_MASK;
222
223
224 if ((pde & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) {
225 *page_size = 4096 * 1024;
226 pte_addr = pde_addr;
227
228
229
230
231 pte = pde | ((pde & 0x1fe000LL) << (32 - 13));
232 rsvd_mask = 0x200000;
233 goto do_check_protect_pse36;
234 }
235
236 if (!(pde & PG_ACCESSED_MASK)) {
237 pde |= PG_ACCESSED_MASK;
238 x86_stl_phys_notdirty(cs, pde_addr, pde);
239 }
240
241
242 pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) &
243 a20_mask;
244 pte_addr = GET_HPHYS(cs, pte_addr, MMU_DATA_STORE, NULL);
245 pte = x86_ldl_phys(cs, pte_addr);
246 if (!(pte & PG_PRESENT_MASK)) {
247 goto do_fault;
248 }
249
250 ptep &= pte | PG_NX_MASK;
251 *page_size = 4096;
252 rsvd_mask = 0;
253 }
254
255do_check_protect:
256 rsvd_mask |= (*page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK;
257do_check_protect_pse36:
258 if (pte & rsvd_mask) {
259 goto do_fault_rsvd;
260 }
261 ptep ^= PG_NX_MASK;
262
263
264 if (is_user && !(ptep & PG_USER_MASK)) {
265 goto do_fault_protect;
266 }
267
268 *prot = 0;
269 if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) {
270 *prot |= PAGE_READ;
271 if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) {
272 *prot |= PAGE_WRITE;
273 }
274 }
275 if (!(ptep & PG_NX_MASK) &&
276 (mmu_idx == MMU_USER_IDX ||
277 !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) {
278 *prot |= PAGE_EXEC;
279 }
280
281 if (!(pg_mode & PG_MODE_LMA)) {
282 pkr = 0;
283 } else if (ptep & PG_USER_MASK) {
284 pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0;
285 } else {
286 pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0;
287 }
288 if (pkr) {
289 uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT;
290 uint32_t pkr_ad = (pkr >> pk * 2) & 1;
291 uint32_t pkr_wd = (pkr >> pk * 2) & 2;
292 uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
293
294 if (pkr_ad) {
295 pkr_prot &= ~(PAGE_READ | PAGE_WRITE);
296 } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) {
297 pkr_prot &= ~PAGE_WRITE;
298 }
299
300 *prot &= pkr_prot;
301 if ((pkr_prot & (1 << is_write1)) == 0) {
302 assert(is_write1 != 2);
303 error_code |= PG_ERROR_PK_MASK;
304 goto do_fault_protect;
305 }
306 }
307
308 if ((*prot & (1 << is_write1)) == 0) {
309 goto do_fault_protect;
310 }
311
312
313 is_dirty = is_write && !(pte & PG_DIRTY_MASK);
314 if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
315 pte |= PG_ACCESSED_MASK;
316 if (is_dirty) {
317 pte |= PG_DIRTY_MASK;
318 }
319 x86_stl_phys_notdirty(cs, pte_addr, pte);
320 }
321
322 if (!(pte & PG_DIRTY_MASK)) {
323
324
325 assert(!is_write);
326 *prot &= ~PAGE_WRITE;
327 }
328
329 pte = pte & a20_mask;
330
331
332 pte &= PG_ADDRESS_MASK & ~(*page_size - 1);
333 page_offset = addr & (*page_size - 1);
334 *xlat = GET_HPHYS(cs, pte + page_offset, is_write1, prot);
335 return PG_ERROR_OK;
336
337 do_fault_rsvd:
338 error_code |= PG_ERROR_RSVD_MASK;
339 do_fault_protect:
340 error_code |= PG_ERROR_P_MASK;
341 do_fault:
342 error_code |= (is_write << PG_ERROR_W_BIT);
343 if (is_user)
344 error_code |= PG_ERROR_U_MASK;
345 if (is_write1 == 2 &&
346 (((pg_mode & PG_MODE_NXE) && (pg_mode & PG_MODE_PAE)) ||
347 (pg_mode & PG_MODE_SMEP)))
348 error_code |= PG_ERROR_I_D_MASK;
349 return error_code;
350}
351
352hwaddr get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type,
353 int *prot)
354{
355 CPUX86State *env = &X86_CPU(cs)->env;
356 uint64_t exit_info_1;
357 int page_size;
358 int next_prot;
359 hwaddr hphys;
360
361 if (likely(!(env->hflags2 & HF2_NPT_MASK))) {
362 return gphys;
363 }
364
365 exit_info_1 = mmu_translate(cs, gphys, NULL, env->nested_cr3,
366 access_type, MMU_USER_IDX, env->nested_pg_mode,
367 &hphys, &page_size, &next_prot);
368 if (exit_info_1 == PG_ERROR_OK) {
369 if (prot) {
370 *prot &= next_prot;
371 }
372 return hphys;
373 }
374
375 x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
376 gphys);
377 if (prot) {
378 exit_info_1 |= SVM_NPTEXIT_GPA;
379 } else {
380 exit_info_1 |= SVM_NPTEXIT_GPT;
381 }
382 cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr);
383}
384
385
386
387
388
389
390static int handle_mmu_fault(CPUState *cs, vaddr addr, int size,
391 int is_write1, int mmu_idx)
392{
393 X86CPU *cpu = X86_CPU(cs);
394 CPUX86State *env = &cpu->env;
395 int error_code = PG_ERROR_OK;
396 int pg_mode, prot, page_size;
397 hwaddr paddr;
398 hwaddr vaddr;
399
400#if defined(DEBUG_MMU)
401 printf("MMU fault: addr=%" VADDR_PRIx " w=%d mmu=%d eip=" TARGET_FMT_lx "\n",
402 addr, is_write1, mmu_idx, env->eip);
403#endif
404
405 if (!(env->cr[0] & CR0_PG_MASK)) {
406 paddr = addr;
407#ifdef TARGET_X86_64
408 if (!(env->hflags & HF_LMA_MASK)) {
409
410 paddr = (uint32_t)paddr;
411 }
412#endif
413 prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
414 page_size = 4096;
415 } else {
416 pg_mode = get_pg_mode(env);
417 if (pg_mode & PG_MODE_LMA) {
418 int32_t sext;
419
420
421 sext = (int64_t)addr >> (pg_mode & PG_MODE_LA57 ? 56 : 47);
422 if (sext != 0 && sext != -1) {
423 env->error_code = 0;
424 cs->exception_index = EXCP0D_GPF;
425 return 1;
426 }
427 }
428
429 error_code = mmu_translate(cs, addr, get_hphys, env->cr[3], is_write1,
430 mmu_idx, pg_mode,
431 &paddr, &page_size, &prot);
432 }
433
434 if (error_code == PG_ERROR_OK) {
435
436
437 vaddr = addr & TARGET_PAGE_MASK;
438 paddr &= TARGET_PAGE_MASK;
439
440 assert(prot & (1 << is_write1));
441 tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env),
442 prot, mmu_idx, page_size);
443 return 0;
444 } else {
445 if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
446
447 x86_stq_phys(cs,
448 env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2),
449 addr);
450 } else {
451 env->cr[2] = addr;
452 }
453 env->error_code = error_code;
454 cs->exception_index = EXCP0E_PAGE;
455 return 1;
456 }
457}
458
459bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size,
460 MMUAccessType access_type, int mmu_idx,
461 bool probe, uintptr_t retaddr)
462{
463 X86CPU *cpu = X86_CPU(cs);
464 CPUX86State *env = &cpu->env;
465
466 env->retaddr = retaddr;
467 if (handle_mmu_fault(cs, addr, size, access_type, mmu_idx)) {
468
469 g_assert(!probe);
470 raise_exception_err_ra(env, cs->exception_index,
471 env->error_code, retaddr);
472 }
473 return true;
474}
475