/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion.  In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable.  When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest.  This prevents uncontrolled
 * guest updates to the pagetable.  Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow.  The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use.  This means that Xen can be assured
 * that it is still RO, and doesn't need to revalidate it upon every
 * cr3 switch.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/bug.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/paravirt.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/page.h>
#include <xen/interface/xen.h>

#include "multicalls.h"
#include "mmu.h"

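/*
 * Translate an arbitrary kernel virtual address into its machine address,
 * using the pte that currently maps it.
 */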
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
	pte_t *pte = lookup_address(address);
	unsigned offset = address & ~PAGE_MASK;	/* offset within the page */

	BUG_ON(pte == NULL);

	return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset);
}

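/* Mark a page of low (direct-mapped) kernel memory read-only. */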
void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;

	pte = lookup_address(address);
	BUG_ON(pte == NULL);

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

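/* Make a previously read-only low-memory page writable again. */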
void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;

	pte = lookup_address(address);
	BUG_ON(pte == NULL);

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

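/*
 * Install a new pmd entry.  Pagetable updates must go via the hypervisor,
 * so queue it as a deferred mmu_update in the current multicall batch.
 */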
void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	preempt_disable();

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;
	u->ptr = virt_to_machine(ptr).maddr;
	u->val = pmd_val_ma(val);
	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

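/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */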
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	pgd = swapper_pg_dir + pgd_index(vaddr);
	if (pgd_none(*pgd)) {
		BUG();
		return;
	}
	pud = pud_offset(pgd, vaddr);
	if (pud_none(*pud)) {
		BUG();
		return;
	}
	pmd = pmd_offset(pud, vaddr);
	if (pmd_none(*pmd)) {
		BUG();
		return;
	}
	pte = pte_offset_kernel(pmd, vaddr);

	xen_set_pte(pte, mfn_pte(mfn, flags));

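	/*
	 * It's enough to flush this one mapping.
	 * (PGE mappings get flushed as well.)
	 */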
	__flush_tlb_one(vaddr);
}

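/*
 * Set a pte in a process pagetable.  If it belongs to the current or
 * kernel mm, use the update_va_mapping hypercall (batched when in lazy
 * MMU mode); otherwise fall back to setting the pte directly.
 */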
void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
		    pte_t *ptep, pte_t pteval)
{
	if (mm == current->mm || mm == &init_mm) {
		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
			struct multicall_space mcs;
			mcs = xen_mc_entry(0);

			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
			xen_mc_issue(PARAVIRT_LAZY_MMU);
			return;
		} else
			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
				return;
	}
	xen_set_pte(ptep, pteval);
}

#ifdef CONFIG_X86_PAE
void xen_set_pud(pud_t *ptr, pud_t val)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	preempt_disable();

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;
	u->ptr = virt_to_machine(ptr).maddr;
	u->val = pud_val_ma(val);
	MULTI_mmu_update(mcs.mc, u, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

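/*
 * A PAE pte is two 32-bit words.  Write the high word first and the low
 * word (which holds the present bit) last, with a write barrier between
 * them, so a not-present pte never becomes visible half-updated.
 */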
void xen_set_pte(pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	set_64bit((u64 *)ptep, pte_val_ma(pte));
}

void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = 0;
}

void xen_pmd_clear(pmd_t *pmdp)
{
	xen_set_pmd(pmdp, __pmd(0));
}

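/*
 * Pagetable entries contain machine frame numbers.  The *_val functions
 * convert an entry back into a pseudo-physical value for the kernel.
 */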
unsigned long long xen_pte_val(pte_t pte)
{
	unsigned long long ret = 0;

	if (pte.pte_low) {
		ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
	}

	return ret;
}

unsigned long long xen_pmd_val(pmd_t pmd)
{
	unsigned long long ret = pmd.pmd;
	if (ret)
		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
	return ret;
}

unsigned long long xen_pgd_val(pgd_t pgd)
{
	unsigned long long ret = pgd.pgd;
	if (ret)
		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
	return ret;
}

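/*
 * The *_make functions go the other way: translate a pseudo-physical
 * frame into the machine frame that actually goes into the pagetable.
 */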
pte_t xen_make_pte(unsigned long long pte)
{
	if (pte & _PAGE_PRESENT)
		pte = phys_to_machine(XPADDR(pte)).maddr;

	pte &= ~_PAGE_PCD;

	return (pte_t){ pte, pte >> 32 };
}

pmd_t xen_make_pmd(unsigned long long pmd)
{
	if (pmd & _PAGE_PRESENT)
		pmd = phys_to_machine(XPADDR(pmd)).maddr;

	return (pmd_t){ pmd };
}

pgd_t xen_make_pgd(unsigned long long pgd)
{
	if (pgd & _PAGE_PRESENT)
		pgd = phys_to_machine(XPADDR(pgd)).maddr;

	return (pgd_t){ pgd };
}
#else
void xen_set_pte(pte_t *ptep, pte_t pte)
{
	*ptep = pte;
}

unsigned long xen_pte_val(pte_t pte)
{
	unsigned long ret = pte.pte_low;

	if (ret & _PAGE_PRESENT)
		ret = machine_to_phys(XMADDR(ret)).paddr;

	return ret;
}

unsigned long xen_pgd_val(pgd_t pgd)
{
	unsigned long ret = pgd.pgd;
	if (ret)
		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
	return ret;
}

pte_t xen_make_pte(unsigned long pte)
{
	if (pte & _PAGE_PRESENT)
		pte = phys_to_machine(XPADDR(pte)).maddr;

	pte &= ~_PAGE_PCD;

	return (pte_t){ pte };
}

pgd_t xen_make_pgd(unsigned long pgd)
{
	if (pgd & _PAGE_PRESENT)
		pgd = phys_to_machine(XPADDR(pgd)).maddr;

	return (pgd_t){ pgd };
}
#endif

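/* Pagetable levels, passed to the pgd_walk() callback for each page. */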
enum pt_level {
	PT_PGD,
	PT_PUD,
	PT_PMD,
	PT_PTE
};

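/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
 * callback function on each page it finds making up the page table,
 * at every level.  It walks the entire pagetable, but it only bothers
 * pinning pte pages which are below the given limit.  In the normal
 * case this will be TASK_SIZE, but at boot we need to pin up to
 * FIXADDR_TOP.  The important bit is that we don't pin beyond there,
 * because then we start getting into Xen's ptes.
 */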
static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level),
		    unsigned long limit)
{
	pgd_t *pgd = pgd_base;
	int flush = 0;
	unsigned long addr = 0;
	unsigned long pgd_next;

	BUG_ON(limit > FIXADDR_TOP);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) {
		pud_t *pud;
		unsigned long pud_limit, pud_next;

		pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP);

		if (!pgd_val(*pgd))
			continue;

		pud = pud_offset(pgd, 0);

		if (PTRS_PER_PUD > 1) /* not folded */
			flush |= (*func)(virt_to_page(pud), PT_PUD);

		for (; addr != pud_limit; pud++, addr = pud_next) {
			pmd_t *pmd;
			unsigned long pmd_limit;

			pud_next = pud_addr_end(addr, pud_limit);

			if (pud_next < limit)
				pmd_limit = pud_next;
			else
				pmd_limit = limit;

			if (pud_none(*pud))
				continue;

			pmd = pmd_offset(pud, 0);

			if (PTRS_PER_PMD > 1) /* not folded */
				flush |= (*func)(virt_to_page(pmd), PT_PMD);

			for (; addr != pmd_limit; pmd++) {
				addr += (PAGE_SIZE * PTRS_PER_PTE);
				/* compare against -1 so wrapping to 0 at the
				   top of the address space is handled */
				if ((pmd_limit-1) < (addr-1)) {
					addr = pmd_limit;
					break;
				}

				if (pmd_none(*pmd))
					continue;

				flush |= (*func)(pmd_page(*pmd), PT_PTE);
			}
		}
	}

	flush |= (*func)(virt_to_page(pgd_base), PT_PGD);

	return flush;
}

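/*
 * Take the split pte lock for a pte page, if split pte locks are in use,
 * so the page can't be modified while it is being pinned or unpinned.
 */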
static spinlock_t *lock_pte(struct page *page)
{
	spinlock_t *ptl = NULL;

#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
	ptl = __pte_lockptr(page);
	spin_lock(ptl);
#endif

	return ptl;
}

static void do_unlock(void *v)
{
	spinlock_t *ptl = v;
	spin_unlock(ptl);
}

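/*
 * Queue a pin/unpin mmuext operation for the pagetable page at the given
 * pfn as part of the current multicall batch.
 */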
static void xen_do_pin(unsigned level, unsigned long pfn)
{
	struct mmuext_op *op;
	struct multicall_space mcs;

	mcs = __xen_mc_entry(sizeof(*op));
	op = mcs.args;
	op->cmd = level;
	op->arg1.mfn = pfn_to_mfn(pfn);
	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
}

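/*
 * Pin one pagetable page: remap it read-only and, for pte pages, issue
 * the L1 pin.  Highmem pages can't be remapped from here, so just
 * report that a kmap flush is needed.
 */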
static int pin_page(struct page *page, enum pt_level level)
{
	unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
	int flush;

	if (pgfl)
		flush = 0;		/* already pinned */
	else if (PageHighMem(page))
		/* kmaps need flushing if we found an unpinned
		   highpage */
		flush = 1;
	else {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		struct multicall_space mcs = __xen_mc_entry(0);
		spinlock_t *ptl;

		flush = 0;

		ptl = NULL;
		if (level == PT_PTE)
			ptl = lock_pte(page);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL_RO),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (level == PT_PTE)
			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);

		if (ptl) {
			/* Queue a deferred unlock for when this batch
			   is completed. */
			xen_mc_callback(do_unlock, ptl);
		}
	}

	return flush;
}

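/*
 * This is called just after a mm has been created, but it has not
 * been used yet.  We need to make sure that its pagetable is all
 * read-only, and can be pinned.
 */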
void xen_pgd_pin(pgd_t *pgd)
{
	unsigned level;

	xen_mc_batch();

	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
		/* The walk found unpinned highmem pages: flush the
		   partial batch, drop any stale kmap entries, then
		   start a new batch for the final pin. */
		xen_mc_issue(0);
		kmap_flush_unused();
		xen_mc_batch();
	}

#ifdef CONFIG_X86_PAE
	level = MMUEXT_PIN_L3_TABLE;
#else
	level = MMUEXT_PIN_L2_TABLE;
#endif

	xen_do_pin(level, PFN_DOWN(__pa(pgd)));

	xen_mc_issue(0);
}

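/*
 * The init_mm pagetable is really pinned as soon as its created, but
 * that's before we have page structures to store the bits.  So do all
 * the book-keeping now.
 */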
static __init int mark_pinned(struct page *page, enum pt_level level)
{
	SetPagePinned(page);
	return 0;
}

void __init xen_mark_init_mm_pinned(void)
{
	pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
}

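/* Undo pin_page: return a pagetable page to normal read-write use. */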
static int unpin_page(struct page *page, enum pt_level level)
{
	unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);

	if (pgfl && !PageHighMem(page)) {
		void *pt = lowmem_page_address(page);
		unsigned long pfn = page_to_pfn(page);
		spinlock_t *ptl = NULL;
		struct multicall_space mcs;

		if (level == PT_PTE) {
			ptl = lock_pte(page);

			xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
		}

		mcs = __xen_mc_entry(0);

		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
					pfn_pte(pfn, PAGE_KERNEL),
					level == PT_PGD ? UVMF_TLB_FLUSH : 0);

		if (ptl) {
			/* unlock when batch completed */
			xen_mc_callback(do_unlock, ptl);
		}
	}

	return 0;		/* never need to flush on unpin */
}

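/* Release a pagetable's pages back to normal read-write use. */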
static void xen_pgd_unpin(pgd_t *pgd)
{
	xen_mc_batch();

	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));

	pgd_walk(pgd, unpin_page, TASK_SIZE);

	xen_mc_issue(0);
}

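/* An mm's pagetable must be pinned before the mm can be put into use. */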
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
	spin_lock(&next->page_table_lock);
	xen_pgd_pin(next->pgd);
	spin_unlock(&next->page_table_lock);
}

void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
	spin_lock(&mm->page_table_lock);
	xen_pgd_pin(mm->pgd);
	spin_unlock(&mm->page_table_lock);
}

#ifdef CONFIG_SMP
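/*
 * Another cpu may still have their %cr3 pointing at the pagetable, so
 * we need to repoint it somewhere else before we can unpin it.
 */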
static void drop_other_mm_ref(void *info)
{
	struct mm_struct *mm = info;

	if (__get_cpu_var(cpu_tlbstate).active_mm == mm)
		leave_mm(smp_processor_id());

	/* If this cpu still has a stale cr3 reference, then make sure
	   it has been flushed. */
	if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) {
		load_cr3(swapper_pg_dir);
		arch_flush_lazy_cpu_mode();
	}
}

static void drop_mm_ref(struct mm_struct *mm)
{
	cpumask_t mask;
	unsigned cpu;

	if (current->active_mm == mm) {
		if (current->mm == mm)
			load_cr3(swapper_pg_dir);
		else
			leave_mm(smp_processor_id());
		arch_flush_lazy_cpu_mode();
	}

	/* Get the "official" set of cpus referring to our pagetable. */
	mask = mm->cpu_vm_mask;

	/* It's possible that a vcpu may have a stale reference to our
	   cr3, because it's in lazy mode and hasn't yet flushed its
	   set of pending hypercalls.  In this case, look at its actual
	   current cr3 value and force it to flush if needed. */
	for_each_online_cpu(cpu) {
		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
			cpu_set(cpu, mask);
	}

	if (!cpus_empty(mask))
		xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
static void drop_mm_ref(struct mm_struct *mm)
{
	if (current->active_mm == mm)
		load_cr3(swapper_pg_dir);
}
#endif

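/*
 * While a process runs, Xen pins its pagetable, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 */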
void xen_exit_mmap(struct mm_struct *mm)
{
	get_cpu();		/* make sure we don't move around */
	drop_mm_ref(mm);
	put_cpu();

	spin_lock(&mm->page_table_lock);

	/* pgd may not be pinned in the error exit path of execve */
	if (PagePinned(virt_to_page(mm->pgd)))
		xen_pgd_unpin(mm->pgd);

	spin_unlock(&mm->page_table_lock);
}