/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion.  In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable.  When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest.  This prevents uncontrolled
 * guest updates to the pagetable.  Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow.  The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use.  This means that Xen can be assured
 * that it is still valid when you do load it into %cr3, and doesn't
 * need to revalidate it.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
41#include <linux/sched.h>
42#include <linux/highmem.h>
43#include <linux/debugfs.h>
44#include <linux/bug.h>
45#include <linux/vmalloc.h>
46#include <linux/module.h>
47#include <linux/gfp.h>
48#include <linux/memblock.h>
49#include <linux/seq_file.h>
50#include <linux/crash_dump.h>
51
52#include <trace/events/xen.h>
53
54#include <asm/pgtable.h>
55#include <asm/tlbflush.h>
56#include <asm/fixmap.h>
57#include <asm/mmu_context.h>
58#include <asm/setup.h>
59#include <asm/paravirt.h>
60#include <asm/e820.h>
61#include <asm/linkage.h>
62#include <asm/page.h>
63#include <asm/init.h>
64#include <asm/pat.h>
65#include <asm/smp.h>
66
67#include <asm/xen/hypercall.h>
68#include <asm/xen/hypervisor.h>
69
70#include <xen/xen.h>
71#include <xen/page.h>
72#include <xen/interface/xen.h>
73#include <xen/interface/hvm/hvm_op.h>
74#include <xen/interface/version.h>
75#include <xen/interface/memory.h>
76#include <xen/hvc-console.h>
77
78#include "multicalls.h"
79#include "mmu.h"
80#include "debugfs.h"
81
/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and balloon lists.
 */
86DEFINE_SPINLOCK(xen_reservation_lock);
87
88#ifdef CONFIG_X86_32
/*
 * Identity map, in addition to plain kernel map.  This needs to be
 * large enough to allocate page table pages to allocate the rest.
 * Each page can map 2MB.
 */
94#define LEVEL1_IDENT_ENTRIES (PTRS_PER_PTE * 4)
95static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
96#endif
97#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
99static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
100#endif
101
/*
 * Note about cr3 (pagetable base) values:
 *
 * xen_cr3 contains the current logical cr3 value; it contains the
 * last set cr3.  This may not be the current effective cr3, because
 * its update may be being lazily deferred.  However, a vcpu looking
 * at its own cr3 can use this value knowing that everything will
 * be self-consistent.
 *
 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 * hypercall to set the vcpu cr3 is complete (so it may be a little
 * out of date, but it will never be set early).  If one vcpu is
 * looking at another vcpu's cr3 value, it should use this variable.
 */
116DEFINE_PER_CPU(unsigned long, xen_cr3);
117DEFINE_PER_CPU(unsigned long, xen_current_cr3);
118
/*
 * Just beyond the highest usermode address.  STACK_TOP_MAX has a
 * redzone above it, so round it up to a PGD boundary.
 */
124#define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)
125
126unsigned long arbitrary_virt_to_mfn(void *vaddr)
127{
128 xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);
129
130 return PFN_DOWN(maddr.maddr);
131}
132
133xmaddr_t arbitrary_virt_to_machine(void *vaddr)
134{
135 unsigned long address = (unsigned long)vaddr;
136 unsigned int level;
137 pte_t *pte;
138 unsigned offset;
139
	/*
	 * if the PFN is in the linear mapped vaddr range, we can just use
	 * the (quick) virt_to_machine() p2m lookup
	 */
144 if (virt_addr_valid(vaddr))
145 return virt_to_machine(vaddr);
146
	/* otherwise we have to do a (slower) full page-table walk */
149 pte = lookup_address(address, &level);
150 BUG_ON(pte == NULL);
151 offset = address & ~PAGE_MASK;
152 return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
153}
154EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);
155
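/*
 * Xen requires that every page backing a pagetable be mapped read-only in
 * the guest.  These helpers flip the mapping of a single lowmem page
 * between RO and RW with one update_va_mapping hypercall; they silently do
 * nothing if the address has no pte.
 */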
156void make_lowmem_page_readonly(void *vaddr)
157{
158 pte_t *pte, ptev;
159 unsigned long address = (unsigned long)vaddr;
160 unsigned int level;
161
162 pte = lookup_address(address, &level);
163 if (pte == NULL)
164 return;
165
166 ptev = pte_wrprotect(*pte);
167
168 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
169 BUG();
170}
171
172void make_lowmem_page_readwrite(void *vaddr)
173{
174 pte_t *pte, ptev;
175 unsigned long address = (unsigned long)vaddr;
176 unsigned int level;
177
178 pte = lookup_address(address, &level);
179 if (pte == NULL)
180 return;
181
182 ptev = pte_mkwrite(*pte);
183
184 if (HYPERVISOR_update_va_mapping(address, ptev, 0))
185 BUG();
186}
187
188
189static bool xen_page_pinned(void *ptr)
190{
191 struct page *page = virt_to_page(ptr);
192
193 return PagePinned(page);
194}
195
196void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
197{
198 struct multicall_space mcs;
199 struct mmu_update *u;
200
201 trace_xen_mmu_set_domain_pte(ptep, pteval, domid);
202
203 mcs = xen_mc_entry(sizeof(*u));
204 u = mcs.args;
205
206
207 u->ptr = virt_to_machine(ptep).maddr;
208 u->val = pte_val_ma(pteval);
209
210 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);
211
212 xen_mc_issue(PARAVIRT_LAZY_MMU);
213}
214EXPORT_SYMBOL_GPL(xen_set_domain_pte);
215
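/*
 * Append an mmu_update to the current multicall batch.  If the previous
 * entry in the batch is already an mmu_update hypercall, its argument
 * count is simply bumped; otherwise a fresh MULTI_mmu_update call is
 * queued.  The usual calling pattern (see xen_set_pmd_hyper() below) is:
 *
 *	xen_mc_batch();
 *	u.ptr = ...; u.val = ...;
 *	xen_extend_mmu_update(&u);
 *	xen_mc_issue(PARAVIRT_LAZY_MMU);
 */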
216static void xen_extend_mmu_update(const struct mmu_update *update)
217{
218 struct multicall_space mcs;
219 struct mmu_update *u;
220
221 mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
222
223 if (mcs.mc != NULL) {
224 mcs.mc->args[1]++;
225 } else {
226 mcs = __xen_mc_entry(sizeof(*u));
227 MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
228 }
229
230 u = mcs.args;
231 *u = *update;
232}
233
234static void xen_extend_mmuext_op(const struct mmuext_op *op)
235{
236 struct multicall_space mcs;
237 struct mmuext_op *u;
238
239 mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));
240
241 if (mcs.mc != NULL) {
242 mcs.mc->args[1]++;
243 } else {
244 mcs = __xen_mc_entry(sizeof(*u));
245 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
246 }
247
248 u = mcs.args;
249 *u = *op;
250}
251
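/*
 * Hypercall-based pmd update.  Used (via xen_set_pmd) when the page
 * containing the entry is pinned and therefore read-only, so the entry
 * cannot simply be written directly.
 */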
252static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
253{
254 struct mmu_update u;
255
256 preempt_disable();
257
258 xen_mc_batch();
259
	/* ptr may be ioremapped for 64-bit pagetable setup */
261 u.ptr = arbitrary_virt_to_machine(ptr).maddr;
262 u.val = pmd_val_ma(val);
263 xen_extend_mmu_update(&u);
264
265 xen_mc_issue(PARAVIRT_LAZY_MMU);
266
267 preempt_enable();
268}
269
270static void xen_set_pmd(pmd_t *ptr, pmd_t val)
271{
272 trace_xen_mmu_set_pmd(ptr, val);
273
	/* If page is not pinned, we can just update the entry
	   directly */
276 if (!xen_page_pinned(ptr)) {
277 *ptr = val;
278 return;
279 }
280
281 xen_set_pmd_hyper(ptr, val);
282}
283
/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
288void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
289{
290 set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
291}
292
293static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
294{
295 struct mmu_update u;
296
297 if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
298 return false;
299
300 xen_mc_batch();
301
302 u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
303 u.val = pte_val_ma(pteval);
304 xen_extend_mmu_update(&u);
305
306 xen_mc_issue(PARAVIRT_LAZY_MMU);
307
308 return true;
309}
310
311static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
312{
313 if (!xen_batched_set_pte(ptep, pteval)) {
		/*
		 * Could call native_set_pte() here and trap and
		 * emulate the PTE write, but with 32-bit guests this
		 * needs two traps (one for each of the two 32-bit
		 * words in the PTE) so do one hypercall directly
		 * instead.
		 */
321 struct mmu_update u;
322
323 u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
324 u.val = pte_val_ma(pteval);
325 HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
326 }
327}
328
329static void xen_set_pte(pte_t *ptep, pte_t pteval)
330{
331 trace_xen_mmu_set_pte(ptep, pteval);
332 __xen_set_pte(ptep, pteval);
333}
334
335static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
336 pte_t *ptep, pte_t pteval)
337{
338 trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
339 __xen_set_pte(ptep, pteval);
340}
341
342pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
343 unsigned long addr, pte_t *ptep)
344{
345
346 trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
347 return *ptep;
348}
349
350void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
351 pte_t *ptep, pte_t pte)
352{
353 struct mmu_update u;
354
355 trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
356 xen_mc_batch();
357
358 u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
359 u.val = pte_val_ma(pte);
360 xen_extend_mmu_update(&u);
361
362 xen_mc_issue(PARAVIRT_LAZY_MMU);
363}
364
/* Assume pteval_t is equivalent to all the other *val_t types. */
366static pteval_t pte_mfn_to_pfn(pteval_t val)
367{
368 if (val & _PAGE_PRESENT) {
369 unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
370 unsigned long pfn = mfn_to_pfn(mfn);
371
372 pteval_t flags = val & PTE_FLAGS_MASK;
373 if (unlikely(pfn == ~0))
374 val = flags & ~_PAGE_PRESENT;
375 else
376 val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
377 }
378
379 return val;
380}
381
382static pteval_t pte_pfn_to_mfn(pteval_t val)
383{
384 if (val & _PAGE_PRESENT) {
385 unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
386 pteval_t flags = val & PTE_FLAGS_MASK;
387 unsigned long mfn;
388
389 if (!xen_feature(XENFEAT_auto_translated_physmap))
390 mfn = get_phys_to_machine(pfn);
391 else
392 mfn = pfn;
393
		/*
		 * If there's no mfn for the pfn, then just create an
		 * empty non-present pte.  Unfortunately this loses
		 * information about the original pfn, so
		 * pte_mfn_to_pfn is asymmetric.
		 */
399 if (unlikely(mfn == INVALID_P2M_ENTRY)) {
400 mfn = 0;
401 flags = 0;
402 } else
403 mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
404 val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
405 }
406
407 return val;
408}
409
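/*
 * The pte/pmd/pud/pgd value conversions below are installed as
 * callee-save pv-ops; the PV_CALLEE_SAVE_REGS_THUNK wrappers generate
 * register-preserving thunks so the conversions stay cheap on hot paths.
 */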
410__visible pteval_t xen_pte_val(pte_t pte)
411{
412 pteval_t pteval = pte.pte;
413#if 0
414
415 if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
416 WARN_ON(!pat_enabled);
417 pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
418 }
419#endif
420 return pte_mfn_to_pfn(pteval);
421}
422PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
423
424__visible pgdval_t xen_pgd_val(pgd_t pgd)
425{
426 return pte_mfn_to_pfn(pgd.pgd);
427}
428PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
429
/*
 * Xen's PAT configuration is part of its ABI and cannot be changed by
 * the guest.  Xen sets up the PAT entries as:
 *
 *   Idx  PTE flags        Xen
 *   0                     WB
 *   1            PWT      WT
 *   2        PCD          UC-
 *   3        PCD PWT      UC
 *   4    PAT              WC
 *   5    PAT     PWT      WP
 *   6    PAT PCD          UC-
 *   7    PAT PCD PWT      UC
 *
 * whereas Linux expects WB WC UC- UC in entries 0-3 (repeated in 4-7),
 * which is what xen_set_pat() checks for below.  The disabled code in
 * xen_pte_val()/xen_make_pte() shows how a Linux WC request (_PAGE_PWT)
 * would be translated to Xen's WC entry (_PAGE_PAT) and back.
 */
448void xen_set_pat(u64 pat)
449{
	/* We only expect Linux's default PAT layout here:
	 * WB, WC, UC-, UC in entries 0-3, repeated in entries 4-7. */
452 WARN_ON(pat != 0x0007010600070106ull);
453}
454
455__visible pte_t xen_make_pte(pteval_t pte)
456{
457#if 0
	/* If Linux is trying to set a WC pte, then map to the Xen WC.
	 * If _PAGE_PAT is set, then it probably means it is really
	 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
	 * things work out OK...
	 *
	 * (We should never see kernel mappings with _PAGE_PSE set,
	 * but we could see hugetlbfs mappings, I think.)
	 */
466 if (pat_enabled && !WARN_ON(pte & _PAGE_PAT)) {
467 if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
468 pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
469 }
470#endif
471 pte = pte_pfn_to_mfn(pte);
472
473 return native_make_pte(pte);
474}
475PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
476
477__visible pgd_t xen_make_pgd(pgdval_t pgd)
478{
479 pgd = pte_pfn_to_mfn(pgd);
480 return native_make_pgd(pgd);
481}
482PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
483
484__visible pmdval_t xen_pmd_val(pmd_t pmd)
485{
486 return pte_mfn_to_pfn(pmd.pmd);
487}
488PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
489
490static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
491{
492 struct mmu_update u;
493
494 preempt_disable();
495
496 xen_mc_batch();
497
	/* ptr may be ioremapped for 64-bit pagetable setup */
499 u.ptr = arbitrary_virt_to_machine(ptr).maddr;
500 u.val = pud_val_ma(val);
501 xen_extend_mmu_update(&u);
502
503 xen_mc_issue(PARAVIRT_LAZY_MMU);
504
505 preempt_enable();
506}
507
508static void xen_set_pud(pud_t *ptr, pud_t val)
509{
510 trace_xen_mmu_set_pud(ptr, val);
511
	/* If page is not pinned, we can just update the entry
	   directly */
514 if (!xen_page_pinned(ptr)) {
515 *ptr = val;
516 return;
517 }
518
519 xen_set_pud_hyper(ptr, val);
520}
521
522#ifdef CONFIG_X86_PAE
523static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
524{
525 trace_xen_mmu_set_pte_atomic(ptep, pte);
526 set_64bit((u64 *)ptep, native_pte_val(pte));
527}
528
529static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
530{
531 trace_xen_mmu_pte_clear(mm, addr, ptep);
532 if (!xen_batched_set_pte(ptep, native_make_pte(0)))
533 native_pte_clear(mm, addr, ptep);
534}
535
536static void xen_pmd_clear(pmd_t *pmdp)
537{
538 trace_xen_mmu_pmd_clear(pmdp);
539 set_pmd(pmdp, __pmd(0));
540}
541#endif
542
543__visible pmd_t xen_make_pmd(pmdval_t pmd)
544{
545 pmd = pte_pfn_to_mfn(pmd);
546 return native_make_pmd(pmd);
547}
548PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
549
550#if PAGETABLE_LEVELS == 4
551__visible pudval_t xen_pud_val(pud_t pud)
552{
553 return pte_mfn_to_pfn(pud.pud);
554}
555PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
556
557__visible pud_t xen_make_pud(pudval_t pud)
558{
559 pud = pte_pfn_to_mfn(pud);
560
561 return native_make_pud(pud);
562}
563PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
564
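/*
 * On 64-bit, a kernel pgd may have a companion "user" pgd (stored in the
 * pgd page's page->private) which holds the usermode mappings.  Return
 * the corresponding slot in that user pgd, or NULL if there is none or
 * the entry lies outside the user range.
 */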
565static pgd_t *xen_get_user_pgd(pgd_t *pgd)
566{
567 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
568 unsigned offset = pgd - pgd_page;
569 pgd_t *user_ptr = NULL;
570
571 if (offset < pgd_index(USER_LIMIT)) {
572 struct page *page = virt_to_page(pgd_page);
573 user_ptr = (pgd_t *)page->private;
574 if (user_ptr)
575 user_ptr += offset;
576 }
577
578 return user_ptr;
579}
580
581static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
582{
583 struct mmu_update u;
584
585 u.ptr = virt_to_machine(ptr).maddr;
586 u.val = pgd_val_ma(val);
587 xen_extend_mmu_update(&u);
588}
589
/*
 * raw hypercall-based set_pgd, intended for use in early boot before
 * there's a page structure.  This implies:
 *  1. The only existing pagetable is the kernel's
 *  2. It is always pinned
 *  3. It has no user pagetable attached to it
 */
597static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
598{
599 preempt_disable();
600
601 xen_mc_batch();
602
603 __xen_set_pgd_hyper(ptr, val);
604
605 xen_mc_issue(PARAVIRT_LAZY_MMU);
606
607 preempt_enable();
608}
609
610static void xen_set_pgd(pgd_t *ptr, pgd_t val)
611{
612 pgd_t *user_ptr = xen_get_user_pgd(ptr);
613
614 trace_xen_mmu_set_pgd(ptr, user_ptr, val);
615
	/* If page is not pinned, we can just update the entry
	   directly */
618 if (!xen_page_pinned(ptr)) {
619 *ptr = val;
620 if (user_ptr) {
621 WARN_ON(xen_page_pinned(user_ptr));
622 *user_ptr = val;
623 }
624 return;
625 }
626
627
	/* If it's pinned, then we can at least batch the kernel and
	   user updates together. */
629 xen_mc_batch();
630
631 __xen_set_pgd_hyper(ptr, val);
632 if (user_ptr)
633 __xen_set_pgd_hyper(user_ptr, val);
634
635 xen_mc_issue(PARAVIRT_LAZY_MMU);
636}
637#endif
638
/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
 * callback function on each page it finds making up the page table,
 * at every level.  It walks the entire pagetable, but it only bothers
 * pinning pte pages which are below limit.  In the normal case this
 * will be STACK_TOP_MAX, but at boot we need to pin up to
 * FIXADDR_TOP.
 *
 * For 32-bit the important bit is that we don't pin beyond there,
 * because then we start getting into Xen's ptes.
 *
 * For 64-bit, we must skip the Xen hole in the middle of the address
 * space, just after the big x86-64 virtual hole.
 */
654static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
655 int (*func)(struct mm_struct *mm, struct page *,
656 enum pt_level),
657 unsigned long limit)
658{
659 int flush = 0;
660 unsigned hole_low, hole_high;
661 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
662 unsigned pgdidx, pudidx, pmdidx;
663
664
665 limit--;
666 BUG_ON(limit >= FIXADDR_TOP);
667
668 if (xen_feature(XENFEAT_auto_translated_physmap))
669 return 0;
670
	/*
	 * 64-bit has a great big hole in the middle of the address
	 * space, which contains the Xen mappings.  On 32-bit these
	 * will end up making a zero-sized hole and so are a no-op.
	 */
676 hole_low = pgd_index(USER_LIMIT);
677 hole_high = pgd_index(PAGE_OFFSET);
678
679 pgdidx_limit = pgd_index(limit);
680#if PTRS_PER_PUD > 1
681 pudidx_limit = pud_index(limit);
682#else
683 pudidx_limit = 0;
684#endif
685#if PTRS_PER_PMD > 1
686 pmdidx_limit = pmd_index(limit);
687#else
688 pmdidx_limit = 0;
689#endif
690
691 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
692 pud_t *pud;
693
694 if (pgdidx >= hole_low && pgdidx < hole_high)
695 continue;
696
697 if (!pgd_val(pgd[pgdidx]))
698 continue;
699
700 pud = pud_offset(&pgd[pgdidx], 0);
701
702 if (PTRS_PER_PUD > 1)
703 flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
704
705 for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
706 pmd_t *pmd;
707
708 if (pgdidx == pgdidx_limit &&
709 pudidx > pudidx_limit)
710 goto out;
711
712 if (pud_none(pud[pudidx]))
713 continue;
714
715 pmd = pmd_offset(&pud[pudidx], 0);
716
717 if (PTRS_PER_PMD > 1)
718 flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
719
720 for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
721 struct page *pte;
722
723 if (pgdidx == pgdidx_limit &&
724 pudidx == pudidx_limit &&
725 pmdidx > pmdidx_limit)
726 goto out;
727
728 if (pmd_none(pmd[pmdidx]))
729 continue;
730
731 pte = pmd_page(pmd[pmdidx]);
732 flush |= (*func)(mm, pte, PT_PTE);
733 }
734 }
735 }
736
737out:
	/* Do the top level last, so that the callbacks can use it as
	   a cue to do final things like tlb flushes. */
740 flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
741
742 return flush;
743}
744
745static int xen_pgd_walk(struct mm_struct *mm,
746 int (*func)(struct mm_struct *mm, struct page *,
747 enum pt_level),
748 unsigned long limit)
749{
750 return __xen_pgd_walk(mm, mm->pgd, func, limit);
751}
752
/* If we're using split pte locks, then take the page's lock and
   return a pointer to it.  Otherwise return NULL. */
755static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
756{
757 spinlock_t *ptl = NULL;
758
759#if USE_SPLIT_PTE_PTLOCKS
760 ptl = ptlock_ptr(page);
761 spin_lock_nest_lock(ptl, &mm->page_table_lock);
762#endif
763
764 return ptl;
765}
766
767static void xen_pte_unlock(void *v)
768{
769 spinlock_t *ptl = v;
770 spin_unlock(ptl);
771}
772
773static void xen_do_pin(unsigned level, unsigned long pfn)
774{
775 struct mmuext_op op;
776
777 op.cmd = level;
778 op.arg1.mfn = pfn_to_mfn(pfn);
779
780 xen_extend_mmuext_op(&op);
781}
782
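/*
 * Per-page pin callback: queue a multicall remapping the page read-only
 * and, for PTE pages with split pte locks, pin it immediately while its
 * lock is held.  Returns nonzero if a kmap flush is needed because an
 * unpinned highmem page was found.
 */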
783static int xen_pin_page(struct mm_struct *mm, struct page *page,
784 enum pt_level level)
785{
786 unsigned pgfl = TestSetPagePinned(page);
787 int flush;
788
789 if (pgfl)
790 flush = 0;
791 else if (PageHighMem(page))
		/* kmaps need flushing if we found an unpinned
		   highpage */
794 flush = 1;
795 else {
796 void *pt = lowmem_page_address(page);
797 unsigned long pfn = page_to_pfn(page);
798 struct multicall_space mcs = __xen_mc_entry(0);
799 spinlock_t *ptl;
800
801 flush = 0;
		/*
		 * We need to hold the pagetable lock between the time
		 * we make the pagetable RO and when we actually pin
		 * it.  If we don't, then other users may come in and
		 * attempt to update the pagetable by writing it,
		 * which will fail because the memory is RO but not
		 * pinned, so Xen won't do the trap'n'emulate.
		 *
		 * If we're using split pte locks, we can't hold the
		 * entire pagetable's worth of locks during the
		 * traverse, because we may wrap the preempt count (8
		 * bits).  The solution is to mark RO and pin each PTE
		 * page while holding the lock.  This means the number
		 * of locks we end up holding is never more than a
		 * batch size (~32 entries, at present).
		 *
		 * If we're not using split pte locks, we needn't pin
		 * the PTE pages independently, because we're
		 * protected by the overall pagetable lock.
		 */
823 ptl = NULL;
824 if (level == PT_PTE)
825 ptl = xen_pte_lock(page, mm);
826
827 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
828 pfn_pte(pfn, PAGE_KERNEL_RO),
829 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
830
831 if (ptl) {
832 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
833
			/* Queue a deferred unlock for when this batch
			   is completed. */
836 xen_mc_callback(xen_pte_unlock, ptl);
837 }
838 }
839
840 return flush;
841}
842
/* This is called just after a mm has been created, but it has not
   been used yet.  We need to make sure that its pagetable is all
   read-only, and can be pinned. */
846static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
847{
848 trace_xen_mmu_pgd_pin(mm, pgd);
849
850 xen_mc_batch();
851
852 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
853
854 xen_mc_issue(0);
855
856 kmap_flush_unused();
857
858 xen_mc_batch();
859 }
860
861#ifdef CONFIG_X86_64
862 {
863 pgd_t *user_pgd = xen_get_user_pgd(pgd);
864
865 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
866
867 if (user_pgd) {
868 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
869 xen_do_pin(MMUEXT_PIN_L4_TABLE,
870 PFN_DOWN(__pa(user_pgd)));
871 }
872 }
873#else
874#ifdef CONFIG_X86_PAE
875
876 xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
877 PT_PMD);
878#endif
879 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
880#endif
881 xen_mc_issue(0);
882}
883
884static void xen_pgd_pin(struct mm_struct *mm)
885{
886 __xen_pgd_pin(mm, mm->pgd);
887}
888
/*
 * On save, we need to pin all pagetables to make sure they get their
 * mfns turned into pfns.  Search the list for any unpinned pgds and pin
 * them (unpinned pgds are not currently in use, probably because the
 * process is under construction or destruction).
 *
 * Expected to be called in stop_machine() ("equivalent to taking
 * every spinlock in the system"), so the locking doesn't matter at all.
 */
899void xen_mm_pin_all(void)
900{
901 struct page *page;
902
903 spin_lock(&pgd_lock);
904
905 list_for_each_entry(page, &pgd_list, lru) {
906 if (!PagePinned(page)) {
907 __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
908 SetPageSavePinned(page);
909 }
910 }
911
912 spin_unlock(&pgd_lock);
913}
914
/*
 * The init_mm pagetable is really pinned as soon as it is created, but
 * that's before we have page structures in which to store the bits.  So
 * do all the book-keeping now.
 */
920static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
921 enum pt_level level)
922{
923 SetPagePinned(page);
924 return 0;
925}
926
927static void __init xen_mark_init_mm_pinned(void)
928{
929 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
930}
931
932static int xen_unpin_page(struct mm_struct *mm, struct page *page,
933 enum pt_level level)
934{
935 unsigned pgfl = TestClearPagePinned(page);
936
937 if (pgfl && !PageHighMem(page)) {
938 void *pt = lowmem_page_address(page);
939 unsigned long pfn = page_to_pfn(page);
940 spinlock_t *ptl = NULL;
941 struct multicall_space mcs;
942
		/*
		 * Do the converse to pin_page.  If we're using split
		 * pte locks, we must be holding the lock while the pte
		 * page is unpinned but still RO, to prevent concurrent
		 * updates from seeing it in this partially-pinned
		 * state.
		 */
950 if (level == PT_PTE) {
951 ptl = xen_pte_lock(page, mm);
952
953 if (ptl)
954 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
955 }
956
957 mcs = __xen_mc_entry(0);
958
959 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
960 pfn_pte(pfn, PAGE_KERNEL),
961 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
962
963 if (ptl) {
964
965 xen_mc_callback(xen_pte_unlock, ptl);
966 }
967 }
968
969 return 0;
970}
971
972
973static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
974{
975 trace_xen_mmu_pgd_unpin(mm, pgd);
976
977 xen_mc_batch();
978
979 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
980
981#ifdef CONFIG_X86_64
982 {
983 pgd_t *user_pgd = xen_get_user_pgd(pgd);
984
985 if (user_pgd) {
986 xen_do_pin(MMUEXT_UNPIN_TABLE,
987 PFN_DOWN(__pa(user_pgd)));
988 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
989 }
990 }
991#endif
992
993#ifdef CONFIG_X86_PAE
994
995 xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
996 PT_PMD);
997#endif
998
999 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
1000
1001 xen_mc_issue(0);
1002}
1003
1004static void xen_pgd_unpin(struct mm_struct *mm)
1005{
1006 __xen_pgd_unpin(mm, mm->pgd);
1007}
1008
/*
 * On resume, undo any pinning done at save, so that the rest of the
 * kernel doesn't see any unexpected pinned pagetables.
 */
1013void xen_mm_unpin_all(void)
1014{
1015 struct page *page;
1016
1017 spin_lock(&pgd_lock);
1018
1019 list_for_each_entry(page, &pgd_list, lru) {
1020 if (PageSavePinned(page)) {
1021 BUG_ON(!PagePinned(page));
1022 __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
1023 ClearPageSavePinned(page);
1024 }
1025 }
1026
1027 spin_unlock(&pgd_lock);
1028}
1029
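/*
 * A pagetable is pinned the first time its mm is activated or duplicated.
 * From then on every update must go through the hypervisor, until
 * xen_exit_mmap() unpins it again on teardown.
 */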
1030static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
1031{
1032 spin_lock(&next->page_table_lock);
1033 xen_pgd_pin(next);
1034 spin_unlock(&next->page_table_lock);
1035}
1036
1037static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
1038{
1039 spin_lock(&mm->page_table_lock);
1040 xen_pgd_pin(mm);
1041 spin_unlock(&mm->page_table_lock);
1042}
1043
1044
1045#ifdef CONFIG_SMP
/* Another cpu may still have their %cr3 pointing at the pagetable, so
   we need to repoint it somewhere else before we can unpin it. */
1048static void drop_other_mm_ref(void *info)
1049{
1050 struct mm_struct *mm = info;
1051 struct mm_struct *active_mm;
1052
1053 active_mm = this_cpu_read(cpu_tlbstate.active_mm);
1054
1055 if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1056 leave_mm(smp_processor_id());
1057
	/* If this cpu still has a stale cr3 reference, then make sure
	   it has been flushed. */
1060 if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
1061 load_cr3(swapper_pg_dir);
1062}
1063
1064static void xen_drop_mm_ref(struct mm_struct *mm)
1065{
1066 cpumask_var_t mask;
1067 unsigned cpu;
1068
1069 if (current->active_mm == mm) {
1070 if (current->mm == mm)
1071 load_cr3(swapper_pg_dir);
1072 else
1073 leave_mm(smp_processor_id());
1074 }
1075
	/* Get the "official" set of cpus referring to our pagetable. */
1077 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1078 for_each_online_cpu(cpu) {
1079 if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
1080 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1081 continue;
1082 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1083 }
1084 return;
1085 }
1086 cpumask_copy(mask, mm_cpumask(mm));
1087
	/* It's possible that a vcpu may have a stale reference to our
	   cr3, because it's in lazy mode and it hasn't yet flushed
	   its set of pending hypercalls.  In this case, we can look
	   at its actual current cr3 value, and force it to flush if
	   needed. */
1093 for_each_online_cpu(cpu) {
1094 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1095 cpumask_set_cpu(cpu, mask);
1096 }
1097
1098 if (!cpumask_empty(mask))
1099 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1100 free_cpumask_var(mask);
1101}
1102#else
1103static void xen_drop_mm_ref(struct mm_struct *mm)
1104{
1105 if (current->active_mm == mm)
1106 load_cr3(swapper_pg_dir);
1107}
1108#endif
1109
/*
 * While a process runs, Xen pins its pagetable, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 *
 * The only subtle point is that another CPU may still be using the
 * pagetable because of lazy tlb flushing.  This means we need to
 * switch all CPUs off this pagetable before we can unpin it.
 */
1124static void xen_exit_mmap(struct mm_struct *mm)
1125{
1126 get_cpu();
1127 xen_drop_mm_ref(mm);
1128 put_cpu();
1129
1130 spin_lock(&mm->page_table_lock);
1131
	/* pgd may not be pinned in the error exit path of execve */
1133 if (xen_page_pinned(mm->pgd))
1134 xen_pgd_unpin(mm);
1135
1136 spin_unlock(&mm->page_table_lock);
1137}
1138
1139static void xen_post_allocator_init(void);
1140
1141#ifdef CONFIG_X86_64
1142static void __init xen_cleanhighmap(unsigned long vaddr,
1143 unsigned long vaddr_end)
1144{
1145 unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
1146 pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
1147
	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
	 * We include the PMD passed in on _both_ boundaries. */
1150 for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
1151 pmd++, vaddr += PMD_SIZE) {
1152 if (pmd_none(*pmd))
1153 continue;
1154 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1155 set_pmd(pmd, __pmd(0));
1156 }
	/* In case we did something silly, we should crash in this function
	 * instead of somewhere later and be confusing. */
1159 xen_mc_flush();
1160}
1161#endif
1162static void __init xen_pagetable_init(void)
1163{
1164#ifdef CONFIG_X86_64
1165 unsigned long size;
1166 unsigned long addr;
1167#endif
1168 paging_init();
1169 xen_setup_shared_info();
1170#ifdef CONFIG_X86_64
1171 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1172 unsigned long new_mfn_list;
1173
1174 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1175
1176
1177 new_mfn_list = xen_revector_p2m_tree();
1178 if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
1179
1180 memset((void *)xen_start_info->mfn_list, 0xff, size);
1181
1182
1183 BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
1184 addr = xen_start_info->mfn_list;
			/* We round up to the PMD, which means that if anybody
			 * at this stage is still using the __ka address of
			 * xen_start_info or xen_start_info->shared_info they
			 * are going to crash.  Fortunately we have already
			 * revectored in xen_setup_kernel_pagetable and in
			 * xen_setup_shared_info. */
1189 size = roundup(size, PMD_SIZE);
1190 xen_cleanhighmap(addr, addr + size);
1191
1192 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1193 memblock_free(__pa(xen_start_info->mfn_list), size);
1194
1195 xen_start_info->mfn_list = new_mfn_list;
1196 } else
1197 goto skip;
1198 }
1199
	/*
	 * At this stage everything we allocated ourselves is reachable via
	 * the kernel pagetable.  The initial pagetables provided by the
	 * hypervisor (starting at pt_base) are no longer needed through
	 * their __ka aliases, so clean those up and revector pt_base to
	 * its __va address.
	 */
1207 addr = xen_start_info->pt_base;
1208 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
1209
1210 xen_cleanhighmap(addr, addr + size);
1211 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1212#ifdef DEBUG
	/* This is superfluous: the MODULES_VADDR -> MODULES_END range
	 * should already be clear of anything at this stage, but clean
	 * it anyway for good measure. */
1216 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1217#endif
1218skip:
1219#endif
1220 xen_post_allocator_init();
1221}
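/*
 * Xen delivers the faulting address through the shared vcpu_info
 * structure rather than the hardware %cr2, so the cr2 accessors below
 * read and write arch.cr2 in that structure.
 */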
1222static void xen_write_cr2(unsigned long cr2)
1223{
1224 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1225}
1226
1227static unsigned long xen_read_cr2(void)
1228{
1229 return this_cpu_read(xen_vcpu)->arch.cr2;
1230}
1231
1232unsigned long xen_read_cr2_direct(void)
1233{
1234 return this_cpu_read(xen_vcpu_info.arch.cr2);
1235}
1236
1237void xen_flush_tlb_all(void)
1238{
1239 struct mmuext_op *op;
1240 struct multicall_space mcs;
1241
1242 trace_xen_mmu_flush_tlb_all(0);
1243
1244 preempt_disable();
1245
1246 mcs = xen_mc_entry(sizeof(*op));
1247
1248 op = mcs.args;
1249 op->cmd = MMUEXT_TLB_FLUSH_ALL;
1250 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1251
1252 xen_mc_issue(PARAVIRT_LAZY_MMU);
1253
1254 preempt_enable();
1255}
1256static void xen_flush_tlb(void)
1257{
1258 struct mmuext_op *op;
1259 struct multicall_space mcs;
1260
1261 trace_xen_mmu_flush_tlb(0);
1262
1263 preempt_disable();
1264
1265 mcs = xen_mc_entry(sizeof(*op));
1266
1267 op = mcs.args;
1268 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1269 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1270
1271 xen_mc_issue(PARAVIRT_LAZY_MMU);
1272
1273 preempt_enable();
1274}
1275
1276static void xen_flush_tlb_single(unsigned long addr)
1277{
1278 struct mmuext_op *op;
1279 struct multicall_space mcs;
1280
1281 trace_xen_mmu_flush_tlb_single(addr);
1282
1283 preempt_disable();
1284
1285 mcs = xen_mc_entry(sizeof(*op));
1286 op = mcs.args;
1287 op->cmd = MMUEXT_INVLPG_LOCAL;
1288 op->arg1.linear_addr = addr & PAGE_MASK;
1289 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1290
1291 xen_mc_issue(PARAVIRT_LAZY_MMU);
1292
1293 preempt_enable();
1294}
1295
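/*
 * Cross-CPU flushes hand Xen a vcpu bitmap; the hypervisor performs the
 * INVLPG or full flush on the targeted vcpus itself, so no IPIs are sent
 * from the guest.
 */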
1296static void xen_flush_tlb_others(const struct cpumask *cpus,
1297 struct mm_struct *mm, unsigned long start,
1298 unsigned long end)
1299{
1300 struct {
1301 struct mmuext_op op;
1302#ifdef CONFIG_SMP
1303 DECLARE_BITMAP(mask, num_processors);
1304#else
1305 DECLARE_BITMAP(mask, NR_CPUS);
1306#endif
1307 } *args;
1308 struct multicall_space mcs;
1309
1310 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1311
1312 if (cpumask_empty(cpus))
1313 return;
1314
1315 mcs = xen_mc_entry(sizeof(*args));
1316 args = mcs.args;
1317 args->op.arg2.vcpumask = to_cpumask(args->mask);
1318
1319
1320 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1321 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
1322
1323 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1324 if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
1325 args->op.cmd = MMUEXT_INVLPG_MULTI;
1326 args->op.arg1.linear_addr = start;
1327 }
1328
1329 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1330
1331 xen_mc_issue(PARAVIRT_LAZY_MMU);
1332}
1333
1334static unsigned long xen_read_cr3(void)
1335{
1336 return this_cpu_read(xen_cr3);
1337}
1338
1339static void set_current_cr3(void *v)
1340{
1341 this_cpu_write(xen_current_cr3, (unsigned long)v);
1342}
1343
1344static void __xen_write_cr3(bool kernel, unsigned long cr3)
1345{
1346 struct mmuext_op op;
1347 unsigned long mfn;
1348
1349 trace_xen_mmu_write_cr3(kernel, cr3);
1350
1351 if (cr3)
1352 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1353 else
1354 mfn = 0;
1355
1356 WARN_ON(mfn == 0 && kernel);
1357
1358 op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1359 op.arg1.mfn = mfn;
1360
1361 xen_extend_mmuext_op(&op);
1362
1363 if (kernel) {
1364 this_cpu_write(xen_cr3, cr3);
1365
		/* Update xen_current_cr3 once the batch has actually
		   been submitted. */
1368 xen_mc_callback(set_current_cr3, (void *)cr3);
1369 }
1370}
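/*
 * Switch to a new pagetable base.  On 64-bit the kernel pgd and its
 * companion user pgd are switched together in a single multicall batch.
 */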
1371static void xen_write_cr3(unsigned long cr3)
1372{
1373 BUG_ON(preemptible());
1374
1375 xen_mc_batch();
1376
	/* Update while interrupts are disabled, so it's atomic with
	   respect to ipis */
1379 this_cpu_write(xen_cr3, cr3);
1380
1381 __xen_write_cr3(true, cr3);
1382
1383#ifdef CONFIG_X86_64
1384 {
1385 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1386 if (user_pgd)
1387 __xen_write_cr3(false, __pa(user_pgd));
1388 else
1389 __xen_write_cr3(false, 0);
1390 }
1391#endif
1392
1393 xen_mc_issue(PARAVIRT_LAZY_CPU);
1394}
1395
/*
 * Early (boot-time) variant of xen_write_cr3: at this point only the
 * kernel (init_mm) pagetable exists and there is no user pagetable to
 * keep in sync, so only the kernel base pointer needs switching.  The
 * normal xen_write_cr3 is installed later, in xen_post_allocator_init().
 */
1417static void __init xen_write_cr3_init(unsigned long cr3)
1418{
1419 BUG_ON(preemptible());
1420
1421 xen_mc_batch();
1422
	/* Update while interrupts are disabled, so it's atomic with
	   respect to ipis */
1425 this_cpu_write(xen_cr3, cr3);
1426
1427 __xen_write_cr3(true, cr3);
1428
1429 xen_mc_issue(PARAVIRT_LAZY_CPU);
1430}
1431#endif
1432
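/*
 * Per-mm pagetable allocation hook.  On 64-bit this also allocates the
 * companion user pgd and seeds its vsyscall entry; on 32-bit nothing
 * extra is needed.
 */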
1433static int xen_pgd_alloc(struct mm_struct *mm)
1434{
1435 pgd_t *pgd = mm->pgd;
1436 int ret = 0;
1437
1438 BUG_ON(PagePinned(virt_to_page(pgd)));
1439
1440#ifdef CONFIG_X86_64
1441 {
1442 struct page *page = virt_to_page(pgd);
1443 pgd_t *user_pgd;
1444
1445 BUG_ON(page->private != 0);
1446
1447 ret = -ENOMEM;
1448
1449 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1450 page->private = (unsigned long)user_pgd;
1451
1452 if (user_pgd != NULL) {
1453 user_pgd[pgd_index(VSYSCALL_START)] =
1454 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1455 ret = 0;
1456 }
1457
1458 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1459 }
1460#endif
1461
1462 return ret;
1463}
1464
1465static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1466{
1467#ifdef CONFIG_X86_64
1468 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1469
1470 if (user_pgd)
1471 free_page((unsigned long)user_pgd);
1472#endif
1473}
1474
1475#ifdef CONFIG_X86_32
1476static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1477{
	/* If there's an existing pte, then don't allow _PAGE_RW to be set */
1479 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1480 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1481 pte_val_ma(pte));
1482
1483 return pte;
1484}
1485#else
1486static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1487{
1488 return pte;
1489}
1490#endif
1491
/*
 * Init-time set_pte while constructing initial pagetables, which
 * doesn't allow RO page table pages to be remapped RW.
 *
 * If there is no MFN for this PFN then this page is initially
 * ballooned out so clear the PTE (as in decrease_reservation() in
 * drivers/xen/balloon.c).
 *
 * Many of these PTE updates are done on unpinned and writable pages
 * and doing a hypercall for these is unnecessary and expensive.  At
 * this point it is not possible to tell if a page is pinned or not,
 * so always write the PTE directly and rely on Xen trapping and
 * emulating any updates as necessary.
 */
1506static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1507{
1508 if (pte_mfn(pte) != INVALID_P2M_ENTRY)
1509 pte = mask_rw_pte(ptep, pte);
1510 else
1511 pte = __pte_ma(0);
1512
1513 native_set_pte(ptep, pte);
1514}
1515
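/* Issue a single synchronous mmuext_op (e.g. pin/unpin) for @pfn. */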
1516static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1517{
1518 struct mmuext_op op;
1519 op.cmd = cmd;
1520 op.arg1.mfn = pfn_to_mfn(pfn);
1521 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1522 BUG();
1523}
1524
/* Early in boot, while setting up the initial pagetable, assume
   everything is pinned. */
1527static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1528{
1529#ifdef CONFIG_FLATMEM
1530 BUG_ON(mem_map);
1531#endif
1532 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1533 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1534}
1535
1536
1537static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1538{
1539#ifdef CONFIG_FLATMEM
1540 BUG_ON(mem_map);
1541#endif
1542 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1543}
1544
/* Early release_pte assumes that all pts are pinned, since there's
   only init_mm and anything attached to that is pinned. */
1547static void __init xen_release_pte_init(unsigned long pfn)
1548{
1549 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1550 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1551}
1552
1553static void __init xen_release_pmd_init(unsigned long pfn)
1554{
1555 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1556}
1557
1558static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1559{
1560 struct multicall_space mcs;
1561 struct mmuext_op *op;
1562
1563 mcs = __xen_mc_entry(sizeof(*op));
1564 op = mcs.args;
1565 op->cmd = cmd;
1566 op->arg1.mfn = pfn_to_mfn(pfn);
1567
1568 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1569}
1570
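/* Queue a multicall remapping the page at @pfn with protection @prot. */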
1571static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1572{
1573 struct multicall_space mcs;
1574 unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
1575
1576 mcs = __xen_mc_entry(0);
1577 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1578 pfn_pte(pfn, prot), 0);
1579}
1580
/* This needs to make sure the new pte page is pinned iff it is being
   attached to a pinned pagetable. */
1583static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1584 unsigned level)
1585{
1586 bool pinned = PagePinned(virt_to_page(mm->pgd));
1587
1588 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1589
1590 if (pinned) {
1591 struct page *page = pfn_to_page(pfn);
1592
1593 SetPagePinned(page);
1594
1595 if (!PageHighMem(page)) {
1596 xen_mc_batch();
1597
1598 __set_pfn_prot(pfn, PAGE_KERNEL_RO);
1599
1600 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1601 __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1602
1603 xen_mc_issue(PARAVIRT_LAZY_MMU);
1604 } else {
			/* make sure there are no stray mappings of
			   this page */
1607 kmap_flush_unused();
1608 }
1609 }
1610}
1611
1612static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1613{
1614 xen_alloc_ptpage(mm, pfn, PT_PTE);
1615}
1616
1617static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1618{
1619 xen_alloc_ptpage(mm, pfn, PT_PMD);
1620}
1621
1622
1623static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1624{
1625 struct page *page = pfn_to_page(pfn);
1626 bool pinned = PagePinned(page);
1627
1628 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1629
1630 if (pinned) {
1631 if (!PageHighMem(page)) {
1632 xen_mc_batch();
1633
1634 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1635 __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1636
1637 __set_pfn_prot(pfn, PAGE_KERNEL);
1638
1639 xen_mc_issue(PARAVIRT_LAZY_MMU);
1640 }
1641 ClearPagePinned(page);
1642 }
1643}
1644
1645static void xen_release_pte(unsigned long pfn)
1646{
1647 xen_release_ptpage(pfn, PT_PTE);
1648}
1649
1650static void xen_release_pmd(unsigned long pfn)
1651{
1652 xen_release_ptpage(pfn, PT_PMD);
1653}
1654
1655#if PAGETABLE_LEVELS == 4
1656static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1657{
1658 xen_alloc_ptpage(mm, pfn, PT_PUD);
1659}
1660
1661static void xen_release_pud(unsigned long pfn)
1662{
1663 xen_release_ptpage(pfn, PT_PUD);
1664}
1665#endif
1666
1667void __init xen_reserve_top(void)
1668{
1669#ifdef CONFIG_X86_32
1670 unsigned long top = HYPERVISOR_VIRT_START;
1671 struct xen_platform_parameters pp;
1672
1673 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1674 top = pp.virt_start;
1675
1676 reserve_top_address(-top);
1677#endif
1678}
1679
/*
 * Like __va(), but returns the address in the kernel mapping (which is
 * all we have until the physical memory mapping has been set up).
 */
1684static void *__ka(phys_addr_t paddr)
1685{
1686#ifdef CONFIG_X86_64
1687 return (void *)(paddr + __START_KERNEL_map);
1688#else
1689 return __va(paddr);
1690#endif
1691}
1692
1693
1694static unsigned long m2p(phys_addr_t maddr)
1695{
1696 phys_addr_t paddr;
1697
1698 maddr &= PTE_PFN_MASK;
1699 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1700
1701 return paddr;
1702}
1703
1704
1705static void *m2v(phys_addr_t maddr)
1706{
1707 return __ka(m2p(maddr));
1708}
1709
1710
1711static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
1712{
1713 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1714 pte_t pte = pfn_pte(pfn, prot);
1715
1716
1717 if (xen_feature(XENFEAT_auto_translated_physmap))
1718 return;
1719
1720 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1721 BUG();
1722}
1723static void set_page_prot(void *addr, pgprot_t prot)
1724{
1725 return set_page_prot_flags(addr, prot, UVMF_NONE);
1726}
1727#ifdef CONFIG_X86_32
1728static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1729{
1730 unsigned pmdidx, pteidx;
1731 unsigned ident_pte;
1732 unsigned long pfn;
1733
1734 level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
1735 PAGE_SIZE);
1736
1737 ident_pte = 0;
1738 pfn = 0;
1739 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1740 pte_t *pte_page;
1741
1742
1743 if (pmd_present(pmd[pmdidx]))
1744 pte_page = m2v(pmd[pmdidx].pmd);
1745 else {
1746
1747 if (ident_pte == LEVEL1_IDENT_ENTRIES)
1748 break;
1749
1750 pte_page = &level1_ident_pgt[ident_pte];
1751 ident_pte += PTRS_PER_PTE;
1752
1753 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1754 }
1755
1756
1757 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1758 pte_t pte;
1759
1760#ifdef CONFIG_X86_32
1761 if (pfn > max_pfn_mapped)
1762 max_pfn_mapped = pfn;
1763#endif
1764
1765 if (!pte_none(pte_page[pteidx]))
1766 continue;
1767
1768 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1769 pte_page[pteidx] = pte;
1770 }
1771 }
1772
1773 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1774 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1775
1776 set_page_prot(pmd, PAGE_KERNEL_RO);
1777}
1778#endif
1779void __init xen_setup_machphys_mapping(void)
1780{
1781 struct xen_machphys_mapping mapping;
1782
1783 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1784 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1785 machine_to_phys_nr = mapping.max_mfn + 1;
1786 } else {
1787 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1788 }
1789#ifdef CONFIG_X86_32
1790 WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
1791 < machine_to_phys_mapping);
1792#endif
1793}
1794
1795#ifdef CONFIG_X86_64
1796static void convert_pfn_mfn(void *v)
1797{
1798 pte_t *pte = v;
1799 int i;
1800
	/* All levels are converted the same way, so just treat them
	   as ptes. */
1803 for (i = 0; i < PTRS_PER_PTE; i++)
1804 pte[i] = xen_make_pte(pte[i].pte);
1805}
1806static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1807 unsigned long addr)
1808{
1809 if (*pt_base == PFN_DOWN(__pa(addr))) {
1810 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1811 clear_page((void *)addr);
1812 (*pt_base)++;
1813 }
1814 if (*pt_end == PFN_DOWN(__pa(addr))) {
1815 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1816 clear_page((void *)addr);
1817 (*pt_end)--;
1818 }
1819}
1820
/*
 * Set up the initial kernel pagetable.
 *
 * We can construct this by grafting the Xen-provided pagetable into
 * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
 * level2_ident_pgt and level2_kernel_pgt.  This means that only the
 * kernel has a physical mapping to start with - but that's enough to
 * get __va working.  The rest of the physical mapping is filled in
 * once some sort of allocator has been set up.
 */
1831void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1832{
1833 pud_t *l3;
1834 pmd_t *l2;
1835 unsigned long addr[3];
1836 unsigned long pt_base, pt_end;
1837 unsigned i;
1838
	/* max_pfn_mapped is the last pfn mapped in the initial memory
	 * mappings.  Considering that on Xen, after the kernel mappings,
	 * we have the mappings of some pages that don't exist in pfn
	 * space, we set max_pfn_mapped to the last real pfn mapped. */
1843 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1844
1845 pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1846 pt_end = pt_base + xen_start_info->nr_pt_frames;
1847
1848
1849 init_level4_pgt[0] = __pgd(0);
1850
1851 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		/* Pre-constructed entries are in pfn, so convert to mfn.
		 * L4[272] -> level3_ident_pgt
		 * L4[511] -> level3_kernel_pgt */
1855 convert_pfn_mfn(init_level4_pgt);
1856
1857
1858 convert_pfn_mfn(level3_ident_pgt);
1859
1860
1861 convert_pfn_mfn(level3_kernel_pgt);
1862
1863
1864 convert_pfn_mfn(level2_fixmap_pgt);
1865 }
1866
1867 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1868 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1869
1870 addr[0] = (unsigned long)pgd;
1871 addr[1] = (unsigned long)l3;
1872 addr[2] = (unsigned long)l2;
1873
	/* Graft it onto L4[272][0].  Note that this creates an aliasing
	 * problem: both L4[272][0] and L4[511][511] end up with entries
	 * pointing at the same L1 (PTE) tables, so modifying a page via
	 * its __va mapping also modifies it via __ka.  (Repointing the
	 * PMD entries themselves, as cleanup_highmap does, is fine.) */
1879 copy_page(level2_ident_pgt, l2);
1880
1881 copy_page(level2_kernel_pgt, l2);
1882
1883 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1884
1885 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1886 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1887 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1888 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1889 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1890 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1891 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1892 set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
1893
1894
1895 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1896 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1897
1898
1899 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1900
		/*
		 * At this stage there can be no user pgd, and no page
		 * structure to attach it to, so make sure we just set
		 * the kernel pgd.
		 */
1906 xen_mc_batch();
1907 __xen_write_cr3(true, __pa(init_level4_pgt));
1908 xen_mc_issue(PARAVIRT_LAZY_CPU);
1909 } else
1910 native_write_cr3(__pa(init_level4_pgt));
1911
	/* We can't easily rip out the L3 and L2 here, as the Xen pagetables
	 * are laid out as [L4], [L1], [L2], [L3], [L1], [L1] ... for the
	 * initial domain, but as [L4], [L3], [L2], [L1], [L1] ... for
	 * toolstack-built guests.  So for dom0 we can only rip out the
	 * [L4] (pgd), while for other guests we shave off three pages.
	 */
1918 for (i = 0; i < ARRAY_SIZE(addr); i++)
1919 check_pt_base(&pt_base, &pt_end, addr[i]);
1920
1921
1922 memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
1923
1924 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
1925}
1926#else
1927static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
1928static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
1929
1930static void __init xen_write_cr3_init(unsigned long cr3)
1931{
1932 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
1933
1934 BUG_ON(read_cr3_pa() != __pa(initial_page_table));
1935 BUG_ON(cr3 != __pa(swapper_pg_dir));
1936
	/*
	 * We are switching to swapper_pg_dir for the first time (from
	 * initial_page_table) and therefore need to mark that page
	 * read-only and then pin it.
	 *
	 * Xen disallows sharing of kernel PMDs for PAE
	 * guests.  Therefore we must copy the kernel PMD from
	 * initial_page_table into a new kernel PMD to be used in
	 * swapper_pg_dir.
	 */
1947 swapper_kernel_pmd =
1948 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1949 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
1950 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
1951 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
1952 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
1953
1954 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
1955 xen_write_cr3(cr3);
1956 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
1957
1958 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
1959 PFN_DOWN(__pa(initial_page_table)));
1960 set_page_prot(initial_page_table, PAGE_KERNEL);
1961 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
1962
1963 pv_mmu_ops.write_cr3 = &xen_write_cr3;
1964}
1965
1966void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1967{
1968 pmd_t *kernel_pmd;
1969
1970 initial_kernel_pmd =
1971 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
1972
1973 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) +
1974 xen_start_info->nr_pt_frames * PAGE_SIZE +
1975 512*1024);
1976
1977 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
1978 copy_page(initial_kernel_pmd, kernel_pmd);
1979
1980 xen_map_identity_early(initial_kernel_pmd, max_pfn);
1981
1982 copy_page(initial_page_table, pgd);
1983 initial_page_table[KERNEL_PGD_BOUNDARY] =
1984 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
1985
1986 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
1987 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
1988 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
1989
1990 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1991
1992 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
1993 PFN_DOWN(__pa(initial_page_table)));
1994 xen_write_cr3(__pa(initial_page_table));
1995
1996 memblock_reserve(__pa(xen_start_info->pt_base),
1997 xen_start_info->nr_pt_frames * PAGE_SIZE);
1998}
1999#endif
2000
2001static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
2002
2003static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2004{
2005 pte_t pte;
2006
2007 phys >>= PAGE_SHIFT;
2008
2009 switch (idx) {
2010 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2011 case FIX_RO_IDT:
2012#ifdef CONFIG_X86_32
2013 case FIX_WP_TEST:
2014 case FIX_VDSO:
2015# ifdef CONFIG_HIGHMEM
2016 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2017# endif
2018#else
2019 case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
2020 case VVAR_PAGE:
2021#endif
2022 case FIX_TEXT_POKE0:
2023 case FIX_TEXT_POKE1:
2024
2025 pte = pfn_pte(phys, prot);
2026 break;
2027
2028#ifdef CONFIG_X86_LOCAL_APIC
2029 case FIX_APIC_BASE:
2030 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2031 break;
2032#endif
2033
2034#ifdef CONFIG_X86_IO_APIC
2035 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
		/*
		 * We just don't map the IO APIC - all access is via
		 * hypercalls.  Map it at the dummy page instead, so a
		 * stray access is harmless.
		 */
2040 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2041 break;
2042#endif
2043
2044 case FIX_PARAVIRT_BOOTMAP:
		/* This is an MFN, but it isn't an IO mapping from the
		   IO domain */
2047 pte = mfn_pte(phys, prot);
2048 break;
2049
2050 default:
2051
2052 pte = mfn_pte(phys, prot);
2053 break;
2054 }
2055
2056 __native_set_fixmap(idx, pte);
2057
2058#ifdef CONFIG_X86_64
	/* Replicate changes to map the vsyscall page into the user
	   pagetable vsyscall mapping. */
2061 if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
2062 idx == VVAR_PAGE) {
2063 unsigned long vaddr = __fix_to_virt(idx);
2064 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
2065 }
2066#endif
2067}
2068
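/*
 * Once the normal page allocator is up, replace the boot-time pv-ops
 * (which assume every pagetable page is pinned) with the runtime
 * versions that track pinning per page.
 */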
2069static void __init xen_post_allocator_init(void)
2070{
2071 if (xen_feature(XENFEAT_auto_translated_physmap))
2072 return;
2073
2074 pv_mmu_ops.set_pte = xen_set_pte;
2075 pv_mmu_ops.set_pmd = xen_set_pmd;
2076 pv_mmu_ops.set_pud = xen_set_pud;
2077#if PAGETABLE_LEVELS == 4
2078 pv_mmu_ops.set_pgd = xen_set_pgd;
2079#endif
2080
	/* This will work as long as patching hasn't happened yet
	   (which it hasn't) */
2083 pv_mmu_ops.alloc_pte = xen_alloc_pte;
2084 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
2085 pv_mmu_ops.release_pte = xen_release_pte;
2086 pv_mmu_ops.release_pmd = xen_release_pmd;
2087#if PAGETABLE_LEVELS == 4
2088 pv_mmu_ops.alloc_pud = xen_alloc_pud;
2089 pv_mmu_ops.release_pud = xen_release_pud;
2090#endif
2091
2092#ifdef CONFIG_X86_64
2093 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2094 SetPagePinned(virt_to_page(level3_user_vsyscall));
2095#endif
2096 xen_mark_init_mm_pinned();
2097}
2098
2099static void xen_leave_lazy_mmu(void)
2100{
2101 preempt_disable();
2102 xen_mc_flush();
2103 paravirt_leave_lazy_mmu();
2104 preempt_enable();
2105}
2106
2107static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2108 .read_cr2 = xen_read_cr2,
2109 .write_cr2 = xen_write_cr2,
2110
2111 .read_cr3 = xen_read_cr3,
2112 .write_cr3 = xen_write_cr3_init,
2113
2114 .flush_tlb_user = xen_flush_tlb,
2115 .flush_tlb_kernel = xen_flush_tlb,
2116 .flush_tlb_single = xen_flush_tlb_single,
2117 .flush_tlb_others = xen_flush_tlb_others,
2118
2119 .pte_update = paravirt_nop,
2120
2121 .pgd_alloc = xen_pgd_alloc,
2122 .pgd_free = xen_pgd_free,
2123
2124 .alloc_pte = xen_alloc_pte_init,
2125 .release_pte = xen_release_pte_init,
2126 .alloc_pmd = xen_alloc_pmd_init,
2127 .release_pmd = xen_release_pmd_init,
2128
2129 .set_pte = xen_set_pte_init,
2130 .set_pte_at = xen_set_pte_at,
2131 .set_pmd = xen_set_pmd_hyper,
2132
2133 .ptep_modify_prot_start = __ptep_modify_prot_start,
2134 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
2135
2136 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
2137 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2138
2139 .make_pte = PV_CALLEE_SAVE(xen_make_pte),
2140 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2141
2142#ifdef CONFIG_X86_PAE
2143 .set_pte_atomic = xen_set_pte_atomic,
2144 .pte_clear = xen_pte_clear,
2145 .pmd_clear = xen_pmd_clear,
2146#endif
2147 .set_pud = xen_set_pud_hyper,
2148
2149 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2150 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2151
2152#if PAGETABLE_LEVELS == 4
2153 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2154 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2155 .set_pgd = xen_set_pgd_hyper,
2156
2157 .alloc_pud = xen_alloc_pmd_init,
2158 .release_pud = xen_release_pmd_init,
2159#endif
2160
2161 .activate_mm = xen_activate_mm,
2162 .dup_mmap = xen_dup_mmap,
2163 .exit_mmap = xen_exit_mmap,
2164
2165 .lazy_mode = {
2166 .enter = paravirt_enter_lazy_mmu,
2167 .leave = xen_leave_lazy_mmu,
2168 .flush = paravirt_flush_lazy_mmu,
2169 },
2170
2171 .set_fixmap = xen_set_fixmap,
2172};
2173
2174void __init xen_init_mmu_ops(void)
2175{
2176 x86_init.paging.pagetable_init = xen_pagetable_init;
2177 pv_mmu_ops = xen_mmu_ops;
2178
2179 memset(dummy_mapping, 0xff, PAGE_SIZE);
2180}
2181
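/*
 * Support for making a range of guest pages machine-contiguous (or
 * undoing that), as needed for DMA: the pages are unmapped, exchanged
 * with the hypervisor and remapped under xen_reservation_lock.
 */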
2182
2183#define MAX_CONTIG_ORDER 9
2184static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2185
2186#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2187static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2188 unsigned long *in_frames,
2189 unsigned long *out_frames)
2190{
2191 int i;
2192 struct multicall_space mcs;
2193
2194 xen_mc_batch();
2195 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2196 mcs = __xen_mc_entry(0);
2197
2198 if (in_frames)
2199 in_frames[i] = virt_to_mfn(vaddr);
2200
2201 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2202 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2203
2204 if (out_frames)
2205 out_frames[i] = virt_to_pfn(vaddr);
2206 }
2207 xen_mc_issue(0);
2208}
2209
/*
 * Update the pfn-to-mfn mappings for a virtual address range, either to
 * point to an array of mfns, or contiguously from a single starting
 * mfn.
 */
2215static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2216 unsigned long *mfns,
2217 unsigned long first_mfn)
2218{
2219 unsigned i, limit;
2220 unsigned long mfn;
2221
2222 xen_mc_batch();
2223
2224 limit = 1u << order;
2225 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2226 struct multicall_space mcs;
2227 unsigned flags;
2228
2229 mcs = __xen_mc_entry(0);
2230 if (mfns)
2231 mfn = mfns[i];
2232 else
2233 mfn = first_mfn + i;
2234
2235 if (i < (limit - 1))
2236 flags = 0;
2237 else {
2238 if (order == 0)
2239 flags = UVMF_INVLPG | UVMF_ALL;
2240 else
2241 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2242 }
2243
2244 MULTI_update_va_mapping(mcs.mc, vaddr,
2245 mfn_pte(mfn, PAGE_KERNEL), flags);
2246
2247 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2248 }
2249
2250 xen_mc_issue(0);
2251}
2252
/*
 * Perform the hypercall to exchange a region of our pfns to point to
 * memory with the required contiguous alignment.  Takes the pfns as
 * input, and populates mfns as output.
 *
 * Returns a success code indicating whether the hypervisor was able to
 * satisfy the request or not.
 */
2261static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
2262 unsigned long *pfns_in,
2263 unsigned long extents_out,
2264 unsigned int order_out,
2265 unsigned long *mfns_out,
2266 unsigned int address_bits)
2267{
2268 long rc;
2269 int success;
2270
2271 struct xen_memory_exchange exchange = {
2272 .in = {
2273 .nr_extents = extents_in,
2274 .extent_order = order_in,
2275 .extent_start = pfns_in,
2276 .domid = DOMID_SELF
2277 },
2278 .out = {
2279 .nr_extents = extents_out,
2280 .extent_order = order_out,
2281 .extent_start = mfns_out,
2282 .address_bits = address_bits,
2283 .domid = DOMID_SELF
2284 }
2285 };
2286
2287 BUG_ON(extents_in << order_in != extents_out << order_out);
2288
2289 rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
2290 success = (exchange.nr_exchanged == extents_in);
2291
2292 BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
2293 BUG_ON(success && (rc != 0));
2294
2295 return success;
2296}
2297
2298int xen_create_contiguous_region(unsigned long vstart, unsigned int order,
2299 unsigned int address_bits)
2300{
2301 unsigned long *in_frames = discontig_frames, out_frame;
2302 unsigned long flags;
2303 int success;
2304
	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB.  Therefore any calls to
	 * this function are redundant and can be ignored.
	 */
2311 if (xen_feature(XENFEAT_auto_translated_physmap))
2312 return 0;
2313
2314 if (unlikely(order > MAX_CONTIG_ORDER))
2315 return -ENOMEM;
2316
2317 memset((void *) vstart, 0, PAGE_SIZE << order);
2318
2319 spin_lock_irqsave(&xen_reservation_lock, flags);
2320
2321
2322 xen_zap_pfn_range(vstart, order, in_frames, NULL);
2323
2324
2325 out_frame = virt_to_pfn(vstart);
2326 success = xen_exchange_memory(1UL << order, 0, in_frames,
2327 1, order, &out_frame,
2328 address_bits);
2329
2330
2331 if (success)
2332 xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
2333 else
2334 xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
2335
2336 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2337
2338 return success ? 0 : -ENOMEM;
2339}
2340EXPORT_SYMBOL_GPL(xen_create_contiguous_region);
2341
2342void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
2343{
2344 unsigned long *out_frames = discontig_frames, in_frame;
2345 unsigned long flags;
2346 int success;
2347
2348 if (xen_feature(XENFEAT_auto_translated_physmap))
2349 return;
2350
2351 if (unlikely(order > MAX_CONTIG_ORDER))
2352 return;
2353
2354 memset((void *) vstart, 0, PAGE_SIZE << order);
2355
2356 spin_lock_irqsave(&xen_reservation_lock, flags);
2357
2358
2359 in_frame = virt_to_mfn(vstart);
2360
2361
2362 xen_zap_pfn_range(vstart, order, NULL, out_frames);
2363
2364
2365 success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
2366 0, out_frames, 0);
2367
2368
2369 if (success)
2370 xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
2371 else
2372 xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
2373
2374 spin_unlock_irqrestore(&xen_reservation_lock, flags);
2375}
2376EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
2377
2378#ifdef CONFIG_XEN_PVHVM
2379#ifdef CONFIG_PROC_VMCORE
/*
 * The kdump kernel has to check whether a pfn of the crashed kernel was
 * a ballooned page.  vmcore uses this function to decide whether to
 * access a pfn of the crashed kernel.
 *
 * Returns 0 if the pfn is not backed by a RAM page, in which case the
 * caller may handle the pfn specially.
 */
2390static int xen_oldmem_pfn_is_ram(unsigned long pfn)
2391{
2392 struct xen_hvm_get_mem_type a = {
2393 .domid = DOMID_SELF,
2394 .pfn = pfn,
2395 };
2396 int ram;
2397
2398 if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
2399 return -ENXIO;
2400
2401 switch (a.mem_type) {
2402 case HVMMEM_mmio_dm:
2403 ram = 0;
2404 break;
2405 case HVMMEM_ram_rw:
2406 case HVMMEM_ram_ro:
2407 default:
2408 ram = 1;
2409 break;
2410 }
2411
2412 return ram;
2413}
2414#endif
2415
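/*
 * Tell the hypervisor that a process pagetable is being torn down, so it
 * can eagerly drop any shadow pagetable state it keeps for PVHVM guests.
 */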
2416static void xen_hvm_exit_mmap(struct mm_struct *mm)
2417{
2418 struct xen_hvm_pagetable_dying a;
2419 int rc;
2420
2421 a.domid = DOMID_SELF;
2422 a.gpa = __pa(mm->pgd);
2423 rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
2424 WARN_ON_ONCE(rc < 0);
2425}
2426
2427static int is_pagetable_dying_supported(void)
2428{
2429 struct xen_hvm_pagetable_dying a;
2430 int rc = 0;
2431
2432 a.domid = DOMID_SELF;
2433 a.gpa = 0x00;
2434 rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
2435 if (rc < 0) {
2436 printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
2437 return 0;
2438 }
2439 return 1;
2440}
2441
2442void __init xen_hvm_init_mmu_ops(void)
2443{
2444 if (is_pagetable_dying_supported())
2445 pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
2446#ifdef CONFIG_PROC_VMCORE
2447 WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
2448#endif
2449}
2450#endif
2451
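/*
 * Mapping of foreign domain frames into a privileged guest: mmu_update
 * hypercalls are batched REMAP_BATCH_SIZE entries at a time while
 * walking the VMA with apply_to_page_range().
 */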
2452#define REMAP_BATCH_SIZE 16
2453
2454struct remap_data {
2455 unsigned long mfn;
2456 pgprot_t prot;
2457 struct mmu_update *mmu_update;
2458};
2459
2460static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
2461 unsigned long addr, void *data)
2462{
2463 struct remap_data *rmd = data;
2464 pte_t pte = pte_mkspecial(mfn_pte(rmd->mfn++, rmd->prot));
2465
2466 rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
2467 rmd->mmu_update->val = pte_val_ma(pte);
2468 rmd->mmu_update++;
2469
2470 return 0;
2471}
2472
2473int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
2474 unsigned long addr,
2475 xen_pfn_t mfn, int nr,
2476 pgprot_t prot, unsigned domid,
2477 struct page **pages)
2478
2479{
2480 struct remap_data rmd;
2481 struct mmu_update mmu_update[REMAP_BATCH_SIZE];
2482 int batch;
2483 unsigned long range;
2484 int err = 0;
2485
2486 if (xen_feature(XENFEAT_auto_translated_physmap))
2487 return -EINVAL;
2488
2489 BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
2490
2491 rmd.mfn = mfn;
2492 rmd.prot = prot;
2493
2494 while (nr) {
2495 batch = min(REMAP_BATCH_SIZE, nr);
2496 range = (unsigned long)batch << PAGE_SHIFT;
2497
2498 rmd.mmu_update = mmu_update;
2499 err = apply_to_page_range(vma->vm_mm, addr, range,
2500 remap_area_mfn_pte_fn, &rmd);
2501 if (err)
2502 goto out;
2503
2504 err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
2505 if (err < 0)
2506 goto out;
2507
2508 nr -= batch;
2509 addr += range;
2510 }
2511
2512 err = 0;
2513out:
2514
2515 xen_flush_tlb_all();
2516
2517 return err;
2518}
2519EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
2520
2521
2522int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
2523 int numpgs, struct page **pages)
2524{
2525 if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
2526 return 0;
2527
2528 return -EINVAL;
2529}
2530EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
2531