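/*
 * Xen MMU operations
 *
 * Core page-table handling for Xen paravirtualized guests: page-table
 * updates are forwarded to the hypervisor (directly or via batched
 * multicalls), page-table pages are pinned read-only while in use, and
 * pfn<->mfn translation is applied when reading or writing entries.
 */
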
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>

#include <trace/events/xen.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/paravirt.h>
#include <asm/e820.h>
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/pat.h>
#include <asm/smp.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>

#include "multicalls.h"
#include "mmu.h"
#include "debugfs.h"

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 */
DEFINE_SPINLOCK(xen_reservation_lock);

#ifdef CONFIG_X86_32
/*
 * Identity map, in addition to plain kernel map.  This needs to be
 * large enough to allocate page table pages to allocate the rest.
 * Each page can map 2MB.
 */
#define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
#endif
#ifdef CONFIG_X86_64
/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif

/*
 * Note about cr3 (pagetable base) values:
 *
 * xen_cr3 contains the current logical cr3 value; it contains the
 * last set cr3.  This may not be the current effective cr3, because
 * its update may be being lazily deferred.  However, a vcpu looking
 * at its own cr3 can use this value knowing that everything will
 * be self-consistent.
 *
 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 * hypercall to set the vcpu cr3 is complete (so it may be a little
 * out of date, but it will never be set early).  If one vcpu is
 * looking at another vcpu's cr3 value, it should use this variable.
 */
DEFINE_PER_CPU(unsigned long, xen_cr3);		 /* cr3 stored as physaddr */
DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */

static phys_addr_t xen_pt_base, xen_pt_size __initdata;

/*
 * Just beyond the highest usermode address.  STACK_TOP_MAX has a
 * redzone above it, so round it up to a PGD boundary.
 */
#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

unsigned long arbitrary_virt_to_mfn(void *vaddr)
{
	xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);

	return PFN_DOWN(maddr.maddr);
}

xmaddr_t arbitrary_virt_to_machine(void *vaddr)
{
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;
	pte_t *pte;
	unsigned offset;

	/*
	 * if the PFN is in the linear mapped vaddr range, we can just use
	 * the (quick) virt_to_machine() p2m lookup
	 */
	if (virt_addr_valid(vaddr))
		return virt_to_machine(vaddr);

	/* otherwise we have to do a (slower) full page-table walk */

	pte = lookup_address(address, &level);
	BUG_ON(pte == NULL);
	offset = address & ~PAGE_MASK;
	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);

void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

/* Has this page-table page been handed over to Xen (pinned read-only)? */
static bool xen_page_pinned(void *ptr)
{
	struct page *page = virt_to_page(ptr);

	return PagePinned(page);
}

void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	trace_xen_mmu_set_domain_pte(ptep, pteval, domid);

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;

	u->ptr = virt_to_machine(ptep).maddr;
	u->val = pte_val_ma(pteval);

	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
EXPORT_SYMBOL_GPL(xen_set_domain_pte);

/*
 * Append an mmu_update to the pending multicall batch, either by
 * extending the current mmu_update multicall or, if that is not
 * possible, by starting a new one.
 */
static void xen_extend_mmu_update(const struct mmu_update *update)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));

	if (mcs.mc != NULL) {
		/* Extend the existing multicall by one more update. */
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *update;
}

/* As above, but for an mmuext_op. */
static void xen_extend_mmuext_op(const struct mmuext_op *op)
{
	struct multicall_space mcs;
	struct mmuext_op *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *op;
}

static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may not be in the direct mapping, so do a full lookup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pmd_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	trace_xen_mmu_set_pmd(ptr, val);

	/* If page is not pinned, we can just update the entry
	   directly */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pmd_hyper(ptr, val);
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}

static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update u;

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
		return false;

	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	u.val = pte_val_ma(pteval);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	return true;
}

static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	if (!xen_batched_set_pte(ptep, pteval)) {
		/*
		 * Could call native_set_pte() here and trap and
		 * emulate the PTE write, but with 32-bit guests this
		 * needs two traps (#GP and #PF) which is a lot more
		 * expensive anyway..
		 */
		struct mmu_update u;

		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
		u.val = pte_val_ma(pteval);
		HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
	}
}

static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep)
{
	/* Just return the pte as-is.  We preserve the bits on commit */
	trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
	return *ptep;
}

void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
				 pte_t *ptep, pte_t pte)
{
	struct mmu_update u;

	trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
	u.val = pte_val_ma(pte);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}

/* Assume pteval_t is equivalent to all the other *val_t types. */
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		unsigned long pfn = mfn_to_pfn(mfn);

		pteval_t flags = val & PTE_FLAGS_MASK;
		if (unlikely(pfn == ~0))
			val = flags & ~_PAGE_PRESENT;
		else
			val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
	}

	return val;
}

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		unsigned long mfn;

		if (!xen_feature(XENFEAT_auto_translated_physmap))
			mfn = __pfn_to_mfn(pfn);
		else
			mfn = pfn;
		/*
		 * If there's no mfn for the pfn, then just create an
		 * empty non-present pte.  Unfortunately this loses
		 * information about the original pfn, so
		 * pte_mfn_to_pfn is asymmetric.
		 */
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		} else
			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}

	return val;
}
410
411__visible pteval_t xen_pte_val(pte_t pte)
412{
413 pteval_t pteval = pte.pte;
414
415 return pte_mfn_to_pfn(pteval);
416}
417PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);
418
419__visible pgdval_t xen_pgd_val(pgd_t pgd)
420{
421 return pte_mfn_to_pfn(pgd.pgd);
422}
423PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);
424
425__visible pte_t xen_make_pte(pteval_t pte)
426{
427 pte = pte_pfn_to_mfn(pte);
428
429 return native_make_pte(pte);
430}
431PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
432
433__visible pgd_t xen_make_pgd(pgdval_t pgd)
434{
435 pgd = pte_pfn_to_mfn(pgd);
436 return native_make_pgd(pgd);
437}
438PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);
439
440__visible pmdval_t xen_pmd_val(pmd_t pmd)
441{
442 return pte_mfn_to_pfn(pmd.pmd);
443}
444PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);
445
446static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
447{
448 struct mmu_update u;
449
450 preempt_disable();
451
452 xen_mc_batch();
453
454
455 u.ptr = arbitrary_virt_to_machine(ptr).maddr;
456 u.val = pud_val_ma(val);
457 xen_extend_mmu_update(&u);
458
459 xen_mc_issue(PARAVIRT_LAZY_MMU);
460
461 preempt_enable();
462}
463
464static void xen_set_pud(pud_t *ptr, pud_t val)
465{
466 trace_xen_mmu_set_pud(ptr, val);
467
468
469
470 if (!xen_page_pinned(ptr)) {
471 *ptr = val;
472 return;
473 }
474
475 xen_set_pud_hyper(ptr, val);
476}
477
478#ifdef CONFIG_X86_PAE
479static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
480{
481 trace_xen_mmu_set_pte_atomic(ptep, pte);
482 set_64bit((u64 *)ptep, native_pte_val(pte));
483}
484
485static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
486{
487 trace_xen_mmu_pte_clear(mm, addr, ptep);
488 if (!xen_batched_set_pte(ptep, native_make_pte(0)))
489 native_pte_clear(mm, addr, ptep);
490}
491
492static void xen_pmd_clear(pmd_t *pmdp)
493{
494 trace_xen_mmu_pmd_clear(pmdp);
495 set_pmd(pmdp, __pmd(0));
496}
497#endif
498
499__visible pmd_t xen_make_pmd(pmdval_t pmd)
500{
501 pmd = pte_pfn_to_mfn(pmd);
502 return native_make_pmd(pmd);
503}
504PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
505
506#if CONFIG_PGTABLE_LEVELS == 4
507__visible pudval_t xen_pud_val(pud_t pud)
508{
509 return pte_mfn_to_pfn(pud.pud);
510}
511PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);
512
513__visible pud_t xen_make_pud(pudval_t pud)
514{
515 pud = pte_pfn_to_mfn(pud);
516
517 return native_make_pud(pud);
518}
519PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
520
521static pgd_t *xen_get_user_pgd(pgd_t *pgd)
522{
523 pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
524 unsigned offset = pgd - pgd_page;
525 pgd_t *user_ptr = NULL;
526
527 if (offset < pgd_index(USER_LIMIT)) {
528 struct page *page = virt_to_page(pgd_page);
529 user_ptr = (pgd_t *)page->private;
530 if (user_ptr)
531 user_ptr += offset;
532 }
533
534 return user_ptr;
535}
536
537static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
538{
539 struct mmu_update u;
540
541 u.ptr = virt_to_machine(ptr).maddr;
542 u.val = pgd_val_ma(val);
543 xen_extend_mmu_update(&u);
544}
545
546
547
548
549
550
551
552
553static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
554{
555 preempt_disable();
556
557 xen_mc_batch();
558
559 __xen_set_pgd_hyper(ptr, val);
560
561 xen_mc_issue(PARAVIRT_LAZY_MMU);
562
563 preempt_enable();
564}
565
566static void xen_set_pgd(pgd_t *ptr, pgd_t val)
567{
568 pgd_t *user_ptr = xen_get_user_pgd(ptr);
569
570 trace_xen_mmu_set_pgd(ptr, user_ptr, val);
571
572
573
574 if (!xen_page_pinned(ptr)) {
575 *ptr = val;
576 if (user_ptr) {
577 WARN_ON(xen_page_pinned(user_ptr));
578 *user_ptr = val;
579 }
580 return;
581 }
582
583
584
585 xen_mc_batch();
586
587 __xen_set_pgd_hyper(ptr, val);
588 if (user_ptr)
589 __xen_set_pgd_hyper(user_ptr, val);
590
591 xen_mc_issue(PARAVIRT_LAZY_MMU);
592}
593#endif
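
/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
 * callback function on each page it finds making up the page table,
 * at every level.  It walks the entire pagetable, but it only bothers
 * pinning pte pages which are below limit.  In the normal case this
 * will be STACK_TOP_MAX, but at boot we need to pin up to FIXADDR_TOP.
 *
 * For 32-bit the important bit is that we don't pin beyond there,
 * because then we start getting into Xen's ptes.
 *
 * For 64-bit, we must skip the Xen hole in the middle of the address
 * space, just after the big x86-64 virtual hole.
 */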
610static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
611 int (*func)(struct mm_struct *mm, struct page *,
612 enum pt_level),
613 unsigned long limit)
614{
615 int flush = 0;
616 unsigned hole_low, hole_high;
617 unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
618 unsigned pgdidx, pudidx, pmdidx;
619
620
621 limit--;
622 BUG_ON(limit >= FIXADDR_TOP);
623
624 if (xen_feature(XENFEAT_auto_translated_physmap))
625 return 0;
626
627
628
629
630
631
632 hole_low = pgd_index(USER_LIMIT);
633 hole_high = pgd_index(PAGE_OFFSET);
634
635 pgdidx_limit = pgd_index(limit);
636#if PTRS_PER_PUD > 1
637 pudidx_limit = pud_index(limit);
638#else
639 pudidx_limit = 0;
640#endif
641#if PTRS_PER_PMD > 1
642 pmdidx_limit = pmd_index(limit);
643#else
644 pmdidx_limit = 0;
645#endif
646
647 for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
648 pud_t *pud;
649
650 if (pgdidx >= hole_low && pgdidx < hole_high)
651 continue;
652
653 if (!pgd_val(pgd[pgdidx]))
654 continue;
655
656 pud = pud_offset(&pgd[pgdidx], 0);
657
658 if (PTRS_PER_PUD > 1)
659 flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
660
661 for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
662 pmd_t *pmd;
663
664 if (pgdidx == pgdidx_limit &&
665 pudidx > pudidx_limit)
666 goto out;
667
668 if (pud_none(pud[pudidx]))
669 continue;
670
671 pmd = pmd_offset(&pud[pudidx], 0);
672
673 if (PTRS_PER_PMD > 1)
674 flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
675
676 for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
677 struct page *pte;
678
679 if (pgdidx == pgdidx_limit &&
680 pudidx == pudidx_limit &&
681 pmdidx > pmdidx_limit)
682 goto out;
683
684 if (pmd_none(pmd[pmdidx]))
685 continue;
686
687 pte = pmd_page(pmd[pmdidx]);
688 flush |= (*func)(mm, pte, PT_PTE);
689 }
690 }
691 }
692
693out:
694
695
696 flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
697
698 return flush;
699}
700
701static int xen_pgd_walk(struct mm_struct *mm,
702 int (*func)(struct mm_struct *mm, struct page *,
703 enum pt_level),
704 unsigned long limit)
705{
706 return __xen_pgd_walk(mm, mm->pgd, func, limit);
707}
708
709
710
711static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
712{
713 spinlock_t *ptl = NULL;
714
715#if USE_SPLIT_PTE_PTLOCKS
716 ptl = ptlock_ptr(page);
717 spin_lock_nest_lock(ptl, &mm->page_table_lock);
718#endif
719
720 return ptl;
721}
722
723static void xen_pte_unlock(void *v)
724{
725 spinlock_t *ptl = v;
726 spin_unlock(ptl);
727}
728
729static void xen_do_pin(unsigned level, unsigned long pfn)
730{
731 struct mmuext_op op;
732
733 op.cmd = level;
734 op.arg1.mfn = pfn_to_mfn(pfn);
735
736 xen_extend_mmuext_op(&op);
737}
738
739static int xen_pin_page(struct mm_struct *mm, struct page *page,
740 enum pt_level level)
741{
742 unsigned pgfl = TestSetPagePinned(page);
743 int flush;
744
745 if (pgfl)
746 flush = 0;
747 else if (PageHighMem(page))
748
749
750 flush = 1;
751 else {
752 void *pt = lowmem_page_address(page);
753 unsigned long pfn = page_to_pfn(page);
754 struct multicall_space mcs = __xen_mc_entry(0);
755 spinlock_t *ptl;
756
757 flush = 0;
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779 ptl = NULL;
780 if (level == PT_PTE)
781 ptl = xen_pte_lock(page, mm);
782
783 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
784 pfn_pte(pfn, PAGE_KERNEL_RO),
785 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
786
787 if (ptl) {
788 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
789
790
791
792 xen_mc_callback(xen_pte_unlock, ptl);
793 }
794 }
795
796 return flush;
797}
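
/*
 * This is called just after a mm has been created, but it has not
 * been used yet.  We need to make sure that its pagetable is all
 * read-only, and can be pinned.
 */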
802static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
803{
804 trace_xen_mmu_pgd_pin(mm, pgd);
805
806 xen_mc_batch();
807
808 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
809
810 xen_mc_issue(0);
811
812 kmap_flush_unused();
813
814 xen_mc_batch();
815 }
816
817#ifdef CONFIG_X86_64
818 {
819 pgd_t *user_pgd = xen_get_user_pgd(pgd);
820
821 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
822
823 if (user_pgd) {
824 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
825 xen_do_pin(MMUEXT_PIN_L4_TABLE,
826 PFN_DOWN(__pa(user_pgd)));
827 }
828 }
829#else
830#ifdef CONFIG_X86_PAE
831
832 xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
833 PT_PMD);
834#endif
835 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
836#endif
837 xen_mc_issue(0);
838}
839
840static void xen_pgd_pin(struct mm_struct *mm)
841{
842 __xen_pgd_pin(mm, mm->pgd);
843}
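
/*
 * On save, we need to pin all pagetables to make sure they get their
 * mfns turned into pfns.  Search the list for any unpinned pgds and pin
 * them (unpinned pgds are not currently in use, probably because the
 * process is under construction or destruction).
 *
 * Expected to be called in stop_machine() ("equivalent to taking
 * every spinlock in the system"), so the locking doesn't matter at all.
 */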
855void xen_mm_pin_all(void)
856{
857 struct page *page;
858
859 spin_lock(&pgd_lock);
860
861 list_for_each_entry(page, &pgd_list, lru) {
862 if (!PagePinned(page)) {
863 __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
864 SetPageSavePinned(page);
865 }
866 }
867
868 spin_unlock(&pgd_lock);
869}
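
/*
 * The init_mm pagetable is really pinned as soon as its created, but
 * that's before we have page structures to store the bits.  So do all
 * the book-keeping now.
 */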
876static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
877 enum pt_level level)
878{
879 SetPagePinned(page);
880 return 0;
881}
882
883static void __init xen_mark_init_mm_pinned(void)
884{
885 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
886}
887
888static int xen_unpin_page(struct mm_struct *mm, struct page *page,
889 enum pt_level level)
890{
891 unsigned pgfl = TestClearPagePinned(page);
892
893 if (pgfl && !PageHighMem(page)) {
894 void *pt = lowmem_page_address(page);
895 unsigned long pfn = page_to_pfn(page);
896 spinlock_t *ptl = NULL;
897 struct multicall_space mcs;
898
899
900
901
902
903
904
905
906 if (level == PT_PTE) {
907 ptl = xen_pte_lock(page, mm);
908
909 if (ptl)
910 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
911 }
912
913 mcs = __xen_mc_entry(0);
914
915 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
916 pfn_pte(pfn, PAGE_KERNEL),
917 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
918
919 if (ptl) {
920
921 xen_mc_callback(xen_pte_unlock, ptl);
922 }
923 }
924
925 return 0;
926}
927
928
929static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
930{
931 trace_xen_mmu_pgd_unpin(mm, pgd);
932
933 xen_mc_batch();
934
935 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
936
937#ifdef CONFIG_X86_64
938 {
939 pgd_t *user_pgd = xen_get_user_pgd(pgd);
940
941 if (user_pgd) {
942 xen_do_pin(MMUEXT_UNPIN_TABLE,
943 PFN_DOWN(__pa(user_pgd)));
944 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
945 }
946 }
947#endif
948
949#ifdef CONFIG_X86_PAE
950
951 xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
952 PT_PMD);
953#endif
954
955 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
956
957 xen_mc_issue(0);
958}
959
960static void xen_pgd_unpin(struct mm_struct *mm)
961{
962 __xen_pgd_unpin(mm, mm->pgd);
963}
964
965
966
967
968
969void xen_mm_unpin_all(void)
970{
971 struct page *page;
972
973 spin_lock(&pgd_lock);
974
975 list_for_each_entry(page, &pgd_list, lru) {
976 if (PageSavePinned(page)) {
977 BUG_ON(!PagePinned(page));
978 __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
979 ClearPageSavePinned(page);
980 }
981 }
982
983 spin_unlock(&pgd_lock);
984}
985
986static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
987{
988 spin_lock(&next->page_table_lock);
989 xen_pgd_pin(next);
990 spin_unlock(&next->page_table_lock);
991}
992
993static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
994{
995 spin_lock(&mm->page_table_lock);
996 xen_pgd_pin(mm);
997 spin_unlock(&mm->page_table_lock);
998}
999

#ifdef CONFIG_SMP
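/*
 * Another cpu may still have their %cr3 pointing at the pagetable, so
 * we need to repoint it somewhere else before we can unpin it.
 */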
1004static void drop_other_mm_ref(void *info)
1005{
1006 struct mm_struct *mm = info;
1007 struct mm_struct *active_mm;
1008
1009 active_mm = this_cpu_read(cpu_tlbstate.active_mm);
1010
1011 if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1012 leave_mm(smp_processor_id());
1013
1014
1015
1016 if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
1017 load_cr3(swapper_pg_dir);
1018}
1019
1020static void xen_drop_mm_ref(struct mm_struct *mm)
1021{
1022 cpumask_var_t mask;
1023 unsigned cpu;
1024
1025 if (current->active_mm == mm) {
1026 if (current->mm == mm)
1027 load_cr3(swapper_pg_dir);
1028 else
1029 leave_mm(smp_processor_id());
1030 }
1031
1032
1033 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1034 for_each_online_cpu(cpu) {
1035 if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
1036 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1037 continue;
1038 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1039 }
1040 return;
1041 }
1042 cpumask_copy(mask, mm_cpumask(mm));
1043
1044
1045
1046
1047
1048
1049 for_each_online_cpu(cpu) {
1050 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1051 cpumask_set_cpu(cpu, mask);
1052 }
1053
1054 if (!cpumask_empty(mask))
1055 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1056 free_cpumask_var(mask);
1057}
1058#else
1059static void xen_drop_mm_ref(struct mm_struct *mm)
1060{
1061 if (current->active_mm == mm)
1062 load_cr3(swapper_pg_dir);
1063}
1064#endif
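
/*
 * While a process runs, Xen pins its pagetable, which means that the
 * hypervisor forces it to be read-only, and it controls all updates
 * to it.  This means that all pagetable updates have to go via the
 * hypervisor, which is moderately expensive.
 *
 * Since we're pulling the pagetable down, we switch to use init_mm,
 * unpin the old process pagetable and mark it all read-write, which
 * allows further operations on it to be simple memory accesses.
 *
 * The only subtle point is that another CPU may be still using the
 * pagetable because of lazy tlb flushing.  This means we need to
 * switch all CPUs off this pagetable before we can unpin it.
 */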
1080static void xen_exit_mmap(struct mm_struct *mm)
1081{
1082 get_cpu();
1083 xen_drop_mm_ref(mm);
1084 put_cpu();
1085
1086 spin_lock(&mm->page_table_lock);
1087
1088
1089 if (xen_page_pinned(mm->pgd))
1090 xen_pgd_unpin(mm);
1091
1092 spin_unlock(&mm->page_table_lock);
1093}
1094
1095static void xen_post_allocator_init(void);
1096
1097static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1098{
1099 struct mmuext_op op;
1100
1101 op.cmd = cmd;
1102 op.arg1.mfn = pfn_to_mfn(pfn);
1103 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1104 BUG();
1105}
1106
1107#ifdef CONFIG_X86_64
static void __init xen_cleanhighmap(unsigned long vaddr,
				    unsigned long vaddr_end)
{
	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);

	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
	 * We include the PMD passed in on _both_ boundaries. */
	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
			pmd++, vaddr += PMD_SIZE) {
		if (pmd_none(*pmd))
			continue;
		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
			set_pmd(pmd, __pmd(0));
	}
	/* In case we did something silly, we should crash in this function
	 * instead of somewhere later and be confusing. */
	xen_mc_flush();
}
1127
1128
1129
1130
1131static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
1132{
1133 void *vaddr = __va(paddr);
1134 void *vaddr_end = vaddr + size;
1135
1136 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
1137 make_lowmem_page_readwrite(vaddr);
1138
1139 memblock_free(paddr, size);
1140}
1141
1142static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
1143{
1144 unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
1145
1146 if (unpin)
1147 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
1148 ClearPagePinned(virt_to_page(__va(pa)));
1149 xen_free_ro_pages(pa, PAGE_SIZE);
1150}
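
/*
 * Tear down the mapping of the initial p2m list starting at vaddr and
 * hand the backing pages, and the page-table pages used to map them,
 * back to memblock.
 */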
1156static void __init xen_cleanmfnmap(unsigned long vaddr)
1157{
1158 unsigned long va = vaddr & PMD_MASK;
1159 unsigned long pa;
1160 pgd_t *pgd = pgd_offset_k(va);
1161 pud_t *pud_page = pud_offset(pgd, 0);
1162 pud_t *pud;
1163 pmd_t *pmd;
1164 pte_t *pte;
1165 unsigned int i;
1166 bool unpin;
1167
1168 unpin = (vaddr == 2 * PGDIR_SIZE);
1169 set_pgd(pgd, __pgd(0));
1170 do {
1171 pud = pud_page + pud_index(va);
1172 if (pud_none(*pud)) {
1173 va += PUD_SIZE;
1174 } else if (pud_large(*pud)) {
1175 pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
1176 xen_free_ro_pages(pa, PUD_SIZE);
1177 va += PUD_SIZE;
1178 } else {
1179 pmd = pmd_offset(pud, va);
1180 if (pmd_large(*pmd)) {
1181 pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
1182 xen_free_ro_pages(pa, PMD_SIZE);
1183 } else if (!pmd_none(*pmd)) {
1184 pte = pte_offset_kernel(pmd, va);
1185 set_pmd(pmd, __pmd(0));
1186 for (i = 0; i < PTRS_PER_PTE; ++i) {
1187 if (pte_none(pte[i]))
1188 break;
1189 pa = pte_pfn(pte[i]) << PAGE_SHIFT;
1190 xen_free_ro_pages(pa, PAGE_SIZE);
1191 }
1192 xen_cleanmfnmap_free_pgtbl(pte, unpin);
1193 }
1194 va += PMD_SIZE;
1195 if (pmd_index(va))
1196 continue;
1197 set_pud(pud, __pud(0));
1198 xen_cleanmfnmap_free_pgtbl(pmd, unpin);
1199 }
1200
1201 } while (pud_index(va) || pmd_index(va));
1202 xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
1203}
1204
1205static void __init xen_pagetable_p2m_free(void)
1206{
1207 unsigned long size;
1208 unsigned long addr;
1209
1210 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1211
1212
1213 if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1214 return;
1215
1216
1217 memset((void *)xen_start_info->mfn_list, 0xff, size);
1218
1219 addr = xen_start_info->mfn_list;
1220
1221
1222
1223
1224
1225
1226
1227
1228 size = roundup(size, PMD_SIZE);
1229
1230 if (addr >= __START_KERNEL_map) {
1231 xen_cleanhighmap(addr, addr + size);
1232 size = PAGE_ALIGN(xen_start_info->nr_pages *
1233 sizeof(unsigned long));
1234 memblock_free(__pa(addr), size);
1235 } else {
1236 xen_cleanmfnmap(addr);
1237 }
1238}
1239
1240static void __init xen_pagetable_cleanhighmap(void)
1241{
1242 unsigned long size;
1243 unsigned long addr;
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253 addr = xen_start_info->pt_base;
1254 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
1255
1256 xen_cleanhighmap(addr, addr + size);
1257 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1258#ifdef DEBUG
1259
1260
1261
1262 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1263#endif
1264}
1265#endif
1266
1267static void __init xen_pagetable_p2m_setup(void)
1268{
1269 if (xen_feature(XENFEAT_auto_translated_physmap))
1270 return;
1271
1272 xen_vmalloc_p2m_tree();
1273
1274#ifdef CONFIG_X86_64
1275 xen_pagetable_p2m_free();
1276
1277 xen_pagetable_cleanhighmap();
1278#endif
1279
1280 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1281}
1282
1283static void __init xen_pagetable_init(void)
1284{
1285 paging_init();
1286 xen_post_allocator_init();
1287
1288 xen_pagetable_p2m_setup();
1289
1290
1291 xen_build_mfn_list_list();
1292
1293
1294 if (!xen_feature(XENFEAT_auto_translated_physmap))
1295 xen_remap_memory();
1296
1297 xen_setup_shared_info();
1298}
1299static void xen_write_cr2(unsigned long cr2)
1300{
1301 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1302}
1303
1304static unsigned long xen_read_cr2(void)
1305{
1306 return this_cpu_read(xen_vcpu)->arch.cr2;
1307}
1308
1309unsigned long xen_read_cr2_direct(void)
1310{
1311 return this_cpu_read(xen_vcpu_info.arch.cr2);
1312}
1313
1314void xen_flush_tlb_all(void)
1315{
1316 struct mmuext_op *op;
1317 struct multicall_space mcs;
1318
1319 trace_xen_mmu_flush_tlb_all(0);
1320
1321 preempt_disable();
1322
1323 mcs = xen_mc_entry(sizeof(*op));
1324
1325 op = mcs.args;
1326 op->cmd = MMUEXT_TLB_FLUSH_ALL;
1327 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1328
1329 xen_mc_issue(PARAVIRT_LAZY_MMU);
1330
1331 preempt_enable();
1332}
1333static void xen_flush_tlb(void)
1334{
1335 struct mmuext_op *op;
1336 struct multicall_space mcs;
1337
1338 trace_xen_mmu_flush_tlb(0);
1339
1340 preempt_disable();
1341
1342 mcs = xen_mc_entry(sizeof(*op));
1343
1344 op = mcs.args;
1345 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1346 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1347
1348 xen_mc_issue(PARAVIRT_LAZY_MMU);
1349
1350 preempt_enable();
1351}
1352
1353static void xen_flush_tlb_single(unsigned long addr)
1354{
1355 struct mmuext_op *op;
1356 struct multicall_space mcs;
1357
1358 trace_xen_mmu_flush_tlb_single(addr);
1359
1360 preempt_disable();
1361
1362 mcs = xen_mc_entry(sizeof(*op));
1363 op = mcs.args;
1364 op->cmd = MMUEXT_INVLPG_LOCAL;
1365 op->arg1.linear_addr = addr & PAGE_MASK;
1366 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1367
1368 xen_mc_issue(PARAVIRT_LAZY_MMU);
1369
1370 preempt_enable();
1371}
1372
1373static void xen_flush_tlb_others(const struct cpumask *cpus,
1374 struct mm_struct *mm, unsigned long start,
1375 unsigned long end)
1376{
1377 struct {
1378 struct mmuext_op op;
1379#ifdef CONFIG_SMP
1380 DECLARE_BITMAP(mask, num_processors);
1381#else
1382 DECLARE_BITMAP(mask, NR_CPUS);
1383#endif
1384 } *args;
1385 struct multicall_space mcs;
1386
1387 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1388
1389 if (cpumask_empty(cpus))
1390 return;
1391
1392 mcs = xen_mc_entry(sizeof(*args));
1393 args = mcs.args;
1394 args->op.arg2.vcpumask = to_cpumask(args->mask);
1395
1396
1397 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1398 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
1399
1400 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1401 if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
1402 args->op.cmd = MMUEXT_INVLPG_MULTI;
1403 args->op.arg1.linear_addr = start;
1404 }
1405
1406 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1407
1408 xen_mc_issue(PARAVIRT_LAZY_MMU);
1409}
1410
1411static unsigned long xen_read_cr3(void)
1412{
1413 return this_cpu_read(xen_cr3);
1414}
1415
1416static void set_current_cr3(void *v)
1417{
1418 this_cpu_write(xen_current_cr3, (unsigned long)v);
1419}
1420
1421static void __xen_write_cr3(bool kernel, unsigned long cr3)
1422{
1423 struct mmuext_op op;
1424 unsigned long mfn;
1425
1426 trace_xen_mmu_write_cr3(kernel, cr3);
1427
1428 if (cr3)
1429 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1430 else
1431 mfn = 0;
1432
1433 WARN_ON(mfn == 0 && kernel);
1434
1435 op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1436 op.arg1.mfn = mfn;
1437
1438 xen_extend_mmuext_op(&op);
1439
1440 if (kernel) {
1441 this_cpu_write(xen_cr3, cr3);
1442
1443
1444
1445 xen_mc_callback(set_current_cr3, (void *)cr3);
1446 }
1447}
1448static void xen_write_cr3(unsigned long cr3)
1449{
1450 BUG_ON(preemptible());
1451
1452 xen_mc_batch();
1453
1454
1455
1456 this_cpu_write(xen_cr3, cr3);
1457
1458 __xen_write_cr3(true, cr3);
1459
1460#ifdef CONFIG_X86_64
1461 {
1462 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1463 if (user_pgd)
1464 __xen_write_cr3(false, __pa(user_pgd));
1465 else
1466 __xen_write_cr3(false, 0);
1467 }
1468#endif
1469
1470 xen_mc_issue(PARAVIRT_LAZY_CPU);
1471}
1472
1473#ifdef CONFIG_X86_64
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494static void __init xen_write_cr3_init(unsigned long cr3)
1495{
1496 BUG_ON(preemptible());
1497
1498 xen_mc_batch();
1499
1500
1501
1502 this_cpu_write(xen_cr3, cr3);
1503
1504 __xen_write_cr3(true, cr3);
1505
1506 xen_mc_issue(PARAVIRT_LAZY_CPU);
1507}
1508#endif
1509
1510static int xen_pgd_alloc(struct mm_struct *mm)
1511{
1512 pgd_t *pgd = mm->pgd;
1513 int ret = 0;
1514
1515 BUG_ON(PagePinned(virt_to_page(pgd)));
1516
1517#ifdef CONFIG_X86_64
1518 {
1519 struct page *page = virt_to_page(pgd);
1520 pgd_t *user_pgd;
1521
1522 BUG_ON(page->private != 0);
1523
1524 ret = -ENOMEM;
1525
1526 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1527 page->private = (unsigned long)user_pgd;
1528
1529 if (user_pgd != NULL) {
1530#ifdef CONFIG_X86_VSYSCALL_EMULATION
1531 user_pgd[pgd_index(VSYSCALL_ADDR)] =
1532 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1533#endif
1534 ret = 0;
1535 }
1536
1537 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1538 }
1539#endif
1540
1541 return ret;
1542}
1543
1544static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1545{
1546#ifdef CONFIG_X86_64
1547 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1548
1549 if (user_pgd)
1550 free_page((unsigned long)user_pgd);
1551#endif
1552}
1553
1554#ifdef CONFIG_X86_32
1555static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1556{
1557
1558 if (pte_val_ma(*ptep) & _PAGE_PRESENT)
1559 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1560 pte_val_ma(pte));
1561
1562 return pte;
1563}
1564#else
1565static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte)
1566{
1567 unsigned long pfn;
1568
1569 if (xen_feature(XENFEAT_writable_page_tables) ||
1570 xen_feature(XENFEAT_auto_translated_physmap) ||
1571 xen_start_info->mfn_list >= __START_KERNEL_map)
1572 return pte;
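
	/*
	 * Pages belonging to the initial p2m list mapped outside the default
	 * address range must be mapped read-only.  This region contains the
	 * page tables for mapping the p2m list, too, and page tables MUST be
	 * mapped read-only.
	 */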
1580 pfn = pte_pfn(pte);
1581 if (pfn >= xen_start_info->first_p2m_pfn &&
1582 pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
1583 pte = __pte_ma(pte_val_ma(pte) & ~_PAGE_RW);
1584
1585 return pte;
1586}
1587#endif
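
/*
 * Init-time set_pte while constructing initial pagetables, which
 * doesn't allow RO page table pages to be remapped RW.
 *
 * If there is no MFN for this PFN then this page is initially
 * ballooned out so clear the PTE (as in decrease_reservation() in
 * drivers/xen/balloon.c).
 *
 * Many of these PTE updates are done on unpinned and writable pages
 * and doing a hypercall for these is unnecessary and expensive.  At
 * this point it is not possible to tell if a page is pinned or not,
 * so always write the PTE directly and rely on Xen trapping and
 * emulating any updates as necessary.
 */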
1603static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1604{
1605 if (pte_mfn(pte) != INVALID_P2M_ENTRY)
1606 pte = mask_rw_pte(ptep, pte);
1607 else
1608 pte = __pte_ma(0);
1609
1610 native_set_pte(ptep, pte);
1611}
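
/* Early in boot, while setting up the initial pagetable, assume
   everything is pinned. */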
1615static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1616{
1617#ifdef CONFIG_FLATMEM
1618 BUG_ON(mem_map);
1619#endif
1620 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1621 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1622}
1623
1624
1625static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1626{
1627#ifdef CONFIG_FLATMEM
1628 BUG_ON(mem_map);
1629#endif
1630 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1631}
1632
1633
1634
1635static void __init xen_release_pte_init(unsigned long pfn)
1636{
1637 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1638 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1639}
1640
1641static void __init xen_release_pmd_init(unsigned long pfn)
1642{
1643 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1644}
1645
1646static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1647{
1648 struct multicall_space mcs;
1649 struct mmuext_op *op;
1650
1651 mcs = __xen_mc_entry(sizeof(*op));
1652 op = mcs.args;
1653 op->cmd = cmd;
1654 op->arg1.mfn = pfn_to_mfn(pfn);
1655
1656 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1657}
1658
1659static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1660{
1661 struct multicall_space mcs;
1662 unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
1663
1664 mcs = __xen_mc_entry(0);
1665 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1666 pfn_pte(pfn, prot), 0);
1667}
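
/* This needs to make sure the new pte page is pinned iff its being
   attached to a pinned pagetable. */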
1671static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1672 unsigned level)
1673{
1674 bool pinned = PagePinned(virt_to_page(mm->pgd));
1675
1676 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1677
1678 if (pinned) {
1679 struct page *page = pfn_to_page(pfn);
1680
1681 SetPagePinned(page);
1682
1683 if (!PageHighMem(page)) {
1684 xen_mc_batch();
1685
1686 __set_pfn_prot(pfn, PAGE_KERNEL_RO);
1687
1688 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1689 __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1690
1691 xen_mc_issue(PARAVIRT_LAZY_MMU);
1692 } else {
1693
1694
1695 kmap_flush_unused();
1696 }
1697 }
1698}
1699
1700static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1701{
1702 xen_alloc_ptpage(mm, pfn, PT_PTE);
1703}
1704
1705static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1706{
1707 xen_alloc_ptpage(mm, pfn, PT_PMD);
1708}
1709
1710
1711static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1712{
1713 struct page *page = pfn_to_page(pfn);
1714 bool pinned = PagePinned(page);
1715
1716 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1717
1718 if (pinned) {
1719 if (!PageHighMem(page)) {
1720 xen_mc_batch();
1721
1722 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1723 __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1724
1725 __set_pfn_prot(pfn, PAGE_KERNEL);
1726
1727 xen_mc_issue(PARAVIRT_LAZY_MMU);
1728 }
1729 ClearPagePinned(page);
1730 }
1731}
1732
1733static void xen_release_pte(unsigned long pfn)
1734{
1735 xen_release_ptpage(pfn, PT_PTE);
1736}
1737
1738static void xen_release_pmd(unsigned long pfn)
1739{
1740 xen_release_ptpage(pfn, PT_PMD);
1741}
1742
1743#if CONFIG_PGTABLE_LEVELS == 4
1744static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1745{
1746 xen_alloc_ptpage(mm, pfn, PT_PUD);
1747}
1748
1749static void xen_release_pud(unsigned long pfn)
1750{
1751 xen_release_ptpage(pfn, PT_PUD);
1752}
1753#endif
1754
1755void __init xen_reserve_top(void)
1756{
1757#ifdef CONFIG_X86_32
1758 unsigned long top = HYPERVISOR_VIRT_START;
1759 struct xen_platform_parameters pp;
1760
1761 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1762 top = pp.virt_start;
1763
1764 reserve_top_address(-top);
1765#endif
1766}
1767
1768
1769
1770
1771
1772static void * __init __ka(phys_addr_t paddr)
1773{
1774#ifdef CONFIG_X86_64
1775 return (void *)(paddr + __START_KERNEL_map);
1776#else
1777 return __va(paddr);
1778#endif
1779}
1780
1781
1782static unsigned long __init m2p(phys_addr_t maddr)
1783{
1784 phys_addr_t paddr;
1785
1786 maddr &= PTE_PFN_MASK;
1787 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1788
1789 return paddr;
1790}
1791
1792
1793static void * __init m2v(phys_addr_t maddr)
1794{
1795 return __ka(m2p(maddr));
1796}
1797
1798
1799static void __init set_page_prot_flags(void *addr, pgprot_t prot,
1800 unsigned long flags)
1801{
1802 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1803 pte_t pte = pfn_pte(pfn, prot);
1804
1805
1806 if (xen_feature(XENFEAT_auto_translated_physmap))
1807 return;
1808
1809 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1810 BUG();
1811}
1812static void __init set_page_prot(void *addr, pgprot_t prot)
1813{
1814 return set_page_prot_flags(addr, prot, UVMF_NONE);
1815}
1816#ifdef CONFIG_X86_32
1817static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1818{
1819 unsigned pmdidx, pteidx;
1820 unsigned ident_pte;
1821 unsigned long pfn;
1822
1823 level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
1824 PAGE_SIZE);
1825
1826 ident_pte = 0;
1827 pfn = 0;
1828 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1829 pte_t *pte_page;
1830
1831
1832 if (pmd_present(pmd[pmdidx]))
1833 pte_page = m2v(pmd[pmdidx].pmd);
1834 else {
1835
1836 if (ident_pte == LEVEL1_IDENT_ENTRIES)
1837 break;
1838
1839 pte_page = &level1_ident_pgt[ident_pte];
1840 ident_pte += PTRS_PER_PTE;
1841
1842 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1843 }
1844
1845
1846 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1847 pte_t pte;
1848
1849 if (pfn > max_pfn_mapped)
1850 max_pfn_mapped = pfn;
1851
1852 if (!pte_none(pte_page[pteidx]))
1853 continue;
1854
1855 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1856 pte_page[pteidx] = pte;
1857 }
1858 }
1859
1860 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1861 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1862
1863 set_page_prot(pmd, PAGE_KERNEL_RO);
1864}
1865#endif
1866void __init xen_setup_machphys_mapping(void)
1867{
1868 struct xen_machphys_mapping mapping;
1869
1870 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1871 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1872 machine_to_phys_nr = mapping.max_mfn + 1;
1873 } else {
1874 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1875 }
1876#ifdef CONFIG_X86_32
1877 WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
1878 < machine_to_phys_mapping);
1879#endif
1880}
1881
1882#ifdef CONFIG_X86_64
1883static void __init convert_pfn_mfn(void *v)
1884{
1885 pte_t *pte = v;
1886 int i;
1887
1888
1889
1890 for (i = 0; i < PTRS_PER_PTE; i++)
1891 pte[i] = xen_make_pte(pte[i].pte);
1892}
1893static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1894 unsigned long addr)
1895{
1896 if (*pt_base == PFN_DOWN(__pa(addr))) {
1897 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1898 clear_page((void *)addr);
1899 (*pt_base)++;
1900 }
1901 if (*pt_end == PFN_DOWN(__pa(addr))) {
1902 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1903 clear_page((void *)addr);
1904 (*pt_end)--;
1905 }
1906}
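
/*
 * Set up the initial kernel pagetable.
 *
 * We can construct this by grafting the Xen provided pagetable into
 * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
 * level2_ident_pgt, and level2_kernel_pgt.  This means that only the
 * kernel has a physical mapping to start with - but that's enough to
 * get __va working.  We need to fill in the rest of the physical
 * mapping once some sort of allocator has been set up.
 */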
1918void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1919{
1920 pud_t *l3;
1921 pmd_t *l2;
1922 unsigned long addr[3];
1923 unsigned long pt_base, pt_end;
1924 unsigned i;
1925
1926
1927
1928
1929
1930 if (xen_start_info->mfn_list < __START_KERNEL_map)
1931 max_pfn_mapped = xen_start_info->first_p2m_pfn;
1932 else
1933 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1934
1935 pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1936 pt_end = pt_base + xen_start_info->nr_pt_frames;
1937
1938
1939 init_level4_pgt[0] = __pgd(0);
1940
1941 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1942
1943
1944
1945 convert_pfn_mfn(init_level4_pgt);
1946
1947
1948 convert_pfn_mfn(level3_ident_pgt);
1949
1950
1951 convert_pfn_mfn(level3_kernel_pgt);
1952
1953
1954 convert_pfn_mfn(level2_fixmap_pgt);
1955 }
1956
1957 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1958 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1959
1960 addr[0] = (unsigned long)pgd;
1961 addr[1] = (unsigned long)l3;
1962 addr[2] = (unsigned long)l2;
1963
1964
1965
1966
1967
1968
1969 copy_page(level2_ident_pgt, l2);
1970
1971 copy_page(level2_kernel_pgt, l2);
1972
1973
1974 i = pgd_index(xen_start_info->mfn_list);
1975 if (i && i < pgd_index(__START_KERNEL_map))
1976 init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
1977
1978 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1979
1980 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1981 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1982 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1983 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1984 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1985 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1986 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1987 set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
1988
1989
1990 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1991 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1992
1993
1994 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1995
1996
1997
1998
1999
2000
2001 xen_mc_batch();
2002 __xen_write_cr3(true, __pa(init_level4_pgt));
2003 xen_mc_issue(PARAVIRT_LAZY_CPU);
2004 } else
2005 native_write_cr3(__pa(init_level4_pgt));
2006
2007
2008
2009
2010
2011
2012
2013 for (i = 0; i < ARRAY_SIZE(addr); i++)
2014 check_pt_base(&pt_base, &pt_end, addr[i]);
2015
2016
2017 xen_pt_base = PFN_PHYS(pt_base);
2018 xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
2019 memblock_reserve(xen_pt_base, xen_pt_size);
2020
2021
2022 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
2023}
2024
2025
2026
2027
2028static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
2029{
2030 unsigned long *vaddr;
2031 unsigned long val;
2032
2033 vaddr = early_memremap_ro(addr, sizeof(val));
2034 val = *vaddr;
2035 early_memunmap(vaddr, sizeof(val));
2036 return val;
2037}
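
/*
 * Translate a virtual address to a physical one without relying on
 * mapped page tables.
 */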
2043static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
2044{
2045 phys_addr_t pa;
2046 pgd_t pgd;
2047 pud_t pud;
2048 pmd_t pmd;
2049 pte_t pte;
2050
2051 pa = read_cr3();
2052 pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
2053 sizeof(pgd)));
2054 if (!pgd_present(pgd))
2055 return 0;
2056
2057 pa = pgd_val(pgd) & PTE_PFN_MASK;
2058 pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
2059 sizeof(pud)));
2060 if (!pud_present(pud))
2061 return 0;
2062 pa = pud_pfn(pud) << PAGE_SHIFT;
2063 if (pud_large(pud))
2064 return pa + (vaddr & ~PUD_MASK);
2065
2066 pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
2067 sizeof(pmd)));
2068 if (!pmd_present(pmd))
2069 return 0;
2070 pa = pmd_pfn(pmd) << PAGE_SHIFT;
2071 if (pmd_large(pmd))
2072 return pa + (vaddr & ~PMD_MASK);
2073
2074 pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
2075 sizeof(pte)));
2076 if (!pte_present(pte))
2077 return 0;
2078 pa = pte_pfn(pte) << PAGE_SHIFT;
2079
2080 return pa | (vaddr & ~PAGE_MASK);
2081}
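
/*
 * Find a new area for the hypervisor-supplied p2m list and relocate
 * the p2m list to it.
 */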
2087void __init xen_relocate_p2m(void)
2088{
2089 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
2090 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
2091 int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
2092 pte_t *pt;
2093 pmd_t *pmd;
2094 pud_t *pud;
2095 pgd_t *pgd;
2096 unsigned long *new_p2m;
2097
2098 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
2099 n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
2100 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
2101 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
2102 n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
2103 n_frames = n_pte + n_pt + n_pmd + n_pud;
2104
2105 new_area = xen_find_free_area(PFN_PHYS(n_frames));
2106 if (!new_area) {
2107 xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
2108 BUG();
2109 }
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119 pud_phys = new_area;
2120 pmd_phys = pud_phys + PFN_PHYS(n_pud);
2121 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
2122 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
2123
2124 pgd = __va(read_cr3());
2125 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
2126 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
2127 pud = early_memremap(pud_phys, PAGE_SIZE);
2128 clear_page(pud);
2129 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
2130 idx_pmd++) {
2131 pmd = early_memremap(pmd_phys, PAGE_SIZE);
2132 clear_page(pmd);
2133 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
2134 idx_pt++) {
2135 pt = early_memremap(pt_phys, PAGE_SIZE);
2136 clear_page(pt);
2137 for (idx_pte = 0;
2138 idx_pte < min(n_pte, PTRS_PER_PTE);
2139 idx_pte++) {
2140 set_pte(pt + idx_pte,
2141 pfn_pte(p2m_pfn, PAGE_KERNEL));
2142 p2m_pfn++;
2143 }
2144 n_pte -= PTRS_PER_PTE;
2145 early_memunmap(pt, PAGE_SIZE);
2146 make_lowmem_page_readonly(__va(pt_phys));
2147 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2148 PFN_DOWN(pt_phys));
2149 set_pmd(pmd + idx_pt,
2150 __pmd(_PAGE_TABLE | pt_phys));
2151 pt_phys += PAGE_SIZE;
2152 }
2153 n_pt -= PTRS_PER_PMD;
2154 early_memunmap(pmd, PAGE_SIZE);
2155 make_lowmem_page_readonly(__va(pmd_phys));
2156 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
2157 PFN_DOWN(pmd_phys));
2158 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
2159 pmd_phys += PAGE_SIZE;
2160 }
2161 n_pmd -= PTRS_PER_PUD;
2162 early_memunmap(pud, PAGE_SIZE);
2163 make_lowmem_page_readonly(__va(pud_phys));
2164 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
2165 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2166 pud_phys += PAGE_SIZE;
2167 }
2168
2169
2170 memcpy(new_p2m, xen_p2m_addr, size);
2171 xen_p2m_addr = new_p2m;
2172
2173
2174 p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
2175 BUG_ON(!p2m_pfn);
2176 p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
2177
2178 if (xen_start_info->mfn_list < __START_KERNEL_map) {
2179 pfn = xen_start_info->first_p2m_pfn;
2180 pfn_end = xen_start_info->first_p2m_pfn +
2181 xen_start_info->nr_p2m_frames;
2182 set_pgd(pgd + 1, __pgd(0));
2183 } else {
2184 pfn = p2m_pfn;
2185 pfn_end = p2m_pfn_end;
2186 }
2187
2188 memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
2189 while (pfn < pfn_end) {
2190 if (pfn == p2m_pfn) {
2191 pfn = p2m_pfn_end;
2192 continue;
2193 }
2194 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
2195 pfn++;
2196 }
2197
2198 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
2199 xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
2200 xen_start_info->nr_p2m_frames = n_frames;
2201}
2202
2203#else
2204static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
2205static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
2206
2207static void __init xen_write_cr3_init(unsigned long cr3)
2208{
2209 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
2210
2211 BUG_ON(read_cr3() != __pa(initial_page_table));
2212 BUG_ON(cr3 != __pa(swapper_pg_dir));
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224 swapper_kernel_pmd =
2225 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2226 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
2227 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
2228 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
2229 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
2230
2231 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
2232 xen_write_cr3(cr3);
2233 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
2234
2235 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
2236 PFN_DOWN(__pa(initial_page_table)));
2237 set_page_prot(initial_page_table, PAGE_KERNEL);
2238 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
2239
2240 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2241}
2242
2243
2244
2245
2246
2247
2248static phys_addr_t xen_find_pt_base(pmd_t *pmd)
2249{
2250 phys_addr_t pt_base, paddr;
2251 unsigned pmdidx;
2252
2253 pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
2254
2255 for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
2256 if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
2257 paddr = m2p(pmd[pmdidx].pmd);
2258 pt_base = min(pt_base, paddr);
2259 }
2260
2261 return pt_base;
2262}
2263
2264void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
2265{
2266 pmd_t *kernel_pmd;
2267
2268 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
2269
2270 xen_pt_base = xen_find_pt_base(kernel_pmd);
2271 xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
2272
2273 initial_kernel_pmd =
2274 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2275
2276 max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
2277
2278 copy_page(initial_kernel_pmd, kernel_pmd);
2279
2280 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2281
2282 copy_page(initial_page_table, pgd);
2283 initial_page_table[KERNEL_PGD_BOUNDARY] =
2284 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2285
2286 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
2287 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
2288 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
2289
2290 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
2291
2292 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
2293 PFN_DOWN(__pa(initial_page_table)));
2294 xen_write_cr3(__pa(initial_page_table));
2295
2296 memblock_reserve(xen_pt_base, xen_pt_size);
2297}
2298#endif
2299
2300void __init xen_reserve_special_pages(void)
2301{
2302 phys_addr_t paddr;
2303
2304 memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
2305 if (xen_start_info->store_mfn) {
2306 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
2307 memblock_reserve(paddr, PAGE_SIZE);
2308 }
2309 if (!xen_initial_domain()) {
2310 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
2311 memblock_reserve(paddr, PAGE_SIZE);
2312 }
2313}
2314
2315void __init xen_pt_check_e820(void)
2316{
2317 if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
2318 xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
2319 BUG();
2320 }
2321}
2322
2323static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
2324
2325static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2326{
2327 pte_t pte;
2328
2329 phys >>= PAGE_SHIFT;
2330
2331 switch (idx) {
2332 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2333 case FIX_RO_IDT:
2334#ifdef CONFIG_X86_32
2335 case FIX_WP_TEST:
2336# ifdef CONFIG_HIGHMEM
2337 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2338# endif
2339#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
2340 case VSYSCALL_PAGE:
2341#endif
2342 case FIX_TEXT_POKE0:
2343 case FIX_TEXT_POKE1:
2344
2345 pte = pfn_pte(phys, prot);
2346 break;
2347
2348#ifdef CONFIG_X86_LOCAL_APIC
2349 case FIX_APIC_BASE:
2350 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2351 break;
2352#endif
2353
2354#ifdef CONFIG_X86_IO_APIC
2355 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
2356
2357
2358
2359
2360 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2361 break;
2362#endif
2363
2364 case FIX_PARAVIRT_BOOTMAP:
2365
2366
2367 pte = mfn_pte(phys, prot);
2368 break;
2369
2370 default:
2371
2372 pte = mfn_pte(phys, prot);
2373 break;
2374 }
2375
2376 __native_set_fixmap(idx, pte);
2377
2378#ifdef CONFIG_X86_VSYSCALL_EMULATION
2379
2380
2381 if (idx == VSYSCALL_PAGE) {
2382 unsigned long vaddr = __fix_to_virt(idx);
2383 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
2384 }
2385#endif
2386}
2387
2388static void __init xen_post_allocator_init(void)
2389{
2390 if (xen_feature(XENFEAT_auto_translated_physmap))
2391 return;
2392
2393 pv_mmu_ops.set_pte = xen_set_pte;
2394 pv_mmu_ops.set_pmd = xen_set_pmd;
2395 pv_mmu_ops.set_pud = xen_set_pud;
2396#if CONFIG_PGTABLE_LEVELS == 4
2397 pv_mmu_ops.set_pgd = xen_set_pgd;
2398#endif
2399
2400
2401
2402 pv_mmu_ops.alloc_pte = xen_alloc_pte;
2403 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
2404 pv_mmu_ops.release_pte = xen_release_pte;
2405 pv_mmu_ops.release_pmd = xen_release_pmd;
2406#if CONFIG_PGTABLE_LEVELS == 4
2407 pv_mmu_ops.alloc_pud = xen_alloc_pud;
2408 pv_mmu_ops.release_pud = xen_release_pud;
2409#endif
2410
2411#ifdef CONFIG_X86_64
2412 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2413 SetPagePinned(virt_to_page(level3_user_vsyscall));
2414#endif
2415 xen_mark_init_mm_pinned();
2416}
2417
2418static void xen_leave_lazy_mmu(void)
2419{
2420 preempt_disable();
2421 xen_mc_flush();
2422 paravirt_leave_lazy_mmu();
2423 preempt_enable();
2424}
2425
2426static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2427 .read_cr2 = xen_read_cr2,
2428 .write_cr2 = xen_write_cr2,
2429
2430 .read_cr3 = xen_read_cr3,
2431 .write_cr3 = xen_write_cr3_init,
2432
2433 .flush_tlb_user = xen_flush_tlb,
2434 .flush_tlb_kernel = xen_flush_tlb,
2435 .flush_tlb_single = xen_flush_tlb_single,
2436 .flush_tlb_others = xen_flush_tlb_others,
2437
2438 .pte_update = paravirt_nop,
2439
2440 .pgd_alloc = xen_pgd_alloc,
2441 .pgd_free = xen_pgd_free,
2442
2443 .alloc_pte = xen_alloc_pte_init,
2444 .release_pte = xen_release_pte_init,
2445 .alloc_pmd = xen_alloc_pmd_init,
2446 .release_pmd = xen_release_pmd_init,
2447
2448 .set_pte = xen_set_pte_init,
2449 .set_pte_at = xen_set_pte_at,
2450 .set_pmd = xen_set_pmd_hyper,
2451
2452 .ptep_modify_prot_start = __ptep_modify_prot_start,
2453 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
2454
2455 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
2456 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2457
2458 .make_pte = PV_CALLEE_SAVE(xen_make_pte),
2459 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2460
2461#ifdef CONFIG_X86_PAE
2462 .set_pte_atomic = xen_set_pte_atomic,
2463 .pte_clear = xen_pte_clear,
2464 .pmd_clear = xen_pmd_clear,
2465#endif
2466 .set_pud = xen_set_pud_hyper,
2467
2468 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2469 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2470
2471#if CONFIG_PGTABLE_LEVELS == 4
2472 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2473 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2474 .set_pgd = xen_set_pgd_hyper,
2475
2476 .alloc_pud = xen_alloc_pmd_init,
2477 .release_pud = xen_release_pmd_init,
2478#endif
2479
2480 .activate_mm = xen_activate_mm,
2481 .dup_mmap = xen_dup_mmap,
2482 .exit_mmap = xen_exit_mmap,
2483
2484 .lazy_mode = {
2485 .enter = paravirt_enter_lazy_mmu,
2486 .leave = xen_leave_lazy_mmu,
2487 .flush = paravirt_flush_lazy_mmu,
2488 },
2489
2490 .set_fixmap = xen_set_fixmap,
2491};
2492
2493void __init xen_init_mmu_ops(void)
2494{
2495 x86_init.paging.pagetable_init = xen_pagetable_init;
2496
2497 if (xen_feature(XENFEAT_auto_translated_physmap))
2498 return;
2499
2500 pv_mmu_ops = xen_mmu_ops;
2501
2502 memset(dummy_mapping, 0xff, PAGE_SIZE);
2503}
2504
2505
2506#define MAX_CONTIG_ORDER 9
2507static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2508
2509#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2510static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2511 unsigned long *in_frames,
2512 unsigned long *out_frames)
2513{
2514 int i;
2515 struct multicall_space mcs;
2516
2517 xen_mc_batch();
2518 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2519 mcs = __xen_mc_entry(0);
2520
2521 if (in_frames)
2522 in_frames[i] = virt_to_mfn(vaddr);
2523
2524 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2525 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2526
2527 if (out_frames)
2528 out_frames[i] = virt_to_pfn(vaddr);
2529 }
2530 xen_mc_issue(0);
2531}
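
/*
 * Update the pfn-to-mfn mappings for a virtual address range, either to
 * point to an array of mfns, or contiguously from a single starting
 * mfn.
 */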
2538static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2539 unsigned long *mfns,
2540 unsigned long first_mfn)
2541{
2542 unsigned i, limit;
2543 unsigned long mfn;
2544
2545 xen_mc_batch();
2546
2547 limit = 1u << order;
2548 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2549 struct multicall_space mcs;
2550 unsigned flags;
2551
2552 mcs = __xen_mc_entry(0);
2553 if (mfns)
2554 mfn = mfns[i];
2555 else
2556 mfn = first_mfn + i;
2557
2558 if (i < (limit - 1))
2559 flags = 0;
2560 else {
2561 if (order == 0)
2562 flags = UVMF_INVLPG | UVMF_ALL;
2563 else
2564 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2565 }
2566
2567 MULTI_update_va_mapping(mcs.mc, vaddr,
2568 mfn_pte(mfn, PAGE_KERNEL), flags);
2569
2570 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2571 }
2572
2573 xen_mc_issue(0);
2574}
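
/*
 * Perform the hypercall to exchange a region of our pfns to point to
 * memory with the required contiguous alignment.  Takes the pfns as
 * input, and populates mfns as output.
 *
 * Returns a success code indicating whether the hypervisor was able to
 * satisfy the request or not.
 */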
static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
                               unsigned long *pfns_in,
                               unsigned long extents_out,
                               unsigned int order_out,
                               unsigned long *mfns_out,
                               unsigned int address_bits)
{
        long rc;
        int success;

        struct xen_memory_exchange exchange = {
                .in = {
                        .nr_extents = extents_in,
                        .extent_order = order_in,
                        .extent_start = pfns_in,
                        .domid = DOMID_SELF
                },
                .out = {
                        .nr_extents = extents_out,
                        .extent_order = order_out,
                        .extent_start = mfns_out,
                        .address_bits = address_bits,
                        .domid = DOMID_SELF
                }
        };

        /* Both sides of the exchange must cover the same number of pages. */
        BUG_ON(extents_in << order_in != extents_out << order_out);

        rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
        success = (exchange.nr_exchanged == extents_in);

        /* Either everything was exchanged or nothing at all. */
        BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
        BUG_ON(success && (rc != 0));

        return success;
}

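/*
 * Exchange the 2^order pages backing the region starting at pstart for a
 * machine-contiguous extent addressable within address_bits, and return the
 * resulting machine (bus) address of the region in *dma_handle.
 */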
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
                                 unsigned int address_bits,
                                 dma_addr_t *dma_handle)
{
        unsigned long *in_frames = discontig_frames, out_frame;
        unsigned long flags;
        int success;
        unsigned long vstart = (unsigned long)phys_to_virt(pstart);

        /*
         * An auto-translated guest has no pfn-to-mfn mapping of its own to
         * rearrange, so there is nothing to exchange here.
         */
        if (xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;

        memset((void *) vstart, 0, PAGE_SIZE << order);

        spin_lock_irqsave(&xen_reservation_lock, flags);

        /* 1. Zap current PTEs, remembering the MFNs. */
        xen_zap_pfn_range(vstart, order, in_frames, NULL);

        /* 2. Get a new contiguous memory extent. */
        out_frame = virt_to_pfn(vstart);
        success = xen_exchange_memory(1UL << order, 0, in_frames,
                                      1, order, &out_frame,
                                      address_bits);

        /* 3. Map the new extent in place of the old pages. */
        if (success)
                xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
        else
                xen_remap_exchanged_ptes(vstart, order, in_frames, 0);

        spin_unlock_irqrestore(&xen_reservation_lock, flags);

        *dma_handle = virt_to_machine(vstart).maddr;
        return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);

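/*
 * Undo xen_create_contiguous_region(): exchange the machine-contiguous
 * extent backing the region for ordinary (possibly discontiguous) frames.
 */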
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
        unsigned long *out_frames = discontig_frames, in_frame;
        unsigned long flags;
        int success;
        unsigned long vstart;

        if (xen_feature(XENFEAT_auto_translated_physmap))
                return;

        if (unlikely(order > MAX_CONTIG_ORDER))
                return;

        vstart = (unsigned long)phys_to_virt(pstart);
        memset((void *) vstart, 0, PAGE_SIZE << order);

        spin_lock_irqsave(&xen_reservation_lock, flags);

        /* 1. Find the start MFN of the contiguous extent. */
        in_frame = virt_to_mfn(vstart);

        /* 2. Zap the current PTEs. */
        xen_zap_pfn_range(vstart, order, NULL, out_frames);

        /* 3. Exchange the contiguous extent for discontiguous MFNs. */
        success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
                                      0, out_frames, 0);

        /* 4. Map the new frames in place of the old ones. */
        if (success)
                xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
        else
                xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);

        spin_unlock_irqrestore(&xen_reservation_lock, flags);
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);

#ifdef CONFIG_XEN_PVHVM
#ifdef CONFIG_PROC_VMCORE
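/*
 * Callback registered via register_oldmem_pfn_is_ram() below so that the
 * crash-dump code (/proc/vmcore) only reads old-memory pfns that are really
 * backed by RAM.  Ask the hypervisor what backs the pfn: emulated MMIO is
 * reported as "not RAM", anything else as RAM.
 */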
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
{
        struct xen_hvm_get_mem_type a = {
                .domid = DOMID_SELF,
                .pfn = pfn,
        };
        int ram;

        if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
                return -ENXIO;

        switch (a.mem_type) {
        case HVMMEM_mmio_dm:
                ram = 0;
                break;
        case HVMMEM_ram_rw:
        case HVMMEM_ram_ro:
        default:
                ram = 1;
                break;
        }

        return ram;
}
#endif

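/*
 * Notify the hypervisor that the guest page table rooted at mm->pgd is about
 * to be torn down (HVMOP_pagetable_dying), so it can drop any state it keeps
 * for that page table.
 */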
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
        struct xen_hvm_pagetable_dying a;
        int rc;

        a.domid = DOMID_SELF;
        a.gpa = __pa(mm->pgd);
        rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
        WARN_ON_ONCE(rc < 0);
}

static int is_pagetable_dying_supported(void)
{
        struct xen_hvm_pagetable_dying a;
        int rc = 0;

        a.domid = DOMID_SELF;
        a.gpa = 0x00;
        rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
        if (rc < 0) {
                printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
                return 0;
        }
        return 1;
}

void __init xen_hvm_init_mmu_ops(void)
{
        if (is_pagetable_dying_supported())
                pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
        register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
#endif
}
#endif /* CONFIG_XEN_PVHVM */

#define REMAP_BATCH_SIZE 16

struct remap_data {
        xen_pfn_t *mfn;
        bool contiguous;
        pgprot_t prot;
        struct mmu_update *mmu_update;
};

static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
                                 unsigned long addr, void *data)
{
        struct remap_data *rmd = data;
        pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));

        /*
         * For a contiguous range, advance the mfn value itself; otherwise
         * step to the next entry of the mfn array.
         */
        if (rmd->contiguous)
                (*rmd->mfn)++;
        else
                rmd->mfn++;

        rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
        rmd->mmu_update->val = pte_val_ma(pte);
        rmd->mmu_update++;

        return 0;
}

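/*
 * Map nr foreign frames into vma at addr in batches of REMAP_BATCH_SIZE.
 * When err_ptr is NULL, *gfn is treated as the first frame of a contiguous
 * range; otherwise gfn is an array of nr frames and the per-frame status is
 * reported through err_ptr.  Returns the number of frames mapped, or a
 * negative error.
 */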
static int do_remap_gfn(struct vm_area_struct *vma,
                        unsigned long addr,
                        xen_pfn_t *gfn, int nr,
                        int *err_ptr, pgprot_t prot,
                        unsigned domid,
                        struct page **pages)
{
        int err = 0;
        struct remap_data rmd;
        struct mmu_update mmu_update[REMAP_BATCH_SIZE];
        unsigned long range;
        int mapped = 0;

        BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));

        if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
                /* Auto-translated guests go through the generic xlate helper. */
                return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
                                                 prot, domid, pages);
#else
                return -EINVAL;
#endif
        }

        rmd.mfn = gfn;
        rmd.prot = prot;
        /*
         * We use err_ptr to indicate whether this is a contiguous or a
         * discontiguous (array) mapping.
         */
        rmd.contiguous = !err_ptr;

        while (nr) {
                int index = 0;
                int done = 0;
                int batch = min(REMAP_BATCH_SIZE, nr);
                int batch_left = batch;
                range = (unsigned long)batch << PAGE_SHIFT;

                rmd.mmu_update = mmu_update;
                err = apply_to_page_range(vma->vm_mm, addr, range,
                                          remap_area_mfn_pte_fn, &rmd);
                if (err)
                        goto out;

                /*
                 * Record the error for each page that gives an error, but
                 * continue mapping until the whole batch is done.
                 */
                do {
                        int i;

                        err = HYPERVISOR_mmu_update(&mmu_update[index],
                                                    batch_left, &done, domid);

                        /*
                         * Mark the entries the hypervisor consumed in this
                         * call as successful; a failing entry is overwritten
                         * below.
                         */
                        if (err_ptr) {
                                for (i = index; i < index + done; i++)
                                        err_ptr[i] = 0;
                        }
                        if (err < 0) {
                                if (!err_ptr)
                                        goto out;
                                err_ptr[i] = err;
                                done++; /* Skip the failed frame. */
                        } else
                                mapped += done;
                        batch_left -= done;
                        index += done;
                } while (batch_left);

                nr -= batch;
                addr += range;
                if (err_ptr)
                        err_ptr += batch;
                cond_resched();
        }
out:

        xen_flush_tlb_all();

        return err < 0 ? err : mapped;
}


int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
                               unsigned long addr,
                               xen_pfn_t gfn, int nr,
                               pgprot_t prot, unsigned domid,
                               struct page **pages)
{
        return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);

int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
                               xen_pfn_t *gfn, int nr,
                               int *err_ptr, pgprot_t prot,
                               unsigned domid, struct page **pages)
{
        /*
         * We BUG_ON because it is a programmer error to pass a NULL err_ptr,
         * and it is much harder to diagnose the resulting "wrong memory was
         * mapped in" problems later on.
         */
        BUG_ON(err_ptr == NULL);
        return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);

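/* Returns 0 on success (including the no-op PV case) or a negative error. */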
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
                               int numpgs, struct page **pages)
{
        if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
                return 0;

#ifdef CONFIG_XEN_PVH
        return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
#else
        return -EINVAL;
#endif
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);