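/*
 * Xen MMU operations: paravirtualized page-table management for x86 Xen
 * PV guests.  Page-table updates are issued to the hypervisor (batched as
 * multicalls where possible), page-table pages are pinned read-only while
 * in use, and pfn<->mfn translation helpers are provided for the rest of
 * the Xen code.
 */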
#include <linux/sched.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>

#include <trace/events/xen.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/paravirt.h>
#include <asm/e820.h>
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/pat.h>
#include <asm/smp.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>

#include "multicalls.h"
#include "mmu.h"
#include "debugfs.h"
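
/*
 * Serializes memory reservation changes against the hypervisor, e.g. the
 * pfn<->mfn exchanges done for contiguous DMA regions and by the balloon
 * driver.
 */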
DEFINE_SPINLOCK(xen_reservation_lock);

#ifdef CONFIG_X86_32
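/*
 * Early identity-mapping PTEs, carved out of the brk area.  Four pages'
 * worth of PTE entries are reserved so the low identity mapping can be
 * grown before the normal page allocator is available.
 */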
#define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
#endif
#ifdef CONFIG_X86_64
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
#endif
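
/*
 * xen_cr3 is the guest's view of cr3, updated as soon as a new value is
 * requested; xen_current_cr3 is the value the hypervisor has actually
 * installed, updated from the multicall completion callback.
 */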
DEFINE_PER_CPU(unsigned long, xen_cr3);
DEFINE_PER_CPU(unsigned long, xen_current_cr3);

static phys_addr_t xen_pt_base, xen_pt_size __initdata;
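
/* Top of the user address range, rounded up to a PGDIR boundary. */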
#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

unsigned long arbitrary_virt_to_mfn(void *vaddr)
{
	xmaddr_t maddr = arbitrary_virt_to_machine(vaddr);

	return PFN_DOWN(maddr.maddr);
}
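
/*
 * Translate a kernel virtual address into a machine address.  Lowmem
 * addresses use the direct mapping; anything else (vmalloc, fixmap, ...)
 * is resolved by walking the kernel page tables.
 */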
xmaddr_t arbitrary_virt_to_machine(void *vaddr)
{
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;
	pte_t *pte;
	unsigned offset;

	if (virt_addr_valid(vaddr))
		return virt_to_machine(vaddr);

	pte = lookup_address(address, &level);
	BUG_ON(pte == NULL);
	offset = address & ~PAGE_MASK;
	return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset);
}
EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine);

void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

static bool xen_page_pinned(void *ptr)
{
	struct page *page = virt_to_page(ptr);

	return PagePinned(page);
}

void xen_set_domain_pte(pte_t *ptep, pte_t pteval, unsigned domid)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	trace_xen_mmu_set_domain_pte(ptep, pteval, domid);

	mcs = xen_mc_entry(sizeof(*u));
	u = mcs.args;

	u->ptr = virt_to_machine(ptep).maddr;
	u->val = pte_val_ma(pteval);

	MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, domid);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
EXPORT_SYMBOL_GPL(xen_set_domain_pte);
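
/*
 * Append an mmu_update to the pending multicall batch, either by extending
 * the argument list of an already-queued MMU_UPDATE multicall or by
 * starting a new one.
 */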
static void xen_extend_mmu_update(const struct mmu_update *update)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *update;
}

static void xen_extend_mmuext_op(const struct mmuext_op *op)
{
	struct multicall_space mcs;
	struct mmuext_op *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *op;
}

static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pmd_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	trace_xen_mmu_set_pmd(ptr, val);

	/* If the page is not pinned, the update can be done directly. */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pmd_hyper(ptr, val);
}
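
/*
 * Associate a virtual page frame with a given machine frame and protection
 * flags for that frame.
 */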
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}

static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update u;

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
		return false;

	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	u.val = pte_val_ma(pteval);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	return true;
}

static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	if (!xen_batched_set_pte(ptep, pteval)) {
		/*
		 * Not in a lazy-MMU batch: issue a single mmu_update
		 * hypercall directly rather than trapping and emulating
		 * the PTE write.
		 */
		struct mmu_update u;

		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
		u.val = pte_val_ma(pteval);
		HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
	}
}

static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

static void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
			   pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte_at(mm, addr, ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
				 unsigned long addr, pte_t *ptep)
{
	/* Just return the pte as-is; the A/D bits are preserved on commit. */
	trace_xen_mmu_ptep_modify_prot_start(mm, addr, ptep, *ptep);
	return *ptep;
}

void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
				 pte_t *ptep, pte_t pte)
{
	struct mmu_update u;

	trace_xen_mmu_ptep_modify_prot_commit(mm, addr, ptep, pte);
	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
	u.val = pte_val_ma(pte);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
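
/*
 * Convert a pte value holding a machine frame number into one holding the
 * corresponding pseudo-physical frame number, and vice versa below.  A
 * missing M2P entry means the page was ballooned out, so the present bit
 * is cleared.
 */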
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		unsigned long pfn = mfn_to_pfn(mfn);

		pteval_t flags = val & PTE_FLAGS_MASK;
		if (unlikely(pfn == ~0))
			val = flags & ~_PAGE_PRESENT;
		else
			val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
	}

	return val;
}

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		unsigned long mfn;

		if (!xen_feature(XENFEAT_auto_translated_physmap))
			mfn = __pfn_to_mfn(pfn);
		else
			mfn = pfn;

		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		} else
			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}

	return val;
}

__visible pteval_t xen_pte_val(pte_t pte)
{
	pteval_t pteval = pte.pte;

	return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

__visible pgdval_t xen_pgd_val(pgd_t pgd)
{
	return pte_mfn_to_pfn(pgd.pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);

__visible pte_t xen_make_pte(pteval_t pte)
{
	pte = pte_pfn_to_mfn(pte);

	return native_make_pte(pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

__visible pgd_t xen_make_pgd(pgdval_t pgd)
{
	pgd = pte_pfn_to_mfn(pgd);
	return native_make_pgd(pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);

__visible pmdval_t xen_pmd_val(pmd_t pmd)
{
	return pte_mfn_to_pfn(pmd.pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);

static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pud_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pud(pud_t *ptr, pud_t val)
{
	trace_xen_mmu_set_pud(ptr, val);

	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pud_hyper(ptr, val);
}

#ifdef CONFIG_X86_PAE
static void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	trace_xen_mmu_set_pte_atomic(ptep, pte);
	set_64bit((u64 *)ptep, native_pte_val(pte));
}

static void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	trace_xen_mmu_pte_clear(mm, addr, ptep);
	if (!xen_batched_set_pte(ptep, native_make_pte(0)))
		native_pte_clear(mm, addr, ptep);
}

static void xen_pmd_clear(pmd_t *pmdp)
{
	trace_xen_mmu_pmd_clear(pmdp);
	set_pmd(pmdp, __pmd(0));
}
#endif

__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
	pmd = pte_pfn_to_mfn(pmd);
	return native_make_pmd(pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

#if CONFIG_PGTABLE_LEVELS == 4
__visible pudval_t xen_pud_val(pud_t pud)
{
	return pte_mfn_to_pfn(pud.pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);

__visible pud_t xen_make_pud(pudval_t pud)
{
	pud = pte_pfn_to_mfn(pud);

	return native_make_pud(pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);
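
/*
 * On 64-bit PV guests each process has two page-table roots: the kernel
 * pgd and a separate user pgd stored in page->private of the kernel pgd's
 * page.  Return the user counterpart of an entry within the kernel pgd,
 * or NULL if there is none.
 */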
static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
	unsigned offset = pgd - pgd_page;
	pgd_t *user_ptr = NULL;

	if (offset < pgd_index(USER_LIMIT)) {
		struct page *page = virt_to_page(pgd_page);
		user_ptr = (pgd_t *)page->private;
		if (user_ptr)
			user_ptr += offset;
	}

	return user_ptr;
}

static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
{
	struct mmu_update u;

	u.ptr = virt_to_machine(ptr).maddr;
	u.val = pgd_val_ma(val);
	xen_extend_mmu_update(&u);
}
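
/*
 * Raw hypercall-based set_pgd, intended for use early in boot before
 * there is a struct page for the pagetable.  At that point the kernel
 * pagetable is the only one in existence, it is always pinned and has no
 * user counterpart attached.
 */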
static void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
{
	preempt_disable();

	xen_mc_batch();

	__xen_set_pgd_hyper(ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
	pgd_t *user_ptr = xen_get_user_pgd(ptr);

	trace_xen_mmu_set_pgd(ptr, user_ptr, val);

	/* If the page is not pinned, the entry can be updated directly. */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		if (user_ptr) {
			WARN_ON(xen_page_pinned(user_ptr));
			*user_ptr = val;
		}
		return;
	}

	/*
	 * If it is pinned, at least batch the kernel and user updates
	 * together.
	 */
	xen_mc_batch();

	__xen_set_pgd_hyper(ptr, val);
	if (user_ptr)
		__xen_set_pgd_hyper(user_ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}
#endif
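
/*
 * Walk a pagetable, calling @func on every page-table page found below
 * @limit, at every level.  The hole between the user and kernel halves of
 * the 64-bit address space (which holds the Xen mappings) is skipped, and
 * the top-level PGD page is always visited last so callbacks can use it as
 * a cue for final work such as TLB flushes.  Returns the OR of all
 * callback return values.
 */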
static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
			  int (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			  unsigned long limit)
{
	int flush = 0;
	unsigned hole_low, hole_high;
	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
	unsigned pgdidx, pudidx, pmdidx;

	/* The limit is the last byte to be touched */
	limit--;
	BUG_ON(limit >= FIXADDR_TOP);

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	hole_low = pgd_index(USER_LIMIT);
	hole_high = pgd_index(PAGE_OFFSET);

	pgdidx_limit = pgd_index(limit);
#if PTRS_PER_PUD > 1
	pudidx_limit = pud_index(limit);
#else
	pudidx_limit = 0;
#endif
#if PTRS_PER_PMD > 1
	pmdidx_limit = pmd_index(limit);
#else
	pmdidx_limit = 0;
#endif

	for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
		pud_t *pud;

		if (pgdidx >= hole_low && pgdidx < hole_high)
			continue;

		if (!pgd_val(pgd[pgdidx]))
			continue;

		pud = pud_offset(&pgd[pgdidx], 0);

		if (PTRS_PER_PUD > 1) /* not folded */
			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);

		for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
			pmd_t *pmd;

			if (pgdidx == pgdidx_limit &&
			    pudidx > pudidx_limit)
				goto out;

			if (pud_none(pud[pudidx]))
				continue;

			pmd = pmd_offset(&pud[pudidx], 0);

			if (PTRS_PER_PMD > 1) /* not folded */
				flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);

			for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
				struct page *pte;

				if (pgdidx == pgdidx_limit &&
				    pudidx == pudidx_limit &&
				    pmdidx > pmdidx_limit)
					goto out;

				if (pmd_none(pmd[pmdidx]))
					continue;

				pte = pmd_page(pmd[pmdidx]);
				flush |= (*func)(mm, pte, PT_PTE);
			}
		}
	}

out:
	/*
	 * Do the top level last, so that the callbacks can use it as
	 * a cue to do final things like TLB flushes.
	 */
	flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);

	return flush;
}

static int xen_pgd_walk(struct mm_struct *mm,
			int (*func)(struct mm_struct *mm, struct page *,
				    enum pt_level),
			unsigned long limit)
{
	return __xen_pgd_walk(mm, mm->pgd, func, limit);
}
709
710
711
712static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
713{
714 spinlock_t *ptl = NULL;
715
716#if USE_SPLIT_PTE_PTLOCKS
717 ptl = ptlock_ptr(page);
718 spin_lock_nest_lock(ptl, &mm->page_table_lock);
719#endif
720
721 return ptl;
722}
723
724static void xen_pte_unlock(void *v)
725{
726 spinlock_t *ptl = v;
727 spin_unlock(ptl);
728}
729
730static void xen_do_pin(unsigned level, unsigned long pfn)
731{
732 struct mmuext_op op;
733
734 op.cmd = level;
735 op.arg1.mfn = pfn_to_mfn(pfn);
736
737 xen_extend_mmuext_op(&op);
738}
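
/*
 * Pin callback for __xen_pgd_walk: remap a page-table page read-only via a
 * queued update_va_mapping multicall and, for PTE pages with split
 * ptlocks, take the page's lock so a concurrent unprotected update cannot
 * slip in before the hypervisor sees the page as a page table.  Highmem
 * pages cannot be remapped here, so they request a kmap flush instead.
 */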
740static int xen_pin_page(struct mm_struct *mm, struct page *page,
741 enum pt_level level)
742{
743 unsigned pgfl = TestSetPagePinned(page);
744 int flush;
745
746 if (pgfl)
747 flush = 0;
748 else if (PageHighMem(page))
749
750
751 flush = 1;
752 else {
753 void *pt = lowmem_page_address(page);
754 unsigned long pfn = page_to_pfn(page);
755 struct multicall_space mcs = __xen_mc_entry(0);
756 spinlock_t *ptl;
757
758 flush = 0;
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780 ptl = NULL;
781 if (level == PT_PTE)
782 ptl = xen_pte_lock(page, mm);
783
784 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
785 pfn_pte(pfn, PAGE_KERNEL_RO),
786 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
787
788 if (ptl) {
789 xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
790
791
792
793 xen_mc_callback(xen_pte_unlock, ptl);
794 }
795 }
796
797 return flush;
798}
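
/*
 * Pin a pgd and everything below it.  Called once a new mm's page tables
 * are complete: every page-table page is remapped read-only and then the
 * whole tree is registered with the hypervisor as a top-level table, after
 * which all further updates must go through hypercalls.
 */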
803static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
804{
805 trace_xen_mmu_pgd_pin(mm, pgd);
806
807 xen_mc_batch();
808
809 if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
810
811 xen_mc_issue(0);
812
813 kmap_flush_unused();
814
815 xen_mc_batch();
816 }
817
818#ifdef CONFIG_X86_64
819 {
820 pgd_t *user_pgd = xen_get_user_pgd(pgd);
821
822 xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
823
824 if (user_pgd) {
825 xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
826 xen_do_pin(MMUEXT_PIN_L4_TABLE,
827 PFN_DOWN(__pa(user_pgd)));
828 }
829 }
830#else
831#ifdef CONFIG_X86_PAE
832
833 xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
834 PT_PMD);
835#endif
836 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
837#endif
838 xen_mc_issue(0);
839}
840
841static void xen_pgd_pin(struct mm_struct *mm)
842{
843 __xen_pgd_pin(mm, mm->pgd);
844}
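
/*
 * On suspend/save, pin every pagetable in the system so that the pin state
 * is recorded and the matching unpin happens correctly on restore.
 */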
856void xen_mm_pin_all(void)
857{
858 struct page *page;
859
860 spin_lock(&pgd_lock);
861
862 list_for_each_entry(page, &pgd_list, lru) {
863 if (!PagePinned(page)) {
864 __xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
865 SetPageSavePinned(page);
866 }
867 }
868
869 spin_unlock(&pgd_lock);
870}
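
/*
 * init_mm's pagetable is effectively pinned from the moment it is built,
 * before struct pages exist to record that fact; mark it as pinned here so
 * the rest of the pin/unpin bookkeeping stays consistent.
 */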
877static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
878 enum pt_level level)
879{
880 SetPagePinned(page);
881 return 0;
882}
883
884static void __init xen_mark_init_mm_pinned(void)
885{
886 xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
887}
888
889static int xen_unpin_page(struct mm_struct *mm, struct page *page,
890 enum pt_level level)
891{
892 unsigned pgfl = TestClearPagePinned(page);
893
894 if (pgfl && !PageHighMem(page)) {
895 void *pt = lowmem_page_address(page);
896 unsigned long pfn = page_to_pfn(page);
897 spinlock_t *ptl = NULL;
898 struct multicall_space mcs;
899
900
901
902
903
904
905
906
907 if (level == PT_PTE) {
908 ptl = xen_pte_lock(page, mm);
909
910 if (ptl)
911 xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
912 }
913
914 mcs = __xen_mc_entry(0);
915
916 MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
917 pfn_pte(pfn, PAGE_KERNEL),
918 level == PT_PGD ? UVMF_TLB_FLUSH : 0);
919
920 if (ptl) {
921
922 xen_mc_callback(xen_pte_unlock, ptl);
923 }
924 }
925
926 return 0;
927}
928
929
930static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
931{
932 trace_xen_mmu_pgd_unpin(mm, pgd);
933
934 xen_mc_batch();
935
936 xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
937
938#ifdef CONFIG_X86_64
939 {
940 pgd_t *user_pgd = xen_get_user_pgd(pgd);
941
942 if (user_pgd) {
943 xen_do_pin(MMUEXT_UNPIN_TABLE,
944 PFN_DOWN(__pa(user_pgd)));
945 xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
946 }
947 }
948#endif
949
950#ifdef CONFIG_X86_PAE
951
952 xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]),
953 PT_PMD);
954#endif
955
956 __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
957
958 xen_mc_issue(0);
959}
960
961static void xen_pgd_unpin(struct mm_struct *mm)
962{
963 __xen_pgd_unpin(mm, mm->pgd);
964}
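
/*
 * On resume, undo the pinning done by xen_mm_pin_all() so page tables
 * return to their normal state.
 */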
970void xen_mm_unpin_all(void)
971{
972 struct page *page;
973
974 spin_lock(&pgd_lock);
975
976 list_for_each_entry(page, &pgd_list, lru) {
977 if (PageSavePinned(page)) {
978 BUG_ON(!PagePinned(page));
979 __xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
980 ClearPageSavePinned(page);
981 }
982 }
983
984 spin_unlock(&pgd_lock);
985}
986
987static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
988{
989 spin_lock(&next->page_table_lock);
990 xen_pgd_pin(next);
991 spin_unlock(&next->page_table_lock);
992}
993
994static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
995{
996 spin_lock(&mm->page_table_lock);
997 xen_pgd_pin(mm);
998 spin_unlock(&mm->page_table_lock);
999}

#ifdef CONFIG_SMP
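/*
 * Another cpu may still have its %cr3 pointing at this mm's pagetable, or
 * may still hold a lazy reference to it; make every such cpu switch away
 * before the pagetable can be unpinned and freed.
 */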
1005static void drop_other_mm_ref(void *info)
1006{
1007 struct mm_struct *mm = info;
1008 struct mm_struct *active_mm;
1009
1010 active_mm = this_cpu_read(cpu_tlbstate.active_mm);
1011
1012 if (active_mm == mm && this_cpu_read(cpu_tlbstate.state) != TLBSTATE_OK)
1013 leave_mm(smp_processor_id());
1014
1015
1016
1017 if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
1018 load_cr3(swapper_pg_dir);
1019}
1020
1021static void xen_drop_mm_ref(struct mm_struct *mm)
1022{
1023 cpumask_var_t mask;
1024 unsigned cpu;
1025
1026 if (current->active_mm == mm) {
1027 if (current->mm == mm)
1028 load_cr3(swapper_pg_dir);
1029 else
1030 leave_mm(smp_processor_id());
1031 }
1032
1033
1034 if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
1035 for_each_online_cpu(cpu) {
1036 if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
1037 && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
1038 continue;
1039 smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
1040 }
1041 return;
1042 }
1043 cpumask_copy(mask, mm_cpumask(mm));
1044
1045
1046
1047
1048
1049
1050 for_each_online_cpu(cpu) {
1051 if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
1052 cpumask_set_cpu(cpu, mask);
1053 }
1054
1055 if (!cpumask_empty(mask))
1056 smp_call_function_many(mask, drop_other_mm_ref, mm, 1);
1057 free_cpumask_var(mask);
1058}
1059#else
1060static void xen_drop_mm_ref(struct mm_struct *mm)
1061{
1062 if (current->active_mm == mm)
1063 load_cr3(swapper_pg_dir);
1064}
1065#endif
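
/*
 * While a process runs, Xen pins its pagetable, which means the kernel
 * cannot write to it directly.  exit_mmap() is the last point at which the
 * pagetable is still a proper pagetable, so unpin it here (after making
 * sure no cpu still references it) so the teardown code can modify it with
 * plain memory writes.
 */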
1081static void xen_exit_mmap(struct mm_struct *mm)
1082{
1083 get_cpu();
1084 xen_drop_mm_ref(mm);
1085 put_cpu();
1086
1087 spin_lock(&mm->page_table_lock);
1088
1089
1090 if (xen_page_pinned(mm->pgd))
1091 xen_pgd_unpin(mm);
1092
1093 spin_unlock(&mm->page_table_lock);
1094}
1095
1096static void xen_post_allocator_init(void);
1097
1098static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1099{
1100 struct mmuext_op op;
1101
1102 op.cmd = cmd;
1103 op.arg1.mfn = pfn_to_mfn(pfn);
1104 if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
1105 BUG();
1106}
1107
1108#ifdef CONFIG_X86_64
1109static void __init xen_cleanhighmap(unsigned long vaddr,
1110 unsigned long vaddr_end)
1111{
1112 unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
1113 pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
1114
1115
1116
1117 for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
1118 pmd++, vaddr += PMD_SIZE) {
1119 if (pmd_none(*pmd))
1120 continue;
1121 if (vaddr < (unsigned long) _text || vaddr > kernel_end)
1122 set_pmd(pmd, __pmd(0));
1123 }
1124
1125
1126 xen_mc_flush();
1127}
1128
1129
1130
1131
1132static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
1133{
1134 void *vaddr = __va(paddr);
1135 void *vaddr_end = vaddr + size;
1136
1137 for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
1138 make_lowmem_page_readwrite(vaddr);
1139
1140 memblock_free(paddr, size);
1141}
1142
1143static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
1144{
1145 unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
1146
1147 if (unpin)
1148 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
1149 ClearPagePinned(virt_to_page(__va(pa)));
1150 xen_free_ro_pages(pa, PAGE_SIZE);
1151}
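
/*
 * Free the page tables (and the pages they map) that the hypervisor used
 * to map the initial p2m list or other early data below the kernel
 * mapping, unpinning them first where required.
 */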
1157static void __init xen_cleanmfnmap(unsigned long vaddr)
1158{
1159 unsigned long va = vaddr & PMD_MASK;
1160 unsigned long pa;
1161 pgd_t *pgd = pgd_offset_k(va);
1162 pud_t *pud_page = pud_offset(pgd, 0);
1163 pud_t *pud;
1164 pmd_t *pmd;
1165 pte_t *pte;
1166 unsigned int i;
1167 bool unpin;
1168
1169 unpin = (vaddr == 2 * PGDIR_SIZE);
1170 set_pgd(pgd, __pgd(0));
1171 do {
1172 pud = pud_page + pud_index(va);
1173 if (pud_none(*pud)) {
1174 va += PUD_SIZE;
1175 } else if (pud_large(*pud)) {
1176 pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
1177 xen_free_ro_pages(pa, PUD_SIZE);
1178 va += PUD_SIZE;
1179 } else {
1180 pmd = pmd_offset(pud, va);
1181 if (pmd_large(*pmd)) {
1182 pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
1183 xen_free_ro_pages(pa, PMD_SIZE);
1184 } else if (!pmd_none(*pmd)) {
1185 pte = pte_offset_kernel(pmd, va);
1186 set_pmd(pmd, __pmd(0));
1187 for (i = 0; i < PTRS_PER_PTE; ++i) {
1188 if (pte_none(pte[i]))
1189 break;
1190 pa = pte_pfn(pte[i]) << PAGE_SHIFT;
1191 xen_free_ro_pages(pa, PAGE_SIZE);
1192 }
1193 xen_cleanmfnmap_free_pgtbl(pte, unpin);
1194 }
1195 va += PMD_SIZE;
1196 if (pmd_index(va))
1197 continue;
1198 set_pud(pud, __pud(0));
1199 xen_cleanmfnmap_free_pgtbl(pmd, unpin);
1200 }
1201
1202 } while (pud_index(va) || pmd_index(va));
1203 xen_cleanmfnmap_free_pgtbl(pud_page, unpin);
1204}
1205
1206static void __init xen_pagetable_p2m_free(void)
1207{
1208 unsigned long size;
1209 unsigned long addr;
1210
1211 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
1212
1213
1214 if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
1215 return;
1216
1217
1218 memset((void *)xen_start_info->mfn_list, 0xff, size);
1219
1220 addr = xen_start_info->mfn_list;
1221
1222
1223
1224
1225
1226
1227
1228
1229 size = roundup(size, PMD_SIZE);
1230
1231 if (addr >= __START_KERNEL_map) {
1232 xen_cleanhighmap(addr, addr + size);
1233 size = PAGE_ALIGN(xen_start_info->nr_pages *
1234 sizeof(unsigned long));
1235 memblock_free(__pa(addr), size);
1236 } else {
1237 xen_cleanmfnmap(addr);
1238 }
1239}
1240
1241static void __init xen_pagetable_cleanhighmap(void)
1242{
1243 unsigned long size;
1244 unsigned long addr;
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254 addr = xen_start_info->pt_base;
1255 size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
1256
1257 xen_cleanhighmap(addr, addr + size);
1258 xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
1259#ifdef DEBUG
1260
1261
1262
1263 xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
1264#endif
1265}
1266#endif
1267
1268static void __init xen_pagetable_p2m_setup(void)
1269{
1270 if (xen_feature(XENFEAT_auto_translated_physmap))
1271 return;
1272
1273 xen_vmalloc_p2m_tree();
1274
1275#ifdef CONFIG_X86_64
1276 xen_pagetable_p2m_free();
1277
1278 xen_pagetable_cleanhighmap();
1279#endif
1280
1281 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
1282}
1283
1284static void __init xen_pagetable_init(void)
1285{
1286 paging_init();
1287 xen_post_allocator_init();
1288
1289 xen_pagetable_p2m_setup();
1290
1291
1292 xen_build_mfn_list_list();
1293
1294
1295 if (!xen_feature(XENFEAT_auto_translated_physmap))
1296 xen_remap_memory();
1297
1298 xen_setup_shared_info();
1299}
1300static void xen_write_cr2(unsigned long cr2)
1301{
1302 this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
1303}
1304
1305static unsigned long xen_read_cr2(void)
1306{
1307 return this_cpu_read(xen_vcpu)->arch.cr2;
1308}
1309
1310unsigned long xen_read_cr2_direct(void)
1311{
1312 return this_cpu_read(xen_vcpu_info.arch.cr2);
1313}
1314
1315void xen_flush_tlb_all(void)
1316{
1317 struct mmuext_op *op;
1318 struct multicall_space mcs;
1319
1320 trace_xen_mmu_flush_tlb_all(0);
1321
1322 preempt_disable();
1323
1324 mcs = xen_mc_entry(sizeof(*op));
1325
1326 op = mcs.args;
1327 op->cmd = MMUEXT_TLB_FLUSH_ALL;
1328 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1329
1330 xen_mc_issue(PARAVIRT_LAZY_MMU);
1331
1332 preempt_enable();
1333}
1334static void xen_flush_tlb(void)
1335{
1336 struct mmuext_op *op;
1337 struct multicall_space mcs;
1338
1339 trace_xen_mmu_flush_tlb(0);
1340
1341 preempt_disable();
1342
1343 mcs = xen_mc_entry(sizeof(*op));
1344
1345 op = mcs.args;
1346 op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
1347 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1348
1349 xen_mc_issue(PARAVIRT_LAZY_MMU);
1350
1351 preempt_enable();
1352}
1353
1354static void xen_flush_tlb_single(unsigned long addr)
1355{
1356 struct mmuext_op *op;
1357 struct multicall_space mcs;
1358
1359 trace_xen_mmu_flush_tlb_single(addr);
1360
1361 preempt_disable();
1362
1363 mcs = xen_mc_entry(sizeof(*op));
1364 op = mcs.args;
1365 op->cmd = MMUEXT_INVLPG_LOCAL;
1366 op->arg1.linear_addr = addr & PAGE_MASK;
1367 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
1368
1369 xen_mc_issue(PARAVIRT_LAZY_MMU);
1370
1371 preempt_enable();
1372}
1373
1374static void xen_flush_tlb_others(const struct cpumask *cpus,
1375 struct mm_struct *mm, unsigned long start,
1376 unsigned long end)
1377{
1378 struct {
1379 struct mmuext_op op;
1380#ifdef CONFIG_SMP
1381 DECLARE_BITMAP(mask, num_processors);
1382#else
1383 DECLARE_BITMAP(mask, NR_CPUS);
1384#endif
1385 } *args;
1386 struct multicall_space mcs;
1387
1388 trace_xen_mmu_flush_tlb_others(cpus, mm, start, end);
1389
1390 if (cpumask_empty(cpus))
1391 return;
1392
1393 mcs = xen_mc_entry(sizeof(*args));
1394 args = mcs.args;
1395 args->op.arg2.vcpumask = to_cpumask(args->mask);
1396
1397
1398 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
1399 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
1400
1401 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
1402 if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
1403 args->op.cmd = MMUEXT_INVLPG_MULTI;
1404 args->op.arg1.linear_addr = start;
1405 }
1406
1407 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
1408
1409 xen_mc_issue(PARAVIRT_LAZY_MMU);
1410}
1411
1412static unsigned long xen_read_cr3(void)
1413{
1414 return this_cpu_read(xen_cr3);
1415}
1416
1417static void set_current_cr3(void *v)
1418{
1419 this_cpu_write(xen_current_cr3, (unsigned long)v);
1420}
1421
1422static void __xen_write_cr3(bool kernel, unsigned long cr3)
1423{
1424 struct mmuext_op op;
1425 unsigned long mfn;
1426
1427 trace_xen_mmu_write_cr3(kernel, cr3);
1428
1429 if (cr3)
1430 mfn = pfn_to_mfn(PFN_DOWN(cr3));
1431 else
1432 mfn = 0;
1433
1434 WARN_ON(mfn == 0 && kernel);
1435
1436 op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
1437 op.arg1.mfn = mfn;
1438
1439 xen_extend_mmuext_op(&op);
1440
1441 if (kernel) {
1442 this_cpu_write(xen_cr3, cr3);
1443
1444
1445
1446 xen_mc_callback(set_current_cr3, (void *)cr3);
1447 }
1448}
1449static void xen_write_cr3(unsigned long cr3)
1450{
1451 BUG_ON(preemptible());
1452
1453 xen_mc_batch();
1454
1455
1456
1457 this_cpu_write(xen_cr3, cr3);
1458
1459 __xen_write_cr3(true, cr3);
1460
1461#ifdef CONFIG_X86_64
1462 {
1463 pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
1464 if (user_pgd)
1465 __xen_write_cr3(false, __pa(user_pgd));
1466 else
1467 __xen_write_cr3(false, 0);
1468 }
1469#endif
1470
1471 xen_mc_issue(PARAVIRT_LAZY_CPU);
1472}
1473
1474#ifdef CONFIG_X86_64
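/*
 * Early cr3 write used while the initial page tables are still being set
 * up: Xen has already loaded and pinned our top-level pagetable, and there
 * is no separate user pagetable yet, so this just installs the new kernel
 * base pointer.  xen_write_cr3() takes over once setup is complete.
 */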
1495static void __init xen_write_cr3_init(unsigned long cr3)
1496{
1497 BUG_ON(preemptible());
1498
1499 xen_mc_batch();
1500
1501
1502
1503 this_cpu_write(xen_cr3, cr3);
1504
1505 __xen_write_cr3(true, cr3);
1506
1507 xen_mc_issue(PARAVIRT_LAZY_CPU);
1508}
1509#endif
1510
1511static int xen_pgd_alloc(struct mm_struct *mm)
1512{
1513 pgd_t *pgd = mm->pgd;
1514 int ret = 0;
1515
1516 BUG_ON(PagePinned(virt_to_page(pgd)));
1517
1518#ifdef CONFIG_X86_64
1519 {
1520 struct page *page = virt_to_page(pgd);
1521 pgd_t *user_pgd;
1522
1523 BUG_ON(page->private != 0);
1524
1525 ret = -ENOMEM;
1526
1527 user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
1528 page->private = (unsigned long)user_pgd;
1529
1530 if (user_pgd != NULL) {
1531#ifdef CONFIG_X86_VSYSCALL_EMULATION
1532 user_pgd[pgd_index(VSYSCALL_ADDR)] =
1533 __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
1534#endif
1535 ret = 0;
1536 }
1537
1538 BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
1539 }
1540#endif
1541
1542 return ret;
1543}
1544
1545static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
1546{
1547#ifdef CONFIG_X86_64
1548 pgd_t *user_pgd = xen_get_user_pgd(pgd);
1549
1550 if (user_pgd)
1551 free_page((unsigned long)user_pgd);
1552#endif
1553}
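
/*
 * Init-time pte constructor.  While the initial page tables are being set
 * up, the p2m list supplied by Xen lives in a region that must not be
 * mapped writable, so ptes pointing into it have their RW bit cleared
 * before the pfn is translated to an mfn.
 */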
1569__visible pte_t xen_make_pte_init(pteval_t pte)
1570{
1571#ifdef CONFIG_X86_64
1572 unsigned long pfn;
1573
1574
1575
1576
1577
1578
1579
1580 pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
1581 if (xen_start_info->mfn_list < __START_KERNEL_map &&
1582 pfn >= xen_start_info->first_p2m_pfn &&
1583 pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
1584 pte &= ~_PAGE_RW;
1585#endif
1586 pte = pte_pfn_to_mfn(pte);
1587 return native_make_pte(pte);
1588}
1589PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
1590
1591static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
1592{
1593#ifdef CONFIG_X86_32
1594
1595 if (pte_mfn(pte) != INVALID_P2M_ENTRY
1596 && pte_val_ma(*ptep) & _PAGE_PRESENT)
1597 pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) &
1598 pte_val_ma(pte));
1599#endif
1600 native_set_pte(ptep, pte);
1601}
1602
1603
1604
1605static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
1606{
1607#ifdef CONFIG_FLATMEM
1608 BUG_ON(mem_map);
1609#endif
1610 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1611 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1612}
1613
1614
1615static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
1616{
1617#ifdef CONFIG_FLATMEM
1618 BUG_ON(mem_map);
1619#endif
1620 make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
1621}
1622
1623
1624
1625static void __init xen_release_pte_init(unsigned long pfn)
1626{
1627 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1628 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1629}
1630
1631static void __init xen_release_pmd_init(unsigned long pfn)
1632{
1633 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
1634}
1635
1636static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
1637{
1638 struct multicall_space mcs;
1639 struct mmuext_op *op;
1640
1641 mcs = __xen_mc_entry(sizeof(*op));
1642 op = mcs.args;
1643 op->cmd = cmd;
1644 op->arg1.mfn = pfn_to_mfn(pfn);
1645
1646 MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
1647}
1648
1649static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
1650{
1651 struct multicall_space mcs;
1652 unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
1653
1654 mcs = __xen_mc_entry(0);
1655 MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
1656 pfn_pte(pfn, prot), 0);
1657}
1658
1659
1660
1661static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
1662 unsigned level)
1663{
1664 bool pinned = PagePinned(virt_to_page(mm->pgd));
1665
1666 trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
1667
1668 if (pinned) {
1669 struct page *page = pfn_to_page(pfn);
1670
1671 SetPagePinned(page);
1672
1673 if (!PageHighMem(page)) {
1674 xen_mc_batch();
1675
1676 __set_pfn_prot(pfn, PAGE_KERNEL_RO);
1677
1678 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1679 __pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
1680
1681 xen_mc_issue(PARAVIRT_LAZY_MMU);
1682 } else {
1683
1684
1685 kmap_flush_unused();
1686 }
1687 }
1688}
1689
1690static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
1691{
1692 xen_alloc_ptpage(mm, pfn, PT_PTE);
1693}
1694
1695static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
1696{
1697 xen_alloc_ptpage(mm, pfn, PT_PMD);
1698}
1699
1700
1701static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
1702{
1703 struct page *page = pfn_to_page(pfn);
1704 bool pinned = PagePinned(page);
1705
1706 trace_xen_mmu_release_ptpage(pfn, level, pinned);
1707
1708 if (pinned) {
1709 if (!PageHighMem(page)) {
1710 xen_mc_batch();
1711
1712 if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
1713 __pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
1714
1715 __set_pfn_prot(pfn, PAGE_KERNEL);
1716
1717 xen_mc_issue(PARAVIRT_LAZY_MMU);
1718 }
1719 ClearPagePinned(page);
1720 }
1721}
1722
1723static void xen_release_pte(unsigned long pfn)
1724{
1725 xen_release_ptpage(pfn, PT_PTE);
1726}
1727
1728static void xen_release_pmd(unsigned long pfn)
1729{
1730 xen_release_ptpage(pfn, PT_PMD);
1731}
1732
1733#if CONFIG_PGTABLE_LEVELS == 4
1734static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
1735{
1736 xen_alloc_ptpage(mm, pfn, PT_PUD);
1737}
1738
1739static void xen_release_pud(unsigned long pfn)
1740{
1741 xen_release_ptpage(pfn, PT_PUD);
1742}
1743#endif
1744
1745void __init xen_reserve_top(void)
1746{
1747#ifdef CONFIG_X86_32
1748 unsigned long top = HYPERVISOR_VIRT_START;
1749 struct xen_platform_parameters pp;
1750
1751 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
1752 top = pp.virt_start;
1753
1754 reserve_top_address(-top);
1755#endif
1756}
1757
1758
1759
1760
1761
1762static void * __init __ka(phys_addr_t paddr)
1763{
1764#ifdef CONFIG_X86_64
1765 return (void *)(paddr + __START_KERNEL_map);
1766#else
1767 return __va(paddr);
1768#endif
1769}
1770
1771
1772static unsigned long __init m2p(phys_addr_t maddr)
1773{
1774 phys_addr_t paddr;
1775
1776 maddr &= PTE_PFN_MASK;
1777 paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
1778
1779 return paddr;
1780}
1781
1782
1783static void * __init m2v(phys_addr_t maddr)
1784{
1785 return __ka(m2p(maddr));
1786}
1787
1788
1789static void __init set_page_prot_flags(void *addr, pgprot_t prot,
1790 unsigned long flags)
1791{
1792 unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
1793 pte_t pte = pfn_pte(pfn, prot);
1794
1795
1796 if (xen_feature(XENFEAT_auto_translated_physmap))
1797 return;
1798
1799 if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
1800 BUG();
1801}
1802static void __init set_page_prot(void *addr, pgprot_t prot)
1803{
1804 return set_page_prot_flags(addr, prot, UVMF_NONE);
1805}
1806#ifdef CONFIG_X86_32
1807static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
1808{
1809 unsigned pmdidx, pteidx;
1810 unsigned ident_pte;
1811 unsigned long pfn;
1812
1813 level1_ident_pgt = extend_brk(sizeof(pte_t) * LEVEL1_IDENT_ENTRIES,
1814 PAGE_SIZE);
1815
1816 ident_pte = 0;
1817 pfn = 0;
1818 for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) {
1819 pte_t *pte_page;
1820
1821
1822 if (pmd_present(pmd[pmdidx]))
1823 pte_page = m2v(pmd[pmdidx].pmd);
1824 else {
1825
1826 if (ident_pte == LEVEL1_IDENT_ENTRIES)
1827 break;
1828
1829 pte_page = &level1_ident_pgt[ident_pte];
1830 ident_pte += PTRS_PER_PTE;
1831
1832 pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE);
1833 }
1834
1835
1836 for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) {
1837 pte_t pte;
1838
1839 if (pfn > max_pfn_mapped)
1840 max_pfn_mapped = pfn;
1841
1842 if (!pte_none(pte_page[pteidx]))
1843 continue;
1844
1845 pte = pfn_pte(pfn, PAGE_KERNEL_EXEC);
1846 pte_page[pteidx] = pte;
1847 }
1848 }
1849
1850 for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE)
1851 set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO);
1852
1853 set_page_prot(pmd, PAGE_KERNEL_RO);
1854}
1855#endif
1856void __init xen_setup_machphys_mapping(void)
1857{
1858 struct xen_machphys_mapping mapping;
1859
1860 if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
1861 machine_to_phys_mapping = (unsigned long *)mapping.v_start;
1862 machine_to_phys_nr = mapping.max_mfn + 1;
1863 } else {
1864 machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
1865 }
1866#ifdef CONFIG_X86_32
1867 WARN_ON((machine_to_phys_mapping + (machine_to_phys_nr - 1))
1868 < machine_to_phys_mapping);
1869#endif
1870}
1871
1872#ifdef CONFIG_X86_64
1873static void __init convert_pfn_mfn(void *v)
1874{
1875 pte_t *pte = v;
1876 int i;
1877
1878
1879
1880 for (i = 0; i < PTRS_PER_PTE; i++)
1881 pte[i] = xen_make_pte(pte[i].pte);
1882}
1883static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
1884 unsigned long addr)
1885{
1886 if (*pt_base == PFN_DOWN(__pa(addr))) {
1887 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1888 clear_page((void *)addr);
1889 (*pt_base)++;
1890 }
1891 if (*pt_end == PFN_DOWN(__pa(addr))) {
1892 set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
1893 clear_page((void *)addr);
1894 (*pt_end)--;
1895 }
1896}
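
/*
 * Set up the kernel's page tables from the ones Xen provided at boot:
 * convert the Xen-supplied entries from pfn to mfn form where needed,
 * copy the kernel mappings into init_level4_pgt, make everything
 * read-only, pin the new top level and switch cr3 to it, then release the
 * boot-time page-table frames that are no longer needed.
 */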
1908void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
1909{
1910 pud_t *l3;
1911 pmd_t *l2;
1912 unsigned long addr[3];
1913 unsigned long pt_base, pt_end;
1914 unsigned i;
1915
1916
1917
1918
1919
1920 if (xen_start_info->mfn_list < __START_KERNEL_map)
1921 max_pfn_mapped = xen_start_info->first_p2m_pfn;
1922 else
1923 max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
1924
1925 pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
1926 pt_end = pt_base + xen_start_info->nr_pt_frames;
1927
1928
1929 init_level4_pgt[0] = __pgd(0);
1930
1931 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1932
1933
1934
1935 convert_pfn_mfn(init_level4_pgt);
1936
1937
1938 convert_pfn_mfn(level3_ident_pgt);
1939
1940
1941 convert_pfn_mfn(level3_kernel_pgt);
1942
1943
1944 convert_pfn_mfn(level2_fixmap_pgt);
1945 }
1946
1947 l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
1948 l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
1949
1950 addr[0] = (unsigned long)pgd;
1951 addr[1] = (unsigned long)l3;
1952 addr[2] = (unsigned long)l2;
1953
1954
1955
1956
1957
1958
1959 copy_page(level2_ident_pgt, l2);
1960
1961 copy_page(level2_kernel_pgt, l2);
1962
1963
1964 i = pgd_index(xen_start_info->mfn_list);
1965 if (i && i < pgd_index(__START_KERNEL_map))
1966 init_level4_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
1967
1968 if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1969
1970 set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
1971 set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
1972 set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
1973 set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
1974 set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
1975 set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
1976 set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
1977 set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
1978
1979
1980 pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
1981 PFN_DOWN(__pa_symbol(init_level4_pgt)));
1982
1983
1984 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
1985
1986
1987
1988
1989
1990
1991 xen_mc_batch();
1992 __xen_write_cr3(true, __pa(init_level4_pgt));
1993 xen_mc_issue(PARAVIRT_LAZY_CPU);
1994 } else
1995 native_write_cr3(__pa(init_level4_pgt));
1996
1997
1998
1999
2000
2001
2002
2003 for (i = 0; i < ARRAY_SIZE(addr); i++)
2004 check_pt_base(&pt_base, &pt_end, addr[i]);
2005
2006
2007 xen_pt_base = PFN_PHYS(pt_base);
2008 xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
2009 memblock_reserve(xen_pt_base, xen_pt_size);
2010
2011
2012 xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
2013}
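
/* Read an unsigned long from a physical address via a temporary mapping. */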
2018static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
2019{
2020 unsigned long *vaddr;
2021 unsigned long val;
2022
2023 vaddr = early_memremap_ro(addr, sizeof(val));
2024 val = *vaddr;
2025 early_memunmap(vaddr, sizeof(val));
2026 return val;
2027}
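
/*
 * Translate a virtual address to a physical one by walking the current
 * page tables through physical reads, without relying on them being
 * mapped into the kernel address space.
 */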
2033static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
2034{
2035 phys_addr_t pa;
2036 pgd_t pgd;
2037 pud_t pud;
2038 pmd_t pmd;
2039 pte_t pte;
2040
2041 pa = read_cr3();
2042 pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
2043 sizeof(pgd)));
2044 if (!pgd_present(pgd))
2045 return 0;
2046
2047 pa = pgd_val(pgd) & PTE_PFN_MASK;
2048 pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
2049 sizeof(pud)));
2050 if (!pud_present(pud))
2051 return 0;
2052 pa = pud_pfn(pud) << PAGE_SHIFT;
2053 if (pud_large(pud))
2054 return pa + (vaddr & ~PUD_MASK);
2055
2056 pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
2057 sizeof(pmd)));
2058 if (!pmd_present(pmd))
2059 return 0;
2060 pa = pmd_pfn(pmd) << PAGE_SHIFT;
2061 if (pmd_large(pmd))
2062 return pa + (vaddr & ~PMD_MASK);
2063
2064 pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
2065 sizeof(pte)));
2066 if (!pte_present(pte))
2067 return 0;
2068 pa = pte_pfn(pte) << PAGE_SHIFT;
2069
2070 return pa | (vaddr & ~PAGE_MASK);
2071}
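
/*
 * Relocate the hypervisor-supplied p2m list: build a fresh set of page
 * tables in a newly reserved memory area, map the p2m list there, copy it
 * over, and free the frames used by the old location.
 */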
2077void __init xen_relocate_p2m(void)
2078{
2079 phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
2080 unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
2081 int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
2082 pte_t *pt;
2083 pmd_t *pmd;
2084 pud_t *pud;
2085 pgd_t *pgd;
2086 unsigned long *new_p2m;
2087
2088 size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
2089 n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
2090 n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
2091 n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
2092 n_pud = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
2093 n_frames = n_pte + n_pt + n_pmd + n_pud;
2094
2095 new_area = xen_find_free_area(PFN_PHYS(n_frames));
2096 if (!new_area) {
2097 xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
2098 BUG();
2099 }
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109 pud_phys = new_area;
2110 pmd_phys = pud_phys + PFN_PHYS(n_pud);
2111 pt_phys = pmd_phys + PFN_PHYS(n_pmd);
2112 p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
2113
2114 pgd = __va(read_cr3());
2115 new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
2116 for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
2117 pud = early_memremap(pud_phys, PAGE_SIZE);
2118 clear_page(pud);
2119 for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
2120 idx_pmd++) {
2121 pmd = early_memremap(pmd_phys, PAGE_SIZE);
2122 clear_page(pmd);
2123 for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
2124 idx_pt++) {
2125 pt = early_memremap(pt_phys, PAGE_SIZE);
2126 clear_page(pt);
2127 for (idx_pte = 0;
2128 idx_pte < min(n_pte, PTRS_PER_PTE);
2129 idx_pte++) {
2130 set_pte(pt + idx_pte,
2131 pfn_pte(p2m_pfn, PAGE_KERNEL));
2132 p2m_pfn++;
2133 }
2134 n_pte -= PTRS_PER_PTE;
2135 early_memunmap(pt, PAGE_SIZE);
2136 make_lowmem_page_readonly(__va(pt_phys));
2137 pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
2138 PFN_DOWN(pt_phys));
2139 set_pmd(pmd + idx_pt,
2140 __pmd(_PAGE_TABLE | pt_phys));
2141 pt_phys += PAGE_SIZE;
2142 }
2143 n_pt -= PTRS_PER_PMD;
2144 early_memunmap(pmd, PAGE_SIZE);
2145 make_lowmem_page_readonly(__va(pmd_phys));
2146 pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
2147 PFN_DOWN(pmd_phys));
2148 set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
2149 pmd_phys += PAGE_SIZE;
2150 }
2151 n_pmd -= PTRS_PER_PUD;
2152 early_memunmap(pud, PAGE_SIZE);
2153 make_lowmem_page_readonly(__va(pud_phys));
2154 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
2155 set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
2156 pud_phys += PAGE_SIZE;
2157 }
2158
2159
2160 memcpy(new_p2m, xen_p2m_addr, size);
2161 xen_p2m_addr = new_p2m;
2162
2163
2164 p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
2165 BUG_ON(!p2m_pfn);
2166 p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
2167
2168 if (xen_start_info->mfn_list < __START_KERNEL_map) {
2169 pfn = xen_start_info->first_p2m_pfn;
2170 pfn_end = xen_start_info->first_p2m_pfn +
2171 xen_start_info->nr_p2m_frames;
2172 set_pgd(pgd + 1, __pgd(0));
2173 } else {
2174 pfn = p2m_pfn;
2175 pfn_end = p2m_pfn_end;
2176 }
2177
2178 memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
2179 while (pfn < pfn_end) {
2180 if (pfn == p2m_pfn) {
2181 pfn = p2m_pfn_end;
2182 continue;
2183 }
2184 make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
2185 pfn++;
2186 }
2187
2188 xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
2189 xen_start_info->first_p2m_pfn = PFN_DOWN(new_area);
2190 xen_start_info->nr_p2m_frames = n_frames;
2191}
2192
2193#else
2194static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
2195static RESERVE_BRK_ARRAY(pmd_t, swapper_kernel_pmd, PTRS_PER_PMD);
2196
2197static void __init xen_write_cr3_init(unsigned long cr3)
2198{
2199 unsigned long pfn = PFN_DOWN(__pa(swapper_pg_dir));
2200
2201 BUG_ON(read_cr3() != __pa(initial_page_table));
2202 BUG_ON(cr3 != __pa(swapper_pg_dir));
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214 swapper_kernel_pmd =
2215 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2216 copy_page(swapper_kernel_pmd, initial_kernel_pmd);
2217 swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
2218 __pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
2219 set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
2220
2221 set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO);
2222 xen_write_cr3(cr3);
2223 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, pfn);
2224
2225 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
2226 PFN_DOWN(__pa(initial_page_table)));
2227 set_page_prot(initial_page_table, PAGE_KERNEL);
2228 set_page_prot(initial_kernel_pmd, PAGE_KERNEL);
2229
2230 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2231}
2232
2233
2234
2235
2236
2237
2238static phys_addr_t xen_find_pt_base(pmd_t *pmd)
2239{
2240 phys_addr_t pt_base, paddr;
2241 unsigned pmdidx;
2242
2243 pt_base = min(__pa(xen_start_info->pt_base), __pa(pmd));
2244
2245 for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++)
2246 if (pmd_present(pmd[pmdidx]) && !pmd_large(pmd[pmdidx])) {
2247 paddr = m2p(pmd[pmdidx].pmd);
2248 pt_base = min(pt_base, paddr);
2249 }
2250
2251 return pt_base;
2252}
2253
2254void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
2255{
2256 pmd_t *kernel_pmd;
2257
2258 kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
2259
2260 xen_pt_base = xen_find_pt_base(kernel_pmd);
2261 xen_pt_size = xen_start_info->nr_pt_frames * PAGE_SIZE;
2262
2263 initial_kernel_pmd =
2264 extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
2265
2266 max_pfn_mapped = PFN_DOWN(xen_pt_base + xen_pt_size + 512 * 1024);
2267
2268 copy_page(initial_kernel_pmd, kernel_pmd);
2269
2270 xen_map_identity_early(initial_kernel_pmd, max_pfn);
2271
2272 copy_page(initial_page_table, pgd);
2273 initial_page_table[KERNEL_PGD_BOUNDARY] =
2274 __pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
2275
2276 set_page_prot(initial_kernel_pmd, PAGE_KERNEL_RO);
2277 set_page_prot(initial_page_table, PAGE_KERNEL_RO);
2278 set_page_prot(empty_zero_page, PAGE_KERNEL_RO);
2279
2280 pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
2281
2282 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE,
2283 PFN_DOWN(__pa(initial_page_table)));
2284 xen_write_cr3(__pa(initial_page_table));
2285
2286 memblock_reserve(xen_pt_base, xen_pt_size);
2287}
2288#endif
2289
2290void __init xen_reserve_special_pages(void)
2291{
2292 phys_addr_t paddr;
2293
2294 memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
2295 if (xen_start_info->store_mfn) {
2296 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
2297 memblock_reserve(paddr, PAGE_SIZE);
2298 }
2299 if (!xen_initial_domain()) {
2300 paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
2301 memblock_reserve(paddr, PAGE_SIZE);
2302 }
2303}
2304
2305void __init xen_pt_check_e820(void)
2306{
2307 if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
2308 xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
2309 BUG();
2310 }
2311}
2312
2313static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
2314
2315static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
2316{
2317 pte_t pte;
2318
2319 phys >>= PAGE_SHIFT;
2320
2321 switch (idx) {
2322 case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
2323 case FIX_RO_IDT:
2324#ifdef CONFIG_X86_32
2325 case FIX_WP_TEST:
2326# ifdef CONFIG_HIGHMEM
2327 case FIX_KMAP_BEGIN ... FIX_KMAP_END:
2328# endif
2329#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
2330 case VSYSCALL_PAGE:
2331#endif
2332 case FIX_TEXT_POKE0:
2333 case FIX_TEXT_POKE1:
2334
2335 pte = pfn_pte(phys, prot);
2336 break;
2337
2338#ifdef CONFIG_X86_LOCAL_APIC
2339 case FIX_APIC_BASE:
2340 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2341 break;
2342#endif
2343
2344#ifdef CONFIG_X86_IO_APIC
2345 case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
2346
2347
2348
2349
2350 pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
2351 break;
2352#endif
2353
2354 case FIX_PARAVIRT_BOOTMAP:
2355
2356
2357 pte = mfn_pte(phys, prot);
2358 break;
2359
2360 default:
2361
2362 pte = mfn_pte(phys, prot);
2363 break;
2364 }
2365
2366 __native_set_fixmap(idx, pte);
2367
2368#ifdef CONFIG_X86_VSYSCALL_EMULATION
2369
2370
2371 if (idx == VSYSCALL_PAGE) {
2372 unsigned long vaddr = __fix_to_virt(idx);
2373 set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
2374 }
2375#endif
2376}
2377
2378static void __init xen_post_allocator_init(void)
2379{
2380 if (xen_feature(XENFEAT_auto_translated_physmap))
2381 return;
2382
2383 pv_mmu_ops.set_pte = xen_set_pte;
2384 pv_mmu_ops.set_pmd = xen_set_pmd;
2385 pv_mmu_ops.set_pud = xen_set_pud;
2386#if CONFIG_PGTABLE_LEVELS == 4
2387 pv_mmu_ops.set_pgd = xen_set_pgd;
2388#endif
2389
2390
2391
2392 pv_mmu_ops.alloc_pte = xen_alloc_pte;
2393 pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
2394 pv_mmu_ops.release_pte = xen_release_pte;
2395 pv_mmu_ops.release_pmd = xen_release_pmd;
2396#if CONFIG_PGTABLE_LEVELS == 4
2397 pv_mmu_ops.alloc_pud = xen_alloc_pud;
2398 pv_mmu_ops.release_pud = xen_release_pud;
2399#endif
2400 pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte);
2401
2402#ifdef CONFIG_X86_64
2403 pv_mmu_ops.write_cr3 = &xen_write_cr3;
2404 SetPagePinned(virt_to_page(level3_user_vsyscall));
2405#endif
2406 xen_mark_init_mm_pinned();
2407}
2408
2409static void xen_leave_lazy_mmu(void)
2410{
2411 preempt_disable();
2412 xen_mc_flush();
2413 paravirt_leave_lazy_mmu();
2414 preempt_enable();
2415}
2416
2417static const struct pv_mmu_ops xen_mmu_ops __initconst = {
2418 .read_cr2 = xen_read_cr2,
2419 .write_cr2 = xen_write_cr2,
2420
2421 .read_cr3 = xen_read_cr3,
2422 .write_cr3 = xen_write_cr3_init,
2423
2424 .flush_tlb_user = xen_flush_tlb,
2425 .flush_tlb_kernel = xen_flush_tlb,
2426 .flush_tlb_single = xen_flush_tlb_single,
2427 .flush_tlb_others = xen_flush_tlb_others,
2428
2429 .pte_update = paravirt_nop,
2430
2431 .pgd_alloc = xen_pgd_alloc,
2432 .pgd_free = xen_pgd_free,
2433
2434 .alloc_pte = xen_alloc_pte_init,
2435 .release_pte = xen_release_pte_init,
2436 .alloc_pmd = xen_alloc_pmd_init,
2437 .release_pmd = xen_release_pmd_init,
2438
2439 .set_pte = xen_set_pte_init,
2440 .set_pte_at = xen_set_pte_at,
2441 .set_pmd = xen_set_pmd_hyper,
2442
2443 .ptep_modify_prot_start = __ptep_modify_prot_start,
2444 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
2445
2446 .pte_val = PV_CALLEE_SAVE(xen_pte_val),
2447 .pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
2448
2449 .make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
2450 .make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
2451
2452#ifdef CONFIG_X86_PAE
2453 .set_pte_atomic = xen_set_pte_atomic,
2454 .pte_clear = xen_pte_clear,
2455 .pmd_clear = xen_pmd_clear,
2456#endif
2457 .set_pud = xen_set_pud_hyper,
2458
2459 .make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
2460 .pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
2461
2462#if CONFIG_PGTABLE_LEVELS == 4
2463 .pud_val = PV_CALLEE_SAVE(xen_pud_val),
2464 .make_pud = PV_CALLEE_SAVE(xen_make_pud),
2465 .set_pgd = xen_set_pgd_hyper,
2466
2467 .alloc_pud = xen_alloc_pmd_init,
2468 .release_pud = xen_release_pmd_init,
2469#endif
2470
2471 .activate_mm = xen_activate_mm,
2472 .dup_mmap = xen_dup_mmap,
2473 .exit_mmap = xen_exit_mmap,
2474
2475 .lazy_mode = {
2476 .enter = paravirt_enter_lazy_mmu,
2477 .leave = xen_leave_lazy_mmu,
2478 .flush = paravirt_flush_lazy_mmu,
2479 },
2480
2481 .set_fixmap = xen_set_fixmap,
2482};
2483
2484void __init xen_init_mmu_ops(void)
2485{
2486 x86_init.paging.pagetable_init = xen_pagetable_init;
2487
2488 if (xen_feature(XENFEAT_auto_translated_physmap))
2489 return;
2490
2491 pv_mmu_ops = xen_mmu_ops;
2492
2493 memset(dummy_mapping, 0xff, PAGE_SIZE);
2494}
2495
2496
2497#define MAX_CONTIG_ORDER 9
2498static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
2499
2500#define VOID_PTE (mfn_pte(0, __pgprot(0)))
2501static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
2502 unsigned long *in_frames,
2503 unsigned long *out_frames)
2504{
2505 int i;
2506 struct multicall_space mcs;
2507
2508 xen_mc_batch();
2509 for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
2510 mcs = __xen_mc_entry(0);
2511
2512 if (in_frames)
2513 in_frames[i] = virt_to_mfn(vaddr);
2514
2515 MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
2516 __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
2517
2518 if (out_frames)
2519 out_frames[i] = virt_to_pfn(vaddr);
2520 }
2521 xen_mc_issue(0);
2522}
2523
2524
2525
2526
2527
2528
2529static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
2530 unsigned long *mfns,
2531 unsigned long first_mfn)
2532{
2533 unsigned i, limit;
2534 unsigned long mfn;
2535
2536 xen_mc_batch();
2537
2538 limit = 1u << order;
2539 for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
2540 struct multicall_space mcs;
2541 unsigned flags;
2542
2543 mcs = __xen_mc_entry(0);
2544 if (mfns)
2545 mfn = mfns[i];
2546 else
2547 mfn = first_mfn + i;
2548
2549 if (i < (limit - 1))
2550 flags = 0;
2551 else {
2552 if (order == 0)
2553 flags = UVMF_INVLPG | UVMF_ALL;
2554 else
2555 flags = UVMF_TLB_FLUSH | UVMF_ALL;
2556 }
2557
2558 MULTI_update_va_mapping(mcs.mc, vaddr,
2559 mfn_pte(mfn, PAGE_KERNEL), flags);
2560
2561 set_phys_to_machine(virt_to_pfn(vaddr), mfn);
2562 }
2563
2564 xen_mc_issue(0);
2565}
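
/*
 * Ask the hypervisor to exchange one set of page frames for another, e.g.
 * trading a scattered set of frames for a machine-contiguous extent (or
 * back again).  Returns non-zero on complete success; partial exchanges
 * are treated as a bug.
 */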
static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
			       unsigned long *pfns_in,
			       unsigned long extents_out,
			       unsigned int order_out,
			       unsigned long *mfns_out,
			       unsigned int address_bits)
{
	long rc;
	int success;

	struct xen_memory_exchange exchange = {
		.in = {
			.nr_extents   = extents_in,
			.extent_order = order_in,
			.extent_start = pfns_in,
			.domid        = DOMID_SELF
		},
		.out = {
			.nr_extents   = extents_out,
			.extent_order = order_out,
			.extent_start = mfns_out,
			.address_bits = address_bits,
			.domid        = DOMID_SELF
		}
	};

	/* Both sides of the exchange must describe the same number of pages. */
	BUG_ON(extents_in << order_in != extents_out << order_out);

	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
	success = (exchange.nr_exchanged == extents_in);

	/* The exchange either succeeds completely or exchanges nothing. */
	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
	BUG_ON(success && (rc != 0));

	return success;
}

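/*
 * Exchange the 2^order frames backing the range at @pstart for a single
 * machine-contiguous extent that satisfies @address_bits, and return its
 * bus address in @dma_handle.  On failure the original frames are mapped
 * back and -ENOMEM is returned.
 */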
int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
				 unsigned int address_bits,
				 dma_addr_t *dma_handle)
{
	unsigned long *in_frames = discontig_frames, out_frame;
	unsigned long flags;
	int success;
	unsigned long vstart = (unsigned long)phys_to_virt(pstart);

	/*
	 * Currently an auto-translated guest will not perform I/O, nor will
	 * it require PAE page directories below 4GB. Therefore any calls to
	 * this function are redundant and can be ignored.
	 */

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return -ENOMEM;

	memset((void *) vstart, 0, PAGE_SIZE << order);

	spin_lock_irqsave(&xen_reservation_lock, flags);

	/* 1. Zap current PTEs, remembering MFNs. */
	xen_zap_pfn_range(vstart, order, in_frames, NULL);

	/* 2. Get a new contiguous memory extent. */
	out_frame = virt_to_pfn(vstart);
	success = xen_exchange_memory(1UL << order, 0, in_frames,
				      1, order, &out_frame,
				      address_bits);

	/* 3. Map the new extent in place of the old pages. */
	if (success)
		xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
	else
		xen_remap_exchanged_ptes(vstart, order, in_frames, 0);

	spin_unlock_irqrestore(&xen_reservation_lock, flags);

	*dma_handle = virt_to_machine(vstart).maddr;
	return success ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(xen_create_contiguous_region);

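/*
 * Undo xen_create_contiguous_region(): exchange the single contiguous
 * extent backing @pstart for 2^order individual frames and map them back.
 */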
void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
{
	unsigned long *out_frames = discontig_frames, in_frame;
	unsigned long flags;
	int success;
	unsigned long vstart;

	if (xen_feature(XENFEAT_auto_translated_physmap))
		return;

	if (unlikely(order > MAX_CONTIG_ORDER))
		return;

	vstart = (unsigned long)phys_to_virt(pstart);
	memset((void *) vstart, 0, PAGE_SIZE << order);

	spin_lock_irqsave(&xen_reservation_lock, flags);

	/* 1. Find the start MFN of the contiguous extent. */
	in_frame = virt_to_mfn(vstart);

	/* 2. Zap the current PTEs. */
	xen_zap_pfn_range(vstart, order, NULL, out_frames);

	/* 3. Do the exchange for non-contiguous MFNs. */
	success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
					0, out_frames, 0);

	/* 4. Map the new pages in place of the old pages. */
	if (success)
		xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
	else
		xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);

	spin_unlock_irqrestore(&xen_reservation_lock, flags);
}
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);

#ifdef CONFIG_XEN_PVHVM
#ifdef CONFIG_PROC_VMCORE
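/*
 * Used by the crash-dump (vmcore) path, via register_oldmem_pfn_is_ram(),
 * to decide whether a pfn of the crashed kernel is backed by RAM.
 * Returns 1 for RAM pages, 0 for pfns the hypervisor reports as emulated
 * MMIO (such as ballooned-out pages), and -ENXIO if the memory type
 * cannot be queried.
 */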
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
{
	struct xen_hvm_get_mem_type a = {
		.domid = DOMID_SELF,
		.pfn = pfn,
	};
	int ram;

	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
		return -ENXIO;

	switch (a.mem_type) {
	case HVMMEM_mmio_dm:
		ram = 0;
		break;
	case HVMMEM_ram_rw:
	case HVMMEM_ram_ro:
	default:
		ram = 1;
		break;
	}

	return ram;
}
#endif	/* CONFIG_PROC_VMCORE */

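/*
 * Notify the hypervisor that this mm's toplevel pagetable is about to be
 * torn down, so any shadow pagetable state for it can be dropped early.
 */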
static void xen_hvm_exit_mmap(struct mm_struct *mm)
{
	struct xen_hvm_pagetable_dying a;
	int rc;

	a.domid = DOMID_SELF;
	a.gpa = __pa(mm->pgd);
	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
	WARN_ON_ONCE(rc < 0);
}

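/* Probe whether the hypervisor implements HVMOP_pagetable_dying. */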
static int is_pagetable_dying_supported(void)
{
	struct xen_hvm_pagetable_dying a;
	int rc = 0;

	a.domid = DOMID_SELF;
	a.gpa = 0x00;
	rc = HYPERVISOR_hvm_op(HVMOP_pagetable_dying, &a);
	if (rc < 0) {
		printk(KERN_DEBUG "HVMOP_pagetable_dying not supported\n");
		return 0;
	}
	return 1;
}

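/*
 * MMU setup for PVHVM guests: only the exit_mmap hook and, when kdump is
 * configured, the oldmem pfn check need Xen-specific handling.
 */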
void __init xen_hvm_init_mmu_ops(void)
{
	if (is_pagetable_dying_supported())
		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
#ifdef CONFIG_PROC_VMCORE
	register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
#endif
}
#endif	/* CONFIG_XEN_PVHVM */

#define REMAP_BATCH_SIZE 16

struct remap_data {
	xen_pfn_t *mfn;
	bool contiguous;
	pgprot_t prot;
	struct mmu_update *mmu_update;
};

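/*
 * Callback for apply_to_page_range(): build the new PTE for one page and
 * queue the corresponding mmu_update entry.
 */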
static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
				 unsigned long addr, void *data)
{
	struct remap_data *rmd = data;
	pte_t pte = pte_mkspecial(mfn_pte(*rmd->mfn, rmd->prot));

	/* For a contiguous range, advance the mfn itself; otherwise
	 * advance the pointer to the next mfn in the array. */
	if (rmd->contiguous)
		(*rmd->mfn)++;
	else
		rmd->mfn++;

	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
	rmd->mmu_update->val = pte_val_ma(pte);
	rmd->mmu_update++;

	return 0;
}

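/*
 * Map @nr frames from domain @domid into @vma starting at @addr.  When
 * @err_ptr is NULL, @gfn is a single, contiguous starting frame;
 * otherwise it is an array of frames and per-frame errors are reported
 * back through @err_ptr.  Returns the number of frames mapped, or a
 * negative error code.
 */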
static int do_remap_gfn(struct vm_area_struct *vma,
			unsigned long addr,
			xen_pfn_t *gfn, int nr,
			int *err_ptr, pgprot_t prot,
			unsigned domid,
			struct page **pages)
{
	int err = 0;
	struct remap_data rmd;
	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
	unsigned long range;
	int mapped = 0;

	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
#ifdef CONFIG_XEN_PVH
		/* We need to update the local page tables and the xen HAP */
		return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr,
						 prot, domid, pages);
#else
		return -EINVAL;
#endif
	}

	rmd.mfn = gfn;
	rmd.prot = prot;
	/* err_ptr tells us whether this is a single contiguous mapping or
	 * a discontiguous (per-frame) one. */
	rmd.contiguous = !err_ptr;

	while (nr) {
		int index = 0;
		int done = 0;
		int batch = min(REMAP_BATCH_SIZE, nr);
		int batch_left = batch;
		range = (unsigned long)batch << PAGE_SHIFT;

		rmd.mmu_update = mmu_update;
		err = apply_to_page_range(vma->vm_mm, addr, range,
					  remap_area_mfn_pte_fn, &rmd);
		if (err)
			goto out;

		/* Record the error for each frame that fails, but keep
		 * mapping until the whole batch is done. */
		do {
			int i;

			err = HYPERVISOR_mmu_update(&mmu_update[index],
						    batch_left, &done, domid);

			/*
			 * @err_ptr may be the same buffer as @gfn, so
			 * only clear it after each chunk of @gfn is
			 * used.
			 */
			if (err_ptr) {
				for (i = index; i < index + done; i++)
					err_ptr[i] = 0;
			}
			if (err < 0) {
				if (!err_ptr)
					goto out;
				/* Here i == index + done, the frame that failed. */
				err_ptr[i] = err;
				done++; /* Skip the failed frame. */
			} else
				mapped += done;
			batch_left -= done;
			index += done;
		} while (batch_left);

		nr -= batch;
		addr += range;
		if (err_ptr)
			err_ptr += batch;
		cond_resched();
	}
out:

	xen_flush_tlb_all();

	return err < 0 ? err : mapped;
}

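/*
 * Map a contiguous range of @nr frames starting at @gfn; the first error
 * aborts the whole mapping.
 */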
int xen_remap_domain_gfn_range(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t gfn, int nr,
			       pgprot_t prot, unsigned domid,
			       struct page **pages)
{
	return do_remap_gfn(vma, addr, &gfn, nr, NULL, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_range);

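/* Map an array of @nr frames; per-frame errors are returned in @err_ptr. */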
int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
			       unsigned long addr,
			       xen_pfn_t *gfn, int nr,
			       int *err_ptr, pgprot_t prot,
			       unsigned domid, struct page **pages)
{
	/* We BUG_ON here because passing a NULL err_ptr is a programmer
	 * error, and the consequence - the wrong memory silently mapped
	 * in - is otherwise very hard to track down.
	 */
	BUG_ON(err_ptr == NULL);
	return do_remap_gfn(vma, addr, gfn, nr, err_ptr, prot, domid, pages);
}
EXPORT_SYMBOL_GPL(xen_remap_domain_gfn_array);

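/* Returns 0 on success.  PV guests have nothing to unmap here, so this is
 * a no-op for them; only auto-translated (PVH) guests take the xlate path. */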
int xen_unmap_domain_gfn_range(struct vm_area_struct *vma,
			       int numpgs, struct page **pages)
{
	if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
		return 0;

#ifdef CONFIG_XEN_PVH
	return xen_xlate_unmap_gfn_range(vma, numpgs, pages);
#else
	return -EINVAL;
#endif
}
EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range);
