// SPDX-License-Identifier: GPL-2.0
/*
 * Core of Xen paravirt_ops implementation.
 *
 * This file contains the xen_paravirt_ops structure itself, and the
 * implementations for:
 * - privileged instructions
 * - interrupt flags
 * - segment operations
 * - booting and setup
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/preempt.h>
#include <linux/hardirq.h>
#include <linux/percpu.h>
#include <linux/delay.h>
#include <linux/start_kernel.h>
#include <linux/sched.h>
#include <linux/kprobes.h>
#include <linux/memblock.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/highmem.h>
#include <linux/console.h>
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/edd.h>
#include <linux/frame.h>

#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/memory.h>
#include <xen/interface/nmi.h>
#include <xen/interface/xen-mca.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
#include <xen/acpi.h>

#include <asm/paravirt.h>
#include <asm/apic.h>
#include <asm/page.h>
#include <asm/xen/pci.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/cpuid.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/proto.h>
#include <asm/msr-index.h>
#include <asm/traps.h>
#include <asm/setup.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
#include <asm/mach_traps.h>
#include <asm/mwait.h>
#include <asm/pci_x86.h>
#include <asm/cpu.h>
#ifdef CONFIG_X86_IOPL_IOPERM
#include <asm/io_bitmap.h>
#endif

#ifdef CONFIG_ACPI
#include <linux/acpi.h>
#include <asm/acpi.h>
#include <acpi/pdc_intel.h>
#include <acpi/processor.h>
#include <xen/interface/platform.h>
#endif

#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "multicalls.h"
#include "pmu.h"

#include "../kernel/cpu/cpu.h"

void *xen_initial_gdt;

static int xen_cpu_up_prepare_pv(unsigned int cpu);
static int xen_cpu_dead_pv(unsigned int cpu);

struct tls_descs {
	struct desc_struct desc[3];
};

/*
 * Updating the 3 TLS descriptors in the GDT on every task switch is
 * surprisingly expensive so we avoid updating them if they haven't
 * changed.  Since Xen writes different descriptors than the ones
 * passed in, we keep shadow copies to compare against.
 */
static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);

static void __init xen_banner(void)
{
	unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
	struct xen_extraversion extra;

	HYPERVISOR_xen_version(XENVER_extraversion, &extra);

	pr_info("Booting paravirtualized kernel on %s\n", pv_info.name);
	pr_info("Xen version: %d.%d%s%s\n",
		version >> 16, version & 0xffff, extra.extraversion,
		xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");

#ifdef CONFIG_X86_32
	pr_warn("WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n"
		"Support for running as 32-bit PV-guest under Xen will soon be removed\n"
		"from the Linux kernel!\n"
		"Please use either a 64-bit kernel or switch to HVM or PVH mode!\n"
		"WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n");
#endif
}

static void __init xen_pv_init_platform(void)
{
	populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));

	set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
	HYPERVISOR_shared_info = (void *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);

	/* xen clock uses per-cpu vcpu_info, need to init it for boot cpu */
	xen_vcpu_info_reset(0);

	/* pvclock is in shared info area */
	xen_init_time_ops();
}

static void __init xen_pv_guest_late_init(void)
{
#ifndef CONFIG_SMP
	/* Setup shared vcpu info for non-smp configurations */
	xen_setup_vcpu_info_placement();
#endif
}

/* Check if running on Xen version (major, minor) or later */
bool
xen_running_on_version_or_later(unsigned int major, unsigned int minor)
{
	unsigned int version;

	if (!xen_domain())
		return false;

	version = HYPERVISOR_xen_version(XENVER_version, NULL);
	if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
	    ((version >> 16) > major))
		return true;
	return false;
}

static __read_mostly unsigned int cpuid_leaf5_ecx_val;
static __read_mostly unsigned int cpuid_leaf5_edx_val;

static void xen_cpuid(unsigned int *ax, unsigned int *bx,
		      unsigned int *cx, unsigned int *dx)
{
	unsigned maskebx = ~0;

	/*
	 * Mask out inconvenient features, to try and disable as many
	 * unsupported kernel subsystems as possible.
	 */
	switch (*ax) {
	case CPUID_MWAIT_LEAF:
		/* Synthesize the values.. */
		*ax = 0;
		*bx = 0;
		*cx = cpuid_leaf5_ecx_val;
		*dx = cpuid_leaf5_edx_val;
		return;

	case 0xb:
		/* Suppress extended topology stuff */
		maskebx = 0;
		break;
	}

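	/*
	 * Invoke CPUID with the Xen forced-emulation prefix: the
	 * hypervisor traps and emulates the instruction, so the guest
	 * sees the hypervisor-filtered feature set rather than raw
	 * hardware CPUID.
	 */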
	asm(XEN_EMULATE_PREFIX "cpuid"
		: "=a" (*ax),
		  "=b" (*bx),
		  "=c" (*cx),
		  "=d" (*dx)
		: "0" (*ax), "2" (*cx));

	*bx &= maskebx;
}
STACK_FRAME_NON_STANDARD(xen_cpuid);

static bool __init xen_check_mwait(void)
{
#ifdef CONFIG_ACPI
	struct xen_platform_op op = {
		.cmd			= XENPF_set_processor_pminfo,
		.u.set_pminfo.id	= -1,
		.u.set_pminfo.type	= XEN_PM_PDC,
	};
	uint32_t buf[3];
	unsigned int ax, bx, cx, dx;
	unsigned int mwait_mask;

	/*
	 * We need to determine whether it is OK to expose the MWAIT
	 * capability to the kernel to harvest deeper than C3 states
	 * from ACPI _CST using the processor_harvest_xen.c module.
	 * For this to work, we need to gather the MWAIT_LEAF values
	 * (which the cstate.c code checks against).  The hypervisor
	 * won't expose the MWAIT flag because it would break backwards
	 * compatibility, so we will find out directly from the
	 * hardware and hypercall.
	 */
	if (!xen_initial_domain())
		return false;

	/*
	 * When running under a platform earlier than Xen 4.2, do not
	 * expose MWAIT, to avoid the risk of loading native acpi pad driver
	 */
	if (!xen_running_on_version_or_later(4, 2))
		return false;

	ax = 1;
	cx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
		     (1 << (X86_FEATURE_MWAIT % 32));

	if ((cx & mwait_mask) != mwait_mask)
		return false;

	/*
	 * We need to emulate the MWAIT_LEAF and for that we need both
	 * ecx and edx. The hypercall provides only partial information.
	 */
	ax = CPUID_MWAIT_LEAF;
	bx = 0;
	cx = 0;
	dx = 0;

	native_cpuid(&ax, &bx, &cx, &dx);

	/*
	 * Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
	 */
	buf[0] = ACPI_PDC_REVISION_ID;
	buf[1] = 1;
	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);

	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);

	if ((HYPERVISOR_platform_op(&op) == 0) &&
	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
		cpuid_leaf5_ecx_val = cx;
		cpuid_leaf5_edx_val = dx;
	}
	return true;
#else
	return false;
#endif
}

static bool __init xen_check_xsave(void)
{
	unsigned int cx, xsave_mask;

	cx = cpuid_ecx(1);

	xsave_mask = (1 << (X86_FEATURE_XSAVE % 32)) |
		     (1 << (X86_FEATURE_OSXSAVE % 32));

	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
	return (cx & xsave_mask) == xsave_mask;
}

static void __init xen_init_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_XENPV);
	setup_clear_cpu_cap(X86_FEATURE_DCA);
	setup_clear_cpu_cap(X86_FEATURE_APERFMPERF);
	setup_clear_cpu_cap(X86_FEATURE_MTRR);
	setup_clear_cpu_cap(X86_FEATURE_ACC);
	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
	setup_clear_cpu_cap(X86_FEATURE_SME);

	/*
	 * Xen PV would need some work to support PCID: CR3 handling as well
	 * as xen_flush_tlb_others() would need updating.
	 */
	setup_clear_cpu_cap(X86_FEATURE_PCID);

	if (!xen_initial_domain())
		setup_clear_cpu_cap(X86_FEATURE_ACPI);

	if (xen_check_mwait())
		setup_force_cpu_cap(X86_FEATURE_MWAIT);
	else
		setup_clear_cpu_cap(X86_FEATURE_MWAIT);

	if (!xen_check_xsave()) {
		setup_clear_cpu_cap(X86_FEATURE_XSAVE);
		setup_clear_cpu_cap(X86_FEATURE_OSXSAVE);
	}
}

static void xen_set_debugreg(int reg, unsigned long val)
{
	HYPERVISOR_set_debugreg(reg, val);
}

static unsigned long xen_get_debugreg(int reg)
{
	return HYPERVISOR_get_debugreg(reg);
}

static void xen_end_context_switch(struct task_struct *next)
{
	xen_mc_flush();
	paravirt_end_context_switch(next);
}

static unsigned long xen_store_tr(void)
{
	return 0;
}

/*
 * Set the page permissions for a particular virtual address.  If the
 * address is a vmalloc mapping (or other aliased mapping), then we
 * also need to update the permissions of the directly-mapped alias,
 * since Xen validates the protections on every mapping of a page.
 */
static void set_aliased_prot(void *v, pgprot_t prot)
{
	int level;
	pte_t *ptep;
	pte_t pte;
	unsigned long pfn;
	struct page *page;
	unsigned char dummy;

	ptep = lookup_address((unsigned long)v, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	page = pfn_to_page(pfn);

	pte = pfn_pte(pfn, prot);

	/*
	 * Careful: update_va_mapping() will fail if the virtual address
	 * we're poking isn't populated in the page tables.  We don't
	 * need to worry about the direct map (that's always in the page
	 * tables), but we need to be careful about vmap space.  In
	 * particular, the top level page table can lazily propagate
	 * entries between processes, so if we've switched mms since we
	 * vmapped the target in the first place, we might not have the
	 * top-level page table entry populated.
	 *
	 * We disable preemption because we want the same mm active when
	 * we probe the target and when we issue the hypercall.  We'll
	 * have the same nominal mm, but if we're a kernel thread, lazy
	 * mm dropping could change our pgd.
	 *
	 * Out of an abundance of caution, this uses probe_kernel_read()
	 * to fault in the target address just in case there's some
	 * obscure case in which the target address isn't readable.
	 */
	preempt_disable();

	probe_kernel_read(&dummy, v, 1);

	if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
		BUG();

	if (!PageHighMem(page)) {
		void *av = __va(PFN_PHYS(pfn));

		if (av != v)
			if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
				BUG();
	} else
		kmap_flush_unused();

	preempt_enable();
}

static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

	/*
	 * We need to mark all aliases of the LDT pages RO.  We don't
	 * need to call vm_flush_aliases(), though, since that's only
	 * responsible for flushing aliases out of the TLBs, not the
	 * page tables, and Xen will flush the TLB for us if needed.
	 *
	 * To avoid confusing future readers: none of this is necessary
	 * to load the LDT.  The hypervisor only checks this when the
	 * LDT is faulted in due to subsequent descriptor access.
	 */
	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}

static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
	int i;

	for (i = 0; i < entries; i += entries_per_page)
		set_aliased_prot(ldt + i, PAGE_KERNEL);
}

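/*
 * Point the hypervisor at a new LDT.  The MMUEXT_SET_LDT operation is
 * queued through the multicall interface (see multicalls.h):
 * xen_mc_entry() reserves a slot in the per-cpu batch, and
 * xen_mc_issue() flushes it immediately unless we are inside a lazy
 * CPU-state section, in which case neighbouring updates are coalesced
 * into a single hypercall.
 */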
static void xen_set_ldt(const void *addr, unsigned entries)
{
	struct mmuext_op *op;
	struct multicall_space mcs = xen_mc_entry(sizeof(*op));

	trace_xen_cpu_set_ldt(addr, entries);

	op = mcs.args;
	op->cmd = MMUEXT_SET_LDT;
	op->arg1.linear_addr = (unsigned long)addr;
	op->arg2.nr_ents = entries;

	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

static void xen_load_gdt(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	int level;
	pte_t *ptep;
	void *virt;

	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

	/*
	 * The GDT is per-cpu and is in the percpu data area.
	 * That can be virtually mapped, so we need to do a
	 * page-walk to get the underlying MFN for the
	 * hypercall.  The page can also be in the kernel's
	 * linear range, so we need to RO that mapping too.
	 */
	ptep = lookup_address(va, &level);
	BUG_ON(ptep == NULL);

	pfn = pte_pfn(*ptep);
	mfn = pfn_to_mfn(pfn);
	virt = __va(PFN_PHYS(pfn));

	make_lowmem_page_readonly((void *)va);
	make_lowmem_page_readonly(virt);

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

/*
 * load_gdt for early boot, when the gdt is only mapped once
 */
static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
{
	unsigned long va = dtr->address;
	unsigned int size = dtr->size + 1;
	unsigned long pfn, mfn;
	pte_t pte;

	/* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */
	BUG_ON(size > PAGE_SIZE);
	BUG_ON(va & ~PAGE_MASK);

	pfn = virt_to_pfn(va);
	mfn = pfn_to_mfn(pfn);

	pte = pfn_pte(pfn, PAGE_KERNEL_RO);

	if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
		BUG();

	if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct)))
		BUG();
}

static inline bool desc_equal(const struct desc_struct *d1,
			      const struct desc_struct *d2)
{
	return !memcmp(d1, d2, sizeof(*d1));
}

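/*
 * Update a single TLS descriptor in the GDT, but only if it differs
 * from the shadow copy.  The write has to go through an
 * update_descriptor hypercall, queued here as part of the multicall
 * batch opened by xen_load_tls(), so skipping unchanged entries saves
 * hypervisor round trips on every context switch.
 */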
static void load_TLS_descriptor(struct thread_struct *t,
				unsigned int cpu, unsigned int i)
{
	struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
	struct desc_struct *gdt;
	xmaddr_t maddr;
	struct multicall_space mc;

	if (desc_equal(shadow, &t->tls_array[i]))
		return;

	*shadow = t->tls_array[i];

	gdt = get_cpu_gdt_rw(cpu);
	maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
	mc = __xen_mc_entry(0);

	MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}

static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
{
	/*
	 * XXX sleazy hack: If we're being called in a lazy-cpu zone
	 * and lazy gs handling is enabled, it means we're in a
	 * context switch, and %gs has just been saved.  This means we
	 * can zero it out to prevent faults on exit from the
	 * hypervisor if the next process has no %gs.  Either way, it
	 * has been saved, and the new value will get loaded properly.
	 * This will go away as soon as Xen has been modified to not
	 * save/restore %gs for normal hypercalls.
	 *
	 * On x86_64, this hack is not used for %gs, because gs points
	 * to KERNEL_GS_BASE (and uses it for PDA references), so we
	 * must not zero %gs on x86_64.
	 *
	 * For x86_64, we need to zero %fs, otherwise we may get an
	 * exception between the new %fs descriptor being loaded and
	 * %fs being effectively cleared at __switch_to().
	 */
	if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
#ifdef CONFIG_X86_32
		lazy_load_gs(0);
#else
		loadsegment(fs, 0);
#endif
	}

	xen_mc_batch();

	load_TLS_descriptor(t, cpu, 0);
	load_TLS_descriptor(t, cpu, 1);
	load_TLS_descriptor(t, cpu, 2);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

#ifdef CONFIG_X86_64
static void xen_load_gs_index(unsigned int idx)
{
	if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
		BUG();
}
#endif

static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
				const void *ptr)
{
	xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
	u64 entry = *(u64 *)ptr;

	trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);

	preempt_disable();

	xen_mc_flush();
	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
		BUG();

	preempt_enable();
}

#ifdef CONFIG_X86_64
struct trap_array_entry {
	void (*orig)(void);
	void (*xen)(void);
	bool ist_okay;
};

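/*
 * Table mapping each native exception entry point to the Xen PV
 * flavoured one that must be used instead.  get_trap_addr() below
 * rejects IST-using gates unless they are explicitly marked ist_okay,
 * since Xen PV delivers events on the regular kernel stack.
 */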
static struct trap_array_entry trap_array[] = {
	{ debug,                       xen_xendebug,                    true },
	{ double_fault,                xen_double_fault,                true },
#ifdef CONFIG_X86_MCE
	{ machine_check,               xen_machine_check,               true },
#endif
	{ nmi,                         xen_xennmi,                      true },
	{ int3,                        xen_int3,                        false },
	{ overflow,                    xen_overflow,                    false },
#ifdef CONFIG_IA32_EMULATION
	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
#endif
	{ page_fault,                  xen_page_fault,                  false },
	{ divide_error,                xen_divide_error,                false },
	{ bounds,                      xen_bounds,                      false },
	{ invalid_op,                  xen_invalid_op,                  false },
	{ device_not_available,        xen_device_not_available,        false },
	{ coprocessor_segment_overrun, xen_coprocessor_segment_overrun, false },
	{ invalid_TSS,                 xen_invalid_TSS,                 false },
	{ segment_not_present,         xen_segment_not_present,         false },
	{ stack_segment,               xen_stack_segment,               false },
	{ general_protection,          xen_general_protection,          false },
	{ spurious_interrupt_bug,      xen_spurious_interrupt_bug,      false },
	{ coprocessor_error,           xen_coprocessor_error,           false },
	{ alignment_check,             xen_alignment_check,             false },
	{ simd_coprocessor_error,      xen_simd_coprocessor_error,      false },
};

static bool __ref get_trap_addr(void **addr, unsigned int ist)
{
	unsigned int nr;
	bool ist_okay = false;

	/*
	 * Replace trap handler addresses by Xen specific ones.
	 * Check for known traps using IST and whitelist them.
	 * The debugger ones are the only ones we care about.
	 * Xen will handle faults like double_fault, so we should never see
	 * them.  Warn if there's an unexpected IST-using fault handler.
	 */
	for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++) {
		struct trap_array_entry *entry = trap_array + nr;

		if (*addr == entry->orig) {
			*addr = entry->xen;
			ist_okay = entry->ist_okay;
			break;
		}
	}

	if (nr == ARRAY_SIZE(trap_array) &&
	    *addr >= (void *)early_idt_handler_array[0] &&
	    *addr < (void *)early_idt_handler_array[NUM_EXCEPTION_VECTORS]) {
		nr = (*addr - (void *)early_idt_handler_array[0]) /
		     EARLY_IDT_HANDLER_SIZE;
		*addr = (void *)xen_early_idt_handler_array[nr];
	}

	if (WARN_ON(ist != 0 && !ist_okay))
		return false;

	return true;
}
#endif

static int cvt_gate_to_trap(int vector, const gate_desc *val,
			    struct trap_info *info)
{
	unsigned long addr;

	if (val->bits.type != GATE_TRAP && val->bits.type != GATE_INTERRUPT)
		return 0;

	info->vector = vector;

	addr = gate_offset(val);
#ifdef CONFIG_X86_64
	if (!get_trap_addr((void **)&addr, val->bits.ist))
		return 0;
#endif
	info->address = addr;

	info->cs = gate_segment(val);
	info->flags = val->bits.dpl;
	/* interrupt gates clear IF */
	if (val->bits.type == GATE_INTERRUPT)
		info->flags |= 1 << 2;

	return 1;
}

/* Locations of each CPU's IDT */
static DEFINE_PER_CPU(struct desc_ptr, idt_desc);

/* Set an IDT entry.  If the entry is part of the current IDT, then
   also update Xen. */
static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
{
	unsigned long p = (unsigned long)&dt[entrynum];
	unsigned long start, end;

	trace_xen_cpu_write_idt_entry(dt, entrynum, g);

	preempt_disable();

	start = __this_cpu_read(idt_desc.address);
	end = start + __this_cpu_read(idt_desc.size) + 1;

	xen_mc_flush();

	native_write_idt_entry(dt, entrynum, g);

	if (p >= start && (p + 8) <= end) {
		struct trap_info info[2];

		info[1].address = 0;

		if (cvt_gate_to_trap(entrynum, g, &info[0]))
			if (HYPERVISOR_set_trap_table(info))
				BUG();
	}

	preempt_enable();
}

static void xen_convert_trap_info(const struct desc_ptr *desc,
				  struct trap_info *traps)
{
	unsigned in, out, count;

	count = (desc->size+1) / sizeof(gate_desc);
	BUG_ON(count > 256);

	for (in = out = 0; in < count; in++) {
		gate_desc *entry = (gate_desc *)(desc->address) + in;

		if (cvt_gate_to_trap(in, entry, &traps[out]))
			out++;
	}
	traps[out].address = 0;
}

void xen_copy_trap_info(struct trap_info *traps)
{
	const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);

	xen_convert_trap_info(desc, traps);
}

/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
   hold a spinlock to protect the static traps[] array (static because
   it avoids allocation failure problems). */
static void xen_load_idt(const struct desc_ptr *desc)
{
	static DEFINE_SPINLOCK(lock);
	static struct trap_info traps[257];

	trace_xen_cpu_load_idt(desc);

	spin_lock(&lock);

	memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));

	xen_convert_trap_info(desc, traps);

	xen_mc_flush();
	if (HYPERVISOR_set_trap_table(traps))
		BUG();

	spin_unlock(&lock);
}

/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
   they're handled differently. */
static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
				const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	preempt_disable();

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);

		xen_mc_flush();
		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			BUG();
	}

	}

	preempt_enable();
}

/*
 * Version of write_gdt_entry for use at early boot-time needed to
 * update an entry as simply as possible.
 */
static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
					    const void *desc, int type)
{
	trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);

	switch (type) {
	case DESC_LDT:
	case DESC_TSS:
		/* ignore */
		break;

	default: {
		xmaddr_t maddr = virt_to_machine(&dt[entry]);

		if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
			dt[entry] = *(struct desc_struct *)desc;
	}

	}
}

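/*
 * Xen PV guests do not own a hardware TSS.  Instead of writing sp0
 * into the TSS, tell the hypervisor which kernel stack to use for
 * ring transitions via a stack_switch multicall, and mirror the value
 * into cpu_tss_rw so generic code that reads sp0 stays coherent.
 */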
static void xen_load_sp0(unsigned long sp0)
{
	struct multicall_space mcs;

	mcs = xen_mc_entry(0);
	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
	xen_mc_issue(PARAVIRT_LAZY_CPU);
	this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}

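/*
 * For PV guests the hypervisor, not the guest TSS, enforces I/O port
 * permissions, so after updating the native bitmap we also hand the
 * current bitmap to Xen via PHYSDEVOP_set_iobitmap.
 */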
#ifdef CONFIG_X86_IOPL_IOPERM
static void xen_update_io_bitmap(void)
{
	struct physdev_set_iobitmap iobitmap;
	struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);

	native_tss_update_io_bitmap();

	iobitmap.bitmap = (uint8_t *)(&tss->x86_tss) +
			  tss->x86_tss.io_bitmap_base;
	if (tss->x86_tss.io_bitmap_base == IO_BITMAP_OFFSET_INVALID)
		iobitmap.nr_ports = 0;
	else
		iobitmap.nr_ports = IO_BITMAP_BITS;

	HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap, &iobitmap);
}
#endif

static void xen_io_delay(void)
{
}

static DEFINE_PER_CPU(unsigned long, xen_cr0_value);

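/*
 * Reading %cr0 traps to the hypervisor in a PV guest, which is
 * expensive on context-switch paths that only care about CR0.TS, so
 * the last value written is cached per-cpu and returned from there
 * whenever possible.
 */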
static unsigned long xen_read_cr0(void)
{
	unsigned long cr0 = this_cpu_read(xen_cr0_value);

	if (unlikely(cr0 == 0)) {
		cr0 = native_read_cr0();
		this_cpu_write(xen_cr0_value, cr0);
	}

	return cr0;
}

static void xen_write_cr0(unsigned long cr0)
{
	struct multicall_space mcs;

	this_cpu_write(xen_cr0_value, cr0);

	/* Only pay attention to cr0.TS; everything else is
	   ignored. */
	mcs = xen_mc_entry(0);

	MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);

	xen_mc_issue(PARAVIRT_LAZY_CPU);
}

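/*
 * Mask out CR4 bits a PV guest must not set: the paging features
 * (PGE, PSE) are under hypervisor control, and PCE (user-mode RDPMC)
 * is not supported.
 */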
static void xen_write_cr4(unsigned long cr4)
{
	cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);

	native_write_cr4(cr4);
}

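/*
 * MSR accesses are offered to the Xen PMU code (pmu.c) first, which
 * emulates the performance-counter MSRs; anything it does not claim
 * falls through to the native rdmsr/wrmsr, which the hypervisor traps
 * and filters.
 */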
static u64 xen_read_msr_safe(unsigned int msr, int *err)
{
	u64 val;

	if (pmu_msr_read(msr, &val, err))
		return val;

	val = native_read_msr_safe(msr, err);
	switch (msr) {
	case MSR_IA32_APICBASE:
		val &= ~X2APIC_ENABLE;
		break;
	}
	return val;
}

static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
{
	int ret;
#ifdef CONFIG_X86_64
	unsigned int which;
	u64 base;
#endif

	ret = 0;

	switch (msr) {
#ifdef CONFIG_X86_64
	case MSR_FS_BASE:		which = SEGBASE_FS; goto set;
	case MSR_KERNEL_GS_BASE:	which = SEGBASE_GS_USER; goto set;
	case MSR_GS_BASE:		which = SEGBASE_GS_KERNEL; goto set;

	set:
		base = ((u64)high << 32) | low;
		if (HYPERVISOR_set_segment_base(which, base) != 0)
			ret = -EIO;
		break;
#endif

	case MSR_STAR:
	case MSR_CSTAR:
	case MSR_LSTAR:
	case MSR_SYSCALL_MASK:
	case MSR_IA32_SYSENTER_CS:
	case MSR_IA32_SYSENTER_ESP:
	case MSR_IA32_SYSENTER_EIP:
		/* Fast syscall setup is all done in hypercalls, so
		   these are all ignored. */
		break;

	default:
		if (!pmu_msr_write(msr, low, high, &ret))
			ret = native_write_msr_safe(msr, low, high);
	}

	return ret;
}

static u64 xen_read_msr(unsigned int msr)
{
	/*
	 * This will silently swallow a #GP from RDMSR.  It may be worth
	 * changing that.
	 */
	int err;

	return xen_read_msr_safe(msr, &err);
}

static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
{
	/*
	 * This will silently swallow a #GP from WRMSR.  It may be worth
	 * changing that.
	 */
	xen_write_msr_safe(msr, low, high);
}

/* This is called once we have the cpu_possible_mask */
void __init xen_setup_vcpu_info_placement(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		/* Set up direct vCPU id mapping for PV guests. */
		per_cpu(xen_vcpu_id, cpu) = cpu;

		/*
		 * xen_vcpu_setup(cpu) can fail -- in which case it
		 * falls back to the shared_info version for cpus
		 * where xen_vcpu_id < MAX_VIRT_CPUS.
		 *
		 * xen_cpu_up_prepare_pv() handles the rest by failing
		 * them in hotplug.
		 */
		(void) xen_vcpu_setup(cpu);
	}

	/*
	 * xen_vcpu_setup managed to place the vcpu_info within the
	 * percpu area for all cpus, so make use of it.
	 */
	if (xen_have_vcpu_info_placement) {
		pv_ops.irq.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
		pv_ops.irq.restore_fl =
			__PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
		pv_ops.irq.irq_disable =
			__PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
		pv_ops.irq.irq_enable =
			__PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
		pv_ops.mmu.read_cr2 =
			__PV_IS_CALLEE_SAVE(xen_read_cr2_direct);
	}
}

static const struct pv_info xen_info __initconst = {
	.shared_kernel_pmd = 0,

#ifdef CONFIG_X86_64
	.extra_user_64bit_cs = FLAT_USER_CS64,
#endif
	.name = "Xen",
};

static const struct pv_cpu_ops xen_cpu_ops __initconst = {
	.cpuid = xen_cpuid,

	.set_debugreg = xen_set_debugreg,
	.get_debugreg = xen_get_debugreg,

	.read_cr0 = xen_read_cr0,
	.write_cr0 = xen_write_cr0,

	.write_cr4 = xen_write_cr4,

	.wbinvd = native_wbinvd,

	.read_msr = xen_read_msr,
	.write_msr = xen_write_msr,

	.read_msr_safe = xen_read_msr_safe,
	.write_msr_safe = xen_write_msr_safe,

	.read_pmc = xen_read_pmc,

	.iret = xen_iret,
#ifdef CONFIG_X86_64
	.usergs_sysret64 = xen_sysret64,
#endif

	.load_tr_desc = paravirt_nop,
	.set_ldt = xen_set_ldt,
	.load_gdt = xen_load_gdt,
	.load_idt = xen_load_idt,
	.load_tls = xen_load_tls,
#ifdef CONFIG_X86_64
	.load_gs_index = xen_load_gs_index,
#endif

	.alloc_ldt = xen_alloc_ldt,
	.free_ldt = xen_free_ldt,

	.store_tr = xen_store_tr,

	.write_ldt_entry = xen_write_ldt_entry,
	.write_gdt_entry = xen_write_gdt_entry,
	.write_idt_entry = xen_write_idt_entry,
	.load_sp0 = xen_load_sp0,

#ifdef CONFIG_X86_IOPL_IOPERM
	.update_io_bitmap = xen_update_io_bitmap,
#endif
	.io_delay = xen_io_delay,

	/* Xen takes care of %gs when switching to usermode for us */
	.swapgs = paravirt_nop,

	.start_context_switch = paravirt_start_context_switch,
	.end_context_switch = xen_end_context_switch,
};

static void xen_restart(char *msg)
{
	xen_reboot(SHUTDOWN_reboot);
}

static void xen_machine_halt(void)
{
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_machine_power_off(void)
{
	if (pm_power_off)
		pm_power_off();
	xen_reboot(SHUTDOWN_poweroff);
}

static void xen_crash_shutdown(struct pt_regs *regs)
{
	xen_reboot(SHUTDOWN_crash);
}

static const struct machine_ops xen_machine_ops __initconst = {
	.restart = xen_restart,
	.halt = xen_machine_halt,
	.power_off = xen_machine_power_off,
	.shutdown = xen_machine_halt,
	.crash_shutdown = xen_crash_shutdown,
	.emergency_restart = xen_emergency_restart,
};

static unsigned char xen_get_nmi_reason(void)
{
	unsigned char reason = 0;

	/* Construct a value which looks like it came from port 0x61. */
	if (test_bit(_XEN_NMIREASON_io_error,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_IOCHK;
	if (test_bit(_XEN_NMIREASON_pci_serr,
		     &HYPERVISOR_shared_info->arch.nmi_reason))
		reason |= NMI_REASON_SERR;

	return reason;
}

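/*
 * A PV guest has no BIOS to leave EDD (BIOS Enhanced Disk Drive) data
 * in low memory, so dom0 asks the hypervisor for the firmware disk
 * info and MBR signatures and fills in boot_params just as a native
 * boot would have.
 */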
static void __init xen_boot_params_init_edd(void)
{
#if IS_ENABLED(CONFIG_EDD)
	struct xen_platform_op op;
	struct edd_info *edd_info;
	u32 *mbr_signature;
	unsigned nr;
	int ret;

	edd_info = boot_params.eddbuf;
	mbr_signature = boot_params.edd_mbr_sig_buffer;

	op.cmd = XENPF_firmware_info;

	op.u.firmware_info.type = XEN_FW_DISK_INFO;
	for (nr = 0; nr < EDDMAXNR; nr++) {
		struct edd_info *info = edd_info + nr;

		op.u.firmware_info.index = nr;
		info->params.length = sizeof(info->params);
		set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
				     &info->params);
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;

#define C(x) info->x = op.u.firmware_info.u.disk_info.x
		C(device);
		C(version);
		C(interface_support);
		C(legacy_max_cylinder);
		C(legacy_max_head);
		C(legacy_sectors_per_track);
#undef C
	}
	boot_params.eddbuf_entries = nr;

	op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
	for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
		op.u.firmware_info.index = nr;
		ret = HYPERVISOR_platform_op(&op);
		if (ret)
			break;
		mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
	}
	boot_params.edd_mbr_sig_buf_entries = nr;
#endif
}

/*
 * Set up the GDT and segment registers for -fstack-protector.  Until
 * we do this, we have to be careful not to call any stack-protected
 * function, which is most of the kernel.
 */
static void __init xen_setup_gdt(int cpu)
{
	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
	pv_ops.cpu.load_gdt = xen_load_gdt_boot;

	setup_stack_canary_segment(cpu);
	switch_to_new_gdt(cpu);

	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
	pv_ops.cpu.load_gdt = xen_load_gdt;
}

static void __init xen_dom0_set_legacy_features(void)
{
	x86_platform.legacy.rtc = 1;
}

/* First C function to be called on Xen boot */
asmlinkage __visible void __init xen_start_kernel(void)
{
	struct physdev_set_iopl set_iopl;
	unsigned long initrd_start = 0;
	int rc;

	if (!xen_start_info)
		return;

	xen_domain_type = XEN_PV_DOMAIN;
	xen_start_flags = xen_start_info->flags;

	xen_setup_features();

	/* Install Xen paravirt ops */
	pv_info = xen_info;
	pv_ops.init.patch = paravirt_patch_default;
	pv_ops.cpu = xen_cpu_ops;
	xen_init_irq_ops();

	/*
	 * Setup xen_vcpu early because it is needed for
	 * local_irq_disable(), irqs_disabled(), e.g. in printk().
	 *
	 * Don't do the full vcpu_info placement stuff until we have
	 * the cpu_possible_mask and a non-dummy shared_info.
	 */
	xen_vcpu_info_reset(0);

	x86_platform.get_nmi_reason = xen_get_nmi_reason;

	x86_init.resources.memory_setup = xen_memory_setup;
	x86_init.irqs.intr_mode_select = x86_init_noop;
	x86_init.irqs.intr_mode_init = x86_init_noop;
	x86_init.oem.arch_setup = xen_arch_setup;
	x86_init.oem.banner = xen_banner;
	x86_init.hyper.init_platform = xen_pv_init_platform;
	x86_init.hyper.guest_late_init = xen_pv_guest_late_init;

	/*
	 * Set up some pagetable state before starting to set any ptes.
	 */
	xen_setup_machphys_mapping();
	xen_init_mmu_ops();

	/* Prevent unwanted bits from being set in PTEs. */
	__supported_pte_mask &= ~_PAGE_GLOBAL;
	__default_kernel_pte_mask &= ~_PAGE_GLOBAL;

	/*
	 * Prevent page tables from being allocated in highmem, even
	 * if CONFIG_HIGHPTE is enabled.
	 */
	__userpte_alloc_gfp &= ~__GFP_HIGHMEM;

	/* Get mfn list */
	xen_build_dynamic_phys_to_machine();

	/*
	 * Set up kernel GDT and segment registers, mainly so that
	 * -fstack-protector code can be executed.
	 */
	xen_setup_gdt(0);

	/* Work out if we support NX */
	get_cpu_cap(&boot_cpu_data);
	x86_configure_nx();

	/* Determine virtual and physical address sizes */
	get_cpu_address_sizes(&boot_cpu_data);

	/* Let's presume PV guests always boot on vCPU with id 0. */
	per_cpu(xen_vcpu_id, 0) = 0;

	idt_setup_early_handler();

	xen_init_capabilities();

#ifdef CONFIG_X86_LOCAL_APIC
	/*
	 * Set up the basic apic ops.
	 */
	xen_init_apic();
#endif

	if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
		pv_ops.mmu.ptep_modify_prot_start =
			xen_ptep_modify_prot_start;
		pv_ops.mmu.ptep_modify_prot_commit =
			xen_ptep_modify_prot_commit;
	}

	machine_ops = xen_machine_ops;

	/*
	 * The only reliable way to retain the initial address of the
	 * percpu gdt_page is to remember it here, so we can go and
	 * mark it RW later, when the initial percpu area is freed.
	 */
	xen_initial_gdt = &per_cpu(gdt_page, 0);

	xen_smp_init();

#ifdef CONFIG_ACPI_NUMA
	/*
	 * The pages we get from Xen are not related to machine pages,
	 * so any NUMA information the kernel tries to get from ACPI
	 * will be meaningless.  Prevent it from trying.
	 */
	acpi_numa = -1;
#endif
	WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));

	local_irq_disable();
	early_boot_irqs_disabled = true;

	xen_raw_console_write("mapping kernel into physical memory\n");
	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
				   xen_start_info->nr_pages);
	xen_reserve_special_pages();

	/* keep using Xen gdt for now; no urgent need to change it */

#ifdef CONFIG_X86_32
	pv_info.kernel_rpl = 1;
	if (xen_feature(XENFEAT_supervisor_mode_kernel))
		pv_info.kernel_rpl = 0;
#else
	pv_info.kernel_rpl = 0;
#endif
	/* set the limit of our address space */
	xen_reserve_top();

	/*
	 * We used to do this in xen_arch_setup, but that is too late
	 * on AMD, where early_cpu_init (run before ->arch_setup())
	 * calls early_amd_init which pokes the 0xcf8 port.
	 */
	set_iopl.iopl = 1;
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
	if (rc != 0)
		xen_raw_printk("physdev_op failed %d\n", rc);

#ifdef CONFIG_X86_32
	/* set up basic CPUID stuff */
	cpu_detect(&new_cpu_data);
	set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
	new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
#endif

	if (xen_start_info->mod_start) {
		if (xen_start_info->flags & SIF_MOD_START_PFN)
			initrd_start = PFN_PHYS(xen_start_info->mod_start);
		else
			initrd_start = __pa(xen_start_info->mod_start);
	}

	/* Poke various useful things into boot_params */
	boot_params.hdr.type_of_loader = (9 << 4) | 0;
	boot_params.hdr.ramdisk_image = initrd_start;
	boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
	boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
	boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;

	if (!xen_initial_domain()) {
		add_preferred_console("xenboot", 0, NULL);
		if (pci_xen)
			x86_init.pci.arch_init = pci_xen_init;
	} else {
		const struct dom0_vga_console_info *info =
			(void *)((char *)xen_start_info +
				 xen_start_info->console.dom0.info_off);
		struct xen_platform_op op = {
			.cmd = XENPF_firmware_info,
			.interface_version = XENPF_INTERFACE_VERSION,
			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
		};

		x86_platform.set_legacy_features =
				xen_dom0_set_legacy_features;
		xen_init_vga(info, xen_start_info->console.dom0.info_size);
		xen_start_info->console.domU.mfn = 0;
		xen_start_info->console.domU.evtchn = 0;

		if (HYPERVISOR_platform_op(&op) == 0)
			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;

		/* Make sure ACS will be enabled */
		pci_request_acs();

		xen_acpi_sleep_register();

		/* Avoid searching for BIOS MP tables */
		x86_init.mpparse.find_smp_config = x86_init_noop;
		x86_init.mpparse.get_smp_config = x86_init_uint_noop;

		xen_boot_params_init_edd();
	}

	if (!boot_params.screen_info.orig_video_isVGA)
		add_preferred_console("tty", 0, NULL);
	add_preferred_console("hvc", 0, NULL);
	if (boot_params.screen_info.orig_video_isVGA)
		add_preferred_console("tty", 0, NULL);

#ifdef CONFIG_PCI
	/* PCI BIOS service won't work from a PV guest. */
	pci_probe &= ~PCI_PROBE_BIOS;
#endif
	xen_raw_console_write("about to get started...\n");

	/* We need this for printk timestamps */
	xen_setup_runstate_info(0);

	xen_efi_init(&boot_params);

	/* Start the world */
#ifdef CONFIG_X86_32
	i386_start_kernel();
#else
	cr4_init_shadow(); /* 32b kernel does this in i386_start_kernel() */
	x86_64_start_reservations((char *)__pa_symbol(&boot_params));
#endif
}

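/*
 * CPU hotplug callbacks registered via xen_cpuhp_setup() in
 * xen_start_kernel(): a secondary CPU needs its timer and
 * IPI/event-channel interrupts set up before it can come online, and
 * they are torn down again after it dies so the event channels can be
 * reused.
 */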
static int xen_cpu_up_prepare_pv(unsigned int cpu)
{
	int rc;

	if (per_cpu(xen_vcpu, cpu) == NULL)
		return -ENODEV;

	xen_setup_timer(cpu);

	rc = xen_smp_intr_init(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	rc = xen_smp_intr_init_pv(cpu);
	if (rc) {
		WARN(1, "xen_smp_intr_init_pv() for CPU %d failed: %d\n",
		     cpu, rc);
		return rc;
	}

	return 0;
}

static int xen_cpu_dead_pv(unsigned int cpu)
{
	xen_smp_intr_free(cpu);
	xen_smp_intr_free_pv(cpu);

	xen_teardown_timer(cpu);

	return 0;
}

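/*
 * Hypervisor detection callback.  A PV guest already knows what it is
 * from xen_start_info, so simply report the Xen CPUID leaf base that
 * the generic hypervisor framework expects from ->detect().
 */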
static uint32_t __init xen_platform_pv(void)
{
	if (xen_pv_domain())
		return xen_cpuid_base();

	return 0;
}

const __initconst struct hypervisor_x86 x86_hyper_xen_pv = {
	.name			= "Xen PV",
	.detect			= xen_platform_pv,
	.type			= X86_HYPER_XEN_PV,
	.runtime.pin_vcpu	= xen_pin_vcpu,
	.ignore_nopv		= true,
};