linux/arch/x86/xen/enlighten.c
   1/*
   2 * Core of Xen paravirt_ops implementation.
   3 *
   4 * This file contains the xen_paravirt_ops structure itself, and the
   5 * implementations for:
   6 * - privileged instructions
   7 * - interrupt flags
   8 * - segment operations
   9 * - booting and setup
  10 *
  11 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  12 */
  13
  14#include <linux/cpu.h>
  15#include <linux/kernel.h>
  16#include <linux/init.h>
  17#include <linux/smp.h>
  18#include <linux/preempt.h>
  19#include <linux/hardirq.h>
  20#include <linux/percpu.h>
  21#include <linux/delay.h>
  22#include <linux/start_kernel.h>
  23#include <linux/sched.h>
  24#include <linux/kprobes.h>
  25#include <linux/bootmem.h>
  26#include <linux/export.h>
  27#include <linux/mm.h>
  28#include <linux/page-flags.h>
  29#include <linux/highmem.h>
  30#include <linux/console.h>
  31#include <linux/pci.h>
  32#include <linux/gfp.h>
  33#include <linux/memblock.h>
  34#include <linux/edd.h>
  35#include <linux/frame.h>
  36
  37#include <linux/kexec.h>
  38
  39#include <xen/xen.h>
  40#include <xen/events.h>
  41#include <xen/interface/xen.h>
  42#include <xen/interface/version.h>
  43#include <xen/interface/physdev.h>
  44#include <xen/interface/vcpu.h>
  45#include <xen/interface/memory.h>
  46#include <xen/interface/nmi.h>
  47#include <xen/interface/xen-mca.h>
  48#include <xen/features.h>
  49#include <xen/page.h>
  50#include <xen/hvm.h>
  51#include <xen/hvc-console.h>
  52#include <xen/acpi.h>
  53
  54#include <asm/paravirt.h>
  55#include <asm/apic.h>
  56#include <asm/page.h>
  57#include <asm/xen/pci.h>
  58#include <asm/xen/hypercall.h>
  59#include <asm/xen/hypervisor.h>
  60#include <asm/xen/cpuid.h>
  61#include <asm/fixmap.h>
  62#include <asm/processor.h>
  63#include <asm/proto.h>
  64#include <asm/msr-index.h>
  65#include <asm/traps.h>
  66#include <asm/setup.h>
  67#include <asm/desc.h>
  68#include <asm/pgalloc.h>
  69#include <asm/pgtable.h>
  70#include <asm/tlbflush.h>
  71#include <asm/reboot.h>
  72#include <asm/stackprotector.h>
  73#include <asm/hypervisor.h>
  74#include <asm/mach_traps.h>
  75#include <asm/mwait.h>
  76#include <asm/pci_x86.h>
  77#include <asm/cpu.h>
  78
  79#ifdef CONFIG_ACPI
  80#include <linux/acpi.h>
  81#include <asm/acpi.h>
  82#include <acpi/pdc_intel.h>
  83#include <acpi/processor.h>
  84#include <xen/interface/platform.h>
  85#endif
  86
  87#include "xen-ops.h"
  88#include "mmu.h"
  89#include "smp.h"
  90#include "multicalls.h"
  91#include "pmu.h"
  92
  93EXPORT_SYMBOL_GPL(hypercall_page);
  94
  95/*
  96 * Pointer to the xen_vcpu_info structure or
  97 * &HYPERVISOR_shared_info->vcpu_info[cpu]. See xen_hvm_init_shared_info
   98 * and xen_vcpu_setup for details. By default it points to shared_info->vcpu_info
  99 * but if the hypervisor supports VCPUOP_register_vcpu_info then it can point
 100 * to xen_vcpu_info. The pointer is used in __xen_evtchn_do_upcall to
 101 * acknowledge pending events.
  102 * It is also used, more subtly, by the patched versions of the irq
  103 * enable/disable code, e.g. xen_irq_enable_direct and xen_iret in PV mode.
 104 *
 105 * The desire to be able to do those mask/unmask operations as a single
 106 * instruction by using the per-cpu offset held in %gs is the real reason
 107 * vcpu info is in a per-cpu pointer and the original reason for this
 108 * hypercall.
 109 *
 110 */
 111DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
 112
 113/*
  114 * Per-CPU pages used if the hypervisor supports the VCPUOP_register_vcpu_info
 115 * hypercall. This can be used both in PV and PVHVM mode. The structure
 116 * overrides the default per_cpu(xen_vcpu, cpu) value.
 117 */
 118DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
 119
 120/* Linux <-> Xen vCPU id mapping */
 121DEFINE_PER_CPU(uint32_t, xen_vcpu_id);
 122EXPORT_PER_CPU_SYMBOL(xen_vcpu_id);
 123
 124enum xen_domain_type xen_domain_type = XEN_NATIVE;
 125EXPORT_SYMBOL_GPL(xen_domain_type);
 126
 127unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
 128EXPORT_SYMBOL(machine_to_phys_mapping);
 129unsigned long  machine_to_phys_nr;
 130EXPORT_SYMBOL(machine_to_phys_nr);
 131
 132struct start_info *xen_start_info;
 133EXPORT_SYMBOL_GPL(xen_start_info);
 134
 135struct shared_info xen_dummy_shared_info;
 136
 137void *xen_initial_gdt;
 138
 139RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
 140
 141static int xen_cpu_up_prepare(unsigned int cpu);
 142static int xen_cpu_up_online(unsigned int cpu);
 143static int xen_cpu_dead(unsigned int cpu);
 144
 145/*
 146 * Point at some empty memory to start with. We map the real shared_info
 147 * page as soon as fixmap is up and running.
 148 */
 149struct shared_info *HYPERVISOR_shared_info = &xen_dummy_shared_info;
 150
 151/*
 152 * Flag to determine whether vcpu info placement is available on all
 153 * VCPUs.  We assume it is to start with, and then set it to zero on
 154 * the first failure.  This is because it can succeed on some VCPUs
 155 * and not others, since it can involve hypervisor memory allocation,
 156 * or because the guest failed to guarantee all the appropriate
  157 * constraints on all VCPUs (i.e. the buffer can't cross a page boundary).
 158 *
 159 * Note that any particular CPU may be using a placed vcpu structure,
  160 * but we can only optimise if they all are.
 161 *
 162 * 0: not available, 1: available
 163 */
 164static int have_vcpu_info_placement = 1;
 165
 166struct tls_descs {
 167        struct desc_struct desc[3];
 168};
 169
 170/*
 171 * Updating the 3 TLS descriptors in the GDT on every task switch is
  172 * surprisingly expensive, so we avoid updating them if they haven't
  173 * changed.  Since Xen writes different descriptors than the ones
  174 * passed in the update_descriptor hypercall, we keep shadow copies to
 175 * compare against.
 176 */
 177static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
 178
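     /*
      * Without vcpu info placement only the first MAX_VIRT_CPUS VCPUs
      * have a vcpu_info slot in the shared_info page, so cap the number
      * of CPUs we will try to bring up accordingly.
      */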
 179static void clamp_max_cpus(void)
 180{
 181#ifdef CONFIG_SMP
 182        if (setup_max_cpus > MAX_VIRT_CPUS)
 183                setup_max_cpus = MAX_VIRT_CPUS;
 184#endif
 185}
 186
 187void xen_vcpu_setup(int cpu)
 188{
 189        struct vcpu_register_vcpu_info info;
 190        int err;
 191        struct vcpu_info *vcpup;
 192
 193        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 194
 195        /*
 196         * This path is called twice on PVHVM - first during bootup via
 197         * smp_init -> xen_hvm_cpu_notify, and then if the VCPU is being
 198         * hotplugged: cpu_up -> xen_hvm_cpu_notify.
  199         * As we can only do the VCPUOP_register_vcpu_info hypercall once,
  200         * let's not overwrite its result.
  201         *
  202         * For PV it is called during restore (xen_vcpu_restore) and bootup
  203         * (xen_setup_vcpu_info_placement). The hotplug mechanism does not
  204         * use this function.
  205         */
 206        if (xen_hvm_domain()) {
 207                if (per_cpu(xen_vcpu, cpu) == &per_cpu(xen_vcpu_info, cpu))
 208                        return;
 209        }
 210        if (xen_vcpu_nr(cpu) < MAX_VIRT_CPUS)
 211                per_cpu(xen_vcpu, cpu) =
 212                        &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
 213
 214        if (!have_vcpu_info_placement) {
 215                if (cpu >= MAX_VIRT_CPUS)
 216                        clamp_max_cpus();
 217                return;
 218        }
 219
 220        vcpup = &per_cpu(xen_vcpu_info, cpu);
 221        info.mfn = arbitrary_virt_to_mfn(vcpup);
 222        info.offset = offset_in_page(vcpup);
 223
 224        /* Check to see if the hypervisor will put the vcpu_info
 225           structure where we want it, which allows direct access via
 226           a percpu-variable.
 227           N.B. This hypercall can _only_ be called once per CPU. Subsequent
  228           calls will error out with -EINVAL. This is due to the fact that the
  229           hypervisor has no unregister variant and this hypercall does not
  230           allow overwriting info.mfn and info.offset.
 231         */
 232        err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu),
 233                                 &info);
 234
 235        if (err) {
 236                printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
 237                have_vcpu_info_placement = 0;
 238                clamp_max_cpus();
 239        } else {
 240                /* This cpu is using the registered vcpu info, even if
 241                   later ones fail to. */
 242                per_cpu(xen_vcpu, cpu) = vcpup;
 243        }
 244}
 245
 246/*
 247 * On restore, set the vcpu placement up again.
 248 * If it fails, then we're in a bad state, since
 249 * we can't back out from using it...
 250 */
 251void xen_vcpu_restore(void)
 252{
 253        int cpu;
 254
 255        for_each_possible_cpu(cpu) {
 256                bool other_cpu = (cpu != smp_processor_id());
 257                bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu),
 258                                                NULL);
 259
 260                if (other_cpu && is_up &&
 261                    HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL))
 262                        BUG();
 263
 264                xen_setup_runstate_info(cpu);
 265
 266                if (have_vcpu_info_placement)
 267                        xen_vcpu_setup(cpu);
 268
 269                if (other_cpu && is_up &&
 270                    HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL))
 271                        BUG();
 272        }
 273}
 274
 275static void __init xen_banner(void)
 276{
 277        unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
 278        struct xen_extraversion extra;
 279        HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 280
 281        pr_info("Booting paravirtualized kernel %son %s\n",
 282                xen_feature(XENFEAT_auto_translated_physmap) ?
 283                        "with PVH extensions " : "", pv_info.name);
 284        printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 285               version >> 16, version & 0xffff, extra.extraversion,
 286               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 287}
 288/* Check if running on Xen version (major, minor) or later */
 289bool
 290xen_running_on_version_or_later(unsigned int major, unsigned int minor)
 291{
 292        unsigned int version;
 293
 294        if (!xen_domain())
 295                return false;
 296
 297        version = HYPERVISOR_xen_version(XENVER_version, NULL);
 298        if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
 299                ((version >> 16) > major))
 300                return true;
 301        return false;
 302}
 303
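     /*
      * CPUID leaf 6 describes thermal and power management features;
      * ECX bit 0 indicates that the APERF/MPERF MSRs are present.
      */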
 304#define CPUID_THERM_POWER_LEAF 6
 305#define APERFMPERF_PRESENT 0
 306
 307static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
 308static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
 309
 310static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
 311static __read_mostly unsigned int cpuid_leaf5_ecx_val;
 312static __read_mostly unsigned int cpuid_leaf5_edx_val;
 313
 314static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 315                      unsigned int *cx, unsigned int *dx)
 316{
 317        unsigned maskebx = ~0;
 318        unsigned maskecx = ~0;
 319        unsigned maskedx = ~0;
 320        unsigned setecx = 0;
 321        /*
 322         * Mask out inconvenient features, to try and disable as many
 323         * unsupported kernel subsystems as possible.
 324         */
 325        switch (*ax) {
 326        case 1:
 327                maskecx = cpuid_leaf1_ecx_mask;
 328                setecx = cpuid_leaf1_ecx_set_mask;
 329                maskedx = cpuid_leaf1_edx_mask;
 330                break;
 331
 332        case CPUID_MWAIT_LEAF:
 333                /* Synthesize the values.. */
 334                *ax = 0;
 335                *bx = 0;
 336                *cx = cpuid_leaf5_ecx_val;
 337                *dx = cpuid_leaf5_edx_val;
 338                return;
 339
 340        case CPUID_THERM_POWER_LEAF:
  341                /* Disable APERF/MPERF usage by the kernel */
 342                maskecx = ~(1 << APERFMPERF_PRESENT);
 343                break;
 344
 345        case 0xb:
 346                /* Suppress extended topology stuff */
 347                maskebx = 0;
 348                break;
 349        }
 350
 351        asm(XEN_EMULATE_PREFIX "cpuid"
 352                : "=a" (*ax),
 353                  "=b" (*bx),
 354                  "=c" (*cx),
 355                  "=d" (*dx)
 356                : "0" (*ax), "2" (*cx));
 357
 358        *bx &= maskebx;
 359        *cx &= maskecx;
 360        *cx |= setecx;
 361        *dx &= maskedx;
 362}
 363STACK_FRAME_NON_STANDARD(xen_cpuid); /* XEN_EMULATE_PREFIX */
 364
 365static bool __init xen_check_mwait(void)
 366{
 367#ifdef CONFIG_ACPI
 368        struct xen_platform_op op = {
 369                .cmd                    = XENPF_set_processor_pminfo,
 370                .u.set_pminfo.id        = -1,
 371                .u.set_pminfo.type      = XEN_PM_PDC,
 372        };
 373        uint32_t buf[3];
 374        unsigned int ax, bx, cx, dx;
 375        unsigned int mwait_mask;
 376
 377        /* We need to determine whether it is OK to expose the MWAIT
  378         * capability to the kernel to harvest deeper-than-C3 states from ACPI
  379         * _CST using the processor_harvest_xen.c module. For this to work, we
  380         * need to gather the MWAIT_LEAF values (which the cstate.c code
  381         * checks against). The hypervisor won't expose the MWAIT flag because
  382         * it would break backwards compatibility, so we find out directly
  383         * from the hardware and via a hypercall.
 384         */
 385        if (!xen_initial_domain())
 386                return false;
 387
 388        /*
  389         * When running on a platform earlier than Xen 4.2, do not expose
  390         * MWAIT, to avoid the risk of loading the native ACPI PAD driver.
 391         */
 392        if (!xen_running_on_version_or_later(4, 2))
 393                return false;
 394
 395        ax = 1;
 396        cx = 0;
 397
 398        native_cpuid(&ax, &bx, &cx, &dx);
 399
 400        mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
 401                     (1 << (X86_FEATURE_MWAIT % 32));
 402
 403        if ((cx & mwait_mask) != mwait_mask)
 404                return false;
 405
 406        /* We need to emulate the MWAIT_LEAF and for that we need both
 407         * ecx and edx. The hypercall provides only partial information.
 408         */
 409
 410        ax = CPUID_MWAIT_LEAF;
 411        bx = 0;
 412        cx = 0;
 413        dx = 0;
 414
 415        native_cpuid(&ax, &bx, &cx, &dx);
 416
 417        /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
 418         * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
 419         */
 420        buf[0] = ACPI_PDC_REVISION_ID;
 421        buf[1] = 1;
 422        buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
 423
 424        set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
 425
 426        if ((HYPERVISOR_platform_op(&op) == 0) &&
 427            (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
 428                cpuid_leaf5_ecx_val = cx;
 429                cpuid_leaf5_edx_val = dx;
 430        }
 431        return true;
 432#else
 433        return false;
 434#endif
 435}
 436static void __init xen_init_cpuid_mask(void)
 437{
 438        unsigned int ax, bx, cx, dx;
 439        unsigned int xsave_mask;
 440
 441        cpuid_leaf1_edx_mask =
 442                ~((1 << X86_FEATURE_MTRR) |  /* disable MTRR */
 443                  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 444
 445        if (!xen_initial_domain())
 446                cpuid_leaf1_edx_mask &=
 447                        ~((1 << X86_FEATURE_ACPI));  /* disable ACPI */
 448
 449        cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_X2APIC % 32));
 450
 451        ax = 1;
 452        cx = 0;
 453        cpuid(1, &ax, &bx, &cx, &dx);
 454
 455        xsave_mask =
 456                (1 << (X86_FEATURE_XSAVE % 32)) |
 457                (1 << (X86_FEATURE_OSXSAVE % 32));
 458
 459        /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
 460        if ((cx & xsave_mask) != xsave_mask)
 461                cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
 462        if (xen_check_mwait())
 463                cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
 464}
 465
 466static void xen_set_debugreg(int reg, unsigned long val)
 467{
 468        HYPERVISOR_set_debugreg(reg, val);
 469}
 470
 471static unsigned long xen_get_debugreg(int reg)
 472{
 473        return HYPERVISOR_get_debugreg(reg);
 474}
 475
 476static void xen_end_context_switch(struct task_struct *next)
 477{
 478        xen_mc_flush();
 479        paravirt_end_context_switch(next);
 480}
 481
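     /*
      * Under Xen PV the hypervisor owns the TSS and kernel stack switching
      * goes through the stack_switch hypercall (see xen_load_sp0), so there
      * is no meaningful task register value to report.
      */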
 482static unsigned long xen_store_tr(void)
 483{
 484        return 0;
 485}
 486
 487/*
 488 * Set the page permissions for a particular virtual address.  If the
 489 * address is a vmalloc mapping (or other non-linear mapping), then
 490 * find the linear mapping of the page and also set its protections to
 491 * match.
 492 */
 493static void set_aliased_prot(void *v, pgprot_t prot)
 494{
 495        int level;
 496        pte_t *ptep;
 497        pte_t pte;
 498        unsigned long pfn;
 499        struct page *page;
 500        unsigned char dummy;
 501
 502        ptep = lookup_address((unsigned long)v, &level);
 503        BUG_ON(ptep == NULL);
 504
 505        pfn = pte_pfn(*ptep);
 506        page = pfn_to_page(pfn);
 507
 508        pte = pfn_pte(pfn, prot);
 509
 510        /*
 511         * Careful: update_va_mapping() will fail if the virtual address
 512         * we're poking isn't populated in the page tables.  We don't
 513         * need to worry about the direct map (that's always in the page
 514         * tables), but we need to be careful about vmap space.  In
 515         * particular, the top level page table can lazily propagate
 516         * entries between processes, so if we've switched mms since we
 517         * vmapped the target in the first place, we might not have the
 518         * top-level page table entry populated.
 519         *
 520         * We disable preemption because we want the same mm active when
 521         * we probe the target and when we issue the hypercall.  We'll
 522         * have the same nominal mm, but if we're a kernel thread, lazy
 523         * mm dropping could change our pgd.
 524         *
 525         * Out of an abundance of caution, this uses __get_user() to fault
 526         * in the target address just in case there's some obscure case
 527         * in which the target address isn't readable.
 528         */
 529
 530        preempt_disable();
 531
 532        probe_kernel_read(&dummy, v, 1);
 533
 534        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
 535                BUG();
 536
 537        if (!PageHighMem(page)) {
 538                void *av = __va(PFN_PHYS(pfn));
 539
 540                if (av != v)
 541                        if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
 542                                BUG();
 543        } else
 544                kmap_flush_unused();
 545
 546        preempt_enable();
 547}
 548
 549static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 550{
 551        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 552        int i;
 553
 554        /*
  555         * We need to mark all aliases of the LDT pages RO.  We
  556         * don't need to call vm_flush_aliases(), though, since that's
  557         * only responsible for flushing aliases out of the TLBs, not the
 558         * page tables, and Xen will flush the TLB for us if needed.
 559         *
 560         * To avoid confusing future readers: none of this is necessary
 561         * to load the LDT.  The hypervisor only checks this when the
 562         * LDT is faulted in due to subsequent descriptor access.
 563         */
 564
  565        for (i = 0; i < entries; i += entries_per_page)
 566                set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 567}
 568
 569static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
 570{
 571        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 572        int i;
 573
  574        for (i = 0; i < entries; i += entries_per_page)
 575                set_aliased_prot(ldt + i, PAGE_KERNEL);
 576}
 577
 578static void xen_set_ldt(const void *addr, unsigned entries)
 579{
 580        struct mmuext_op *op;
 581        struct multicall_space mcs = xen_mc_entry(sizeof(*op));
 582
 583        trace_xen_cpu_set_ldt(addr, entries);
 584
 585        op = mcs.args;
 586        op->cmd = MMUEXT_SET_LDT;
 587        op->arg1.linear_addr = (unsigned long)addr;
 588        op->arg2.nr_ents = entries;
 589
 590        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 591
 592        xen_mc_issue(PARAVIRT_LAZY_CPU);
 593}
 594
 595static void xen_load_gdt(const struct desc_ptr *dtr)
 596{
 597        unsigned long va = dtr->address;
 598        unsigned int size = dtr->size + 1;
 599        unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
 600        unsigned long frames[pages];
 601        int f;
 602
 603        /*
 604         * A GDT can be up to 64k in size, which corresponds to 8192
 605         * 8-byte entries, or 16 4k pages..
 606         */
 607
 608        BUG_ON(size > 65536);
 609        BUG_ON(va & ~PAGE_MASK);
 610
 611        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 612                int level;
 613                pte_t *ptep;
 614                unsigned long pfn, mfn;
 615                void *virt;
 616
 617                /*
 618                 * The GDT is per-cpu and is in the percpu data area.
 619                 * That can be virtually mapped, so we need to do a
 620                 * page-walk to get the underlying MFN for the
 621                 * hypercall.  The page can also be in the kernel's
 622                 * linear range, so we need to RO that mapping too.
 623                 */
 624                ptep = lookup_address(va, &level);
 625                BUG_ON(ptep == NULL);
 626
 627                pfn = pte_pfn(*ptep);
 628                mfn = pfn_to_mfn(pfn);
 629                virt = __va(PFN_PHYS(pfn));
 630
 631                frames[f] = mfn;
 632
 633                make_lowmem_page_readonly((void *)va);
 634                make_lowmem_page_readonly(virt);
 635        }
 636
 637        if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
 638                BUG();
 639}
 640
 641/*
 642 * load_gdt for early boot, when the gdt is only mapped once
 643 */
 644static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 645{
 646        unsigned long va = dtr->address;
 647        unsigned int size = dtr->size + 1;
 648        unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE);
 649        unsigned long frames[pages];
 650        int f;
 651
 652        /*
 653         * A GDT can be up to 64k in size, which corresponds to 8192
 654         * 8-byte entries, or 16 4k pages..
 655         */
 656
 657        BUG_ON(size > 65536);
 658        BUG_ON(va & ~PAGE_MASK);
 659
 660        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 661                pte_t pte;
 662                unsigned long pfn, mfn;
 663
 664                pfn = virt_to_pfn(va);
 665                mfn = pfn_to_mfn(pfn);
 666
 667                pte = pfn_pte(pfn, PAGE_KERNEL_RO);
 668
 669                if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
 670                        BUG();
 671
 672                frames[f] = mfn;
 673        }
 674
 675        if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
 676                BUG();
 677}
 678
 679static inline bool desc_equal(const struct desc_struct *d1,
 680                              const struct desc_struct *d2)
 681{
 682        return d1->a == d2->a && d1->b == d2->b;
 683}
 684
 685static void load_TLS_descriptor(struct thread_struct *t,
 686                                unsigned int cpu, unsigned int i)
 687{
 688        struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
 689        struct desc_struct *gdt;
 690        xmaddr_t maddr;
 691        struct multicall_space mc;
 692
 693        if (desc_equal(shadow, &t->tls_array[i]))
 694                return;
 695
 696        *shadow = t->tls_array[i];
 697
 698        gdt = get_cpu_gdt_table(cpu);
 699        maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 700        mc = __xen_mc_entry(0);
 701
 702        MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
 703}
 704
 705static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 706{
 707        /*
 708         * XXX sleazy hack: If we're being called in a lazy-cpu zone
 709         * and lazy gs handling is enabled, it means we're in a
 710         * context switch, and %gs has just been saved.  This means we
 711         * can zero it out to prevent faults on exit from the
 712         * hypervisor if the next process has no %gs.  Either way, it
 713         * has been saved, and the new value will get loaded properly.
 714         * This will go away as soon as Xen has been modified to not
 715         * save/restore %gs for normal hypercalls.
 716         *
 717         * On x86_64, this hack is not used for %gs, because gs points
 718         * to KERNEL_GS_BASE (and uses it for PDA references), so we
 719         * must not zero %gs on x86_64
 720         *
 721         * For x86_64, we need to zero %fs, otherwise we may get an
 722         * exception between the new %fs descriptor being loaded and
 723         * %fs being effectively cleared at __switch_to().
 724         */
 725        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
 726#ifdef CONFIG_X86_32
 727                lazy_load_gs(0);
 728#else
 729                loadsegment(fs, 0);
 730#endif
 731        }
 732
 733        xen_mc_batch();
 734
 735        load_TLS_descriptor(t, cpu, 0);
 736        load_TLS_descriptor(t, cpu, 1);
 737        load_TLS_descriptor(t, cpu, 2);
 738
 739        xen_mc_issue(PARAVIRT_LAZY_CPU);
 740}
 741
 742#ifdef CONFIG_X86_64
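     /*
      * The user %gs selector is switched via a hypercall: Xen keeps track
      * of the user and kernel GS state for us, which is also why the
      * swapgs pv-op further down is a plain nop.
      */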
 743static void xen_load_gs_index(unsigned int idx)
 744{
 745        if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
 746                BUG();
 747}
 748#endif
 749
 750static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 751                                const void *ptr)
 752{
 753        xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
 754        u64 entry = *(u64 *)ptr;
 755
 756        trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
 757
 758        preempt_disable();
 759
 760        xen_mc_flush();
 761        if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
 762                BUG();
 763
 764        preempt_enable();
 765}
 766
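     /*
      * Translate an IDT gate descriptor into a Xen trap_info entry.
      * Returns 1 if the entry should be passed to the hypervisor, or 0 if
      * it should be skipped (either because it is not a trap or interrupt
      * gate, or because Xen handles the fault itself).
      */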
 767static int cvt_gate_to_trap(int vector, const gate_desc *val,
 768                            struct trap_info *info)
 769{
 770        unsigned long addr;
 771
 772        if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 773                return 0;
 774
 775        info->vector = vector;
 776
 777        addr = gate_offset(*val);
 778#ifdef CONFIG_X86_64
 779        /*
 780         * Look for known traps using IST, and substitute them
 781         * appropriately.  The debugger ones are the only ones we care
 782         * about.  Xen will handle faults like double_fault,
 783         * so we should never see them.  Warn if
 784         * there's an unexpected IST-using fault handler.
 785         */
 786        if (addr == (unsigned long)debug)
 787                addr = (unsigned long)xen_debug;
 788        else if (addr == (unsigned long)int3)
 789                addr = (unsigned long)xen_int3;
 790        else if (addr == (unsigned long)stack_segment)
 791                addr = (unsigned long)xen_stack_segment;
 792        else if (addr == (unsigned long)double_fault) {
 793                /* Don't need to handle these */
 794                return 0;
 795#ifdef CONFIG_X86_MCE
 796        } else if (addr == (unsigned long)machine_check) {
 797                /*
  798                 * when the Xen hypervisor injects a vMCE into the guest,
  799                 * use the native MCE handler to handle it
 800                 */
 801                ;
 802#endif
 803        } else if (addr == (unsigned long)nmi)
 804                /*
 805                 * Use the native version as well.
 806                 */
 807                ;
 808        else {
 809                /* Some other trap using IST? */
 810                if (WARN_ON(val->ist != 0))
 811                        return 0;
 812        }
 813#endif  /* CONFIG_X86_64 */
 814        info->address = addr;
 815
 816        info->cs = gate_segment(*val);
 817        info->flags = val->dpl;
 818        /* interrupt gates clear IF */
 819        if (val->type == GATE_INTERRUPT)
 820                info->flags |= 1 << 2;
 821
 822        return 1;
 823}
 824
 825/* Locations of each CPU's IDT */
 826static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
 827
 828/* Set an IDT entry.  If the entry is part of the current IDT, then
 829   also update Xen. */
 830static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 831{
 832        unsigned long p = (unsigned long)&dt[entrynum];
 833        unsigned long start, end;
 834
 835        trace_xen_cpu_write_idt_entry(dt, entrynum, g);
 836
 837        preempt_disable();
 838
 839        start = __this_cpu_read(idt_desc.address);
 840        end = start + __this_cpu_read(idt_desc.size) + 1;
 841
 842        xen_mc_flush();
 843
 844        native_write_idt_entry(dt, entrynum, g);
 845
 846        if (p >= start && (p + 8) <= end) {
 847                struct trap_info info[2];
 848
 849                info[1].address = 0;
 850
 851                if (cvt_gate_to_trap(entrynum, g, &info[0]))
 852                        if (HYPERVISOR_set_trap_table(info))
 853                                BUG();
 854        }
 855
 856        preempt_enable();
 857}
 858
 859static void xen_convert_trap_info(const struct desc_ptr *desc,
 860                                  struct trap_info *traps)
 861{
 862        unsigned in, out, count;
 863
 864        count = (desc->size+1) / sizeof(gate_desc);
 865        BUG_ON(count > 256);
 866
 867        for (in = out = 0; in < count; in++) {
 868                gate_desc *entry = (gate_desc*)(desc->address) + in;
 869
 870                if (cvt_gate_to_trap(in, entry, &traps[out]))
 871                        out++;
 872        }
 873        traps[out].address = 0;
 874}
 875
 876void xen_copy_trap_info(struct trap_info *traps)
 877{
 878        const struct desc_ptr *desc = this_cpu_ptr(&idt_desc);
 879
 880        xen_convert_trap_info(desc, traps);
 881}
 882
 883/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
 884   hold a spinlock to protect the static traps[] array (static because
 885   it avoids allocation, and saves stack space). */
 886static void xen_load_idt(const struct desc_ptr *desc)
 887{
 888        static DEFINE_SPINLOCK(lock);
 889        static struct trap_info traps[257];
 890
 891        trace_xen_cpu_load_idt(desc);
 892
 893        spin_lock(&lock);
 894
 895        memcpy(this_cpu_ptr(&idt_desc), desc, sizeof(idt_desc));
 896
 897        xen_convert_trap_info(desc, traps);
 898
 899        xen_mc_flush();
 900        if (HYPERVISOR_set_trap_table(traps))
 901                BUG();
 902
 903        spin_unlock(&lock);
 904}
 905
 906/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
 907   they're handled differently. */
 908static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 909                                const void *desc, int type)
 910{
 911        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
 912
 913        preempt_disable();
 914
 915        switch (type) {
 916        case DESC_LDT:
 917        case DESC_TSS:
 918                /* ignore */
 919                break;
 920
 921        default: {
 922                xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
 923
 924                xen_mc_flush();
 925                if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
 926                        BUG();
 927        }
 928
 929        }
 930
 931        preempt_enable();
 932}
 933
 934/*
  935 * Version of write_gdt_entry for use at early boot time, when we need to
  936 * update an entry as simply as possible.
 937 */
 938static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 939                                            const void *desc, int type)
 940{
 941        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
 942
 943        switch (type) {
 944        case DESC_LDT:
 945        case DESC_TSS:
 946                /* ignore */
 947                break;
 948
 949        default: {
 950                xmaddr_t maddr = virt_to_machine(&dt[entry]);
 951
 952                if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
 953                        dt[entry] = *(struct desc_struct *)desc;
 954        }
 955
 956        }
 957}
 958
 959static void xen_load_sp0(struct tss_struct *tss,
 960                         struct thread_struct *thread)
 961{
 962        struct multicall_space mcs;
 963
 964        mcs = xen_mc_entry(0);
 965        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
 966        xen_mc_issue(PARAVIRT_LAZY_CPU);
 967        tss->x86_tss.sp0 = thread->sp0;
 968}
 969
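     /*
      * 'mask' is an EFLAGS image; the IOPL field lives in bits 12-13.
      * Translate it into a PHYSDEVOP_set_iopl request, with a zero mask
      * forced to IOPL 1.
      */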
 970void xen_set_iopl_mask(unsigned mask)
 971{
 972        struct physdev_set_iopl set_iopl;
 973
 974        /* Force the change at ring 0. */
 975        set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
 976        HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 977}
 978
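     /* The traditional port 0x80 I/O delay has no value in a Xen guest. */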
 979static void xen_io_delay(void)
 980{
 981}
 982
 983static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
 984
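     /*
      * Reading %cr0 from a PV guest traps to the hypervisor and is
      * therefore expensive, so keep a per-cpu cached copy (maintained by
      * xen_write_cr0) and only fall back to native_read_cr0() when the
      * cache has not been populated yet.
      */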
 985static unsigned long xen_read_cr0(void)
 986{
 987        unsigned long cr0 = this_cpu_read(xen_cr0_value);
 988
 989        if (unlikely(cr0 == 0)) {
 990                cr0 = native_read_cr0();
 991                this_cpu_write(xen_cr0_value, cr0);
 992        }
 993
 994        return cr0;
 995}
 996
 997static void xen_write_cr0(unsigned long cr0)
 998{
 999        struct multicall_space mcs;
1000
1001        this_cpu_write(xen_cr0_value, cr0);
1002
1003        /* Only pay attention to cr0.TS; everything else is
1004           ignored. */
1005        mcs = xen_mc_entry(0);
1006
1007        MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
1008
1009        xen_mc_issue(PARAVIRT_LAZY_CPU);
1010}
1011
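     /*
      * Mask out the %cr4 bits a PV guest is not allowed to control
      * (global pages, large pages and user-mode RDPMC) before writing.
      */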
1012static void xen_write_cr4(unsigned long cr4)
1013{
1014        cr4 &= ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PCE);
1015
1016        native_write_cr4(cr4);
1017}
1018#ifdef CONFIG_X86_64
1019static inline unsigned long xen_read_cr8(void)
1020{
1021        return 0;
1022}
1023static inline void xen_write_cr8(unsigned long val)
1024{
1025        BUG_ON(val);
1026}
1027#endif
1028
1029static u64 xen_read_msr_safe(unsigned int msr, int *err)
1030{
1031        u64 val;
1032
1033        if (pmu_msr_read(msr, &val, err))
1034                return val;
1035
1036        val = native_read_msr_safe(msr, err);
1037        switch (msr) {
1038        case MSR_IA32_APICBASE:
1039#ifdef CONFIG_X86_X2APIC
1040                if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
1041#endif
1042                        val &= ~X2APIC_ENABLE;
1043                break;
1044        }
1045        return val;
1046}
1047
1048static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
1049{
1050        int ret;
1051
1052        ret = 0;
1053
1054        switch (msr) {
1055#ifdef CONFIG_X86_64
1056                unsigned which;
1057                u64 base;
1058
1059        case MSR_FS_BASE:               which = SEGBASE_FS; goto set;
1060        case MSR_KERNEL_GS_BASE:        which = SEGBASE_GS_USER; goto set;
1061        case MSR_GS_BASE:               which = SEGBASE_GS_KERNEL; goto set;
1062
1063        set:
1064                base = ((u64)high << 32) | low;
1065                if (HYPERVISOR_set_segment_base(which, base) != 0)
1066                        ret = -EIO;
1067                break;
1068#endif
1069
1070        case MSR_STAR:
1071        case MSR_CSTAR:
1072        case MSR_LSTAR:
1073        case MSR_SYSCALL_MASK:
1074        case MSR_IA32_SYSENTER_CS:
1075        case MSR_IA32_SYSENTER_ESP:
1076        case MSR_IA32_SYSENTER_EIP:
1077                /* Fast syscall setup is all done in hypercalls, so
1078                   these are all ignored.  Stub them out here to stop
1079                   Xen console noise. */
1080                break;
1081
1082        default:
1083                if (!pmu_msr_write(msr, low, high, &ret))
1084                        ret = native_write_msr_safe(msr, low, high);
1085        }
1086
1087        return ret;
1088}
1089
1090static u64 xen_read_msr(unsigned int msr)
1091{
1092        /*
1093         * This will silently swallow a #GP from RDMSR.  It may be worth
1094         * changing that.
1095         */
1096        int err;
1097
1098        return xen_read_msr_safe(msr, &err);
1099}
1100
1101static void xen_write_msr(unsigned int msr, unsigned low, unsigned high)
1102{
1103        /*
1104         * This will silently swallow a #GP from WRMSR.  It may be worth
1105         * changing that.
1106         */
1107        xen_write_msr_safe(msr, low, high);
1108}
1109
1110void xen_setup_shared_info(void)
1111{
1112        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
1113                set_fixmap(FIX_PARAVIRT_BOOTMAP,
1114                           xen_start_info->shared_info);
1115
1116                HYPERVISOR_shared_info =
1117                        (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
1118        } else
1119                HYPERVISOR_shared_info =
1120                        (struct shared_info *)__va(xen_start_info->shared_info);
1121
1122#ifndef CONFIG_SMP
1123        /* In UP this is as good a place as any to set up shared info */
1124        xen_setup_vcpu_info_placement();
1125#endif
1126
1127        xen_setup_mfn_list_list();
1128}
1129
1130/* This is called once we have the cpu_possible_mask */
1131void xen_setup_vcpu_info_placement(void)
1132{
1133        int cpu;
1134
1135        for_each_possible_cpu(cpu) {
1136                /* Set up direct vCPU id mapping for PV guests. */
1137                per_cpu(xen_vcpu_id, cpu) = cpu;
1138                xen_vcpu_setup(cpu);
1139        }
1140
1141        /* xen_vcpu_setup managed to place the vcpu_info within the
1142         * percpu area for all cpus, so make use of it. Note that for
 1143         * PVH we want to use the native IRQ mechanism. */
1144        if (have_vcpu_info_placement && !xen_pvh_domain()) {
1145                pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
1146                pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
1147                pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
1148                pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
1149                pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
1150        }
1151}
1152
1153static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
1154                          unsigned long addr, unsigned len)
1155{
1156        char *start, *end, *reloc;
1157        unsigned ret;
1158
1159        start = end = reloc = NULL;
1160
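     /*
      * For each patchable irq op, substitute the inlined xen_*_direct
      * variant when vcpu info placement lets us access the per-cpu
      * vcpu_info directly; otherwise fall through to the default
      * indirect-call patching.
      */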
1161#define SITE(op, x)                                                     \
1162        case PARAVIRT_PATCH(op.x):                                      \
1163        if (have_vcpu_info_placement) {                                 \
1164                start = (char *)xen_##x##_direct;                       \
1165                end = xen_##x##_direct_end;                             \
1166                reloc = xen_##x##_direct_reloc;                         \
1167        }                                                               \
1168        goto patch_site
1169
1170        switch (type) {
1171                SITE(pv_irq_ops, irq_enable);
1172                SITE(pv_irq_ops, irq_disable);
1173                SITE(pv_irq_ops, save_fl);
1174                SITE(pv_irq_ops, restore_fl);
1175#undef SITE
1176
1177        patch_site:
1178                if (start == NULL || (end-start) > len)
1179                        goto default_patch;
1180
1181                ret = paravirt_patch_insns(insnbuf, len, start, end);
1182
1183                /* Note: because reloc is assigned from something that
1184                   appears to be an array, gcc assumes it's non-null,
1185                   but doesn't know its relationship with start and
1186                   end. */
1187                if (reloc > start && reloc < end) {
1188                        int reloc_off = reloc - start;
1189                        long *relocp = (long *)(insnbuf + reloc_off);
1190                        long delta = start - (char *)addr;
1191
1192                        *relocp += delta;
1193                }
1194                break;
1195
1196        default_patch:
1197        default:
1198                ret = paravirt_patch_default(type, clobbers, insnbuf,
1199                                             addr, len);
1200                break;
1201        }
1202
1203        return ret;
1204}
1205
1206static const struct pv_info xen_info __initconst = {
1207        .shared_kernel_pmd = 0,
1208
1209#ifdef CONFIG_X86_64
1210        .extra_user_64bit_cs = FLAT_USER_CS64,
1211#endif
1212        .name = "Xen",
1213};
1214
1215static const struct pv_init_ops xen_init_ops __initconst = {
1216        .patch = xen_patch,
1217};
1218
1219static const struct pv_cpu_ops xen_cpu_ops __initconst = {
1220        .cpuid = xen_cpuid,
1221
1222        .set_debugreg = xen_set_debugreg,
1223        .get_debugreg = xen_get_debugreg,
1224
1225        .read_cr0 = xen_read_cr0,
1226        .write_cr0 = xen_write_cr0,
1227
1228        .read_cr4 = native_read_cr4,
1229        .write_cr4 = xen_write_cr4,
1230
1231#ifdef CONFIG_X86_64
1232        .read_cr8 = xen_read_cr8,
1233        .write_cr8 = xen_write_cr8,
1234#endif
1235
1236        .wbinvd = native_wbinvd,
1237
1238        .read_msr = xen_read_msr,
1239        .write_msr = xen_write_msr,
1240
1241        .read_msr_safe = xen_read_msr_safe,
1242        .write_msr_safe = xen_write_msr_safe,
1243
1244        .read_pmc = xen_read_pmc,
1245
1246        .iret = xen_iret,
1247#ifdef CONFIG_X86_64
1248        .usergs_sysret64 = xen_sysret64,
1249#endif
1250
1251        .load_tr_desc = paravirt_nop,
1252        .set_ldt = xen_set_ldt,
1253        .load_gdt = xen_load_gdt,
1254        .load_idt = xen_load_idt,
1255        .load_tls = xen_load_tls,
1256#ifdef CONFIG_X86_64
1257        .load_gs_index = xen_load_gs_index,
1258#endif
1259
1260        .alloc_ldt = xen_alloc_ldt,
1261        .free_ldt = xen_free_ldt,
1262
1263        .store_idt = native_store_idt,
1264        .store_tr = xen_store_tr,
1265
1266        .write_ldt_entry = xen_write_ldt_entry,
1267        .write_gdt_entry = xen_write_gdt_entry,
1268        .write_idt_entry = xen_write_idt_entry,
1269        .load_sp0 = xen_load_sp0,
1270
1271        .set_iopl_mask = xen_set_iopl_mask,
1272        .io_delay = xen_io_delay,
1273
1274        /* Xen takes care of %gs when switching to usermode for us */
1275        .swapgs = paravirt_nop,
1276
1277        .start_context_switch = paravirt_start_context_switch,
1278        .end_context_switch = xen_end_context_switch,
1279};
1280
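     /*
      * Tear down the PMU on every online CPU, then ask the hypervisor to
      * shut this domain down for the given reason.
      */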
1281static void xen_reboot(int reason)
1282{
1283        struct sched_shutdown r = { .reason = reason };
1284        int cpu;
1285
1286        for_each_online_cpu(cpu)
1287                xen_pmu_finish(cpu);
1288
1289        if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1290                BUG();
1291}
1292
1293static void xen_restart(char *msg)
1294{
1295        xen_reboot(SHUTDOWN_reboot);
1296}
1297
1298static void xen_emergency_restart(void)
1299{
1300        xen_reboot(SHUTDOWN_reboot);
1301}
1302
1303static void xen_machine_halt(void)
1304{
1305        xen_reboot(SHUTDOWN_poweroff);
1306}
1307
1308static void xen_machine_power_off(void)
1309{
1310        if (pm_power_off)
1311                pm_power_off();
1312        xen_reboot(SHUTDOWN_poweroff);
1313}
1314
1315static void xen_crash_shutdown(struct pt_regs *regs)
1316{
1317        xen_reboot(SHUTDOWN_crash);
1318}
1319
1320static int
1321xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1322{
1323        if (!kexec_crash_loaded())
1324                xen_reboot(SHUTDOWN_crash);
1325        return NOTIFY_DONE;
1326}
1327
1328static struct notifier_block xen_panic_block = {
1329        .notifier_call= xen_panic_event,
1330        .priority = INT_MIN
1331};
1332
1333int xen_panic_handler_init(void)
1334{
1335        atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1336        return 0;
1337}
1338
1339static const struct machine_ops xen_machine_ops __initconst = {
1340        .restart = xen_restart,
1341        .halt = xen_machine_halt,
1342        .power_off = xen_machine_power_off,
1343        .shutdown = xen_machine_halt,
1344        .crash_shutdown = xen_crash_shutdown,
1345        .emergency_restart = xen_emergency_restart,
1346};
1347
1348static unsigned char xen_get_nmi_reason(void)
1349{
1350        unsigned char reason = 0;
1351
1352        /* Construct a value which looks like it came from port 0x61. */
1353        if (test_bit(_XEN_NMIREASON_io_error,
1354                     &HYPERVISOR_shared_info->arch.nmi_reason))
1355                reason |= NMI_REASON_IOCHK;
1356        if (test_bit(_XEN_NMIREASON_pci_serr,
1357                     &HYPERVISOR_shared_info->arch.nmi_reason))
1358                reason |= NMI_REASON_SERR;
1359
1360        return reason;
1361}
1362
1363static void __init xen_boot_params_init_edd(void)
1364{
1365#if IS_ENABLED(CONFIG_EDD)
1366        struct xen_platform_op op;
1367        struct edd_info *edd_info;
1368        u32 *mbr_signature;
1369        unsigned nr;
1370        int ret;
1371
1372        edd_info = boot_params.eddbuf;
1373        mbr_signature = boot_params.edd_mbr_sig_buffer;
1374
1375        op.cmd = XENPF_firmware_info;
1376
1377        op.u.firmware_info.type = XEN_FW_DISK_INFO;
1378        for (nr = 0; nr < EDDMAXNR; nr++) {
1379                struct edd_info *info = edd_info + nr;
1380
1381                op.u.firmware_info.index = nr;
1382                info->params.length = sizeof(info->params);
1383                set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
1384                                     &info->params);
1385                ret = HYPERVISOR_platform_op(&op);
1386                if (ret)
1387                        break;
1388
1389#define C(x) info->x = op.u.firmware_info.u.disk_info.x
1390                C(device);
1391                C(version);
1392                C(interface_support);
1393                C(legacy_max_cylinder);
1394                C(legacy_max_head);
1395                C(legacy_sectors_per_track);
1396#undef C
1397        }
1398        boot_params.eddbuf_entries = nr;
1399
1400        op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
1401        for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
1402                op.u.firmware_info.index = nr;
1403                ret = HYPERVISOR_platform_op(&op);
1404                if (ret)
1405                        break;
1406                mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
1407        }
1408        boot_params.edd_mbr_sig_buf_entries = nr;
1409#endif
1410}
1411
1412/*
1413 * Set up the GDT and segment registers for -fstack-protector.  Until
1414 * we do this, we have to be careful not to call any stack-protected
1415 * function, which is most of the kernel.
1416 *
 1417 * Note that it is __ref because the only caller of this after init
 1418 * is PVH, which is not going to use xen_load_gdt_boot or other
1419 * __init functions.
1420 */
1421static void __ref xen_setup_gdt(int cpu)
1422{
1423        if (xen_feature(XENFEAT_auto_translated_physmap)) {
1424#ifdef CONFIG_X86_64
1425                unsigned long dummy;
1426
1427                load_percpu_segment(cpu); /* We need to access per-cpu area */
1428                switch_to_new_gdt(cpu); /* GDT and GS set */
1429
 1430                /* We are switching from the Xen-provided GDT to our HVM mode
 1431                 * GDT. The new GDT has __KERNEL_CS with CS.L = 1
1432                 * and we are jumping to reload it.
1433                 */
1434                asm volatile ("pushq %0\n"
1435                              "leaq 1f(%%rip),%0\n"
1436                              "pushq %0\n"
1437                              "lretq\n"
1438                              "1:\n"
1439                              : "=&r" (dummy) : "0" (__KERNEL_CS));
1440
1441                /*
 1442                 * While not needed, we also set %es, %ds, and %fs
 1443                 * to zero. We don't care about %ss as it is NULL.
 1444                 * Strictly speaking this is not needed as Xen zeros those
 1445                 * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE).
1446                 *
1447                 * Linux zeros them in cpu_init() and in secondary_startup_64
1448                 * (for BSP).
1449                 */
1450                loadsegment(es, 0);
1451                loadsegment(ds, 0);
1452                loadsegment(fs, 0);
1453#else
1454                /* PVH: TODO Implement. */
1455                BUG();
1456#endif
1457                return; /* PVH does not need any PV GDT ops. */
1458        }
1459        pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1460        pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1461
1462        setup_stack_canary_segment(0);
1463        switch_to_new_gdt(0);
1464
1465        pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
1466        pv_cpu_ops.load_gdt = xen_load_gdt;
1467}
1468
1469#ifdef CONFIG_XEN_PVH
1470/*
 1471 * A PV guest starts with default flags that are not set for PVH; set them
 1472 * here as soon as possible.
1473 */
1474static void xen_pvh_set_cr_flags(int cpu)
1475{
1476
 1477        /* Some of these are set up in 'secondary_startup_64'. The others
 1478         * (X86_CR0_TS, X86_CR0_PE, X86_CR0_ET) are set by Xen for HVM guests
 1479         * (with which PVH shares codepaths), while X86_CR0_PG is for PVH. */
1480        write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM);
1481
1482        if (!cpu)
1483                return;
1484        /*
 1485         * For the BSP, PSE and PGE are set in probe_page_size_mask(); for APs
 1486         * we set them here. For all, OSFXSR and OSXMMEXCPT are set in fpu__init_cpu().
 1487         */
1488        if (boot_cpu_has(X86_FEATURE_PSE))
1489                cr4_set_bits_and_update_boot(X86_CR4_PSE);
1490
1491        if (boot_cpu_has(X86_FEATURE_PGE))
1492                cr4_set_bits_and_update_boot(X86_CR4_PGE);
1493}
1494
1495/*
 1496 * Note that it is __ref because the only caller of this after init
 1497 * is PVH, which is not going to use xen_load_gdt_boot or other
1498 * __init functions.
1499 */
1500void __ref xen_pvh_secondary_vcpu_init(int cpu)
1501{
1502        xen_setup_gdt(cpu);
1503        xen_pvh_set_cr_flags(cpu);
1504}
1505
1506static void __init xen_pvh_early_guest_init(void)
1507{
1508        if (!xen_feature(XENFEAT_auto_translated_physmap))
1509                return;
1510
1511        BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
1512
1513        xen_pvh_early_cpu_init(0, false);
1514        xen_pvh_set_cr_flags(0);
1515
1516#ifdef CONFIG_X86_32
1517        BUG(); /* PVH: Implement proper support. */
1518#endif
1519}
1520#endif    /* CONFIG_XEN_PVH */
1521
1522static void __init xen_dom0_set_legacy_features(void)
1523{
1524        x86_platform.legacy.rtc = 1;
1525}
1526
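     /*
      * Register the CPU hotplug callbacks: the prepare/dead pair at the
      * Xen prepare stage and the online callback once the CPU is up.  If
      * the second registration fails, the first state is removed again.
      */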
1527static int xen_cpuhp_setup(void)
1528{
1529        int rc;
1530
1531        rc = cpuhp_setup_state_nocalls(CPUHP_XEN_PREPARE,
1532                                       "x86/xen/hvm_guest:prepare",
1533                                       xen_cpu_up_prepare, xen_cpu_dead);
1534        if (rc >= 0) {
1535                rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
1536                                               "x86/xen/hvm_guest:online",
1537                                               xen_cpu_up_online, NULL);
1538                if (rc < 0)
1539                        cpuhp_remove_state_nocalls(CPUHP_XEN_PREPARE);
1540        }
1541
1542        return rc >= 0 ? 0 : rc;
1543}
1544
1545/* First C function to be called on Xen boot */
1546asmlinkage __visible void __init xen_start_kernel(void)
1547{
1548        struct physdev_set_iopl set_iopl;
1549        unsigned long initrd_start = 0;
1550        int rc;
1551
1552        if (!xen_start_info)
1553                return;
1554
1555        xen_domain_type = XEN_PV_DOMAIN;
1556
1557        xen_setup_features();
1558#ifdef CONFIG_XEN_PVH
1559        xen_pvh_early_guest_init();
1560#endif
1561        xen_setup_machphys_mapping();
1562
1563        /* Install Xen paravirt ops */
1564        pv_info = xen_info;
1565        pv_init_ops = xen_init_ops;
1566        if (!xen_pvh_domain()) {
1567                pv_cpu_ops = xen_cpu_ops;
1568
1569                x86_platform.get_nmi_reason = xen_get_nmi_reason;
1570        }
1571
1572        if (xen_feature(XENFEAT_auto_translated_physmap))
1573                x86_init.resources.memory_setup = xen_auto_xlated_memory_setup;
1574        else
1575                x86_init.resources.memory_setup = xen_memory_setup;
1576        x86_init.oem.arch_setup = xen_arch_setup;
1577        x86_init.oem.banner = xen_banner;
1578
1579        xen_init_time_ops();
1580
1581        /*
1582         * Set up some pagetable state before starting to set any ptes.
1583         */
1584
1585        xen_init_mmu_ops();
1586
1587        /* Prevent unwanted bits from being set in PTEs. */
1588        __supported_pte_mask &= ~_PAGE_GLOBAL;
1589
1590        /*
1591         * Prevent page tables from being allocated in highmem, even
1592         * if CONFIG_HIGHPTE is enabled.
1593         */
1594        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1595
1596        /* Work out if we support NX */
1597        x86_configure_nx();
1598
1599        /* Get mfn list */
1600        xen_build_dynamic_phys_to_machine();
1601
1602        /*
1603         * Set up kernel GDT and segment registers, mainly so that
1604         * -fstack-protector code can be executed.
1605         */
1606        xen_setup_gdt(0);
1607
1608        xen_init_irq_ops();
1609        xen_init_cpuid_mask();
1610
1611#ifdef CONFIG_X86_LOCAL_APIC
1612        /*
1613         * set up the basic apic ops.
1614         */
1615        xen_init_apic();
1616#endif
1617
1618        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1619                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1620                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
1621        }
1622
1623        machine_ops = xen_machine_ops;
1624
1625        /*
1626         * The only reliable way to retain the initial address of the
1627         * percpu gdt_page is to remember it here, so we can go and
1628         * mark it RW later, when the initial percpu area is freed.
1629         */
1630        xen_initial_gdt = &per_cpu(gdt_page, 0);
1631
1632        xen_smp_init();
1633
1634#ifdef CONFIG_ACPI_NUMA
1635        /*
 1636         * The pages we get from Xen are not related to machine pages, so
1637         * any NUMA information the kernel tries to get from ACPI will
1638         * be meaningless.  Prevent it from trying.
1639         */
1640        acpi_numa = -1;
1641#endif
1642        /* Don't do the full vcpu_info placement stuff until we have a
1643           possible map and a non-dummy shared_info. */
1644        per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1645
1646        WARN_ON(xen_cpuhp_setup());
1647
1648        local_irq_disable();
1649        early_boot_irqs_disabled = true;
1650
1651        xen_raw_console_write("mapping kernel into physical memory\n");
1652        xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base,
1653                                   xen_start_info->nr_pages);
1654        xen_reserve_special_pages();
1655
1656        /* keep using Xen gdt for now; no urgent need to change it */
1657
1658#ifdef CONFIG_X86_32
1659        pv_info.kernel_rpl = 1;
1660        if (xen_feature(XENFEAT_supervisor_mode_kernel))
1661                pv_info.kernel_rpl = 0;
1662#else
1663        pv_info.kernel_rpl = 0;
1664#endif
1665        /* set the limit of our address space */
1666        xen_reserve_top();
1667
1668        /* PVH: runs at default kernel iopl of 0 */
1669        if (!xen_pvh_domain()) {
1670                /*
1671                 * We used to do this in xen_arch_setup, but that is too late
1672                 * on AMD, where early_cpu_init (run before ->arch_setup())
1673                 * calls early_amd_init, which pokes the 0xcf8 port.
1674                 */
1675                set_iopl.iopl = 1;
1676                rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1677                if (rc != 0)
1678                        xen_raw_printk("physdev_op failed %d\n", rc);
1679        }
1680
1681#ifdef CONFIG_X86_32
1682        /* set up basic CPUID stuff */
1683        cpu_detect(&new_cpu_data);
1684        set_cpu_cap(&new_cpu_data, X86_FEATURE_FPU);
1685        new_cpu_data.wp_works_ok = 1;
1686        new_cpu_data.x86_capability[CPUID_1_EDX] = cpuid_edx(1);
1687#endif
1688
1689        if (xen_start_info->mod_start) {
1690                if (xen_start_info->flags & SIF_MOD_START_PFN)
1691                        initrd_start = PFN_PHYS(xen_start_info->mod_start);
1692                else
1693                        initrd_start = __pa(xen_start_info->mod_start);
1694        }
1695
1696        /* Poke various useful things into boot_params */
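            /*
             * type_of_loader: boot loader ID 9 ("Xen") in the high nibble,
             * version 0 in the low nibble (see Documentation/x86/boot.txt).
             */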
1697        boot_params.hdr.type_of_loader = (9 << 4) | 0;
1698        boot_params.hdr.ramdisk_image = initrd_start;
1699        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1700        boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
1701        boot_params.hdr.hardware_subarch = X86_SUBARCH_XEN;
1702
1703        if (!xen_initial_domain()) {
1704                add_preferred_console("xenboot", 0, NULL);
1705                add_preferred_console("tty", 0, NULL);
1706                add_preferred_console("hvc", 0, NULL);
1707                if (pci_xen)
1708                        x86_init.pci.arch_init = pci_xen_init;
1709        } else {
1710                const struct dom0_vga_console_info *info =
1711                        (void *)((char *)xen_start_info +
1712                                 xen_start_info->console.dom0.info_off);
1713                struct xen_platform_op op = {
1714                        .cmd = XENPF_firmware_info,
1715                        .interface_version = XENPF_INTERFACE_VERSION,
1716                        .u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
1717                };
1718
1719                x86_platform.set_legacy_features =
1720                                xen_dom0_set_legacy_features;
1721                xen_init_vga(info, xen_start_info->console.dom0.info_size);
1722                xen_start_info->console.domU.mfn = 0;
1723                xen_start_info->console.domU.evtchn = 0;
1724
1725                if (HYPERVISOR_platform_op(&op) == 0)
1726                        boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
1727
1728                /* Make sure ACS will be enabled */
1729                pci_request_acs();
1730
1731                xen_acpi_sleep_register();
1732
1733                /* Avoid searching for BIOS MP tables */
1734                x86_init.mpparse.find_smp_config = x86_init_noop;
1735                x86_init.mpparse.get_smp_config = x86_init_uint_noop;
1736
1737                xen_boot_params_init_edd();
1738        }
1739#ifdef CONFIG_PCI
1740        /* PCI BIOS service won't work from a PV guest. */
1741        pci_probe &= ~PCI_PROBE_BIOS;
1742#endif
1743        xen_raw_console_write("about to get started...\n");
1744
1745        /* Let's presume PV guests always boot on vCPU with id 0. */
1746        per_cpu(xen_vcpu_id, 0) = 0;
1747
1748        xen_setup_runstate_info(0);
1749
1750        xen_efi_init();
1751
1752        /* Start the world */
1753#ifdef CONFIG_X86_32
1754        i386_start_kernel();
1755#else
1756        cr4_init_shadow(); /* 32-bit kernel does this in i386_start_kernel() */
1757        x86_64_start_reservations((char *)__pa_symbol(&boot_params));
1758#endif
1759}
1760
1761void __ref xen_hvm_init_shared_info(void)
1762{
1763        int cpu;
1764        struct xen_add_to_physmap xatp;
1765        static struct shared_info *shared_info_page = NULL;
1766
1767        if (!shared_info_page)
1768                shared_info_page = (struct shared_info *)
1769                        extend_brk(PAGE_SIZE, PAGE_SIZE);
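            /*
             * Ask Xen to back the GPFN of the page we just reserved from
             * the brk area with its shared_info frame.
             */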
1770        xatp.domid = DOMID_SELF;
1771        xatp.idx = 0;
1772        xatp.space = XENMAPSPACE_shared_info;
1773        xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
1774        if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1775                BUG();
1776
1777        HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
1778
1779        /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1780         * page; we use it in the event channel upcall and in some pvclock
1781         * related functions. We don't need the vcpu_info placement
1782         * optimizations because we don't use any pv_mmu or pv_irq op on
1783         * HVM.
1784         * When xen_hvm_init_shared_info runs at boot time only vcpu 0 is
1785         * online, but it also runs at resume time, when multiple vcpus
1786         * may be online. */
1787        for_each_online_cpu(cpu) {
1788                /* No vcpu_info slot in shared_info; leave xen_vcpu NULL. */
1789                if (xen_vcpu_nr(cpu) >= MAX_VIRT_CPUS)
1790                        continue;
1791                per_cpu(xen_vcpu, cpu) =
1792                        &HYPERVISOR_shared_info->vcpu_info[xen_vcpu_nr(cpu)];
1793        }
1794}
1795
1796#ifdef CONFIG_XEN_PVHVM
1797static void __init init_hvm_pv_info(void)
1798{
1799        int major, minor;
1800        uint32_t eax, ebx, ecx, edx, pages, msr, base;
1801        u64 pfn;
1802
1803        base = xen_cpuid_base();
1804        cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1805
1806        major = eax >> 16;
1807        minor = eax & 0xffff;
1808        printk(KERN_INFO "Xen version %d.%d.\n", major, minor);
1809
1810        cpuid(base + 2, &pages, &msr, &ecx, &edx);
1811
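            /*
             * Leaf base+2 reports how many hypercall pages are needed and
             * the MSR used to establish them; writing the page's physical
             * address to that MSR makes Xen fill it with hypercall stubs.
             */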
1812        pfn = __pa(hypercall_page);
1813        wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
1814
1815        xen_setup_features();
1816
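            /*
             * Leaf base+4 advertises HVM-specific flags; use the vcpu id
             * Xen reports in EBX when present, otherwise fall back to the
             * Linux CPU number.
             */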
1817        cpuid(base + 4, &eax, &ebx, &ecx, &edx);
1818        if (eax & XEN_HVM_CPUID_VCPU_ID_PRESENT)
1819                this_cpu_write(xen_vcpu_id, ebx);
1820        else
1821                this_cpu_write(xen_vcpu_id, smp_processor_id());
1822
1823        pv_info.name = "Xen HVM";
1824
1825        xen_domain_type = XEN_HVM_DOMAIN;
1826}
1827#endif
1828
1829static int xen_cpu_up_prepare(unsigned int cpu)
1830{
1831        int rc;
1832
1833        if (xen_hvm_domain()) {
1834                /*
1835                 * This can happen if CPU was offlined earlier and
1836                 * offlining timed out in common_cpu_die().
1837                 */
1838                if (cpu_report_state(cpu) == CPU_DEAD_FROZEN) {
1839                        xen_smp_intr_free(cpu);
1840                        xen_uninit_lock_cpu(cpu);
1841                }
1842
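                    /*
                     * Derive the Xen vcpu id from the ACPI processor id when
                     * one is available, otherwise fall back to the Linux CPU
                     * number.
                     */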
1843                if (cpu_acpi_id(cpu) != U32_MAX)
1844                        per_cpu(xen_vcpu_id, cpu) = cpu_acpi_id(cpu);
1845                else
1846                        per_cpu(xen_vcpu_id, cpu) = cpu;
1847                xen_vcpu_setup(cpu);
1848        }
1849
1850        if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
1851                xen_setup_timer(cpu);
1852
1853        rc = xen_smp_intr_init(cpu);
1854        if (rc) {
1855                WARN(1, "xen_smp_intr_init() for CPU %d failed: %d\n",
1856                     cpu, rc);
1857                return rc;
1858        }
1859        return 0;
1860}
1861
1862static int xen_cpu_dead(unsigned int cpu)
1863{
1864        xen_smp_intr_free(cpu);
1865
1866        if (xen_pv_domain() || xen_feature(XENFEAT_hvm_safe_pvclock))
1867                xen_teardown_timer(cpu);
1868
1869        return 0;
1870}
1871
1872static int xen_cpu_up_online(unsigned int cpu)
1873{
1874        xen_init_lock_cpu(cpu);
1875        return 0;
1876}
1877
1878#ifdef CONFIG_XEN_PVHVM
1879#ifdef CONFIG_KEXEC_CORE
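    /*
     * For kexec the domain is soft-reset so the hypervisor can drop stale
     * event channel and shared-info state before the new kernel registers
     * its own.
     */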
1880static void xen_hvm_shutdown(void)
1881{
1882        native_machine_shutdown();
1883        if (kexec_in_progress)
1884                xen_reboot(SHUTDOWN_soft_reset);
1885}
1886
1887static void xen_hvm_crash_shutdown(struct pt_regs *regs)
1888{
1889        native_machine_crash_shutdown(regs);
1890        xen_reboot(SHUTDOWN_soft_reset);
1891}
1892#endif
1893
1894static void __init xen_hvm_guest_init(void)
1895{
1896        if (xen_pv_domain())
1897                return;
1898
1899        init_hvm_pv_info();
1900
1901        xen_hvm_init_shared_info();
1902
1903        xen_panic_handler_init();
1904
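            /*
             * PVHVM relies on the vector callback to deliver event channel
             * interrupts; without it there is no usable upcall path.
             */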
1905        BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector));
1906
1907        xen_hvm_smp_init();
1908        WARN_ON(xen_cpuhp_setup());
1909        xen_unplug_emulated_devices();
1910        x86_init.irqs.intr_init = xen_init_IRQ;
1911        xen_hvm_init_time_ops();
1912        xen_hvm_init_mmu_ops();
1913#ifdef CONFIG_KEXEC_CORE
1914        machine_ops.shutdown = xen_hvm_shutdown;
1915        machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
1916#endif
1917}
1918#endif
1919
1920static bool xen_nopv = false;
1921static __init int xen_parse_nopv(char *arg)
1922{
1923        xen_nopv = true;
1924        return 0;
1925}
1926early_param("xen_nopv", xen_parse_nopv);
1927
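    /*
     * Hypervisor detection hook: a non-zero Xen CPUID leaf base means we
     * are running on Xen, unless "xen_nopv" was given on the command line.
     */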
1928static uint32_t __init xen_platform(void)
1929{
1930        if (xen_nopv)
1931                return 0;
1932
1933        return xen_cpuid_base();
1934}
1935
1936bool xen_hvm_need_lapic(void)
1937{
1938        if (xen_nopv)
1939                return false;
1940        if (xen_pv_domain())
1941                return false;
1942        if (!xen_hvm_domain())
1943                return false;
1944        if (xen_feature(XENFEAT_hvm_pirqs))
1945                return false;
1946        return true;
1947}
1948EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
1949
1950static void xen_set_cpu_features(struct cpuinfo_x86 *c)
1951{
1952        if (xen_pv_domain()) {
1953                clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
1954                set_cpu_cap(c, X86_FEATURE_XENPV);
1955        }
1956}
1957
1958static void xen_pin_vcpu(int cpu)
1959{
1960        static bool disable_pinning;
1961        struct sched_pin_override pin_override;
1962        int ret;
1963
1964        if (disable_pinning)
1965                return;
1966
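            /* A negative cpu removes a previously set override. */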
1967        pin_override.pcpu = cpu;
1968        ret = HYPERVISOR_sched_op(SCHEDOP_pin_override, &pin_override);
1969
1970        /* Ignore errors when removing override. */
1971        if (cpu < 0)
1972                return;
1973
1974        switch (ret) {
1975        case -ENOSYS:
1976                pr_warn("Unable to pin on physical cpu %d. In case of problems consider vcpu pinning.\n",
1977                        cpu);
1978                disable_pinning = true;
1979                break;
1980        case -EPERM:
1981                WARN(1, "Trying to pin vcpu without having privilege to do so\n");
1982                disable_pinning = true;
1983                break;
1984        case -EINVAL:
1985        case -EBUSY:
1986                pr_warn("Physical cpu %d not available for pinning. Check Xen cpu configuration.\n",
1987                        cpu);
1988                break;
1989        case 0:
1990                break;
1991        default:
1992                WARN(1, "rc %d while trying to pin vcpu\n", ret);
1993                disable_pinning = true;
1994        }
1995}
1996
1997const struct hypervisor_x86 x86_hyper_xen = {
1998        .name                   = "Xen",
1999        .detect                 = xen_platform,
2000#ifdef CONFIG_XEN_PVHVM
2001        .init_platform          = xen_hvm_guest_init,
2002#endif
2003        .x2apic_available       = xen_x2apic_para_available,
2004        .set_cpu_features       = xen_set_cpu_features,
2005        .pin_vcpu               = xen_pin_vcpu,
2006};
2007EXPORT_SYMBOL(x86_hyper_xen);
2008
2009#ifdef CONFIG_HOTPLUG_CPU
2010void xen_arch_register_cpu(int num)
2011{
2012        arch_register_cpu(num);
2013}
2014EXPORT_SYMBOL(xen_arch_register_cpu);
2015
2016void xen_arch_unregister_cpu(int num)
2017{
2018        arch_unregister_cpu(num);
2019}
2020EXPORT_SYMBOL(xen_arch_unregister_cpu);
2021#endif
2022