linux/arch/x86/xen/enlighten.c
   1/*
   2 * Core of Xen paravirt_ops implementation.
   3 *
   4 * This file contains the xen_paravirt_ops structure itself, and the
   5 * implementations for:
   6 * - privileged instructions
   7 * - interrupt flags
   8 * - segment operations
   9 * - booting and setup
  10 *
  11 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  12 */
  13
  14#include <linux/cpu.h>
  15#include <linux/kernel.h>
  16#include <linux/init.h>
  17#include <linux/smp.h>
  18#include <linux/preempt.h>
  19#include <linux/hardirq.h>
  20#include <linux/percpu.h>
  21#include <linux/delay.h>
  22#include <linux/start_kernel.h>
  23#include <linux/sched.h>
  24#include <linux/kprobes.h>
  25#include <linux/bootmem.h>
  26#include <linux/module.h>
  27#include <linux/mm.h>
  28#include <linux/page-flags.h>
  29#include <linux/highmem.h>
  30#include <linux/console.h>
  31#include <linux/pci.h>
  32#include <linux/gfp.h>
  33#include <linux/memblock.h>
  34
  35#include <xen/xen.h>
  36#include <xen/interface/xen.h>
  37#include <xen/interface/version.h>
  38#include <xen/interface/physdev.h>
  39#include <xen/interface/vcpu.h>
  40#include <xen/interface/memory.h>
  41#include <xen/features.h>
  42#include <xen/page.h>
  43#include <xen/hvm.h>
  44#include <xen/hvc-console.h>
  45
  46#include <asm/paravirt.h>
  47#include <asm/apic.h>
  48#include <asm/page.h>
  49#include <asm/xen/pci.h>
  50#include <asm/xen/hypercall.h>
  51#include <asm/xen/hypervisor.h>
  52#include <asm/fixmap.h>
  53#include <asm/processor.h>
  54#include <asm/proto.h>
  55#include <asm/msr-index.h>
  56#include <asm/traps.h>
  57#include <asm/setup.h>
  58#include <asm/desc.h>
  59#include <asm/pgalloc.h>
  60#include <asm/pgtable.h>
  61#include <asm/tlbflush.h>
  62#include <asm/reboot.h>
  63#include <asm/stackprotector.h>
  64#include <asm/hypervisor.h>
  65
  66#include "xen-ops.h"
  67#include "mmu.h"
  68#include "multicalls.h"
  69
  70EXPORT_SYMBOL_GPL(hypercall_page);
  71
  72DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
  73DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
  74
  75enum xen_domain_type xen_domain_type = XEN_NATIVE;
  76EXPORT_SYMBOL_GPL(xen_domain_type);
  77
  78unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
  79EXPORT_SYMBOL(machine_to_phys_mapping);
  80unsigned long  machine_to_phys_nr;
  81EXPORT_SYMBOL(machine_to_phys_nr);
  82
  83struct start_info *xen_start_info;
  84EXPORT_SYMBOL_GPL(xen_start_info);
  85
  86struct shared_info xen_dummy_shared_info;
  87
  88void *xen_initial_gdt;
  89
  90RESERVE_BRK(shared_info_page_brk, PAGE_SIZE);
  91__read_mostly int xen_have_vector_callback;
  92EXPORT_SYMBOL_GPL(xen_have_vector_callback);
  93
  94/*
  95 * Point at some empty memory to start with. We map the real shared_info
  96 * page as soon as fixmap is up and running.
  97 */
  98struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
  99
 100/*
 101 * Flag to determine whether vcpu info placement is available on all
 102 * VCPUs.  We assume it is to start with, and then set it to zero on
 103 * the first failure.  This is because it can succeed on some VCPUs
 104 * and not others, since it can involve hypervisor memory allocation,
 105 * or because the guest failed to guarantee all the appropriate
 106 * constraints on all VCPUs (i.e. the buffer can't cross a page boundary).
 107 *
 108 * Note that any particular CPU may be using a placed vcpu structure,
 109 * but we can only optimise if they all are.
 110 *
 111 * 0: not available, 1: available
 112 */
 113static int have_vcpu_info_placement = 1;
 114
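    /*
     * Without vcpu_info placement only the first MAX_VIRT_CPUS slots of the
     * shared_info page are usable, so cap the number of CPUs we try to boot.
     */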
 115static void clamp_max_cpus(void)
 116{
 117#ifdef CONFIG_SMP
 118        if (setup_max_cpus > MAX_VIRT_CPUS)
 119                setup_max_cpus = MAX_VIRT_CPUS;
 120#endif
 121}
 122
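    /*
     * Register this CPU's vcpu_info with the hypervisor so it can be accessed
     * directly through the percpu area; fall back to the fixed slot in the
     * shared_info page if the registration hypercall fails or is skipped.
     */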
 123static void xen_vcpu_setup(int cpu)
 124{
 125        struct vcpu_register_vcpu_info info;
 126        int err;
 127        struct vcpu_info *vcpup;
 128
 129        BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
 130
 131        if (cpu < MAX_VIRT_CPUS)
 132                per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
 133
 134        if (!have_vcpu_info_placement) {
 135                if (cpu >= MAX_VIRT_CPUS)
 136                        clamp_max_cpus();
 137                return;
 138        }
 139
 140        vcpup = &per_cpu(xen_vcpu_info, cpu);
 141        info.mfn = arbitrary_virt_to_mfn(vcpup);
 142        info.offset = offset_in_page(vcpup);
 143
 144        /* Check to see if the hypervisor will put the vcpu_info
 145           structure where we want it, which allows direct access via
 146           a percpu-variable. */
 147        err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info);
 148
 149        if (err) {
 150                printk(KERN_DEBUG "register_vcpu_info failed: err=%d\n", err);
 151                have_vcpu_info_placement = 0;
 152                clamp_max_cpus();
 153        } else {
 154                /* This cpu is using the registered vcpu info, even if
 155                   later ones fail to. */
 156                per_cpu(xen_vcpu, cpu) = vcpup;
 157        }
 158}
 159
 160/*
 161 * On restore, set the vcpu placement up again.
 162 * If it fails, then we're in a bad state, since
 163 * we can't back out from using it...
 164 */
 165void xen_vcpu_restore(void)
 166{
 167        int cpu;
 168
 169        for_each_online_cpu(cpu) {
 170                bool other_cpu = (cpu != smp_processor_id());
 171
 172                if (other_cpu &&
 173                    HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
 174                        BUG();
 175
 176                xen_setup_runstate_info(cpu);
 177
 178                if (have_vcpu_info_placement)
 179                        xen_vcpu_setup(cpu);
 180
 181                if (other_cpu &&
 182                    HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
 183                        BUG();
 184        }
 185}
 186
 187static void __init xen_banner(void)
 188{
 189        unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL);
 190        struct xen_extraversion extra;
 191        HYPERVISOR_xen_version(XENVER_extraversion, &extra);
 192
 193        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 194               pv_info.name);
 195        printk(KERN_INFO "Xen version: %d.%d%s%s\n",
 196               version >> 16, version & 0xffff, extra.extraversion,
 197               xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
 198}
 199
 200static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
 201static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
 202
 203static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 204                      unsigned int *cx, unsigned int *dx)
 205{
 206        unsigned maskebx = ~0;
 207        unsigned maskecx = ~0;
 208        unsigned maskedx = ~0;
 209
 210        /*
 211         * Mask out inconvenient features, to try and disable as many
 212         * unsupported kernel subsystems as possible.
 213         */
 214        switch (*ax) {
 215        case 1:
 216                maskecx = cpuid_leaf1_ecx_mask;
 217                maskedx = cpuid_leaf1_edx_mask;
 218                break;
 219
 220        case 0xb:
 221                /* Suppress extended topology stuff */
 222                maskebx = 0;
 223                break;
 224        }
 225
 226        asm(XEN_EMULATE_PREFIX "cpuid"
 227                : "=a" (*ax),
 228                  "=b" (*bx),
 229                  "=c" (*cx),
 230                  "=d" (*dx)
 231                : "0" (*ax), "2" (*cx));
 232
 233        *bx &= maskebx;
 234        *cx &= maskecx;
 235        *dx &= maskedx;
 236}
 237
 238static void __init xen_init_cpuid_mask(void)
 239{
 240        unsigned int ax, bx, cx, dx;
 241        unsigned int xsave_mask;
 242
 243        cpuid_leaf1_edx_mask =
 244                ~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
 245                  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 246                  (1 << X86_FEATURE_MTRR) |  /* disable MTRR */
 247                  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 248
 249        if (!xen_initial_domain())
 250                cpuid_leaf1_edx_mask &=
 251                        ~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
 252                          (1 << X86_FEATURE_ACPI));  /* disable ACPI */
 253        ax = 1;
 254        cx = 0;
 255        xen_cpuid(&ax, &bx, &cx, &dx);
 256
 257        xsave_mask =
 258                (1 << (X86_FEATURE_XSAVE % 32)) |
 259                (1 << (X86_FEATURE_OSXSAVE % 32));
 260
 261        /* Xen will set CR4.OSXSAVE if supported and not disabled by force */
 262        if ((cx & xsave_mask) != xsave_mask)
 263                cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
 264}
 265
 266static void xen_set_debugreg(int reg, unsigned long val)
 267{
 268        HYPERVISOR_set_debugreg(reg, val);
 269}
 270
 271static unsigned long xen_get_debugreg(int reg)
 272{
 273        return HYPERVISOR_get_debugreg(reg);
 274}
 275
 276static void xen_end_context_switch(struct task_struct *next)
 277{
 278        xen_mc_flush();
 279        paravirt_end_context_switch(next);
 280}
 281
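    /*
     * Xen manages the TSS for PV guests, so there is no task register value
     * worth reporting; just return 0.
     */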
 282static unsigned long xen_store_tr(void)
 283{
 284        return 0;
 285}
 286
 287/*
 288 * Set the page permissions for a particular virtual address.  If the
 289 * address is a vmalloc mapping (or other non-linear mapping), then
 290 * find the linear mapping of the page and also set its protections to
 291 * match.
 292 */
 293static void set_aliased_prot(void *v, pgprot_t prot)
 294{
 295        int level;
 296        pte_t *ptep;
 297        pte_t pte;
 298        unsigned long pfn;
 299        struct page *page;
 300
 301        ptep = lookup_address((unsigned long)v, &level);
 302        BUG_ON(ptep == NULL);
 303
 304        pfn = pte_pfn(*ptep);
 305        page = pfn_to_page(pfn);
 306
 307        pte = pfn_pte(pfn, prot);
 308
 309        if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
 310                BUG();
 311
 312        if (!PageHighMem(page)) {
 313                void *av = __va(PFN_PHYS(pfn));
 314
 315                if (av != v)
 316                        if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
 317                                BUG();
 318        } else
 319                kmap_flush_unused();
 320}
 321
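    /*
     * Xen requires LDT pages to be mapped read-only before they can be used,
     * so switch their protections (and those of any linear-map alias) when an
     * LDT is allocated or freed.
     */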
 322static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
 323{
 324        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 325        int i;
 326
 327        for (i = 0; i < entries; i += entries_per_page)
 328                set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
 329}
 330
 331static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
 332{
 333        const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
 334        int i;
 335
 336        for (i = 0; i < entries; i += entries_per_page)
 337                set_aliased_prot(ldt + i, PAGE_KERNEL);
 338}
 339
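    /* Point the vcpu at a new LDT via a (possibly batched) MMUEXT_SET_LDT op. */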
 340static void xen_set_ldt(const void *addr, unsigned entries)
 341{
 342        struct mmuext_op *op;
 343        struct multicall_space mcs = xen_mc_entry(sizeof(*op));
 344
 345        trace_xen_cpu_set_ldt(addr, entries);
 346
 347        op = mcs.args;
 348        op->cmd = MMUEXT_SET_LDT;
 349        op->arg1.linear_addr = (unsigned long)addr;
 350        op->arg2.nr_ents = entries;
 351
 352        MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 353
 354        xen_mc_issue(PARAVIRT_LAZY_CPU);
 355}
 356
 357static void xen_load_gdt(const struct desc_ptr *dtr)
 358{
 359        unsigned long va = dtr->address;
 360        unsigned int size = dtr->size + 1;
 361        unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 362        unsigned long frames[pages];
 363        int f;
 364
 365        /*
 366         * A GDT can be up to 64k in size, which corresponds to 8192
 367         * 8-byte entries, or 16 4k pages.
 368         */
 369
 370        BUG_ON(size > 65536);
 371        BUG_ON(va & ~PAGE_MASK);
 372
 373        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 374                int level;
 375                pte_t *ptep;
 376                unsigned long pfn, mfn;
 377                void *virt;
 378
 379                /*
 380                 * The GDT is per-cpu and is in the percpu data area.
 381                 * That can be virtually mapped, so we need to do a
 382                 * page-walk to get the underlying MFN for the
 383                 * hypercall.  The page can also be in the kernel's
 384                 * linear range, so we need to RO that mapping too.
 385                 */
 386                ptep = lookup_address(va, &level);
 387                BUG_ON(ptep == NULL);
 388
 389                pfn = pte_pfn(*ptep);
 390                mfn = pfn_to_mfn(pfn);
 391                virt = __va(PFN_PHYS(pfn));
 392
 393                frames[f] = mfn;
 394
 395                make_lowmem_page_readonly((void *)va);
 396                make_lowmem_page_readonly(virt);
 397        }
 398
 399        if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
 400                BUG();
 401}
 402
 403/*
 404 * load_gdt for early boot, when the gdt is only mapped once
 405 */
 406static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
 407{
 408        unsigned long va = dtr->address;
 409        unsigned int size = dtr->size + 1;
 410        unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
 411        unsigned long frames[pages];
 412        int f;
 413
 414        /*
 415         * A GDT can be up to 64k in size, which corresponds to 8192
 416         * 8-byte entries, or 16 4k pages.
 417         */
 418
 419        BUG_ON(size > 65536);
 420        BUG_ON(va & ~PAGE_MASK);
 421
 422        for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) {
 423                pte_t pte;
 424                unsigned long pfn, mfn;
 425
 426                pfn = virt_to_pfn(va);
 427                mfn = pfn_to_mfn(pfn);
 428
 429                pte = pfn_pte(pfn, PAGE_KERNEL_RO);
 430
 431                if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0))
 432                        BUG();
 433
 434                frames[f] = mfn;
 435        }
 436
 437        if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct)))
 438                BUG();
 439}
 440
 441static void load_TLS_descriptor(struct thread_struct *t,
 442                                unsigned int cpu, unsigned int i)
 443{
 444        struct desc_struct *gdt = get_cpu_gdt_table(cpu);
 445        xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
 446        struct multicall_space mc = __xen_mc_entry(0);
 447
 448        MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
 449}
 450
 451static void xen_load_tls(struct thread_struct *t, unsigned int cpu)
 452{
 453        /*
 454         * XXX sleazy hack: If we're being called in a lazy-cpu zone
 455         * and lazy gs handling is enabled, it means we're in a
 456         * context switch, and %gs has just been saved.  This means we
 457         * can zero it out to prevent faults on exit from the
 458         * hypervisor if the next process has no %gs.  Either way, it
 459         * has been saved, and the new value will get loaded properly.
 460         * This will go away as soon as Xen has been modified to not
 461         * save/restore %gs for normal hypercalls.
 462         *
 463         * On x86_64, this hack is not used for %gs, because gs points
 464         * to KERNEL_GS_BASE (and uses it for PDA references), so we
 465         * must not zero %gs on x86_64
 466         *
 467         * For x86_64, we need to zero %fs, otherwise we may get an
 468         * exception between the new %fs descriptor being loaded and
 469         * %fs being effectively cleared at __switch_to().
 470         */
 471        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
 472#ifdef CONFIG_X86_32
 473                lazy_load_gs(0);
 474#else
 475                loadsegment(fs, 0);
 476#endif
 477        }
 478
 479        xen_mc_batch();
 480
 481        load_TLS_descriptor(t, cpu, 0);
 482        load_TLS_descriptor(t, cpu, 1);
 483        load_TLS_descriptor(t, cpu, 2);
 484
 485        xen_mc_issue(PARAVIRT_LAZY_CPU);
 486}
 487
 488#ifdef CONFIG_X86_64
 489static void xen_load_gs_index(unsigned int idx)
 490{
 491        if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx))
 492                BUG();
 493}
 494#endif
 495
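    /*
     * LDT pages are read-only under Xen, so individual entries have to be
     * updated with a hypercall; flush pending multicalls first so the update
     * is not reordered against them.
     */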
 496static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 497                                const void *ptr)
 498{
 499        xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
 500        u64 entry = *(u64 *)ptr;
 501
 502        trace_xen_cpu_write_ldt_entry(dt, entrynum, entry);
 503
 504        preempt_disable();
 505
 506        xen_mc_flush();
 507        if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
 508                BUG();
 509
 510        preempt_enable();
 511}
 512
 513static int cvt_gate_to_trap(int vector, const gate_desc *val,
 514                            struct trap_info *info)
 515{
 516        unsigned long addr;
 517
 518        if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
 519                return 0;
 520
 521        info->vector = vector;
 522
 523        addr = gate_offset(*val);
 524#ifdef CONFIG_X86_64
 525        /*
 526         * Look for known traps using IST, and substitute them
 527         * appropriately.  The debugger ones are the only ones we care
 528         * about.  Xen will handle faults like double_fault and
 529         * machine_check, so we should never see them.  Warn if
 530         * there's an unexpected IST-using fault handler.
 531         */
 532        if (addr == (unsigned long)debug)
 533                addr = (unsigned long)xen_debug;
 534        else if (addr == (unsigned long)int3)
 535                addr = (unsigned long)xen_int3;
 536        else if (addr == (unsigned long)stack_segment)
 537                addr = (unsigned long)xen_stack_segment;
 538        else if (addr == (unsigned long)double_fault ||
 539                 addr == (unsigned long)nmi) {
 540                /* Don't need to handle these */
 541                return 0;
 542#ifdef CONFIG_X86_MCE
 543        } else if (addr == (unsigned long)machine_check) {
 544                return 0;
 545#endif
 546        } else {
 547                /* Some other trap using IST? */
 548                if (WARN_ON(val->ist != 0))
 549                        return 0;
 550        }
 551#endif  /* CONFIG_X86_64 */
 552        info->address = addr;
 553
 554        info->cs = gate_segment(*val);
 555        info->flags = val->dpl;
 556        /* interrupt gates clear IF */
 557        if (val->type == GATE_INTERRUPT)
 558                info->flags |= 1 << 2;
 559
 560        return 1;
 561}
 562
 563/* Locations of each CPU's IDT */
 564static DEFINE_PER_CPU(struct desc_ptr, idt_desc);
 565
 566/* Set an IDT entry.  If the entry is part of the current IDT, then
 567   also update Xen. */
 568static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g)
 569{
 570        unsigned long p = (unsigned long)&dt[entrynum];
 571        unsigned long start, end;
 572
 573        trace_xen_cpu_write_idt_entry(dt, entrynum, g);
 574
 575        preempt_disable();
 576
 577        start = __this_cpu_read(idt_desc.address);
 578        end = start + __this_cpu_read(idt_desc.size) + 1;
 579
 580        xen_mc_flush();
 581
 582        native_write_idt_entry(dt, entrynum, g);
 583
 584        if (p >= start && (p + 8) <= end) {
 585                struct trap_info info[2];
 586
 587                info[1].address = 0;
 588
 589                if (cvt_gate_to_trap(entrynum, g, &info[0]))
 590                        if (HYPERVISOR_set_trap_table(info))
 591                                BUG();
 592        }
 593
 594        preempt_enable();
 595}
 596
 597static void xen_convert_trap_info(const struct desc_ptr *desc,
 598                                  struct trap_info *traps)
 599{
 600        unsigned in, out, count;
 601
 602        count = (desc->size+1) / sizeof(gate_desc);
 603        BUG_ON(count > 256);
 604
 605        for (in = out = 0; in < count; in++) {
 606                gate_desc *entry = (gate_desc*)(desc->address) + in;
 607
 608                if (cvt_gate_to_trap(in, entry, &traps[out]))
 609                        out++;
 610        }
 611        traps[out].address = 0;
 612}
 613
 614void xen_copy_trap_info(struct trap_info *traps)
 615{
 616        const struct desc_ptr *desc = &__get_cpu_var(idt_desc);
 617
 618        xen_convert_trap_info(desc, traps);
 619}
 620
 621/* Load a new IDT into Xen.  In principle this can be per-CPU, so we
 622   hold a spinlock to protect the static traps[] array (static because
 623   it avoids allocation, and saves stack space). */
 624static void xen_load_idt(const struct desc_ptr *desc)
 625{
 626        static DEFINE_SPINLOCK(lock);
 627        static struct trap_info traps[257];
 628
 629        trace_xen_cpu_load_idt(desc);
 630
 631        spin_lock(&lock);
 632
 633        __get_cpu_var(idt_desc) = *desc;
 634
 635        xen_convert_trap_info(desc, traps);
 636
 637        xen_mc_flush();
 638        if (HYPERVISOR_set_trap_table(traps))
 639                BUG();
 640
 641        spin_unlock(&lock);
 642}
 643
 644/* Write a GDT descriptor entry.  Ignore LDT descriptors, since
 645   they're handled differently. */
 646static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 647                                const void *desc, int type)
 648{
 649        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
 650
 651        preempt_disable();
 652
 653        switch (type) {
 654        case DESC_LDT:
 655        case DESC_TSS:
 656                /* ignore */
 657                break;
 658
 659        default: {
 660                xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]);
 661
 662                xen_mc_flush();
 663                if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
 664                        BUG();
 665        }
 666
 667        }
 668
 669        preempt_enable();
 670}
 671
 672/*
 673 * Version of write_gdt_entry for use at early boot time, when an entry
 674 * needs to be updated as simply as possible.
 675 */
 676static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 677                                            const void *desc, int type)
 678{
 679        trace_xen_cpu_write_gdt_entry(dt, entry, desc, type);
 680
 681        switch (type) {
 682        case DESC_LDT:
 683        case DESC_TSS:
 684                /* ignore */
 685                break;
 686
 687        default: {
 688                xmaddr_t maddr = virt_to_machine(&dt[entry]);
 689
 690                if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc))
 691                        dt[entry] = *(struct desc_struct *)desc;
 692        }
 693
 694        }
 695}
 696
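    /* Tell Xen which kernel stack to switch to on ring transitions (sp0). */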
 697static void xen_load_sp0(struct tss_struct *tss,
 698                         struct thread_struct *thread)
 699{
 700        struct multicall_space mcs;
 701
 702        mcs = xen_mc_entry(0);
 703        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
 704        xen_mc_issue(PARAVIRT_LAZY_CPU);
 705}
 706
 707static void xen_set_iopl_mask(unsigned mask)
 708{
 709        struct physdev_set_iopl set_iopl;
 710
 711        /* Force the change at ring 0. */
 712        set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
 713        HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 714}
 715
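    /* Port 0x80 style I/O delays are pointless under Xen, so do nothing. */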
 716static void xen_io_delay(void)
 717{
 718}
 719
 720#ifdef CONFIG_X86_LOCAL_APIC
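    /*
     * PV guests have no direct access to the local APIC, so stub out the
     * accessors: reads return 0 and writes warn so stray users show up.
     */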
 721static u32 xen_apic_read(u32 reg)
 722{
 723        return 0;
 724}
 725
 726static void xen_apic_write(u32 reg, u32 val)
 727{
 728        /* Warn to see if there are any stray references */
 729        WARN_ON(1);
 730}
 731
 732static u64 xen_apic_icr_read(void)
 733{
 734        return 0;
 735}
 736
 737static void xen_apic_icr_write(u32 low, u32 id)
 738{
 739        /* Warn to see if there are any stray references */
 740        WARN_ON(1);
 741}
 742
 743static void xen_apic_wait_icr_idle(void)
 744{
 745        return;
 746}
 747
 748static u32 xen_safe_apic_wait_icr_idle(void)
 749{
 750        return 0;
 751}
 752
 753static void set_xen_basic_apic_ops(void)
 754{
 755        apic->read = xen_apic_read;
 756        apic->write = xen_apic_write;
 757        apic->icr_read = xen_apic_icr_read;
 758        apic->icr_write = xen_apic_icr_write;
 759        apic->wait_icr_idle = xen_apic_wait_icr_idle;
 760        apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle;
 761}
 762
 763#endif
 764
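    /* Clear CR0.TS with a (lazily batched) fpu_taskswitch hypercall. */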
 765static void xen_clts(void)
 766{
 767        struct multicall_space mcs;
 768
 769        mcs = xen_mc_entry(0);
 770
 771        MULTI_fpu_taskswitch(mcs.mc, 0);
 772
 773        xen_mc_issue(PARAVIRT_LAZY_CPU);
 774}
 775
 776static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
 777
 778static unsigned long xen_read_cr0(void)
 779{
 780        unsigned long cr0 = percpu_read(xen_cr0_value);
 781
 782        if (unlikely(cr0 == 0)) {
 783                cr0 = native_read_cr0();
 784                percpu_write(xen_cr0_value, cr0);
 785        }
 786
 787        return cr0;
 788}
 789
 790static void xen_write_cr0(unsigned long cr0)
 791{
 792        struct multicall_space mcs;
 793
 794        percpu_write(xen_cr0_value, cr0);
 795
 796        /* Only pay attention to cr0.TS; everything else is
 797           ignored. */
 798        mcs = xen_mc_entry(0);
 799
 800        MULTI_fpu_taskswitch(mcs.mc, (cr0 & X86_CR0_TS) != 0);
 801
 802        xen_mc_issue(PARAVIRT_LAZY_CPU);
 803}
 804
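    /*
     * PV guests may not enable global pages or superpages themselves, so
     * mask PGE and PSE out of anything written to %cr4.
     */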
 805static void xen_write_cr4(unsigned long cr4)
 806{
 807        cr4 &= ~X86_CR4_PGE;
 808        cr4 &= ~X86_CR4_PSE;
 809
 810        native_write_cr4(cr4);
 811}
 812
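    /*
     * Filter MSR writes: segment base MSRs become set_segment_base
     * hypercalls, fast-syscall setup MSRs are ignored (Xen handles those),
     * PAT is forwarded to xen_set_pat(), and the rest go to the native
     * handler.
     */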
 813static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 814{
 815        int ret;
 816
 817        ret = 0;
 818
 819        switch (msr) {
 820#ifdef CONFIG_X86_64
 821                unsigned which;
 822                u64 base;
 823
 824        case MSR_FS_BASE:               which = SEGBASE_FS; goto set;
 825        case MSR_KERNEL_GS_BASE:        which = SEGBASE_GS_USER; goto set;
 826        case MSR_GS_BASE:               which = SEGBASE_GS_KERNEL; goto set;
 827
 828        set:
 829                base = ((u64)high << 32) | low;
 830                if (HYPERVISOR_set_segment_base(which, base) != 0)
 831                        ret = -EIO;
 832                break;
 833#endif
 834
 835        case MSR_STAR:
 836        case MSR_CSTAR:
 837        case MSR_LSTAR:
 838        case MSR_SYSCALL_MASK:
 839        case MSR_IA32_SYSENTER_CS:
 840        case MSR_IA32_SYSENTER_ESP:
 841        case MSR_IA32_SYSENTER_EIP:
 842                /* Fast syscall setup is all done in hypercalls, so
 843                   these are all ignored.  Stub them out here to stop
 844                   Xen console noise. */
 845                break;
 846
 847        case MSR_IA32_CR_PAT:
 848                if (smp_processor_id() == 0)
 849                        xen_set_pat(((u64)high << 32) | low);
 850                break;
 851
 852        default:
 853                ret = native_write_msr_safe(msr, low, high);
 854        }
 855
 856        return ret;
 857}
 858
 859void xen_setup_shared_info(void)
 860{
 861        if (!xen_feature(XENFEAT_auto_translated_physmap)) {
 862                set_fixmap(FIX_PARAVIRT_BOOTMAP,
 863                           xen_start_info->shared_info);
 864
 865                HYPERVISOR_shared_info =
 866                        (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP);
 867        } else
 868                HYPERVISOR_shared_info =
 869                        (struct shared_info *)__va(xen_start_info->shared_info);
 870
 871#ifndef CONFIG_SMP
 872        /* In UP this is as good a place as any to set up vcpu info placement */
 873        xen_setup_vcpu_info_placement();
 874#endif
 875
 876        xen_setup_mfn_list_list();
 877}
 878
 879/* This is called once we have the cpu_possible_map */
 880void xen_setup_vcpu_info_placement(void)
 881{
 882        int cpu;
 883
 884        for_each_possible_cpu(cpu)
 885                xen_vcpu_setup(cpu);
 886
 887        /* xen_vcpu_setup managed to place the vcpu_info within the
 888           percpu area for all cpus, so make use of it */
 889        if (have_vcpu_info_placement) {
 890                pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct);
 891                pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct);
 892                pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct);
 893                pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct);
 894                pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
 895        }
 896}
 897
 898static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
 899                          unsigned long addr, unsigned len)
 900{
 901        char *start, *end, *reloc;
 902        unsigned ret;
 903
 904        start = end = reloc = NULL;
 905
 906#define SITE(op, x)                                                     \
 907        case PARAVIRT_PATCH(op.x):                                      \
 908        if (have_vcpu_info_placement) {                                 \
 909                start = (char *)xen_##x##_direct;                       \
 910                end = xen_##x##_direct_end;                             \
 911                reloc = xen_##x##_direct_reloc;                         \
 912        }                                                               \
 913        goto patch_site
 914
 915        switch (type) {
 916                SITE(pv_irq_ops, irq_enable);
 917                SITE(pv_irq_ops, irq_disable);
 918                SITE(pv_irq_ops, save_fl);
 919                SITE(pv_irq_ops, restore_fl);
 920#undef SITE
 921
 922        patch_site:
 923                if (start == NULL || (end-start) > len)
 924                        goto default_patch;
 925
 926                ret = paravirt_patch_insns(insnbuf, len, start, end);
 927
 928                /* Note: because reloc is assigned from something that
 929                   appears to be an array, gcc assumes it's non-null,
 930                   but doesn't know its relationship with start and
 931                   end. */
 932                if (reloc > start && reloc < end) {
 933                        int reloc_off = reloc - start;
 934                        long *relocp = (long *)(insnbuf + reloc_off);
 935                        long delta = start - (char *)addr;
 936
 937                        *relocp += delta;
 938                }
 939                break;
 940
 941        default_patch:
 942        default:
 943                ret = paravirt_patch_default(type, clobbers, insnbuf,
 944                                             addr, len);
 945                break;
 946        }
 947
 948        return ret;
 949}
 950
 951static const struct pv_info xen_info __initconst = {
 952        .paravirt_enabled = 1,
 953        .shared_kernel_pmd = 0,
 954
 955#ifdef CONFIG_X86_64
 956        .extra_user_64bit_cs = FLAT_USER_CS64,
 957#endif
 958
 959        .name = "Xen",
 960};
 961
 962static const struct pv_init_ops xen_init_ops __initconst = {
 963        .patch = xen_patch,
 964};
 965
 966static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 967        .cpuid = xen_cpuid,
 968
 969        .set_debugreg = xen_set_debugreg,
 970        .get_debugreg = xen_get_debugreg,
 971
 972        .clts = xen_clts,
 973
 974        .read_cr0 = xen_read_cr0,
 975        .write_cr0 = xen_write_cr0,
 976
 977        .read_cr4 = native_read_cr4,
 978        .read_cr4_safe = native_read_cr4_safe,
 979        .write_cr4 = xen_write_cr4,
 980
 981        .wbinvd = native_wbinvd,
 982
 983        .read_msr = native_read_msr_safe,
 984        .write_msr = xen_write_msr_safe,
 985        .read_tsc = native_read_tsc,
 986        .read_pmc = native_read_pmc,
 987
 988        .iret = xen_iret,
 989        .irq_enable_sysexit = xen_sysexit,
 990#ifdef CONFIG_X86_64
 991        .usergs_sysret32 = xen_sysret32,
 992        .usergs_sysret64 = xen_sysret64,
 993#endif
 994
 995        .load_tr_desc = paravirt_nop,
 996        .set_ldt = xen_set_ldt,
 997        .load_gdt = xen_load_gdt,
 998        .load_idt = xen_load_idt,
 999        .load_tls = xen_load_tls,
1000#ifdef CONFIG_X86_64
1001        .load_gs_index = xen_load_gs_index,
1002#endif
1003
1004        .alloc_ldt = xen_alloc_ldt,
1005        .free_ldt = xen_free_ldt,
1006
1007        .store_gdt = native_store_gdt,
1008        .store_idt = native_store_idt,
1009        .store_tr = xen_store_tr,
1010
1011        .write_ldt_entry = xen_write_ldt_entry,
1012        .write_gdt_entry = xen_write_gdt_entry,
1013        .write_idt_entry = xen_write_idt_entry,
1014        .load_sp0 = xen_load_sp0,
1015
1016        .set_iopl_mask = xen_set_iopl_mask,
1017        .io_delay = xen_io_delay,
1018
1019        /* Xen takes care of %gs when switching to usermode for us */
1020        .swapgs = paravirt_nop,
1021
1022        .start_context_switch = paravirt_start_context_switch,
1023        .end_context_switch = xen_end_context_switch,
1024};
1025
1026static const struct pv_apic_ops xen_apic_ops __initconst = {
1027#ifdef CONFIG_X86_LOCAL_APIC
1028        .startup_ipi_hook = paravirt_nop,
1029#endif
1030};
1031
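    /* Ask the hypervisor to shut this domain down for the given reason. */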
1032static void xen_reboot(int reason)
1033{
1034        struct sched_shutdown r = { .reason = reason };
1035
1036        if (HYPERVISOR_sched_op(SCHEDOP_shutdown, &r))
1037                BUG();
1038}
1039
1040static void xen_restart(char *msg)
1041{
1042        xen_reboot(SHUTDOWN_reboot);
1043}
1044
1045static void xen_emergency_restart(void)
1046{
1047        xen_reboot(SHUTDOWN_reboot);
1048}
1049
1050static void xen_machine_halt(void)
1051{
1052        xen_reboot(SHUTDOWN_poweroff);
1053}
1054
1055static void xen_machine_power_off(void)
1056{
1057        if (pm_power_off)
1058                pm_power_off();
1059        xen_reboot(SHUTDOWN_poweroff);
1060}
1061
1062static void xen_crash_shutdown(struct pt_regs *regs)
1063{
1064        xen_reboot(SHUTDOWN_crash);
1065}
1066
1067static int
1068xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
1069{
1070        xen_reboot(SHUTDOWN_crash);
1071        return NOTIFY_DONE;
1072}
1073
1074static struct notifier_block xen_panic_block = {
1075        .notifier_call = xen_panic_event,
1076};
1077
1078int xen_panic_handler_init(void)
1079{
1080        atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
1081        return 0;
1082}
1083
1084static const struct machine_ops xen_machine_ops __initconst = {
1085        .restart = xen_restart,
1086        .halt = xen_machine_halt,
1087        .power_off = xen_machine_power_off,
1088        .shutdown = xen_machine_halt,
1089        .crash_shutdown = xen_crash_shutdown,
1090        .emergency_restart = xen_emergency_restart,
1091};
1092
1093/*
1094 * Set up the GDT and segment registers for -fstack-protector.  Until
1095 * we do this, we have to be careful not to call any stack-protected
1096 * function, which is most of the kernel.
1097 */
1098static void __init xen_setup_stackprotector(void)
1099{
1100        pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot;
1101        pv_cpu_ops.load_gdt = xen_load_gdt_boot;
1102
1103        setup_stack_canary_segment(0);
1104        switch_to_new_gdt(0);
1105
1106        pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry;
1107        pv_cpu_ops.load_gdt = xen_load_gdt;
1108}
1109
1110/* First C function to be called on Xen boot */
1111asmlinkage void __init xen_start_kernel(void)
1112{
1113        struct physdev_set_iopl set_iopl;
1114        int rc;
1115        pgd_t *pgd;
1116
1117        if (!xen_start_info)
1118                return;
1119
1120        xen_domain_type = XEN_PV_DOMAIN;
1121
1122        xen_setup_machphys_mapping();
1123
1124        /* Install Xen paravirt ops */
1125        pv_info = xen_info;
1126        pv_init_ops = xen_init_ops;
1127        pv_cpu_ops = xen_cpu_ops;
1128        pv_apic_ops = xen_apic_ops;
1129
1130        x86_init.resources.memory_setup = xen_memory_setup;
1131        x86_init.oem.arch_setup = xen_arch_setup;
1132        x86_init.oem.banner = xen_banner;
1133
1134        xen_init_time_ops();
1135
1136        /*
1137         * Set up some pagetable state before starting to set any ptes.
1138         */
1139
1140        xen_init_mmu_ops();
1141
1142        /* Prevent unwanted bits from being set in PTEs. */
1143        __supported_pte_mask &= ~_PAGE_GLOBAL;
1144#if 0
1145        if (!xen_initial_domain())
1146#endif
1147                __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1148
1149        __supported_pte_mask |= _PAGE_IOMAP;
1150
1151        /*
1152         * Prevent page tables from being allocated in highmem, even
1153         * if CONFIG_HIGHPTE is enabled.
1154         */
1155        __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
1156
1157        /* Work out if we support NX */
1158        x86_configure_nx();
1159
1160        xen_setup_features();
1161
1162        /* Get mfn list */
1163        if (!xen_feature(XENFEAT_auto_translated_physmap))
1164                xen_build_dynamic_phys_to_machine();
1165
1166        /*
1167         * Set up kernel GDT and segment registers, mainly so that
1168         * -fstack-protector code can be executed.
1169         */
1170        xen_setup_stackprotector();
1171
1172        xen_init_irq_ops();
1173        xen_init_cpuid_mask();
1174
1175#ifdef CONFIG_X86_LOCAL_APIC
1176        /*
1177         * set up the basic apic ops.
1178         */
1179        set_xen_basic_apic_ops();
1180#endif
1181
1182        if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1183                pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1184                pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
1185        }
1186
1187        machine_ops = xen_machine_ops;
1188
1189        /*
1190         * The only reliable way to retain the initial address of the
1191         * percpu gdt_page is to remember it here, so we can go and
1192         * mark it RW later, when the initial percpu area is freed.
1193         */
1194        xen_initial_gdt = &per_cpu(gdt_page, 0);
1195
1196        xen_smp_init();
1197
1198#ifdef CONFIG_ACPI_NUMA
1199        /*
1200         * The pages we get from Xen are not related to machine pages, so
1201         * any NUMA information the kernel tries to get from ACPI will
1202         * be meaningless.  Prevent it from trying.
1203         */
1204        acpi_numa = -1;
1205#endif
1206
1207        pgd = (pgd_t *)xen_start_info->pt_base;
1208
1209        /* Don't do the full vcpu_info placement stuff until we have a
1210           possible map and a non-dummy shared_info. */
1211        per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
1212
1213        local_irq_disable();
1214        early_boot_irqs_disabled = true;
1215
1216        xen_raw_console_write("mapping kernel into physical memory\n");
1217        pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
1218        xen_ident_map_ISA();
1219
1220        /* Allocate and initialize top and mid mfn levels for p2m structure */
1221        xen_build_mfn_list_list();
1222
1223        /* keep using Xen gdt for now; no urgent need to change it */
1224
1225#ifdef CONFIG_X86_32
1226        pv_info.kernel_rpl = 1;
1227        if (xen_feature(XENFEAT_supervisor_mode_kernel))
1228                pv_info.kernel_rpl = 0;
1229#else
1230        pv_info.kernel_rpl = 0;
1231#endif
1232        /* set the limit of our address space */
1233        xen_reserve_top();
1234
1235        /* We used to do this in xen_arch_setup, but that is too late on AMD,
1236         * where early_cpu_init (run before ->arch_setup()) calls early_amd_init,
1237         * which pokes the 0xcf8 port.
1238         */
1239        set_iopl.iopl = 1;
1240        rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1241        if (rc != 0)
1242                xen_raw_printk("physdev_op failed %d\n", rc);
1243
1244#ifdef CONFIG_X86_32
1245        /* set up basic CPUID stuff */
1246        cpu_detect(&new_cpu_data);
1247        new_cpu_data.hard_math = 1;
1248        new_cpu_data.wp_works_ok = 1;
1249        new_cpu_data.x86_capability[0] = cpuid_edx(1);
1250#endif
1251
1252        /* Poke various useful things into boot_params */
1253        boot_params.hdr.type_of_loader = (9 << 4) | 0;
1254        boot_params.hdr.ramdisk_image = xen_start_info->mod_start
1255                ? __pa(xen_start_info->mod_start) : 0;
1256        boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
1257        boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
1258
1259        if (!xen_initial_domain()) {
1260                add_preferred_console("xenboot", 0, NULL);
1261                add_preferred_console("tty", 0, NULL);
1262                add_preferred_console("hvc", 0, NULL);
1263                if (pci_xen)
1264                        x86_init.pci.arch_init = pci_xen_init;
1265        } else {
1266                const struct dom0_vga_console_info *info =
1267                        (void *)((char *)xen_start_info +
1268                                 xen_start_info->console.dom0.info_off);
1269
1270                xen_init_vga(info, xen_start_info->console.dom0.info_size);
1271                xen_start_info->console.domU.mfn = 0;
1272                xen_start_info->console.domU.evtchn = 0;
1273
1274                /* Make sure ACS will be enabled */
1275                pci_request_acs();
1276        }
1277
1278
1279        xen_raw_console_write("about to get started...\n");
1280
1281        xen_setup_runstate_info(0);
1282
1283        /* Start the world */
1284#ifdef CONFIG_X86_32
1285        i386_start_kernel();
1286#else
1287        x86_64_start_reservations((char *)__pa_symbol(&boot_params));
1288#endif
1289}
1290
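    /*
     * Read the Xen version from the hypervisor CPUID leaves and install the
     * hypercall page by writing its PFN to the MSR advertised in leaf base+2.
     */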
1291static int init_hvm_pv_info(int *major, int *minor)
1292{
1293        uint32_t eax, ebx, ecx, edx, pages, msr, base;
1294        u64 pfn;
1295
1296        base = xen_cpuid_base();
1297        cpuid(base + 1, &eax, &ebx, &ecx, &edx);
1298
1299        *major = eax >> 16;
1300        *minor = eax & 0xffff;
1301        printk(KERN_INFO "Xen version %d.%d.\n", *major, *minor);
1302
1303        cpuid(base + 2, &pages, &msr, &ecx, &edx);
1304
1305        pfn = __pa(hypercall_page);
1306        wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32));
1307
1308        xen_setup_features();
1309
1310        pv_info.name = "Xen HVM";
1311
1312        xen_domain_type = XEN_HVM_DOMAIN;
1313
1314        return 0;
1315}
1316
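    /*
     * Map the shared_info page into our physical address space with
     * XENMEM_add_to_physmap and point each online CPU's xen_vcpu at its
     * slot within it.
     */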
1317void __ref xen_hvm_init_shared_info(void)
1318{
1319        int cpu;
1320        struct xen_add_to_physmap xatp;
1321        static struct shared_info *shared_info_page = NULL;
1322
1323        if (!shared_info_page)
1324                shared_info_page = (struct shared_info *)
1325                        extend_brk(PAGE_SIZE, PAGE_SIZE);
1326        xatp.domid = DOMID_SELF;
1327        xatp.idx = 0;
1328        xatp.space = XENMAPSPACE_shared_info;
1329        xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT;
1330        if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
1331                BUG();
1332
1333        HYPERVISOR_shared_info = (struct shared_info *)shared_info_page;
1334
1335        /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info
1336         * page; we use it in the event channel upcall and in some pvclock
1337         * related functions. We don't need the vcpu_info placement
1338         * optimizations because we don't use any pv_mmu or pv_irq op on
1339         * HVM.
1340         * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is
1341         * online; it is run again at resume time, and in that case multiple
1342         * vcpus might be online. */
1343        for_each_online_cpu(cpu) {
1344                per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
1345        }
1346}
1347
1348#ifdef CONFIG_XEN_PVHVM
1349static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
1350                                    unsigned long action, void *hcpu)
1351{
1352        int cpu = (long)hcpu;
1353        switch (action) {
1354        case CPU_UP_PREPARE:
1355                xen_vcpu_setup(cpu);
1356                if (xen_have_vector_callback)
1357                        xen_init_lock_cpu(cpu);
1358                break;
1359        default:
1360                break;
1361        }
1362        return NOTIFY_OK;
1363}
1364
1365static struct notifier_block xen_hvm_cpu_notifier __cpuinitdata = {
1366        .notifier_call  = xen_hvm_cpu_notify,
1367};
1368
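    /*
     * Initialise PV-on-HVM support: shared_info, vector callback detection,
     * emulated device unplug, and the PV time and mmu ops.
     */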
1369static void __init xen_hvm_guest_init(void)
1370{
1371        int r;
1372        int major, minor;
1373
1374        r = init_hvm_pv_info(&major, &minor);
1375        if (r < 0)
1376                return;
1377
1378        xen_hvm_init_shared_info();
1379
1380        if (xen_feature(XENFEAT_hvm_callback_vector))
1381                xen_have_vector_callback = 1;
1382        xen_hvm_smp_init();
1383        register_cpu_notifier(&xen_hvm_cpu_notifier);
1384        xen_unplug_emulated_devices();
1385        x86_init.irqs.intr_init = xen_init_IRQ;
1386        xen_hvm_init_time_ops();
1387        xen_hvm_init_mmu_ops();
1388}
1389
1390static bool __init xen_hvm_platform(void)
1391{
1392        if (xen_pv_domain())
1393                return false;
1394
1395        if (!xen_cpuid_base())
1396                return false;
1397
1398        return true;
1399}
1400
1401bool xen_hvm_need_lapic(void)
1402{
1403        if (xen_pv_domain())
1404                return false;
1405        if (!xen_hvm_domain())
1406                return false;
1407        if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
1408                return false;
1409        return true;
1410}
1411EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
1412
1413const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
1414        .name                   = "Xen HVM",
1415        .detect                 = xen_hvm_platform,
1416        .init_platform          = xen_hvm_guest_init,
1417};
1418EXPORT_SYMBOL(x86_hyper_xen_hvm);
1419#endif
1420